// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "structs.h" #include "defines.h" #define DEBUG_MEM_ALLOC 0 /** typedef struct _bufferStruct { void * m_pIn; void * m_pOut; cl_mem m_outBuffer; cl_mem m_inBuffer; size_t m_bufSize; } bufferStruct; */ clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue) { clState * pResult = (clState *)malloc(sizeof(clState)); #if DEBUG_MEM_ALLOC log_info("malloc clState * %x\n", pResult); #endif pResult->m_device = device; pResult->m_context = context; pResult->m_queue = queue; pResult->m_kernel = NULL; pResult->m_program = NULL; return pResult; } clState * destroyClState(clState * pState) { clStateDestroyProgramAndKernel(pState); #if DEBUG_MEM_ALLOC log_info("delete (free) clState * %x\n", pState); #endif free(pState); return NULL; } int clStateMakeProgram(clState * pState, const char * prog, const char * kernelName) { const char * srcArr[1] = {NULL}; srcArr[0] = prog; int err = create_single_kernel_helper(pState->m_context, &(pState->m_program), &(pState->m_kernel), 1, srcArr, kernelName ); #if DEBUG_MEM_ALLOC log_info("create program and kernel\n"); #endif return err; } int runKernel(clState * pState, size_t numThreads) { int err; pState->m_numThreads = numThreads; err = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel, 1, NULL, &(pState->m_numThreads), NULL, 0, NULL, NULL); if(err != CL_SUCCESS) { log_error("clEnqueueNDRangeKernel returned %d (%x)\n", err, err); return -1; } return 0; } void clStateDestroyProgramAndKernel(clState * pState) { #if DEBUG_MEM_ALLOC log_info("destroy program and kernel\n"); #endif if(pState->m_kernel != NULL) { clReleaseKernel( pState->m_kernel ); pState->m_kernel = NULL; } if(pState->m_program != NULL) { clReleaseProgram( pState->m_program ); pState->m_program = NULL; } } bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState) { int error; bufferStruct * pResult = (bufferStruct *)malloc(sizeof(bufferStruct)); #if DEBUG_MEM_ALLOC log_info("malloc bufferStruct * %x\n", pResult); #endif pResult->m_bufSizeIn = inSize; pResult->m_bufSizeOut = outSize; pResult->m_pIn = malloc(inSize); pResult->m_pOut = malloc(outSize); #if DEBUG_MEM_ALLOC log_info("malloc m_pIn %x\n", pResult->m_pIn); log_info("malloc m_pOut %x\n", pResult->m_pOut); #endif pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY, inSize, NULL, &error); if( pResult->m_inBuffer == NULL ) { vlog_error( "clCreateArray failed for input (%d)\n", error ); return destroyBufferStruct(pResult, pClState); } #if DEBUG_MEM_ALLOC log_info("clCreateBuffer %x\n", pResult->m_inBuffer); #endif pResult->m_outBuffer = clCreateBuffer( pClState->m_context, CL_MEM_WRITE_ONLY, outSize, NULL, &error ); if( pResult->m_outBuffer == NULL ) { vlog_error( "clCreateArray failed for output (%d)\n", error ); return destroyBufferStruct(pResult, pClState); } #if DEBUG_MEM_ALLOC log_info("clCreateBuffer %x\n", pResult->m_outBuffer); #endif pResult->m_bufferUploaded = false; return pResult; } bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState) { if(destroyMe) { if(destroyMe->m_outBuffer != NULL) { #if DEBUG_MEM_ALLOC log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer); #endif clReleaseMemObject(destroyMe->m_outBuffer); destroyMe->m_outBuffer = NULL; } if(destroyMe->m_inBuffer != NULL) { #if DEBUG_MEM_ALLOC log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer); #endif clReleaseMemObject(destroyMe->m_inBuffer); destroyMe->m_inBuffer = NULL; } if(destroyMe->m_pIn != NULL) { #if DEBUG_MEM_ALLOC log_info("delete (free) m_pIn %x\n", destroyMe->m_pIn); #endif free(destroyMe->m_pIn); destroyMe->m_pIn = NULL; } if(destroyMe->m_pOut != NULL) { #if DEBUG_MEM_ALLOC log_info("delete (free) m_pOut %x\n", destroyMe->m_pOut); #endif free(destroyMe->m_pOut); destroyMe->m_pOut = NULL; } #if DEBUG_MEM_ALLOC log_info("delete (free) bufferStruct * %x\n", destroyMe); #endif free((void *)destroyMe); destroyMe = NULL; } return destroyMe; } void initContents(bufferStruct * pBufferStruct, clState * pClState, size_t typeSize, size_t countIn, size_t countOut ) { size_t i; uint64_t start = 0; switch(typeSize) { case 1: { uint8_t* ub = (uint8_t *)(pBufferStruct->m_pIn); for (i=0; i < countIn; ++i) { ub[i] = (uint8_t)start++; } break; } case 2: { uint16_t* us = (uint16_t *)(pBufferStruct->m_pIn); for (i=0; i < countIn; ++i) { us[i] = (uint16_t)start++; } break; } case 4: { if (!g_wimpyMode) { uint32_t* ui = (uint32_t *)(pBufferStruct->m_pIn); for (i=0; i < countIn; ++i) { ui[i] = (uint32_t)start++; } } else { // The short test doesn't iterate over the entire 32 bit space so // we alternate between positive and negative values int32_t* ui = (int32_t *)(pBufferStruct->m_pIn); int32_t sign = 1; for (i=0; i < countIn; ++i, ++start) { ui[i] = (int32_t)start*sign; sign = sign * -1; } } break; } case 8: { // We don't iterate over the entire space of 64 bit so for the // selects, we want to test positive and negative values int64_t* ll = (int64_t *)(pBufferStruct->m_pIn); int64_t sign = 1; for (i=0; i < countIn; ++i, ++start) { ll[i] = start*sign; sign = sign * -1; } break; } default: { log_error("invalid type size %x\n", (int)typeSize); } } // pBufferStruct->m_bufSizeIn // pBufferStruct->m_bufSizeOut } int pushArgs(bufferStruct * pBufferStruct, clState * pClState) { int err; if( !pBufferStruct->m_bufferUploaded ) { err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer, CL_TRUE, 0, pBufferStruct->m_bufSizeIn, pBufferStruct->m_pIn, 0, NULL, NULL); #if DEBUG_MEM_ALLOC log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer); #endif if(err != CL_SUCCESS) { log_error("clEnqueueWriteBuffer failed\n"); return -1; } pBufferStruct->m_bufferUploaded = true; } err = clSetKernelArg(pClState->m_kernel, 0, sizeof(pBufferStruct->m_inBuffer), // pBufferStruct->m_bufSizeIn, &(pBufferStruct->m_inBuffer)); #if DEBUG_MEM_ALLOC // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_inBuffer); #endif if(err != CL_SUCCESS) { log_error("clSetKernelArgs failed, first arg (0)\n"); return -1; } err = clSetKernelArg(pClState->m_kernel, 1, sizeof(pBufferStruct->m_outBuffer), // pBufferStruct->m_bufSizeOut, &(pBufferStruct->m_outBuffer)); if(err != CL_SUCCESS) { log_error("clSetKernelArgs failed, second arg (1)\n"); return -1; } #if DEBUG_MEM_ALLOC // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_outBuffer); #endif return 0; } int retrieveResults(bufferStruct * pBufferStruct, clState * pClState) { int err; err = clEnqueueReadBuffer(pClState->m_queue, pBufferStruct->m_outBuffer, CL_TRUE, 0, pBufferStruct->m_bufSizeOut, pBufferStruct->m_pOut, 0, NULL, NULL); if(err != CL_SUCCESS) { log_error("clEnqueueReadBuffer failed\n"); return -1; } return 0; } // vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames // and g_arrVecSizes int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState, size_t minAlign) { size_t i; cl_uint * targetArr = (cl_uint *)(pBufferStruct->m_pOut); for(i = 0; i < pClState->m_numThreads; ++i) { if((targetArr[i])%minAlign != (cl_uint)0) { vlog_error("Error %d (of %d). Expected a multple of %x, got %x\n", i, pClState->m_numThreads, minAlign, targetArr[i]); return -1; } } /* log_info("\n"); for(i = 0; i < 4; ++i) { log_info("%lx, ", targetArr[i]); } log_info("\n"); fflush(stdout); */ return 0; } // vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames // and g_arrVecSizes int checkPackedCorrectness(bufferStruct * pBufferStruct, clState * pClState, size_t totSize, size_t beforeSize) { size_t i; cl_uint * targetArr = (cl_uint *)(pBufferStruct->m_pOut); for(i = 0; i < pClState->m_numThreads; ++i) { if((targetArr[i]-beforeSize)%totSize != (cl_uint)0) { vlog_error("Error %d (of %d). Expected %d more than a multple of %d, got %d \n", i, pClState->m_numThreads, beforeSize, totSize, targetArr[i]); return -1; } } /* log_info("\n"); for(i = 0; i < 4; ++i) { log_info("%lx, ", targetArr[i]); } log_info("\n"); fflush(stdout); */ return 0; }