2013-06-16 17:18:23 +01:00
|
|
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
|
|
/*
|
|
|
|
* This file is part of the LibreOffice project.
|
|
|
|
*
|
|
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2013-06-26 13:07:47 +01:00
|
|
|
#include "sal/config.h"
|
2013-06-16 17:55:11 +01:00
|
|
|
#include "random.hxx"
|
2013-06-16 17:18:23 +01:00
|
|
|
#include "openclwrapper.hxx"
|
|
|
|
#include "oclkernels.hxx"
|
2013-06-26 13:07:47 +01:00
|
|
|
#ifdef SAL_WIN32
|
2013-06-26 12:19:51 +01:00
|
|
|
#include <Windows.h>
|
|
|
|
#endif
|
|
|
|
//#define USE_KERNEL_FILE
|
2013-06-28 10:19:27 +03:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
GPUEnv OpenclDevice::gpuEnv;
|
|
|
|
int OpenclDevice::isInited =0;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
|
2013-06-26 13:07:47 +01:00
|
|
|
#ifdef SAL_WIN32
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
#define OPENCL_DLL_NAME "opencllo.dll"
|
|
|
|
#define OCLERR -1
|
|
|
|
#define OCLSUCCESS 1
|
|
|
|
HINSTANCE HOpenclDll = NULL;
|
|
|
|
void *OpenclDll = NULL;
|
|
|
|
|
|
|
|
int OpenclDevice::LoadOpencl()
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
//fprintf(stderr, " LoadOpenclDllxx... \n");
|
|
|
|
OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
|
|
|
|
OpenclDll = LoadLibrary(OPENCL_DLL_NAME);
|
|
|
|
if (!static_cast<HINSTANCE>(OpenclDll))
|
|
|
|
{
|
|
|
|
fprintf(stderr, " Load opencllo.dll failed! \n");
|
|
|
|
FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
|
|
|
|
return OCLERR;
|
|
|
|
}
|
|
|
|
fprintf(stderr, " Load opencllo.dll successfully!\n");
|
|
|
|
return OCLSUCCESS;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
void OpenclDevice::FreeOpenclDll()
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
fprintf(stderr, " Free opencllo.dll ... \n");
|
|
|
|
if(!static_cast<HINSTANCE>(OpenclDll))
|
|
|
|
FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
#endif
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::InitEnv()
|
|
|
|
{
|
2013-06-26 13:07:47 +01:00
|
|
|
#ifdef SAL_WIN32
|
2013-07-08 10:49:05 +01:00
|
|
|
while(1)
|
2013-06-26 13:07:47 +01:00
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
if(1==LoadOpencl())
|
|
|
|
break;
|
|
|
|
}
|
2013-06-26 12:19:51 +01:00
|
|
|
#endif
|
2013-07-08 10:49:05 +01:00
|
|
|
InitOpenclRunEnv(0,NULL);
|
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
int OpenclDevice::ReleaseOpenclRunEnv() {
|
2013-07-08 10:49:05 +01:00
|
|
|
ReleaseOpenclEnv(&gpuEnv);
|
2013-06-26 13:07:47 +01:00
|
|
|
#ifdef SAL_WIN32
|
2013-07-08 10:49:05 +01:00
|
|
|
FreeOpenclDll();
|
2013-06-26 12:19:51 +01:00
|
|
|
#endif
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
///////////////////////////////////////////////////////
|
|
|
|
///////////////////////////////////////////////////////
|
2013-07-08 21:35:26 +01:00
|
|
|
inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName)
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
strcpy(gpuEnv.mArrykernelNames[kCount], kName);
|
|
|
|
gpuEnv.mnKernelCount++;
|
2013-06-16 17:18:23 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
int OpenclDevice::RegistOpenclKernel()
|
|
|
|
{
|
|
|
|
if (!gpuEnv.mnIsUserCreated)
|
2013-06-26 12:19:51 +01:00
|
|
|
memset(&gpuEnv, 0, sizeof(gpuEnv));
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuEnv.mnFileCount = 0; //argc;
|
|
|
|
gpuEnv.mnKernelCount = 0UL;
|
2013-06-26 12:19:51 +01:00
|
|
|
|
|
|
|
AddKernelConfig(0, (const char*) "hello");
|
|
|
|
AddKernelConfig(1, (const char*) "oclformula");
|
|
|
|
AddKernelConfig(2, (const char*) "oclFormulaMin");
|
|
|
|
AddKernelConfig(3, (const char*) "oclFormulaMax");
|
|
|
|
AddKernelConfig(4, (const char*) "oclFormulaSum");
|
|
|
|
AddKernelConfig(5, (const char*) "oclFormulaCount");
|
|
|
|
AddKernelConfig(6, (const char*) "oclFormulaAverage");
|
|
|
|
AddKernelConfig(7, (const char*) "oclFormulaSumproduct");
|
|
|
|
AddKernelConfig(8, (const char*) "oclFormulaMinverse");
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
AddKernelConfig(9, (const char*) "oclSignedAdd");
|
2013-06-26 12:19:51 +01:00
|
|
|
AddKernelConfig(10, (const char*) "oclSignedSub");
|
|
|
|
AddKernelConfig(11, (const char*) "oclSignedMul");
|
|
|
|
AddKernelConfig(12, (const char*) "oclSignedDiv");
|
2013-07-08 21:35:26 +01:00
|
|
|
AddKernelConfig(13, (const char*) "oclAverageDelta");
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
return 0;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
2013-07-08 21:35:26 +01:00
|
|
|
|
|
|
|
OpenclDevice::OpenclDevice()
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
//InitEnv();
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
OpenclDevice::~OpenclDevice()
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
//ReleaseOpenclRunEnv();
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
int OpenclDevice::SetKernelEnv(KernelEnv *envInfo)
|
|
|
|
{
|
|
|
|
envInfo->mpkContext = gpuEnv.mpContext;
|
|
|
|
envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
|
|
|
|
envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
2013-07-08 21:35:26 +01:00
|
|
|
|
|
|
|
int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName)
|
|
|
|
{
|
2013-06-26 12:19:51 +01:00
|
|
|
//printf("CheckKernelName,total count of kernels...%d\n", gpuEnv.kernelCount);
|
|
|
|
int kCount;
|
2013-07-08 10:49:05 +01:00
|
|
|
for(kCount=0; kCount < gpuEnv.mnKernelCount; kCount++) {
|
|
|
|
if(strcasecmp(kernelName, gpuEnv.mArrykernelNames[kCount]) == 0) {
|
2013-07-08 21:35:26 +01:00
|
|
|
printf("match %s kernel right\n",kernelName);
|
|
|
|
break;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
envInfo->mpkKernel = gpuEnv.mpArryKernels[kCount];
|
|
|
|
strcpy(envInfo->mckKernelName, kernelName);
|
2013-06-26 12:19:51 +01:00
|
|
|
if (envInfo == (KernelEnv *) NULL)
|
|
|
|
{
|
|
|
|
printf("get err func and env\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
2013-06-26 12:19:51 +01:00
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
int OpenclDevice::ConvertToString(const char *filename, char **source)
|
|
|
|
{
|
2013-06-16 17:18:23 +01:00
|
|
|
int file_size;
|
|
|
|
size_t result;
|
|
|
|
FILE *file = NULL;
|
|
|
|
file_size = 0;
|
|
|
|
result = 0;
|
|
|
|
file = fopen(filename, "rb+");
|
2013-06-26 12:19:51 +01:00
|
|
|
printf("open kernel file %s.\n",filename);
|
2013-06-16 17:18:23 +01:00
|
|
|
|
|
|
|
if (file != NULL) {
|
2013-07-08 10:49:05 +01:00
|
|
|
printf("Open ok!\n");
|
2013-06-16 17:18:23 +01:00
|
|
|
fseek(file, 0, SEEK_END);
|
|
|
|
|
|
|
|
file_size = ftell(file);
|
|
|
|
rewind(file);
|
2013-07-04 13:37:35 +03:00
|
|
|
*source = (char*) malloc(file_size + 1);
|
2013-06-16 17:18:23 +01:00
|
|
|
if (*source == (char*) NULL) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
result = fread(*source, 1, file_size, file);
|
|
|
|
if (result != (size_t) file_size) {
|
|
|
|
free(*source);
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
(*source)[file_size] = '\0';
|
|
|
|
fclose(file);
|
|
|
|
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
printf("open kernel file failed.\n");
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle)
|
|
|
|
{
|
|
|
|
unsigned int i = 0;
|
2013-07-08 10:49:05 +01:00
|
|
|
cl_int status;
|
|
|
|
char *str = NULL;
|
|
|
|
FILE *fd = NULL;
|
|
|
|
cl_uint numDevices=0;
|
|
|
|
status = clGetDeviceIDs(gpuEnv.mpPlatformID, // platform
|
2013-07-11 11:43:57 +03:00
|
|
|
CL_DEVICE_TYPE_ALL, // device_type
|
2013-07-08 10:49:05 +01:00
|
|
|
0, // num_entries
|
|
|
|
NULL, // devices ID
|
|
|
|
&numDevices);
|
|
|
|
for (i = 0; i <numDevices; i++) {
|
|
|
|
char fileName[256] = { 0 }, cl_name[128] = { 0 };
|
|
|
|
if (gpuEnv.mpArryDevsID[i] != 0) {
|
|
|
|
char deviceName[1024];
|
|
|
|
status = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
str = (char*) strstr(clFileName, (char*) ".cl");
|
|
|
|
memcpy(cl_name, clFileName, str - clFileName);
|
|
|
|
cl_name[str - clFileName] = '\0';
|
|
|
|
sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
|
|
|
|
fd = fopen(fileName, "rb");
|
|
|
|
status = (fd != NULL) ? 1 : 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (fd != NULL) {
|
|
|
|
*fhandle = fd;
|
|
|
|
}
|
|
|
|
|
|
|
|
return status;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary,
|
2013-07-08 21:35:26 +01:00
|
|
|
size_t numBytes)
|
|
|
|
{
|
2013-06-16 17:18:23 +01:00
|
|
|
FILE *output = NULL;
|
|
|
|
output = fopen(fileName, "wb");
|
|
|
|
if (output == NULL) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-07-04 13:37:35 +03:00
|
|
|
fwrite(birary, 1, numBytes, output);
|
2013-06-16 17:18:23 +01:00
|
|
|
fclose(output);
|
|
|
|
|
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
|
2013-07-08 21:35:26 +01:00
|
|
|
const char * clFileName)
|
|
|
|
{
|
2013-06-26 12:19:51 +01:00
|
|
|
unsigned int i = 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
cl_int status;
|
2013-07-11 11:30:42 +03:00
|
|
|
size_t *binarySizes;
|
|
|
|
cl_uint numDevices;
|
2013-07-08 10:49:05 +01:00
|
|
|
cl_device_id *mpArryDevsID;
|
2013-06-16 17:18:23 +01:00
|
|
|
char **binaries, *str = NULL;
|
|
|
|
|
|
|
|
status = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
|
|
|
|
sizeof(numDevices), &numDevices, NULL);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
|
|
|
|
if (mpArryDevsID == NULL) {
|
2013-06-16 17:18:23 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/* grab the handles to all of the devices in the program. */
|
|
|
|
status = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
|
2013-07-08 10:49:05 +01:00
|
|
|
sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL);
|
2013-06-16 17:18:23 +01:00
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
/* figure out the sizes of each of the binaries. */
|
|
|
|
binarySizes = (size_t*) malloc(sizeof(size_t) * numDevices);
|
|
|
|
|
|
|
|
status = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
|
|
|
|
sizeof(size_t) * numDevices, binarySizes, NULL);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
/* copy over all of the generated binaries. */
|
|
|
|
binaries = (char**) malloc(sizeof(char *) * numDevices);
|
|
|
|
if (binaries == NULL) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < numDevices; i++) {
|
|
|
|
if (binarySizes[i] != 0) {
|
2013-07-04 13:37:35 +03:00
|
|
|
binaries[i] = (char*) malloc(binarySizes[i]);
|
2013-06-16 17:18:23 +01:00
|
|
|
if (binaries[i] == NULL) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
binaries[i] = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
status = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
|
|
|
|
sizeof(char *) * numDevices, binaries, NULL);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
/* dump out each binary into its own separate file. */
|
|
|
|
for (i = 0; i < numDevices; i++) {
|
|
|
|
char fileName[256] = { 0 }, cl_name[128] = { 0 };
|
|
|
|
|
|
|
|
if (binarySizes[i] != 0) {
|
|
|
|
char deviceName[1024];
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
|
2013-06-16 17:18:23 +01:00
|
|
|
sizeof(deviceName), deviceName, NULL);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
str = (char*) strstr(clFileName, (char*) ".cl");
|
|
|
|
memcpy(cl_name, clFileName, str - clFileName);
|
|
|
|
cl_name[str - clFileName] = '\0';
|
2013-06-16 17:18:23 +01:00
|
|
|
sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) {
|
2013-06-16 17:18:23 +01:00
|
|
|
printf("opencl-wrapper: write binary[%s] failds\n", fileName);
|
|
|
|
return 0;
|
|
|
|
} //else
|
|
|
|
printf("opencl-wrapper: write binary[%s] succesfully\n", fileName);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Release all resouces and memory
|
|
|
|
for (i = 0; i < numDevices; i++) {
|
|
|
|
if (binaries[i] != NULL) {
|
|
|
|
free(binaries[i]);
|
|
|
|
binaries[i] = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (binaries != NULL) {
|
|
|
|
free(binaries);
|
|
|
|
binaries = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (binarySizes != NULL) {
|
|
|
|
free(binarySizes);
|
|
|
|
binarySizes = NULL;
|
|
|
|
}
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
if (mpArryDevsID != NULL) {
|
|
|
|
free(mpArryDevsID);
|
|
|
|
mpArryDevsID = NULL;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
int OpenclDevice::InitOpenclAttr(OpenCLEnv * env)
|
|
|
|
{
|
|
|
|
if (gpuEnv.mnIsUserCreated)
|
2013-06-16 17:18:23 +01:00
|
|
|
return 1;
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuEnv.mpContext = env->mpOclContext;
|
|
|
|
gpuEnv.mpPlatformID = env->mpOclPlatformID;
|
|
|
|
gpuEnv.mpDevID = env->mpOclDevsID;
|
|
|
|
gpuEnv.mpCmdQueue = env->mpOclCmdQueue;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuEnv.mnIsUserCreated = 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env)
|
|
|
|
{
|
2013-06-16 17:18:23 +01:00
|
|
|
int status;
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
env->mpkKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelname, &status);
|
2013-07-08 10:49:05 +01:00
|
|
|
env->mpkContext = gpuEnv.mpContext;
|
|
|
|
env->mpkCmdQueue = gpuEnv.mpCmdQueue;
|
2013-06-16 17:18:23 +01:00
|
|
|
return status != CL_SUCCESS ? 1 : 0;
|
|
|
|
}
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
int OpenclDevice::ReleaseKernel(KernelEnv * env)
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
int status = clReleaseKernel(env->mpkKernel);
|
2013-06-16 17:18:23 +01:00
|
|
|
return status != CL_SUCCESS ? 1 : 0;
|
|
|
|
}
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo)
|
|
|
|
{
|
2013-06-16 17:18:23 +01:00
|
|
|
int i = 0;
|
|
|
|
int status = 0;
|
|
|
|
|
|
|
|
if (!isInited) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
for (i = 0; i < gpuEnv.mnFileCount; i++) {
|
|
|
|
if (gpuEnv.mpArryPrograms[i]) {
|
|
|
|
status = clReleaseProgram(gpuEnv.mpArryPrograms[i]);
|
2013-06-16 17:18:23 +01:00
|
|
|
CHECK_OPENCL(status)
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuEnv.mpArryPrograms[i] = NULL;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
if (gpuEnv.mpCmdQueue) {
|
|
|
|
clReleaseCommandQueue(gpuEnv.mpCmdQueue);
|
|
|
|
gpuEnv.mpCmdQueue = NULL;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
if (gpuEnv.mpContext) {
|
|
|
|
clReleaseContext(gpuEnv.mpContext);
|
|
|
|
gpuEnv.mpContext = NULL;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
isInited = 0;
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mnIsUserCreated = 0;
|
|
|
|
free(gpuInfo->mpArryDevsID);
|
2013-06-16 17:18:23 +01:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::RunKernelWrapper(cl_kernel_function function,
|
2013-07-08 21:35:26 +01:00
|
|
|
const char * kernelName, void **usrdata)
|
|
|
|
{
|
2013-06-26 12:19:51 +01:00
|
|
|
printf("oclwrapper:RunKernel_wrapper...\n");
|
|
|
|
if (RegisterKernelWrapper(kernelName, function) != 1) {
|
2013-06-16 17:18:23 +01:00
|
|
|
fprintf(stderr,
|
2013-06-26 12:19:51 +01:00
|
|
|
"Error:RunKernel_wrapper:RegisterKernelWrapper fail!\n");
|
2013-06-16 17:18:23 +01:00
|
|
|
return -1;
|
|
|
|
}
|
2013-06-26 12:19:51 +01:00
|
|
|
return (RunKernel(kernelName, usrdata));
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached,
|
2013-07-08 21:35:26 +01:00
|
|
|
const char * clFileName)
|
|
|
|
{
|
|
|
|
int i;
|
2013-07-08 10:49:05 +01:00
|
|
|
for (i = 0; i < gpuEnvCached->mnFileCount; i++) {
|
|
|
|
if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) {
|
|
|
|
if (gpuEnvCached->mpArryPrograms[i] != NULL) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
|
2013-06-16 17:18:23 +01:00
|
|
|
cl_int status;
|
|
|
|
size_t length;
|
|
|
|
char *buildLog = NULL, *binary;
|
|
|
|
const char *source;
|
|
|
|
size_t source_size[1];
|
|
|
|
int b_error, binary_status, binaryExisted, idx;
|
2013-07-11 11:30:42 +03:00
|
|
|
cl_uint numDevices;
|
2013-07-08 10:49:05 +01:00
|
|
|
cl_device_id *mpArryDevsID;
|
2013-06-16 17:18:23 +01:00
|
|
|
FILE *fd, *fd1;
|
|
|
|
const char* filename = "kernel.cl";
|
2013-07-08 10:49:05 +01:00
|
|
|
fprintf(stderr, "CompileKernelFile ... \n");
|
2013-06-26 12:19:51 +01:00
|
|
|
if (CachedOfKernerPrg(gpuInfo, filename) == 1) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
idx = gpuInfo->mnFileCount;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
|
|
|
source = kernel_src;
|
|
|
|
|
|
|
|
source_size[0] = strlen(source);
|
|
|
|
binaryExisted = 0;
|
2013-06-26 12:19:51 +01:00
|
|
|
if ((binaryExisted = BinaryGenerated(filename, &fd)) == 1) {
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
|
2013-06-16 17:18:23 +01:00
|
|
|
sizeof(numDevices), &numDevices, NULL);
|
|
|
|
CHECK_OPENCL(status)
|
2013-07-08 10:49:05 +01:00
|
|
|
|
|
|
|
mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
|
|
|
|
if (mpArryDevsID == NULL) {
|
2013-06-16 17:18:23 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
b_error = 0;
|
|
|
|
length = 0;
|
|
|
|
b_error |= fseek(fd, 0, SEEK_END) < 0;
|
|
|
|
b_error |= (length = ftell(fd)) <= 0;
|
|
|
|
b_error |= fseek(fd, 0, SEEK_SET) < 0;
|
|
|
|
if (b_error) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-07-04 15:30:16 +03:00
|
|
|
binary = (char*) malloc(length);
|
2013-06-16 17:18:23 +01:00
|
|
|
if (!binary) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-07-04 15:30:16 +03:00
|
|
|
memset(binary, 0, length);
|
2013-06-16 17:18:23 +01:00
|
|
|
b_error |= fread(binary, 1, length, fd) != length;
|
|
|
|
|
|
|
|
fclose(fd);
|
|
|
|
fd = NULL;
|
|
|
|
// grab the handles to all of the devices in the context.
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES,
|
|
|
|
sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL);
|
2013-06-16 17:18:23 +01:00
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary(gpuInfo->mpContext,
|
|
|
|
numDevices, mpArryDevsID, &length, (const unsigned char**) &binary,
|
2013-06-16 17:18:23 +01:00
|
|
|
&binary_status, &status);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
free(binary);
|
2013-07-08 10:49:05 +01:00
|
|
|
free(mpArryDevsID);
|
|
|
|
mpArryDevsID = NULL;
|
2013-06-16 17:18:23 +01:00
|
|
|
} else {
|
|
|
|
// create a CL program using the kernel source
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuEnv.mpArryPrograms[idx] = clCreateProgramWithSource(gpuEnv.mpContext,
|
2013-06-16 17:18:23 +01:00
|
|
|
1, &source, source_size, &status);
|
2013-06-26 12:19:51 +01:00
|
|
|
CHECK_OPENCL(status);
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
if (gpuInfo->mpArryPrograms[idx] == (cl_program) NULL) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
//char options[512];
|
|
|
|
// create a cl program executable for all the devices specified
|
2013-07-08 10:49:05 +01:00
|
|
|
if (!gpuInfo->mnIsUserCreated) {
|
|
|
|
status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
|
2013-06-26 12:19:51 +01:00
|
|
|
buildOption, NULL, NULL);
|
2013-06-16 17:18:23 +01:00
|
|
|
CHECK_OPENCL(status)
|
|
|
|
} else {
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
|
2013-06-26 12:19:51 +01:00
|
|
|
buildOption, NULL, NULL);
|
2013-06-16 17:18:23 +01:00
|
|
|
CHECK_OPENCL(status)
|
|
|
|
}
|
|
|
|
printf("BuildProgram.\n");
|
|
|
|
|
|
|
|
if (status != CL_SUCCESS) {
|
2013-07-08 10:49:05 +01:00
|
|
|
if (!gpuInfo->mnIsUserCreated) {
|
|
|
|
status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
|
|
|
|
gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
|
2013-06-16 17:18:23 +01:00
|
|
|
&length);
|
|
|
|
} else {
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
|
|
|
|
gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
if (status != CL_SUCCESS) {
|
|
|
|
printf("opencl create build log fail\n");
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
buildLog = (char*) malloc(length);
|
|
|
|
if (buildLog == (char*) NULL) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
if (!gpuInfo->mnIsUserCreated) {
|
|
|
|
status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
|
|
|
|
gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, length,
|
2013-06-16 17:18:23 +01:00
|
|
|
buildLog, &length);
|
|
|
|
} else {
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
|
|
|
|
gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, length, buildLog,
|
2013-06-16 17:18:23 +01:00
|
|
|
&length);
|
|
|
|
}
|
|
|
|
|
|
|
|
fd1 = fopen("kernel-build.log", "w+");
|
|
|
|
if (fd1 != NULL) {
|
2013-07-04 13:37:35 +03:00
|
|
|
fwrite(buildLog, 1, length, fd1);
|
2013-06-16 17:18:23 +01:00
|
|
|
fclose(fd1);
|
|
|
|
}
|
|
|
|
|
|
|
|
free(buildLog);
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
strcpy(gpuEnv.mArryKnelSrcFile[idx], filename);
|
2013-06-16 17:18:23 +01:00
|
|
|
|
|
|
|
if (binaryExisted == 0)
|
2013-07-08 10:49:05 +01:00
|
|
|
GeneratBinFromKernelSource(gpuEnv.mpArryPrograms[idx], filename);
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mnFileCount += 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
|
|
|
|
|
|
|
}
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName,
|
2013-06-16 17:18:23 +01:00
|
|
|
KernelEnv *env, cl_kernel_function *function) {
|
|
|
|
int i; //,program_idx ;
|
2013-07-08 10:49:05 +01:00
|
|
|
//printf("----------------OpenclDevice::GetKernelEnvAndFunc\n");
|
|
|
|
for (i = 0; i < gpuEnv.mnKernelCount; i++) {
|
|
|
|
if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0) {
|
|
|
|
env->mpkContext = gpuEnv.mpContext;
|
|
|
|
env->mpkCmdQueue = gpuEnv.mpCmdQueue;
|
|
|
|
env->mpkProgram = gpuEnv.mpArryPrograms[0];
|
|
|
|
env->mpkKernel = gpuEnv.mpArryKernels[i];
|
|
|
|
*function = gpuEnv.mpArryKnelFuncs[i];
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
}
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::RunKernel(const char *kernelName, void **userdata) {
|
2013-07-08 10:49:05 +01:00
|
|
|
KernelEnv kEnv;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
|
|
|
cl_kernel_function function;
|
|
|
|
|
|
|
|
int status;
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
memset(&kEnv, 0, sizeof(KernelEnv));
|
|
|
|
status = GetKernelEnvAndFunc(kernelName, &kEnv, &function);
|
|
|
|
strcpy(kEnv.mckKernelName, kernelName);
|
2013-06-16 17:18:23 +01:00
|
|
|
if (status == 1) {
|
2013-07-08 10:49:05 +01:00
|
|
|
if (&kEnv == (KernelEnv *) NULL
|
2013-06-16 17:18:23 +01:00
|
|
|
|| &function == (cl_kernel_function *) NULL) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
return (function(userdata, &kEnv));
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
2013-07-08 21:35:26 +01:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles)
|
2013-06-16 17:18:23 +01:00
|
|
|
{
|
|
|
|
int status = 0;
|
|
|
|
if (MAX_CLKERNEL_NUM <= 0) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if ((argc > MAX_CLFILE_NUM) || (argc < 0)) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!isInited) {
|
2013-06-26 12:19:51 +01:00
|
|
|
RegistOpenclKernel();
|
2013-06-16 17:18:23 +01:00
|
|
|
//initialize devices, context, comand_queue
|
2013-06-26 12:19:51 +01:00
|
|
|
status = InitOpenclRunEnv(&gpuEnv);
|
2013-06-16 17:18:23 +01:00
|
|
|
if (status) {
|
|
|
|
printf("init_opencl_env failed.\n");
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
printf("init_opencl_env successed.\n");
|
2013-06-26 12:19:51 +01:00
|
|
|
//initialize program, kernelName, kernelCount
|
|
|
|
status = CompileKernelFile( &gpuEnv, buildOptionKernelfiles);
|
2013-07-08 10:49:05 +01:00
|
|
|
if (status == 0 || gpuEnv.mnKernelCount == 0) {
|
2013-06-26 12:19:51 +01:00
|
|
|
printf("CompileKernelFile failed.\n");
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
2013-06-26 12:19:51 +01:00
|
|
|
printf("CompileKernelFile successed.\n");
|
2013-06-16 17:18:23 +01:00
|
|
|
isInited = 1;
|
|
|
|
}
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
|
|
|
|
{
|
|
|
|
size_t length;
|
|
|
|
cl_int status;
|
|
|
|
cl_uint numPlatforms, numDevices;
|
|
|
|
cl_platform_id *platforms;
|
|
|
|
cl_context_properties cps[3];
|
|
|
|
char platformName[100];
|
|
|
|
unsigned int i;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
// Have a look at the available platforms.
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
if (!gpuInfo->mnIsUserCreated) {
|
2013-06-26 12:19:51 +01:00
|
|
|
status = clGetPlatformIDs(0, NULL, &numPlatforms);
|
|
|
|
if (status != CL_SUCCESS) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mpPlatformID = NULL;
|
2013-06-26 12:19:51 +01:00
|
|
|
|
|
|
|
if (0 < numPlatforms) {
|
|
|
|
platforms = (cl_platform_id*) malloc(
|
|
|
|
numPlatforms * sizeof(cl_platform_id));
|
|
|
|
if (platforms == (cl_platform_id*) NULL) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
|
|
|
|
|
|
|
if (status != CL_SUCCESS) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < numPlatforms; i++) {
|
|
|
|
status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
|
|
|
|
sizeof(platformName), platformName, NULL);
|
|
|
|
|
|
|
|
if (status != CL_SUCCESS) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mpPlatformID = platforms[i];
|
2013-06-26 12:19:51 +01:00
|
|
|
|
|
|
|
//if (!strcmp(platformName, "Intel(R) Coporation"))
|
|
|
|
//if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mpPlatformID = platforms[i];
|
2013-06-26 12:19:51 +01:00
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
|
2013-07-11 11:43:57 +03:00
|
|
|
CL_DEVICE_TYPE_ALL, // device_type
|
2013-07-08 10:49:05 +01:00
|
|
|
0, // num_entries
|
|
|
|
NULL, // devices
|
|
|
|
&numDevices);
|
2013-06-26 12:19:51 +01:00
|
|
|
|
|
|
|
if (status != CL_SUCCESS) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (numDevices) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(platforms);
|
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
if (NULL == gpuInfo->mpPlatformID) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Use available platform.
|
|
|
|
|
|
|
|
cps[0] = CL_CONTEXT_PLATFORM;
|
2013-07-08 10:49:05 +01:00
|
|
|
cps[1] = (cl_context_properties) gpuInfo->mpPlatformID;
|
2013-06-26 12:19:51 +01:00
|
|
|
cps[2] = 0;
|
|
|
|
// Check for GPU.
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mDevType = CL_DEVICE_TYPE_GPU;
|
|
|
|
gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType, NULL,
|
2013-06-26 12:19:51 +01:00
|
|
|
NULL, &status);
|
|
|
|
|
2013-07-11 11:43:57 +03:00
|
|
|
// If no GPU, check for CPU.
|
2013-07-08 10:49:05 +01:00
|
|
|
if ((gpuInfo->mpContext == (cl_context) NULL)
|
2013-06-26 12:19:51 +01:00
|
|
|
|| (status != CL_SUCCESS)) {
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
|
|
|
|
gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType,
|
2013-06-26 12:19:51 +01:00
|
|
|
NULL, NULL, &status);
|
|
|
|
}
|
2013-07-11 11:43:57 +03:00
|
|
|
|
|
|
|
// If no GPU or CPU, check for a "default" type.
|
2013-07-08 10:49:05 +01:00
|
|
|
if ((gpuInfo->mpContext == (cl_context) NULL)
|
2013-06-26 12:19:51 +01:00
|
|
|
|| (status != CL_SUCCESS)) {
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mDevType = CL_DEVICE_TYPE_DEFAULT;
|
|
|
|
gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType,
|
2013-06-26 12:19:51 +01:00
|
|
|
NULL, NULL, &status);
|
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
if ((gpuInfo->mpContext == (cl_context) NULL)
|
2013-06-26 12:19:51 +01:00
|
|
|
|| (status != CL_SUCCESS)) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
// Detect OpenCL devices.
|
|
|
|
// First, get the size of device list data
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0,
|
2013-06-26 12:19:51 +01:00
|
|
|
NULL, &length);
|
|
|
|
if ((status != CL_SUCCESS) || (length == 0)) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
// Now allocate memory for device list based on the size we got earlier
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mpArryDevsID = (cl_device_id*) malloc(length);
|
|
|
|
if (gpuInfo->mpArryDevsID == (cl_device_id*) NULL) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
// Now, get the device list data
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, length,
|
|
|
|
gpuInfo->mpArryDevsID, NULL);
|
2013-06-26 12:19:51 +01:00
|
|
|
if (status != CL_SUCCESS) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create OpenCL command queue.
|
2013-07-08 10:49:05 +01:00
|
|
|
gpuInfo->mpCmdQueue = clCreateCommandQueue(gpuInfo->mpContext,
|
|
|
|
gpuInfo->mpArryDevsID[0], 0, &status);
|
2013-06-26 12:19:51 +01:00
|
|
|
|
|
|
|
if (status != CL_SUCCESS) {
|
2013-06-26 22:15:19 +01:00
|
|
|
return 1;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
|
|
|
}
|
2013-06-26 12:19:51 +01:00
|
|
|
int OpenclDevice::RegisterKernelWrapper(const char *kernelName,cl_kernel_function function)
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
int i;
|
|
|
|
//printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.mnKernelCount);
|
|
|
|
for (i = 0; i < gpuEnv.mnKernelCount; i++)
|
|
|
|
{
|
|
|
|
if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0)
|
|
|
|
{
|
|
|
|
gpuEnv.mpArryKnelFuncs[i] = function;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
2013-06-26 22:15:19 +01:00
|
|
|
return 0;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
|
|
|
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-06-26 13:07:47 +01:00
|
|
|
void OpenclDevice::SetOpenclState(int state)
|
|
|
|
{
|
2013-06-26 12:19:51 +01:00
|
|
|
//printf("OpenclDevice::setOpenclState...\n");
|
2013-06-16 17:18:23 +01:00
|
|
|
isInited = state;
|
|
|
|
}
|
|
|
|
|
2013-06-26 13:07:47 +01:00
|
|
|
int OpenclDevice::GetOpenclState()
|
|
|
|
{
|
2013-06-16 17:18:23 +01:00
|
|
|
return isInited;
|
|
|
|
}
|
|
|
|
//ocldbg
|
|
|
|
|
|
|
|
int OclFormulax(void ** usrdata, KernelEnv *env) {
|
|
|
|
fprintf(stderr, "In OpenclDevice,...Formula_proc\n");
|
|
|
|
cl_int clStatus;
|
|
|
|
int status;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
float tdata[NUM];
|
|
|
|
|
|
|
|
double *data = (double *) usrdata[0];
|
|
|
|
const formulax type = *((const formulax *) usrdata[1]);
|
|
|
|
double ret = 0.0;
|
|
|
|
|
|
|
|
for (int i = 0; i < NUM; i++) {
|
|
|
|
tdata[i] = (float) data[i];
|
|
|
|
}
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus);
|
2013-06-16 17:18:23 +01:00
|
|
|
//printf("ScInterpreter::IterateParameters...after clCreateKernel.\n");
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
|
|
|
|
int size = NUM;
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
cl_mem formula_data = clCreateBuffer(env->mpkContext,
|
2013-06-16 17:18:23 +01:00
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
|
|
|
|
size * sizeof(float), (void *) tdata, &clStatus);
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem),
|
2013-06-16 17:18:23 +01:00
|
|
|
(void *) &formula_data);
|
|
|
|
CHECK_OPENCL(status)
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int),
|
2013-06-16 17:18:23 +01:00
|
|
|
(void *) &type);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
global_work_size[0] = size;
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...after global_work_size\n");
|
|
|
|
//PPAStartCpuEvent(ppa_proc);
|
|
|
|
|
|
|
|
while (global_work_size[0] != 1) {
|
|
|
|
global_work_size[0] = global_work_size[0] / 2;
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clEnqueueNDRangeKernel(env->mpkCmdQueue, env->mpkKernel, 1,
|
2013-06-16 17:18:23 +01:00
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
}
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clEnqueueReadBuffer(env->mpkCmdQueue, formula_data, CL_FALSE, 0,
|
2013-06-16 17:18:23 +01:00
|
|
|
sizeof(float), (void *) &tdata, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(status)
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clFinish(env->mpkCmdQueue);
|
2013-06-16 17:18:23 +01:00
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
//PPAStopCpuEvent(ppa_proc);
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n");
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clReleaseKernel(env->mpkKernel);
|
2013-06-16 17:18:23 +01:00
|
|
|
CHECK_OPENCL(status)
|
|
|
|
status = clReleaseMemObject(formula_data);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
if (type == AVG)
|
|
|
|
ret = (double) tdata[0] / NUM;
|
|
|
|
else
|
|
|
|
ret = (double) tdata[0];
|
|
|
|
|
|
|
|
printf("size = %d ret = %f.\n\n", NUM, ret);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
|
|
|
|
|
|
|
|
fprintf(stderr, "In OclFormulaxDll...\n");
|
|
|
|
cl_int clStatus;
|
|
|
|
int status;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
float tdata[NUM];
|
|
|
|
|
|
|
|
double *data = (double *) usrdata[0];
|
|
|
|
const formulax type = *((const formulax *) usrdata[1]);
|
|
|
|
double ret = 0.0;
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
for (int i = 0; i < NUM; i++) {
|
|
|
|
tdata[i] = (float) data[i];
|
|
|
|
}
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus);
|
2013-06-26 12:19:51 +01:00
|
|
|
//printf("ScInterpreter::IterateParameters...after clCreateKernel.\n");
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
|
|
|
|
int size = NUM;
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
cl_mem formula_data = clCreateBuffer(env->mpkContext,
|
2013-06-26 12:19:51 +01:00
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
|
|
|
|
size * sizeof(float), (void *) tdata, &clStatus);
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem),
|
2013-06-26 12:19:51 +01:00
|
|
|
(void *) &formula_data);
|
|
|
|
CHECK_OPENCL(status)
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int),
|
2013-06-26 12:19:51 +01:00
|
|
|
(void *) &type);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
global_work_size[0] = size;
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...after global_work_size\n");
|
|
|
|
//PPAStartCpuEvent(ppa_proc);
|
|
|
|
|
|
|
|
while (global_work_size[0] != 1) {
|
|
|
|
global_work_size[0] = global_work_size[0] / 2;
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clEnqueueNDRangeKernel(env->mpkCmdQueue, env->mpkKernel, 1,
|
2013-06-26 12:19:51 +01:00
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
}
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clEnqueueReadBuffer(env->mpkCmdQueue, formula_data, CL_FALSE, 0,
|
2013-06-26 12:19:51 +01:00
|
|
|
sizeof(float), (void *) &tdata, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(status)
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clFinish(env->mpkCmdQueue);
|
2013-06-26 12:19:51 +01:00
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
//PPAStopCpuEvent(ppa_proc);
|
|
|
|
//fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n");
|
2013-07-08 10:49:05 +01:00
|
|
|
status = clReleaseKernel(env->mpkKernel);
|
2013-06-26 12:19:51 +01:00
|
|
|
CHECK_OPENCL(status)
|
|
|
|
status = clReleaseMemObject(formula_data);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
|
|
|
|
if (type == AVG)
|
|
|
|
ret = (double) tdata[0] / NUM;
|
|
|
|
else
|
|
|
|
ret = (double) tdata[0];
|
|
|
|
|
|
|
|
printf("OclFormulaxDllxx:size = %d ret = %f.\n\n", NUM, ret);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2013-06-26 13:07:47 +01:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
double OclCalc::OclProcess(cl_kernel_function function, double *data, formulax type)
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
fprintf(stderr, "\n OpenclDevice, proc...begin\n");
|
|
|
|
double ret = 0;
|
|
|
|
void *usrdata[2];
|
|
|
|
usrdata[0] = (void *) data;
|
|
|
|
usrdata[1] = (void *) &type;
|
|
|
|
RunKernelWrapper(function, "oclformula", usrdata);
|
|
|
|
return ret;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
double OclCalc::OclTest() {
|
|
|
|
double data[NUM];
|
|
|
|
|
|
|
|
for (int i = 0; i < NUM; i++) {
|
2013-06-16 17:55:11 +01:00
|
|
|
data[i] = sc::rng::uniform();
|
2013-06-16 17:18:23 +01:00
|
|
|
fprintf(stderr, "%f\t", data[i]);
|
|
|
|
}
|
|
|
|
OclProcess(&OclFormulax, data, AVG);
|
2013-06-26 12:19:51 +01:00
|
|
|
return 0.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
double OclCalc::OclTestDll() {
|
|
|
|
double data[NUM];
|
2013-06-16 17:18:23 +01:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
for (int i = 0; i < NUM; i++) {
|
2013-06-26 12:52:53 +01:00
|
|
|
data[i] = sc::rng::uniform();
|
2013-06-26 12:19:51 +01:00
|
|
|
fprintf(stderr, "%f\t", data[i]);
|
|
|
|
}
|
|
|
|
OclProcess(&OclFormulaxDll, data, AVG);
|
2013-06-16 17:18:23 +01:00
|
|
|
return 0.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
OclCalc::OclCalc()
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
fprintf(stderr,"OclCalc:: init opencl ...\n");
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
OclCalc::~OclCalc()
|
|
|
|
{
|
2013-07-08 10:49:05 +01:00
|
|
|
fprintf(stderr,"OclCalc:: opencl end ...\n");
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
2013-06-26 13:07:47 +01:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
2013-07-08 10:49:05 +01:00
|
|
|
#ifdef GPU_64BITS
|
|
|
|
int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *output,int size)
|
|
|
|
{
|
|
|
|
KernelEnv kEnv;
|
|
|
|
const char *kernelName = "oclFormulaMax";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
int alignSize = size + end[0]-start[0];
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
cl_int ret=0;
|
|
|
|
cl_mem inputCl = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
alignSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
size * sizeof(unsigned int), NULL, &ret);
|
|
|
|
cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
size * sizeof(unsigned int), NULL, &ret);
|
|
|
|
cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
size* sizeof(float), NULL, &ret);
|
|
|
|
|
|
|
|
float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
|
|
|
|
int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<size;i++)
|
|
|
|
{
|
|
|
|
hostMapStart[i] = start[i];
|
|
|
|
hostMapEnd[i] = end[i];
|
|
|
|
}
|
|
|
|
for(int i=0;i<alignSize;i++)
|
|
|
|
hostMapSrc[i] = (float)srcData[i];
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl,hostMapEnd,0,NULL,NULL);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&inputCl);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&startCl);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&endCl);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
|
|
|
|
(void *)&outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = size;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<size;i++)
|
|
|
|
output[i]=outPutMap[i];
|
|
|
|
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL);
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(inputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(startCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(endCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
2013-07-08 21:35:26 +01:00
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size)
|
|
|
|
{
|
|
|
|
KernelEnv kEnv;
|
|
|
|
const char *kernelName = "oclFormulaMin";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
int alignSize = size + end[0]-start[0];
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
cl_int ret=0;
|
|
|
|
cl_mem inputCl = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
alignSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
size * sizeof(unsigned int), NULL, &ret);
|
|
|
|
cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
size * sizeof(unsigned int), NULL, &ret);
|
|
|
|
cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
size* sizeof(float), NULL, &ret);
|
|
|
|
|
|
|
|
float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
|
|
|
|
int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<size;i++)
|
|
|
|
{
|
|
|
|
hostMapStart[i] = start[i];
|
|
|
|
hostMapEnd[i] = end[i];
|
|
|
|
}
|
|
|
|
for(int i=0;i<alignSize;i++)
|
|
|
|
hostMapSrc[i] = (float)srcData[i];
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl,hostMapEnd,0,NULL,NULL);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&inputCl);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&startCl);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&endCl);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
|
|
|
|
(void *)&outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = size;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<size;i++)
|
|
|
|
output[i]=outPutMap[i];
|
|
|
|
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL);
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(inputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(startCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(endCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
int OclCalc::OclHostFormulaAverage(double *srcData,int *start,int *end,double *output,int size)
|
|
|
|
{
|
|
|
|
KernelEnv kEnv;
|
|
|
|
const char *kernelName = "oclFormulaAverage";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
int alignSize = size + end[0]-start[0];
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
cl_int ret=0;
|
|
|
|
cl_mem inputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
|
|
|
|
alignSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
|
|
|
|
size * sizeof(unsigned int), NULL, &ret);
|
|
|
|
cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
|
|
|
|
size * sizeof(unsigned int), NULL, &ret);
|
|
|
|
cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
size* sizeof(float), NULL, &ret);
|
|
|
|
|
2013-07-13 22:54:17 +03:00
|
|
|
float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,OPENCLWRAPPER_CL_MAP_WRITE_FLAG,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,OPENCLWRAPPER_CL_MAP_WRITE_FLAG,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
|
|
|
|
int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,OPENCLWRAPPER_CL_MAP_WRITE_FLAG,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
|
2013-07-08 10:49:05 +01:00
|
|
|
#if 1
|
|
|
|
for(int i=0;i<size;i++)
|
|
|
|
{
|
|
|
|
hostMapStart[i] = start[i];
|
|
|
|
hostMapEnd[i] = end[i];
|
|
|
|
}
|
|
|
|
for(int i=0;i<alignSize;i++)
|
|
|
|
hostMapSrc[i] = (float)srcData[i];
|
|
|
|
//memcpy(hostMapSrc,srcData,alignSize * sizeof(float));
|
|
|
|
#endif
|
|
|
|
for(sal_Int32 i = 0; i < alignSize; ++i){//dbg
|
|
|
|
fprintf(stderr,"In avg host,hostMapSrc[%d] is ...%f\n",i,hostMapSrc[i]);
|
|
|
|
}
|
2013-06-26 12:19:51 +01:00
|
|
|
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc, 0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl, hostMapEnd, 0,NULL,NULL);
|
2013-06-26 12:19:51 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&inputCl);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&startCl);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&endCl);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
|
|
|
|
(void *)&outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = size;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
|
|
|
|
for(int i=0;i<size;i++){
|
|
|
|
//fprintf(stderr,"In avg host,outPutMap[%d] is ...%f\n",i,outPutMap[i]);
|
|
|
|
output[i]=outPutMap[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL);
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(inputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(startCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(endCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
|
|
|
|
int OclCalc::OclHostSignedAdd(double *lData,double *rData,double *rResult,int dSize) {
|
|
|
|
|
|
|
|
KernelEnv kEnv;
|
|
|
|
int status;
|
|
|
|
const char *kernelName = "oclSignedAdd";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem clRightData = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
|
|
|
|
float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<dSize;i++)
|
|
|
|
{
|
|
|
|
hostMapLeftData[i] = (float)lData[i];
|
|
|
|
hostMapRightData[i] = (float)rData[i];
|
|
|
|
}
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL);
|
|
|
|
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&clLiftData);
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&clRightData);
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&clResult);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
global_work_size[0] = dSize;
|
|
|
|
status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
|
|
|
|
float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<dSize;i++)
|
|
|
|
rResult[i]=hostMapResult[i];
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
|
|
|
|
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clLiftData);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clRightData);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clResult);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
return 0;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OclCalc::OclHostSignedSub(double *lData,double *rData,double *rResult,int dSize) {
|
2013-07-08 10:49:05 +01:00
|
|
|
KernelEnv kEnv;
|
|
|
|
int status;
|
|
|
|
const char *kernelName = "oclSignedSub";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem clRightData = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
|
|
|
|
float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<dSize;i++)
|
|
|
|
{
|
|
|
|
hostMapLeftData[i] = (float)lData[i];
|
|
|
|
hostMapRightData[i] = (float)rData[i];
|
|
|
|
}
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL);
|
|
|
|
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&clLiftData);
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&clRightData);
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&clResult);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
global_work_size[0] = dSize;
|
|
|
|
status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
|
|
|
|
float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<dSize;i++)
|
|
|
|
rResult[i]=hostMapResult[i];
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
|
|
|
|
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clLiftData);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clRightData);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clResult);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
return 0;
|
2013-06-26 12:19:51 +01:00
|
|
|
}
|
2013-07-08 10:49:05 +01:00
|
|
|
|
|
|
|
int OclCalc::OclHostSignedMul(double *lData,double *rData,double *rResult,int dSize) {
|
|
|
|
KernelEnv kEnv;
|
|
|
|
int status;
|
|
|
|
const char *kernelName = "oclSignedMul";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
|
|
|
|
size_t global_work_size[1];
|
|
|
|
cl_int clStatus;
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, kernelName, &clStatus);
|
|
|
|
cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem clRightData = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
|
|
|
|
float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<dSize;i++)
|
|
|
|
{
|
|
|
|
hostMapLeftData[i] = (float)lData[i];
|
|
|
|
hostMapRightData[i] = (float)rData[i];
|
|
|
|
}
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL);
|
|
|
|
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&clLiftData);
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&clRightData);
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&clResult);
|
|
|
|
CHECK_OPENCL(status)
|
|
|
|
global_work_size[0] = dSize;
|
|
|
|
status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
|
|
|
|
float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<dSize;i++)
|
|
|
|
rResult[i]=hostMapResult[i];
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
|
|
|
|
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clLiftData);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clRightData);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clResult);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-06-26 12:19:51 +01:00
|
|
|
int OclCalc::OclHostSignedDiv(double *lData,double *rData,double *rResult,int dSize) {
|
2013-07-08 10:49:05 +01:00
|
|
|
KernelEnv kEnv;
|
|
|
|
int status;
|
|
|
|
const char *kernelName = "oclSignedDiv";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem clRightData = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
dSize * sizeof(float), NULL, &clStatus);
|
|
|
|
|
|
|
|
float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<dSize;i++)
|
|
|
|
{
|
|
|
|
hostMapLeftData[i] = (float)lData[i];
|
|
|
|
hostMapRightData[i] = (float)rData[i];
|
|
|
|
}
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL);
|
|
|
|
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&clLiftData);
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&clRightData);
|
|
|
|
status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&clResult);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
global_work_size[0] = dSize;
|
|
|
|
status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
|
|
|
|
float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<dSize;i++)
|
|
|
|
rResult[i]=hostMapResult[i];
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
|
|
|
|
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clLiftData);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clRightData);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
status = clReleaseMemObject(clResult);
|
|
|
|
CHECK_OPENCL(status);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif // GPU_64BITS
|
|
|
|
int OclCalc::CreateBuffer(float *&fpSrcData,uint *&npStartPos,uint *&npEndPos,int nBufferSize)
|
|
|
|
{
|
|
|
|
cl_int clStatus = 0;
|
|
|
|
SetKernelEnv(&kEnv);
|
|
|
|
|
|
|
|
mpClmemSrcData = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
|
|
|
|
nBufferSize * sizeof(float), NULL, &clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
mpClmemStartPos = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
|
|
|
|
nBufferSize * sizeof(unsigned int), NULL, &clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
mpClmemEndPos = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
|
|
|
|
nBufferSize * sizeof(unsigned int), NULL, &clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
2013-07-13 22:54:17 +03:00
|
|
|
fpSrcData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemSrcData,CL_TRUE,OPENCLWRAPPER_CL_MAP_WRITE_FLAG,0,nBufferSize * sizeof(float),0,NULL,NULL,&clStatus);
|
2013-07-08 10:49:05 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
2013-07-13 22:54:17 +03:00
|
|
|
npStartPos = (uint *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemStartPos,CL_TRUE,OPENCLWRAPPER_CL_MAP_WRITE_FLAG,0,nBufferSize * sizeof(uint),0,NULL,NULL,&clStatus);
|
2013-07-08 10:49:05 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
2013-07-13 22:54:17 +03:00
|
|
|
npEndPos = (uint *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemEndPos,CL_TRUE,OPENCLWRAPPER_CL_MAP_WRITE_FLAG,0,nBufferSize * sizeof(uint),0,NULL,NULL,&clStatus);
|
2013-07-08 10:49:05 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
//printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int OclCalc::CreateBuffer(float *&fpLeftData,float *&fpRightData,int nBufferSize)
|
|
|
|
{
|
|
|
|
cl_int clStatus = 0;
|
|
|
|
SetKernelEnv(&kEnv);
|
|
|
|
|
|
|
|
mpClmemLeftData = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
|
|
|
|
nBufferSize * sizeof(float), NULL, &clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
mpClmemRightData = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
|
|
|
|
nBufferSize * sizeof(unsigned int), NULL, &clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
2013-07-13 22:54:17 +03:00
|
|
|
fpLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemLeftData,CL_TRUE,OPENCLWRAPPER_CL_MAP_WRITE_FLAG,0,nBufferSize * sizeof(float),0,NULL,NULL,&clStatus);
|
2013-07-08 10:49:05 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
2013-07-13 22:54:17 +03:00
|
|
|
fpRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemRightData,CL_TRUE,OPENCLWRAPPER_CL_MAP_WRITE_FLAG,0,nBufferSize * sizeof(float),0,NULL,NULL,&clStatus);
|
2013-07-08 10:49:05 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
//printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int OclCalc::OclHostFormulaMax32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size)
|
|
|
|
{
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
//int alignSize = size + end[0]-start[0];
|
|
|
|
//for(int u=0;u < size;u++)
|
|
|
|
//fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpSrcData[u]);
|
|
|
|
const char *kernelName = "oclFormulaMax";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kEnv.mckKernelName,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemSrcData, fpSrcData, 0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemStartPos,npStartPos,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemEndPos, npEndPos, 0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
cl_mem outputCl = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
CL_MEM_READ_WRITE,
|
|
|
|
size* sizeof(float),
|
|
|
|
NULL,
|
|
|
|
&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemStartPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&mpClmemEndPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),(void *)&outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = size;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue,
|
|
|
|
kEnv.mpkKernel,
|
|
|
|
1,
|
|
|
|
NULL,
|
|
|
|
global_work_size,
|
|
|
|
NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * outputMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
|
|
|
|
outputCl,
|
|
|
|
CL_TRUE,CL_MAP_READ,
|
|
|
|
0,
|
|
|
|
size*sizeof(float),
|
|
|
|
0,NULL,NULL,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
for(int i=0;i<size;i++)
|
|
|
|
output[i]=outputMap[i];// from gpu float type to cpu double type
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outputMap,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemSrcData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemStartPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemEndPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
int OclCalc::OclHostFormulaMin32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size)
|
|
|
|
{
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
//int alignSize = size + end[0]-start[0];
|
|
|
|
//for(int u=0;u < size;u++)
|
|
|
|
//fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpSrcData[u]);
|
|
|
|
const char *kernelName = "oclFormulaMin";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kEnv.mckKernelName,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemSrcData, fpSrcData, 0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemStartPos,npStartPos,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemEndPos, npEndPos, 0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
cl_mem outputCl = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
CL_MEM_READ_WRITE,
|
|
|
|
size* sizeof(float),
|
|
|
|
NULL,
|
|
|
|
&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemStartPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&mpClmemEndPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),(void *)&outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = size;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue,
|
|
|
|
kEnv.mpkKernel,
|
|
|
|
1,
|
|
|
|
NULL,
|
|
|
|
global_work_size,
|
|
|
|
NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * outputMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
|
|
|
|
outputCl,
|
|
|
|
CL_TRUE,CL_MAP_READ,
|
|
|
|
0,
|
|
|
|
size*sizeof(float),
|
|
|
|
0,NULL,NULL,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
for(int i=0;i<size;i++)
|
|
|
|
output[i]=outputMap[i];// from gpu float type to cpu double type
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outputMap,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemSrcData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemStartPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemEndPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
2013-06-16 17:18:23 +01:00
|
|
|
}
|
|
|
|
|
2013-07-08 10:49:05 +01:00
|
|
|
int OclCalc::OclHostFormulaAverage32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size)
|
|
|
|
{
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
//int alignSize = size + end[0]-start[0];
|
|
|
|
//for(int u=0;u < size;u++)
|
|
|
|
//fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpSrcData[u]);
|
|
|
|
const char *kernelName = "oclFormulaAverage";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kEnv.mckKernelName,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemSrcData, fpSrcData, 0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemStartPos,npStartPos,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemEndPos, npEndPos, 0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
cl_mem outputCl = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
CL_MEM_READ_WRITE,
|
|
|
|
size* sizeof(float),
|
|
|
|
NULL,
|
|
|
|
&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemStartPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&mpClmemEndPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),(void *)&outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = size;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue,
|
|
|
|
kEnv.mpkKernel,
|
|
|
|
1,
|
|
|
|
NULL,
|
|
|
|
global_work_size,
|
|
|
|
NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * outputMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
|
|
|
|
outputCl,
|
|
|
|
CL_TRUE,CL_MAP_READ,
|
|
|
|
0,
|
|
|
|
size*sizeof(float),
|
|
|
|
0,NULL,NULL,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
for(int i=0;i<size;i++)
|
|
|
|
output[i]=outputMap[i];// from gpu float type to cpu double type
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outputMap,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemSrcData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemStartPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemEndPos);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int OclCalc::OclHostSignedAdd32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize)
|
|
|
|
{
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
const char *kernelName = "oclSignedAdd";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemLeftData,fpLeftData,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemRightData,fpRightData,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
CL_MEM_READ_WRITE,
|
|
|
|
nRowSize * sizeof(float),
|
|
|
|
NULL,
|
|
|
|
&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemLeftData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemRightData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&clResult);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = nRowSize;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,nRowSize*sizeof(float),0,NULL,NULL,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
for(int i=0;i<nRowSize;i++)
|
|
|
|
rResult[i]=hostMapResult[i];
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemLeftData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemRightData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clResult);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
int OclCalc::OclHostSignedSub32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize)
|
|
|
|
{
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
const char *kernelName = "oclSignedSub";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemLeftData,fpLeftData,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemRightData,fpRightData,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
CL_MEM_READ_WRITE,
|
|
|
|
nRowSize * sizeof(float),
|
|
|
|
NULL,
|
|
|
|
&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemLeftData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemRightData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&clResult);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = nRowSize;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,nRowSize*sizeof(float),0,NULL,NULL,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
for(int i=0;i<nRowSize;i++)
|
|
|
|
rResult[i]=hostMapResult[i];
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemLeftData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemRightData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clResult);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
int OclCalc::OclHostSignedMul32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize)
|
|
|
|
{
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
const char *kernelName = "oclSignedMul";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemLeftData,fpLeftData,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemRightData,fpRightData,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
CL_MEM_READ_WRITE,
|
|
|
|
nRowSize * sizeof(float),
|
|
|
|
NULL,
|
|
|
|
&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemLeftData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemRightData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&clResult);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = nRowSize;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,nRowSize*sizeof(float),0,NULL,NULL,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
for(int i=0;i<nRowSize;i++)
|
|
|
|
rResult[i]=hostMapResult[i];
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemLeftData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemRightData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clResult);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
int OclCalc::OclHostSignedDiv32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize)
|
|
|
|
{
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
const char *kernelName = "oclSignedDiv";
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemLeftData,fpLeftData,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemRightData,fpRightData,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
CL_MEM_READ_WRITE,
|
|
|
|
nRowSize * sizeof(float),
|
|
|
|
NULL,
|
|
|
|
&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemLeftData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemRightData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&clResult);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = nRowSize;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,nRowSize*sizeof(float),0,NULL,NULL,&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
for(int i=0;i<nRowSize;i++)
|
|
|
|
rResult[i]=hostMapResult[i];
|
|
|
|
clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemLeftData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(mpClmemRightData);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clResult);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef FORMULAE_DEBUG
|
|
|
|
int OclCalc::OclHostFormulaSum(float *dpsrcData,int *npStart,int *npEnd,float *dpOutput,int nSize) {
|
|
|
|
KernelEnv kEnv;
|
|
|
|
const char *cpKernelName = "oclFormulaSum";
|
|
|
|
CheckKernelName(&kEnv,cpKernelName);
|
|
|
|
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
if(nSize < 1 )
|
|
|
|
{
|
|
|
|
printf("The nSize should be bigger than one\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
int nAlignSize = npEnd[nSize-1]-npStart[0]+1;
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus);
|
|
|
|
cl_mem clpInput = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),
|
|
|
|
nAlignSize * sizeof(float), (void *)dpsrcData, &clStatus);
|
|
|
|
cl_mem clpStart = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR ),
|
|
|
|
nSize * sizeof(unsigned int), (void *)npStart, &clStatus);
|
|
|
|
cl_mem clpEnd = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),
|
|
|
|
nSize * sizeof(unsigned int), (void *)npEnd, &clStatus);
|
|
|
|
cl_mem clpOutput = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR),
|
|
|
|
nSize* sizeof(float), NULL, &clStatus);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&clpInput);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&clpStart);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&clpEnd);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
|
|
|
|
(void *)&clpOutput);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = nSize;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
float * fpOutPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clpOutput,CL_TRUE,CL_MAP_READ,0,nSize*sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
for(int i=0;i<nSize;i++)
|
|
|
|
dpOutput[i]=fpOutPutMap[i];
|
|
|
|
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clpOutput,fpOutPutMap,0,NULL,NULL);
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpInput);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpStart);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpEnd);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpOutput);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int OclCalc::OclHostFormulaCount(int *npStart,int *npEnd,float *dpOutput,int nSize) {
|
|
|
|
KernelEnv kEnv;
|
|
|
|
const char *cpKernelName = "oclFormulaCount";
|
|
|
|
CheckKernelName(&kEnv,cpKernelName);
|
|
|
|
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus);
|
|
|
|
cl_mem clpStart = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR ),
|
|
|
|
nSize * sizeof(unsigned int), (void *)npEnd, &clStatus);
|
|
|
|
cl_mem clpEnd = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR),
|
|
|
|
nSize * sizeof(unsigned int), (void *)dpOutput, &clStatus);
|
|
|
|
cl_mem clpOutput = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_ONLY),
|
|
|
|
nSize * sizeof(float), (void *)npStart, &clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&clpStart);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&clpEnd);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&clpOutput);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = nSize;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus)
|
|
|
|
|
|
|
|
clStatus=clEnqueueReadBuffer(kEnv.mpkCmdQueue,clpOutput,CL_TRUE,0,nSize*sizeof(float),(void *)dpOutput,0,NULL,NULL);
|
|
|
|
if(clStatus!=0)
|
|
|
|
{
|
|
|
|
printf("clEnqueueReadBuffer err\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpStart);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpEnd);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpOutput);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The dpsrcData is double rows,one col is the first column data,second is the second column data.if get a cell data range,the range
|
|
|
|
*save the npStart array eg:a4-a8;b10-b14,the npStart will store a4,b10,and the npEnd will store a8,b14 range.So it can if(i +1)%2 to judge
|
|
|
|
* the a cloumn or b cloumn npStart range.so as b bolumn.
|
|
|
|
*/
|
|
|
|
int OclCalc::OclHostFormulaSumProduct(float *dpSrcData,int *npStart,int *npEnd,float *dpOutput,int nSize) {
|
|
|
|
KernelEnv kEnv;
|
|
|
|
const char *cpKernelName = "oclFormulaSumproduct";
|
|
|
|
CheckKernelName(&kEnv,cpKernelName);
|
|
|
|
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus);
|
|
|
|
cl_mem clpFirstCol = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE ),
|
|
|
|
nSize * sizeof(float),NULL, &clStatus);
|
|
|
|
cl_mem clpSecondCol = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
nSize * sizeof(float),NULL, &clStatus);
|
|
|
|
cl_mem clpStart = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR ),
|
|
|
|
nSize *2 * sizeof(unsigned int), (void *)npStart, &clStatus);
|
|
|
|
cl_mem clpEnd = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR ),
|
|
|
|
nSize *2 * sizeof(unsigned int),(void *)npEnd , &clStatus);
|
|
|
|
cl_mem clpOutput = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY),
|
|
|
|
nSize* sizeof(float), (void *)dpOutput, &clStatus);
|
|
|
|
|
|
|
|
float * fpHostMapFirCol = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clpFirstCol,CL_TRUE,CL_MAP_WRITE,0,nSize * sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
//checkUpPoint("hostMapSrc",hostMapSrc);
|
|
|
|
float * fpHostMapSecCol = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clpSecondCol,CL_TRUE,CL_MAP_WRITE,0,nSize * sizeof(float),0,NULL,NULL,NULL);
|
|
|
|
//checkUpPoint("hostMapSrc",hostMapSrc);
|
|
|
|
if(NULL==fpHostMapFirCol||NULL==fpHostMapSecCol)
|
|
|
|
{
|
|
|
|
printf("In File %s at %d line alloc err\n",__FILE__,__LINE__);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
for(int i=0;i<nSize;i++)
|
|
|
|
{
|
|
|
|
fpHostMapFirCol[i] = dpSrcData[2*i];
|
|
|
|
fpHostMapSecCol[i] = dpSrcData[2*i+1];
|
|
|
|
}
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clpFirstCol,fpHostMapFirCol,0,NULL,NULL);
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clpSecondCol,fpHostMapSecCol,0,NULL,NULL);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&fpHostMapFirCol);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&fpHostMapSecCol);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
|
|
|
|
(void *)&clpStart);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
|
|
|
|
(void *)&clpEnd);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(cl_mem),
|
|
|
|
(void *)&clpOutput);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
global_work_size[0] = nSize;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus)
|
|
|
|
|
|
|
|
clStatus=clEnqueueReadBuffer(kEnv.mpkCmdQueue,clpOutput,CL_TRUE,0,nSize*sizeof(float),(void *)dpOutput,0,NULL,NULL);
|
|
|
|
if(clStatus!=0)
|
|
|
|
{
|
|
|
|
printf("clEnqueueReadBuffer err\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpFirstCol);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpSecondCol);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpStart);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpEnd);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(clpOutput);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2013-07-08 21:35:26 +01:00
|
|
|
#if 0
|
|
|
|
typedef double fp_;
|
|
|
|
#else
|
|
|
|
typedef float fp_t;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// FIXME: should be templatised in <double> - double buffering [sic] rocks
|
|
|
|
static cl_mem allocateDoubleBuffer(KernelEnv &rEnv, const double *_pValues,
|
|
|
|
size_t nElements, cl_int *pStatus)
|
|
|
|
{
|
|
|
|
// Ugh - horrible redundant copying ...
|
|
|
|
cl_mem xValues = clCreateBuffer(rEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE),
|
|
|
|
nElements * sizeof(double), NULL, pStatus);
|
|
|
|
fp_t *pValues = (fp_t *)clEnqueueMapBuffer(rEnv.mpkCmdQueue,xValues,CL_TRUE,CL_MAP_WRITE,0,
|
|
|
|
nElements * sizeof(fp_t),0,NULL,NULL,NULL);
|
2013-07-11 10:05:55 +03:00
|
|
|
for(size_t i=0;i<nElements;i++)
|
2013-07-08 21:35:26 +01:00
|
|
|
pValues[i] = (fp_t)_pValues[i];
|
|
|
|
|
|
|
|
clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
|
|
|
|
|
|
|
|
return xValues;
|
|
|
|
}
|
|
|
|
|
|
|
|
double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, const double *pOpArray,
|
|
|
|
const double *pSubtractSingle, size_t nElements)
|
|
|
|
{
|
|
|
|
KernelEnv kEnv;
|
2013-07-08 21:49:31 +01:00
|
|
|
SetKernelEnv(&kEnv);
|
2013-07-08 21:35:26 +01:00
|
|
|
|
|
|
|
// select a kernel: cut & paste coding is utterly evil.
|
|
|
|
const char *kernelName;
|
|
|
|
switch (eOp) {
|
|
|
|
case ocMax:
|
|
|
|
case ocMin:
|
|
|
|
; // FIXME: fallthrough for now
|
|
|
|
case ocAverage:
|
|
|
|
kernelName = "oclAverageDelta";
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
CheckKernelName(&kEnv,kernelName);
|
|
|
|
|
|
|
|
cl_int clStatus;
|
|
|
|
size_t global_work_size[1];
|
|
|
|
|
|
|
|
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
|
2013-07-09 12:02:43 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
2013-07-08 21:35:26 +01:00
|
|
|
|
|
|
|
// Ugh - horrible redundant copying ...
|
|
|
|
cl_mem valuesCl = allocateDoubleBuffer(kEnv, pOpArray, nElements, &clStatus);
|
|
|
|
cl_mem subtractCl = allocateDoubleBuffer(kEnv, pSubtractSingle, nElements, &clStatus);
|
|
|
|
|
|
|
|
cl_int start = 0;
|
|
|
|
cl_int end = (cl_int) nElements;
|
|
|
|
cl_mem outputCl = clCreateBuffer(kEnv.mpkContext,
|
|
|
|
CL_MEM_READ_WRITE,
|
|
|
|
nElements * sizeof(fp_t),
|
|
|
|
NULL,
|
|
|
|
&clStatus);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
|
|
|
|
(void *)&valuesCl);
|
2013-07-09 12:02:43 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
2013-07-08 21:35:26 +01:00
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
|
|
|
|
(void *)&subtractCl);
|
2013-07-09 12:02:43 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_int),
|
2013-07-08 21:35:26 +01:00
|
|
|
(void *)&start);
|
2013-07-09 12:02:43 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_int),
|
2013-07-08 21:35:26 +01:00
|
|
|
(void *)&end);
|
2013-07-09 12:02:43 +01:00
|
|
|
CHECK_OPENCL(clStatus);
|
2013-07-08 21:35:26 +01:00
|
|
|
clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(cl_mem),
|
|
|
|
(void *)&outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
fprintf(stderr, "prior to enqueue range kernel\n");
|
|
|
|
|
|
|
|
global_work_size[0] = nElements;
|
|
|
|
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
|
|
|
|
NULL, global_work_size, NULL, 0, NULL, NULL);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
double *pResult = new double[nElements];
|
|
|
|
if(!pResult)
|
|
|
|
return NULL; // leak.
|
|
|
|
|
|
|
|
fp_t *pOutput = (fp_t *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,
|
|
|
|
CL_MAP_READ,0,nElements*sizeof(fp_t),
|
|
|
|
0,NULL,NULL,NULL);
|
2013-07-11 10:05:55 +03:00
|
|
|
for(size_t i = 0; i < nElements; i++)
|
2013-07-08 21:35:26 +01:00
|
|
|
pResult[i] = (double)pOutput[i];
|
|
|
|
|
|
|
|
clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,pOutput,0,NULL,NULL);
|
|
|
|
|
|
|
|
clStatus = clFinish(kEnv.mpkCmdQueue);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseKernel(kEnv.mpkKernel);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(valuesCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(subtractCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
clStatus = clReleaseMemObject(outputCl);
|
|
|
|
CHECK_OPENCL(clStatus);
|
|
|
|
|
|
|
|
fprintf(stderr, "completed opencl delta operation\n");
|
|
|
|
|
|
|
|
return pResult;
|
|
|
|
}
|
|
|
|
|
2013-06-16 17:18:23 +01:00
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|