c++ - Kernel doesn't wait for events -
I have a problem with a kernel invocation. My code looks like this:
std::vector<cl::event> events; ... queue.enqueuewritebuffer(arrayfirst, cl_false, 0, sizeofarray, null, null, &arrayevent); events.push_back(arrayevent); queue.enqueuewritebuffer(arraysecond, cl_false, 0, sizeofarraysecond, this->arraysecond, null, &arraysecondevent); events.push_back(arraysecondevent); kernel(cl::enqueueargs(queue, events, cl::ndrange(512), cl::ndrange(128)), arrayfirst, arraysecond);
When I run it, execution doesn't go inside the kernel code. When I change the "make_kernel" invocation to this:
kernel(cl::enqueueargs(queue, arraysecondevent, cl::ndrange(512), cl::ndrange(128)), arrayfirst, arraysecond);
it goes inside the kernel, but then I can't be sure that the memory for "arrayfirst" has been allocated correctly. I checked the documentation of the OpenCL 1.2 C++ wrapper and found that the invocation should look like this:
cl::enqueueargs::enqueueargs(commandqueue &queue, const vector_class<event> &events, ndrange offset, ndrange global, ndrange local) //page 42
But when I try to pass the address of the events vector, I get a compilation error saying there is no suitable constructor for the following arguments.
error:
error: no instance of constructor "cl::enqueueargs::enqueueargs" matches argument list argument types are: (cl::commandqueue, std::vector<cl::event, std::allocator<cl::event>> *, cl::ndrange, cl::ndrange) valueofimagekernel(cl::enqueueargs(valueofimagequeue, &events, cl::ndrange(512), cl::ndrange(128)),
Does anyone have an idea what I'm doing wrong?
It seems the line below is one you don't need, and it is what is causing the trouble: queue.enqueuewritebuffer(arrayfirst, cl_false, 0, sizeofarray, null, null, &arrayevent);
You only need to call write buffer when you have something to write, and that is not the case here.
Below is a full working example, tested on Phi (for clarity, error handling is omitted):
#include <iostream> #include <vector> #include <string> #include <cl/cl.hpp> int main() { const char *kernel_str{ "kernel void k1(global int *data1, global int *data2){" " int local_id = get_local_id(0);" " data1[local_id] = data2[local_id] + data2[local_id];" "}" }; cl_int err = cl_success; std::vector<cl::platform> platforms; cl::platform::get(&platforms); cl::platform plat; (auto &p : platforms) { std::vector<cl::device> devices; p.getdevices(cl_device_type_accelerator, &devices); if (!devices.empty()) { plat = p; break; } } if (plat() == 0) { std::cout << "no opencl platform found."; return -1; } cl_context_properties properties[] = { cl_context_platform, (cl_context_properties)(plat)(), 0 }; cl::context context(cl_device_type_accelerator, properties); std::vector<cl::device> devices = context.getinfo<cl_context_devices>(); cl::program::sources source(1, std::make_pair(kernel_str, strlen(kernel_str))); cl::program program = cl::program(context, source); err = program.build(devices); cl::commandqueue queue(context, devices[0], 0, &err); size_t sizeofarray = 512, sizeofarraysecond = 512; std::vector<int> varrayfirst(sizeofarray); std::vector<int> varraysecond(sizeofarraysecond); (size_t x = 0; x < sizeofarraysecond; ++x) varraysecond[x] = x; cl::buffer arrayfirst(context, cl_mem_write_only, sizeofarray*sizeof(varrayfirst[0])); cl::buffer arraysecond(context, cl_mem_read_only, sizeofarraysecond*sizeof(varraysecond[0])); cl::event arraysecondevent; std::vector<cl::event> events; err = queue.enqueuewritebuffer(arraysecond, cl_false, 0, sizeofarraysecond*sizeof(varraysecond[0]), &varraysecond[0], null, &arraysecondevent); events.push_back(arraysecondevent); cl::make_kernel<cl::buffer&, cl::buffer&> kernel(program, "k1"); cl::event ev = kernel(cl::enqueueargs(queue, events, cl::ndrange(512), cl::ndrange(128)), arrayfirst, arraysecond); std::vector<cl::event> evs(1, ev); err = queue.enqueuereadbuffer(arrayfirst, cl_true, 0, sizeofarray*sizeof(varrayfirst[0]), 
&varrayfirst[0], &evs); // final blocking read std::cout << "outputting first 10 values: " << std::endl; (int x = 0; x < 10; ++x) std::cout << varrayfirst[x] << ", "; std::cout << std::endl; return 0; }
output:
outputting first 10 values: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18,
Comments
Post a Comment