Intel(R) Threading Building Blocks Doxygen Documentation version 4.2.3
flow_graph_opencl_node.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2005-2020 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
18
19#if !defined(__TBB_show_deprecation_message_flow_graph_opencl_node_H) && defined(__TBB_show_deprecated_header_message)
20#define __TBB_show_deprecation_message_flow_graph_opencl_node_H
21#pragma message("TBB Warning: tbb/flow_graph_opencl_node.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
22#endif
23
24#if defined(__TBB_show_deprecated_header_message)
25#undef __TBB_show_deprecated_header_message
26#endif
27
28#ifndef __TBB_flow_graph_opencl_node_H
29#define __TBB_flow_graph_opencl_node_H
30
31#define __TBB_flow_graph_opencl_node_H_include_area
33
34#include "tbb/tbb_config.h"
35#if __TBB_PREVIEW_OPENCL_NODE
36
37#include "flow_graph.h"
38
39#include <vector>
40#include <string>
41#include <algorithm>
42#include <iostream>
43#include <fstream>
44#include <map>
45#include <mutex>
46
47#ifdef __APPLE__
48#include <OpenCL/opencl.h>
49#else
50#include <CL/cl.h>
51#endif
52
53namespace tbb {
54namespace flow {
55
56namespace interface11 {
57
58template <typename DeviceFilter>
59class opencl_factory;
60
61namespace opencl_info {
63}
64
65template <typename Factory>
66class opencl_program;
67
68inline void enforce_cl_retcode(cl_int err, std::string msg) {
69 if (err != CL_SUCCESS) {
70 std::cerr << msg << "; error code: " << err << std::endl;
71 throw msg;
72 }
73}
74
75template <typename T>
76T event_info(cl_event e, cl_event_info i) {
77 T res;
78 enforce_cl_retcode(clGetEventInfo(e, i, sizeof(res), &res, NULL), "Failed to get OpenCL event information");
79 return res;
80}
81
82template <typename T>
83T device_info(cl_device_id d, cl_device_info i) {
84 T res;
85 enforce_cl_retcode(clGetDeviceInfo(d, i, sizeof(res), &res, NULL), "Failed to get OpenCL device information");
86 return res;
87}
88
89template <>
90inline std::string device_info<std::string>(cl_device_id d, cl_device_info i) {
91 size_t required;
92 enforce_cl_retcode(clGetDeviceInfo(d, i, 0, NULL, &required), "Failed to get OpenCL device information");
93
94 char *buff = (char*)alloca(required);
95 enforce_cl_retcode(clGetDeviceInfo(d, i, required, buff, NULL), "Failed to get OpenCL device information");
96
97 return buff;
98}
99
100template <typename T>
101T platform_info(cl_platform_id p, cl_platform_info i) {
102 T res;
103 enforce_cl_retcode(clGetPlatformInfo(p, i, sizeof(res), &res, NULL), "Failed to get OpenCL platform information");
104 return res;
105}
106
107template <>
108inline std::string platform_info<std::string>(cl_platform_id p, cl_platform_info i) {
109 size_t required;
110 enforce_cl_retcode(clGetPlatformInfo(p, i, 0, NULL, &required), "Failed to get OpenCL platform information");
111
112 char *buff = (char*)alloca(required);
113 enforce_cl_retcode(clGetPlatformInfo(p, i, required, buff, NULL), "Failed to get OpenCL platform information");
114
115 return buff;
116}
117
118
120public:
121 typedef size_t device_id_type;
122 enum : device_id_type {
123 unknown = device_id_type( -2 ),
124 host = device_id_type( -1 )
125 };
126
127 opencl_device() : my_device_id( unknown ), my_cl_device_id( NULL ), my_cl_command_queue( NULL ) {}
128
129 opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ), my_cl_command_queue( NULL ) {}
130
131 opencl_device( cl_device_id cl_d_id, device_id_type device_id ) : my_device_id( device_id ), my_cl_device_id( cl_d_id ), my_cl_command_queue( NULL ) {}
132
133 std::string platform_profile() const {
134 return platform_info<std::string>( platform_id(), CL_PLATFORM_PROFILE );
135 }
136 std::string platform_version() const {
137 return platform_info<std::string>( platform_id(), CL_PLATFORM_VERSION );
138 }
139 std::string platform_name() const {
140 return platform_info<std::string>( platform_id(), CL_PLATFORM_NAME );
141 }
142 std::string platform_vendor() const {
143 return platform_info<std::string>( platform_id(), CL_PLATFORM_VENDOR );
144 }
145 std::string platform_extensions() const {
146 return platform_info<std::string>( platform_id(), CL_PLATFORM_EXTENSIONS );
147 }
148
149 template <typename T>
150 void info( cl_device_info i, T &t ) const {
151 t = device_info<T>( my_cl_device_id, i );
152 }
153 std::string version() const {
154 // The version string format: OpenCL<space><major_version.minor_version><space><vendor-specific information>
155 return device_info<std::string>( my_cl_device_id, CL_DEVICE_VERSION );
156 }
157 int major_version() const {
158 int major;
159 std::sscanf( version().c_str(), "OpenCL %d", &major );
160 return major;
161 }
162 int minor_version() const {
163 int major, minor;
164 std::sscanf( version().c_str(), "OpenCL %d.%d", &major, &minor );
165 return minor;
166 }
168#if CL_VERSION_2_0
169 if ( major_version() >= 2 )
170 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
171 else
172#endif /* CL_VERSION_2_0 */
173 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
174 }
176#if CL_VERSION_2_0
177 if ( major_version() >= 2 )
178 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
179 else
180#endif /* CL_VERSION_2_0 */
181 return false;
182 }
183 std::array<size_t, 3> max_work_item_sizes() const {
184 return device_info<std::array<size_t, 3>>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
185 }
186 size_t max_work_group_size() const {
187 return device_info<size_t>( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
188 }
189 bool built_in_kernel_available( const std::string& k ) const {
190 const std::string semi = ";";
191 // Added semicolumns to force an exact match (to avoid a partial match, e.g. "add" is partly matched with "madd").
192 return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
193 }
194 std::string built_in_kernels() const {
195 return device_info<std::string>( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
196 }
197 std::string name() const {
198 return device_info<std::string>( my_cl_device_id, CL_DEVICE_NAME );
199 }
200 cl_bool available() const {
201 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_AVAILABLE );
202 }
203 cl_bool compiler_available() const {
204 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
205 }
206 cl_bool linker_available() const {
207 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
208 }
209 bool extension_available( const std::string &ext ) const {
210 const std::string space = " ";
211 // Added space to force an exact match (to avoid a partial match, e.g. "ext" is partly matched with "ext2").
212 return (space + extensions() + space).find( space + ext + space ) != std::string::npos;
213 }
214 std::string extensions() const {
215 return device_info<std::string>( my_cl_device_id, CL_DEVICE_EXTENSIONS );
216 }
217
218 cl_device_type type() const {
219 return device_info<cl_device_type>( my_cl_device_id, CL_DEVICE_TYPE );
220 }
221
222 std::string vendor() const {
223 return device_info<std::string>( my_cl_device_id, CL_DEVICE_VENDOR );
224 }
225
226 cl_uint address_bits() const {
227 return device_info<cl_uint>( my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
228 }
229
230 cl_device_id device_id() const {
231 return my_cl_device_id;
232 }
233
234 cl_command_queue command_queue() const {
235 return my_cl_command_queue;
236 }
237
238 void set_command_queue( cl_command_queue cmd_queue ) {
239 my_cl_command_queue = cmd_queue;
240 }
241
242 cl_platform_id platform_id() const {
243 return device_info<cl_platform_id>( my_cl_device_id, CL_DEVICE_PLATFORM );
244 }
245
246private:
247
249 cl_device_id my_cl_device_id;
250 cl_command_queue my_cl_command_queue;
251
253
254 template <typename DeviceFilter>
255 friend class opencl_factory;
256 template <typename Factory>
257 friend class opencl_memory;
258 template <typename Factory>
259 friend class opencl_program;
260
261#if TBB_USE_ASSERT
262 template <typename T, typename Factory>
263 friend class opencl_buffer;
264#endif
265};
266
268 typedef std::vector<opencl_device> container_type;
269public:
270 typedef container_type::iterator iterator;
271 typedef container_type::const_iterator const_iterator;
272 typedef container_type::size_type size_type;
273
275 opencl_device_list( std::initializer_list<opencl_device> il ) : my_container( il ) {}
276
277 void add( opencl_device d ) { my_container.push_back( d ); }
278 size_type size() const { return my_container.size(); }
279 bool empty() const { return my_container.empty(); }
280 iterator begin() { return my_container.begin(); }
281 iterator end() { return my_container.end(); }
282 const_iterator begin() const { return my_container.begin(); }
283 const_iterator end() const { return my_container.end(); }
284 const_iterator cbegin() const { return my_container.cbegin(); }
285 const_iterator cend() const { return my_container.cend(); }
286
287private:
289};
290
291namespace internal {
292
293// Retrieve all OpenCL devices from machine
295 opencl_device_list opencl_devices;
296
297 cl_uint num_platforms;
298 enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms), "clGetPlatformIDs failed");
299
300 std::vector<cl_platform_id> platforms(num_platforms);
301 enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL), "clGetPlatformIDs failed");
302
303 cl_uint num_devices;
304 std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
305 cl_uint num_all_devices = 0;
306 while (platforms_it != platforms.end()) {
307 cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
308 if (err == CL_DEVICE_NOT_FOUND) {
309 platforms_it = platforms.erase(platforms_it);
310 }
311 else {
312 enforce_cl_retcode(err, "clGetDeviceIDs failed");
313 num_all_devices += num_devices;
314 ++platforms_it;
315 }
316 }
317
318 std::vector<cl_device_id> devices(num_all_devices);
319 std::vector<cl_device_id>::iterator devices_it = devices.begin();
320 for (auto p = platforms.begin(); p != platforms.end(); ++p) {
321 enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices), "clGetDeviceIDs failed");
322 devices_it += num_devices;
323 }
324
325 for (auto d = devices.begin(); d != devices.end(); ++d) {
326 opencl_devices.add(opencl_device((*d)));
327 }
328
329 return opencl_devices;
330}
331
332} // namespace internal
333
334// TODO: consider this namespace as public API
335namespace opencl_info {
336
338 // Static storage for all available OpenCL devices on machine
339 static const opencl_device_list my_devices = internal::find_available_devices();
340 return my_devices;
341 }
342
343} // namespace opencl_info
344
345
347public:
348 virtual void call() = 0;
349 virtual ~callback_base() {}
350};
351
352template <typename Callback, typename T>
353class callback : public callback_base {
354 Callback my_callback;
356public:
357 callback( Callback c, const T& t ) : my_callback( c ), my_data( t ) {}
358
361 }
362};
363
364template <typename T, typename Factory = opencl_info::default_opencl_factory>
366public:
367 typedef T value_type;
368
369 opencl_async_msg() : my_callback_flag_ptr( std::make_shared< tbb::atomic<bool>>() ) {
370 my_callback_flag_ptr->store<tbb::relaxed>(false);
371 }
372
373 explicit opencl_async_msg( const T& data ) : my_data(data), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
374 my_callback_flag_ptr->store<tbb::relaxed>(false);
375 }
376
377 opencl_async_msg( const T& data, cl_event event ) : my_data(data), my_event(event), my_is_event(true), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
378 my_callback_flag_ptr->store<tbb::relaxed>(false);
379 enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
380 }
381
382 T& data( bool wait = true ) {
383 if ( my_is_event && wait ) {
384 enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
385 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
386 my_is_event = false;
387 }
388 return my_data;
389 }
390
391 const T& data( bool wait = true ) const {
392 if ( my_is_event && wait ) {
393 enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
394 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
395 my_is_event = false;
396 }
397 return my_data;
398 }
399
400 opencl_async_msg( const opencl_async_msg &dmsg ) : async_msg<T>(dmsg),
401 my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ),
402 my_callback_flag_ptr(dmsg.my_callback_flag_ptr)
403 {
404 if ( my_is_event )
405 enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
406 }
407
409 my_data(std::move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event),
410 my_callback_flag_ptr( std::move(dmsg.my_callback_flag_ptr) )
411 {
412 dmsg.my_is_event = false;
413 }
414
416 async_msg<T>::operator =(dmsg);
417
418 // Release original event
419 if ( my_is_event )
420 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to retain an event" );
421
422 my_data = dmsg.my_data;
423 my_event = dmsg.my_event;
424 my_is_event = dmsg.my_is_event;
425
426 // Retain copied event
427 if ( my_is_event )
428 enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
429
430 my_callback_flag_ptr = dmsg.my_callback_flag_ptr;
431 return *this;
432 }
433
435 if ( my_is_event )
436 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
437 }
438
439 cl_event const * get_event() const { return my_is_event ? &my_event : NULL; }
440 void set_event( cl_event e ) const {
441 if ( my_is_event ) {
442 cl_command_queue cq = event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE );
443 if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
444 enforce_cl_retcode( clFlush( cq ), "Failed to flush an OpenCL command queue" );
445 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
446 }
447 my_is_event = true;
448 my_event = e;
449 clRetainEvent( my_event );
450 }
451
452 void clear_event() const {
453 if ( my_is_event ) {
454 enforce_cl_retcode( clFlush( event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE ) ), "Failed to flush an OpenCL command queue" );
455 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
456 }
457 my_is_event = false;
458 }
459
460 template <typename Callback>
461 void register_callback( Callback c ) const {
462 __TBB_ASSERT( my_is_event, "The OpenCL event is not set" );
463 enforce_cl_retcode( clSetEventCallback( my_event, CL_COMPLETE, register_callback_func, new callback<Callback, T>( c, my_data ) ), "Failed to set an OpenCL callback" );
464 }
465
466 operator T&() { return data(); }
467 operator const T&() const { return data(); }
468
469protected:
470 // Overridden in this derived class to inform that
471 // async calculation chain is over
474 if (! my_callback_flag_ptr->fetch_and_store(true)) {
475 opencl_async_msg a(*this);
476 if (my_is_event) {
477 register_callback([a](const T& t) mutable {
478 a.set(t);
479 });
480 }
481 else {
482 a.set(my_data);
483 }
484 }
485 clear_event();
486 }
487
488private:
489 static void CL_CALLBACK register_callback_func( cl_event, cl_int event_command_exec_status, void *data ) {
490 tbb::internal::suppress_unused_warning( event_command_exec_status );
491 __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
492 __TBB_ASSERT( data, NULL );
493 callback_base *c = static_cast<callback_base*>(data);
494 c->call();
495 delete c;
496 }
497
499 mutable cl_event my_event;
500 mutable bool my_is_event = false;
501
502 std::shared_ptr< tbb::atomic<bool> > my_callback_flag_ptr;
503};
504
505template <typename K, typename T, typename Factory>
508 const T &t = dmsg.data( false );
509 __TBB_STATIC_ASSERT( true, "" );
510 return key_from_message<K, T>( t );
511}
512
513template <typename Factory>
515public:
517 opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) {
518 my_curr_device_id = my_factory->devices().begin()->my_device_id;
519 }
520
521 virtual ~opencl_memory() {
522 if ( my_sending_event_present ) enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event for the OpenCL buffer" );
523 enforce_cl_retcode( clReleaseMemObject( my_cl_mem ), "Failed to release an memory object" );
524 }
525
526 cl_mem get_cl_mem() const {
527 return my_cl_mem;
528 }
529
530 void* get_host_ptr() {
531 if ( !my_host_ptr ) {
533 d.data();
534 __TBB_ASSERT( d.data() == my_host_ptr, NULL );
535 }
536 return my_host_ptr;
537 }
538
539 Factory *factory() const { return my_factory; }
540
543 if (e) {
545 } else {
547 }
548
549 // Concurrent receives are prohibited so we do not worry about synchronization.
551 map_memory(*my_factory->devices().begin(), d);
553 my_host_ptr = d.data(false);
554 }
555 // Release the sending event
557 enforce_cl_retcode(clReleaseEvent(my_sending_event), "Failed to release an event");
559 }
560 return d;
561 }
562
565 if (!my_factory->is_same_context(my_curr_device_id.load<tbb::acquire>(), device_id)) {
566 {
568 if (!my_factory->is_same_context(my_curr_device_id.load<tbb::relaxed>(), device_id)) {
569 __TBB_ASSERT(my_host_ptr, "The buffer has not been mapped");
571 my_factory->enqueue_unmap_buffer(device, *this, d);
572 my_sending_event = *d.get_event();
574 enforce_cl_retcode(clRetainEvent(my_sending_event), "Failed to retain an event");
575 my_host_ptr = NULL;
576 my_curr_device_id.store<tbb::release>(device_id);
577 }
578 }
580 }
581
582 // !e means that buffer has come from the host
584
585 __TBB_ASSERT(!my_host_ptr, "The buffer has not been unmapped");
587 }
588
590protected:
591 cl_mem my_cl_mem;
592 tbb::atomic<opencl_device::device_id_type> my_curr_device_id;
594 Factory *my_factory;
595
599};
600
601template <typename Factory>
602class opencl_buffer_impl : public opencl_memory<Factory> {
603 size_t my_size;
604public:
605 opencl_buffer_impl( size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
606 cl_int err;
607 this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR, size, NULL, &err );
608 enforce_cl_retcode( err, "Failed to create an OpenCL buffer" );
609 }
610
611 // The constructor for subbuffers.
612 opencl_buffer_impl( cl_mem m, size_t index, size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
613 cl_int err;
614 cl_buffer_region region = { index, size };
615 this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, &region, &err );
616 enforce_cl_retcode( err, "Failed to create an OpenCL subbuffer" );
617 }
618
619 size_t size() const {
620 return my_size;
621 }
622
624 this->my_factory->enqueue_map_buffer( device, *this, dmsg );
625 }
626
627#if TBB_USE_ASSERT
628 template <typename, typename>
629 friend class opencl_buffer;
630#endif
631};
632
638
639template <typename T, typename Factory = opencl_info::default_opencl_factory>
642
643template <typename T, typename Factory = opencl_info::default_opencl_factory>
646public:
647 typedef cl_mem native_object_type;
649 typedef Factory opencl_factory_type;
650
651 template<access_type a> using iterator = T*;
652
653 template <access_type a>
655 T* ptr = (T*)my_impl->get_host_ptr();
656 __TBB_ASSERT( ptr, NULL );
657 return iterator<a>( ptr );
658 }
659
660 T* data() const { return &access<read_write>()[0]; }
661
662 template <access_type a = read_write>
663 iterator<a> begin() const { return access<a>(); }
664
665 template <access_type a = read_write>
666 iterator<a> end() const { return access<a>()+my_impl->size()/sizeof(T); }
667
668 size_t size() const { return my_impl->size()/sizeof(T); }
669
670 T& operator[] ( ptrdiff_t k ) { return begin()[k]; }
671
674 opencl_buffer( Factory &f, size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), f ) ) {}
675
676 cl_mem native_object() const {
677 return my_impl->get_cl_mem();
678 }
679
681 return *this;
682 }
683
685 __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
686 opencl_async_msg<void*, Factory> d = my_impl->send( device, dependency.get_event() );
687 const cl_event *e = d.get_event();
688 if ( e ) dependency.set_event( *e );
689 else dependency.clear_event();
690 }
691 void receive( const opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
692 __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
693 opencl_async_msg<void*, Factory> d = my_impl->receive( dependency.get_event() );
694 const cl_event *e = d.get_event();
695 if ( e ) dependency.set_event( *e );
696 else dependency.clear_event();
697 }
698
699 opencl_subbuffer<T, Factory> subbuffer( size_t index, size_t size ) const;
700private:
701 // The constructor for subbuffers.
702 opencl_buffer( Factory &f, cl_mem m, size_t index, size_t size ) : my_impl( std::make_shared<impl_type>( m, index*sizeof(T), size*sizeof(T), f ) ) {}
703
705
706 std::shared_ptr<impl_type> my_impl;
707
709 return lhs.my_impl == rhs.my_impl;
710 }
711
712 template <typename>
713 friend class opencl_factory;
714 template <typename, typename>
715 friend class opencl_subbuffer;
716};
717
718template <typename T, typename Factory>
720opencl_subbuffer : public opencl_buffer<T, Factory> {
722public:
724 opencl_subbuffer( const opencl_buffer<T, Factory> &owner, size_t index, size_t size ) :
725 opencl_buffer<T, Factory>( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {}
726};
727
728template <typename T, typename Factory>
730 return opencl_subbuffer<T, Factory>( *this, index, size );
731}
732
733
734#define is_typedef(type) \
735 template <typename T> \
736 struct is_##type { \
737 template <typename C> \
738 static std::true_type check( typename C::type* ); \
739 template <typename C> \
740 static std::false_type check( ... ); \
741 \
742 static const bool value = decltype(check<T>(0))::value; \
743 }
744
745is_typedef( native_object_type );
746is_typedef( memory_object_type );
747
748template <typename T>
749typename std::enable_if<is_native_object_type<T>::value, typename T::native_object_type>::type get_native_object( const T &t ) {
750 return t.native_object();
751}
752
753template <typename T>
755 return t;
756}
757
758// send_if_memory_object checks if the T type has memory_object_type and call the send method for the object.
759template <typename T, typename Factory>
761 const T &t = dmsg.data( false );
762 typedef typename T::memory_object_type mem_obj_t;
763 mem_obj_t mem_obj = t.memory_object();
765 if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
766 mem_obj.send( device, d );
767 if ( d.get_event() ) dmsg.set_event( *d.get_event() );
768}
769
770template <typename T>
772 typedef typename T::memory_object_type mem_obj_t;
773 mem_obj_t mem_obj = t.memory_object();
775 mem_obj.send( device, dmsg );
776}
777
778template <typename T>
780
781// receive_if_memory_object checks if the T type has memory_object_type and call the receive method for the object.
782template <typename T, typename Factory>
784 const T &t = dmsg.data( false );
785 typedef typename T::memory_object_type mem_obj_t;
786 mem_obj_t mem_obj = t.memory_object();
788 if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
789 mem_obj.receive( d );
790 if ( d.get_event() ) dmsg.set_event( *d.get_event() );
791}
792
793template <typename T>
795
797public:
798 typedef size_t range_index_type;
799 typedef std::array<range_index_type, 3> nd_range_type;
800
801 template <typename G = std::initializer_list<int>, typename L = std::initializer_list<int>,
802 typename = typename std::enable_if<!std::is_same<typename std::decay<G>::type, opencl_range>::value>::type>
803 opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
804 auto g_it = global_work.begin();
805 auto l_it = local_work.begin();
806 my_global_work_size = { {size_t(-1), size_t(-1), size_t(-1)} };
807 // my_local_work_size is still uninitialized
808 for (int s = 0; s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++s) {
809 __TBB_ASSERT(l_it != local_work.end(), "global_work & local_work must have same size");
810 my_global_work_size[s] = *g_it;
811 my_local_work_size[s] = *l_it;
812 }
813 }
814
815 const nd_range_type& global_range() const { return my_global_work_size; }
816 const nd_range_type& local_range() const { return my_local_work_size; }
817
818private:
821};
822
823template <typename DeviceFilter>
825public:
828
830 public:
831 kernel( const kernel& k ) : my_factory( k.my_factory ) {
832 // Clone my_cl_kernel via opencl_program
833 size_t ret_size = 0;
834
835 std::vector<char> kernel_name;
836 for ( size_t curr_size = 32;; curr_size <<= 1 ) {
837 kernel_name.resize( curr_size <<= 1 );
838 enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ), "Failed to get kernel info" );
839 if ( ret_size < curr_size ) break;
840 }
841
842 cl_program program;
843 enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, &ret_size ), "Failed to get kernel info" );
844 __TBB_ASSERT( ret_size == sizeof(program), NULL );
845
846 my_cl_kernel = opencl_program< factory_type >( my_factory, program ).get_cl_kernel( kernel_name.data() );
847 }
848
850 enforce_cl_retcode( clReleaseKernel( my_cl_kernel ), "Failed to release a kernel" );
851 }
852
853 private:
855
856 kernel( const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {}
857
858 // Data
859 cl_kernel my_cl_kernel;
861
862 template <typename DeviceFilter_>
863 friend class opencl_factory;
864
865 template <typename Factory>
866 friend class opencl_program;
867 };
868
870
871 // 'range_type' enables kernel_executor with range support
872 // it affects expectations for enqueue_kernel(.....) interface method
874
877 if ( my_devices.size() ) {
878 for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
879 enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
880 }
881 enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
882 }
883 }
884
885 bool init( const opencl_device_list &device_list ) {
886 tbb::spin_mutex::scoped_lock lock( my_devices_mutex );
887 if ( !my_devices.size() ) {
888 my_devices = device_list;
889 return true;
890 }
891 return false;
892 }
893
894
895private:
896 template <typename Factory>
898 cl_event const* e1 = dmsg.get_event();
899 cl_event e2;
900 cl_int err;
901 void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
902 e1 == NULL ? 0 : 1, e1, &e2, &err );
903 enforce_cl_retcode( err, "Failed to map a buffer" );
904 dmsg.data( false ) = ptr;
905 dmsg.set_event( e2 );
906 enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
907 }
908
909
910 template <typename Factory>
912 cl_event const* e1 = dmsg.get_event();
913 cl_event e2;
915 clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
916 "Failed to unmap a buffer" );
917 dmsg.set_event( e2 );
918 enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
919 }
920
921 // --------- Kernel argument & event list helpers --------- //
922 template <size_t NUM_ARGS, typename T>
923 void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>&, int&, int& place, const T& t ) {
924 auto p = get_native_object(t);
925 enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
926 }
927
928 template <size_t NUM_ARGS, typename T, typename F>
929 void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const opencl_async_msg<T, F>& msg ) {
930 __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
931
932 const cl_event * const e = msg.get_event();
933 if (e != NULL) {
934 events[num_events++] = *e;
935 }
936
937 process_one_arg( kernel, events, num_events, place, msg.data(false) );
938 }
939
940 template <size_t NUM_ARGS, typename T, typename ...Rest>
941 void process_arg_list( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const T& t, const Rest&... args ) {
942 process_one_arg( kernel, events, num_events, place, t );
943 process_arg_list( kernel, events, num_events, place, args... );
944 }
945
946 template <size_t NUM_ARGS>
947 void process_arg_list( const kernel_type&, std::array<cl_event, NUM_ARGS>&, int&, int& ) {}
948 // ------------------------------------------- //
949 template <typename T>
950 void update_one_arg( cl_event, T& ) {}
951
952 template <typename T, typename F>
953 void update_one_arg( cl_event e, opencl_async_msg<T, F>& msg ) {
954 msg.set_event( e );
955 }
956
957 template <typename T, typename ...Rest>
958 void update_arg_list( cl_event e, T& t, Rest&... args ) {
959 update_one_arg( e, t );
960 update_arg_list( e, args... );
961 }
962
963 void update_arg_list( cl_event ) {}
964 // ------------------------------------------- //
965public:
966 template <typename ...Args>
967 void send_kernel( opencl_device device, const kernel_type& kernel, const range_type& work_size, Args&... args ) {
968 std::array<cl_event, sizeof...(Args)> events;
969 int num_events = 0;
970 int place = 0;
971 process_arg_list( kernel, events, num_events, place, args... );
972
973 const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
974
975 update_arg_list(e, args...);
976
977 // Release our own reference to cl_event
978 enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
979 }
980
981 // ------------------------------------------- //
982 template <typename T, typename ...Rest>
983 void send_data(opencl_device device, T& t, Rest&... args) {
984 send_if_memory_object( device, t );
985 send_data( device, args... );
986 }
987
989 // ------------------------------------------- //
990
991private:
992 cl_event send_kernel_impl( opencl_device device, const cl_kernel& kernel,
993 const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
994 const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
995 const typename range_type::nd_range_type& g_size = work_size.global_range();
996 const typename range_type::nd_range_type& l_size = work_size.local_range();
997 cl_uint s;
998 for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
999 cl_event event;
1001 clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
1002 g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
1003 "Failed to enqueue a kernel" );
1004 return event;
1005 }
1006
1007 // ------------------------------------------- //
1008 template <typename T>
1009 bool get_event_from_one_arg( cl_event&, const T& ) {
1010 return false;
1011 }
1012
1013 template <typename T, typename F>
1014 bool get_event_from_one_arg( cl_event& e, const opencl_async_msg<T, F>& msg) {
1015 cl_event const *e_ptr = msg.get_event();
1016
1017 if ( e_ptr != NULL ) {
1018 e = *e_ptr;
1019 return true;
1020 }
1021
1022 return false;
1023 }
1024
1025 template <typename T, typename ...Rest>
1026 bool get_event_from_args( cl_event& e, const T& t, const Rest&... args ) {
1027 if ( get_event_from_one_arg( e, t ) ) {
1028 return true;
1029 }
1030
1031 return get_event_from_args( e, args... );
1032 }
1033
1034 bool get_event_from_args( cl_event& ) {
1035 return false;
1036 }
1037 // ------------------------------------------- //
1038
1040 virtual ~finalize_fn() {}
1041 virtual void operator() () {}
1042 };
1043
1044 template<typename Fn>
1047 finalize_fn_leaf(Fn fn) : my_fn(fn) {}
1048 void operator() () __TBB_override { my_fn(); }
1049 };
1050
1051 static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data) {
1052 tbb::internal::suppress_unused_warning(event_command_exec_status);
1053 __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1054
1055 finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1056 __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1057 (*fn_ptr)();
1058
1059 // Function pointer was created by 'new' & this callback must be called once only
1060 delete fn_ptr;
1061 }
1062public:
1063 template <typename FinalizeFn, typename ...Args>
1064 void finalize( opencl_device device, FinalizeFn fn, Args&... args ) {
1065 cl_event e;
1066
1067 if ( get_event_from_args( e, args... ) ) {
1068 enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1069 new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1070 }
1071
1072 enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1073 }
1074
1076 std::call_once( my_once_flag, &opencl_factory::init_once, this );
1077 return my_devices;
1078 }
1079
1080private:
1082 __TBB_ASSERT( d1 != opencl_device::unknown && d2 != opencl_device::unknown, NULL );
1083 // Currently, factory supports only one context so if the both devices are not host it means the are in the same context.
1084 if ( d1 != opencl_device::host && d2 != opencl_device::host )
1085 return true;
1086 return d1 == d2;
1087 }
1088private:
1091
1092 cl_context context() {
1093 std::call_once( my_once_flag, &opencl_factory::init_once, this );
1094 return my_cl_context;
1095 }
1096
1097 void init_once() {
1098 {
1099 tbb::spin_mutex::scoped_lock lock(my_devices_mutex);
1100 if (!my_devices.size())
1101 my_devices = DeviceFilter()( opencl_info::available_devices() );
1102 }
1103
1104 enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1105 cl_platform_id platform_id = my_devices.begin()->platform_id();
1106 for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1107 enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1108
1109 std::vector<cl_device_id> cl_device_ids;
1110 for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1111 cl_device_ids.push_back((*d).my_cl_device_id);
1112 }
1113
1114 cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1115 cl_int err;
1116 cl_context ctx = clCreateContext(context_properties,
1117 (cl_uint)cl_device_ids.size(),
1118 cl_device_ids.data(),
1119 NULL, NULL, &err);
1120 enforce_cl_retcode(err, "Failed to create context");
1121 my_cl_context = ctx;
1122
1123 size_t device_counter = 0;
1124 for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1125 (*d).my_device_id = device_counter++;
1126 cl_int err2;
1127 cl_command_queue cq;
1128#if CL_VERSION_2_0
1129 if ((*d).major_version() >= 2) {
1130 if ((*d).out_of_order_exec_mode_on_host_present()) {
1131 cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1132 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1133 } else {
1134 cl_queue_properties props[] = { 0 };
1135 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1136 }
1137 } else
1138#endif
1139 {
1140 cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1141 // Suppress "declared deprecated" warning for the next line.
1142#if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1143#pragma GCC diagnostic push
1144#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1145#endif
1146#if _MSC_VER || __INTEL_COMPILER
1147#pragma warning( push )
1148#if __INTEL_COMPILER
1149#pragma warning (disable: 1478)
1150#else
1151#pragma warning (disable: 4996)
1152#endif
1153#endif
1154 cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1155#if _MSC_VER || __INTEL_COMPILER
1156#pragma warning( pop )
1157#endif
1158#if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1159#pragma GCC diagnostic pop
1160#endif
1161 }
1162 enforce_cl_retcode(err2, "Failed to create command queue");
1163 (*d).my_cl_command_queue = cq;
1164 }
1165 }
1166
1167 std::once_flag my_once_flag;
1169 cl_context my_cl_context;
1170
1172
1173 template <typename Factory>
1174 friend class opencl_program;
1175 template <typename Factory>
1177 template <typename Factory>
1178 friend class opencl_memory;
1179}; // class opencl_factory
1180
1181// TODO: consider this namespace as public API
1182namespace opencl_info {
1183
1184// Default types
1185
1186template <typename Factory>
1189 __TBB_ASSERT(!f.devices().empty(), "No available devices");
1190 return *(f.devices().begin());
1191 }
1192};
1193
1197 cl_platform_id platform_id = devices.begin()->platform_id();
1198 for (opencl_device_list::const_iterator it = devices.cbegin(); it != devices.cend(); ++it) {
1199 if (it->platform_id() == platform_id) {
1200 dl.add(*it);
1201 }
1202 }
1203 return dl;
1204 }
1205};
1206
1207class default_opencl_factory : public opencl_factory < default_device_filter >, tbb::internal::no_copy {
1208public:
1210
1212
1213private:
1215};
1216
1219 return default_factory;
1220}
1221
1222} // namespace opencl_info
1223
1224template <typename T, typename Factory>
1225opencl_buffer<T, Factory>::opencl_buffer( size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), opencl_info::default_factory() ) ) {}
1226
1227
1229 SOURCE,
1231 SPIR
1232};
1233
1234template <typename Factory = opencl_info::default_opencl_factory>
1236public:
1237 typedef typename Factory::kernel_type kernel_type;
1238
1239 opencl_program( Factory& factory, opencl_program_type type, const std::string& program_name ) : my_factory( factory ), my_type(type) , my_arg_str( program_name) {}
1240 opencl_program( Factory& factory, const char* program_name ) : opencl_program( factory, std::string( program_name ) ) {}
1241 opencl_program( Factory& factory, const std::string& program_name ) : opencl_program( factory, opencl_program_type::SOURCE, program_name ) {}
1242
1243 opencl_program( opencl_program_type type, const std::string& program_name ) : opencl_program( opencl_info::default_factory(), type, program_name ) {}
1244 opencl_program( const char* program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1245 opencl_program( const std::string& program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1247
1248 opencl_program( const opencl_program &src ) : my_factory( src.my_factory ), my_type( src.type ), my_arg_str( src.my_arg_str ), my_cl_program( src.my_cl_program ) {
1249 // Set my_do_once_flag to the called state.
1250 std::call_once( my_do_once_flag, [](){} );
1251 }
1252
1253 kernel_type get_kernel( const std::string& k ) const {
1254 return kernel_type( get_cl_kernel(k), my_factory );
1255 }
1256
1257private:
1258 opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) {
1259 // Set my_do_once_flag to the called state.
1260 std::call_once( my_do_once_flag, [](){} );
1261 }
1262
1263 cl_kernel get_cl_kernel( const std::string& k ) const {
1264 std::call_once( my_do_once_flag, [this, &k](){ this->init( k ); } );
1265 cl_int err;
1266 cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err );
1267 enforce_cl_retcode( err, std::string( "Failed to create kernel: " ) + k );
1268 return kernel;
1269 }
1270
1272 public:
1273 file_reader( const std::string& filepath ) {
1274 std::ifstream file_descriptor( filepath, std::ifstream::binary );
1275 if ( !file_descriptor.is_open() ) {
1276 std::string str = std::string( "Could not open file: " ) + filepath;
1277 std::cerr << str << std::endl;
1278 throw str;
1279 }
1280 file_descriptor.seekg( 0, file_descriptor.end );
1281 size_t length = size_t( file_descriptor.tellg() );
1282 file_descriptor.seekg( 0, file_descriptor.beg );
1283 my_content.resize( length );
1284 char* begin = &*my_content.begin();
1285 file_descriptor.read( begin, length );
1286 file_descriptor.close();
1287 }
1288 const char* content() { return &*my_content.cbegin(); }
1289 size_t length() { return my_content.length(); }
1290 private:
1291 std::string my_content;
1292 };
1293
1295 public:
1296 typedef void (CL_CALLBACK *cl_callback_type)(cl_program, void*);
1297 opencl_program_builder( Factory& f, const std::string& name, cl_program program,
1298 cl_uint num_devices, cl_device_id* device_list,
1299 const char* options, cl_callback_type callback,
1300 void* user_data ) {
1301 cl_int err = clBuildProgram( program, num_devices, device_list, options,
1302 callback, user_data );
1303 if( err == CL_SUCCESS )
1304 return;
1305 std::string str = std::string( "Failed to build program: " ) + name;
1306 if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1307 const opencl_device_list &devices = f.devices();
1308 for ( auto d = devices.begin(); d != devices.end(); ++d ) {
1309 std::cerr << "Build log for device: " << (*d).name() << std::endl;
1310 size_t log_size;
1311 cl_int query_err = clGetProgramBuildInfo(
1312 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1313 &log_size );
1314 enforce_cl_retcode( query_err, "Failed to get build log size" );
1315 if( log_size ) {
1316 std::vector<char> output;
1317 output.resize( log_size );
1318 query_err = clGetProgramBuildInfo(
1319 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1320 output.size(), output.data(), NULL );
1321 enforce_cl_retcode( query_err, "Failed to get build output" );
1322 std::cerr << output.data() << std::endl;
1323 } else {
1324 std::cerr << "No build log available" << std::endl;
1325 }
1326 }
1327 }
1328 enforce_cl_retcode( err, str );
1329 }
1330 };
1331
1333 public:
1334 template<typename Filter>
1335 opencl_device_filter( cl_uint& num_devices, cl_device_id* device_list,
1336 Filter filter, const char* message ) {
1337 for ( cl_uint i = 0; i < num_devices; ++i )
1338 if ( filter(device_list[i]) ) {
1339 device_list[i--] = device_list[--num_devices];
1340 }
1341 if ( !num_devices )
1342 enforce_cl_retcode( CL_DEVICE_NOT_AVAILABLE, message );
1343 }
1344 };
1345
1346 void init( const std::string& ) const {
1347 cl_uint num_devices;
1348 enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES, sizeof( num_devices ), &num_devices, NULL ),
1349 "Failed to get OpenCL context info" );
1350 if ( !num_devices )
1351 enforce_cl_retcode( CL_DEVICE_NOT_FOUND, "No supported devices found" );
1352 cl_device_id *device_list = (cl_device_id *)alloca( num_devices*sizeof( cl_device_id ) );
1353 enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*sizeof( cl_device_id ), device_list, NULL ),
1354 "Failed to get OpenCL context info" );
1355 const char *options = NULL;
1356 switch ( my_type ) {
1358 file_reader fr( my_arg_str );
1359 const char *s[] = { fr.content() };
1360 const size_t l[] = { fr.length() };
1361 cl_int err;
1362 my_cl_program = clCreateProgramWithSource( my_factory.context(), 1, s, l, &err );
1363 enforce_cl_retcode( err, std::string( "Failed to create program: " ) + my_arg_str );
1365 num_devices, device_list,
1366 []( const opencl_device& d ) -> bool {
1367 return !d.compiler_available() || !d.linker_available();
1368 }, "No one device supports building program from sources" );
1370 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1371 options, /*callback*/ NULL, /*user data*/NULL );
1372 break;
1373 }
1375 options = "-x spir";
1377 file_reader fr( my_arg_str );
1378 std::vector<const unsigned char*> s(
1379 num_devices, reinterpret_cast<const unsigned char*>(fr.content()) );
1380 std::vector<size_t> l( num_devices, fr.length() );
1381 std::vector<cl_int> bin_statuses( num_devices, -1 );
1382 cl_int err;
1383 my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices,
1384 device_list, l.data(), s.data(),
1385 bin_statuses.data(), &err );
1386 if( err != CL_SUCCESS ) {
1387 std::string statuses_str;
1388 for (auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1389 statuses_str += std::to_string((*st));
1390 }
1391
1392 enforce_cl_retcode( err, std::string( "Failed to create program, error " + std::to_string( err ) + " : " ) + my_arg_str +
1393 std::string( ", binary_statuses = " ) + statuses_str );
1394 }
1396 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1397 options, /*callback*/ NULL, /*user data*/NULL );
1398 break;
1399 }
1400 default:
1401 __TBB_ASSERT( false, "Unsupported program type" );
1402 }
1403 }
1404
1405 Factory& my_factory;
1407 std::string my_arg_str;
1408 mutable cl_program my_cl_program;
1409 mutable std::once_flag my_do_once_flag;
1410
1411 template <typename DeviceFilter>
1412 friend class opencl_factory;
1413
1414 friend class Factory::kernel;
1415};
1416
1417template<typename... Args>
1419
1420template<typename JP, typename Factory, typename... Ports>
1422opencl_node< tuple<Ports...>, JP, Factory > : public streaming_node< tuple<Ports...>, JP, Factory > {
1423 typedef streaming_node < tuple<Ports...>, JP, Factory > base_type;
1424public:
1425 typedef typename base_type::kernel_type kernel_type;
1426
1427 opencl_node( graph &g, const kernel_type& kernel )
1428 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1429 {
1430 tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1431 }
1432
1433 opencl_node( graph &g, const kernel_type& kernel, Factory &f )
1434 : base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1435 {
1436 tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1437 }
1438
1439 template <typename DeviceSelector>
1440 opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d, Factory &f)
1441 : base_type( g, kernel, d, f)
1442 {
1443 tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1444 }
1445};
1446
1447template<typename JP, typename... Ports>
1449opencl_node< tuple<Ports...>, JP > : public opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > {
1451public:
1452 typedef typename base_type::kernel_type kernel_type;
1453
1454 opencl_node( graph &g, const kernel_type& kernel )
1455 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1456 {}
1457
1458 template <typename DeviceSelector>
1459 opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1460 : base_type( g, kernel, d, opencl_info::default_factory() )
1461 {}
1462};
1463
1464template<typename... Ports>
1466opencl_node< tuple<Ports...> > : public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1468public:
1469 typedef typename base_type::kernel_type kernel_type;
1470
1471 opencl_node( graph &g, const kernel_type& kernel )
1472 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1473 {}
1474
1475 template <typename DeviceSelector>
1476 opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1477 : base_type( g, kernel, d, opencl_info::default_factory() )
1478 {}
1479};
1480
1481} // namespace interfaceX
1482
1487using interface11::opencl_buffer;
1489using interface11::opencl_device;
1490using interface11::opencl_device_list;
1491using interface11::opencl_program;
1493using interface11::opencl_async_msg;
1494using interface11::opencl_factory;
1495using interface11::opencl_range;
1496
1497} // namespace flow
1498} // namespace tbb
1499#endif /* __TBB_PREVIEW_OPENCL_NODE */
1500
1502#undef __TBB_flow_graph_opencl_node_H_include_area
1503
1504#endif // __TBB_flow_graph_opencl_node_H
The graph related classes and functions.
#define is_typedef(type)
class __TBB_DEPRECATED streaming_node
#define CODEPTR()
#define __TBB_DEPRECATED_IN_VERBOSE_MODE
Definition: tbb_config.h:647
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
#define __TBB_override
Definition: tbb_stddef.h:240
#define __TBB_STATIC_ASSERT(condition, msg)
Definition: tbb_stddef.h:553
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
void const char const char int ITT_FORMAT __itt_group_sync s
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp begin
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t length
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s const char ITT_FORMAT s __itt_frame ITT_FORMAT p __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu __itt_counter unsigned long long ITT_FORMAT lu __itt_counter __itt_clock_domain unsigned long long void ITT_FORMAT p const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
void const char const char int ITT_FORMAT __itt_group_sync x void const char * name
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync p
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
STL namespace.
The graph class.
void move(tbb_thread &t1, tbb_thread &t2)
Definition: tbb_thread.h:319
@ release
Release.
Definition: atomic.h:59
@ relaxed
No ordering.
Definition: atomic.h:61
@ acquire
Acquire.
Definition: atomic.h:57
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
Definition: tbb_stddef.h:398
static void fgt_multiinput_multioutput_node(void *, string_index, void *, void *)
K key_from_message(const T &t)
Definition: flow_graph.h:721
std::enable_if<!is_memory_object_type< T >::value >::type receive_if_memory_object(const T &)
T device_info(cl_device_id d, cl_device_info i)
class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_node
std::enable_if<!is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device, T &)
class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_subbuffer
void enforce_cl_retcode(cl_int err, std::string msg)
T platform_info(cl_platform_id p, cl_platform_info i)
K key_from_message(const opencl_async_msg< T, Factory > &dmsg)
class __TBB_DEPRECATED async_msg
Definition: flow_graph.h:216
std::enable_if< is_memory_object_type< T >::value >::type receive_if_memory_object(const opencl_async_msg< T, Factory > &dmsg)
std::enable_if<!is_native_object_type< T >::value, T >::type get_native_object(T t)
T event_info(cl_event e, cl_event_info i)
default_opencl_factory & default_factory()
const opencl_device_list & available_devices()
bool is_same_context(opencl_device::device_id_type d1, opencl_device::device_id_type d2)
void update_one_arg(cl_event e, opencl_async_msg< T, F > &msg)
opencl_factory & operator=(const opencl_factory &)
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
void enqueue_unmap_buffer(opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)
void send_data(opencl_device device, T &t, Rest &... args)
void update_arg_list(cl_event e, T &t, Rest &... args)
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
bool init(const opencl_device_list &device_list)
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)
void process_arg_list(const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
void send_kernel(opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
bool get_event_from_one_arg(cl_event &, const T &)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
void enqueue_map_buffer(opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
void finalize(opencl_device device, FinalizeFn fn, Args &... args)
bool get_event_from_one_arg(cl_event &e, const opencl_async_msg< T, F > &msg)
opencl_factory(const opencl_factory &)
kernel_type get_kernel(const std::string &k) const
opencl_program(Factory &factory, const char *program_name)
opencl_program(opencl_program_type type, const std::string &program_name)
opencl_program(Factory &factory, const std::string &program_name)
opencl_program(Factory &factory, opencl_program_type type, const std::string &program_name)
cl_kernel get_cl_kernel(const std::string &k) const
opencl_program(Factory &factory, cl_program program)
opencl_program(const std::string &program_name)
std::array< size_t, 3 > max_work_item_sizes() const
void info(cl_device_info i, T &t) const
opencl_device(cl_device_id cl_d_id, device_id_type device_id)
bool built_in_kernel_available(const std::string &k) const
friend bool operator==(opencl_device d1, opencl_device d2)
bool extension_available(const std::string &ext) const
void set_command_queue(cl_command_queue cmd_queue)
opencl_device_list(std::initializer_list< opencl_device > il)
opencl_async_msg(const opencl_async_msg &dmsg)
std::shared_ptr< tbb::atomic< bool > > my_callback_flag_ptr
opencl_async_msg(const T &data, cl_event event)
opencl_async_msg & operator=(const opencl_async_msg &dmsg)
static void CL_CALLBACK register_callback_func(cl_event, cl_int event_command_exec_status, void *data)
opencl_async_msg< void *, Factory > receive(const cl_event *e)
tbb::atomic< opencl_device::device_id_type > my_curr_device_id
virtual void map_memory(opencl_device, opencl_async_msg< void *, Factory > &)=0
opencl_async_msg< void *, Factory > send(opencl_device device, const cl_event *e)
void map_memory(opencl_device device, opencl_async_msg< void *, Factory > &dmsg) __TBB_override
opencl_buffer_impl(cl_mem m, size_t index, size_t size, Factory &f)
opencl_buffer_impl< Factory > impl_type
const opencl_buffer & memory_object() const
opencl_buffer(Factory &f, cl_mem m, size_t index, size_t size)
friend bool operator==(const opencl_buffer< T, Factory > &lhs, const opencl_buffer< T, Factory > &rhs)
void receive(const opencl_async_msg< opencl_buffer, Factory > &dependency) const
void send(opencl_device device, opencl_async_msg< opencl_buffer, Factory > &dependency) const
opencl_subbuffer< T, Factory > subbuffer(size_t index, size_t size) const
opencl_subbuffer(const opencl_buffer< T, Factory > &owner, size_t index, size_t size)
std::array< range_index_type, 3 > nd_range_type
const nd_range_type & global_range() const
const nd_range_type & local_range() const
opencl_range(G &&global_work=std::initializer_list< int >({ 0 }), L &&local_work=std::initializer_list< int >({ 0, 0, 0 }))
kernel(const cl_kernel &k, factory_type &f)
opencl_device_list operator()(const opencl_device_list &devices)
opencl_program_builder(Factory &f, const std::string &name, cl_program program, cl_uint num_devices, cl_device_id *device_list, const char *options, cl_callback_type callback, void *user_data)
opencl_device_filter(cl_uint &num_devices, cl_device_id *device_list, Filter filter, const char *message)
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d, Factory &f)
opencl_node< tuple< Ports... >, JP, opencl_info::default_opencl_factory > base_type
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
opencl_node< tuple< Ports... >, queueing, opencl_info::default_opencl_factory > base_type
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
A stage in a pipeline.
Definition: pipeline.h:64
A lock that occupies a single byte.
Definition: spin_mutex.h:39
Represents acquisition of a mutex.
Definition: spin_mutex.h:53
Base class for types that should not be assigned.
Definition: tbb_stddef.h:322
Base class for types that should not be copied or assigned.
Definition: tbb_stddef.h:330

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.