From 75c680ad48daac344deb6c5639baa7bb944903e9 Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Mon, 6 Nov 2023 14:53:00 +0200 Subject: [PATCH 001/190] Update cl_khr_command_buffer_mutable_dispatch.asciidoc Add type cl_mutable_dispatch_promises_khr and its possible values --- ...r_command_buffer_mutable_dispatch.asciidoc | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index 2fef8ca19..53dd351f5 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -101,6 +101,9 @@ typedef cl_uint cl_mutable_command_info_khr; // Identifies the type of a structure to allow structure pointer chains typedef cl_uint cl_command_buffer_structure_type_khr; + +// Bitfield covering certain promises by the user to the implementation, enabling possible optimizations +typedef cl_bitfield cl_mutable_dispatch_promises_khr; ---- Struct type for setting kernel arguments normally passed using {clSetKernelArg} @@ -233,6 +236,7 @@ CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 // Property to cl_ndrange_kernel_command_properties_khr CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 +CL_MUTABLE_DISPATCH_PROMISES_KHR 0x12B2 // Bits for cl_mutable_dispatch_fields_khr bitfield CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (0x1 << 0) @@ -254,6 +258,9 @@ CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR 0x12AD // Bits for cl_command_buffer_flags_khr CL_COMMAND_BUFFER_MUTABLE_KHR (0x1 << 1) + +// Bits for cl_mutable_dispatch_promises_khr bitfield +CL_MUTABLE_DISPATCH_PROMISE_NO_ADDITIONAL_WORK_GROUPS_KHR (0x1 << 0) ---- Enum values for {cl_command_buffer_structure_type_khr_TYPE} allowing the structure @@ -321,6 +328,14 @@ description of property values. | {CL_COMMAND_BUFFER_MUTABLE_KHR} - Enables modification of the command-buffer, by default command-buffers are immutable. 
If set, commands in the command-buffer may be updated via {clUpdateMutableCommandsKHR}. + +| {CL_MUTABLE_DISPATCH_PROMISES_KHR} +| {cl_mutable_dispatch_promises_khr_TYPE} +| This is a bitfield and can be set to a combination of the following values: + + {CL_MUTABLE_DISPATCH_PROMISE_NO_ADDITIONAL_WORK_GROUPS_KHR} + A promise by the user that the number of work-groups of any ND-range kernel recorded in this command + buffer will not be updated beyond the number defined when the ND-range kernel was recorded. |==== ==== Modifications to clCommandNDRangeKernelKHR @@ -392,6 +407,14 @@ in the table below. If {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} is not specified then it defaults to the value returned by the {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} device query. + +| {CL_MUTABLE_DISPATCH_PROMISES_KHR} +| {cl_mutable_dispatch_promises_khr_TYPE} +| This is a bitfield and can be set to a combination of the following values: + + {CL_MUTABLE_DISPATCH_PROMISE_NO_ADDITIONAL_WORK_GROUPS_KHR} + A promise by the user that the number of work-groups of this ND-range kernel will not be updated beyond + the number defined when the ND-range kernel was recorded. |==== ===== Mutable Handle Parameter From c37e00f1423379a882c173c12ed1aa5ca9d8e51d Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Tue, 7 Nov 2023 11:50:25 +0200 Subject: [PATCH 002/190] Update cl_khr_command_buffer_mutable_dispatch.asciidoc --- ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index 53dd351f5..ab85fb6cf 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -20,6 +20,7 @@ commands between command-buffer enqueues. |==== | *Date* | *Version* | *Description* | 2022-08-31 | 0.9.0 | First assigned version (provisional). 
+| 2023-11-07 | 0.9.1 | Add type cl_mutable_dispatch_promises_khr and its possible values |==== ==== Dependencies @@ -236,7 +237,7 @@ CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 // Property to cl_ndrange_kernel_command_properties_khr CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 -CL_MUTABLE_DISPATCH_PROMISES_KHR 0x12B2 +CL_MUTABLE_DISPATCH_PROMISES_KHR 0x12B7 // Bits for cl_mutable_dispatch_fields_khr bitfield CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (0x1 << 0) @@ -556,9 +557,9 @@ the array violates the defined conditions: * {CL_INVALID_VALUE} if _type_ is not {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. -* {CL_INVALID_OPERATION} if values of _local_work_size_ and/or - _global_work_size_ result in an increase to the number of work-groups in the - ND-range. +* {CL_INVALID_OPERATION} if {CL_MUTABLE_DISPATCH_PROMISE_NO_ADDITIONAL_WORK_GROUPS_KHR} is specified, + and values of _local_work_size_ and/or _global_work_size_ result in an increase to the number of work- + groups in the ND-range over the number specified when the ND-range kernel was recorded. * {CL_INVALID_OPERATION} if the values of _local_work_size_ and/or _global_work_size_ result in a change to work-group uniformity. From ed629a972fd6a2377b120b64e78fa1ceb2b9f8d3 Mon Sep 17 00:00:00 2001 From: Aharon Abramson Date: Tue, 7 Nov 2023 12:20:56 +0200 Subject: [PATCH 003/190] Update ext/cl_khr_command_buffer_mutable_dispatch.asciidoc Co-authored-by: Ewan Crawford --- ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index ab85fb6cf..6f2c8a3b4 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -20,7 +20,7 @@ commands between command-buffer enqueues. |==== | *Date* | *Version* | *Description* | 2022-08-31 | 0.9.0 | First assigned version (provisional). 
-| 2023-11-07 | 0.9.1 | Add type cl_mutable_dispatch_promises_khr and its possible values +| 2023-11-07 | 0.9.1 | Add type cl_mutable_dispatch_promises_khr and its possible values (provisional). |==== ==== Dependencies From e7d3343d95113e93e59eb76e4458896b07c7eded Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Fri, 10 Nov 2023 16:12:31 +0200 Subject: [PATCH 004/190] changes to cl.xml --- ...r_command_buffer_mutable_dispatch.asciidoc | 36 ++++++++++--------- xml/cl.xml | 11 +++++- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index 6f2c8a3b4..a8b234df7 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -20,7 +20,7 @@ commands between command-buffer enqueues. |==== | *Date* | *Version* | *Description* | 2022-08-31 | 0.9.0 | First assigned version (provisional). -| 2023-11-07 | 0.9.1 | Add type cl_mutable_dispatch_promises_khr and its possible values (provisional). +| 2023-11-07 | 0.9.1 | Add type cl_mutable_dispatch_asserts_khr and its possible values (provisional). 
|==== ==== Dependencies @@ -103,8 +103,8 @@ typedef cl_uint cl_mutable_command_info_khr; // Identifies the type of a structure to allow structure pointer chains typedef cl_uint cl_command_buffer_structure_type_khr; -// Bitfield covering certain promises by the user to the implementation, enabling possible optimizations -typedef cl_bitfield cl_mutable_dispatch_promises_khr; +// Bitfield covering certain asserts by the user to the implementation, enabling possible optimizations +typedef cl_bitfield cl_mutable_dispatch_asserts_khr; ---- Struct type for setting kernel arguments normally passed using {clSetKernelArg} @@ -237,7 +237,7 @@ CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 // Property to cl_ndrange_kernel_command_properties_khr CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 -CL_MUTABLE_DISPATCH_PROMISES_KHR 0x12B7 +CL_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 // Bits for cl_mutable_dispatch_fields_khr bitfield CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (0x1 << 0) @@ -260,8 +260,8 @@ CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR 0x12AD // Bits for cl_command_buffer_flags_khr CL_COMMAND_BUFFER_MUTABLE_KHR (0x1 << 1) -// Bits for cl_mutable_dispatch_promises_khr bitfield -CL_MUTABLE_DISPATCH_PROMISE_NO_ADDITIONAL_WORK_GROUPS_KHR (0x1 << 0) +// Bits for cl_mutable_dispatch_asserts_khr bitfield +CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR (0x1 << 0) ---- Enum values for {cl_command_buffer_structure_type_khr_TYPE} allowing the structure @@ -330,12 +330,12 @@ description of property values. command-buffer, by default command-buffers are immutable. If set, commands in the command-buffer may be updated via {clUpdateMutableCommandsKHR}. 
-| {CL_MUTABLE_DISPATCH_PROMISES_KHR} -| {cl_mutable_dispatch_promises_khr_TYPE} +| {CL_MUTABLE_DISPATCH_ASSERTS_KHR} +| {cl_mutable_dispatch_asserts_khr_TYPE} | This is a bitfield and can be set to a combination of the following values: - {CL_MUTABLE_DISPATCH_PROMISE_NO_ADDITIONAL_WORK_GROUPS_KHR} - A promise by the user that the number of work-groups of any ND-range kernel recorded in this command + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} + An assertion by the user that the number of work-groups of any ND-range kernel recorded in this command buffer will not be updated beyond the number defined when the ND-range kernel was recorded. |==== @@ -409,13 +409,17 @@ in the table below. defaults to the value returned by the {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} device query. -| {CL_MUTABLE_DISPATCH_PROMISES_KHR} -| {cl_mutable_dispatch_promises_khr_TYPE} +| {CL_MUTABLE_DISPATCH_ASSERTS_KHR} +| {cl_mutable_dispatch_asserts_khr_TYPE} | This is a bitfield and can be set to a combination of the following values: - {CL_MUTABLE_DISPATCH_PROMISE_NO_ADDITIONAL_WORK_GROUPS_KHR} - A promise by the user that the number of work-groups of this ND-range kernel will not be updated beyond - the number defined when the ND-range kernel was recorded. + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} + An assertion by the user that the number of work-groups of this ND-range kernel will not be updated beyond + the number defined when the ND-range kernel was recorded. The number of work-groups is + defined as the product for each _i_ from _0_ to _work_dim - 1_ of + _ceil(global_work_size[i]/local_work_size[i])_ (_global_work_offset_ only shifts the + global IDs of the work-items and does not change the number of work-groups). + In case _local_work_size_ is NULL, the effect of this flag is undefined. 
|==== ===== Mutable Handle Parameter @@ -557,7 +561,7 @@ the array violates the defined conditions: * {CL_INVALID_VALUE} if _type_ is not {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. -* {CL_INVALID_OPERATION} if {CL_MUTABLE_DISPATCH_PROMISE_NO_ADDITIONAL_WORK_GROUPS_KHR} is specified, +* {CL_INVALID_OPERATION} if {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} is specified, and values of _local_work_size_ and/or _global_work_size_ result in an increase to the number of work- groups in the ND-range over the number specified when the ND-range kernel was recorded. diff --git a/xml/cl.xml b/xml/cl.xml index 69a63d931..f4597b7c6 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -252,6 +252,7 @@ server's OpenCL/api-docs repository. typedef cl_bitfield cl_device_fp_atomic_capabilities_ext; typedef cl_uint cl_image_requirements_info_ext; typedef cl_bitfield cl_platform_command_buffer_capabilities_khr; + typedef cl_bitfield cl_mutable_dispatch_promises_khr Structure types @@ -1343,6 +1344,9 @@ server's OpenCL/api-docs repository. + + + @@ -1779,7 +1783,8 @@ server's OpenCL/api-docs repository. - + + @@ -7277,6 +7282,7 @@ server's OpenCL/api-docs repository. + @@ -7313,6 +7319,9 @@ server's OpenCL/api-docs repository. + + + From 392d97b7fba824bfa4ece034003e1a8172df6c98 Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Sun, 12 Nov 2023 09:19:41 +0200 Subject: [PATCH 005/190] update extension's version --- xml/cl.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xml/cl.xml b/xml/cl.xml index f4597b7c6..063e2e3f2 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1783,7 +1783,7 @@ server's OpenCL/api-docs repository. - + @@ -7270,7 +7270,7 @@ server's OpenCL/api-docs repository. 
- + From 995886a6f41ef24cd2e88f4fbdf7b3397c9a8258 Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Mon, 13 Nov 2023 13:19:23 +0200 Subject: [PATCH 006/190] rename "promise" to "assert" --- .asciidoctorconfig.adoc | 12 + .project | 11 + xml/cgenerator.py | 420 +++++++++ xml/checklinks.py | 71 ++ xml/cl.xml | 14 +- xml/clconventions.py | 241 ++++++ xml/conventions.py | 358 ++++++++ xml/docgenerator.py | 454 ++++++++++ xml/extensionmetadocgenerator.py | 659 ++++++++++++++ xml/genRef.py | 1019 ++++++++++++++++++++++ xml/gen_dictionaries.py | 258 ++++++ xml/gen_version_notes.py | 127 +++ xml/gencl.py | 464 ++++++++++ xml/generator.py | 1186 +++++++++++++++++++++++++ xml/pygenerator.py | 365 ++++++++ xml/realign.py | 47 + xml/reflib.py | 663 ++++++++++++++ xml/reg.py | 1397 ++++++++++++++++++++++++++++++ 18 files changed, 7759 insertions(+), 7 deletions(-) create mode 100644 .asciidoctorconfig.adoc create mode 100644 .project create mode 100644 xml/cgenerator.py create mode 100644 xml/checklinks.py create mode 100644 xml/clconventions.py create mode 100644 xml/conventions.py create mode 100644 xml/docgenerator.py create mode 100644 xml/extensionmetadocgenerator.py create mode 100644 xml/genRef.py create mode 100644 xml/gen_dictionaries.py create mode 100644 xml/gen_version_notes.py create mode 100644 xml/gencl.py create mode 100644 xml/generator.py create mode 100644 xml/pygenerator.py create mode 100644 xml/realign.py create mode 100644 xml/reflib.py create mode 100644 xml/reg.py diff --git a/.asciidoctorconfig.adoc b/.asciidoctorconfig.adoc new file mode 100644 index 000000000..53b403dbc --- /dev/null +++ b/.asciidoctorconfig.adoc @@ -0,0 +1,12 @@ +// +++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// + Initial AsciiDoc editor configuration file - V1.0 + +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// +// Did not find any configuration files, so created this at project root level. 
+// If you do not like those files to be generated - you can turn it off inside Asciidoctor Editor preferences. +// +// You can define editor specific parts here. +// For example: with next line you could set imagesdir attribute to subfolder "images" relative to the folder where this config file is located. +// :imagesdir: {asciidoctorconfigdir}/images +// +// For more information please take a look at https://github.com/de-jcup/eclipse-asciidoctor-editor/wiki/Asciidoctor-configfiles diff --git a/.project b/.project new file mode 100644 index 000000000..9877aec6c --- /dev/null +++ b/.project @@ -0,0 +1,11 @@ + + + OpenCL-Docs + + + + + + + + diff --git a/xml/cgenerator.py b/xml/cgenerator.py new file mode 100644 index 000000000..4b2a8f1e2 --- /dev/null +++ b/xml/cgenerator.py @@ -0,0 +1,420 @@ +#!/usr/bin/python3 -i +# +# Copyright 2013-2023 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +import os +import re +from generator import (GeneratorOptions, OutputGenerator, noneStr, + regSortFeatures, write) + + +class CGeneratorOptions(GeneratorOptions): + """CGeneratorOptions - subclass of GeneratorOptions. + + Adds options used by COutputGenerator objects during C language header + generation.""" + + def __init__(self, + prefixText="", + genFuncPointers=True, + protectFile=True, + protectFeature=True, + protectProto=None, + protectProtoStr=None, + apicall='', + apientry='', + apientryp='', + indentFuncProto=True, + indentFuncPointer=False, + alignFuncParam=0, + genEnumBeginEndRange=False, + genAliasMacro=False, + aliasMacro='', + misracstyle=False, + misracppstyle=False, + **kwargs + ): + """Constructor. + Additional parameters beyond parent class: + + - prefixText - list of strings to prefix generated header with + (usually a copyright statement + calling convention macros). + - protectFile - True if multiple inclusion protection should be + generated (based on the filename) around the entire header. 
+ - protectFeature - True if #ifndef..#endif protection should be + generated around a feature interface in the header file. + - genFuncPointers - True if function pointer typedefs should be + generated + - protectProto - If conditional protection should be generated + around prototype declarations, set to either '#ifdef' + to require opt-in (#ifdef protectProtoStr) or '#ifndef' + to require opt-out (#ifndef protectProtoStr). Otherwise + set to None. + - protectProtoStr - #ifdef/#ifndef symbol to use around prototype + declarations, if protectProto is set + - apicall - string to use for the function declaration prefix, + such as APICALL on Windows. + - apientry - string to use for the calling convention macro, + in typedefs, such as APIENTRY. + - apientryp - string to use for the calling convention macro + in function pointer typedefs, such as APIENTRYP. + - indentFuncProto - True if prototype declarations should put each + parameter on a separate line + - indentFuncPointer - True if typedefed function pointers should put each + parameter on a separate line + - alignFuncParam - if nonzero and parameters are being put on a + separate line, align parameter names at the specified column + - genEnumBeginEndRange - True if BEGIN_RANGE / END_RANGE macros should + be generated for enumerated types + - genAliasMacro - True if the OpenXR alias macro should be generated + for aliased types (unclear what other circumstances this is useful) + - aliasMacro - alias macro to inject when genAliasMacro is True + - misracstyle - generate MISRA C-friendly headers + - misracppstyle - generate MISRA C++-friendly headers""" + + GeneratorOptions.__init__(self, **kwargs) + + self.prefixText = prefixText + """list of strings to prefix generated header with (usually a copyright statement + calling convention macros).""" + + self.genFuncPointers = genFuncPointers + """True if function pointer typedefs should be generated""" + + self.protectFile = protectFile + """True if multiple inclusion 
protection should be generated (based on the filename) around the entire header.""" + + self.protectFeature = protectFeature + """True if #ifndef..#endif protection should be generated around a feature interface in the header file.""" + + self.protectProto = protectProto + """If conditional protection should be generated around prototype declarations, set to either '#ifdef' to require opt-in (#ifdef protectProtoStr) or '#ifndef' to require opt-out (#ifndef protectProtoStr). Otherwise set to None.""" + + self.protectProtoStr = protectProtoStr + """#ifdef/#ifndef symbol to use around prototype declarations, if protectProto is set""" + + self.apicall = apicall + """string to use for the function declaration prefix, such as APICALL on Windows.""" + + self.apientry = apientry + """string to use for the calling convention macro, in typedefs, such as APIENTRY.""" + + self.apientryp = apientryp + """string to use for the calling convention macro in function pointer typedefs, such as APIENTRYP.""" + + self.indentFuncProto = indentFuncProto + """True if prototype declarations should put each parameter on a separate line""" + + self.indentFuncPointer = indentFuncPointer + """True if typedefed function pointers should put each parameter on a separate line""" + + self.alignFuncParam = alignFuncParam + """if nonzero and parameters are being put on a separate line, align parameter names at the specified column""" + + self.genEnumBeginEndRange = genEnumBeginEndRange + """True if BEGIN_RANGE / END_RANGE macros should be generated for enumerated types""" + + self.genAliasMacro = genAliasMacro + """True if the OpenXR alias macro should be generated for aliased types (unclear what other circumstances this is useful)""" + + self.aliasMacro = aliasMacro + """alias macro to inject when genAliasMacro is True""" + + self.misracstyle = misracstyle + """generate MISRA C-friendly headers""" + + self.misracppstyle = misracppstyle + """generate MISRA C++-friendly headers""" + + 
self.codeGenerator = True + """True if this generator makes compilable code""" + + +class COutputGenerator(OutputGenerator): + """Generates C-language API interfaces.""" + + # This is an ordered list of sections in the header file. + TYPE_SECTIONS = ['include', 'define', 'basetype', 'handle', 'enum', + 'group', 'bitmask', 'funcpointer', 'struct'] + ALL_SECTIONS = TYPE_SECTIONS + ['commandPointer', 'command'] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Internal state - accumulators for different inner block text + self.sections = {section: [] for section in self.ALL_SECTIONS} + self.feature_not_empty = False + self.may_alias = None + + def beginFile(self, genOpts): + OutputGenerator.beginFile(self, genOpts) + # C-specific + # + # Multiple inclusion protection & C++ wrappers. + if genOpts.protectFile and self.genOpts.filename: + headerSym = re.sub(r'\.h', '_h_', + os.path.basename(self.genOpts.filename)).upper() + write('#ifndef', headerSym, file=self.outFile) + write('#define', headerSym, '1', file=self.outFile) + self.newline() + + # User-supplied prefix text, if any (list of strings) + if genOpts.prefixText: + for s in genOpts.prefixText: + write(s, file=self.outFile) + + # C++ extern wrapper - after prefix lines so they can add includes. 
+ self.newline() + write('#ifdef __cplusplus', file=self.outFile) + write('extern "C" {', file=self.outFile) + write('#endif', file=self.outFile) + self.newline() + + def endFile(self): + # C-specific + # Finish C++ wrapper and multiple inclusion protection + self.newline() + write('#ifdef __cplusplus', file=self.outFile) + write('}', file=self.outFile) + write('#endif', file=self.outFile) + if self.genOpts.protectFile and self.genOpts.filename: + self.newline() + write('#endif', file=self.outFile) + # Finish processing in superclass + OutputGenerator.endFile(self) + + def beginFeature(self, interface, emit): + # Start processing in superclass + OutputGenerator.beginFeature(self, interface, emit) + # C-specific + # Accumulate includes, defines, types, enums, function pointer typedefs, + # end function prototypes separately for this feature. They're only + # printed in endFeature(). + self.sections = {section: [] for section in self.ALL_SECTIONS} + self.feature_not_empty = False + + def endFeature(self): + "Actually write the interface to the output file." + # C-specific + if self.emit: + if self.feature_not_empty: + if self.genOpts.conventions.writeFeature(self.featureExtraProtect, self.genOpts.filename): + self.newline() + if self.genOpts.protectFeature: + write('#ifndef', self.featureName, file=self.outFile) + # If type declarations are needed by other features based on + # this one, it may be necessary to suppress the ExtraProtect, + # or move it below the 'for section...' loop. 
+ if self.featureExtraProtect is not None: + write('#ifdef', self.featureExtraProtect, file=self.outFile) + self.newline() + write('#define', self.featureName, '1', file=self.outFile) + for section in self.TYPE_SECTIONS: + contents = self.sections[section] + if contents: + write('\n'.join(contents), file=self.outFile) + if self.genOpts.genFuncPointers and self.sections['commandPointer']: + write('\n'.join(self.sections['commandPointer']), file=self.outFile) + self.newline() + if self.sections['command']: + if self.genOpts.protectProto: + write(self.genOpts.protectProto, + self.genOpts.protectProtoStr, file=self.outFile) + write('\n'.join(self.sections['command']), end='', file=self.outFile) + if self.genOpts.protectProto: + write('#endif', file=self.outFile) + else: + self.newline() + if self.featureExtraProtect is not None: + write('#endif /*', self.featureExtraProtect, '*/', file=self.outFile) + if self.genOpts.protectFeature: + write('#endif /*', self.featureName, '*/', file=self.outFile) + # Finish processing in superclass + OutputGenerator.endFeature(self) + + def appendSection(self, section, text): + "Append a definition to the specified section" + # self.sections[section].append('SECTION: ' + section + '\n') + self.sections[section].append(text) + self.feature_not_empty = True + + def genType(self, typeinfo, name, alias): + "Generate type." + OutputGenerator.genType(self, typeinfo, name, alias) + typeElem = typeinfo.elem + + # Vulkan: + # Determine the category of the type, and the type section to add + # its definition to. + # 'funcpointer' is added to the 'struct' section as a workaround for + # internal issue #877, since structures and function pointer types + # can have cross-dependencies. + category = typeElem.get('category') + if category == 'funcpointer': + section = 'struct' + else: + section = category + + if category in ('struct', 'union'): + # If the type is a struct type, generate it using the + # special-purpose generator. 
+ self.genStruct(typeinfo, name, alias) + else: + # OpenXR: this section was not under 'else:' previously, just fell through + if alias: + # If the type is an alias, just emit a typedef declaration + body = 'typedef ' + alias + ' ' + name + ';\n' + else: + # Replace tags with an APIENTRY-style string + # (from self.genOpts). Copy other text through unchanged. + # If the resulting text is an empty string, don't emit it. + body = noneStr(typeElem.text) + for elem in typeElem: + if elem.tag == 'apientry': + body += self.genOpts.apientry + noneStr(elem.tail) + else: + body += noneStr(elem.text) + noneStr(elem.tail) + if body: + # Add extra newline after multi-line entries. + if '\n' in body[0:-1]: + body += '\n' + self.appendSection(section, body) + + def genProtectString(self, protect_str): + """Generate protection string. + + Protection strings are the strings defining the OS/Platform/Graphics + requirements for a given OpenXR command. When generating the + language header files, we need to make sure the items specific to a + graphics API or OS platform are properly wrapped in #ifs.""" + protect_if_str = '' + protect_end_str = '' + if not protect_str: + return (protect_if_str, protect_end_str) + + if ',' in protect_str: + protect_list = protect_str.split(",") + protect_defs = ('defined(%s)' % d for d in protect_list) + protect_def_str = ' && '.join(protect_defs) + protect_if_str = '#if %s\n' % protect_def_str + protect_end_str = '#endif // %s\n' % protect_def_str + else: + protect_if_str = '#ifdef %s\n' % protect_str + protect_end_str = '#endif // %s\n' % protect_str + + return (protect_if_str, protect_end_str) + + def typeMayAlias(self, typeName): + if not self.may_alias: + # First time we've asked if a type may alias. + # So, let's populate the set of all names of types that may. 
+ + # Everyone with an explicit mayalias="true" + self.may_alias = set(typeName + for typeName, data in self.registry.typedict.items() + if data.elem.get('mayalias') == 'true') + + # Every type mentioned in some other type's parentstruct attribute. + parent_structs = (otherType.elem.get('parentstruct') + for otherType in self.registry.typedict.values()) + self.may_alias.update(set(x for x in parent_structs + if x is not None)) + return typeName in self.may_alias + + def genStruct(self, typeinfo, typeName, alias): + """Generate struct (e.g. C "struct" type). + + This is a special case of the tag where the contents are + interpreted as a set of tags instead of freeform C + C type declarations. The tags are just like + tags - they are a declaration of a struct or union member. + Only simple member declarations are supported (no nested + structs etc.) + + If alias is not None, then this struct aliases another; just + generate a typedef of that alias.""" + OutputGenerator.genStruct(self, typeinfo, typeName, alias) + + typeElem = typeinfo.elem + + if alias: + body = 'typedef ' + alias + ' ' + typeName + ';\n' + else: + body = '' + (protect_begin, protect_end) = self.genProtectString(typeElem.get('protect')) + if protect_begin: + body += protect_begin + body += 'typedef ' + typeElem.get('category') + + # This is an OpenXR-specific alternative where aliasing refers + # to an inheritance hierarchy of types rather than C-level type + # aliases. + if self.genOpts.genAliasMacro and self.typeMayAlias(typeName): + body += ' ' + self.genOpts.aliasMacro + + body += ' ' + typeName + ' {\n' + + targetLen = self.getMaxCParamTypeLength(typeinfo) + for member in typeElem.findall('.//member'): + body += self.makeCParamDecl(member, targetLen + 4) + body += ';\n' + body += '} ' + typeName + ';\n' + if protect_end: + body += protect_end + + self.appendSection('struct', body) + + def genGroup(self, groupinfo, groupName, alias=None): + """Generate groups (e.g. C "enum" type). 
+ + These are concatenated together with other types. + + If alias is not None, it is the name of another group type + which aliases this type; just generate that alias.""" + OutputGenerator.genGroup(self, groupinfo, groupName, alias) + groupElem = groupinfo.elem + + # After either enumerated type or alias paths, add the declaration + # to the appropriate section for the group being defined. + if groupElem.get('type') == 'bitmask': + section = 'bitmask' + else: + section = 'group' + + if alias: + # If the group name is aliased, just emit a typedef declaration + # for the alias. + body = 'typedef ' + alias + ' ' + groupName + ';\n' + self.appendSection(section, body) + else: + (section, body) = self.buildEnumCDecl(self.genOpts.genEnumBeginEndRange, groupinfo, groupName) + self.appendSection(section, "\n" + body) + + def genEnum(self, enuminfo, name, alias): + """Generate the C declaration for a constant (a single value).""" + + OutputGenerator.genEnum(self, enuminfo, name, alias) + + body = self.buildConstantCDecl(enuminfo, name, alias) + self.appendSection('enum', body) + + def genCmd(self, cmdinfo, name, alias): + "Command generation" + OutputGenerator.genCmd(self, cmdinfo, name, alias) + + # if alias: + # prefix = '// ' + name + ' is an alias of command ' + alias + '\n' + # else: + # prefix = '' + + prefix = '' + decls = self.makeCDecls(cmdinfo.elem) + self.appendSection('command', prefix + decls[0] + '\n') + if self.genOpts.genFuncPointers: + self.appendSection('commandPointer', decls[1]) + + def misracstyle(self): + return self.genOpts.misracstyle; + + def misracppstyle(self): + return self.genOpts.misracppstyle; diff --git a/xml/checklinks.py b/xml/checklinks.py new file mode 100644 index 000000000..94b650dad --- /dev/null +++ b/xml/checklinks.py @@ -0,0 +1,71 @@ +#!/usr/bin/python3 +# +# Copyright 2013-2023 The Khronos Group Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +import argparse +import os +import re + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('-d', action='store', dest='directory', + default='../api', + help='Directory containing files to check') + parser.add_argument('--unlinked', action='store_true', + help='Check for unlinked APIs and enums (may have false positives!)') + + args = parser.parse_args() + + links = set() + anchors = set() + + for filename in os.listdir(args.directory): + filename = args.directory + '/' + filename + sourcefile = open(filename, 'r') + sourcetext = sourcefile.read() + sourcefile.close() + + # We're not going to check API links. + #filelinks = re.findall(r"{((cl\w+)|(CL\w+))}", sourcetext) + filelinks = re.findall(r"{((CL\w+))}", sourcetext) + fileanchors = re.findall(r"{((cl\w+)|(CL\w+))_anchor}", sourcetext) + + filelinks = [re.sub(r"_anchor\b", "", link[0]) for link in filelinks] + fileanchors = [anchor[0] for anchor in fileanchors] + + links = links.union(set(filelinks) - set(fileanchors)) + anchors = anchors.union(set(fileanchors)) + + #print("=== " + filename) + #print("links:") + #print(' '.join(filelinks)) + #print("anchors:") + #print(' '.join(fileanchors)) + + if args.unlinked: + # Look for APIs and enums that do not begin with: + # { = asciidoctor attribute link + # character = middle of word + # < = asciidoctor link + # ' = refpage description + # / = proto include + fileunlinkedapi = sorted(list(set(re.findall(r"[^{\w<'/](cl[A-Z]\w+)\b[^'](?!.')", sourcetext)))) + fileunlinkedenums = sorted(list(set(re.findall(r"[^{\w<](CL_\w+)", sourcetext)))) + fileunlinkedtypes = sorted(list(set(re.findall(r"[^{\w<](cl_\w+)", sourcetext)))) + + if len(fileunlinkedapi) != 0: + print("unlinked APIs in " + filename + ":\n\t" + '\n\t'.join(fileunlinkedapi)) + + if len(fileunlinkedenums) != 0: + print("unlinked enums in " + filename + ":\n\t" + '\n\t'.join(fileunlinkedenums)) + + if len(fileunlinkedtypes) != 0: 
+ print("unlinked types in " + filename + ":\n\t" + '\n\t'.join(fileunlinkedtypes)) + + linkswithoutanchors = sorted(list(links - anchors)) + anchorswithoutlinks = sorted(list(anchors - links)) + + print("links without anchors:\n\t" + '\n\t'.join(linkswithoutanchors)) + #print("anchors without links:\n\t" + '\n\t'.join(anchorswithoutlinks)) diff --git a/xml/cl.xml b/xml/cl.xml index 063e2e3f2..283b068d5 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -252,7 +252,7 @@ server's OpenCL/api-docs repository. typedef cl_bitfield cl_device_fp_atomic_capabilities_ext; typedef cl_uint cl_image_requirements_info_ext; typedef cl_bitfield cl_platform_command_buffer_capabilities_khr; - typedef cl_bitfield cl_mutable_dispatch_promises_khr + typedef cl_bitfield cl_mutable_dispatch_asserts_khr Structure types @@ -1344,8 +1344,8 @@ server's OpenCL/api-docs repository. - - + + @@ -1783,7 +1783,7 @@ server's OpenCL/api-docs repository. - + @@ -7282,7 +7282,7 @@ server's OpenCL/api-docs repository. - + @@ -7319,8 +7319,8 @@ server's OpenCL/api-docs repository. - - + + diff --git a/xml/clconventions.py b/xml/clconventions.py new file mode 100644 index 000000000..f4df49d2d --- /dev/null +++ b/xml/clconventions.py @@ -0,0 +1,241 @@ +#!/usr/bin/python3 -i +# +# Copyright 2013-2023 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +# Working-group-specific style conventions, +# used in generation. 
+ +import re + +from conventions import ConventionsBase + + +class OpenCLConventions(ConventionsBase): + def formatExtension(self, name): + """Mark up a name as an extension for the spec.""" + return '`<<{}>>`'.format(name) + + @property + def null(self): + """Preferred spelling of NULL.""" + return '`NULL`' + + @property + def constFlagBits(self): + """Returns True if static const flag bits should be generated, False if an enumerated type should be generated.""" + return False + + @property + def struct_macro(self): + return 'sname:' + + @property + def external_macro(self): + return 'code:' + + @property + def structtype_member_name(self): + """Return name of the structure type member""" + return 'sType' + + @property + def nextpointer_member_name(self): + """Return name of the structure pointer chain member""" + return 'pNext' + + @property + def valid_pointer_prefix(self): + """Return prefix to pointers which must themselves be valid""" + return 'valid' + + def is_structure_type_member(self, paramtype, paramname): + """Determine if member type and name match the structure type member.""" + return False + + def is_nextpointer_member(self, paramtype, paramname): + """Determine if member type and name match the next pointer chain member.""" + return paramtype == 'void' and paramname == self.nextpointer_member_name + + def generate_structure_type_from_name(self, structname): + """Generate a structure type name token from a structure name. + This should never be called for OpenCL, just other APIs.""" + return '' + + @property + def warning_comment(self): + """Return warning comment to be placed in header of generated Asciidoctor files""" + return '// WARNING: DO NOT MODIFY! 
This file is automatically generated from the cl.xml registry' + + @property + def file_suffix(self): + """Return suffix of generated Asciidoctor files""" + return '.txt' + + def api_name(self, spectype='api'): + """Return API or specification name for citations in ref + pages. + + spectype is the spec this refpage is for: 'api' is the OpenCL API + Specification, 'clang' is the OpenCL C Language specification. + Defaults to 'api'. If an unrecognized spectype is given, returns + None. + """ + if spectype == 'api' or spectype is None: + return 'OpenCL' + elif spectype == 'clang': + return 'OpenCL C' + else: + return None + + @property + def xml_supported_name_of_api(self): + """Return the supported= attribute used in API XML""" + return 'opencl' + + @property + def api_prefix(self): + """Return API token prefix""" + return 'CL_' + + @property + def api_version_prefix(self): + """Return API core version token prefix""" + return 'CL_VERSION_' + + @property + def KHR_prefix(self): + """Return extension name prefix for KHR extensions""" + return 'cl_khr_' + + @property + def EXT_prefix(self): + """Return extension name prefix for EXT extensions""" + return 'cl_ext_' + + @property + def write_contacts(self): + """Return whether contact list should be written to extension appendices""" + return True + + @property + def write_refpage_include(self): + """Return whether refpage include should be written to extension appendices""" + return False + + def writeFeature(self, featureExtraProtect, filename): + """Returns True if OutputGenerator.endFeature should write this feature. + Used in COutputGenerator + """ + return True + + def requires_error_validation(self, return_type): + """Returns True if the return_type element is an API result code + requiring error validation. 
+ """ + return False + + @property + def required_errors(self): + """Return a list of required error codes for validation.""" + return [] + + def is_externsync_command(self, protoname): + """Returns True if the protoname element is an API command requiring + external synchronization + """ + return False + + def is_api_name(self, name): + """Returns True if name is in the reserved API namespace. + For OpenCL, these are names with a case-insensitive 'cl' prefix. + """ + return name[0:2].lower() == 'cl' + + def is_voidpointer_alias(self, tag, text, tail): + """Return True if the declaration components (tag,text,tail) of an + element represents a void * type + """ + return tag == 'type' and text == 'void' and tail.startswith('*') + + def make_voidpointer_alias(self, tail): + """Reformat a void * declaration to include the API alias macro. + Vulkan doesn't have an API alias macro, so do nothing. + """ + return tail + + def specURL(self, spectype = 'api'): + """Return public registry URL which ref pages should link to for + full Specification, so xrefs in the asciidoc source that aren't + to ref pages can link into it instead. + + spectype is the spec this refpage is for: 'api' is the OpenCL API + Specification, 'clang' is the OpenCL C Language specification. + Defaults to 'api'. If an unrecognized spectype is given, returns + None. 
+ """ + if spectype == 'api' or spectype is None: + return 'https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html' + elif spectype == 'clang': + return 'https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_C.html' + else: + return None + + @property + def xml_api_name(self): + """Return the name used in the default API XML registry for the default API""" + return 'opencl' + + @property + def registry_path(self): + """Return relpath to the default API XML registry in this project.""" + return 'xml/cl.xml' + + @property + def specification_path(self): + """Return relpath to the Asciidoctor specification sources in this project.""" + return '../appendices/meta' + + @property + def extra_refpage_headers(self): + """Return any extra text to add to refpage headers.""" + return 'include::{config}/attribs.txt[]\n' + \ + 'include::{config}/opencl.asciidoc[]\n' + \ + 'include::{apispec}/footnotes.asciidoc[]\n' + \ + 'include::{cspec}/footnotes.asciidoc[]\n' + \ + 'include::{cspec}/feature-dictionary.asciidoc[]\n' + \ + 'include::{generated}/api/api-dictionary-no-links.asciidoc[]' + + @property + def extension_index_prefixes(self): + """Return a list of extension prefixes used to group extension refpages.""" + return ['cl_khr', 'cl_ext', 'cl'] + + @property + def unified_flag_refpages(self): + """Return True if Flags/FlagBits refpages are unified, False if + they're separate. + """ + return False + + @property + def spec_reflow_path(self): + """Return the relative path to the spec source folder to reflow""" + return '.' + + @property + def spec_no_reflow_dirs(self): + """Return a set of directories not to automatically descend into + when reflowing spec text + """ + return ('scripts', 'style') + + @property + def should_skip_checking_codes(self): + """Return True if more than the basic validation of return codes should + be skipped for a command. 
+ + OpenCL has a different style of error handling than OpenXR or + Vulkan, so these checks are not appropriate.""" + + return True diff --git a/xml/conventions.py b/xml/conventions.py new file mode 100644 index 000000000..6b6b23d14 --- /dev/null +++ b/xml/conventions.py @@ -0,0 +1,358 @@ +#!/usr/bin/python3 -i +# +# Copyright 2013-2023 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +# Base class for working-group-specific style conventions, +# used in generation. + +from enum import Enum + +# Type categories that respond "False" to isStructAlwaysValid +# basetype is home to typedefs like ..Bool32 +CATEGORIES_REQUIRING_VALIDATION = set(('handle', + 'enum', + 'bitmask', + 'basetype', + None)) + +# These are basic C types pulled in via openxr_platform_defines.h +TYPES_KNOWN_ALWAYS_VALID = set(('char', + 'float', + 'int8_t', 'uint8_t', + 'int32_t', 'uint32_t', + 'int64_t', 'uint64_t', + 'size_t', + 'uintptr_t', + 'int', + )) + + +class ProseListFormats(Enum): + """A connective, possibly with a quantifier.""" + AND = 0 + EACH_AND = 1 + OR = 2 + ANY_OR = 3 + + @classmethod + def from_string(cls, s): + if s == 'or': + return cls.OR + if s == 'and': + return cls.AND + return None + + @property + def connective(self): + if self in (ProseListFormats.OR, ProseListFormats.ANY_OR): + return 'or' + return 'and' + + def quantifier(self, n): + """Return the desired quantifier for a list of a given length.""" + if self == ProseListFormats.ANY_OR: + if n > 1: + return 'any of ' + elif self == ProseListFormats.EACH_AND: + if n > 2: + return 'each of ' + if n == 2: + return 'both of ' + return '' + + +class ConventionsBase: + """WG-specific conventions.""" + + def __init__(self): + self._command_prefix = None + self._type_prefix = None + + def formatExtension(self, name): + """Mark up an extension name as a link the spec.""" + return '`apiext:{}`'.format(name) + + @property + def null(self): + """Preferred spelling of NULL.""" + raise NotImplementedError + + def 
makeProseList(self, elements, fmt=ProseListFormats.AND, with_verb=False, *args, **kwargs): + """Make a (comma-separated) list for use in prose. + + Adds a connective (by default, 'and') + before the last element if there are more than 1. + + Adds the right one of "is" or "are" to the end if with_verb is true. + + Optionally adds a quantifier (like 'any') before a list of 2 or more, + if specified by fmt. + + Override with a different method or different call to + _implMakeProseList if you want to add a comma for two elements, + or not use a serial comma. + """ + return self._implMakeProseList(elements, fmt, with_verb, *args, **kwargs) + + @property + def struct_macro(self): + """Get the appropriate format macro for a structure. + + May override. + """ + return 'slink:' + + @property + def external_macro(self): + """Get the appropriate format macro for an external type like uint32_t. + + May override. + """ + return 'code:' + + def makeStructName(self, name): + """Prepend the appropriate format macro for a structure to a structure type name. + + Uses struct_macro, so just override that if you want to change behavior. + """ + return self.struct_macro + name + + def makeExternalTypeName(self, name): + """Prepend the appropriate format macro for an external type like uint32_t to a type name. + + Uses external_macro, so just override that if you want to change behavior. + """ + return self.external_macro + name + + def _implMakeProseList(self, elements, fmt, with_verb, comma_for_two_elts=False, serial_comma=True): + """Internal-use implementation to make a (comma-separated) list for use in prose. + + Adds a connective (by default, 'and') + before the last element if there are more than 1, + and only includes commas if there are more than 2 + (if comma_for_two_elts is False). + + Adds the right one of "is" or "are" to the end if with_verb is true. + + Optionally adds a quantifier (like 'any') before a list of 2 or more, + if specified by fmt. 
+ + Don't edit these defaults, override self.makeProseList(). + """ + assert(serial_comma) # didn't implement what we didn't need + if isinstance(fmt, str): + fmt = ProseListFormats.from_string(fmt) + + my_elts = list(elements) + if len(my_elts) > 1: + my_elts[-1] = '{} {}'.format(fmt.connective, my_elts[-1]) + + if not comma_for_two_elts and len(my_elts) <= 2: + prose = ' '.join(my_elts) + else: + prose = ', '.join(my_elts) + + quantifier = fmt.quantifier(len(my_elts)) + + parts = [quantifier, prose] + + if with_verb: + if len(my_elts) > 1: + parts.append(' are') + else: + parts.append(' is') + return ''.join(parts) + + @property + def file_suffix(self): + """Return suffix of generated Asciidoctor files""" + raise NotImplementedError + + def api_name(self, spectype=None): + """Return API or specification name for citations in ref pages. + + spectype is the spec this refpage is for. + 'api' (the default value) is the main API Specification. + If an unrecognized spectype is given, returns None. + + Must implement.""" + raise NotImplementedError + + def should_insert_may_alias_macro(self, genOpts): + """Return true if we should insert a "may alias" macro in this file. + + Only used by OpenXR right now.""" + return False + + @property + def command_prefix(self): + """Return the expected prefix of commands/functions. + + Implemented in terms of api_prefix.""" + if not self._command_prefix: + self._command_prefix = self.api_prefix[:].replace('_', '').lower() + return self._command_prefix + + @property + def type_prefix(self): + """Return the expected prefix of type names. + + Implemented in terms of command_prefix (and in turn, api_prefix).""" + if not self._type_prefix: + self._type_prefix = ''.join( + (self.command_prefix[0:1].upper(), self.command_prefix[1:])) + return self._type_prefix + + @property + def api_prefix(self): + """Return API token prefix. + + Typically two uppercase letters followed by an underscore. 
+ + Must implement.""" + raise NotImplementedError + + @property + def api_version_prefix(self): + """Return API core version token prefix. + + Implemented in terms of api_prefix. + + May override.""" + return self.api_prefix + 'VERSION_' + + @property + def KHR_prefix(self): + """Return extension name prefix for KHR extensions. + + Implemented in terms of api_prefix. + + May override.""" + return self.api_prefix + 'KHR_' + + @property + def EXT_prefix(self): + """Return extension name prefix for EXT extensions. + + Implemented in terms of api_prefix. + + May override.""" + return self.api_prefix + 'EXT_' + + def writeFeature(self, featureExtraProtect, filename): + """Return True if OutputGenerator.endFeature should write this feature. + + Defaults to always True. + Used in COutputGenerator. + + May override.""" + return True + + def requires_error_validation(self, return_type): + """Return True if the return_type element is an API result code + requiring error validation. + + Defaults to always False. + + May override.""" + return False + + @property + def required_errors(self): + """Return a list of required error codes for validation. + + Defaults to an empty list. + + May override.""" + return [] + + def is_voidpointer_alias(self, tag, text, tail): + """Return True if the declaration components (tag,text,tail) of an + element represents a void * type. + + Defaults to a reasonable implementation. + + May override.""" + return tag == 'type' and text == 'void' and tail.startswith('*') + + def make_voidpointer_alias(self, tail): + """Reformat a void * declaration to include the API alias macro. + + Defaults to a no-op. + + Must override if you actually want to use this feature in your project.""" + return tail + + def category_requires_validation(self, category): + """Return True if the given type 'category' always requires validation. + + Defaults to a reasonable implementation. 
+ + May override.""" + return category in CATEGORIES_REQUIRING_VALIDATION + + def type_always_valid(self, typename): + """Return True if the given type name is always valid (never requires validation). + + This is for things like integers. + + Defaults to a reasonable implementation. + + May override.""" + return typename in TYPES_KNOWN_ALWAYS_VALID + + @property + def should_skip_checking_codes(self): + """Return True if more than the basic validation of return codes should + be skipped for a command.""" + + return False + + @property + def generate_index_terms(self): + """Return True if asciidoctor index terms should be generated as part + of an API interface from the docgenerator.""" + + return False + + @property + def generate_enum_table(self): + """Return True if asciidoctor tables describing enumerants in a + group should be generated as part of group generation.""" + return False + + @property + def generate_max_enum_in_docs(self): + """Return True if MAX_ENUM tokens should be generated in + documentation includes.""" + return False + + + def extension_include_string(self, ext): + """Return format string for include:: line for an extension appendix + file. ext is an object with the following members: + - name - extension name string + - vendor - vendor portion of name + - barename - remainder of name + + Must implement.""" + raise NotImplementedError + + @property + def refpage_generated_include_path(self): + """Return path relative to the generated reference pages, to the + generated API include files. + + Must implement.""" + raise NotImplementedError + + def valid_flag_bit(self, bitpos): + """Return True if bitpos is an allowed numeric bit position for + an API flag. 
+ + Behavior depends on the data type used for flags (which may be 32 + or 64 bits), and may depend on assumptions about compiler + handling of sign bits in enumerated types, as well.""" + return True diff --git a/xml/docgenerator.py b/xml/docgenerator.py new file mode 100644 index 000000000..073552534 --- /dev/null +++ b/xml/docgenerator.py @@ -0,0 +1,454 @@ +#!/usr/bin/python3 -i +# +# Copyright 2013-2023 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path + +from generator import GeneratorOptions, OutputGenerator, noneStr, write + +ENUM_TABLE_PREFIX = """ +[cols=",",options="header",] +|======================================================================= +|Enum |Description""" + +ENUM_TABLE_SUFFIX = """|=======================================================================""" + +FLAG_BLOCK_PREFIX = """.Flag Descriptions +****""" + +FLAG_BLOCK_SUFFIX = """****""" + + +class DocGeneratorOptions(GeneratorOptions): + """DocGeneratorOptions - subclass of GeneratorOptions for + generating declaration snippets for the spec. + + Shares many members with CGeneratorOptions, since + both are writing C-style declarations.""" + + def __init__(self, + prefixText="", + apicall='', + apientry='', + apientryp='', + indentFuncProto=True, + indentFuncPointer=False, + alignFuncParam=0, + secondaryInclude=False, + expandEnumerants=True, + extEnumerantAdditions=False, + extEnumerantFormatString=" (Added by the {} extension)", + **kwargs): + """Constructor. + + Since this generator outputs multiple files at once, + the filename is just a "stamp" to indicate last generation time. + + Shares many parameters/members with CGeneratorOptions, since + both are writing C-style declarations: + + - prefixText - list of strings to prefix generated header with + (usually a copyright statement + calling convention macros). + - apicall - string to use for the function declaration prefix, + such as APICALL on Windows. 
+ - apientry - string to use for the calling convention macro, + in typedefs, such as APIENTRY. + - apientryp - string to use for the calling convention macro + in function pointer typedefs, such as APIENTRYP. + - indentFuncProto - True if prototype declarations should put each + parameter on a separate line + - indentFuncPointer - True if typedefed function pointers should put each + parameter on a separate line + - alignFuncParam - if nonzero and parameters are being put on a + separate line, align parameter names at the specified column + + Additional parameters/members: + + - expandEnumerants - if True, add BEGIN/END_RANGE macros in enumerated + type declarations + - secondaryInclude - if True, add secondary (no xref anchor) versions + of generated files + - extEnumerantAdditions - if True, include enumerants added by extensions + in comment tables for core enumeration types. + - extEnumerantFormatString - A format string for any additional message for + enumerants from extensions if extEnumerantAdditions is True. The correctly- + marked-up extension name will be passed. 
+ """ + GeneratorOptions.__init__(self, **kwargs) + self.prefixText = prefixText + """list of strings to prefix generated header with (usually a copyright statement + calling convention macros).""" + + self.apicall = apicall + """string to use for the function declaration prefix, such as APICALL on Windows.""" + + self.apientry = apientry + """string to use for the calling convention macro, in typedefs, such as APIENTRY.""" + + self.apientryp = apientryp + """string to use for the calling convention macro in function pointer typedefs, such as APIENTRYP.""" + + self.indentFuncProto = indentFuncProto + """True if prototype declarations should put each parameter on a separate line""" + + self.indentFuncPointer = indentFuncPointer + """True if typedefed function pointers should put each parameter on a separate line""" + + self.alignFuncParam = alignFuncParam + """if nonzero and parameters are being put on a separate line, align parameter names at the specified column""" + + self.secondaryInclude = secondaryInclude + """if True, add secondary (no xref anchor) versions of generated files""" + + self.expandEnumerants = expandEnumerants + """if True, add BEGIN/END_RANGE macros in enumerated type declarations""" + + self.extEnumerantAdditions = extEnumerantAdditions + """if True, include enumerants added by extensions in comment tables for core enumeration types.""" + + self.extEnumerantFormatString = extEnumerantFormatString + """A format string for any additional message for + enumerants from extensions if extEnumerantAdditions is True. The correctly- + marked-up extension name will be passed.""" + + +class DocOutputGenerator(OutputGenerator): + """DocOutputGenerator - subclass of OutputGenerator. + + Generates AsciiDoc includes with C-language API interfaces, for reference + pages and the corresponding specification. 
Similar to COutputGenerator, + but each interface is written into a different file as determined by the + options, only actual C types are emitted, and none of the boilerplate + preprocessor code is emitted.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Keep track of all extension numbers + self.extension_numbers = set() + + def beginFile(self, genOpts): + OutputGenerator.beginFile(self, genOpts) + + # This should be a separate conventions property rather than an + # inferred type name pattern for different APIs. + self.result_type = genOpts.conventions.type_prefix + "Result" + + def endFile(self): + OutputGenerator.endFile(self) + + def beginFeature(self, interface, emit): + # Start processing in superclass + OutputGenerator.beginFeature(self, interface, emit) + + # Decide if we're in a core <feature> or an <extension> + self.in_core = (interface.tag == 'feature') + + # Verify that each <extension> has a unique number during doc + # generation + # TODO move this to consistency_tools + if not self.in_core: + extension_number = interface.get('number') + if extension_number is not None and extension_number != "0": + if extension_number in self.extension_numbers: + self.logMsg('error', 'Duplicate extension number ', extension_number, ' detected in feature ', interface.get('name'), '\n') + exit(1) + else: + self.extension_numbers.add(extension_number) + + def endFeature(self): + # Finish processing in superclass + OutputGenerator.endFeature(self) + + def genRequirements(self, name, mustBeFound = True): + """Generate text showing what core versions and extensions introduce + an API. This relies on the map in api.py, which may be loaded at + runtime into self.apidict. If not present, no message is + generated. + + - name - name of the API + - mustBeFound - If True, when requirements for 'name' cannot be + determined, a warning comment is generated. 
+ """ + + if self.apidict: + if name in self.apidict.requiredBy: + features = [] + for (base,dependency) in self.apidict.requiredBy[name]: + if dependency is not None: + features.append('{} with {}'.format(base, dependency)) + else: + features.append(base) + return '// Provided by {}\n'.format(', '.join(features)) + else: + if mustBeFound: + self.logMsg('warn', 'genRequirements: API {} not found'.format(name)) + return '' + else: + # No API dictionary available, return nothing + return '' + + def writeInclude(self, directory, basename, contents): + """Generate an include file. + + - directory - subdirectory to put file in + - basename - base name of the file + - contents - contents of the file (Asciidoc boilerplate aside)""" + # Create subdirectory, if needed + directory = self.genOpts.directory + '/' + directory + self.makeDir(directory) + + # Create file + filename = directory + '/' + basename + '.txt' + self.logMsg('diag', '# Generating include file:', filename) + fp = open(filename, 'w', encoding='utf-8') + + # Asciidoc anchor + write(self.genOpts.conventions.warning_comment, file=fp) + write('[[{0},{0}]]'.format(basename), file=fp) + + if self.genOpts.conventions.generate_index_terms: + index_terms = [] + if basename.startswith(self.conventions.command_prefix): + index_terms.append(basename[2:] + " (function)") + elif basename.startswith(self.conventions.type_prefix): + index_terms.append(basename[2:] + " (type)") + elif basename.startswith(self.conventions.api_prefix): + index_terms.append(basename[len(self.conventions.api_prefix):] + " (define)") + index_terms.append(basename) + write('indexterm:[{}]'.format(','.join(index_terms)), file=fp) + + write('[source,opencl]', file=fp) + write('----', file=fp) + write(contents, file=fp) + write('----', file=fp) + fp.close() + + if self.genOpts.secondaryInclude: + # Create secondary no cross-reference include file + filename = directory + '/' + basename + '.no-xref.txt' + self.logMsg('diag', '# Generating include 
file:', filename) + fp = open(filename, 'w', encoding='utf-8') + + # Asciidoc anchor + write(self.genOpts.conventions.warning_comment, file=fp) + write('// Include this no-xref version without cross reference id for multiple includes of same file', file=fp) + write('[source,opencl]', file=fp) + write('----', file=fp) + write(contents, file=fp) + write('----', file=fp) + fp.close() + + def writeTable(self, basename, values): + """Output a table of enumerants.""" + directory = Path(self.genOpts.directory) / 'enums' + self.makeDir(str(directory)) + + filename = str(directory / '{}.comments.txt'.format(basename)) + self.logMsg('diag', '# Generating include file:', filename) + + with open(filename, 'w', encoding='utf-8') as fp: + write(self.conventions.warning_comment, file=fp) + write(ENUM_TABLE_PREFIX, file=fp) + + for data in values: + write("|ename:{}".format(data['name']), file=fp) + write("|{}".format(data['comment']), file=fp) + + write(ENUM_TABLE_SUFFIX, file=fp) + + def writeFlagBox(self, basename, values): + """Output a box of flag bit comments.""" + directory = Path(self.genOpts.directory) / 'enums' + self.makeDir(str(directory)) + + filename = str(directory / '{}.comments.txt'.format(basename)) + self.logMsg('diag', '# Generating include file:', filename) + + with open(filename, 'w', encoding='utf-8') as fp: + write(self.conventions.warning_comment, file=fp) + write(FLAG_BLOCK_PREFIX, file=fp) + + for data in values: + write("* ename:{} -- {}".format(data['name'], + data['comment']), + file=fp) + + write(FLAG_BLOCK_SUFFIX, file=fp) + + def genType(self, typeinfo, name, alias): + """Generate type.""" + OutputGenerator.genType(self, typeinfo, name, alias) + typeElem = typeinfo.elem + # If the type is a struct type, traverse the embedded tags + # generating a structure. Otherwise, emit the tag text. + category = typeElem.get('category') + + if category in ('struct', 'union'): + # If the type is a struct type, generate it using the + # special-purpose generator. 
+ self.genStruct(typeinfo, name, alias) + else: + body = self.genRequirements(name) + if alias: + # If the type is an alias, just emit a typedef declaration + body += 'typedef ' + alias + ' ' + name + ';\n' + self.writeInclude(OutputGenerator.categoryToPath[category], + name, body) + else: + # Replace <apientry /> tags with an APIENTRY-style string + # (from self.genOpts). Copy other text through unchanged. + # If the resulting text is an empty string, don't emit it. + body += noneStr(typeElem.text) + for elem in typeElem: + if elem.tag == 'apientry': + body += self.genOpts.apientry + noneStr(elem.tail) + else: + body += noneStr(elem.text) + noneStr(elem.tail) + + if body: + if category in OutputGenerator.categoryToPath: + self.writeInclude(OutputGenerator.categoryToPath[category], + name, body + '\n') + else: + self.logMsg('diag', '# NOT writing include file for type:', + name, '- bad category: ', category) + else: + self.logMsg('diag', '# NOT writing empty include file for type', name) + + def genStruct(self, typeinfo, typeName, alias): + """Generate struct.""" + OutputGenerator.genStruct(self, typeinfo, typeName, alias) + + typeElem = typeinfo.elem + + body = self.genRequirements(typeName) + if alias: + body += 'typedef ' + alias + ' ' + typeName + ';\n' + else: + body += 'typedef ' + typeElem.get('category') + ' ' + typeName + ' {\n' + + targetLen = self.getMaxCParamTypeLength(typeinfo) + for member in typeElem.findall('.//member'): + body += self.makeCParamDecl(member, targetLen + 4) + body += ';\n' + body += '} ' + typeName + ';' + + self.writeInclude('structs', typeName, body) + + def genEnumTable(self, groupinfo, groupName): + """Generate tables of enumerant values and short descriptions from + the XML.""" + + values = [] + got_comment = False + missing_comments = [] + for elem in groupinfo.elem.findall('enum'): + if not elem.get('required'): + continue + name = elem.get('name') + + data = { + 'name': name, + } + + (numVal, strVal) = self.enumToValue(elem, True) +
data['value'] = numVal + + extname = elem.get('extname') + + added_by_extension_to_core = (extname is not None and self.in_core) + if added_by_extension_to_core and not self.genOpts.extEnumerantAdditions: + # We're skipping such values + continue + + comment = elem.get('comment') + if comment: + got_comment = True + elif name.endswith('_UNKNOWN') and numVal == 0: + # This is a placeholder for 0-initialization to be clearly invalid. + # Just skip this silently + continue + else: + # Skip but record this in case it's an odd-one-out missing a comment. + missing_comments.append(name) + continue + + if added_by_extension_to_core and self.genOpts.extEnumerantFormatString: + # Add a note to the comment + comment += self.genOpts.extEnumerantFormatString.format( + self.conventions.formatExtension(extname)) + + data['comment'] = comment + values.append(data) + + if got_comment: + # If any had a comment, output it. + + if missing_comments: + self.logMsg('warn', 'The following values for', groupName, + 'were omitted from the table due to missing comment attributes:', + ', '.join(missing_comments)) + + group_type = groupinfo.elem.get('type') + if groupName == self.result_type: + # Split this into success and failure + self.writeTable(groupName + '.success', + (data for data in values + if data['value'] >= 0)) + self.writeTable(groupName + '.error', + (data for data in values + if data['value'] < 0)) + elif group_type == 'bitmask': + self.writeFlagBox(groupName, values) + elif group_type == 'enum': + self.writeTable(groupName, values) + else: + raise RuntimeError("Unrecognized enums type: " + str(group_type)) + + def genGroup(self, groupinfo, groupName, alias): + """Generate group (e.g. C "enum" type).""" + OutputGenerator.genGroup(self, groupinfo, groupName, alias) + + body = self.genRequirements(groupName) + if alias: + # If the group name is aliased, just emit a typedef declaration + # for the alias. 
def genCmd(self, cmdinfo, name, alias):
    """Generate the prototype include for a command.

    - cmdinfo - object whose `elem` attribute is the command's XML element
    - name - command name
    - alias - alias of the command, passed through to the superclass

    When the conventions require error validation for the command's return
    type, the command's `errorcodes` attribute must list every required
    error code; otherwise an error is logged and the process exits with
    status 1."""
    OutputGenerator.genCmd(self, cmdinfo, name, alias)

    return_type = cmdinfo.elem.find('proto/type')
    if self.genOpts.conventions.requires_error_validation(return_type):
        # This command returns an API result code, so check that it
        # returns at least the required errors.
        # TODO move this to consistency_tools
        required_errors = set(self.genOpts.conventions.required_errors)
        # A missing 'errorcodes' attribute means "declares no error codes";
        # treat it as empty instead of failing on None.split().
        declared = cmdinfo.elem.get('errorcodes') or ''
        errorcodes = {code.strip() for code in declared.split(',') if code.strip()}
        if not required_errors.issubset(errorcodes):
            self.logMsg('error', 'Missing required error code for command: ', name, '\n')
            # Same effect as exit(1), without depending on the builtin
            # that the 'site' module injects.
            raise SystemExit(1)

    body = self.genRequirements(name)
    decls = self.makeCDecls(cmdinfo.elem)
    body += decls[0]
    self.writeInclude('protos', name, body)
# Splits an extension name shaped like PREFIX_VENDOR_rest into its vendor tag
# and bare name. Extension.__init__ reads the 'tag' and 'name' groups, so both
# groups must be named.
EXT_NAME_DECOMPOSE_RE = re.compile(r'[A-Z]+_(?P<tag>[A-Z]+)_(?P<name>[\w_]+)')


@total_ordering
class Extension:
    """Metainformation for one API extension.

    Instances sort with KHR-prefixed extensions first, then EXT-prefixed
    ones, then all others; within a group they sort by name."""

    def __init__(self,
                 generator, # needed for logging and API conventions
                 filename,
                 name,
                 number,
                 ext_type,
                 requires,
                 requiresCore,
                 contact,
                 promotedTo,
                 deprecatedBy,
                 obsoletedBy,
                 provisional,
                 revision,
                 specialuse ):
        """Store the extension attributes and derive its deprecation state.

        Sets `deprecationType` to 'promotion', 'deprecation', 'obsoletion'
        or None, and sets at most one of `supercedingAPIVersion` /
        `supercedingExtension` from the corresponding attribute value.
        `vendor` and `bare_name` are taken from the extension name, or are
        None when the name does not match EXT_NAME_DECOMPOSE_RE."""
        self.generator = generator
        self.conventions = generator.genOpts.conventions
        self.filename = filename
        self.name = name
        self.number = number
        self.ext_type = ext_type
        self.requires = requires
        self.requiresCore = requiresCore
        self.contact = contact
        self.promotedTo = promotedTo
        self.deprecatedBy = deprecatedBy
        self.obsoletedBy = obsoletedBy
        self.provisional = provisional
        self.revision = revision
        self.specialuse = specialuse

        self.deprecationType = None
        self.supercedingAPIVersion = None
        self.supercedingExtension = None

        # NOTE(review): the "Ignoring ..." texts below do not all agree with
        # the precedence applied further down (promotedTo, then deprecatedBy,
        # then obsoletedBy) - confirm which of the two is intended.
        if self.promotedTo is not None and self.deprecatedBy is not None and self.obsoletedBy is not None:
            self.generator.logMsg('warn', 'All \'promotedto\', \'deprecatedby\' and \'obsoletedby\' attributes used on extension ' + self.name + '! Ignoring \'promotedto\' and \'deprecatedby\'.')
        elif self.promotedTo is not None and self.deprecatedBy is not None:
            self.generator.logMsg('warn', 'Both \'promotedto\' and \'deprecatedby\' attributes used on extension ' + self.name + '! Ignoring \'deprecatedby\'.')
        elif self.promotedTo is not None and self.obsoletedBy is not None:
            self.generator.logMsg('warn', 'Both \'promotedto\' and \'obsoletedby\' attributes used on extension ' + self.name + '! Ignoring \'promotedto\'.')
        elif self.deprecatedBy is not None and self.obsoletedBy is not None:
            self.generator.logMsg('warn', 'Both \'deprecatedby\' and \'obsoletedby\' attributes used on extension ' + self.name + '! Ignoring \'deprecatedby\'.')

        supercededBy = None
        if self.promotedTo is not None:
            self.deprecationType = 'promotion'
            supercededBy = promotedTo
        elif self.deprecatedBy is not None:
            self.deprecationType = 'deprecation'
            supercededBy = deprecatedBy
        elif self.obsoletedBy is not None:
            self.deprecationType = 'obsoletion'
            supercededBy = obsoletedBy

        if supercededBy is not None:
            if supercededBy == '' and not self.deprecationType == 'promotion':
                pass # supercedingAPIVersion, supercedingExtension is None
            elif supercededBy.startswith(self.conventions.api_version_prefix):
                self.supercedingAPIVersion = supercededBy
            elif supercededBy.startswith(self.conventions.api_prefix):
                self.supercedingExtension = supercededBy
            else:
                self.generator.logMsg('error', 'Unrecognized ' + self.deprecationType + ' attribute value \'' + supercededBy + '\'!')

        match = EXT_NAME_DECOMPOSE_RE.match(self.name)
        if match is None:
            # A name that does not follow the PREFIX_VENDOR_rest shape has no
            # vendor tag to extract; report it instead of failing on None.
            self.generator.logMsg('warn', 'Could not decompose extension name \'' + self.name + '\' into vendor tag and bare name.')
            self.vendor = None
            self.bare_name = None
        else:
            self.vendor = match.group('tag')
            self.bare_name = match.group('name')

    def __str__(self):
        return self.name
    def __eq__(self, other):
        return self.name == other.name
    def __ne__(self, other):
        return self.name != other.name

    def __lt__(self, other):
        """Order KHR extensions first, EXT extensions second, then by name."""
        self_is_KHR = self.name.startswith(self.conventions.KHR_prefix)
        self_is_EXT = self.name.startswith(self.conventions.EXT_prefix)
        other_is_KHR = other.name.startswith(self.conventions.KHR_prefix)
        other_is_EXT = other.name.startswith(self.conventions.EXT_prefix)

        swap = False
        if self_is_KHR and not other_is_KHR:
            return not swap
        if other_is_KHR and not self_is_KHR:
            return swap
        if self_is_EXT and not other_is_EXT:
            return not swap
        if other_is_EXT and not self_is_EXT:
            return swap

        return self.name < other.name
def specLink(self, xrefName, xrefText, isRefpage = False):
    """Build asciidoctor markup linking to a specification anchor.

    - xrefName - anchor name in the spec
    - xrefText - text to show for the link
    - isRefpage - True when generating a refpage include, False when
      generating a specification extension appendix include

    Returns an in-document cross-reference for the specification, or a
    `link:` macro into the API spec URL for refpages."""
    if not isRefpage:
        return ''.join(('<<', xrefName, ', ', xrefText, '>>'))

    # Refpages always link into the API spec
    api_spec_url = self.conventions.specURL('api')
    return 'link:{}#{}[{}^]'.format(api_spec_url, xrefName, xrefText)
def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file):
    """Write the further deprecation history of a superseding extension.

    Looks up the extension named `succeededBy` and, if it has itself been
    promoted, deprecated or obsoleted, writes a "Which in turn was ..."
    entry and then follows the chain recursively.

    - extensionsList - list of Extension objects to search
    - succeededBy - name of the extension that superseded the caller
    - isRefpage - True when generating a refpage include
    - file - open file to write to"""
    ext = next((x for x in extensionsList if x.name == succeededBy), None)
    if ext is None:
        # The successor is not among the extensions being generated, so
        # there is no chain to follow.
        self.generator.logMsg('warn', 'resolveDeprecationChain(): extension \'' + str(succeededBy) + '\' not found in the list of extensions.')
        return

    if not ext.deprecationType:
        return

    # Every recursive call forwards isRefpage so that nested links are
    # rendered in the same style as the top-level entry.
    if ext.deprecationType == 'promotion':
        if ext.supercedingAPIVersion:
            write(' ** Which in turn was _promoted_ to\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-promotions', isRefpage), file=file)
        else: # ext.supercedingExtension
            write(' ** Which in turn was _promoted_ to extension\n' + ext.conditionalLinkExt(ext.supercedingExtension), file=file)
            ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, isRefpage, file)
    elif ext.deprecationType == 'deprecation':
        if ext.supercedingAPIVersion:
            # '-new-features' is the suffix makeMetafile() uses for the
            # same kind of link.
            write(' ** Which in turn was _deprecated_ by\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-new-features', isRefpage), file=file)
        elif ext.supercedingExtension:
            write(' ** Which in turn was _deprecated_ by\n' + ext.conditionalLinkExt(ext.supercedingExtension) + ' extension', file=file)
            ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, isRefpage, file)
        else:
            write(' ** Which in turn was _deprecated_ without replacement', file=file)
    elif ext.deprecationType == 'obsoletion':
        if ext.supercedingAPIVersion:
            write(' ** Which in turn was _obsoleted_ by\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-new-features', isRefpage), file=file)
        elif ext.supercedingExtension:
            write(' ** Which in turn was _obsoleted_ by\n' + ext.conditionalLinkExt(ext.supercedingExtension) + ' extension', file=file)
            ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, isRefpage, file)
        else:
            write(' ** Which in turn was _obsoleted_ without replacement', file=file)
    else: # should be unreachable
        self.generator.logMsg('error', 'Logic error in resolveDeprecationChain(): deprecationType is neither \'promotion\', \'deprecation\' nor \'obsoletion\'!')
+ + - extensionsList - list of extensions spec is being generated against + - isRefpage - True if generating a refpage include, False if + generating a specification extension appendix include""" + + if isRefpage: + filename = self.filename.replace('meta/', 'meta/refpage.') + else: + filename = self.filename + + fp = self.generator.newFile(filename) + + if not isRefpage: + write('[[' + self.name + ']]', file=fp) + write('=== ' + self.name, file=fp) + write('', file=fp) + + self.writeTag('Name String', '`' + self.name + '`', isRefpage, fp) + self.writeTag('Extension Type', self.typeToStr(), isRefpage, fp) + + self.writeTag('Registered Extension Number', self.number, isRefpage, fp) + self.writeTag('Revision', self.revision, isRefpage, fp) + + # Only API extension dependencies are coded in XML, others are explicit + self.writeTag('Extension and Version Dependencies', None, isRefpage, fp) + + write(' * Requires ' + self.conventions.api_name() + ' ' + self.requiresCore, file=fp) + if self.requires: + for dep in self.requires.split(','): + write(' * Requires', self.conventions.formatExtension(dep), + file=fp) + if self.provisional == 'true': + write(' * *This is a _provisional_ extension and must: be used with caution.', file=fp) + write(' See the ' + + self.specLink(xrefName = 'boilerplate-provisional-header', + xrefText = 'description', + isRefpage = isRefpage) + + ' of provisional header files for enablement and stability details.*', file=fp) + write('', file=fp) + + if self.deprecationType: + self.writeTag('Deprecation state', None, isRefpage, fp) + + if self.deprecationType == 'promotion': + if self.supercedingAPIVersion: + write(' * _Promoted_ to\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-promotions', isRefpage), file=fp) + else: # ext.supercedingExtension + write(' * _Promoted_ to\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension', file=fp) + self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, 
fp) + elif self.deprecationType == 'deprecation': + if self.supercedingAPIVersion: + write(' * _Deprecated_ by\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-new-features', isRefpage), file=fp) + elif self.supercedingExtension: + write(' * _Deprecated_ by\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension' , file=fp) + self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) + else: + write(' * _Deprecated_ without replacement' , file=fp) + elif self.deprecationType == 'obsoletion': + if self.supercedingAPIVersion: + write(' * _Obsoleted_ by\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-new-features', isRefpage), file=fp) + elif self.supercedingExtension: + write(' * _Obsoleted_ by\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension' , file=fp) + self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) + else: + # TODO: Does not make sense to retroactively ban use of extensions from 1.0. + # Needs some tweaks to the semantics and this message, when such extension(s) occur. 
+ write(' * _Obsoleted_ without replacement' , file=fp) + else: # should be unreachable + self.generator.logMsg('error', 'Logic error in makeMetafile(): deprecationType is neither \'promotion\', \'deprecation\' nor \'obsoletion\'!') + write('', file=fp) + + if self.specialuse is not None: + specialuses = self.specialuse.split(',') + if len(specialuses) > 1: + header = 'Special Uses' + else: + header = 'Special Use' + self.writeTag(header, None, isRefpage, fp) + + for use in specialuses: + # Each specialuse attribute value expands an asciidoctor + # attribute of the same name, instead of using the shorter, + # and harder to understand attribute + write('* {}'.format( + self.specLink( + xrefName = self.conventions.special_use_section_anchor, + xrefText = '{' + use + '}', + isRefpage = isRefpage)), file=fp) + write('', file=fp) + + if self.conventions.write_contacts: + self.writeTag('Contact', None, isRefpage, fp) + + contacts = self.contact.split(',') + for contact in contacts: + contactWords = contact.strip().split() + name = ' '.join(contactWords[:-1]) + handle = contactWords[-1] + if handle.startswith('gitlab:'): + prettyHandle = 'icon:gitlab[alt=GitLab, role="red"]' + handle.replace('gitlab:@', '') + elif handle.startswith('@'): + issuePlaceholderText = '[' + self.name + '] ' + handle + issuePlaceholderText += '%0A<>' + trackerLink = 'link:++https://github.com/KhronosGroup/Vulkan-Docs/issues/new?body=' + issuePlaceholderText + '++' + prettyHandle = trackerLink + '[icon:github[alt=GitHub,role="black"]' + handle[1:] + ', window=_blank]' + else: + prettyHandle = handle + + write(' * ' + name + ' ' + prettyHandle, file=fp) + write('', file=fp) + + # Check if a proposal document for this extension exists in the + # current repository, and link to the same document (parameterized + # by a URL prefix attribute) if it does. 
+ # The assumption is that a proposal document for an extension + # VK_name will be located in 'proposals/VK_name.asciidoc' relative + # to the repository root, and that this script will be invoked from + # the repository root. + path = 'proposals/{}.asciidoc'.format(self.name) + if os.path.exists(path) and os.access(path, os.R_OK): + self.writeTag('Extension Proposal', + 'link:{{specRepositoryURL}}/{}[{}]'.format(path, self.name), isRefpage, fp) + + fp.close() + +class ExtensionMetaDocOutputGenerator(OutputGenerator): + """ExtensionMetaDocOutputGenerator - subclass of OutputGenerator. + + Generates AsciiDoc includes with metainformation for the API extension + appendices. The fields used from tags in the API XML are: + + - name extension name string + - number extension number (optional) + - contact name and GitHub login or email address (optional) + - type 'instance' | 'device' (optional) + - requires list of comma-separated required API extensions (optional) + - requiresCore required core version of API (optional) + - promotedTo extension or API version it was promoted to + - deprecatedBy extension or API version which deprecated this extension, + or empty string if deprecated without replacement + - obsoletedBy extension or API version which obsoleted this extension, + or empty string if obsoleted without replacement + - provisional 'true' if this extension is released provisionally""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.extensions = [] + # List of strings containing all vendor tags + self.vendor_tags = [] + self.file_suffix = '' + + def newFile(self, filename): + self.logMsg('diag', '# Generating include file:', filename) + fp = open(filename, 'w', encoding='utf-8') + write(self.genOpts.conventions.warning_comment, file=fp) + return fp + + def beginFile(self, genOpts): + OutputGenerator.beginFile(self, genOpts) + + self.directory = self.genOpts.directory + self.file_suffix = self.genOpts.conventions.file_suffix + 
def conditionalExt(self, extName, content, ifdef = None, condition = None):
    """Wrap `content` in asciidoctor conditionals for an extension.

    - extName - extension attribute that must be defined
    - content - text to emit when the conditions hold
    - ifdef - None, 'ifdef' or 'ifndef': how `condition` is applied
    - condition - additional attribute name, or None

    With no `ifdef`, the content is guarded by `extName` alone.
    With 'ifndef', the guarded content is additionally wrapped in
    `ifndef::condition`; a missing condition yields an empty string.
    With 'ifdef', a single `ifdef::condition+extName` guard is emitted; a
    missing condition falls back to the `extName`-only guard.

    Raises RuntimeError for any other `ifdef` value."""
    # Content guarded only by the extension's own attribute
    guarded = ''.join(('ifdef::', extName, '[]\n',
                       content, '\n',
                       'endif::', extName, '[]\n'))

    if not ifdef:
        return guarded

    if ifdef == 'ifndef':
        if not condition:
            # no condition is as if condition is defined, so no output
            return ''
        return ''.join(('ifndef::', condition, '[]\n',
                        guarded,
                        'endif::', condition, '[]\n'))

    if ifdef == 'ifdef':
        if not condition:
            return guarded
        # Both attributes are merged into one ifdef, so the inner
        # extName-only guard is not repeated.
        merged = condition + '+' + extName
        return ''.join(('ifdef::', merged, '[]\n',
                        content, '\n',
                        'endif::', merged, '[]\n'))

    # should be unreachable
    raise RuntimeError('Should be unreachable: ifdef is neither \'ifdef \' nor \'ifndef\'!')
promoted_extensions_fp = self.newFile(self.directory + '/promoted_extensions_' + coreVersion + self.file_suffix) + + for ext in extensions: + indent = '' + write(' * {blank}\n+\n' + ext.conditionalLinkExt(ext.name, indent), file=promoted_extensions_fp) + + promoted_extensions_fp.close() + + # Re-sort to match earlier behavior + # TODO: Remove this extra sort when re-arranging section order OK. + + def makeSortKey(ext): + name = ext.name.lower() + prefixes = self.conventions.extension_index_prefixes + for i, prefix in enumerate(prefixes): + if ext.name.startswith(prefix): + return (i, name) + return (len(prefixes), name) + + self.extensions.sort(key=makeSortKey) + + # Generate include directives for the extensions appendix, grouping + # extensions by status (current, deprecated, provisional, etc.) + with self.newFile(self.directory + '/current_extensions_appendix' + self.file_suffix) as current_extensions_appendix_fp, \ + self.newFile(self.directory + '/deprecated_extensions_appendix' + self.file_suffix) as deprecated_extensions_appendix_fp, \ + self.newFile(self.directory + '/current_extension_appendices' + self.file_suffix) as current_extension_appendices_fp, \ + self.newFile(self.directory + '/current_extension_appendices_toc' + self.file_suffix) as current_extension_appendices_toc_fp, \ + self.newFile(self.directory + '/deprecated_extension_appendices' + self.file_suffix) as deprecated_extension_appendices_fp, \ + self.newFile(self.directory + '/deprecated_extension_appendices_toc' + self.file_suffix) as deprecated_extension_appendices_toc_fp, \ + self.newFile(self.directory + '/deprecated_extensions_guard_macro' + self.file_suffix) as deprecated_extensions_guard_macro_fp, \ + self.newFile(self.directory + '/provisional_extensions_appendix' + self.file_suffix) as provisional_extensions_appendix_fp, \ + self.newFile(self.directory + '/provisional_extension_appendices' + self.file_suffix) as provisional_extension_appendices_fp, \ + self.newFile(self.directory + 
'/provisional_extension_appendices_toc' + self.file_suffix) as provisional_extension_appendices_toc_fp, \ + self.newFile(self.directory + '/provisional_extensions_guard_macro' + self.file_suffix) as provisional_extensions_guard_macro_fp: + + write('', file=current_extensions_appendix_fp) + write('include::deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + write('', file=current_extensions_appendix_fp) + write('ifndef::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) + write('[[extension-appendices-list]]', file=current_extensions_appendix_fp) + write('== List of Extensions', file=current_extensions_appendix_fp) + write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) + write('ifdef::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) + write('[[extension-appendices-list]]', file=current_extensions_appendix_fp) + write('== List of Current Extensions', file=current_extensions_appendix_fp) + write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) + write('', file=current_extensions_appendix_fp) + write('include::current_extension_appendices_toc' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + write('\n<<<\n', file=current_extensions_appendix_fp) + write('include::current_extension_appendices' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + + write('', file=deprecated_extensions_appendix_fp) + write('include::deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) + write('', file=deprecated_extensions_appendix_fp) + write('ifdef::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) + write('[[deprecated-extension-appendices-list]]', file=deprecated_extensions_appendix_fp) + write('== List of Deprecated Extensions', file=deprecated_extensions_appendix_fp) + write('include::deprecated_extension_appendices_toc' + self.file_suffix + '[]', 
file=deprecated_extensions_appendix_fp) + write('\n<<<\n', file=deprecated_extensions_appendix_fp) + write('include::deprecated_extension_appendices' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) + write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) + + # add include guards to allow multiple includes + write('ifndef::DEPRECATED_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD[]', file=deprecated_extensions_guard_macro_fp) + write(':DEPRECATED_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD:\n', file=deprecated_extensions_guard_macro_fp) + write('ifndef::PROVISIONAL_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD[]', file=provisional_extensions_guard_macro_fp) + write(':PROVISIONAL_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD:\n', file=provisional_extensions_guard_macro_fp) + + write('', file=provisional_extensions_appendix_fp) + write('include::provisional_extensions_guard_macro' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('', file=provisional_extensions_appendix_fp) + write('ifdef::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) + write('[[provisional-extension-appendices-list]]', file=provisional_extensions_appendix_fp) + write('== List of Provisional Extensions', file=provisional_extensions_appendix_fp) + write('include::provisional_extension_appendices_toc' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('\n<<<\n', file=provisional_extensions_appendix_fp) + write('include::provisional_extension_appendices' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('endif::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) + + for ext in self.extensions: + include = self.makeExtensionInclude(ext) + link = ' * ' + self.conventions.formatExtension(ext.name) + if ext.provisional == 'true': + write(self.conditionalExt(ext.name, include), file=provisional_extension_appendices_fp) + write(self.conditionalExt(ext.name, link), 
file=provisional_extension_appendices_toc_fp) + write(self.conditionalExt(ext.name, ':HAS_PROVISIONAL_EXTENSIONS:'), file=provisional_extensions_guard_macro_fp) + elif ext.deprecationType is None: + write(self.conditionalExt(ext.name, include), file=current_extension_appendices_fp) + write(self.conditionalExt(ext.name, link), file=current_extension_appendices_toc_fp) + else: + condition = ext.supercedingAPIVersion if ext.supercedingAPIVersion else ext.supercedingExtension # potentially None too + + write(self.conditionalExt(ext.name, include, 'ifndef', condition), file=current_extension_appendices_fp) + write(self.conditionalExt(ext.name, link, 'ifndef', condition), file=current_extension_appendices_toc_fp) + + write(self.conditionalExt(ext.name, include, 'ifdef', condition), file=deprecated_extension_appendices_fp) + write(self.conditionalExt(ext.name, link, 'ifdef', condition), file=deprecated_extension_appendices_toc_fp) + + write(self.conditionalExt(ext.name, ':HAS_DEPRECATED_EXTENSIONS:', 'ifdef', condition), file=deprecated_extensions_guard_macro_fp) + + write('endif::DEPRECATED_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD[]', file=deprecated_extensions_guard_macro_fp) + write('endif::PROVISIONAL_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD[]', file=provisional_extensions_guard_macro_fp) + + OutputGenerator.endFile(self) + + def beginFeature(self, interface, emit): + # Start processing in superclass + OutputGenerator.beginFeature(self, interface, emit) + + if interface.tag != 'extension': + self.logMsg('diag', 'beginFeature: ignoring non-extension feature', self.featureName) + return + + # These attributes must exist + name = self.featureName + number = self.getAttrib(interface, 'number') + ext_type = self.getAttrib(interface, 'type') + revision = self.getSpecVersion(interface, name) + + # These attributes are optional + OPTIONAL = False + requires = self.getAttrib(interface, 'requires', OPTIONAL) + requiresCore = self.getAttrib(interface, 'requiresCore', OPTIONAL, '1.0') # 
def getAttrib(self, elem, attribute, required=True, default=None):
    """Query an attribute from an element, or return a default value

    - elem - element to query
    - attribute - attribute name
    - required - whether attribute must exist
    - default - default value if attribute not present

    A missing required attribute is reported through logMsg('error', ...);
    the (None) value is still returned afterwards."""
    attrib = elem.get(attribute, default)
    if required and (attrib is None):
        name = elem.get('name', 'UNKNOWN')
        # The feature name is quoted on both sides, like the element name
        # and the attribute name that follow it.
        self.logMsg('error', 'While processing \'' + self.featureName + '\', <' + elem.tag + '> \'' + name + '\' does not contain required attribute \'' + attribute + '\'')
    return attrib

def numbersToWords(self, name):
    """Insert an underscore before a digit run that follows an upper-case
    letter and is not itself followed by an upper-case letter.

    - name - upper-cased name to transform

    The tokens in `allowlist` are protected: they are swapped for
    placeholders before the substitution and restored afterwards."""
    allowlist = ['WIN32', 'INT16', 'D3D1']

    # temporarily replace allowlist items
    for i, w in enumerate(allowlist):
        name = re.sub(w, '{' + str(i) + '}', name)

    name = re.sub(r'(?<=[A-Z])(\d+)(?![A-Z])', r'_\g<1>', name)

    # undo allowlist substitution
    for i, w in enumerate(allowlist):
        name = re.sub('\\{' + str(i) + '}', w, name)

    return name
def getSpecVersion(self, elem, extname, default=None):
    """Determine the extension revision from the EXTENSION_NAME_SPEC_VERSION
    enumerant.

    - elem - element to query
    - extname - extension name from the 'name' attribute
    - default - default value if SPEC_VERSION token not present

    Returns the 'value' attribute of the exactly-named enumerant if there
    is one; otherwise that of the first enumerant whose name contains
    'SPEC_VERSION' (logged as a potential misnaming); otherwise `default`
    (logged as an error)."""
    # The literal enumerant name to match
    versioningEnumName = self.numbersToWords(extname.upper()) + '_SPEC_VERSION'

    for enum in elem.findall('./require/enum'):
        enumName = self.getAttrib(enum, 'name')
        if enumName == versioningEnumName:
            return self.getAttrib(enum, 'value')

    #if not found:
    for enum in elem.findall('./require/enum'):
        enumName = self.getAttrib(enum, 'name')
        # getAttrib() may hand back None for an enum without a name; such
        # an entry cannot be a candidate, so skip it.
        if enumName is not None and 'SPEC_VERSION' in enumName:
            self.logMsg('diag', 'Missing ' + versioningEnumName + '! Potential misnamed candidate ' + enumName + '.')
            return self.getAttrib(enum, 'value')

    self.logMsg('error', 'Missing ' + versioningEnumName + '!')
    return default
def isextension(name):
    """Tell whether `name` looks like an API extension name, i.e. whether it
    ends with an upper-case author ID.

    Only the last two characters are inspected, so author IDs are assumed
    to be at least two characters long."""
    suffix = name[-2:]
    return suffix.isalpha() and suffix.isupper()


def printCopyrightSourceComments(fp):
    """Write the Khronos CC-BY copyright notice to the open file fp.

    The notice is an asciidoc comment block that copyrights the source
    file, followed by an empty line."""
    # The identifier is assembled from two pieces to work around
    # constraints of the 'reuse' tool.
    license_line = '// SPDX' + '-License-Identifier: CC-BY-4.0'
    print('// Copyright 2014-2023 The Khronos Group, Inc.',
          '//',
          license_line,
          '',
          sep='\n', file=fp)


def printFooter(fp):
    """Write the material that ends each refpage to the open file fp.

    The copyright include is emitted under `ifdef::doctype-manpage` and a
    `<<<` separator under `ifndef::doctype-manpage`."""
    footer_lines = (
        'ifdef::doctype-manpage[]',
        '== Copyright',
        '',
        'include::{config}/copyright-ccby.txt[]',
        'endif::doctype-manpage[]',
        '',
        'ifndef::doctype-manpage[]',
        '<<<',
        'endif::doctype-manpage[]',
        '',
    )
    for footer_line in footer_lines:
        print(footer_line, file=fp)
+ + If the name is not recognized, use the generic link macro 'reflink:'.""" + if name in api.basetypes: + return 'basetype:' + name + if name in api.defines: + return 'dlink:' + name + if name in api.enums: + return 'elink:' + name + if name in api.flags: + return 'elink:' + name + if name in api.funcpointers: + return 'tlink:' + name + if name in api.handles: + return 'slink:' + name + if name in api.protos: + return 'flink:' + name + if name in api.structs: + return 'slink:' + name + if name == 'TBD': + return 'No cross-references are available' + return 'reflink:' + name + + +def seeAlsoList(apiName, explicitRefs=None, apiAliases=[]): + """Return an asciidoc string with a list of 'See Also' references for the + API entity 'apiName', based on the relationship mapping in the api module. + + 'explicitRefs' is a list of additional cross-references. + + If apiAliases is not None, it is a list of aliases of apiName whose + cross-references will also be included. + + If no relationships are available, return None.""" + + refs = set(()) + + # apiName and its aliases are treated equally + allApis = apiAliases.copy() + allApis.append(apiName) + + # Add all the implicit references to refs + for name in allApis: + if name in api.mapDict: + refs.update(api.mapDict[name]) + + # Add all the explicit references + if explicitRefs is not None: + if isinstance(explicitRefs, str): + explicitRefs = explicitRefs.split() + refs.update(name for name in explicitRefs) + + # Add extensions / core versions based on dependencies + for name in allApis: + if name in api.requiredBy: + for (base,dependency) in api.requiredBy[name]: + refs.add(base) + if dependency is not None: + refs.add(dependency) + + if len(refs) == 0: + return None + else: + return ', '.join(macroPrefix(name) for name in sorted(refs)) + '\n' + + +def remapIncludes(lines, baseDir, specDir): + """Remap include directives in a list of lines so they can be extracted to a + different directory. + + Returns remapped lines. 
+ + - lines - text to remap + - baseDir - target directory + - specDir - source directory""" + # This should be compiled only once + includePat = re.compile(r'^include::(?P.*)\[\]') + + newLines = [] + for line in lines: + matches = includePat.search(line) + if matches is not None: + path = matches.group('path') + + if path[0] != '{': + # Relative path to include file from here + incPath = specDir + '/' + path + # Remap to be relative to baseDir + newPath = os.path.relpath(incPath, baseDir) + newLine = 'include::' + newPath + '[]\n' + logDiag('remapIncludes: remapping', line, '->', newLine) + newLines.append(newLine) + else: + # An asciidoctor variable starts the path. + # This must be an absolute path, not needing to be rewritten. + newLines.append(line) + else: + newLines.append(line) + return newLines + + +def refPageShell(pageName, pageDesc, fp, head_content = None, sections=None, tail_content=None, man_section=3): + """Generate body of a reference page. + + - pageName - string name of the page + - pageDesc - string short description of the page + - fp - file to write to + - head_content - text to include before the sections + - sections - iterable returning (title,body) for each section. 
+ - tail_content - text to include after the sections + - man_section - Unix man page section""" + + printCopyrightSourceComments(fp) + + print(':data-uri:', + ':icons: font', + conventions.extra_refpage_headers, + '', + sep='\n', file=fp) + + s = '{}({})'.format(pageName, man_section) + print('= ' + s, + '', + sep='\n', file=fp) + if pageDesc.strip() == '': + pageDesc = 'NO SHORT DESCRIPTION PROVIDED' + logWarn('refPageHead: no short description provided for', pageName) + + print('== Name', + '{} - {}'.format(pageName, pageDesc), + '', + sep='\n', file=fp) + + if head_content is not None: + print(head_content, + '', + sep='\n', file=fp) + + if sections is not None: + for title, content in sections.items(): + print('== {}'.format(title), + '', + content, + '', + sep='\n', file=fp) + + if tail_content is not None: + print(tail_content, + '', + sep='\n', file=fp) + + +def refPageHead(pageName, pageDesc, specText, fieldName, fieldText, descText, fp): + """Generate header of a reference page. + + - pageName - string name of the page + - pageDesc - string short description of the page + - specType - string containing 'spec' field from refpage open block, or None. + Used to determine containing spec name and URL. 
+ - specText - string that goes in the "C Specification" section + - fieldName - string heading an additional section following specText, if not None + - fieldText - string that goes in the additional section + - descText - string that goes in the "Description" section + - fp - file to write to""" + sections = OrderedDict() + + if specText is not None: + sections['C Specification'] = specText + + if fieldName is not None: + sections[fieldName] = fieldText + + if descText is None or descText.strip() == '': + logWarn('refPageHead: no description provided for', pageName) + + if descText is not None: + sections['Description'] = descText + + refPageShell(pageName, pageDesc, fp, head_content=None, sections=sections) + + +def refPageTail(pageName, + specType=None, + specAnchor=None, + seeAlso=None, + fp=None, + auto=False): + """Generate end boilerplate of a reference page. + + - pageName - name of the page + - specType - None or the 'spec' attribute from the refpage block, + identifying the specification name and URL this refpage links to. + - specAnchor - None or the 'anchor' attribute from the refpage block, + identifying the anchor in the specification this refpage links to. If + None, the pageName is assumed to be a valid anchor.""" + + specName = conventions.api_name(specType) + specURL = conventions.specURL(specType) + if specAnchor is None: + specAnchor = pageName + + if seeAlso is None: + seeAlso = 'No cross-references are available\n' + + notes = [ + 'For more information, see the {}#{}[{} Specification^]'.format( + specURL, specAnchor, specName), + '', + ] + + if auto: + notes.extend(( + 'This page is a generated document.', + 'Fixes and changes should be made to the generator scripts, ' + 'not directly.', + )) + else: + notes.extend(( + 'This page is extracted from the ' + specName + ' Specification. 
', + 'Fixes and changes should be made to the Specification, ' + 'not directly.', + )) + + print('== See Also', + '', + seeAlso, + '', + sep='\n', file=fp) + + print('== Document Notes', + '', + '\n'.join(notes), + '', + sep='\n', file=fp) + + printFooter(fp) + + +def xrefRewriteInitialize(): + """Initialize substitution patterns for asciidoctor xrefs.""" + + global refLinkPattern, refLinkSubstitute + global refLinkTextPattern, refLinkTextSubstitute + global specLinkPattern, specLinkSubstitute + + # These are xrefs to Vulkan API entities, rewritten to link to refpages + # The refLink variants are for xrefs with only an anchor and no text. + # The refLinkText variants are for xrefs with both anchor and text + refLinkPattern = re.compile(r'<<([Vv][Kk][^>,]+)>>') + refLinkSubstitute = r'link:\1.html[\1^]' + + refLinkTextPattern = re.compile(r'<<([Vv][Kk][^>,]+)[,]?[ \t\n]*([^>,]*)>>') + refLinkTextSubstitute = r'link:\1.html[\2^]' + + # These are xrefs to other anchors, rewritten to link to the spec + specLinkPattern = re.compile(r'<<([^>,]+)[,]?[ \t\n]*([^>,]*)>>') + + # Unfortunately, specLinkSubstitute depends on the link target, + # so can't be constructed in advance. + specLinkSubstitute = None + + +def xrefRewrite(text, specURL): + """Rewrite asciidoctor xrefs in text to resolve properly in refpages. + Xrefs which are to Vulkan refpages are rewritten to link to those + refpages. The remainder are rewritten to generate external links into + the supplied specification document URL. 
+ + - text - string to rewrite, or None + - specURL - URL to target + + Returns rewritten text, or None, respectively""" + + global refLinkPattern, refLinkSubstitute + global refLinkTextPattern, refLinkTextSubstitute + global specLinkPattern, specLinkSubstitute + + specLinkSubstitute = r'link:{}#\1[\2^]'.format(specURL) + + if text is not None: + text, _ = refLinkPattern.subn(refLinkSubstitute, text) + text, _ = refLinkTextPattern.subn(refLinkTextSubstitute, text) + text, _ = specLinkPattern.subn(specLinkSubstitute, text) + + return text + +def emitPage(baseDir, specDir, pi, file): + """Extract a single reference page into baseDir. + + - baseDir - base directory to emit page into + - specDir - directory extracted page source came from + - pi - pageInfo for this page relative to file + - file - list of strings making up the file, indexed by pi""" + pageName = baseDir + '/' + pi.name + '.txt' + + # Add a dictionary entry for this page + global genDict + genDict[pi.name] = None + logDiag('emitPage:', pageName) + + # Short description + if pi.desc is None: + pi.desc = '(no short description available)' + + # Member/parameter section label and text, if there is one + field = None + fieldText = None + + if pi.type != 'freeform' and pi.type != 'spirv': + if pi.include is None: + # Not sure how this happens yet + logWarn('emitPage:', pageName, 'INCLUDE is None, no page generated') + return + + # Specification text from beginning to just before the parameter + # section. This covers the description, the prototype, the version + # note, and any additional version note text. If a parameter section + # is absent then go a line beyond the include. 
+ remap_end = pi.include + 1 if pi.param is None else pi.param + lines = remapIncludes(file[pi.begin:remap_end], baseDir, specDir) + specText = ''.join(lines) + + if pi.param is not None: + if pi.type == 'structs': + field = 'Members' + elif pi.type in ['protos', 'funcpointers']: + field = 'Parameters' + else: + logWarn('emitPage: unknown field type:', pi.type, + 'for', pi.name) + lines = remapIncludes(file[pi.param:pi.body], baseDir, specDir) + fieldText = ''.join(lines) + + # Description text + if pi.body != pi.include: + lines = remapIncludes(file[pi.body:pi.end + 1], baseDir, specDir) + descText = ''.join(lines) + else: + descText = None + logWarn('emitPage: INCLUDE == BODY, so description will be empty for', pi.name) + if pi.begin != pi.include: + logWarn('emitPage: Note: BEGIN != INCLUDE, so the description might be incorrectly located before the API include!') + else: + specText = None + descText = ''.join(file[pi.begin:pi.end + 1]) + + # Rewrite asciidoctor xrefs to resolve properly in refpages + specURL = conventions.specURL(pi.spec) + + specText = xrefRewrite(specText, specURL) + fieldText = xrefRewrite(fieldText, specURL) + descText = xrefRewrite(descText, specURL) + + fp = open(pageName, 'w', encoding='utf-8') + refPageHead(pi.name, + pi.desc, + specText, + field, fieldText, + descText, + fp) + refPageTail(pageName=pi.name, + specType=pi.spec, + specAnchor=pi.anchor, + seeAlso=seeAlsoList(pi.name, pi.refs, pi.alias.split()), + fp=fp, + auto=False) + fp.close() + + +def autoGenEnumsPage(baseDir, pi, file): + """Autogenerate a single reference page in baseDir. + + Script only knows how to do this for /enums/ pages, at present. 
+ + - baseDir - base directory to emit page into + - pi - pageInfo for this page relative to file + - file - list of strings making up the file, indexed by pi""" + pageName = baseDir + '/' + pi.name + '.txt' + fp = open(pageName, 'w', encoding='utf-8') + + # Add a dictionary entry for this page + global genDict + genDict[pi.name] = None + logDiag('autoGenEnumsPage:', pageName) + + # Short description + if pi.desc is None: + pi.desc = '(no short description available)' + + # Description text. Allow for the case where an enum definition + # is not embedded. + if not pi.embed: + embedRef = '' + else: + embedRef = ''.join(( + ' * The reference page for ', + macroPrefix(pi.embed), + ', where this interface is defined.\n')) + + txt = ''.join(( + 'For more information, see:\n\n', + embedRef, + ' * The See Also section for other reference pages using this type.\n', + ' * The ' + apiName + ' Specification.\n')) + + refPageHead(pi.name, + pi.desc, + ''.join(file[pi.begin:pi.include + 1]), + None, None, + txt, + fp) + refPageTail(pageName=pi.name, + specType=pi.spec, + specAnchor=pi.anchor, + seeAlso=seeAlsoList(pi.name, pi.refs, pi.alias.split()), + fp=fp, + auto=True) + fp.close() + + +# Pattern to break apart an API *Flags{authorID} name, used in +# autoGenFlagsPage. +flagNamePat = re.compile(r'(?P\w+)Flags(?P[A-Z]*)') + + +def autoGenFlagsPage(baseDir, flagName): + """Autogenerate a single reference page in baseDir for an API *Flags type. 
+ + - baseDir - base directory to emit page into + - flagName - API *Flags name""" + pageName = baseDir + '/' + flagName + '.txt' + fp = open(pageName, 'w', encoding='utf-8') + + # Add a dictionary entry for this page + global genDict + genDict[flagName] = None + logDiag('autoGenFlagsPage:', pageName) + + # Short description + matches = flagNamePat.search(flagName) + if matches is not None: + name = matches.group('name') + author = matches.group('author') + logDiag('autoGenFlagsPage: split name into', name, 'Flags', author) + flagBits = name + 'FlagBits' + author + desc = 'Bitmask of ' + flagBits + else: + logWarn('autoGenFlagsPage:', pageName, 'does not end in "Flags{author ID}". Cannot infer FlagBits type.') + flagBits = None + desc = 'Unknown ' + apiName + ' flags type' + + # Description text + if flagBits is not None: + txt = ''.join(( + 'etext:' + flagName, + ' is a mask of zero or more elink:' + flagBits + '.\n', + 'It is used as a member and/or parameter of the structures and commands\n', + 'in the See Also section below.\n')) + else: + txt = ''.join(( + 'etext:' + flagName, + ' is an unknown ' + apiName + ' type, assumed to be a bitmask.\n')) + + refPageHead(flagName, + desc, + makeAPIInclude('flags', flagName), + None, None, + txt, + fp) + refPageTail(pageName=flagName, + specType=pi.spec, + specAnchor=pi.anchor, + seeAlso=seeAlsoList(flagName, None), + fp=fp, + auto=True) + fp.close() + + +def autoGenHandlePage(baseDir, handleName): + """Autogenerate a single handle page in baseDir for an API handle type. + + - baseDir - base directory to emit page into + - handleName - API handle name""" + # @@ Need to determine creation function & add handles/ include for the + # @@ interface in generator.py. 
+ pageName = baseDir + '/' + handleName + '.txt' + fp = open(pageName, 'w', encoding='utf-8') + + # Add a dictionary entry for this page + global genDict + genDict[handleName] = None + logDiag('autoGenHandlePage:', pageName) + + # Short description + desc = apiName + ' object handle' + + descText = ''.join(( + 'sname:' + handleName, + ' is an object handle type, referring to an object used\n', + 'by the ' + apiName + ' implementation. These handles are created or allocated\n', + 'by the @@ TBD @@ function, and used by other ' + apiName + ' structures\n', + 'and commands in the See Also section below.\n')) + + refPageHead(handleName, + desc, + makeAPIInclude('handles', handleName), + None, None, + descText, + fp) + refPageTail(pageName=handleName, + specType=pi.spec, + specAnchor=pi.anchor, + seeAlso=seeAlsoList(handleName, None), + fp=fp, + auto=True) + fp.close() + + +def genRef(specFile, baseDir): + """Extract reference pages from a spec asciidoc source file. + + - specFile - filename to extract from + - baseDir - output directory to generate page in""" + file = loadFile(specFile) + if file is None: + return + + # Save the path to this file for later use in rewriting relative includes + specDir = os.path.dirname(os.path.abspath(specFile)) + + pageMap = findRefs(file, specFile) + logDiag(specFile + ': found', len(pageMap.keys()), 'potential pages') + + sys.stderr.flush() + + # Fix up references in pageMap + fixupRefs(pageMap, specFile, file) + + # Create each page, if possible + pages = {} + + for name in sorted(pageMap): + pi = pageMap[name] + + printPageInfo(pi, file) + + if pi.Warning: + logDiag('genRef:', pi.name + ':', pi.Warning) + + if pi.extractPage: + emitPage(baseDir, specDir, pi, file) + elif pi.type == 'enums': + autoGenEnumsPage(baseDir, pi, file) + elif pi.type == 'flags': + autoGenFlagsPage(baseDir, pi.name) + else: + # Don't extract this page + logWarn('genRef: Cannot extract or autogenerate:', pi.name) + + pages[pi.name] = pi + for alias in 
pi.alias.split(): + pages[alias] = pi + + return pages + + +def genSinglePageRef(baseDir): + """Generate baseDir/apispec.txt, the single-page version of the ref pages. + + This assumes there's a page for everything in the api module dictionaries. + Extensions (KHR, EXT, etc.) are currently skipped""" + # Accumulate head of page + head = io.StringIO() + + printCopyrightSourceComments(head) + + print('= ' + apiName + ' API Reference Pages', + ':data-uri:', + ':icons: font', + ':doctype: book', + ':numbered!:', + ':max-width: 200', + ':data-uri:', + ':toc2:', + ':toclevels: 2', + '', + sep='\n', file=head) + + print('== Copyright', file=head) + print('', file=head) + print('include::{config}/copyright-ccby.txt[]', file=head) + print('', file=head) + # Inject the table of contents. Asciidoc really ought to be generating + # this for us. + + sections = [ + [api.protos, 'protos', apiName + ' Commands'], + [api.handles, 'handles', 'Object Handles'], + [api.structs, 'structs', 'Structures'], + [api.enums, 'enums', 'Enumerations'], + [api.flags, 'flags', 'Flags'], + [api.funcpointers, 'funcpointers', 'Function Pointer Types'], + [api.basetypes, 'basetypes', apiName + ' Scalar types'], + [api.defines, 'defines', 'C Macro Definitions'], + [extensions, 'extensions', apiName + ' Extensions'] + ] + + # Accumulate body of page + body = io.StringIO() + + for (apiDict, label, title) in sections: + # Add section title/anchor header to body + anchor = '[[' + label + ',' + title + ']]' + print(anchor, + '== ' + title, + '', + ':leveloffset: 2', + '', + sep='\n', file=body) + + if label == 'extensions': + # preserve order of extensions since we already sorted the way we want. 
+ keys = apiDict.keys() + else: + keys = sorted(apiDict.keys()) + + for refPage in keys: + # Don't generate links for aliases, which are included with the + # aliased page + if refPage not in api.alias: + # Add page to body + if 'FlagBits' in refPage and conventions.unified_flag_refpages: + # OpenXR does not create separate ref pages for FlagBits: + # the FlagBits includes go in the Flags refpage. + # Previously the Vulkan script would only emit non-empty + # Vk*Flags pages, via the logic + # if refPage not in api.flags or api.flags[refPage] is not None + # emit page + # Now, all are emitted. + continue + else: + print('include::' + refPage + '.txt[]', file=body) + else: + # Alternatively, we could (probably should) link to the + # aliased refpage + logWarn('(Benign) Not including', refPage, + 'in single-page reference', + 'because it is an alias of', api.alias[refPage]) + + print('\n' + ':leveloffset: 0' + '\n', file=body) + + # Write head and body to the output file + pageName = baseDir + '/apispec.txt' + fp = open(pageName, 'w', encoding='utf-8') + + print(head.getvalue(), file=fp, end='') + print(body.getvalue(), file=fp, end='') + + head.close() + body.close() + fp.close() + + +def genExtension(baseDir, extpath, name, info): + """Generate refpage, and add dictionary entry for an extension + + - baseDir - output directory to generate page in + - extpath - None, or path to per-extension specification sources if + those are to be included in extension refpages + - name - extension name + - info - Element from XML""" + + # Add a dictionary entry for this page + global genDict + genDict[name] = None + declares = [] + elem = info.elem + + # Type of extension (instance, device, etc.) 
+ ext_type = elem.get('type') + + # Autogenerate interfaces from entry + for required in elem.find('require'): + req_name = required.get('name') + if not req_name: + # This isn't what we're looking for + continue + if req_name.endswith('_SPEC_VERSION') or req_name.endswith('_EXTENSION_NAME'): + # Don't link to spec version or extension name - those ref pages aren't created. + continue + + if required.get('extends'): + # These are either extensions of enumerated types, or const enum + # values: neither of which get a ref page - although we could + # include the enumerated types in the See Also list. + continue + + if req_name not in genDict: + logWarn('ERROR: {} (in extension {}) does not have a ref page.'.format(req_name, name)) + + declares.append(req_name) + + # import pdb + # pdb.set_trace() + + appbody = None + if extpath is not None: + appfp = open('{}/{}.txt'.format(extpath, name), 'r', encoding='utf-8') + if appfp is not None: + appbody = appfp.read() + + # Transform internal links to crosslinks + specURL = conventions.specURL() + appbody = xrefRewrite(appbody, specURL) + else: + logWarn('Cannot find extension appendix for', name) + + # Fall through to autogenerated page + extpath = None + appbody = None + appfp.close() + + # Include the extension appendix without an extra title + # head_content = 'include::{{appendices}}/{}.txt[]'.format(name) + + # Write the extension refpage + pageName = baseDir + '/' + name + '.txt' + logDiag('genExtension:', pageName) + fp = open(pageName, 'w', encoding='utf-8') + + # There are no generated titled sections + sections = None + + # 'See link:{html_spec_relative}#%s[ %s] in the main specification for complete information.' 
% ( + # name, name) + refPageShell(name, + "{} extension".format(ext_type), + fp, + appbody, + sections=sections) + refPageTail(pageName=name, + specType=None, + specAnchor=name, + seeAlso=seeAlsoList(name, declares), + fp=fp, + auto=True) + fp.close() + + +if __name__ == '__main__': + global genDict, extensions, conventions, apiName + genDict = {} + extensions = OrderedDict() + conventions = APIConventions() + apiName = conventions.api_name('api') + + parser = argparse.ArgumentParser() + + parser.add_argument('-diag', action='store', dest='diagFile', + help='Set the diagnostic file') + parser.add_argument('-warn', action='store', dest='warnFile', + help='Set the warning file') + parser.add_argument('-log', action='store', dest='logFile', + help='Set the log file for both diagnostics and warnings') + parser.add_argument('-genpath', action='store', + default='gen', + help='Path to directory containing generated files') + parser.add_argument('-basedir', action='store', dest='baseDir', + default=None, + help='Set the base directory in which pages are generated') + parser.add_argument('-noauto', action='store_true', + help='Don\'t generate inferred ref pages automatically') + parser.add_argument('files', metavar='filename', nargs='*', + help='a filename to extract ref pages from') + parser.add_argument('--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-extension', action='append', + default=[], + help='Specify an extension or extensions to add to targets') + parser.add_argument('-rewrite', action='store', + default=None, + help='Name of output file to write Apache mod_rewrite directives to') + parser.add_argument('-toc', action='store', + default=None, + help='Name of output file to write an alphabetical TOC to') + parser.add_argument('-registry', action='store', + default=conventions.registry_path, + help='Use specified registry file instead of default') + parser.add_argument('-extpath', action='store', + default=None, + help='Use extension 
descriptions from this directory instead of autogenerating extension refpages') + + results = parser.parse_args() + + # Look for api.py in the specified directory + if results.genpath is not None: + sys.path.insert(0, results.genpath) + import api + + setLogFile(True, True, results.logFile) + setLogFile(True, False, results.diagFile) + setLogFile(False, True, results.warnFile) + + # Initialize static rewrite patterns for spec xrefs + xrefRewriteInitialize() + + if results.baseDir is None: + baseDir = results.genpath + '/ref' + else: + baseDir = results.baseDir + + # Dictionary of pages & aliases + pages = {} + + for file in results.files: + d = genRef(file, baseDir) + pages.update(d) + + # Now figure out which pages *weren't* generated from the spec. + # This relies on the dictionaries of API constructs in the api module. + + if not results.noauto: + registry = Registry() + registry.loadFile(results.registry) + + if conventions.write_refpage_include: + # Only extensions with a supported="..." attribute in this set + # will be considered for extraction/generation. + supported_strings = set((conventions.xml_api_name,)) + ext_names = set(k for k, v in registry.extdict.items() + if v.supported in supported_strings) + + desired_extensions = ext_names.intersection(set(results.extension)) + for prefix in conventions.extension_index_prefixes: + # Splits up into chunks, sorted within each chunk. + filtered_extensions = sorted( + [name for name in desired_extensions + if name.startswith(prefix) and name not in extensions]) + for name in filtered_extensions: + # logWarn('NOT autogenerating extension refpage for', name) + extensions[name] = None + genExtension(baseDir, results.extpath, name, registry.extdict[name]) + + # autoGenFlagsPage is no longer needed because they are added to + # the spec sources now. 
+ # for page in api.flags: + # if page not in genDict: + # autoGenFlagsPage(baseDir, page) + + # autoGenHandlePage is no longer needed because they are added to + # the spec sources now. + # for page in api.structs: + # if typeCategory[page] == 'handle': + # autoGenHandlePage(baseDir, page) + + sections = [ + (api.flags, 'Flag Types'), + (api.enums, 'Enumerated Types'), + (api.structs, 'Structures'), + (api.protos, 'Prototypes'), + (api.funcpointers, 'Function Pointers'), + (api.basetypes, apiName + ' Scalar Types'), + (extensions, apiName + ' Extensions'), + ] + + # Summarize pages that weren't generated, for good or bad reasons + + for (apiDict, title) in sections: + # OpenXR was keeping a 'flagged' state which only printed out a + # warning for the first non-generated page, but was otherwise + # unused. This doesn't seem helpful. + for page in apiDict: + if page not in genDict: + # Page was not generated - why not? + if page in api.alias: + logWarn('(Benign, is an alias) Ref page for', title, page, 'is aliased into', api.alias[page]) + elif page in api.flags and api.flags[page] is None: + logWarn('(Benign, no FlagBits defined) No ref page generated for ', title, + page) + else: + # Could introduce additional logic to detect + # external types and not emit them. + logWarn('No ref page generated for ', title, page) + + genSinglePageRef(baseDir) + + if results.rewrite: + # Generate Apache rewrite directives for refpage aliases + fp = open(results.rewrite, 'w', encoding='utf-8') + + for page in sorted(pages): + p = pages[page] + rewrite = p.name + + if page != rewrite: + print('RewriteRule ^', page, '.html$ ', rewrite, '.html', + sep='', file=fp) + fp.close() + + if results.toc: + # Generate dynamic portion of refpage TOC + fp = open(results.toc, 'w', encoding='utf-8') + + # Run through dictionary of pages generating an TOC + print(12 * ' ', '
  • Alphabetic Contents', sep='', file=fp) + print(16 * ' ', '
      ', sep='', file=fp) + lastLetter = None + + for page in sorted(pages, key=str.upper): + p = pages[page] + letter = page[0:1].upper() + + if letter != lastLetter: + if lastLetter: + # End previous block + print(24 * ' ', '
    ', sep='', file=fp) + print(20 * ' ', '
  • ', sep='', file=fp) + # Start new block + print(20 * ' ', '
  • ', letter, sep='', file=fp) + print(24 * ' ', '
      ', sep='', file=fp) + lastLetter = letter + + # Add this page to the list + print(28 * ' ', '
    • ', page, '
    • ', + sep='', file=fp) + + if lastLetter: + # Close the final letter block + print(24 * ' ', '
    ', sep='', file=fp) + print(20 * ' ', '
  • ', sep='', file=fp) + + # Close the list + print(16 * ' ', '', sep='', file=fp) + print(12 * ' ', '', sep='', file=fp) + + # print('name {} -> page {}'.format(page, pages[page].name)) + + fp.close() diff --git a/xml/gen_dictionaries.py b/xml/gen_dictionaries.py new file mode 100644 index 000000000..069800dbb --- /dev/null +++ b/xml/gen_dictionaries.py @@ -0,0 +1,258 @@ +#!/usr/bin/python3 + +# Copyright 2019-2023 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +from collections import OrderedDict + +import argparse +import sys +import urllib +import xml.etree.ElementTree as etree +import urllib.request + +def parse_xml(path): + file = urllib.request.urlopen(path) if path.startswith("http") else open(path, 'r') + with file: + tree = etree.parse(file) + return tree + +# File Header: +def GetHeader(): + return """// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Creative Commons Attribution 4.0 International License; see +// http://creativecommons.org/licenses/by/4.0/ + +""" + +# File Footer: +def GetFooter(): + return """ +""" + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('-registry', action='store', + default='cl.xml', + help='Use specified registry file instead of cl.xml') + parser.add_argument('-o', action='store', dest='directory', + default='.', + help='Create target and related files in specified directory') + + args = parser.parse_args() + + linkFileName = args.directory + '/api-dictionary.asciidoc' + nolinkFileName = args.directory + '/api-dictionary-no-links.asciidoc' + typeFileName = args.directory + '/api-types.txt' + + specpath = args.registry + #specpath = "https://raw.githubusercontent.com/KhronosGroup/OpenCL-Registry/main/xml/cl.xml" + + print('Generating dictionaries from: ' + specpath) + + spec = parse_xml(specpath) + + linkFile = open(linkFileName, 'w') + nolinkFile = open(nolinkFileName, 'w') + linkFile.write( GetHeader() ) + nolinkFile.write( GetHeader() ) + 
typeFile = open(typeFileName, 'w') + + # Generate the API functions dictionaries: + + numberOfFuncs = 0 + + # Add core API functions with and without links: + for feature in spec.findall('feature/require'): + for api in feature.findall('command'): + name = api.get('name') + #print('found api: ' + name) + + # Example with link: + # + # // clEnqueueNDRangeKernel + # :clEnqueueNDRangeKernel_label: pass:q[*clEnqueueNDRangeKernel*] + # :clEnqueueNDRangeKernel: <> + linkFile.write('// ' + name + '\n') + linkFile.write(':' + name + '_label: pass:q[*' + name + '*]\n') + linkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') + linkFile.write('\n') + + # Example without link: + # + # // clEnqueueNDRangeKernel + # :clEnqueueNDRangeKernel: pass:q[*clEnqueueNDRangeKernel*] + nolinkFile.write('// ' + name + '\n') + nolinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') + nolinkFile.write('\n') + + numberOfFuncs = numberOfFuncs + 1 + + # Add extension API functions without links: + for extension in spec.findall('extensions/extension/require'): + for api in extension.findall('command'): + name = api.get('name') + #print('found extension api: ' +name) + + # Example without link: + # + # // clGetGLObjectInfo + # :clGetGLObjectInfo: pass:q[*clGetGLObjectInfo*] + linkFile.write('// ' + name + '\n') + linkFile.write(':' + name + ': pass:q[*' + name + '*]\n') + linkFile.write('\n') + + nolinkFile.write('// ' + name + '\n') + nolinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') + nolinkFile.write('\n') + + numberOfFuncs = numberOfFuncs + 1 + + print('Found ' + str(numberOfFuncs) + ' API functions.') + + # Generate the API enums dictionaries: + + numberOfEnums = 0 + + for enums in spec.findall('enums'): + name = enums.get('name') + for enum in enums.findall('enum'): + name = enum.get('name') + #print('found enum: ' + name) + + # Create a variant of the name that precedes underscores with + # "zero width" spaces. 
This causes some long names to be + # broken at more intuitive places. + htmlName = name[:3] + name[3:].replace("_", "_") + otherName = name[:3] + name[3:].replace("_", "_​") + + # Example with link: + # + # // CL_MEM_READ_ONLY + #:CL_MEM_READ_ONLY_label: pass:q[`CL_MEM_READ_ONLY`] + #:CL_MEM_READ_ONLY: <> + #:CL_MEM_READ_ONLY_anchor: [[CL_MEM_READ_ONLY]]{CL_MEM_READ_ONLY} + linkFile.write('// ' + name + '\n') + linkFile.write('ifdef::backend-html5[]\n') + linkFile.write(':' + name + '_label: pass:q[`' + htmlName + '`]\n') + linkFile.write('endif::[]\n') + linkFile.write('ifndef::backend-html5[]\n') + linkFile.write(':' + name + '_label: pass:q[`' + otherName + '`]\n') + linkFile.write('endif::[]\n') + linkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') + linkFile.write(':' + name + '_anchor: [[' + name + ']]{' + name + '}\n') + linkFile.write('\n') + + # Example without link: + # + # // CL_MEM_READ_ONLY + #:CL_MEM_READ_ONLY: pass:q[`CL_MEM_READ_ONLY`] + #:CL_MEM_READ_ONLY_anchor: {CL_MEM_READ_ONLY} + nolinkFile.write('// ' + name + '\n') + nolinkFile.write('ifdef::backend-html5[]\n') + nolinkFile.write(':' + name + ': pass:q[`' + htmlName + '`]\n') + nolinkFile.write('endif::[]\n') + nolinkFile.write('ifndef::backend-html5[]\n') + nolinkFile.write(':' + name + ': pass:q[`' + otherName + '`]\n') + nolinkFile.write('endif::[]\n') + nolinkFile.write(':' + name + '_anchor: {' + name + '}\n') + nolinkFile.write('\n') + + numberOfEnums = numberOfEnums + 1 + + print('Found ' + str(numberOfEnums) + ' API enumerations.') + + # Generate the API types dictionaries: + + numberOfTypes = 0 + + for types in spec.findall('types'): + for type in types.findall('type'): + addLink = False + name = "" + category = type.get('category') + if category == 'basetype': + name = type.get('name') + elif category == 'struct': + addLink = True + name = type.get('name') + elif category == 'define': + name = type.find('name').text + else: + continue + + #print('found type: ' 
+name) + + # Create a variant of the name that precedes underscores with + # "zero width" spaces. This causes some long names to be + # broken at more intuitive places. + if name.endswith('_t'): + htmlName = name + otherName = name + else: + htmlName = name[:3] + name[3:].replace("_", "_") + otherName = name[:3] + name[3:].replace("_", "_​") + + # Some types can have spaces in the name (such as unsigned char), + # but Asciidoctor attributes cannot. So, replace spaces with + # underscores for the attribute name. + attribName = name.replace(" ", "_") + + # Append the type suffix for disambiguation, since asciidoctor + # attributes are not case-sensitive (currently). + attribName = attribName + "_TYPE" + + # Example with link: + # + # // cl_image_desc + # :cl_image_desc_TYPE_label: pass:q[`cl_image_desc`] + # :cl_image_desc_TYPE: <> + linkFile.write('// ' + name + '\n') + if addLink: + linkFile.write('ifdef::backend-html5[]\n') + linkFile.write(':' + attribName + '_label: pass:q[`' + htmlName + '`]\n') + linkFile.write('endif::[]\n') + linkFile.write('ifndef::backend-html5[]\n') + linkFile.write(':' + attribName + '_label: pass:q[`' + otherName + '`]\n') + linkFile.write('endif::[]\n') + linkFile.write(':' + attribName + ': <<' + name + ',{' + attribName + '_label}>>\n') + else: + linkFile.write('ifdef::backend-html5[]\n') + linkFile.write(':' + attribName + ': pass:q[`' + htmlName + '`]\n') + linkFile.write('endif::[]\n') + linkFile.write('ifndef::backend-html5[]\n') + linkFile.write(':' + attribName + ': pass:q[`' + otherName + '`]\n') + linkFile.write('endif::[]\n') + linkFile.write('\n') + + # // cl_image_desc + # :cl_image_desc_TYPE: pass:q[`cl_image_desc`] + nolinkFile.write('// ' + name + '\n') + nolinkFile.write('ifdef::backend-html5[]\n') + nolinkFile.write(':' + attribName + ': pass:q[`' + htmlName + '`]\n') + nolinkFile.write('endif::[]\n') + nolinkFile.write('ifndef::backend-html5[]\n') + nolinkFile.write(':' + attribName + ': pass:q[`' + otherName + 
#!/usr/bin/python3

# Copyright 2019-2023 The Khronos Group Inc.
# SPDX-License-Identifier: Apache-2.0

"""Generate one small asciidoc "version note" file per API command and enum.

For every command/enum listed under a feature in the registry XML, a file
named <name>.asciidoc is written that states which version the entry is
listed under and, when applicable, which version deprecated it."""

from collections import OrderedDict

import argparse
import sys
import os
import urllib
import xml.etree.ElementTree as etree
import urllib.request


def parse_xml(path):
    """Parse the registry XML at `path` and return the resulting ElementTree.

    A path starting with "http" is fetched with urllib; anything else is
    opened as a local file. The source is closed once parsing finishes."""
    if path.startswith("http"):
        source = urllib.request.urlopen(path)
    else:
        source = open(path, 'r')
    with source:
        return etree.parse(source)


# File Header:
def GetHeader():
    """Return the license comment placed at the top of every generated file."""
    return """// Copyright 2017-2023 The Khronos Group. This work is licensed under a
// Creative Commons Attribution 4.0 International License; see
// http://creativecommons.org/licenses/by/4.0/
"""


# File Footer:
def GetFooter():
    """Return the text placed at the end of every generated file (a newline)."""
    return """
"""


# NOTE(review): the "<>" sequences in the note strings below look like
# Asciidoctor cross-references whose target text is missing - confirm
# against the upstream copy of this script before relying on the output.

def FullNote(name, added_in, deprecated_by):
    """Return the long-form note for `name` (used for API commands).

    Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in
    1.0 but now deprecated, and (4) added after 1.0 but now deprecated.

    - name - API entry name, substituted as an asciidoc attribute reference
    - added_in - version string of the feature listing the entry
    - deprecated_by - version string that deprecated it, or None"""
    is_new = added_in != "1.0"
    is_deprecated = deprecated_by is not None

    if is_new and is_deprecated:
        return "\nIMPORTANT: {%s} is <> version %s and <> version %s." % (name, added_in, deprecated_by)
    if is_new:
        return "\nIMPORTANT: {%s} is <> version %s." % (name, added_in)
    if is_deprecated:
        return "\nIMPORTANT: {%s} is <> version %s." % (name, deprecated_by)
    return "\n// Intentionally empty, %s has always been present." % name


def ShortNote(name, added_in, deprecated_by):
    """Return the short-form note for `name` (used for enums).

    Same four patterns as FullNote, but without the leading newline,
    the IMPORTANT: admonition and the entry name (except in the
    "always present" comment)."""
    is_new = added_in != "1.0"
    is_deprecated = deprecated_by is not None

    if is_new and is_deprecated:
        return "<> version %s and <> version %s." % (added_in, deprecated_by)
    if is_new:
        return "<> version %s." % added_in
    if is_deprecated:
        return "<> version %s." % deprecated_by
    return "// Intentionally empty, %s has always been present." % name


# Find feature groups that are parents of a feature/require/${entry_type}
# hierarchy, and then find all the ${entry_type} within each hierarchy:
def process_xml(spec, entry_type, note_printer, directory=None):
    """Write one <name>.asciidoc version note per `entry_type` element.

    - spec - parsed registry (anything offering ElementTree's findall)
    - entry_type - element tag to look for, e.g. "command" or "enum"
    - note_printer - callable(name, added_in, deprecated_by) returning the
      note text, e.g. FullNote or ShortNote
    - directory - output directory; when None, falls back to the
      command-line options parsed in the __main__ block (args.directory)

    Prints a one-line summary of how many entries were found."""
    if directory is None:
        # Only available when run as a script; importers must pass directory.
        directory = args.directory

    numberOfEntries = 0
    numberOfNewEntries = 0
    numberOfDeprecatedEntries = 0

    for feature in spec.findall('.//feature/require/%s/../..' % entry_type):
        for entry in feature.findall('.//%s' % entry_type):
            name = entry.get('name')

            numberOfEntries += 1
            added_in = feature.get('number')
            deprecated_by = None

            # All the groups that this specific API ${entry_type} belongs.
            categories = spec.findall(
                './/require[@comment]/%s[@name="%s"]/..' % (entry_type, name))
            for category in categories:
                comment = category.get('comment')
                if "deprecated in OpenCL" in comment:
                    # The deprecating version is the last word of the comment.
                    words = comment.split(" ")
                    assert " ".join(words[-4:-1]) == "deprecated in OpenCL"
                    assert deprecated_by is None  # Can't deprecate something twice.
                    deprecated_by = words[-1]

            versionFileName = os.path.join(directory, name + ".asciidoc")
            with open(versionFileName, 'w') as versionFile:
                versionFile.write(GetHeader())
                versionFile.write(note_printer(name, added_in, deprecated_by))
                versionFile.write(GetFooter())

            if added_in != "1.0":
                numberOfNewEntries += 1
            if deprecated_by is not None:
                numberOfDeprecatedEntries += 1

    print('Found ' + str(numberOfEntries) + ' API ' + entry_type + 's, '
          + str(numberOfNewEntries) + " newer than 1.0, "
          + str(numberOfDeprecatedEntries) + " are deprecated.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '-registry',
        action='store',
        default='cl.xml',
        help='Use specified registry file instead of cl.xml')
    parser.add_argument(
        '-o',
        action='store',
        dest='directory',
        default='.',
        help='Create target and related files in specified directory')

    args = parser.parse_args()

    specpath = args.registry

    print('Generating version notes from: ' + specpath)

    spec = parse_xml(specpath)

    # Generate the API functions dictionaries:

    process_xml(spec, "command", FullNote, args.directory)
    process_xml(spec, "enum", ShortNote, args.directory)
# Simple timer functions.
# startTime holds the process time recorded by the most recent startTimer()
# call, or None when no timing is in progress.
startTime = None


def startTimer(timeit):
    """Record the current process time if `timeit` is true; otherwise do nothing."""
    global startTime
    if not timeit:
        return
    startTime = time.process_time()


def endTimer(timeit, msg):
    """If `timeit` is true, log `msg` followed by the process time elapsed
    since the matching startTimer() call, then reset the timer."""
    global startTime
    if not timeit:
        return
    elapsed = time.process_time() - startTime
    logDiag(msg, elapsed)
    startTime = None


def makeREstring(strings, default=None, strings_are_regex=False):
    """Build an anchored regular expression string from a list of strings.

    - strings - the alternatives to match
    - default - returned unchanged when `strings` is empty and a default
      was supplied
    - strings_are_regex - when False (the default) each string is escaped
      so it matches literally; when True the strings are used as patterns

    The result has the form '^(a|b|c)$'. With no strings and no default
    this is '^()$'."""
    if not strings and default is not None:
        return default

    if strings_are_regex:
        alternatives = strings
    else:
        alternatives = [re.escape(item) for item in strings]
    return '^({})$'.format('|'.join(alternatives))
The generator options incorporate the following + parameters: + + args is an parsed argument object; see below for the fields that are used.""" + global genOpts + genOpts = {} + + # Default class of extensions to include, or None + defaultExtensions = args.defaultExtensions + + # Additional extensions to include (list of extensions) + extensions = args.extension + + # Extensions to remove (list of extensions) + removeExtensions = args.removeExtensions + + # Extensions to emit (list of extensions) + emitExtensions = args.emitExtensions + + # SPIR-V capabilities / features to emit (list of extensions & capabilities) + # emitSpirv = args.emitSpirv + + # Features to include (list of features) + features = args.feature + + # Whether to disable inclusion protect in headers + protect = args.protect + + # Output target directory + directory = args.directory + + # Path to generated files, particularly api.py + genpath = args.genpath + + # Generate MISRA C-friendly headers + misracstyle = args.misracstyle; + + # Generate MISRA C++-friendly headers + misracppstyle = args.misracppstyle; + + # Descriptive names for various regexp patterns used to select + # versions and extensions + allSpirv = allFeatures = allExtensions = r'.*' + + # Turn lists of names/patterns into matching regular expressions + addExtensionsPat = makeREstring(extensions, None) + removeExtensionsPat = makeREstring(removeExtensions, None) + emitExtensionsPat = makeREstring(emitExtensions, allExtensions) + # emitSpirvPat = makeREstring(emitSpirv, allSpirv) + featuresPat = makeREstring(features, allFeatures) + + # Copyright text prefixing all headers (list of strings). 
+ # The SPDX formatting below works around constraints of the 'reuse' tool + prefixStrings = [ + '/*', + '** Copyright 2015-2023 The Khronos Group Inc.', + '**', + '** SPDX' + '-License-Identifier: Apache-2.0', + '*/', + '' + ] + + # Text specific to OpenCL headers + clPrefixStrings = [ + '/*', + '** This header is generated from the Khronos OpenCL XML API Registry.', + '**', + '*/', + '' + ] + + # Defaults for generating re-inclusion protection wrappers (or not) + protectFile = protect + + # An API style conventions object + conventions = APIConventions() + + # API include files for spec and ref pages + # Overwrites include subdirectories in spec source tree + # The generated include files do not include the calling convention + # macros (apientry etc.), unlike the header files. + # Because the 1.0 core branch includes ref pages for extensions, + # all the extension interfaces need to be generated, even though + # none are used by the core spec itself. + genOpts['apiinc'] = [ + DocOutputGenerator, + DocGeneratorOptions( + conventions = conventions, + filename = 'timeMarker', + directory = directory, + genpath = genpath, + apiname = 'opencl', + profile = None, + versions = featuresPat, + emitversions = featuresPat, + defaultExtensions = defaultExtensions, + addExtensions = addExtensionsPat, + removeExtensions = removeExtensionsPat, + emitExtensions = emitExtensionsPat, + prefixText = prefixStrings + clPrefixStrings, + apicall = '', + apientry = '', + apientryp = '*', + alignFuncParam = 0, + expandEnumerants = False) + ] + + # Python representation of API information, used by scripts that + # don't need to load the full XML. 
+ genOpts['api.py'] = [ + PyOutputGenerator, + DocGeneratorOptions( + conventions = conventions, + filename = 'api.py', + directory = directory, + genpath = genpath, + apiname = 'opencl', + profile = None, + versions = featuresPat, + emitversions = featuresPat, + defaultExtensions = None, + addExtensions = addExtensionsPat, + removeExtensions = removeExtensionsPat, + emitExtensions = emitExtensionsPat, + reparentEnums = False) + ] + + # Extension metainformation for spec extension appendices + # Includes all extensions by default, but only so that the generated + # 'promoted_extensions_*' files refer to all extensions that were + # promoted to a core version. + genOpts['extinc'] = [ + ExtensionMetaDocOutputGenerator, + ExtensionMetaDocGeneratorOptions( + conventions = conventions, + filename = 'timeMarker', + directory = directory, + genpath = None, + apiname = 'opencl', + profile = None, + versions = featuresPat, + emitversions = None, + defaultExtensions = defaultExtensions, + addExtensions = addExtensionsPat, + removeExtensions = None, + emitExtensions = emitExtensionsPat) + ] + + # Platform extensions, in their own header files + # Each element of the platforms[] array defines information for + # generating a single platform: + # [0] is the generated header file name + # [1] is the set of platform extensions to generate + # [2] is additional extensions whose interfaces should be considered, + # but suppressed in the output, to avoid duplicate definitions of + # dependent types like VkDisplayKHR and VkSurfaceKHR which come from + # non-platform extensions. + + # Track all platform extensions, for exclusion from vulkan_core.h + allPlatformExtensions = [] + + # # Extensions suppressed for all platforms. + # # Covers common WSI extension types. 
+ # commonSuppressExtensions = [ 'VK_KHR_display', 'VK_KHR_swapchain' ] + # + # platforms = [ + # [ 'vulkan_android.h', [ 'VK_KHR_android_surface', + # 'VK_ANDROID_external_memory_android_hardware_buffer' + # ], commonSuppressExtensions ], + # [ 'vulkan_fuchsia.h', [ 'VK_FUCHSIA_imagepipe_surface'], commonSuppressExtensions ], + # [ 'vulkan_ios.h', [ 'VK_MVK_ios_surface' ], commonSuppressExtensions ], + # [ 'vulkan_macos.h', [ 'VK_MVK_macos_surface' ], commonSuppressExtensions ], + # [ 'vulkan_vi.h', [ 'VK_NN_vi_surface' ], commonSuppressExtensions ], + # [ 'vulkan_wayland.h', [ 'VK_KHR_wayland_surface' ], commonSuppressExtensions ], + # [ 'vulkan_win32.h', [ 'VK_.*_win32(|_.*)' ], commonSuppressExtensions + [ 'VK_KHR_external_semaphore', 'VK_KHR_external_memory_capabilities', 'VK_KHR_external_fence', 'VK_KHR_external_fence_capabilities', 'VK_NV_external_memory_capabilities' ] ], + # [ 'vulkan_xcb.h', [ 'VK_KHR_xcb_surface' ], commonSuppressExtensions ], + # [ 'vulkan_xlib.h', [ 'VK_KHR_xlib_surface' ], commonSuppressExtensions ], + # [ 'vulkan_xlib_xrandr.h', [ 'VK_EXT_acquire_xlib_display' ], commonSuppressExtensions ], + # ] + # + # for platform in platforms: + # headername = platform[0] + # + # allPlatformExtensions += platform[1] + # + # addPlatformExtensionsRE = makeREstring(platform[1] + platform[2]) + # emitPlatformExtensionsRE = makeREstring(platform[1]) + # + # opts = CGeneratorOptions( + # filename = headername, + # directory = directory, + # apiname = 'vulkan', + # profile = None, + # versions = featuresPat, + # emitversions = None, + # defaultExtensions = None, + # addExtensions = addPlatformExtensionsRE, + # removeExtensions = None, + # emitExtensions = emitPlatformExtensionsRE, + # prefixText = prefixStrings + clPrefixStrings, + # genFuncPointers = True, + # protectFile = protectFile, + # protectFeature = False, + # protectProto = '#ifndef', + # protectProtoStr = 'VK_NO_PROTOTYPES', + # apicall = 'VKAPI_ATTR ', + # apientry = 'VKAPI_CALL ', + # 
def genTarget(args):
    """Create an API generator and corresponding generator options based on
    the requested target and command line options.

    This is encapsulated in a function so it can be profiled and/or timed.
    The args parameter is a parsed argument object. This function itself
    reads only `args.target` (the target to generate); the whole object is
    passed to makeGenOpts(), which reads the remaining fields.

    Returns a (generator, options) tuple for a known target. Error and
    warning output of the generator goes to the module-level `errWarn`
    file and diagnostics to the module-level `diag` file."""

    # Create generator options with parameters specified on command line
    makeGenOpts(args)

    # pdb.set_trace()

    # Select a generator matching the requested target
    if args.target not in genOpts:
        # logErr is not among the names this file imports from reflib at
        # file scope, so bring it in here; without this the error path
        # died with a NameError instead of reporting the bad target.
        from reflib import logErr
        logErr('No generator options for unknown target:', args.target)
        # NOTE(review): presumably logErr raises, making this unreachable -
        # confirm; the caller unpacks the result as a 2-tuple.
        return None

    createGenerator, options = genOpts[args.target][0], genOpts[args.target][1]

    logDiag('* Building', options.filename)
    logDiag('* options.versions =', options.versions)
    logDiag('* options.emitversions =', options.emitversions)
    logDiag('* options.defaultExtensions =', options.defaultExtensions)
    logDiag('* options.addExtensions =', options.addExtensions)
    logDiag('* options.removeExtensions =', options.removeExtensions)
    logDiag('* options.emitExtensions =', options.emitExtensions)

    gen = createGenerator(errFile=errWarn,
                          warnFile=errWarn,
                          diagFile=diag)
    return (gen, options)
if __name__ == '__main__':
    # Command-line driver: parse options, build the generator for the
    # requested target, load the registry XML and run the generator.
    parser = argparse.ArgumentParser()

    parser.add_argument('-defaultExtensions', action='store',
                        default='opencl',
                        help='Specify a single class of extensions to add to targets')
    parser.add_argument('-extension', action='append',
                        default=[],
                        help='Specify an extension or extensions to add to targets')
    parser.add_argument('-removeExtensions', action='append',
                        default=[],
                        help='Specify an extension or extensions to remove from targets')
    parser.add_argument('-emitExtensions', action='append',
                        default=[],
                        help='Specify an extension or extensions to emit in targets')

    parser.add_argument('-feature', action='append',
                        default=[],
                        help='Specify a core API feature name or names to add to targets')
    parser.add_argument('-debug', action='store_true',
                        help='Enable debugging')
    parser.add_argument('-dump', action='store_true',
                        help='Enable dump to stderr')
    parser.add_argument('-diagfile', action='store',
                        default=None,
                        help='Write diagnostics to specified file')
    parser.add_argument('-errfile', action='store',
                        default=None,
                        help='Write errors and warnings to specified file instead of stderr')
    parser.add_argument('-noprotect', dest='protect', action='store_false',
                        help='Disable inclusion protection in output headers')
    parser.add_argument('-profile', action='store_true',
                        help='Enable profiling')
    parser.add_argument('-registry', action='store',
                        default='cl.xml',
                        help='Use specified registry file instead of cl.xml')
    parser.add_argument('-time', action='store_true',
                        help='Enable timing')
    parser.add_argument('-validate', action='store_true',
                        help='Validate the registry properties and exit')
    parser.add_argument('-genpath', action='store', default='gen',
                        help='Path to generated files')
    parser.add_argument('-o', action='store', dest='directory',
                        default='.',
                        help='Create target and related files in specified directory')
    parser.add_argument('target', metavar='target', nargs='?',
                        help='Specify target')
    parser.add_argument('-quiet', action='store_true', default=True,
                        help='Suppress script output during normal execution.')
    parser.add_argument('-verbose', action='store_false', dest='quiet', default=True,
                        help='Enable script output during normal execution.')
    parser.add_argument('-misracstyle', dest='misracstyle', action='store_true',
                        help='generate MISRA C-friendly headers')
    parser.add_argument('-misracppstyle', dest='misracppstyle', action='store_true',
                        help='generate MISRA C++-friendly headers')

    args = parser.parse_args()

    # The positional target is optional only so that -validate can be used
    # on its own; every other mode needs it, so fail early with a usage
    # message instead of crashing later in genTarget().
    if not args.validate and args.target is None:
        parser.error('a target is required unless -validate is given')

    # This splits arguments which are space-separated lists
    args.feature = [name for arg in args.feature for name in arg.split()]
    args.extension = [name for arg in args.extension for name in arg.split()]

    # create error/warning & diagnostic files
    if args.errfile:
        errWarn = open(args.errfile, 'w', encoding='utf-8')
    else:
        errWarn = sys.stderr

    if args.diagfile:
        diag = open(args.diagfile, 'w', encoding='utf-8')
    else:
        diag = None

    (gen, options) = (None, None)
    if not args.validate:
        # Create the API generator & generator options
        (gen, options) = genTarget(args)

    # Create the registry object with the specified generator and generator
    # options. The options are set before XML loading as they may affect it.
    reg = Registry(gen, options)

    # Parse the specified registry XML into an ElementTree object
    startTimer(args.time)
    tree = etree.parse(args.registry)
    endTimer(args.time, '* Time to make ElementTree =')

    # Load the XML tree into the registry object
    startTimer(args.time)
    reg.loadElementTree(tree)
    endTimer(args.time, '* Time to parse ElementTree =')

    if args.validate:
        success = reg.validateRegistry()
        sys.exit(0 if success else 1)

    if args.dump:
        logDiag('* Dumping registry to regdump.txt')
        # Close the dump file deterministically so its contents are flushed
        # before generation starts.
        with open('regdump.txt', 'w', encoding='utf-8') as dumpFile:
            reg.dumpReg(filehandle=dumpFile)

    # Finally, use the output generator to create the requested target
    if args.debug:
        pdb.run('reg.apiGen()')
    else:
        startTimer(args.time)
        reg.apiGen()
        endTimer(args.time, '* Time to generate ' + options.filename + ' =')

    if not args.quiet:
        logDiag('* Generated', options.filename)
def enquote(s):
    """Wrap a string in single quotes for serialization into Python code.

    Returns None when the argument is empty or None."""
    return "'{}'".format(s) if s else None


def regSortCategoryKey(feature):
    """Sort key for regSortFeatures, ranking a feature by its category:

    - 0 - the feature's element tag is 'feature' (core API)
    - 1 - category is ARB, KHR or OES (Khronos extensions)
    - 2 - anything else (EXT/vendor extensions)"""
    if feature.elem.tag == 'feature':
        return 0
    return 1 if feature.category in ('ARB', 'KHR', 'OES') else 2


def regSortOrderKey(feature):
    """Sort key for regSortFeatures - the feature's sortorder attribute."""
    return feature.sortorder


def regSortFeatureVersionKey(feature):
    """Sort key for regSortFeatures - the feature's versionNumber as a float."""
    version = feature.versionNumber
    return float(version)


def regSortExtensionNumberKey(feature):
    """Sort key for regSortFeatures - the feature's number as an int."""
    number = feature.number
    return int(number)


def regSortFeatures(featureList):
    """Default sort procedure for features; sorts `featureList` in place.

    The list is ordered by explicit sort order, then by category, then by
    version number, then by extension number. This is done as a sequence
    of stable sorts, applied from the least significant key to the most
    significant one."""
    leastToMostSignificant = (
        regSortExtensionNumberKey,
        regSortFeatureVersionKey,
        regSortCategoryKey,
        regSortOrderKey,
    )
    for keyFunc in leastToMostSignificant:
        featureList.sort(key=keyFunc)


class GeneratorOptions:
    """Target-language-independent options for header/documentation
    production.

    The attributes set here are plain values; pattern-valued options are
    stored as regex strings, with None/empty replaced by a pattern that
    matches nothing (see emptyRegex)."""

    def __init__(self,
                 conventions=None,
                 filename=None,
                 directory='.',
                 genpath=None,
                 apiname=None,
                 profile=None,
                 versions='.*',
                 emitversions='.*',
                 defaultExtensions=None,
                 addExtensions=None,
                 removeExtensions=None,
                 emitExtensions=None,
                 emitSpirv=None,
                 reparentEnums=True,
                 sortProcedure=regSortFeatures):
        """Constructor.

        Arguments:

        - conventions - conventions object; may be mandatory for some
          generators
        - filename - basename of file to generate, or None to write to stdout
        - directory - directory in which to generate files
        - genpath - path to previously generated files, such as api.py
        - apiname - API name string, e.g. 'gl'
        - profile - string specifying API profile, e.g. 'core', or None
        - versions - regex matching API versions to process interfaces for
        - emitversions - regex matching API versions to actually emit
          interfaces for
        - defaultExtensions - None, or a string selecting the default class
          of extensions to include
        - addExtensions - regex matching names of additional extensions to
          include
        - removeExtensions - regex matching names of extensions to remove
          (after defaultExtensions and addExtensions)
        - emitExtensions - regex matching names of extensions to actually
          emit interfaces for
        - emitSpirv - regex matching names of extensions and capabilities
          to actually emit interfaces for
        - reparentEnums - boolean controlling whether elements extending an
          enumerated type are moved to the type they extend; defaults to
          True
        - sortProcedure - callable taking a list of feature objects and
          sorting it in place into the preferred output order; defaults to
          regSortFeatures

        The regex patterns can be None or empty, in which case they match
        nothing."""
        toRegex = self.emptyRegex

        # Conventions object; may be mandatory for some generators.
        self.conventions = conventions

        # Basename of file to generate, or None to write to stdout.
        self.filename = filename

        # Path to previously generated files, such as api.py.
        self.genpath = genpath

        # Directory in which to generate filename.
        self.directory = directory

        # API name string, e.g. 'gl'.
        self.apiname = apiname

        # API profile, e.g. 'core', or None.
        self.profile = profile

        # Regex matching API versions to process interfaces for.
        self.versions = toRegex(versions)

        # Regex matching API versions to actually emit interfaces for.
        self.emitversions = toRegex(emitversions)

        # Default class of extensions to include, or None (stored as given,
        # not converted to a regex).
        self.defaultExtensions = defaultExtensions

        # Regex matching names of additional extensions to include.
        self.addExtensions = toRegex(addExtensions)

        # Regex matching names of extensions to remove.
        self.removeExtensions = toRegex(removeExtensions)

        # Regex matching names of extensions to actually emit interfaces for.
        self.emitExtensions = toRegex(emitExtensions)

        # Regex matching names of extensions and capabilities to actually
        # emit interfaces for.
        self.emitSpirv = toRegex(emitSpirv)

        # Whether elements extending an enumerated type are reparented.
        self.reparentEnums = reparentEnums

        # In-place sort procedure applied to the list of features.
        self.sortProcedure = sortProcedure

        # True if this generator makes compilable code.
        self.codeGenerator = False

    def emptyRegex(self, pat):
        """Return `pat`, or a regular expression which matches no version
        or extension names when `pat` is None or the empty string."""
        return pat if pat else '_nomatch_^'
+ """ + + # categoryToPath - map XML 'category' to include file directory name + categoryToPath = { + 'bitmask': 'flags', + 'enum': 'enums', + 'funcpointer': 'funcpointers', + 'handle': 'handles', + 'define': 'defines', + 'basetype': 'basetypes', + } + + def __init__(self, errFile=sys.stderr, warnFile=sys.stderr, diagFile=sys.stdout): + """Constructor + + - errFile, warnFile, diagFile - file handles to write errors, + warnings, diagnostics to. May be None to not write.""" + self.outFile = None + self.errFile = errFile + self.warnFile = warnFile + self.diagFile = diagFile + # Internal state + self.featureName = None + self.genOpts = None + self.registry = None + self.featureDictionary = {} + # Used for extension enum value generation + self.extBase = 1000000000 + self.extBlockSize = 1000 + self.madeDirs = {} + + # API dictionary, which may be loaded by the beginFile method of + # derived generators. + self.apidict = None + + def logMsg(self, level, *args): + """Write a message of different categories to different + destinations. + + - `level` + - 'diag' (diagnostic, voluminous) + - 'warn' (warning) + - 'error' (fatal error - raises exception after logging) + + - `*args` - print()-style arguments to direct to corresponding log""" + if level == 'error': + strfile = io.StringIO() + write('ERROR:', *args, file=strfile) + if self.errFile is not None: + write(strfile.getvalue(), file=self.errFile) + raise UserWarning(strfile.getvalue()) + elif level == 'warn': + if self.warnFile is not None: + write('WARNING:', *args, file=self.warnFile) + elif level == 'diag': + if self.diagFile is not None: + write('DIAG:', *args, file=self.diagFile) + else: + raise UserWarning( + '*** FATAL ERROR in Generator.logMsg: unknown level:' + level) + + def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): + """Parse and convert an `` tag into a value. + + Returns a list: + + - first element - integer representation of the value, or None + if needsNum is False. 
The value must be a legal number + if needsNum is True. + - second element - string representation of the value + + There are several possible representations of values. + + - A 'value' attribute simply contains the value. + - A 'bitpos' attribute defines a value by specifying the bit + position which is set in that value. + - An 'offset','extbase','extends' triplet specifies a value + as an offset to a base value defined by the specified + 'extbase' extension name, which is then cast to the + typename specified by 'extends'. This requires probing + the registry database, and imbeds knowledge of the + API extension enum scheme in this function. + - An 'alias' attribute contains the name of another enum + which this is an alias of. The other enum must be + declared first when emitting this enum.""" + name = elem.get('name') + numVal = None + if 'value' in elem.keys(): + value = elem.get('value') + # print('About to translate value =', value, 'type =', type(value)) + if needsNum: + numVal = int(value, 0) + # If there's a non-integer, numeric 'type' attribute (e.g. 'u' or + # 'ull'), append it to the string value. 
+ # t = enuminfo.elem.get('type') + # if t is not None and t != '' and t != 'i' and t != 's': + # value += enuminfo.type + if forceSuffix: + if bitwidth == 64: + value = value + 'ULL' + else: + value = value + 'U' + self.logMsg('diag', 'Enum', name, '-> value [', numVal, ',', value, ']') + return [numVal, value] + if 'bitpos' in elem.keys(): + value = elem.get('bitpos') + bitpos = int(value, 0) + numVal = 1 << bitpos + value = '0x%08x' % numVal + if bitwidth == 64: + value = value + 'ULL' + elif forceSuffix: + value = value + 'U' + self.logMsg('diag', 'Enum', name, '-> bitpos [', numVal, ',', value, ']') + return [numVal, value] + if 'offset' in elem.keys(): + # Obtain values in the mapping from the attributes + enumNegative = False + offset = int(elem.get('offset'), 0) + extnumber = int(elem.get('extnumber'), 0) + extends = elem.get('extends') + if 'dir' in elem.keys(): + enumNegative = True + self.logMsg('diag', 'Enum', name, 'offset =', offset, + 'extnumber =', extnumber, 'extends =', extends, + 'enumNegative =', enumNegative) + # Now determine the actual enumerant value, as defined + # in the "Layers and Extensions" appendix of the spec. + numVal = self.extBase + (extnumber - 1) * self.extBlockSize + offset + if enumNegative: + numVal *= -1 + value = '%d' % numVal + # More logic needed! + self.logMsg('diag', 'Enum', name, '-> offset [', numVal, ',', value, ']') + return [numVal, value] + if 'alias' in elem.keys(): + return [None, elem.get('alias')] + return [None, None] + + def checkDuplicateEnums(self, enums): + """Check enumerated values for duplicates. + + - enums - list of `` Elements + + returns the list with duplicates stripped""" + # Dictionaries indexed by name and numeric value. 
+ # Entries are [ Element, numVal, strVal ] matching name or value + + nameMap = {} + valueMap = {} + + stripped = [] + for elem in enums: + name = elem.get('name') + (numVal, strVal) = self.enumToValue(elem, True) + + if name in nameMap: + # Duplicate name found; check values + (name2, numVal2, strVal2) = nameMap[name] + + # Duplicate enum values for the same name are benign. This + # happens when defining the same enum conditionally in + # several extension blocks. + if (strVal2 == strVal or (numVal is not None + and numVal == numVal2)): + True + # self.logMsg('info', 'checkDuplicateEnums: Duplicate enum (' + name + + # ') found with the same value:' + strVal) + else: + self.logMsg('warn', 'checkDuplicateEnums: Duplicate enum (' + name + + ') found with different values:' + strVal + + ' and ' + strVal2) + + # Don't add the duplicate to the returned list + continue + elif numVal in valueMap: + # Duplicate value found (such as an alias); report it, but + # still add this enum to the list. + (name2, numVal2, strVal2) = valueMap[numVal] + + msg = 'Two enums found with the same value: {} = {} = {}'.format( + name, name2.get('name'), strVal) + self.logMsg('error', msg) + + # Track this enum to detect followon duplicates + nameMap[name] = [elem, numVal, strVal] + if numVal is not None: + valueMap[numVal] = [elem, numVal, strVal] + + # Add this enum to the list + stripped.append(elem) + + # Return the list + return stripped + + def misracstyle(self): + return False; + + def misracppstyle(self): + return False; + + def buildEnumCDecl(self, expand, groupinfo, groupName): + """Generate the C declaration for an enum""" + groupElem = groupinfo.elem + + # Determine the required bit width for the enum group. + # 32 is the default, which generates C enum types for the values. 
+ bitwidth = 32 + + # If the constFlagBits preference is set, 64 is the default for bitmasks + if self.genOpts.conventions.constFlagBits and groupElem.get('type') == 'bitmask': + bitwidth = 64 + + # Check for an explicitly defined bitwidth, which will override any defaults. + if groupElem.get('bitwidth'): + try: + bitwidth = int(groupElem.get('bitwidth')) + except ValueError as ve: + self.logMsg('error', 'Invalid value for bitwidth attribute (', groupElem.get('bitwidth'), ') for ', groupName, ' - must be an integer value\n') + exit(1) + + usebitmask = False + usedefine = False + + # Bitmask flags can be generated as either "static const uint{32,64}_t" values, + # or as 32-bit C enums. 64-bit types must use uint64_t values. + if groupElem.get('type') == 'bitmask': + if bitwidth > 32 or self.misracppstyle(): + usebitmask = True + if self.misracstyle(): + usedefine = True + + if usedefine or usebitmask: + # Validate the bitwidth and generate values appropriately + if bitwidth > 64: + self.logMsg('error', 'Invalid value for bitwidth attribute (', groupElem.get('bitwidth'), ') for bitmask type ', groupName, ' - must be less than or equal to 64\n') + exit(1) + else: + return self.buildEnumCDecl_BitmaskOrDefine(groupinfo, groupName, bitwidth, usedefine) + else: + # Validate the bitwidth and generate values appropriately + if bitwidth > 32: + self.logMsg('error', 'Invalid value for bitwidth attribute (', groupElem.get('bitwidth'), ') for enum type ', groupName, ' - must be less than or equal to 32\n') + exit(1) + else: + return self.buildEnumCDecl_Enum(expand, groupinfo, groupName) + + def buildEnumCDecl_BitmaskOrDefine(self, groupinfo, groupName, bitwidth, usedefine): + """Generate the C declaration for an "enum" that is actually a + set of flag bits""" + groupElem = groupinfo.elem + flagTypeName = groupElem.get('name') + + # Prefix + body = "// Flag bits for " + flagTypeName + "\n" + + if bitwidth == 64: + body += "typedef VkFlags64 %s;\n" % flagTypeName; + else: + body 
+= "typedef VkFlags %s;\n" % flagTypeName; + + # Maximum allowable value for a flag (unsigned 64-bit integer) + maxValidValue = 2**(64) - 1 + minValidValue = 0 + + # Get a list of nested 'enum' tags. + enums = groupElem.findall('enum') + + # Check for and report duplicates, and return a list with them + # removed. + enums = self.checkDuplicateEnums(enums) + + # Accumulate non-numeric enumerant values separately and append + # them following the numeric values, to allow for aliases. + # NOTE: this doesn't do a topological sort yet, so aliases of + # aliases can still get in the wrong order. + aliasText = '' + + # Loop over the nested 'enum' tags. + for elem in enums: + # Convert the value to an integer and use that to track min/max. + # Values of form -(number) are accepted but nothing more complex. + # Should catch exceptions here for more complex constructs. Not yet. + (numVal, strVal) = self.enumToValue(elem, True, bitwidth, True) + name = elem.get('name') + + # Range check for the enum value + if numVal is not None and (numVal > maxValidValue or numVal < minValidValue): + self.logMsg('error', 'Allowable range for flag types in C is [', minValidValue, ',', maxValidValue, '], but', name, 'flag has a value outside of this (', strVal, ')\n') + exit(1) + + decl = self.genRequirements(name, mustBeFound = False) + + if self.isEnumRequired(elem): + protect = elem.get('protect') + if protect is not None: + body += '#ifdef {}\n'.format(protect) + + if usedefine: + decl += "#define {} {}\n".format(name, strVal) + elif self.misracppstyle(): + decl += "static constexpr {} {} {{{}}};\n".format(flagTypeName, name, strVal) + else: + # Some C compilers only allow initializing a 'static const' variable with a literal value. + # So initializing an alias from another 'static const' value would fail to compile. + # Work around this by chasing the aliases to get the actual value. 
+ while numVal is None: + alias = self.registry.tree.find("enums/enum[@name='" + strVal + "']") + (numVal, strVal) = self.enumToValue(alias, True, bitwidth, True) + decl += "static const {} {} = {};\n".format(flagTypeName, name, strVal) + + if numVal is not None: + body += decl + else: + aliasText += decl + + if protect is not None: + body += '#endif\n' + + # Now append the non-numeric enumerant values + body += aliasText + + # Postfix + + return ("bitmask", body) + + def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): + """Generate the C declaration for an enumerated type""" + groupElem = groupinfo.elem + + # Break the group name into prefix and suffix portions for range + # enum generation + expandName = re.sub(r'([0-9]+|[a-z_])([A-Z0-9])', r'\1_\2', groupName).upper() + expandPrefix = expandName + expandSuffix = '' + expandSuffixMatch = re.search(r'[A-Z][A-Z]+$', groupName) + if expandSuffixMatch: + expandSuffix = '_' + expandSuffixMatch.group() + # Strip off the suffix from the prefix + expandPrefix = expandName.rsplit(expandSuffix, 1)[0] + + # Prefix + body = ["typedef enum %s {" % groupName] + + # @@ Should use the type="bitmask" attribute instead + isEnum = ('FLAG_BITS' not in expandPrefix) + + # Allowable range for a C enum - which is that of a signed 32-bit integer + maxValidValue = 2**(32 - 1) - 1 + minValidValue = (maxValidValue * -1) - 1 + + + # Get a list of nested 'enum' tags. + enums = groupElem.findall('enum') + + # Check for and report duplicates, and return a list with them + # removed. + enums = self.checkDuplicateEnums(enums) + + # Loop over the nested 'enum' tags. Keep track of the minimum and + # maximum numeric values, if they can be determined; but only for + # core API enumerants, not extension enumerants. This is inferred + # by looking for 'extends' attributes. + minName = None + + # Accumulate non-numeric enumerant values separately and append + # them following the numeric values, to allow for aliases. 
+ # NOTE: this doesn't do a topological sort yet, so aliases of + # aliases can still get in the wrong order. + aliasText = [] + + for elem in enums: + # Convert the value to an integer and use that to track min/max. + # Values of form -(number) are accepted but nothing more complex. + # Should catch exceptions here for more complex constructs. Not yet. + (numVal, strVal) = self.enumToValue(elem, True) + name = elem.get('name') + + # Extension enumerants are only included if they are required + if self.isEnumRequired(elem): + decl = '' + + protect = elem.get('protect') + if protect is not None: + decl += '#ifdef {}\n'.format(protect) + + # Indent requirements comment, if there is one + requirements = self.genRequirements(name, mustBeFound = False) + if requirements != '': + requirements = ' ' + requirements + decl += requirements + decl += ' {} = {},'.format(name, strVal) + + if protect is not None: + decl += '\n#endif' + + if numVal is not None: + body.append(decl) + else: + aliasText.append(decl) + + # Range check for the enum value + if numVal is not None and (numVal > maxValidValue or numVal < minValidValue): + self.logMsg('error', 'Allowable range for C enum types is [', minValidValue, ',', maxValidValue, '], but', name, 'has a value outside of this (', strVal, ')\n') + exit(1) + + # Don't track min/max for non-numbers (numVal is None) + if isEnum and numVal is not None and elem.get('extends') is None: + if minName is None: + minName = maxName = name + minValue = maxValue = numVal + elif numVal < minValue: + minName = name + minValue = numVal + elif numVal > maxValue: + maxName = name + maxValue = numVal + + # Now append the non-numeric enumerant values + body.extend(aliasText) + + # Generate min/max value tokens - legacy use case. 
+ if isEnum and expand: + body.extend((" {}_BEGIN_RANGE{} = {},".format(expandPrefix, expandSuffix, minName), + " {}_END_RANGE{} = {},".format( + expandPrefix, expandSuffix, maxName), + " {}_RANGE_SIZE{} = ({} - {} + 1),".format(expandPrefix, expandSuffix, maxName, minName))) + + # Generate a range-padding value to ensure the enum is 32 bits, but + # only in code generators, so it doesn't appear in documentation + if (self.genOpts.codeGenerator or + self.conventions.generate_max_enum_in_docs): + body.append(" {}_MAX_ENUM{} = 0x7FFFFFFF".format( + expandPrefix, expandSuffix)) + + # Postfix + body.append("} %s;" % groupName) + + # Determine appropriate section for this declaration + if groupElem.get('type') == 'bitmask': + section = 'bitmask' + else: + section = 'group' + + return (section, '\n'.join(body)) + + def buildConstantCDecl(self, enuminfo, name, alias): + """Generate the C declaration for a constant (a single + value). + + tags may specify their values in several ways, but are + usually just integers or floating-point numbers.""" + + (_, strVal) = self.enumToValue(enuminfo.elem, False) + + if self.misracppstyle() and enuminfo.elem.get('type') and not alias: + # Generate e.g.: static constexpr uint32_t x = ~static_cast(1U); + # This appeases MISRA "underlying type" rules. 
+ typeStr = enuminfo.elem.get('type'); + invert = '~' in strVal + number = strVal.strip("()~UL") + if typeStr != "float": + number += 'U' + strVal = "~" if invert else "" + strVal += "static_cast<" + typeStr + ">(" + number + ")" + body = 'static constexpr ' + typeStr.ljust(9) + name.ljust(33) + ' {' + strVal + '};' + elif enuminfo.elem.get('type') and not alias: + # Generate e.g.: #define x (~0ULL) + typeStr = enuminfo.elem.get('type'); + invert = '~' in strVal + paren = '(' in strVal + number = strVal.strip("()~UL") + if typeStr != "float": + if typeStr == "uint64_t": + number += 'ULL' + else: + number += 'U' + strVal = "~" if invert else "" + strVal += number + if paren: + strVal = "(" + strVal + ")"; + body = '#define ' + name.ljust(33) + ' ' + strVal; + else: + body = '#define ' + name.ljust(33) + ' ' + strVal + + return body + + def makeDir(self, path): + """Create a directory, if not already done. + + Generally called from derived generators creating hierarchies.""" + self.logMsg('diag', 'OutputGenerator::makeDir(' + path + ')') + if path not in self.madeDirs: + # This can get race conditions with multiple writers, see + # https://stackoverflow.com/questions/273192/ + if not os.path.exists(path): + os.makedirs(path) + self.madeDirs[path] = None + + def beginFile(self, genOpts): + """Start a new interface file + + - genOpts - GeneratorOptions controlling what's generated and how""" + self.genOpts = genOpts + self.should_insert_may_alias_macro = \ + self.genOpts.conventions.should_insert_may_alias_macro(self.genOpts) + + # Try to import the API dictionary, api.py, if it exists. Nothing in + # api.py cannot be extracted directly from the XML, and in the + # future we should do that. + if self.genOpts.genpath is not None: + try: + sys.path.insert(0, self.genOpts.genpath) + import api + self.apidict = api + except ImportError: + self.apidict = None + + self.conventions = genOpts.conventions + + # Open a temporary file for accumulating output. 
+ if self.genOpts.filename is not None: + self.outFile = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', newline='\n', delete=False) + else: + self.outFile = sys.stdout + + def endFile(self): + if self.errFile: + self.errFile.flush() + if self.warnFile: + self.warnFile.flush() + if self.diagFile: + self.diagFile.flush() + self.outFile.flush() + if self.outFile != sys.stdout and self.outFile != sys.stderr: + self.outFile.close() + + # On successfully generating output, move the temporary file to the + # target file. + if self.genOpts.filename is not None: + if sys.platform == 'win32': + directory = Path(self.genOpts.directory) + if not Path.exists(directory): + os.makedirs(directory) + shutil.copy(self.outFile.name, self.genOpts.directory + '/' + self.genOpts.filename) + os.remove(self.outFile.name) + self.genOpts = None + + def beginFeature(self, interface, emit): + """Write interface for a feature and tag generated features as having been done. + + - interface - element for the `` / `` to generate + - emit - actually write to the header only when True""" + self.emit = emit + self.featureName = interface.get('name') + # If there's an additional 'protect' attribute in the feature, save it + self.featureExtraProtect = interface.get('protect') + + def endFeature(self): + """Finish an interface file, closing it when done. + + Derived classes responsible for emitting feature""" + self.featureName = None + self.featureExtraProtect = None + + def genRequirements(self, name, mustBeFound = True): + """Generate text showing what core versions and extensions introduce + an API. This exists in the base Generator class because it's used by + the shared enumerant-generating interfaces (buildEnumCDecl, etc.). + Here it returns an empty string for most generators, but can be + overridden by e.g. DocGenerator. + + - name - name of the API + - mustBeFound - If True, when requirements for 'name' cannot be + determined, a warning comment is generated. 
+ """ + + return '' + + def validateFeature(self, featureType, featureName): + """Validate we're generating something only inside a `` tag""" + if self.featureName is None: + raise UserWarning('Attempt to generate', featureType, + featureName, 'when not in feature') + + def genType(self, typeinfo, name, alias): + """Generate interface for a type + + - typeinfo - TypeInfo for a type + + Extend to generate as desired in your derived class.""" + self.validateFeature('type', name) + + def genStruct(self, typeinfo, typeName, alias): + """Generate interface for a C "struct" type. + + - typeinfo - TypeInfo for a type interpreted as a struct + + Extend to generate as desired in your derived class.""" + self.validateFeature('struct', typeName) + + # The mixed-mode tags may contain no-op tags. + # It is convenient to remove them here where all output generators + # will benefit. + for member in typeinfo.elem.findall('.//member'): + for comment in member.findall('comment'): + member.remove(comment) + + def genGroup(self, groupinfo, groupName, alias): + """Generate interface for a group of enums (C "enum") + + - groupinfo - GroupInfo for a group. + + Extend to generate as desired in your derived class.""" + + self.validateFeature('group', groupName) + + def genEnum(self, enuminfo, typeName, alias): + """Generate interface for an enum (constant). + + - enuminfo - EnumInfo for an enum + - name - enum name + + Extend to generate as desired in your derived class.""" + self.validateFeature('enum', typeName) + + def genCmd(self, cmd, cmdinfo, alias): + """Generate interface for a command. + + - cmdinfo - CmdInfo for a command + + Extend to generate as desired in your derived class.""" + self.validateFeature('command', cmdinfo) + + def genSpirv(self, spirv, spirvinfo, alias): + """Generate interface for a spirv element. 
+ + - spirvinfo - SpirvInfo for a command + + Extend to generate as desired in your derived class.""" + return + + def makeProtoName(self, name, tail): + """Turn a `` `` into C-language prototype + and typedef declarations for that name. + + - name - contents of `` tag + - tail - whatever text follows that tag in the Element""" + return self.genOpts.apientry + name + tail + + def makeTypedefName(self, name, tail): + """Make the function-pointer typedef name for a command.""" + return '(' + self.genOpts.apientryp + 'PFN_' + name + tail + ')' + + def makeCParamDecl(self, param, aligncol): + """Return a string which is an indented, formatted + declaration for a `` or `` block (e.g. function parameter + or structure/union member). + + - param - Element (`` or ``) to format + - aligncol - if non-zero, attempt to align the nested `` element + at this column""" + indent = ' ' + paramdecl = indent + prefix = noneStr(param.text) + + for elem in param: + text = noneStr(elem.text) + tail = noneStr(elem.tail) + + if self.should_insert_may_alias_macro and self.genOpts.conventions.is_voidpointer_alias(elem.tag, text, tail): + # OpenXR-specific macro insertion - but not in apiinc for the spec + tail = self.genOpts.conventions.make_voidpointer_alias(tail) + if elem.tag == 'name' and aligncol > 0: + self.logMsg('diag', 'Aligning parameter', elem.text, 'to column', self.genOpts.alignFuncParam) + # Align at specified column, if possible + paramdecl = paramdecl.rstrip() + oldLen = len(paramdecl) + # This works around a problem where very long type names - + # longer than the alignment column - would run into the tail + # text. + paramdecl = paramdecl.ljust(aligncol - 1) + ' ' + newLen = len(paramdecl) + self.logMsg('diag', 'Adjust length of parameter decl from', oldLen, 'to', newLen, ':', paramdecl) + + if (self.misracppstyle() and prefix.find('const ') != -1): + # Change pointer type order from e.g. "const void *" to "void const *". 
+ # If the string starts with 'const', reorder it to be after the first type. + paramdecl += prefix.replace('const ', '') + text + ' const' + tail + else: + paramdecl += prefix + text + tail + + # Clear prefix for subsequent iterations + prefix = '' + + # If prefix was originally non-empty and the param has no elements + # (e.g. is nothing but text), preserve it. + paramdecl = paramdecl + prefix + + if aligncol == 0: + # Squeeze out multiple spaces other than the indentation + paramdecl = indent + ' '.join(paramdecl.split()) + return paramdecl + + def getCParamTypeLength(self, param): + """Return the length of the type field is an indented, formatted + declaration for a `` or `` block (e.g. function parameter + or structure/union member). + + - param - Element (`` or ``) to identify""" + + # Allow for missing tag + newLen = 0 + paramdecl = ' ' + noneStr(param.text) + for elem in param: + text = noneStr(elem.text) + tail = noneStr(elem.tail) + + if self.should_insert_may_alias_macro and self.genOpts.conventions.is_voidpointer_alias(elem.tag, text, tail): + # OpenXR-specific macro insertion + tail = self.genOpts.conventions.make_voidpointer_alias(tail) + if elem.tag == 'name': + # Align at specified column, if possible + newLen = len(paramdecl.rstrip()) + self.logMsg('diag', 'Identifying length of', elem.text, 'as', newLen) + paramdecl += text + tail + + return newLen + + def getMaxCParamTypeLength(self, info): + """Return the length of the longest type field for a member/parameter. + + - info - TypeInfo or CommandInfo. 
+ """ + lengths = (self.getCParamTypeLength(member) + for member in info.getMembers()) + return max(lengths) + + def getHandleParent(self, typename): + """Get the parent of a handle object.""" + info = self.registry.typedict.get(typename) + if info is None: + return None + + elem = info.elem + if elem is not None: + return elem.get('parent') + + return None + + def iterateHandleAncestors(self, typename): + """Iterate through the ancestors of a handle type.""" + current = self.getHandleParent(typename) + while current is not None: + yield current + current = self.getHandleParent(current) + + def getHandleAncestors(self, typename): + """Get the ancestors of a handle object.""" + return list(self.iterateHandleAncestors(typename)) + + def getTypeCategory(self, typename): + """Get the category of a type.""" + info = self.registry.typedict.get(typename) + if info is None: + return None + + elem = info.elem + if elem is not None: + return elem.get('category') + return None + + def isStructAlwaysValid(self, structname): + """Try to do check if a structure is always considered valid (i.e. there's no rules to its acceptance).""" + # A conventions object is required for this call. 
+ if not self.conventions: + raise RuntimeError("To use isStructAlwaysValid, be sure your options include a Conventions object.") + + if self.conventions.type_always_valid(structname): + return True + + category = self.getTypeCategory(structname) + if self.conventions.category_requires_validation(category): + return False + + info = self.registry.typedict.get(structname) + assert(info is not None) + + members = info.getMembers() + + for member in members: + member_name = getElemName(member) + if member_name in (self.conventions.structtype_member_name, + self.conventions.nextpointer_member_name): + return False + + if member.get('noautovalidity'): + return False + + member_type = getElemType(member) + + if member_type in ('void', 'char') or self.paramIsArray(member) or self.paramIsPointer(member): + return False + + if self.conventions.type_always_valid(member_type): + continue + + member_category = self.getTypeCategory(member_type) + + if self.conventions.category_requires_validation(member_category): + return False + + if member_category in ('struct', 'union'): + if self.isStructAlwaysValid(member_type) is False: + return False + + return True + + def isEnumRequired(self, elem): + """Return True if this `` element is + required, False otherwise + + - elem - `` element to test""" + required = elem.get('required') is not None + self.logMsg('diag', 'isEnumRequired:', elem.get('name'), + '->', required) + return required + + # @@@ This code is overridden by equivalent code now run in + # @@@ Registry.generateFeature + + required = False + + extname = elem.get('extname') + if extname is not None: + # 'supported' attribute was injected when the element was + # moved into the group in Registry.parseTree() + if self.genOpts.defaultExtensions == elem.get('supported'): + required = True + elif re.match(self.genOpts.addExtensions, extname) is not None: + required = True + elif elem.get('version') is not None: + required = re.match(self.genOpts.emitversions, 
elem.get('version')) is not None + else: + required = True + + return required + + def makeCDecls(self, cmd): + """Return C prototype and function pointer typedef for a + `` Element, as a two-element list of strings. + + - cmd - Element containing a `` tag""" + proto = cmd.find('proto') + params = cmd.findall('param') + # Begin accumulating prototype and typedef strings + pdecl = self.genOpts.apicall + tdecl = 'typedef ' + + # Insert the function return type/name. + # For prototypes, add APIENTRY macro before the name + # For typedefs, add (APIENTRY *) around the name and + # use the PFN_cmdnameproc naming convention. + # Done by walking the tree for element by element. + # etree has elem.text followed by (elem[i], elem[i].tail) + # for each child element and any following text + # Leading text + pdecl += noneStr(proto.text) + tdecl += noneStr(proto.text) + # For each child element, if it's a wrap in appropriate + # declaration. Otherwise append its contents and tail contents. + for elem in proto: + text = noneStr(elem.text) + tail = noneStr(elem.tail) + if elem.tag == 'name': + pdecl += self.makeProtoName(text, tail) + tdecl += self.makeTypedefName(text, tail) + else: + pdecl += text + tail + tdecl += text + tail + + if self.genOpts.alignFuncParam == 0: + # Squeeze out multiple spaces - there is no indentation + pdecl = ' '.join(pdecl.split()) + tdecl = ' '.join(tdecl.split()) + + # Now add the parameter declaration list, which is identical + # for prototypes and typedefs. Concatenate all the text from + # a node without the tags. No tree walking required + # since all tags are ignored. 
+ # Uses: self.indentFuncProto + # self.indentFuncPointer + # self.alignFuncParam + n = len(params) + # Indented parameters + if n > 0: + indentdecl = '(\n' + indentdecl += ',\n'.join(self.makeCParamDecl(p, self.genOpts.alignFuncParam) + for p in params) + indentdecl += ');' + else: + indentdecl = '(void);' + # Non-indented parameters + paramdecl = '(' + if n > 0: + paramnames = [] + if self.misracppstyle(): + for p in params: + param = '' + firstIter = True; + for t in p.itertext(): + if (firstIter): + prefix = t + firstIter = False + else: + # Change pointer type order from e.g. "const void *" to "void const *". + # If the string starts with 'const', reorder it to be after the first type. + if (prefix.find('const ') != -1): + param += prefix.replace('const ', '') + t + ' const ' + else: + param += prefix + t + # Clear prefix for subsequent iterations + prefix = '' + paramnames.append(param); + else: + paramnames = (''.join(t for t in p.itertext()) + for p in params) + paramdecl += ', '.join(paramnames) + else: + paramdecl += 'void' + paramdecl += ");" + return [pdecl + indentdecl, tdecl + paramdecl] + + def newline(self): + """Print a newline to the output file (utility function)""" + write('', file=self.outFile) + + def setRegistry(self, registry): + self.registry = registry diff --git a/xml/pygenerator.py b/xml/pygenerator.py new file mode 100644 index 000000000..b2e76e66d --- /dev/null +++ b/xml/pygenerator.py @@ -0,0 +1,365 @@ +#!/usr/bin/python3 -i +# +# Copyright 2013-2023 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +import sys +from generator import OutputGenerator, enquote, noneStr, write +import pprint + +class PyOutputGenerator(OutputGenerator): + """PyOutputGenerator - subclass of OutputGenerator. + Generates Python data structures describing API names and relationships. + Similar to DocOutputGenerator, but writes a single file.""" + + def apiName(self, name): + """Return True if name is in the reserved API namespace. 
+ + Delegates to the conventions object. """ + return self.genOpts.conventions.is_api_name(name) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # Track features being generated + self.features = [] + + # Reverse map from interface names to features requiring them + self.apimap = {} + + def beginFile(self, genOpts): + OutputGenerator.beginFile(self, genOpts) + # + # Dictionaries are keyed by the name of the entity (e.g. + # self.structs is keyed by structure names). Values are + # the names of related entities (e.g. structs contain + # a list of type names of members, enums contain a list + # of enumerants belong to the enumerated type, etc.), or + # just None if there are no directly related entities. + # + # Collect the mappings, then emit the Python script in endFile + self.basetypes = {} + self.consts = {} + self.enums = {} + self.flags = {} + self.funcpointers = {} + self.protos = {} + self.structs = {} + self.handles = {} + self.defines = {} + self.alias = {} + # Dictionary containing the type of a type name + # (e.g. the string name of the dictionary with its contents). + self.typeCategory = {} + self.mapDict = {} + + def addInterfaceMapping(self, api, feature, required): + """Add a reverse mapping in self.apimap from an API to a feature + requiring that API. + + - api - name of the API + - feature - name of the feature requiring it + - required - None, or an additional feature dependency within + 'feature' """ + + # Each entry in self.apimap contains one or more + # ( feature, required ) tuples. + deps = ( feature, required ) + + if api in self.apimap: + self.apimap[api].append(deps) + else: + self.apimap[api] = [ deps ] + + def mapInterfaceKeys(self, feature, key): + """Construct reverse mapping of APIs to features requiring them in + self.apimap. 
+ + - feature - name of the feature being generated + - key - API category - 'define', 'basetype', etc.""" + + dict = self.featureDictionary[feature][key] + + if dict: + # Not clear why handling of command vs. type APIs is different - + # see interfacedocgenerator.py, which this was based on. + if key == 'command': + for required in dict: + for api in dict[required]: + self.addInterfaceMapping(api, feature, required) + else: + for required in dict: + for parent in dict[required]: + for api in dict[required][parent]: + self.addInterfaceMapping(api, feature, required) + + def mapInterfaces(self, feature): + """Construct reverse mapping of APIs to features requiring them in + self.apimap. + + - feature - name of the feature being generated""" + + # Map each category of interface + self.mapInterfaceKeys(feature, 'basetype') + self.mapInterfaceKeys(feature, 'bitmask') + self.mapInterfaceKeys(feature, 'command') + self.mapInterfaceKeys(feature, 'define') + self.mapInterfaceKeys(feature, 'enum') + self.mapInterfaceKeys(feature, 'enumconstant') + self.mapInterfaceKeys(feature, 'funcpointer') + self.mapInterfaceKeys(feature, 'handle') + self.mapInterfaceKeys(feature, 'include') + self.mapInterfaceKeys(feature, 'struct') + self.mapInterfaceKeys(feature, 'union') + + def endFile(self): + # Print out all the dictionaries as Python strings. 
+ # Could just print(dict) but that's not human-readable + dicts = ( [ self.basetypes, 'basetypes' ], + [ self.consts, 'consts' ], + [ self.enums, 'enums' ], + [ self.flags, 'flags' ], + [ self.funcpointers, 'funcpointers' ], + [ self.protos, 'protos' ], + [ self.structs, 'structs' ], + [ self.handles, 'handles' ], + [ self.defines, 'defines' ], + [ self.typeCategory, 'typeCategory' ], + [ self.alias, 'alias' ] ) + for (entry_dict, name) in dicts: + write(name + ' = {}', file=self.outFile) + for key in sorted(entry_dict.keys()): + write(name + '[' + enquote(key) + '] = ', entry_dict[key], + file=self.outFile) + + # Dictionary containing the relationships of a type + # (e.g. a dictionary with each related type as keys). + write('mapDict = {}', file=self.outFile) + + # Could just print(self.mapDict), but prefer something + # human-readable and stable-ordered + for baseType in sorted(self.mapDict.keys()): + write('mapDict[' + enquote(baseType) + '] = ', file=self.outFile, end='') + pprint.pprint(self.mapDict[baseType], self.outFile) + + # Generate feature <-> interface mappings + for feature in self.features: + self.mapInterfaces(feature) + + # Write out the reverse map from APIs to requiring features + write('requiredBy = {}', file=self.outFile) + + for api in sorted(self.apimap): + # Construct list of requirements as Python list arguments + ##reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in self.apimap[api]) + ##write('requiredBy[{}] = ( {} )'.format(enquote(api), reqs), file=self.outFile) + + # Ideally these would be sorted by dep[0] as well + reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in self.apimap[api]) + write('requiredBy[{}] = {}'.format(enquote(api), pprint.saferepr(self.apimap[api])), file=self.outFile) + + OutputGenerator.endFile(self) + + def beginFeature(self, interface, emit): + # Start processing in superclass + OutputGenerator.beginFeature(self, interface, emit) + + # Add this feature to 
the list being tracked + self.features.append( self.featureName ) + + def endFeature(self): + # Finish processing in superclass + OutputGenerator.endFeature(self) + + def addName(self, entry_dict, name, value): + """Add a string entry to the dictionary, quoting it so it gets printed + out correctly in self.endFile().""" + entry_dict[name] = enquote(value) + + def addMapping(self, baseType, refType): + """Add a mapping between types to mapDict. + + Only include API types, so we don't end up with a lot of useless uint32_t and void types.""" + if not self.apiName(baseType) or not self.apiName(refType): + self.logMsg('diag', 'PyOutputGenerator::addMapping: IGNORE map from', baseType, '<->', refType) + return + + self.logMsg('diag', 'PyOutputGenerator::addMapping: map from', + baseType, '<->', refType) + + if baseType not in self.mapDict: + baseDict = {} + self.mapDict[baseType] = baseDict + else: + baseDict = self.mapDict[baseType] + if refType not in self.mapDict: + refDict = {} + self.mapDict[refType] = refDict + else: + refDict = self.mapDict[refType] + + baseDict[refType] = None + refDict[baseType] = None + + def genType(self, typeinfo, name, alias): + """Generate type. + + - For 'struct' or 'union' types, defer to genStruct() to + add to the dictionary. + - For 'bitmask' types, add the type name to the 'flags' dictionary, + with the value being the corresponding 'enums' name defining + the acceptable flag bits. + - For 'enum' types, add the type name to the 'enums' dictionary, + with the value being '@STOPHERE@' (because this case seems + never to happen). + - For 'funcpointer' types, add the type name to the 'funcpointers' + dictionary. 
+ - For 'handle' and 'define' types, add the handle or #define name + to the 'struct' dictionary, because that's how the spec sources + tag these types even though they aren't structs.""" + OutputGenerator.genType(self, typeinfo, name, alias) + typeElem = typeinfo.elem + # If the type is a struct type, traverse the embedded <member> tags + # generating a structure. Otherwise, emit the tag text. + category = typeElem.get('category') + + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, name, category) + + if category in ('struct', 'union'): + self.genStruct(typeinfo, name, alias) + else: + if alias: + # Add name -> alias mapping + self.addName(self.alias, name, alias) + + # Always emit an alias (?!) + count = 1 + + # May want to only emit full type definition when not an alias? + else: + # Extract the type name + # (from self.genOpts). Copy other text through unchanged. + # If the resulting text is an empty string, don't emit it. + count = len(noneStr(typeElem.text)) + for elem in typeElem: + count += len(noneStr(elem.text)) + len(noneStr(elem.tail)) + + if count > 0: + if category == 'bitmask': + requiredEnum = typeElem.get('requires') + self.addName(self.flags, name, requiredEnum) + + # This happens when the Flags type is defined, but no + # FlagBits are defined yet. + if requiredEnum is not None: + self.addMapping(name, requiredEnum) + elif category == 'enum': + # This case does not seem to come up. It nominally would + # result from + # <type name="Something" category="enum"/>, + # but the output generator doesn't emit them directly. + self.logMsg('warn', 'PyOutputGenerator::genType: invalid \'enum\' category for name:', name) + elif category == 'funcpointer': + self.funcpointers[name] = None + elif category == 'handle': + self.handles[name] = None + elif category == 'define': + self.defines[name] = None + elif category == 'basetype': + # Don't add an entry for base types that are not API types + # e.g.
an API Bool type gets an entry, uint32_t does not + if self.apiName(name): + self.basetypes[name] = None + self.addName(self.typeCategory, name, 'basetype') + else: + self.logMsg('diag', 'PyOutputGenerator::genType: unprocessed type:', name, 'category:', category) + else: + self.logMsg('diag', 'PyOutputGenerator::genType: unprocessed type:', name) + + def genStruct(self, typeinfo, typeName, alias): + """Generate struct (e.g. C "struct" type). + + Add the struct name to the 'structs' dictionary, with the + value being an ordered list of the struct member names.""" + OutputGenerator.genStruct(self, typeinfo, typeName, alias) + + if alias: + # Add name -> alias mapping + self.addName(self.alias, typeName, alias) + else: + # May want to only emit definition on this branch + True + + members = [member.text for member in typeinfo.elem.findall('.//member/name')] + self.structs[typeName] = members + memberTypes = [member.text for member in typeinfo.elem.findall('.//member/type')] + for member_type in memberTypes: + self.addMapping(typeName, member_type) + + def genGroup(self, groupinfo, groupName, alias): + """Generate group (e.g. C "enum" type). + + These are concatenated together with other types. + + - Add the enum type name to the 'enums' dictionary, with + the value being an ordered list of the enumerant names. + - Add each enumerant name to the 'consts' dictionary, with + the value being the enum type the enumerant is part of.""" + OutputGenerator.genGroup(self, groupinfo, groupName, alias) + groupElem = groupinfo.elem + + if alias: + # Add name -> alias mapping + self.addName(self.alias, groupName, alias) + else: + # May want to only emit definition on this branch + True + + # Loop over the nested 'enum' tags. 
+ enumerants = [elem.get('name') for elem in groupElem.findall('enum')] + for name in enumerants: + self.addName(self.consts, name, groupName) + self.enums[groupName] = enumerants + + def genEnum(self, enuminfo, name, alias): + """Generate enumerant (compile-time constants). + + - Add the constant name to the 'consts' dictionary, with the + value being None to indicate that the constant isn't + an enumeration value.""" + OutputGenerator.genEnum(self, enuminfo, name, alias) + + if name not in self.consts: + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, name, 'consts') + self.consts[name] = None + # Otherwise, don't add it to the consts dictionary because it's + # already present. This happens due to the generator 'reparentEnums' + # parameter being False, so each extension enum appears in both the + # <enums> type and in the <extension> or <feature> it originally + # came from. + + def genCmd(self, cmdinfo, name, alias): + """Generate command. + + - Add the command name to the 'protos' dictionary, with the + value being an ordered list of the parameter names.""" + OutputGenerator.genCmd(self, cmdinfo, name, alias) + + if alias: + # Add name -> alias mapping + self.addName(self.alias, name, alias) + else: + # May want to only emit definition on this branch + True + + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, name, 'protos') + + params = [param.text for param in cmdinfo.elem.findall('param/name')] + self.protos[name] = params + paramTypes = [param.text for param in cmdinfo.elem.findall('param/type')] + for param_type in paramTypes: + self.addMapping(name, param_type) diff --git a/xml/realign.py b/xml/realign.py new file mode 100644 index 000000000..b59865b3d --- /dev/null +++ b/xml/realign.py @@ -0,0 +1,47 @@ +#!/usr/bin/python3 +# +# Copyright 2013-2023 The Khronos Group Inc.
+# SPDX-License-Identifier: Apache-2.0 + +# Usage: realign [infile] > outfile +# Used to realign XML tags in the Vulkan registry after it's operated on by +# some other filter, since whitespace inside a tag isn't part of the +# internal representation. + +import copy, sys, string, re + +def realignXML(fp): + patterns = [ + [ '(^ *\ 1): + realignXML(open(sys.argv[1], 'r', encoding='utf-8')) + else: + realignXML(sys.stdin) diff --git a/xml/reflib.py b/xml/reflib.py new file mode 100644 index 000000000..426a1811b --- /dev/null +++ b/xml/reflib.py @@ -0,0 +1,663 @@ +#!/usr/bin/python3 +# +# Copyright 2016-2023 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +# Utility functions for automatic ref page generation and other script stuff + +import io +import re +import sys +import subprocess + +# global errFile, warnFile, diagFile + +errFile = sys.stderr +warnFile = sys.stdout +diagFile = None +logSourcefile = None +logProcname = None +logLine = None + +def unescapeQuotes(s): + """Remove \' escape sequences in a string (refpage description)""" + return s.replace('\\\'', '\'') + +def write(*args, **kwargs ): + file = kwargs.pop('file',sys.stdout) + end = kwargs.pop('end','\n') + file.write(' '.join(str(arg) for arg in args)) + file.write(end) + +def setLogSourcefile(filename): + """Metadata which may be printed (if not None) for diagnostic messages""" + global logSourcefile + logSourcefile = filename + +def setLogProcname(procname): + global logProcname + logProcname = procname + +def setLogLine(line): + global logLine + logLine = line + +def logHeader(severity): + """Generate prefix for a diagnostic line using metadata and severity""" + global logSourcefile, logProcname, logLine + + msg = severity + ': ' + if logProcname: + msg = msg + ' in ' + logProcname + if logSourcefile: + msg = msg + ' for ' + logSourcefile + if logLine: + msg = msg + ' line ' + str(logLine) + return msg + ' ' + +def setLogFile(setDiag, setWarn, filename): + """Set the file handle 
to log either or both warnings and diagnostics to. + + - setDiag and setWarn are True if the corresponding handle is to be set. + - filename is None for no logging, '-' for stdout, or a pathname.""" + global diagFile, warnFile + + if filename is None: + return + + if filename == '-': + fp = sys.stdout + else: + fp = open(filename, 'w', encoding='utf-8') + + if setDiag: + diagFile = fp + if setWarn: + warnFile = fp + +def logDiag(*args, **kwargs): + file = kwargs.pop('file', diagFile) + end = kwargs.pop('end','\n') + if file is not None: + file.write(logHeader('DIAG') + ' '.join(str(arg) for arg in args)) + file.write(end) + +def logWarn(*args, **kwargs): + file = kwargs.pop('file', warnFile) + end = kwargs.pop('end','\n') + if file is not None: + file.write(logHeader('WARN') + ' '.join(str(arg) for arg in args)) + file.write(end) + +def logErr(*args, **kwargs): + file = kwargs.pop('file', errFile) + end = kwargs.pop('end','\n') + + strfile = io.StringIO() + strfile.write(logHeader('ERROR') + ' '.join(str(arg) for arg in args)) + strfile.write(end) + + if file is not None: + file.write(strfile.getvalue()) + sys.exit(1) + +def isempty(s): + """Return True if s is nothing but white space, False otherwise""" + return len(''.join(s.split())) == 0 + +class pageInfo: + """Information about a ref page relative to the file it's extracted from.""" + def __init__(self): + self.extractPage = True + """True if page should be extracted""" + + self.Warning = None + """string warning if page is suboptimal or can't be generated""" + + self.embed = False + """False or the name of the ref page this include is embedded within""" + + self.type = None + """'structs', 'protos', 'funcpointers', 'flags', 'enums'""" + + self.name = None + """struct/proto/enumerant/etc. 
name""" + + self.desc = None + """short description of ref page""" + + self.begin = None + """index of first line of the page (heuristic or // refBegin)""" + + self.include = None + """index of include:: line defining the page""" + + self.param = None + """index of first line of parameter/member definitions""" + + self.body = None + """index of first line of body text""" + + self.validity = None + """index of validity include""" + + self.end = None + """index of last line of the page (heuristic validity include, or // refEnd)""" + + self.alias = '' + """aliases of this name, if supplied, or ''""" + + self.refs = '' + """cross-references on // refEnd line, if supplied""" + + self.spec = None + """'spec' attribute in refpage open block, if supplied, or None for the default ('api') type""" + + self.anchor = None + """'anchor' attribute in refpage open block, if supplied, or inferred to be the same as the 'name'""" + +def printPageInfoField(desc, line, file): + """Print a single field of a pageInfo struct, possibly None. 
+ + - desc - string description of field + - line - field value or None + - file - indexed by line""" + if line is not None: + logDiag(desc + ':', line + 1, '\t-> ', file[line], end='') + else: + logDiag(desc + ':', line) + +def printPageInfo(pi, file): + """Print out fields of a pageInfo struct + + - pi - pageInfo + - file - indexed by pageInfo""" + logDiag('TYPE: ', pi.type) + logDiag('NAME: ', pi.name) + logDiag('WARNING:', pi.Warning) + logDiag('EXTRACT:', pi.extractPage) + logDiag('EMBED: ', pi.embed) + logDiag('DESC: ', pi.desc) + printPageInfoField('BEGIN ', pi.begin, file) + printPageInfoField('INCLUDE ', pi.include, file) + printPageInfoField('PARAM ', pi.param, file) + printPageInfoField('BODY ', pi.body, file) + printPageInfoField('VALIDITY', pi.validity, file) + printPageInfoField('END ', pi.end, file) + logDiag('REFS: "' + pi.refs + '"') + +def prevPara(file, line): + """Go back one paragraph from the specified line and return the line number + of the first line of that paragraph. + + Paragraphs are delimited by blank lines. It is assumed that the + current line is the first line of a paragraph. + + - file is an array of strings + - line is the starting point (zero-based)""" + # Skip over current paragraph + while (line >= 0 and not isempty(file[line])): + line = line - 1 + # Skip over white space + while (line >= 0 and isempty(file[line])): + line = line - 1 + # Skip to first line of previous paragraph + while (line >= 1 and not isempty(file[line-1])): + line = line - 1 + return line + +def nextPara(file, line): + """Go forward one paragraph from the specified line and return the line + number of the first line of that paragraph. + + Paragraphs are delimited by blank lines. It is assumed that the + current line is standalone (which is bogus). 
+ + - file is an array of strings + - line is the starting point (zero-based)""" + maxLine = len(file) - 1 + # Skip over current paragraph + while (line != maxLine and not isempty(file[line])): + line = line + 1 + # Skip over white space + while (line != maxLine and isempty(file[line])): + line = line + 1 + return line + +def lookupPage(pageMap, name): + """Return (creating if needed) the pageInfo entry in pageMap for name""" + if name not in pageMap: + pi = pageInfo() + pi.name = name + pageMap[name] = pi + else: + pi = pageMap[name] + return pi + +def loadFile(filename): + """Load a file into a list of strings. Return the list or None on failure""" + try: + fp = open(filename, 'r', encoding='utf-8') + except: + logWarn('Cannot open file', filename, ':', sys.exc_info()[0]) + return None + + file = fp.readlines() + fp.close() + + return file + +def clampToBlock(line, minline, maxline): + """Clamp a line number to be in the range [minline,maxline]. + + If the line number is None, just return it. + If minline is None, don't clamp to that value.""" + if line is None: + return line + if minline and line < minline: + return minline + if line > maxline: + return maxline + + return line + +def fixupRefs(pageMap, specFile, file): + """Fill in missing fields in pageInfo structures, to the extent they can be + inferred. + + - pageMap - dictionary of pageInfo structures + - specFile - filename + - file - list of strings making up the file, indexed by pageInfo""" + # All potential ref pages are now in pageMap. Process them to + # identify actual page start/end/description boundaries, if + # not already determined from the text. + for name in sorted(pageMap.keys()): + pi = pageMap[name] + + # # If nothing is found but an include line with no begin, validity, + # # or end, this is not intended as a ref page (yet). Set the begin + # # line to the include line, so autogeneration can at least + # # pull the include out, but mark it not to be extracted. 
+ # # Examples include the host sync table includes in + # # chapters/fundamentals.txt and the table of Vk*Flag types in + # # appendices/boilerplate.txt. + # if pi.begin is None and pi.validity is None and pi.end is None: + # pi.begin = pi.include + # pi.extractPage = False + # pi.Warning = 'No begin, validity, or end lines identified' + # continue + + # Using open block delimiters, ref pages must *always* have a + # defined begin and end. If either is undefined, that's fatal. + if pi.begin is None: + pi.extractPage = False + pi.Warning = 'Can\'t identify begin of ref page open block' + continue + + if pi.end is None: + pi.extractPage = False + pi.Warning = 'Can\'t identify end of ref page open block' + continue + + # If there's no description of the page, infer one from the type + if pi.desc is None: + if pi.type is not None: + # pi.desc = pi.type[0:len(pi.type)-1] + ' (no short description available)' + pi.Warning = 'No short description available; could infer from the type and name' + else: + pi.extractPage = False + pi.Warning = 'No short description available, cannot infer from the type' + continue + + # Try to determine where the parameter and body sections of the page + # begin. funcpointer, proto, and struct pages infer the location of + # the parameter and body sections. Other pages infer the location of + # the body, but have no parameter sections. + if pi.include is not None: + if pi.type in ['funcpointers', 'protos', 'structs']: + pi.param = nextPara(file, pi.include) + if pi.body is None: + pi.body = nextPara(file, pi.param) + else: + if pi.body is None: + pi.body = nextPara(file, pi.include) + else: + pi.Warning = 'Page does not have an API definition include::' + + # It's possible for the inferred param and body lines to run past + # the end of block, if, for example, there is no parameter section. 
+ pi.param = clampToBlock(pi.param, pi.include, pi.end) + pi.body = clampToBlock(pi.body, pi.param, pi.end) + + # We can get to this point with .include, .param, and .validity + # all being None, indicating those sections weren't found. + + logDiag('fixupRefs: after processing,', pi.name, 'looks like:') + printPageInfo(pi, file) + + # Now that all the valid pages have been found, try to make some + # inferences about invalid pages. + # + # If a reference without a .end is entirely inside a valid reference, + # then it's intentionally embedded - may want to create an indirect + # page that links into the embedding page. This is done by a very + # inefficient double loop, but the loop depth is small. + for name in sorted(pageMap.keys()): + pi = pageMap[name] + + if pi.end is None: + for embedName in sorted(pageMap.keys()): + logDiag('fixupRefs: comparing', pi.name, 'to', embedName) + embed = pageMap[embedName] + # Don't check embeddings which are themselves invalid + if not embed.extractPage: + logDiag('Skipping check for embedding in:', embed.name) + continue + if embed.begin is None or embed.end is None: + logDiag('fixupRefs:', name + ':', + 'can\'t compare to unanchored ref:', embed.name, + 'in', specFile, 'at line', pi.include ) + printPageInfo(pi, file) + printPageInfo(embed, file) + # If an embed is found, change the error to a warning + elif (pi.include is not None and pi.include >= embed.begin and + pi.include <= embed.end): + logDiag('fixupRefs: Found embed for:', name, + 'inside:', embedName, + 'in', specFile, 'at line', pi.include ) + pi.embed = embed.name + pi.Warning = 'Embedded in definition for ' + embed.name + break + else: + logDiag('fixupRefs: No embed match for:', name, + 'inside:', embedName, 'in', specFile, + 'at line', pi.include) + + +# Patterns used to recognize interesting lines in an asciidoc source file. +# These patterns are only compiled once. 
+INCSVAR_DEF = re.compile(r':INCS-VAR: (?P<value>.*)') +endifPat = re.compile(r'^endif::(?P<condition>[\w_+,]+)\[\]') +beginPat = re.compile(r'^\[open,(?P<attribs>refpage=.*)\]') +# attribute key/value pairs of an open block +attribStr = r"([a-z]+)='([^'\\]*(?:\\.[^'\\]*)*)'" +attribPat = re.compile(attribStr) +bodyPat = re.compile(r'^// *refBody') +errorPat = re.compile(r'^// *refError') + +# This regex transplanted from check_spec_links +# It looks for either OpenXR or Vulkan generated file conventions, and for +# the api/validity include (generated_type), protos/struct/etc path +# (category), and API name (entity_name). It could be put into the API +# conventions object. +INCLUDE = re.compile( + r'include::(?P<directory_traverse>((../){1,4}|\{INCS-VAR\}/|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+).txt[\[][\]]') + + +def findRefs(file, filename): + """Identify reference pages in a list of strings, returning a dictionary of + pageInfo entries for each one found, or None on failure.""" + setLogSourcefile(filename) + setLogProcname('findRefs') + + # To reliably detect the open blocks around reference pages, we must + # first detect the '[open,refpage=...]' markup delimiting the block; + # skip past the '--' block delimiter on the next line; and identify the + # '--' block delimiter closing the page. + # This can't be done solely with pattern matching, and requires state to + # track 'inside/outside block'.
+ # When looking for open blocks, possible states are: + # 'outside' - outside a block + # 'start' - have found the '[open...]' line + # 'inside' - have found the following '--' line + openBlockState = 'outside' + + # Dictionary of interesting line numbers and strings related to an API + # name + pageMap = {} + + numLines = len(file) + line = 0 + + # Track the pageInfo object corresponding to the current open block + pi = None + incsvar = None + + while (line < numLines): + setLogLine(line) + + # Look for a file-wide definition + matches = INCSVAR_DEF.match(file[line]) + if matches: + incsvar = matches.group('value') + logDiag('Matched INCS-VAR definition:', incsvar) + + line = line + 1 + continue + + # Perform INCS-VAR substitution immediately. + if incsvar and '{INCS-VAR}' in file[line]: + newLine = file[line].replace('{INCS-VAR}', incsvar) + logDiag('PERFORMING SUBSTITUTION', file[line], '->', newLine) + file[line] = newLine + + # Only one of the patterns can possibly match. Add it to + # the dictionary for that name. + + # [open,refpage=...] 
starting a refpage block + matches = beginPat.search(file[line]) + if matches is not None: + logDiag('Matched open block pattern') + attribs = matches.group('attribs') + + # If the previous open block wasn't closed, raise an error + if openBlockState != 'outside': + logErr('Nested open block starting at line', line, 'of', + filename) + + openBlockState = 'start' + + # Parse the block attributes + matches = attribPat.findall(attribs) + + # Extract each attribute + name = None + desc = None + refpage_type = None + spec_type = None + anchor = None + alias = None + xrefs = None + + for (key,value) in matches: + logDiag('got attribute', key, '=', value) + if key == 'refpage': + name = value + elif key == 'desc': + desc = unescapeQuotes(value) + elif key == 'type': + refpage_type = value + elif key == 'spec': + spec_type = value + elif key == 'anchor': + anchor = value + elif key == 'alias': + alias = value + elif key == 'xrefs': + xrefs = value + else: + logWarn('unknown open block attribute:', key) + + if name is None or desc is None or refpage_type is None: + logWarn('missing one or more required open block attributes:' + 'refpage, desc, or type') + # Leave pi is None so open block delimiters are ignored + else: + pi = lookupPage(pageMap, name) + pi.desc = desc + # Must match later type definitions in interface/validity includes + pi.type = refpage_type + pi.spec = spec_type + pi.anchor = anchor + if alias: + pi.alias = alias + if xrefs: + pi.refs = xrefs + logDiag('open block for', name, 'added DESC =', desc, + 'TYPE =', refpage_type, 'ALIAS =', alias, + 'XREFS =', xrefs, 'SPEC =', spec_type, + 'ANCHOR =', anchor) + + line = line + 1 + continue + + # '--' starting or ending and open block + if file[line].rstrip() == '--': + if openBlockState == 'outside': + # Only refpage open blocks should use -- delimiters + logWarn('Unexpected double-dash block delimiters') + elif openBlockState == 'start': + # -- delimiter following [open,refpage=...] 
+ openBlockState = 'inside' + + if pi is None: + logWarn('no pageInfo available for opening -- delimiter') + else: + pi.begin = line + 1 + logDiag('opening -- delimiter: added BEGIN =', pi.begin) + elif openBlockState == 'inside': + # -- delimiter ending an open block + if pi is None: + logWarn('no pageInfo available for closing -- delimiter') + else: + pi.end = line - 1 + logDiag('closing -- delimiter: added END =', pi.end) + + openBlockState = 'outside' + pi = None + else: + logWarn('unknown openBlockState:', openBlockState) + + line = line + 1 + continue + + matches = INCLUDE.search(file[line]) + if matches is not None: + # Something got included, not sure what yet. + gen_type = matches.group('generated_type') + refpage_type = matches.group('category') + name = matches.group('entity_name') + + # This will never match in OpenCL + if gen_type == 'validity': + logDiag('Matched validity pattern') + if pi is not None: + if pi.type and refpage_type != pi.type: + logWarn('ERROR: pageMap[' + name + '] type:', + pi.type, 'does not match type:', refpage_type) + pi.type = refpage_type + pi.validity = line + logDiag('added TYPE =', pi.type, 'VALIDITY =', pi.validity) + else: + logWarn('validity include:: line NOT inside block') + + line = line + 1 + continue + + if gen_type == 'api': + logDiag('Matched include pattern') + if pi is not None: + if pi.include is not None: + logDiag('found multiple includes for this block') + if pi.type and refpage_type != pi.type: + logWarn('ERROR: pageMap[' + name + '] type:', + pi.type, 'does not match type:', refpage_type) + pi.type = refpage_type + pi.include = line + logDiag('added TYPE =', pi.type, 'INCLUDE =', pi.include) + else: + logWarn('interface include:: line NOT inside block') + + line = line + 1 + continue + + logDiag('ignoring unrecognized include line ', matches.group()) + + # Vulkan 1.1 markup allows the last API include construct to be + # followed by an asciidoctor endif:: construct (and also preceded, + # at some 
distance). + # This looks for endif:: immediately following an include:: line + # and, if found, moves the include boundary to this line. + matches = endifPat.search(file[line]) + if matches is not None and pi is not None: + if pi.include == line - 1: + logDiag('Matched endif pattern following include; moving include') + pi.include = line + else: + logDiag('Matched endif pattern (not following include)') + + line = line + 1 + continue + + matches = bodyPat.search(file[line]) + if matches is not None: + logDiag('Matched // refBody pattern') + if pi is not None: + pi.body = line + logDiag('added BODY =', pi.body) + else: + logWarn('// refBody line NOT inside block') + + line = line + 1 + continue + + # OpenCL spec uses // refError to tag "validity" (Errors) language, + # instead of /validity/ includes. + matches = errorPat.search(file[line]) + if matches is not None: + logDiag('Matched // refError pattern') + if pi is not None: + pi.validity = line + logDiag('added VALIDITY (refError) =', pi.validity) + else: + logWarn('// refError line NOT inside block') + + line = line + 1 + continue + + line = line + 1 + continue + + if pi is not None: + logErr('Unclosed open block at EOF!') + + setLogSourcefile(None) + setLogProcname(None) + setLogLine(None) + + return pageMap + + +def getBranch(): + """Determine current git branch + + Returns (branch name, ''), or (None, stderr output) if the branch name + can't be determined""" + + command = [ 'git', 'symbolic-ref', '--short', 'HEAD' ] + results = subprocess.run(command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + # git command failed + if len(results.stderr) > 0: + return (None, results.stderr) + + # Remove newline from output and convert to a string + branch = results.stdout.rstrip().decode() + if len(branch) > 0: + # Strip trailing newline + branch = results.stdout.decode()[0:-1] + + return (branch, '') diff --git a/xml/reg.py b/xml/reg.py new file mode 100644 index 000000000..d78ecde89 --- /dev/null +++ 
b/xml/reg.py @@ -0,0 +1,1397 @@ +#!/usr/bin/python3 -i +# +# Copyright 2013-2023 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Types and classes for manipulating an API registry.""" + +import copy +import re +import sys +import xml.etree.ElementTree as etree +from collections import defaultdict, namedtuple +from generator import OutputGenerator, GeneratorOptions, write +import pdb + +def apiNameMatch(str, supported): + """Return whether a required api name matches a pattern specified for an + XML 'api' attribute or 'supported' attribute. + + - str - api name such as 'vulkan' or 'openxr' + - supported - comma-separated list of XML API names""" + + return (str is not None and str in supported.split(',')) + + +def matchAPIProfile(api, profile, elem): + """Return whether an API and profile + being generated matches an element's profile + + - api - string naming the API to match + - profile - string naming the profile to match + - elem - Element which (may) have 'api' and 'profile' + attributes to match to. + + If a tag is not present in the Element, the corresponding API + or profile always matches. + + Otherwise, the tag must exactly match the API or profile. + + Thus, if 'profile' = core: + + - `<remove>` with no attribute will match + - `<remove profile="core">` will match + - `<remove profile="compatibility">` will not match + + Possible match conditions: + + ``` + Requested Element + Profile Profile + --------- -------- + None None Always matches + 'string' None Always matches + None 'string' Does not match. Can't generate multiple APIs + or profiles, so if an API/profile constraint + is present, it must be asked for explicitly. + 'string' 'string' Strings must match + ``` + + ** In the future, we will allow regexes for the attributes, + not just strings, so that `api="^(gl|gles2)"` will match.
Even + this isn't really quite enough, we might prefer something + like `"gl(core)|gles1(common-lite)"`.""" + # Match 'api', if present + elem_api = elem.get('api') + if elem_api: + if api is None: + raise UserWarning("No API requested, but 'api' attribute is present with value '" + + elem_api + "'") + elif api != elem_api: + # Requested API doesn't match attribute + return False + elem_profile = elem.get('profile') + if elem_profile: + if profile is None: + raise UserWarning("No profile requested, but 'profile' attribute is present with value '" + + elem_profile + "'") + elif profile != elem_profile: + # Requested profile doesn't match attribute + return False + return True + + +class BaseInfo: + """Base class for information about a registry feature + (type/group/enum/command/API/extension). + + Represents the state of a registry feature, used during API generation. + """ + + def __init__(self, elem): + self.required = False + """should this feature be defined during header generation + (has it been removed by a profile or version)?""" + + self.declared = False + "has this feature been defined already?" + + self.elem = elem + "etree Element for this feature" + + def resetState(self): + """Reset required/declared to initial values. Used + prior to generating a new API interface.""" + self.required = False + self.declared = False + + def compareKeys(self, info, key, required = False): + """Return True if self.elem and info.elem have the same attribute + value for key. + If 'required' is not True, also returns True if neither element + has an attribute value for key.""" + + if required and key not in self.elem.keys(): + return False + return self.elem.get(key) == info.elem.get(key) + + def compareElem(self, info, infoName): + """Return True if self.elem and info.elem have the same definition. 
+ info - the other object + infoName - 'type' / 'group' / 'enum' / 'command' / 'feature' / + 'extension'""" + + if infoName == 'enum': + if self.compareKeys(info, 'extends'): + # Either both extend the same type, or no type + if (self.compareKeys(info, 'value', required = True) or + self.compareKeys(info, 'bitpos', required = True)): + # If both specify the same value or bit position, + # they're equal + return True + elif (self.compareKeys(info, 'extnumber') and + self.compareKeys(info, 'offset') and + self.compareKeys(info, 'dir')): + # If both specify the same relative offset, they're equal + return True + elif (self.compareKeys(info, 'alias')): + # If both are aliases of the same value + return True + else: + return False + else: + # The same enum can't extend two different types + return False + else: + # Non-s should never be redefined + return False + + +class TypeInfo(BaseInfo): + """Registry information about a type. No additional state + beyond BaseInfo is required.""" + + def __init__(self, elem): + BaseInfo.__init__(self, elem) + self.additionalValidity = [] + self.removedValidity = [] + + def getMembers(self): + """Get a collection of all member elements for this type, if any.""" + return self.elem.findall('member') + + def resetState(self): + BaseInfo.resetState(self) + self.additionalValidity = [] + self.removedValidity = [] + + +class GroupInfo(BaseInfo): + """Registry information about a group of related enums + in an block, generally corresponding to a C "enum" type.""" + + def __init__(self, elem): + BaseInfo.__init__(self, elem) + + +class EnumInfo(BaseInfo): + """Registry information about an enum""" + + def __init__(self, elem): + BaseInfo.__init__(self, elem) + self.type = elem.get('type') + """numeric type of the value of the tag + ( '' for GLint, 'u' for GLuint, 'ull' for GLuint64 )""" + if self.type is None: + self.type = '' + + +class CmdInfo(BaseInfo): + """Registry information about a command""" + + def __init__(self, elem): + 
BaseInfo.__init__(self, elem) + self.additionalValidity = [] + self.removedValidity = [] + + def getParams(self): + """Get a collection of all param elements for this command, if any.""" + return self.elem.findall('param') + + def resetState(self): + BaseInfo.resetState(self) + self.additionalValidity = [] + self.removedValidity = [] + + +class FeatureInfo(BaseInfo): + """Registry information about an API + or .""" + + def __init__(self, elem): + BaseInfo.__init__(self, elem) + self.name = elem.get('name') + "feature name string (e.g. 'VK_KHR_surface')" + + self.emit = False + "has this feature been defined already?" + + self.sortorder = int(elem.get('sortorder', 0)) + """explicit numeric sort key within feature and extension groups. + Defaults to 0.""" + + # Determine element category (vendor). Only works + # for elements. + if elem.tag == 'feature': + # Element category (vendor) is meaningless for + self.category = 'VERSION' + """category, e.g. VERSION or khr/vendor tag""" + + self.version = elem.get('name') + """feature name string""" + + self.versionNumber = elem.get('number') + """versionNumber - API version number, taken from the 'number' + attribute of . Extensions do not have API version + numbers and are assigned number 0.""" + + self.number = "0" + self.supported = None + else: + # Extract vendor portion of __ + self.category = self.name.split('_', 2)[1] + self.version = "0" + self.versionNumber = "0" + self.number = elem.get('number') + """extension number, used for ordering and for assigning + enumerant offsets. 
features do not have extension + numbers and are assigned number 0.""" + + # If there's no 'number' attribute, use 0, so sorting works + if self.number is None: + self.number = 0 + self.supported = elem.get('supported') + +class SpirvInfo(BaseInfo): + """Registry information about an API + or .""" + + def __init__(self, elem): + BaseInfo.__init__(self, elem) + +class Registry: + """Object representing an API registry, loaded from an XML file.""" + + def __init__(self, gen=None, genOpts=None): + if gen is None: + # If not specified, give a default object so messaging will work + self.gen = OutputGenerator() + else: + self.gen = gen + "Output generator used to write headers / messages" + + if genOpts is None: + self.genOpts = GeneratorOptions() + else: + self.genOpts = genOpts + "Options controlling features to write and how to format them" + + self.gen.registry = self + self.gen.genOpts = self.genOpts + self.gen.genOpts.registry = self + + self.tree = None + "ElementTree containing the root ``" + + self.typedict = {} + "dictionary of TypeInfo objects keyed by type name" + + self.groupdict = {} + "dictionary of GroupInfo objects keyed by group name" + + self.enumdict = {} + "dictionary of EnumInfo objects keyed by enum name" + + self.cmddict = {} + "dictionary of CmdInfo objects keyed by command name" + + self.apidict = {} + "dictionary of FeatureInfo objects for `` elements keyed by API name" + + self.extensions = [] + "list of `` Elements" + + self.extdict = {} + "dictionary of FeatureInfo objects for `` elements keyed by extension name" + + self.spirvextdict = {} + "dictionary of FeatureInfo objects for `` elements keyed by spirv extension name" + + self.spirvcapdict = {} + "dictionary of FeatureInfo objects for `` elements keyed by spirv capability name" + + self.emitFeatures = False + """True to actually emit features for a version / extension, + or False to just treat them as emitted""" + + self.breakPat = None + "regexp pattern to break on when generating 
names" + # self.breakPat = re.compile('VkFenceImportFlagBits.*') + + self.requiredextensions = [] # Hack - can remove it after validity generator goes away + + # ** Global types for automatic source generation ** + # Length Member data + self.commandextensiontuple = namedtuple('commandextensiontuple', + ['command', # The name of the command being modified + 'value', # The value to append to the command + 'extension']) # The name of the extension that added it + self.validextensionstructs = defaultdict(list) + self.commandextensionsuccesses = [] + self.commandextensionerrors = [] + + self.filename = None + + def loadElementTree(self, tree): + """Load ElementTree into a Registry object and parse it.""" + self.tree = tree + self.parseTree() + + def loadFile(self, file): + """Load an API registry XML file into a Registry object and parse it""" + self.filename = file + self.tree = etree.parse(file) + self.parseTree() + + def setGenerator(self, gen): + """Specify output generator object. + + `None` restores the default generator.""" + self.gen = gen + self.gen.setRegistry(self) + + def addElementInfo(self, elem, info, infoName, dictionary): + """Add information about an element to the corresponding dictionary. + + Intended for internal use only. + + - elem - ``/``/``/``/``/``/``/`` Element + - info - corresponding {Type|Group|Enum|Cmd|Feature|Spirv}Info object + - infoName - 'type' / 'group' / 'enum' / 'command' / 'feature' / 'extension' / 'spirvextension' / 'spirvcapability' + - dictionary - self.{type|group|enum|cmd|api|ext|spirvext|spirvcap}dict + + If the Element has an 'api' attribute, the dictionary key is the + tuple (name,api). If not, the key is the name. 
'name' is an + attribute of the Element""" + # self.gen.logMsg('diag', 'Adding ElementInfo.required =', + # info.required, 'name =', elem.get('name')) + api = elem.get('api') + if api: + key = (elem.get('name'), api) + else: + key = elem.get('name') + if key in dictionary: + if not dictionary[key].compareElem(info, infoName): + self.gen.logMsg('warn', 'Attempt to redefine', key, + '(this should not happen)') + else: + True + else: + dictionary[key] = info + + def lookupElementInfo(self, fname, dictionary): + """Find a {Type|Enum|Cmd}Info object by name. + + Intended for internal use only. + + If an object qualified by API name exists, use that. + + - fname - name of type / enum / command + - dictionary - self.{type|enum|cmd}dict""" + key = (fname, self.genOpts.apiname) + if key in dictionary: + # self.gen.logMsg('diag', 'Found API-specific element for feature', fname) + return dictionary[key] + if fname in dictionary: + # self.gen.logMsg('diag', 'Found generic element for feature', fname) + return dictionary[fname] + + return None + + def breakOnName(self, regexp): + """Specify a feature name regexp to break on when generating features.""" + self.breakPat = re.compile(regexp) + + def parseTree(self): + """Parse the registry Element, once created""" + # This must be the Element for the root + self.reg = self.tree.getroot() + + # Create dictionary of registry types from toplevel tags + # and add 'name' attribute to each tag (where missing) + # based on its element. + # + # There's usually one block; more are OK + # Required attributes: 'name' or nested tag contents + self.typedict = {} + for type_elem in self.reg.findall('types/type'): + # If the doesn't already have a 'name' attribute, set + # it from contents of its tag. + if type_elem.get('name') is None: + type_elem.set('name', type_elem.find('name').text) + self.addElementInfo(type_elem, TypeInfo(type_elem), 'type', self.typedict) + + # Create dictionary of registry enum groups from tags. 
+ # + # Required attributes: 'name'. If no name is given, one is + # generated, but that group can't be identified and turned into an + # enum type definition - it's just a container for tags. + self.groupdict = {} + for group in self.reg.findall('enums'): + self.addElementInfo(group, GroupInfo(group), 'group', self.groupdict) + + # Create dictionary of registry enums from tags + # + # tags usually define different namespaces for the values + # defined in those tags, but the actual names all share the + # same dictionary. + # Required attributes: 'name', 'value' + # For containing which have type="enum" or type="bitmask", + # tag all contained s are required. This is a stopgap until + # a better scheme for tagging core and extension enums is created. + self.enumdict = {} + for enums in self.reg.findall('enums'): + required = (enums.get('type') is not None) + for enum in enums.findall('enum'): + enumInfo = EnumInfo(enum) + enumInfo.required = required + self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) + + # Create dictionary of registry commands from tags + # and add 'name' attribute to each tag (where missing) + # based on its element. + # + # There's usually only one block; more are OK. + # Required attributes: 'name' or tag contents + self.cmddict = {} + # List of commands which alias others. Contains + # [ aliasName, element ] + # for each alias + cmdAlias = [] + for cmd in self.reg.findall('commands/command'): + # If the doesn't already have a 'name' attribute, set + # it from contents of its tag. + name = cmd.get('name') + if name is None: + name = cmd.set('name', cmd.find('proto/name').text) + ci = CmdInfo(cmd) + self.addElementInfo(cmd, ci, 'command', self.cmddict) + alias = cmd.get('alias') + if alias: + cmdAlias.append([name, alias, cmd]) + + # Now loop over aliases, injecting a copy of the aliased command's + # Element with the aliased prototype name replaced with the command + # name - if it exists. 
+ for (name, alias, cmd) in cmdAlias: + if alias in self.cmddict: + aliasInfo = self.cmddict[alias] + cmdElem = copy.deepcopy(aliasInfo.elem) + cmdElem.find('proto/name').text = name + cmdElem.set('name', name) + cmdElem.set('alias', alias) + ci = CmdInfo(cmdElem) + # Replace the dictionary entry for the CmdInfo element + self.cmddict[name] = ci + + # @ newString = etree.tostring(base, encoding="unicode").replace(aliasValue, aliasName) + # @elem.append(etree.fromstring(replacement)) + else: + self.gen.logMsg('warn', 'No matching found for command', + cmd.get('name'), 'alias', alias) + + # Create dictionaries of API and extension interfaces + # from toplevel and tags. + self.apidict = {} + for feature in self.reg.findall('feature'): + featureInfo = FeatureInfo(feature) + self.addElementInfo(feature, featureInfo, 'feature', self.apidict) + + # Add additional enums defined only in tags + # to the corresponding enumerated type. + # When seen here, the element, processed to contain the + # numeric enum value, is added to the corresponding + # element, as well as adding to the enum dictionary. It is no + # longer removed from the element it is introduced in. + # Instead, generateRequiredInterface ignores elements + # that extend enumerated types. + # + # For tags which are actually just constants, if there's + # no 'extends' tag but there is a 'value' or 'bitpos' tag, just + # add an EnumInfo record to the dictionary. That works because + # output generation of constants is purely dependency-based, and + # doesn't need to iterate through the XML tags. 
+ for elem in feature.findall('require'): + for enum in elem.findall('enum'): + addEnumInfo = False + groupName = enum.get('extends') + if groupName is not None: + # self.gen.logMsg('diag', 'Found extension enum', + # enum.get('name')) + # Add version number attribute to the element + enum.set('version', featureInfo.version) + # Look up the GroupInfo with matching groupName + if groupName in self.groupdict: + # self.gen.logMsg('diag', 'Matching group', + # groupName, 'found, adding element...') + gi = self.groupdict[groupName] + gi.elem.append(copy.deepcopy(enum)) + else: + self.gen.logMsg('warn', 'NO matching group', + groupName, 'for enum', enum.get('name'), 'found.') + addEnumInfo = True + elif enum.get('value') or enum.get('bitpos') or enum.get('alias'): + # self.gen.logMsg('diag', 'Adding extension constant "enum"', + # enum.get('name')) + addEnumInfo = True + if addEnumInfo: + enumInfo = EnumInfo(enum) + self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) + + self.extensions = self.reg.findall('extensions/extension') + self.extdict = {} + for feature in self.extensions: + featureInfo = FeatureInfo(feature) + self.addElementInfo(feature, featureInfo, 'extension', self.extdict) + + # Add additional enums defined only in tags + # to the corresponding core type. + # Algorithm matches that of enums in a "feature" tag as above. + # + # This code also adds a 'extnumber' attribute containing the + # extension number, used for enumerant value calculation. + for elem in feature.findall('require'): + for enum in elem.findall('enum'): + addEnumInfo = False + groupName = enum.get('extends') + if groupName is not None: + # self.gen.logMsg('diag', 'Found extension enum', + # enum.get('name')) + + # Add block's extension number attribute to + # the element unless specified explicitly, such + # as when redefining an enum in another extension. 
+ extnumber = enum.get('extnumber') + if not extnumber: + enum.set('extnumber', featureInfo.number) + + enum.set('extname', featureInfo.name) + enum.set('supported', featureInfo.supported) + # Look up the GroupInfo with matching groupName + if groupName in self.groupdict: + # self.gen.logMsg('diag', 'Matching group', + # groupName, 'found, adding element...') + gi = self.groupdict[groupName] + gi.elem.append(copy.deepcopy(enum)) + else: + self.gen.logMsg('warn', 'NO matching group', + groupName, 'for enum', enum.get('name'), 'found.') + addEnumInfo = True + elif enum.get('value') or enum.get('bitpos') or enum.get('alias'): + # self.gen.logMsg('diag', 'Adding extension constant "enum"', + # enum.get('name')) + addEnumInfo = True + if addEnumInfo: + enumInfo = EnumInfo(enum) + self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) + + # Construct a "validextensionstructs" list for parent structures + # based on "structextends" tags in child structures + disabled_types = [] + for disabled_ext in self.reg.findall('extensions/extension[@supported="disabled"]'): + for type_elem in disabled_ext.findall("*/type"): + disabled_types.append(type_elem.get('name')) + for type_elem in self.reg.findall('types/type'): + if type_elem.get('name') not in disabled_types: + parentStructs = type_elem.get('structextends') + if parentStructs is not None: + for parent in parentStructs.split(','): + # self.gen.logMsg('diag', type.get('name'), 'extends', parent) + self.validextensionstructs[parent].append(type_elem.get('name')) + # Sort the lists so they don't depend on the XML order + for parent in self.validextensionstructs: + self.validextensionstructs[parent].sort() + + # Parse out all spirv tags in dictionaries + # Use addElementInfo to catch duplicates + for spirv in self.reg.findall('spirvextensions/spirvextension'): + spirvInfo = SpirvInfo(spirv) + self.addElementInfo(spirv, spirvInfo, 'spirvextension', self.spirvextdict) + for spirv in 
self.reg.findall('spirvcapabilities/spirvcapability'): + spirvInfo = SpirvInfo(spirv) + self.addElementInfo(spirv, spirvInfo, 'spirvcapability', self.spirvcapdict) + + def dumpReg(self, maxlen=120, filehandle=sys.stdout): + """Dump all the dictionaries constructed from the Registry object. + + Diagnostic to dump the dictionaries to specified file handle (default stdout). + Truncates type / enum / command elements to maxlen characters (default 120)""" + write('***************************************', file=filehandle) + write(' ** Dumping Registry contents **', file=filehandle) + write('***************************************', file=filehandle) + write('// Types', file=filehandle) + for name in self.typedict: + tobj = self.typedict[name] + write(' Type', name, '->', etree.tostring(tobj.elem)[0:maxlen], file=filehandle) + write('// Groups', file=filehandle) + for name in self.groupdict: + gobj = self.groupdict[name] + write(' Group', name, '->', etree.tostring(gobj.elem)[0:maxlen], file=filehandle) + write('// Enums', file=filehandle) + for name in self.enumdict: + eobj = self.enumdict[name] + write(' Enum', name, '->', etree.tostring(eobj.elem)[0:maxlen], file=filehandle) + write('// Commands', file=filehandle) + for name in self.cmddict: + cobj = self.cmddict[name] + write(' Command', name, '->', etree.tostring(cobj.elem)[0:maxlen], file=filehandle) + write('// APIs', file=filehandle) + for key in self.apidict: + write(' API Version ', key, '->', + etree.tostring(self.apidict[key].elem)[0:maxlen], file=filehandle) + write('// Extensions', file=filehandle) + for key in self.extdict: + write(' Extension', key, '->', + etree.tostring(self.extdict[key].elem)[0:maxlen], file=filehandle) + write('// SPIR-V', file=filehandle) + for key in self.spirvextdict: + write(' SPIR-V Extension', key, '->', + etree.tostring(self.spirvextdict[key].elem)[0:maxlen], file=filehandle) + for key in self.spirvcapdict: + write(' SPIR-V Capability', key, '->', + 
etree.tostring(self.spirvcapdict[key].elem)[0:maxlen], file=filehandle) + + def markTypeRequired(self, typename, required): + """Require (along with its dependencies) or remove (but not its dependencies) a type. + + - typename - name of type + - required - boolean (to tag features as required or not) + """ + self.gen.logMsg('diag', 'tagging type:', typename, '-> required =', required) + # Get TypeInfo object for tag corresponding to typename + typeinfo = self.lookupElementInfo(typename, self.typedict) + if typeinfo is not None: + if required: + # Tag type dependencies in 'alias' and 'required' attributes as + # required. This does not un-tag dependencies in a + # tag. See comments in markRequired() below for the reason. + for attrib_name in ['requires', 'alias']: + depname = typeinfo.elem.get(attrib_name) + if depname: + self.gen.logMsg('diag', 'Generating dependent type', + depname, 'for', attrib_name, 'type', typename) + # Don't recurse on self-referential structures. + if typename != depname: + self.markTypeRequired(depname, required) + else: + self.gen.logMsg('diag', 'type', typename, 'is self-referential') + # Tag types used in defining this type (e.g. in nested + # tags) + # Look for in entire tree, + # not just immediate children + for subtype in typeinfo.elem.findall('.//type'): + self.gen.logMsg('diag', 'markRequired: type requires dependent ', subtype.text) + if typename != subtype.text: + self.markTypeRequired(subtype.text, required) + else: + self.gen.logMsg('diag', 'type', typename, 'is self-referential') + # Tag enums used in defining this type, for example in + # member[MEMBER_SIZE] + for subenum in typeinfo.elem.findall('.//enum'): + self.gen.logMsg('diag', 'markRequired: type requires dependent ', subenum.text) + self.markEnumRequired(subenum.text, required) + # Tag type dependency in 'bitvalues' attributes as + # required. 
This ensures that the bit values for a flag + # are emitted + depType = typeinfo.elem.get('bitvalues') + if depType: + self.gen.logMsg('diag', 'Generating bitflag type', + depType, 'for type', typename) + self.markTypeRequired(depType, required) + group = self.lookupElementInfo(depType, self.groupdict) + if group is not None: + group.flagType = typeinfo + + typeinfo.required = required + elif '.h' not in typename: + self.gen.logMsg('warn', 'type:', typename, 'IS NOT DEFINED') + + def markEnumRequired(self, enumname, required): + """Mark an enum as required or not. + + - enumname - name of enum + - required - boolean (to tag features as required or not)""" + + self.gen.logMsg('diag', 'tagging enum:', enumname, '-> required =', required) + enum = self.lookupElementInfo(enumname, self.enumdict) + if enum is not None: + # If the enum is part of a group, and is being removed, then + # look it up in that tag and remove it there, so that it + # isn't visible to generators (which traverse the tag + # elements themselves). + # This isn't the most robust way of doing this, since a removed + # enum that's later required again will no longer have a group + # element, but it makes the change non-intrusive on generator + # code. 
+ if required is False: + groupName = enum.elem.get('extends') + if groupName is not None: + # Look up the Info with matching groupName + if groupName in self.groupdict: + gi = self.groupdict[groupName] + gienum = gi.elem.find("enum[@name='" + enumname + "']") + if gienum is not None: + # Remove copy of this enum from the group + gi.elem.remove(gienum) + else: + self.gen.logMsg('warn', 'Cannot remove enum', + enumname, 'not found in group', + groupName) + else: + self.gen.logMsg('warn', 'Cannot remove enum', + enumname, 'from nonexistent group', + groupName) + + enum.required = required + # Tag enum dependencies in 'alias' attribute as required + depname = enum.elem.get('alias') + if depname: + self.gen.logMsg('diag', 'Generating dependent enum', + depname, 'for alias', enumname, 'required =', enum.required) + self.markEnumRequired(depname, required) + else: + self.gen.logMsg('warn', 'enum:', enumname, 'IS NOT DEFINED') + + def markCmdRequired(self, cmdname, required): + """Mark a command as required or not. + + - cmdname - name of command + - required - boolean (to tag features as required or not)""" + self.gen.logMsg('diag', 'tagging command:', cmdname, '-> required =', required) + cmd = self.lookupElementInfo(cmdname, self.cmddict) + if cmd is not None: + cmd.required = required + # Tag command dependencies in 'alias' attribute as required + depname = cmd.elem.get('alias') + if depname: + self.gen.logMsg('diag', 'Generating dependent command', + depname, 'for alias', cmdname) + self.markCmdRequired(depname, required) + # Tag all parameter types of this command as required. + # This DOES NOT remove types of commands in a + # tag, because many other commands may use the same type. + # We could be more clever and reference count types, + # instead of using a boolean. 
+ if required: + # Look for in entire tree, + # not just immediate children + for type_elem in cmd.elem.findall('.//type'): + self.gen.logMsg('diag', 'markRequired: command implicitly requires dependent type', type_elem.text) + self.markTypeRequired(type_elem.text, required) + else: + self.gen.logMsg('warn', 'command:', cmdname, 'IS NOT DEFINED') + + def markRequired(self, featurename, feature, required): + """Require or remove features specified in the Element. + + - featurename - name of the feature + - feature - Element for `` or `` tag + - required - boolean (to tag features as required or not)""" + self.gen.logMsg('diag', 'markRequired (feature = , required =', required, ')') + + # Loop over types, enums, and commands in the tag + # @@ It would be possible to respect 'api' and 'profile' attributes + # in individual features, but that's not done yet. + for typeElem in feature.findall('type'): + self.markTypeRequired(typeElem.get('name'), required) + for enumElem in feature.findall('enum'): + self.markEnumRequired(enumElem.get('name'), required) + for cmdElem in feature.findall('command'): + self.markCmdRequired(cmdElem.get('name'), required) + + # Extensions may need to extend existing commands or other items in the future. + # So, look for extend tags. 
+ for extendElem in feature.findall('extend'): + extendType = extendElem.get('type') + if extendType == 'command': + commandName = extendElem.get('name') + successExtends = extendElem.get('successcodes') + if successExtends is not None: + for success in successExtends.split(','): + self.commandextensionsuccesses.append(self.commandextensiontuple(command=commandName, + value=success, + extension=featurename)) + errorExtends = extendElem.get('errorcodes') + if errorExtends is not None: + for error in errorExtends.split(','): + self.commandextensionerrors.append(self.commandextensiontuple(command=commandName, + value=error, + extension=featurename)) + else: + self.gen.logMsg('warn', 'extend type:', extendType, 'IS NOT SUPPORTED') + + def getAlias(self, elem, dict): + """Check for an alias in the same require block. + + - elem - Element to check for an alias""" + + # Try to find an alias + alias = elem.get('alias') + if alias is None: + name = elem.get('name') + typeinfo = self.lookupElementInfo(name, dict) + alias = typeinfo.elem.get('alias') + + return alias + + def checkForCorrectionAliases(self, alias, require, tag): + """Check for an alias in the same require block. + + - alias - String name of the alias + - require - `` block from the registry + - tag - tag to look for in the require block""" + + if alias and require.findall(tag + "[@name='" + alias + "']"): + return True + + return False + + def fillFeatureDictionary(self, interface, featurename, api, profile): + """Capture added interfaces for a `` or ``. 
+ + - interface - Element for `` or ``, containing + `` and `` tags + - featurename - name of the feature + - api - string specifying API name being generated + - profile - string specifying API profile being generated""" + + # Explicitly initialize known types - errors for unhandled categories + self.gen.featureDictionary[featurename] = { + "enumconstant": {}, + "command": {}, + "enum": {}, + "struct": {}, + "handle": {}, + "basetype": {}, + "include": {}, + "define": {}, + "bitmask": {}, + "union": {}, + "funcpointer": {}, + } + + # marks things that are required by this version/profile + for require in interface.findall('require'): + if matchAPIProfile(api, profile, require): + + # Determine the required extension or version needed for a require block + # Assumes that only one of these is specified + required_key = require.get('feature') + if required_key is None: + required_key = require.get('extension') + + # Loop over types, enums, and commands in the tag + for typeElem in require.findall('type'): + typename = typeElem.get('name') + typeinfo = self.lookupElementInfo(typename, self.typedict) + + if typeinfo: + # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. 
+ alias = self.getAlias(typeElem, self.typedict) + if not self.checkForCorrectionAliases(alias, require, 'type'): + # Resolve the type info to the actual type, so we get an accurate read for 'structextends' + while alias: + typeinfo = self.lookupElementInfo(alias, self.typedict) + alias = typeinfo.elem.get('alias') + + typecat = typeinfo.elem.get('category') + typeextends = typeinfo.elem.get('structextends') + if not required_key in self.gen.featureDictionary[featurename][typecat]: + self.gen.featureDictionary[featurename][typecat][required_key] = {} + if not typeextends in self.gen.featureDictionary[featurename][typecat][required_key]: + self.gen.featureDictionary[featurename][typecat][required_key][typeextends] = [] + self.gen.featureDictionary[featurename][typecat][required_key][typeextends].append(typename) + + for enumElem in require.findall('enum'): + enumname = enumElem.get('name') + typeinfo = self.lookupElementInfo(enumname, self.enumdict) + + # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. + alias = self.getAlias(enumElem, self.enumdict) + if not self.checkForCorrectionAliases(alias, require, 'enum'): + enumextends = enumElem.get('extends') + if not required_key in self.gen.featureDictionary[featurename]['enumconstant']: + self.gen.featureDictionary[featurename]['enumconstant'][required_key] = {} + if not enumextends in self.gen.featureDictionary[featurename]['enumconstant'][required_key]: + self.gen.featureDictionary[featurename]['enumconstant'][required_key][enumextends] = [] + self.gen.featureDictionary[featurename]['enumconstant'][required_key][enumextends].append(enumname) + + for cmdElem in require.findall('command'): + + # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. 
+ alias = self.getAlias(cmdElem, self.cmddict) + if not self.checkForCorrectionAliases(alias, require, 'command'): + if not required_key in self.gen.featureDictionary[featurename]['command']: + self.gen.featureDictionary[featurename]['command'][required_key] = [] + self.gen.featureDictionary[featurename]['command'][required_key].append(cmdElem.get('name')) + + + def requireAndRemoveFeatures(self, interface, featurename, api, profile): + """Process `` and `` tags for a `` or ``. + + - interface - Element for `` or ``, containing + `` and `` tags + - featurename - name of the feature + - api - string specifying API name being generated + - profile - string specifying API profile being generated""" + # marks things that are required by this version/profile + for feature in interface.findall('require'): + if matchAPIProfile(api, profile, feature): + self.markRequired(featurename, feature, True) + # marks things that are removed by this version/profile + for feature in interface.findall('remove'): + if matchAPIProfile(api, profile, feature): + self.markRequired(featurename, feature, False) + + def assignAdditionalValidity(self, interface, api, profile): + # Loop over all usage inside all tags. + for feature in interface.findall('require'): + if matchAPIProfile(api, profile, feature): + for v in feature.findall('usage'): + if v.get('command'): + self.cmddict[v.get('command')].additionalValidity.append(copy.deepcopy(v)) + if v.get('struct'): + self.typedict[v.get('struct')].additionalValidity.append(copy.deepcopy(v)) + + # Loop over all usage inside all tags. 
+ for feature in interface.findall('remove'): + if matchAPIProfile(api, profile, feature): + for v in feature.findall('usage'): + if v.get('command'): + self.cmddict[v.get('command')].removedValidity.append(copy.deepcopy(v)) + if v.get('struct'): + self.typedict[v.get('struct')].removedValidity.append(copy.deepcopy(v)) + + def generateFeature(self, fname, ftype, dictionary): + """Generate a single type / enum group / enum / command, + and all its dependencies as needed. + + - fname - name of feature (``/``/``) + - ftype - type of feature, 'type' | 'enum' | 'command' + - dictionary - of *Info objects - self.{type|enum|cmd}dict""" + + self.gen.logMsg('diag', 'generateFeature: generating', ftype, fname) + f = self.lookupElementInfo(fname, dictionary) + if f is None: + # No such feature. This is an error, but reported earlier + self.gen.logMsg('diag', 'No entry found for feature', fname, + 'returning!') + return + + # If feature isn't required, or has already been declared, return + if not f.required: + self.gen.logMsg('diag', 'Skipping', ftype, fname, '(not required)') + return + if f.declared: + self.gen.logMsg('diag', 'Skipping', ftype, fname, '(already declared)') + return + # Always mark feature declared, as though actually emitted + f.declared = True + + # Determine if this is an alias, and of what, if so + alias = f.elem.get('alias') + if alias: + self.gen.logMsg('diag', fname, 'is an alias of', alias) + + # Pull in dependent declaration(s) of the feature. + # For types, there may be one type in the 'requires' attribute of + # the element, one in the 'alias' attribute, and many in + # embedded and tags within the element. + # For commands, there may be many in tags within the element. + # For enums, no dependencies are allowed (though perhaps if you + # have a uint64 enum, it should require that type). 
+ genProc = None + followupFeature = None + if ftype == 'type': + genProc = self.gen.genType + + # Generate type dependencies in 'alias' and 'requires' attributes + if alias: + self.generateFeature(alias, 'type', self.typedict) + requires = f.elem.get('requires') + if requires: + self.gen.logMsg('diag', 'Generating required dependent type', + requires) + self.generateFeature(requires, 'type', self.typedict) + + # Generate types used in defining this type (e.g. in nested + # tags) + # Look for in entire tree, + # not just immediate children + for subtype in f.elem.findall('.//type'): + self.gen.logMsg('diag', 'Generating required dependent ', + subtype.text) + self.generateFeature(subtype.text, 'type', self.typedict) + + # Generate enums used in defining this type, for example in + # member[MEMBER_SIZE] + for subtype in f.elem.findall('.//enum'): + self.gen.logMsg('diag', 'Generating required dependent ', + subtype.text) + self.generateFeature(subtype.text, 'enum', self.enumdict) + + # If the type is an enum group, look up the corresponding + # group in the group dictionary and generate that instead. + if f.elem.get('category') == 'enum': + self.gen.logMsg('diag', 'Type', fname, 'is an enum group, so generate that instead') + group = self.lookupElementInfo(fname, self.groupdict) + if alias is not None: + # An alias of another group name. + # Pass to genGroup with 'alias' parameter = aliased name + self.gen.logMsg('diag', 'Generating alias', fname, + 'for enumerated type', alias) + # Now, pass the *aliased* GroupInfo to the genGroup, but + # with an additional parameter which is the alias name. + genProc = self.gen.genGroup + f = self.lookupElementInfo(alias, self.groupdict) + elif group is None: + self.gen.logMsg('warn', 'Skipping enum type', fname, + ': No matching enumerant group') + return + else: + genProc = self.gen.genGroup + f = group + + # @ The enum group is not ready for generation. 
At this + # @ point, it contains all tags injected by + # @ tags without any verification of whether + # @ they're required or not. It may also contain + # @ duplicates injected by multiple consistent + # @ definitions of an . + + # @ Pass over each enum, marking its enumdict[] entry as + # @ required or not. Mark aliases of enums as required, + # @ too. + + enums = group.elem.findall('enum') + + self.gen.logMsg('diag', 'generateFeature: checking enums for group', fname) + + # Check for required enums, including aliases + # LATER - Check for, report, and remove duplicates? + enumAliases = [] + for elem in enums: + name = elem.get('name') + + required = False + + extname = elem.get('extname') + version = elem.get('version') + if extname is not None: + # 'supported' attribute was injected when the element was + # moved into the group in Registry.parseTree() + if self.genOpts.defaultExtensions == elem.get('supported'): + required = True + elif re.match(self.genOpts.addExtensions, extname) is not None: + required = True + elif version is not None: + required = re.match(self.genOpts.emitversions, version) is not None + else: + required = True + + self.gen.logMsg('diag', '* required =', required, 'for', name) + if required: + # Mark this element as required (in the element, not the EnumInfo) + elem.set('required', 'true') + # If it's an alias, track that for later use + enumAlias = elem.get('alias') + if enumAlias: + enumAliases.append(enumAlias) + for elem in enums: + name = elem.get('name') + if name in enumAliases: + elem.set('required', 'true') + self.gen.logMsg('diag', '* also need to require alias', name) + if f.elem.get('category') == 'bitmask': + followupFeature = f.elem.get('bitvalues') + elif ftype == 'command': + # Generate command dependencies in 'alias' attribute + if alias: + self.generateFeature(alias, 'command', self.cmddict) + + genProc = self.gen.genCmd + for type_elem in f.elem.findall('.//type'): + depname = type_elem.text + self.gen.logMsg('diag', 
'Generating required parameter type', + depname) + self.generateFeature(depname, 'type', self.typedict) + elif ftype == 'enum': + # Generate enum dependencies in 'alias' attribute + if alias: + self.generateFeature(alias, 'enum', self.enumdict) + genProc = self.gen.genEnum + + # Actually generate the type only if emitting declarations + if self.emitFeatures: + self.gen.logMsg('diag', 'Emitting', ftype, 'decl for', fname) + genProc(f, fname, alias) + else: + self.gen.logMsg('diag', 'Skipping', ftype, fname, + '(should not be emitted)') + + if followupFeature: + self.gen.logMsg('diag', 'Generating required bitvalues ', + followupFeature) + self.generateFeature(followupFeature, "type", self.typedict) + + def generateRequiredInterface(self, interface): + """Generate all interfaces required by an API version or extension. + + - interface - Element for `` or ``""" + + # Loop over all features inside all tags. + for features in interface.findall('require'): + for t in features.findall('type'): + self.generateFeature(t.get('name'), 'type', self.typedict) + for e in features.findall('enum'): + # If this is an enum extending an enumerated type, don't + # generate it - this has already been done in reg.parseTree, + # by copying this element into the enumerated type. 
+ enumextends = e.get('extends') + if not enumextends: + self.generateFeature(e.get('name'), 'enum', self.enumdict) + for c in features.findall('command'): + self.generateFeature(c.get('name'), 'command', self.cmddict) + + def generateSpirv(self, spirv, dictionary): + if spirv is None: + self.gen.logMsg('diag', 'No entry found for element', name, + 'returning!') + return + + name = spirv.elem.get('name') + # No known alias for spirv elements + alias = None + if spirv.emit: + genProc = self.gen.genSpirv + genProc(spirv, name, alias) + + def apiGen(self): + """Generate interface for specified versions using the current + generator and generator options""" + + self.gen.logMsg('diag', '*******************************************') + self.gen.logMsg('diag', ' Registry.apiGen file:', self.genOpts.filename, + 'api:', self.genOpts.apiname, + 'profile:', self.genOpts.profile) + self.gen.logMsg('diag', '*******************************************') + + # Reset required/declared flags for all features + self.apiReset() + + # Compile regexps used to select versions & extensions + regVersions = re.compile(self.genOpts.versions) + regEmitVersions = re.compile(self.genOpts.emitversions) + regAddExtensions = re.compile(self.genOpts.addExtensions) + regRemoveExtensions = re.compile(self.genOpts.removeExtensions) + regEmitExtensions = re.compile(self.genOpts.emitExtensions) + regEmitSpirv = re.compile(self.genOpts.emitSpirv) + + # Get all matching API feature names & add to list of FeatureInfo + # Note we used to select on feature version attributes, not names. + features = [] + apiMatch = False + for key in self.apidict: + fi = self.apidict[key] + api = fi.elem.get('api') + if apiNameMatch(self.genOpts.apiname, api): + apiMatch = True + if regVersions.match(fi.name): + # Matches API & version #s being generated. Mark for + # emission and add to the features[] list . + # @@ Could use 'declared' instead of 'emit'? 
+ fi.emit = (regEmitVersions.match(fi.name) is not None) + features.append(fi) + if not fi.emit: + self.gen.logMsg('diag', 'NOT tagging feature api =', api, + 'name =', fi.name, 'version =', fi.version, + 'for emission (does not match emitversions pattern)') + else: + self.gen.logMsg('diag', 'Including feature api =', api, + 'name =', fi.name, 'version =', fi.version, + 'for emission (matches emitversions pattern)') + else: + self.gen.logMsg('diag', 'NOT including feature api =', api, + 'name =', fi.name, 'version =', fi.version, + '(does not match requested versions)') + else: + self.gen.logMsg('diag', 'NOT including feature api =', api, + 'name =', fi.name, + '(does not match requested API)') + if not apiMatch: + self.gen.logMsg('warn', 'No matching API versions found!') + + # Get all matching extensions, in order by their extension number, + # and add to the list of features. + # Start with extensions tagged with 'api' pattern matching the API + # being generated. Add extensions matching the pattern specified in + # regExtensions, then remove extensions matching the pattern + # specified in regRemoveExtensions + for (extName, ei) in sorted(self.extdict.items(), key=lambda x: x[1].number if x[1].number is not None else '0'): + extName = ei.name + include = False + + # Include extension if defaultExtensions is not None and is + # exactly matched by the 'supported' attribute. + if apiNameMatch(self.genOpts.defaultExtensions, + ei.elem.get('supported')): + self.gen.logMsg('diag', 'Including extension', + extName, "(defaultExtensions matches the 'supported' attribute)") + include = True + + # Include additional extensions if the extension name matches + # the regexp specified in the generator options. This allows + # forcing extensions into an interface even if they're not + # tagged appropriately in the registry. + # However we still respect the 'supported' attribute. 
+ if regAddExtensions.match(extName) is not None: + if not apiNameMatch(self.genOpts.apiname, ei.elem.get('supported')): + self.gen.logMsg('diag', 'NOT including extension', + extName, '(matches explicitly requested, but does not match the \'supported\' attribute)') + include = False + else: + self.gen.logMsg('diag', 'Including extension', + extName, '(matches explicitly requested extensions to add)') + include = True + # Remove extensions if the name matches the regexp specified + # in generator options. This allows forcing removal of + # extensions from an interface even if they're tagged that + # way in the registry. + if regRemoveExtensions.match(extName) is not None: + self.gen.logMsg('diag', 'Removing extension', + extName, '(matches explicitly requested extensions to remove)') + include = False + + # If the extension is to be included, add it to the + # extension features list. + if include: + ei.emit = (regEmitExtensions.match(extName) is not None) + features.append(ei) + if not ei.emit: + self.gen.logMsg('diag', 'NOT tagging extension', + extName, + 'for emission (does not match emitextensions pattern)') + + # Hack - can be removed when validity generator goes away + # (Jon) I'm not sure what this does, or if it should respect + # the ei.emit flag above. 
+ self.requiredextensions.append(extName) + else: + self.gen.logMsg('diag', 'NOT including extension', + extName, '(does not match api attribute or explicitly requested extensions)') + + # Add all spirv elements to list + # generators decide to emit them all or not + # Currently no filtering as no client of these elements needs filtering + spirvexts = [] + for key in self.spirvextdict: + si = self.spirvextdict[key] + si.emit = (regEmitSpirv.match(key) is not None) + spirvexts.append(si) + spirvcaps = [] + for key in self.spirvcapdict: + si = self.spirvcapdict[key] + si.emit = (regEmitSpirv.match(key) is not None) + spirvcaps.append(si) + + # Sort the features list, if a sort procedure is defined + if self.genOpts.sortProcedure: + self.genOpts.sortProcedure(features) + # print('sortProcedure ->', [f.name for f in features]) + + # Pass 1: loop over requested API versions and extensions tagging + # types/commands/features as required (in an block) or no + # longer required (in an block). It is possible to remove + # a feature in one version and restore it later by requiring it in + # a later version. + # If a profile other than 'None' is being generated, it must + # match the profile attribute (if any) of the and + # tags. + self.gen.logMsg('diag', 'PASS 1: TAG FEATURES') + for f in features: + self.gen.logMsg('diag', 'PASS 1: Tagging required and removed features for', + f.name) + self.fillFeatureDictionary(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) + self.requireAndRemoveFeatures(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) + self.assignAdditionalValidity(f.elem, self.genOpts.apiname, self.genOpts.profile) + + # Pass 2: loop over specified API versions and extensions printing + # declarations for required things which haven't already been + # generated. 
+ self.gen.logMsg('diag', 'PASS 2: GENERATE INTERFACES FOR FEATURES') + self.gen.beginFile(self.genOpts) + for f in features: + self.gen.logMsg('diag', 'PASS 2: Generating interface for', + f.name) + emit = self.emitFeatures = f.emit + if not emit: + self.gen.logMsg('diag', 'PASS 2: NOT declaring feature', + f.elem.get('name'), 'because it is not tagged for emission') + # Generate the interface (or just tag its elements as having been + # emitted, if they haven't been). + self.gen.beginFeature(f.elem, emit) + self.generateRequiredInterface(f.elem) + self.gen.endFeature() + # Generate spirv elements + for s in spirvexts: + self.generateSpirv(s, self.spirvextdict) + for s in spirvcaps: + self.generateSpirv(s, self.spirvcapdict) + self.gen.endFile() + + def apiReset(self): + """Reset type/enum/command dictionaries before generating another API. + + Use between apiGen() calls to reset internal state.""" + for datatype in self.typedict: + self.typedict[datatype].resetState() + for enum in self.enumdict: + self.enumdict[enum].resetState() + for cmd in self.cmddict: + self.cmddict[cmd].resetState() + for cmd in self.apidict: + self.apidict[cmd].resetState() + + def __validateStructLimittypes(self, struct): + """Validate 'limittype' attributes for a single struct.""" + limittypeDiags = namedtuple('limittypeDiags', ['missing', 'invalid']) + badFields = defaultdict(lambda : limittypeDiags(missing=[], invalid=[])) + validLimittypes = { 'min', 'max', 'bitmask', 'range', 'struct', 'noauto' } + for member in struct.getMembers(): + memberName = member.findtext('name') + if memberName in ['sType', 'pNext']: + continue + limittype = member.get('limittype') + if not limittype: + badFields[struct.elem.get('name')].missing.append(memberName) + elif limittype == 'struct': + typeName = member.findtext('type') + memberType = self.typedict[typeName] + badFields.update(self.__validateStructLimittypes(memberType)) + elif limittype not in validLimittypes: + 
badFields[struct.elem.get('name')].invalid.append(memberName) + return badFields + + def __validateLimittype(self): + """Validate 'limittype' attributes.""" + self.gen.logMsg('diag', 'VALIDATING LIMITTYPE ATTRIBUTES') + badFields = self.__validateStructLimittypes(self.typedict['VkPhysicalDeviceProperties2']) + for featStructName in self.validextensionstructs['VkPhysicalDeviceProperties2']: + featStruct = self.typedict[featStructName] + badFields.update(self.__validateStructLimittypes(featStruct)) + + if badFields: + self.gen.logMsg('diag', 'SUMMARY OF FIELDS WITH INCORRECT LIMITTYPES') + for key in sorted(badFields.keys()): + diags = badFields[key] + if diags.missing: + self.gen.logMsg('diag', ' ', key, 'missing limittype:', ', '.join(badFields[key].missing)) + if diags.invalid: + self.gen.logMsg('diag', ' ', key, 'invalid limittype:', ', '.join(badFields[key].invalid)) + return False + return True + + def validateRegistry(self): + """Validate properties of the registry.""" + return self.__validateLimittype() From 38080d39e4f00aa6f940d85d440290912939912a Mon Sep 17 00:00:00 2001 From: Aharon Abramson Date: Mon, 13 Nov 2023 13:39:44 +0200 Subject: [PATCH 007/190] Update xml/cl.xml Co-authored-by: Ewan Crawford --- xml/cl.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xml/cl.xml b/xml/cl.xml index 283b068d5..4f9c1c3aa 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -7319,6 +7319,9 @@ server's OpenCL/api-docs repository. + + + From cbf85b621d3a07c9d99055406800119b173d1851 Mon Sep 17 00:00:00 2001 From: Aharon Abramson Date: Tue, 21 Nov 2023 10:39:09 +0200 Subject: [PATCH 008/190] Update xml/cl.xml Co-authored-by: Sun Serega --- xml/cl.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xml/cl.xml b/xml/cl.xml index 4f9c1c3aa..3e042e58f 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1345,7 +1345,7 @@ server's OpenCL/api-docs repository. 
- + From 711fcda065ae8f0dbb337d625fe8dc72927a8b93 Mon Sep 17 00:00:00 2001 From: Aharon Abramson Date: Tue, 21 Nov 2023 10:55:28 +0200 Subject: [PATCH 009/190] Update xml/cl.xml Co-authored-by: Sun Serega --- xml/cl.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xml/cl.xml b/xml/cl.xml index 3e042e58f..d0982f205 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -7323,7 +7323,7 @@ server's OpenCL/api-docs repository. - + From 2264ad6b61ba8e472bdf6b6cc2ed7bb5aa3b7a6b Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Wed, 6 Dec 2023 11:00:44 +0200 Subject: [PATCH 010/190] remove changes to files made by mistake --- .asciidoctorconfig.adoc | 12 - .project | 11 - xml/cgenerator.py | 420 --------- xml/checklinks.py | 71 -- xml/cl.xml | 6 +- xml/clconventions.py | 241 ------ xml/conventions.py | 358 -------- xml/docgenerator.py | 454 ---------- xml/extensionmetadocgenerator.py | 659 -------------- xml/genRef.py | 1019 ---------------------- xml/gen_dictionaries.py | 258 ------ xml/gen_version_notes.py | 127 --- xml/gencl.py | 464 ---------- xml/generator.py | 1186 ------------------------- xml/pygenerator.py | 365 -------- xml/realign.py | 47 - xml/reflib.py | 663 -------------- xml/reg.py | 1397 ------------------------------ 18 files changed, 3 insertions(+), 7755 deletions(-) delete mode 100644 .asciidoctorconfig.adoc delete mode 100644 .project delete mode 100644 xml/cgenerator.py delete mode 100644 xml/checklinks.py delete mode 100644 xml/clconventions.py delete mode 100644 xml/conventions.py delete mode 100644 xml/docgenerator.py delete mode 100644 xml/extensionmetadocgenerator.py delete mode 100644 xml/genRef.py delete mode 100644 xml/gen_dictionaries.py delete mode 100644 xml/gen_version_notes.py delete mode 100644 xml/gencl.py delete mode 100644 xml/generator.py delete mode 100644 xml/pygenerator.py delete mode 100644 xml/realign.py delete mode 100644 xml/reflib.py delete mode 100644 xml/reg.py diff --git a/.asciidoctorconfig.adoc 
b/.asciidoctorconfig.adoc deleted file mode 100644 index 53b403dbc..000000000 --- a/.asciidoctorconfig.adoc +++ /dev/null @@ -1,12 +0,0 @@ -// +++++++++++++++++++++++++++++++++++++++++++++++++++++++ -// + Initial AsciiDoc editor configuration file - V1.0 + -// ++++++++++++++++++++++++++++++++++++++++++++++++++++++ -// -// Did not found any configuration files, so create this at project root level. -// If you do not like those files to be generated - you can turn it off inside Asciidoctor Editor preferences. -// -// You can define editor specific parts here. -// For example: with next line you could set imagesdir attribute to subfolder "images" relative to the folder where this config file is located. -// :imagesdir: {asciidoctorconfigdir}/images -// -// For more information please take a look at https://github.com/de-jcup/eclipse-asciidoctor-editor/wiki/Asciidoctor-configfiles diff --git a/.project b/.project deleted file mode 100644 index 9877aec6c..000000000 --- a/.project +++ /dev/null @@ -1,11 +0,0 @@ - - - OpenCL-Docs - - - - - - - - diff --git a/xml/cgenerator.py b/xml/cgenerator.py deleted file mode 100644 index 4b2a8f1e2..000000000 --- a/xml/cgenerator.py +++ /dev/null @@ -1,420 +0,0 @@ -#!/usr/bin/python3 -i -# -# Copyright 2013-2023 The Khronos Group Inc. -# -# SPDX-License-Identifier: Apache-2.0 - -import os -import re -from generator import (GeneratorOptions, OutputGenerator, noneStr, - regSortFeatures, write) - - -class CGeneratorOptions(GeneratorOptions): - """CGeneratorOptions - subclass of GeneratorOptions. 
- - Adds options used by COutputGenerator objects during C language header - generation.""" - - def __init__(self, - prefixText="", - genFuncPointers=True, - protectFile=True, - protectFeature=True, - protectProto=None, - protectProtoStr=None, - apicall='', - apientry='', - apientryp='', - indentFuncProto=True, - indentFuncPointer=False, - alignFuncParam=0, - genEnumBeginEndRange=False, - genAliasMacro=False, - aliasMacro='', - misracstyle=False, - misracppstyle=False, - **kwargs - ): - """Constructor. - Additional parameters beyond parent class: - - - prefixText - list of strings to prefix generated header with - (usually a copyright statement + calling convention macros). - - protectFile - True if multiple inclusion protection should be - generated (based on the filename) around the entire header. - - protectFeature - True if #ifndef..#endif protection should be - generated around a feature interface in the header file. - - genFuncPointers - True if function pointer typedefs should be - generated - - protectProto - If conditional protection should be generated - around prototype declarations, set to either '#ifdef' - to require opt-in (#ifdef protectProtoStr) or '#ifndef' - to require opt-out (#ifndef protectProtoStr). Otherwise - set to None. - - protectProtoStr - #ifdef/#ifndef symbol to use around prototype - declarations, if protectProto is set - - apicall - string to use for the function declaration prefix, - such as APICALL on Windows. - - apientry - string to use for the calling convention macro, - in typedefs, such as APIENTRY. - - apientryp - string to use for the calling convention macro - in function pointer typedefs, such as APIENTRYP. 
- - indentFuncProto - True if prototype declarations should put each - parameter on a separate line - - indentFuncPointer - True if typedefed function pointers should put each - parameter on a separate line - - alignFuncParam - if nonzero and parameters are being put on a - separate line, align parameter names at the specified column - - genEnumBeginEndRange - True if BEGIN_RANGE / END_RANGE macros should - be generated for enumerated types - - genAliasMacro - True if the OpenXR alias macro should be generated - for aliased types (unclear what other circumstances this is useful) - - aliasMacro - alias macro to inject when genAliasMacro is True - - misracstyle - generate MISRA C-friendly headers - - misracppstyle - generate MISRA C++-friendly headers""" - - GeneratorOptions.__init__(self, **kwargs) - - self.prefixText = prefixText - """list of strings to prefix generated header with (usually a copyright statement + calling convention macros).""" - - self.genFuncPointers = genFuncPointers - """True if function pointer typedefs should be generated""" - - self.protectFile = protectFile - """True if multiple inclusion protection should be generated (based on the filename) around the entire header.""" - - self.protectFeature = protectFeature - """True if #ifndef..#endif protection should be generated around a feature interface in the header file.""" - - self.protectProto = protectProto - """If conditional protection should be generated around prototype declarations, set to either '#ifdef' to require opt-in (#ifdef protectProtoStr) or '#ifndef' to require opt-out (#ifndef protectProtoStr). 
Otherwise set to None.""" - - self.protectProtoStr = protectProtoStr - """#ifdef/#ifndef symbol to use around prototype declarations, if protectProto is set""" - - self.apicall = apicall - """string to use for the function declaration prefix, such as APICALL on Windows.""" - - self.apientry = apientry - """string to use for the calling convention macro, in typedefs, such as APIENTRY.""" - - self.apientryp = apientryp - """string to use for the calling convention macro in function pointer typedefs, such as APIENTRYP.""" - - self.indentFuncProto = indentFuncProto - """True if prototype declarations should put each parameter on a separate line""" - - self.indentFuncPointer = indentFuncPointer - """True if typedefed function pointers should put each parameter on a separate line""" - - self.alignFuncParam = alignFuncParam - """if nonzero and parameters are being put on a separate line, align parameter names at the specified column""" - - self.genEnumBeginEndRange = genEnumBeginEndRange - """True if BEGIN_RANGE / END_RANGE macros should be generated for enumerated types""" - - self.genAliasMacro = genAliasMacro - """True if the OpenXR alias macro should be generated for aliased types (unclear what other circumstances this is useful)""" - - self.aliasMacro = aliasMacro - """alias macro to inject when genAliasMacro is True""" - - self.misracstyle = misracstyle - """generate MISRA C-friendly headers""" - - self.misracppstyle = misracppstyle - """generate MISRA C++-friendly headers""" - - self.codeGenerator = True - """True if this generator makes compilable code""" - - -class COutputGenerator(OutputGenerator): - """Generates C-language API interfaces.""" - - # This is an ordered list of sections in the header file. 
- TYPE_SECTIONS = ['include', 'define', 'basetype', 'handle', 'enum', - 'group', 'bitmask', 'funcpointer', 'struct'] - ALL_SECTIONS = TYPE_SECTIONS + ['commandPointer', 'command'] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # Internal state - accumulators for different inner block text - self.sections = {section: [] for section in self.ALL_SECTIONS} - self.feature_not_empty = False - self.may_alias = None - - def beginFile(self, genOpts): - OutputGenerator.beginFile(self, genOpts) - # C-specific - # - # Multiple inclusion protection & C++ wrappers. - if genOpts.protectFile and self.genOpts.filename: - headerSym = re.sub(r'\.h', '_h_', - os.path.basename(self.genOpts.filename)).upper() - write('#ifndef', headerSym, file=self.outFile) - write('#define', headerSym, '1', file=self.outFile) - self.newline() - - # User-supplied prefix text, if any (list of strings) - if genOpts.prefixText: - for s in genOpts.prefixText: - write(s, file=self.outFile) - - # C++ extern wrapper - after prefix lines so they can add includes. - self.newline() - write('#ifdef __cplusplus', file=self.outFile) - write('extern "C" {', file=self.outFile) - write('#endif', file=self.outFile) - self.newline() - - def endFile(self): - # C-specific - # Finish C++ wrapper and multiple inclusion protection - self.newline() - write('#ifdef __cplusplus', file=self.outFile) - write('}', file=self.outFile) - write('#endif', file=self.outFile) - if self.genOpts.protectFile and self.genOpts.filename: - self.newline() - write('#endif', file=self.outFile) - # Finish processing in superclass - OutputGenerator.endFile(self) - - def beginFeature(self, interface, emit): - # Start processing in superclass - OutputGenerator.beginFeature(self, interface, emit) - # C-specific - # Accumulate includes, defines, types, enums, function pointer typedefs, - # end function prototypes separately for this feature. They're only - # printed in endFeature(). 
- self.sections = {section: [] for section in self.ALL_SECTIONS} - self.feature_not_empty = False - - def endFeature(self): - "Actually write the interface to the output file." - # C-specific - if self.emit: - if self.feature_not_empty: - if self.genOpts.conventions.writeFeature(self.featureExtraProtect, self.genOpts.filename): - self.newline() - if self.genOpts.protectFeature: - write('#ifndef', self.featureName, file=self.outFile) - # If type declarations are needed by other features based on - # this one, it may be necessary to suppress the ExtraProtect, - # or move it below the 'for section...' loop. - if self.featureExtraProtect is not None: - write('#ifdef', self.featureExtraProtect, file=self.outFile) - self.newline() - write('#define', self.featureName, '1', file=self.outFile) - for section in self.TYPE_SECTIONS: - contents = self.sections[section] - if contents: - write('\n'.join(contents), file=self.outFile) - if self.genOpts.genFuncPointers and self.sections['commandPointer']: - write('\n'.join(self.sections['commandPointer']), file=self.outFile) - self.newline() - if self.sections['command']: - if self.genOpts.protectProto: - write(self.genOpts.protectProto, - self.genOpts.protectProtoStr, file=self.outFile) - write('\n'.join(self.sections['command']), end='', file=self.outFile) - if self.genOpts.protectProto: - write('#endif', file=self.outFile) - else: - self.newline() - if self.featureExtraProtect is not None: - write('#endif /*', self.featureExtraProtect, '*/', file=self.outFile) - if self.genOpts.protectFeature: - write('#endif /*', self.featureName, '*/', file=self.outFile) - # Finish processing in superclass - OutputGenerator.endFeature(self) - - def appendSection(self, section, text): - "Append a definition to the specified section" - # self.sections[section].append('SECTION: ' + section + '\n') - self.sections[section].append(text) - self.feature_not_empty = True - - def genType(self, typeinfo, name, alias): - "Generate type." 
- OutputGenerator.genType(self, typeinfo, name, alias) - typeElem = typeinfo.elem - - # Vulkan: - # Determine the category of the type, and the type section to add - # its definition to. - # 'funcpointer' is added to the 'struct' section as a workaround for - # internal issue #877, since structures and function pointer types - # can have cross-dependencies. - category = typeElem.get('category') - if category == 'funcpointer': - section = 'struct' - else: - section = category - - if category in ('struct', 'union'): - # If the type is a struct type, generate it using the - # special-purpose generator. - self.genStruct(typeinfo, name, alias) - else: - # OpenXR: this section was not under 'else:' previously, just fell through - if alias: - # If the type is an alias, just emit a typedef declaration - body = 'typedef ' + alias + ' ' + name + ';\n' - else: - # Replace tags with an APIENTRY-style string - # (from self.genOpts). Copy other text through unchanged. - # If the resulting text is an empty string, don't emit it. - body = noneStr(typeElem.text) - for elem in typeElem: - if elem.tag == 'apientry': - body += self.genOpts.apientry + noneStr(elem.tail) - else: - body += noneStr(elem.text) + noneStr(elem.tail) - if body: - # Add extra newline after multi-line entries. - if '\n' in body[0:-1]: - body += '\n' - self.appendSection(section, body) - - def genProtectString(self, protect_str): - """Generate protection string. - - Protection strings are the strings defining the OS/Platform/Graphics - requirements for a given OpenXR command. 
When generating the - language header files, we need to make sure the items specific to a - graphics API or OS platform are properly wrapped in #ifs.""" - protect_if_str = '' - protect_end_str = '' - if not protect_str: - return (protect_if_str, protect_end_str) - - if ',' in protect_str: - protect_list = protect_str.split(",") - protect_defs = ('defined(%s)' % d for d in protect_list) - protect_def_str = ' && '.join(protect_defs) - protect_if_str = '#if %s\n' % protect_def_str - protect_end_str = '#endif // %s\n' % protect_def_str - else: - protect_if_str = '#ifdef %s\n' % protect_str - protect_end_str = '#endif // %s\n' % protect_str - - return (protect_if_str, protect_end_str) - - def typeMayAlias(self, typeName): - if not self.may_alias: - # First time we've asked if a type may alias. - # So, let's populate the set of all names of types that may. - - # Everyone with an explicit mayalias="true" - self.may_alias = set(typeName - for typeName, data in self.registry.typedict.items() - if data.elem.get('mayalias') == 'true') - - # Every type mentioned in some other type's parentstruct attribute. - parent_structs = (otherType.elem.get('parentstruct') - for otherType in self.registry.typedict.values()) - self.may_alias.update(set(x for x in parent_structs - if x is not None)) - return typeName in self.may_alias - - def genStruct(self, typeinfo, typeName, alias): - """Generate struct (e.g. C "struct" type). - - This is a special case of the tag where the contents are - interpreted as a set of tags instead of freeform C - C type declarations. The tags are just like - tags - they are a declaration of a struct or union member. - Only simple member declarations are supported (no nested - structs etc.) 
- - If alias is not None, then this struct aliases another; just - generate a typedef of that alias.""" - OutputGenerator.genStruct(self, typeinfo, typeName, alias) - - typeElem = typeinfo.elem - - if alias: - body = 'typedef ' + alias + ' ' + typeName + ';\n' - else: - body = '' - (protect_begin, protect_end) = self.genProtectString(typeElem.get('protect')) - if protect_begin: - body += protect_begin - body += 'typedef ' + typeElem.get('category') - - # This is an OpenXR-specific alternative where aliasing refers - # to an inheritance hierarchy of types rather than C-level type - # aliases. - if self.genOpts.genAliasMacro and self.typeMayAlias(typeName): - body += ' ' + self.genOpts.aliasMacro - - body += ' ' + typeName + ' {\n' - - targetLen = self.getMaxCParamTypeLength(typeinfo) - for member in typeElem.findall('.//member'): - body += self.makeCParamDecl(member, targetLen + 4) - body += ';\n' - body += '} ' + typeName + ';\n' - if protect_end: - body += protect_end - - self.appendSection('struct', body) - - def genGroup(self, groupinfo, groupName, alias=None): - """Generate groups (e.g. C "enum" type). - - These are concatenated together with other types. - - If alias is not None, it is the name of another group type - which aliases this type; just generate that alias.""" - OutputGenerator.genGroup(self, groupinfo, groupName, alias) - groupElem = groupinfo.elem - - # After either enumerated type or alias paths, add the declaration - # to the appropriate section for the group being defined. - if groupElem.get('type') == 'bitmask': - section = 'bitmask' - else: - section = 'group' - - if alias: - # If the group name is aliased, just emit a typedef declaration - # for the alias. 
- body = 'typedef ' + alias + ' ' + groupName + ';\n' - self.appendSection(section, body) - else: - (section, body) = self.buildEnumCDecl(self.genOpts.genEnumBeginEndRange, groupinfo, groupName) - self.appendSection(section, "\n" + body) - - def genEnum(self, enuminfo, name, alias): - """Generate the C declaration for a constant (a single value).""" - - OutputGenerator.genEnum(self, enuminfo, name, alias) - - body = self.buildConstantCDecl(enuminfo, name, alias) - self.appendSection('enum', body) - - def genCmd(self, cmdinfo, name, alias): - "Command generation" - OutputGenerator.genCmd(self, cmdinfo, name, alias) - - # if alias: - # prefix = '// ' + name + ' is an alias of command ' + alias + '\n' - # else: - # prefix = '' - - prefix = '' - decls = self.makeCDecls(cmdinfo.elem) - self.appendSection('command', prefix + decls[0] + '\n') - if self.genOpts.genFuncPointers: - self.appendSection('commandPointer', decls[1]) - - def misracstyle(self): - return self.genOpts.misracstyle; - - def misracppstyle(self): - return self.genOpts.misracppstyle; diff --git a/xml/checklinks.py b/xml/checklinks.py deleted file mode 100644 index 94b650dad..000000000 --- a/xml/checklinks.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/python3 -# -# Copyright 2013-2023 The Khronos Group Inc. -# SPDX-License-Identifier: Apache-2.0 - -import argparse -import os -import re - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument('-d', action='store', dest='directory', - default='../api', - help='Directory containing files to check') - parser.add_argument('--unlinked', action='store_true', - help='Check for unlinked APIs and enums (may have false positives!)') - - args = parser.parse_args() - - links = set() - anchors = set() - - for filename in os.listdir(args.directory): - filename = args.directory + '/' + filename - sourcefile = open(filename, 'r') - sourcetext = sourcefile.read() - sourcefile.close() - - # We're not going to check API links. 
- #filelinks = re.findall(r"{((cl\w+)|(CL\w+))}", sourcetext) - filelinks = re.findall(r"{((CL\w+))}", sourcetext) - fileanchors = re.findall(r"{((cl\w+)|(CL\w+))_anchor}", sourcetext) - - filelinks = [re.sub(r"_anchor\b", "", link[0]) for link in filelinks] - fileanchors = [anchor[0] for anchor in fileanchors] - - links = links.union(set(filelinks) - set(fileanchors)) - anchors = anchors.union(set(fileanchors)) - - #print("=== " + filename) - #print("links:") - #print(' '.join(filelinks)) - #print("anchors:") - #print(' '.join(fileanchors)) - - if args.unlinked: - # Look for APIs and enums that do not begin with: - # { = asciidoctor attribute link - # character = middle of word - # < = asciidoctor link - # ' = refpage description - # / = proto include - fileunlinkedapi = sorted(list(set(re.findall(r"[^{\w<'/](cl[A-Z]\w+)\b[^'](?!.')", sourcetext)))) - fileunlinkedenums = sorted(list(set(re.findall("r[^{\w<](CL_\w+)", sourcetext)))) - fileunlinkedtypes = sorted(list(set(re.findall("r[^{\w<](cl_\w+)", sourcetext)))) - - if len(fileunlinkedapi) != 0: - print("unlinked APIs in " + filename + ":\n\t" + '\n\t'.join(fileunlinkedapi)) - - if len(fileunlinkedenums) != 0: - print("unlinked enums in " + filename + ":\n\t" + '\n\t'.join(fileunlinkedenums)) - - if len(fileunlinkedtypes) != 0: - print("unlinked types in " + filename + ":\n\t" + '\n\t'.join(fileunlinkedtypes)) - - linkswithoutanchors = sorted(list(links - anchors)) - anchorswithoutlinks = sorted(list(anchors - links)) - - print("links without anchors:\n\t" + '\n\t'.join(linkswithoutanchors)) - #print("anchors without links:\n\t" + '\n\t'.join(anchorswithoutlinks)) diff --git a/xml/cl.xml b/xml/cl.xml index d0982f205..1f4a450fd 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -252,7 +252,7 @@ server's OpenCL/api-docs repository. 
typedef cl_bitfield cl_device_fp_atomic_capabilities_ext; typedef cl_uint cl_image_requirements_info_ext; typedef cl_bitfield cl_platform_command_buffer_capabilities_khr; - typedef cl_bitfield cl_mutable_dispatch_asserts_khr + typedef cl_bitfield cl_mutable_dispatch_promises_khr Structure types @@ -1783,7 +1783,7 @@ server's OpenCL/api-docs repository. - + @@ -7282,7 +7282,7 @@ server's OpenCL/api-docs repository. - +
    diff --git a/xml/clconventions.py b/xml/clconventions.py deleted file mode 100644 index f4df49d2d..000000000 --- a/xml/clconventions.py +++ /dev/null @@ -1,241 +0,0 @@ -#!/usr/bin/python3 -i -# -# Copyright 2013-2023 The Khronos Group Inc. -# SPDX-License-Identifier: Apache-2.0 - -# Working-group-specific style conventions, -# used in generation. - -import re - -from conventions import ConventionsBase - - -class OpenCLConventions(ConventionsBase): - def formatExtension(self, name): - """Mark up a name as an extension for the spec.""" - return '`<<{}>>`'.format(name) - - @property - def null(self): - """Preferred spelling of NULL.""" - return '`NULL`' - - @property - def constFlagBits(self): - """Returns True if static const flag bits should be generated, False if an enumerated type should be generated.""" - return False - - @property - def struct_macro(self): - return 'sname:' - - @property - def external_macro(self): - return 'code:' - - @property - def structtype_member_name(self): - """Return name of the structure type member""" - return 'sType' - - @property - def nextpointer_member_name(self): - """Return name of the structure pointer chain member""" - return 'pNext' - - @property - def valid_pointer_prefix(self): - """Return prefix to pointers which must themselves be valid""" - return 'valid' - - def is_structure_type_member(self, paramtype, paramname): - """Determine if member type and name match the structure type member.""" - return False - - def is_nextpointer_member(self, paramtype, paramname): - """Determine if member type and name match the next pointer chain member.""" - return paramtype == 'void' and paramname == self.nextpointer_member_name - - def generate_structure_type_from_name(self, structname): - """Generate a structure type name token from a structure name. 
- This should never be called for OpenCL, just other APIs.""" - return '' - - @property - def warning_comment(self): - """Return warning comment to be placed in header of generated Asciidoctor files""" - return '// WARNING: DO NOT MODIFY! This file is automatically generated from the cl.xml registry' - - @property - def file_suffix(self): - """Return suffix of generated Asciidoctor files""" - return '.txt' - - def api_name(self, spectype='api'): - """Return API or specification name for citations in ref pages.ref - pages should link to for - - spectype is the spec this refpage is for: 'api' is the OpenCL API - Specification, 'clang' is the OpenCL C Language specification. - Defaults to 'api'. If an unrecognized spectype is given, returns - None. - """ - if spectype == 'api' or spectype is None: - return 'OpenCL' - elif spectype == 'clang': - return 'OpenCL C' - else: - return None - - @property - def xml_supported_name_of_api(self): - """Return the supported= attribute used in API XML""" - return 'opencl' - - @property - def api_prefix(self): - """Return API token prefix""" - return 'CL_' - - @property - def api_version_prefix(self): - """Return API core version token prefix""" - return 'CL_VERSION_' - - @property - def KHR_prefix(self): - """Return extension name prefix for KHR extensions""" - return 'cl_khr_' - - @property - def EXT_prefix(self): - """Return extension name prefix for EXT extensions""" - return 'cl_ext_' - - @property - def write_contacts(self): - """Return whether contact list should be written to extension appendices""" - return True - - @property - def write_refpage_include(self): - """Return whether refpage include should be written to extension appendices""" - return False - - def writeFeature(self, featureExtraProtect, filename): - """Returns True if OutputGenerator.endFeature should write this feature. 
- Used in COutputGenerator - """ - return True - - def requires_error_validation(self, return_type): - """Returns True if the return_type element is an API result code - requiring error validation. - """ - return False - - @property - def required_errors(self): - """Return a list of required error codes for validation.""" - return [] - - def is_externsync_command(self, protoname): - """Returns True if the protoname element is an API command requiring - external synchronization - """ - return False - - def is_api_name(self, name): - """Returns True if name is in the reserved API namespace. - For OpenCL, these are names with a case-insensitive 'cl' prefix. - """ - return name[0:2].lower() == 'cl' - - def is_voidpointer_alias(self, tag, text, tail): - """Return True if the declaration components (tag,text,tail) of an - element represents a void * type - """ - return tag == 'type' and text == 'void' and tail.startswith('*') - - def make_voidpointer_alias(self, tail): - """Reformat a void * declaration to include the API alias macro. - Vulkan doesn't have an API alias macro, so do nothing. - """ - return tail - - def specURL(self, spectype = 'api'): - """Return public registry URL which ref pages should link to for - full Specification, so xrefs in the asciidoc source that aren't - to ref pages can link into it instead. - - spectype is the spec this refpage is for: 'api' is the OpenCL API - Specification, 'clang' is the OpenCL C Language specification. - Defaults to 'api'. If an unrecognized spectype is given, returns - None. 
- """ - if spectype == 'api' or spectype is None: - return 'https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html' - elif spectype == 'clang': - return 'https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_C.html' - else: - return None - - @property - def xml_api_name(self): - """Return the name used in the default API XML registry for the default API""" - return 'opencl' - - @property - def registry_path(self): - """Return relpath to the default API XML registry in this project.""" - return 'xml/cl.xml' - - @property - def specification_path(self): - """Return relpath to the Asciidoctor specification sources in this project.""" - return '../appendices/meta' - - @property - def extra_refpage_headers(self): - """Return any extra text to add to refpage headers.""" - return 'include::{config}/attribs.txt[]\n' + \ - 'include::{config}/opencl.asciidoc[]\n' + \ - 'include::{apispec}/footnotes.asciidoc[]\n' + \ - 'include::{cspec}/footnotes.asciidoc[]\n' + \ - 'include::{cspec}/feature-dictionary.asciidoc[]\n' + \ - 'include::{generated}/api/api-dictionary-no-links.asciidoc[]' - - @property - def extension_index_prefixes(self): - """Return a list of extension prefixes used to group extension refpages.""" - return ['cl_khr', 'cl_ext', 'cl'] - - @property - def unified_flag_refpages(self): - """Return True if Flags/FlagBits refpages are unified, False if - they're separate. - """ - return False - - @property - def spec_reflow_path(self): - """Return the relative path to the spec source folder to reflow""" - return '.' - - @property - def spec_no_reflow_dirs(self): - """Return a set of directories not to automatically descend into - when reflowing spec text - """ - return ('scripts', 'style') - - @property - def should_skip_checking_codes(self): - """Return True if more than the basic validation of return codes should - be skipped for a command. 
- - OpenCL has a different style of error handling than OpenXR or - Vulkan, so these checks are not appropriate.""" - - return True diff --git a/xml/conventions.py b/xml/conventions.py deleted file mode 100644 index 6b6b23d14..000000000 --- a/xml/conventions.py +++ /dev/null @@ -1,358 +0,0 @@ -#!/usr/bin/python3 -i -# -# Copyright 2013-2023 The Khronos Group Inc. -# -# SPDX-License-Identifier: Apache-2.0 - -# Base class for working-group-specific style conventions, -# used in generation. - -from enum import Enum - -# Type categories that respond "False" to isStructAlwaysValid -# basetype is home to typedefs like ..Bool32 -CATEGORIES_REQUIRING_VALIDATION = set(('handle', - 'enum', - 'bitmask', - 'basetype', - None)) - -# These are basic C types pulled in via openxr_platform_defines.h -TYPES_KNOWN_ALWAYS_VALID = set(('char', - 'float', - 'int8_t', 'uint8_t', - 'int32_t', 'uint32_t', - 'int64_t', 'uint64_t', - 'size_t', - 'uintptr_t', - 'int', - )) - - -class ProseListFormats(Enum): - """A connective, possibly with a quantifier.""" - AND = 0 - EACH_AND = 1 - OR = 2 - ANY_OR = 3 - - @classmethod - def from_string(cls, s): - if s == 'or': - return cls.OR - if s == 'and': - return cls.AND - return None - - @property - def connective(self): - if self in (ProseListFormats.OR, ProseListFormats.ANY_OR): - return 'or' - return 'and' - - def quantifier(self, n): - """Return the desired quantifier for a list of a given length.""" - if self == ProseListFormats.ANY_OR: - if n > 1: - return 'any of ' - elif self == ProseListFormats.EACH_AND: - if n > 2: - return 'each of ' - if n == 2: - return 'both of ' - return '' - - -class ConventionsBase: - """WG-specific conventions.""" - - def __init__(self): - self._command_prefix = None - self._type_prefix = None - - def formatExtension(self, name): - """Mark up an extension name as a link the spec.""" - return '`apiext:{}`'.format(name) - - @property - def null(self): - """Preferred spelling of NULL.""" - raise NotImplementedError - - 
def makeProseList(self, elements, fmt=ProseListFormats.AND, with_verb=False, *args, **kwargs): - """Make a (comma-separated) list for use in prose. - - Adds a connective (by default, 'and') - before the last element if there are more than 1. - - Adds the right one of "is" or "are" to the end if with_verb is true. - - Optionally adds a quantifier (like 'any') before a list of 2 or more, - if specified by fmt. - - Override with a different method or different call to - _implMakeProseList if you want to add a comma for two elements, - or not use a serial comma. - """ - return self._implMakeProseList(elements, fmt, with_verb, *args, **kwargs) - - @property - def struct_macro(self): - """Get the appropriate format macro for a structure. - - May override. - """ - return 'slink:' - - @property - def external_macro(self): - """Get the appropriate format macro for an external type like uint32_t. - - May override. - """ - return 'code:' - - def makeStructName(self, name): - """Prepend the appropriate format macro for a structure to a structure type name. - - Uses struct_macro, so just override that if you want to change behavior. - """ - return self.struct_macro + name - - def makeExternalTypeName(self, name): - """Prepend the appropriate format macro for an external type like uint32_t to a type name. - - Uses external_macro, so just override that if you want to change behavior. - """ - return self.external_macro + name - - def _implMakeProseList(self, elements, fmt, with_verb, comma_for_two_elts=False, serial_comma=True): - """Internal-use implementation to make a (comma-separated) list for use in prose. - - Adds a connective (by default, 'and') - before the last element if there are more than 1, - and only includes commas if there are more than 2 - (if comma_for_two_elts is False). - - Adds the right one of "is" or "are" to the end if with_verb is true. - - Optionally adds a quantifier (like 'any') before a list of 2 or more, - if specified by fmt. 
- - Don't edit these defaults, override self.makeProseList(). - """ - assert(serial_comma) # didn't implement what we didn't need - if isinstance(fmt, str): - fmt = ProseListFormats.from_string(fmt) - - my_elts = list(elements) - if len(my_elts) > 1: - my_elts[-1] = '{} {}'.format(fmt.connective, my_elts[-1]) - - if not comma_for_two_elts and len(my_elts) <= 2: - prose = ' '.join(my_elts) - else: - prose = ', '.join(my_elts) - - quantifier = fmt.quantifier(len(my_elts)) - - parts = [quantifier, prose] - - if with_verb: - if len(my_elts) > 1: - parts.append(' are') - else: - parts.append(' is') - return ''.join(parts) - - @property - def file_suffix(self): - """Return suffix of generated Asciidoctor files""" - raise NotImplementedError - - def api_name(self, spectype=None): - """Return API or specification name for citations in ref pages. - - spectype is the spec this refpage is for. - 'api' (the default value) is the main API Specification. - If an unrecognized spectype is given, returns None. - - Must implement.""" - raise NotImplementedError - - def should_insert_may_alias_macro(self, genOpts): - """Return true if we should insert a "may alias" macro in this file. - - Only used by OpenXR right now.""" - return False - - @property - def command_prefix(self): - """Return the expected prefix of commands/functions. - - Implemented in terms of api_prefix.""" - if not self._command_prefix: - self._command_prefix = self.api_prefix[:].replace('_', '').lower() - return self._command_prefix - - @property - def type_prefix(self): - """Return the expected prefix of type names. - - Implemented in terms of command_prefix (and in turn, api_prefix).""" - if not self._type_prefix: - self._type_prefix = ''.join( - (self.command_prefix[0:1].upper(), self.command_prefix[1:])) - return self._type_prefix - - @property - def api_prefix(self): - """Return API token prefix. - - Typically two uppercase letters followed by an underscore. 
- - Must implement.""" - raise NotImplementedError - - @property - def api_version_prefix(self): - """Return API core version token prefix. - - Implemented in terms of api_prefix. - - May override.""" - return self.api_prefix + 'VERSION_' - - @property - def KHR_prefix(self): - """Return extension name prefix for KHR extensions. - - Implemented in terms of api_prefix. - - May override.""" - return self.api_prefix + 'KHR_' - - @property - def EXT_prefix(self): - """Return extension name prefix for EXT extensions. - - Implemented in terms of api_prefix. - - May override.""" - return self.api_prefix + 'EXT_' - - def writeFeature(self, featureExtraProtect, filename): - """Return True if OutputGenerator.endFeature should write this feature. - - Defaults to always True. - Used in COutputGenerator. - - May override.""" - return True - - def requires_error_validation(self, return_type): - """Return True if the return_type element is an API result code - requiring error validation. - - Defaults to always False. - - May override.""" - return False - - @property - def required_errors(self): - """Return a list of required error codes for validation. - - Defaults to an empty list. - - May override.""" - return [] - - def is_voidpointer_alias(self, tag, text, tail): - """Return True if the declaration components (tag,text,tail) of an - element represents a void * type. - - Defaults to a reasonable implementation. - - May override.""" - return tag == 'type' and text == 'void' and tail.startswith('*') - - def make_voidpointer_alias(self, tail): - """Reformat a void * declaration to include the API alias macro. - - Defaults to a no-op. - - Must override if you actually want to use this feature in your project.""" - return tail - - def category_requires_validation(self, category): - """Return True if the given type 'category' always requires validation. - - Defaults to a reasonable implementation. 
- - May override.""" - return category in CATEGORIES_REQUIRING_VALIDATION - - def type_always_valid(self, typename): - """Return True if the given type name is always valid (never requires validation). - - This is for things like integers. - - Defaults to a reasonable implementation. - - May override.""" - return typename in TYPES_KNOWN_ALWAYS_VALID - - @property - def should_skip_checking_codes(self): - """Return True if more than the basic validation of return codes should - be skipped for a command.""" - - return False - - @property - def generate_index_terms(self): - """Return True if asiidoctor index terms should be generated as part - of an API interface from the docgenerator.""" - - return False - - @property - def generate_enum_table(self): - """Return True if asciidoctor tables describing enumerants in a - group should be generated as part of group generation.""" - return False - - @property - def generate_max_enum_in_docs(self): - """Return True if MAX_ENUM tokens should be generated in - documentation includes.""" - return False - - - def extension_include_string(self, ext): - """Return format string for include:: line for an extension appendix - file. ext is an object with the following members: - - name - extension string string - - vendor - vendor portion of name - - barename - remainder of name - - Must implement.""" - raise NotImplementedError - - @property - def refpage_generated_include_path(self): - """Return path relative to the generated reference pages, to the - generated API include files. - - Must implement.""" - raise NotImplementedError - - def valid_flag_bit(self, bitpos): - """Return True if bitpos is an allowed numeric bit position for - an API flag. 
- - Behavior depends on the data type used for flags (which may be 32 - or 64 bits), and may depend on assumptions about compiler - handling of sign bits in enumerated types, as well.""" - return True diff --git a/xml/docgenerator.py b/xml/docgenerator.py deleted file mode 100644 index 073552534..000000000 --- a/xml/docgenerator.py +++ /dev/null @@ -1,454 +0,0 @@ -#!/usr/bin/python3 -i -# -# Copyright 2013-2023 The Khronos Group Inc. -# -# SPDX-License-Identifier: Apache-2.0 - -from pathlib import Path - -from generator import GeneratorOptions, OutputGenerator, noneStr, write - -ENUM_TABLE_PREFIX = """ -[cols=",",options="header",] -|======================================================================= -|Enum |Description""" - -ENUM_TABLE_SUFFIX = """|=======================================================================""" - -FLAG_BLOCK_PREFIX = """.Flag Descriptions -****""" - -FLAG_BLOCK_SUFFIX = """****""" - - -class DocGeneratorOptions(GeneratorOptions): - """DocGeneratorOptions - subclass of GeneratorOptions for - generating declaration snippets for the spec. - - Shares many members with CGeneratorOptions, since - both are writing C-style declarations.""" - - def __init__(self, - prefixText="", - apicall='', - apientry='', - apientryp='', - indentFuncProto=True, - indentFuncPointer=False, - alignFuncParam=0, - secondaryInclude=False, - expandEnumerants=True, - extEnumerantAdditions=False, - extEnumerantFormatString=" (Added by the {} extension)", - **kwargs): - """Constructor. - - Since this generator outputs multiple files at once, - the filename is just a "stamp" to indicate last generation time. - - Shares many parameters/members with CGeneratorOptions, since - both are writing C-style declarations: - - - prefixText - list of strings to prefix generated header with - (usually a copyright statement + calling convention macros). - - apicall - string to use for the function declaration prefix, - such as APICALL on Windows. 
- - apientry - string to use for the calling convention macro, - in typedefs, such as APIENTRY. - - apientryp - string to use for the calling convention macro - in function pointer typedefs, such as APIENTRYP. - - indentFuncProto - True if prototype declarations should put each - parameter on a separate line - - indentFuncPointer - True if typedefed function pointers should put each - parameter on a separate line - - alignFuncParam - if nonzero and parameters are being put on a - separate line, align parameter names at the specified column - - Additional parameters/members: - - - expandEnumerants - if True, add BEGIN/END_RANGE macros in enumerated - type declarations - - secondaryInclude - if True, add secondary (no xref anchor) versions - of generated files - - extEnumerantAdditions - if True, include enumerants added by extensions - in comment tables for core enumeration types. - - extEnumerantFormatString - A format string for any additional message for - enumerants from extensions if extEnumerantAdditions is True. The correctly- - marked-up extension name will be passed. 
- """ - GeneratorOptions.__init__(self, **kwargs) - self.prefixText = prefixText - """list of strings to prefix generated header with (usually a copyright statement + calling convention macros).""" - - self.apicall = apicall - """string to use for the function declaration prefix, such as APICALL on Windows.""" - - self.apientry = apientry - """string to use for the calling convention macro, in typedefs, such as APIENTRY.""" - - self.apientryp = apientryp - """string to use for the calling convention macro in function pointer typedefs, such as APIENTRYP.""" - - self.indentFuncProto = indentFuncProto - """True if prototype declarations should put each parameter on a separate line""" - - self.indentFuncPointer = indentFuncPointer - """True if typedefed function pointers should put each parameter on a separate line""" - - self.alignFuncParam = alignFuncParam - """if nonzero and parameters are being put on a separate line, align parameter names at the specified column""" - - self.secondaryInclude = secondaryInclude - """if True, add secondary (no xref anchor) versions of generated files""" - - self.expandEnumerants = expandEnumerants - """if True, add BEGIN/END_RANGE macros in enumerated type declarations""" - - self.extEnumerantAdditions = extEnumerantAdditions - """if True, include enumerants added by extensions in comment tables for core enumeration types.""" - - self.extEnumerantFormatString = extEnumerantFormatString - """A format string for any additional message for - enumerants from extensions if extEnumerantAdditions is True. The correctly- - marked-up extension name will be passed.""" - - -class DocOutputGenerator(OutputGenerator): - """DocOutputGenerator - subclass of OutputGenerator. - - Generates AsciiDoc includes with C-language API interfaces, for reference - pages and the corresponding specification. 
Similar to COutputGenerator, - but each interface is written into a different file as determined by the - options, only actual C types are emitted, and none of the boilerplate - preprocessor code is emitted.""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # Keep track of all extension numbers - self.extension_numbers = set() - - def beginFile(self, genOpts): - OutputGenerator.beginFile(self, genOpts) - - # This should be a separate conventions property rather than an - # inferred type name pattern for different APIs. - self.result_type = genOpts.conventions.type_prefix + "Result" - - def endFile(self): - OutputGenerator.endFile(self) - - def beginFeature(self, interface, emit): - # Start processing in superclass - OutputGenerator.beginFeature(self, interface, emit) - - # Decide if we're in a core or an - self.in_core = (interface.tag == 'feature') - - # Verify that each has a unique number during doc - # generation - # TODO move this to consistency_tools - if not self.in_core: - extension_number = interface.get('number') - if extension_number is not None and extension_number != "0": - if extension_number in self.extension_numbers: - self.logMsg('error', 'Duplicate extension number ', extension_number, ' detected in feature ', interface.get('name'), '\n') - exit(1) - else: - self.extension_numbers.add(extension_number) - - def endFeature(self): - # Finish processing in superclass - OutputGenerator.endFeature(self) - - def genRequirements(self, name, mustBeFound = True): - """Generate text showing what core versions and extensions introduce - an API. This relies on the map in api.py, which may be loaded at - runtime into self.apidict. If not present, no message is - generated. - - - name - name of the API - - mustBeFound - If True, when requirements for 'name' cannot be - determined, a warning comment is generated. 
- """ - - if self.apidict: - if name in self.apidict.requiredBy: - features = [] - for (base,dependency) in self.apidict.requiredBy[name]: - if dependency is not None: - features.append('{} with {}'.format(base, dependency)) - else: - features.append(base) - return '// Provided by {}\n'.format(', '.join(features)) - else: - if mustBeFound: - self.logMsg('warn', 'genRequirements: API {} not found'.format(name)) - return '' - else: - # No API dictionary available, return nothing - return '' - - def writeInclude(self, directory, basename, contents): - """Generate an include file. - - - directory - subdirectory to put file in - - basename - base name of the file - - contents - contents of the file (Asciidoc boilerplate aside)""" - # Create subdirectory, if needed - directory = self.genOpts.directory + '/' + directory - self.makeDir(directory) - - # Create file - filename = directory + '/' + basename + '.txt' - self.logMsg('diag', '# Generating include file:', filename) - fp = open(filename, 'w', encoding='utf-8') - - # Asciidoc anchor - write(self.genOpts.conventions.warning_comment, file=fp) - write('[[{0},{0}]]'.format(basename), file=fp) - - if self.genOpts.conventions.generate_index_terms: - index_terms = [] - if basename.startswith(self.conventions.command_prefix): - index_terms.append(basename[2:] + " (function)") - elif basename.startswith(self.conventions.type_prefix): - index_terms.append(basename[2:] + " (type)") - elif basename.startswith(self.conventions.api_prefix): - index_terms.append(basename[len(self.conventions.api_prefix):] + " (define)") - index_terms.append(basename) - write('indexterm:[{}]'.format(','.join(index_terms)), file=fp) - - write('[source,opencl]', file=fp) - write('----', file=fp) - write(contents, file=fp) - write('----', file=fp) - fp.close() - - if self.genOpts.secondaryInclude: - # Create secondary no cross-reference include file - filename = directory + '/' + basename + '.no-xref.txt' - self.logMsg('diag', '# Generating include 
file:', filename) - fp = open(filename, 'w', encoding='utf-8') - - # Asciidoc anchor - write(self.genOpts.conventions.warning_comment, file=fp) - write('// Include this no-xref version without cross reference id for multiple includes of same file', file=fp) - write('[source,opencl]', file=fp) - write('----', file=fp) - write(contents, file=fp) - write('----', file=fp) - fp.close() - - def writeTable(self, basename, values): - """Output a table of enumerants.""" - directory = Path(self.genOpts.directory) / 'enums' - self.makeDir(str(directory)) - - filename = str(directory / '{}.comments.txt'.format(basename)) - self.logMsg('diag', '# Generating include file:', filename) - - with open(filename, 'w', encoding='utf-8') as fp: - write(self.conventions.warning_comment, file=fp) - write(ENUM_TABLE_PREFIX, file=fp) - - for data in values: - write("|ename:{}".format(data['name']), file=fp) - write("|{}".format(data['comment']), file=fp) - - write(ENUM_TABLE_SUFFIX, file=fp) - - def writeFlagBox(self, basename, values): - """Output a box of flag bit comments.""" - directory = Path(self.genOpts.directory) / 'enums' - self.makeDir(str(directory)) - - filename = str(directory / '{}.comments.txt'.format(basename)) - self.logMsg('diag', '# Generating include file:', filename) - - with open(filename, 'w', encoding='utf-8') as fp: - write(self.conventions.warning_comment, file=fp) - write(FLAG_BLOCK_PREFIX, file=fp) - - for data in values: - write("* ename:{} -- {}".format(data['name'], - data['comment']), - file=fp) - - write(FLAG_BLOCK_SUFFIX, file=fp) - - def genType(self, typeinfo, name, alias): - """Generate type.""" - OutputGenerator.genType(self, typeinfo, name, alias) - typeElem = typeinfo.elem - # If the type is a struct type, traverse the embedded tags - # generating a structure. Otherwise, emit the tag text. - category = typeElem.get('category') - - if category in ('struct', 'union'): - # If the type is a struct type, generate it using the - # special-purpose generator. 
- self.genStruct(typeinfo, name, alias) - else: - body = self.genRequirements(name) - if alias: - # If the type is an alias, just emit a typedef declaration - body += 'typedef ' + alias + ' ' + name + ';\n' - self.writeInclude(OutputGenerator.categoryToPath[category], - name, body) - else: - # Replace tags with an APIENTRY-style string - # (from self.genOpts). Copy other text through unchanged. - # If the resulting text is an empty string, don't emit it. - body += noneStr(typeElem.text) - for elem in typeElem: - if elem.tag == 'apientry': - body += self.genOpts.apientry + noneStr(elem.tail) - else: - body += noneStr(elem.text) + noneStr(elem.tail) - - if body: - if category in OutputGenerator.categoryToPath: - self.writeInclude(OutputGenerator.categoryToPath[category], - name, body + '\n') - else: - self.logMsg('diag', '# NOT writing include file for type:', - name, '- bad category: ', category) - else: - self.logMsg('diag', '# NOT writing empty include file for type', name) - - def genStruct(self, typeinfo, typeName, alias): - """Generate struct.""" - OutputGenerator.genStruct(self, typeinfo, typeName, alias) - - typeElem = typeinfo.elem - - body = self.genRequirements(typeName) - if alias: - body += 'typedef ' + alias + ' ' + typeName + ';\n' - else: - body += 'typedef ' + typeElem.get('category') + ' ' + typeName + ' {\n' - - targetLen = self.getMaxCParamTypeLength(typeinfo) - for member in typeElem.findall('.//member'): - body += self.makeCParamDecl(member, targetLen + 4) - body += ';\n' - body += '} ' + typeName + ';' - - self.writeInclude('structs', typeName, body) - - def genEnumTable(self, groupinfo, groupName): - """Generate tables of enumerant values and short descriptions from - the XML.""" - - values = [] - got_comment = False - missing_comments = [] - for elem in groupinfo.elem.findall('enum'): - if not elem.get('required'): - continue - name = elem.get('name') - - data = { - 'name': name, - } - - (numVal, strVal) = self.enumToValue(elem, True) - 
data['value'] = numVal - - extname = elem.get('extname') - - added_by_extension_to_core = (extname is not None and self.in_core) - if added_by_extension_to_core and not self.genOpts.extEnumerantAdditions: - # We're skipping such values - continue - - comment = elem.get('comment') - if comment: - got_comment = True - elif name.endswith('_UNKNOWN') and numVal == 0: - # This is a placeholder for 0-initialization to be clearly invalid. - # Just skip this silently - continue - else: - # Skip but record this in case it's an odd-one-out missing a comment. - missing_comments.append(name) - continue - - if added_by_extension_to_core and self.genOpts.extEnumerantFormatString: - # Add a note to the comment - comment += self.genOpts.extEnumerantFormatString.format( - self.conventions.formatExtension(extname)) - - data['comment'] = comment - values.append(data) - - if got_comment: - # If any had a comment, output it. - - if missing_comments: - self.logMsg('warn', 'The following values for', groupName, - 'were omitted from the table due to missing comment attributes:', - ', '.join(missing_comments)) - - group_type = groupinfo.elem.get('type') - if groupName == self.result_type: - # Split this into success and failure - self.writeTable(groupName + '.success', - (data for data in values - if data['value'] >= 0)) - self.writeTable(groupName + '.error', - (data for data in values - if data['value'] < 0)) - elif group_type == 'bitmask': - self.writeFlagBox(groupName, values) - elif group_type == 'enum': - self.writeTable(groupName, values) - else: - raise RuntimeError("Unrecognized enums type: " + str(group_type)) - - def genGroup(self, groupinfo, groupName, alias): - """Generate group (e.g. C "enum" type).""" - OutputGenerator.genGroup(self, groupinfo, groupName, alias) - - body = self.genRequirements(groupName) - if alias: - # If the group name is aliased, just emit a typedef declaration - # for the alias. 
- body += 'typedef ' + alias + ' ' + groupName + ';\n' - else: - expand = self.genOpts.expandEnumerants - (_, enumbody) = self.buildEnumCDecl(expand, groupinfo, groupName) - body += enumbody - if self.genOpts.conventions.generate_enum_table: - self.genEnumTable(groupinfo, groupName) - - self.writeInclude('enums', groupName, body) - - def genEnum(self, enuminfo, name, alias): - """Generate the C declaration for a constant (a single value).""" - - OutputGenerator.genEnum(self, enuminfo, name, alias) - - body = self.buildConstantCDecl(enuminfo, name, alias) - - self.writeInclude('enums', name, body) - - def genCmd(self, cmdinfo, name, alias): - "Generate command." - OutputGenerator.genCmd(self, cmdinfo, name, alias) - - return_type = cmdinfo.elem.find('proto/type') - if self.genOpts.conventions.requires_error_validation(return_type): - # This command returns an API result code, so check that it - # returns at least the required errors. - # TODO move this to consistency_tools - required_errors = set(self.genOpts.conventions.required_errors) - errorcodes = cmdinfo.elem.get('errorcodes').split(',') - if not required_errors.issubset(set(errorcodes)): - self.logMsg('error', 'Missing required error code for command: ', name, '\n') - exit(1) - - body = self.genRequirements(name) - decls = self.makeCDecls(cmdinfo.elem) - body += decls[0] - self.writeInclude('protos', name, body) diff --git a/xml/extensionmetadocgenerator.py b/xml/extensionmetadocgenerator.py deleted file mode 100644 index d6243889d..000000000 --- a/xml/extensionmetadocgenerator.py +++ /dev/null @@ -1,659 +0,0 @@ -#!/usr/bin/python3 -i -# -# Copyright 2013-2023 The Khronos Group Inc. -# -# SPDX-License-Identifier: Apache-2.0 - -import os -import re -import sys -from functools import total_ordering -from generator import GeneratorOptions, OutputGenerator, regSortFeatures, write - -class ExtensionMetaDocGeneratorOptions(GeneratorOptions): - """ExtensionMetaDocGeneratorOptions - subclass of GeneratorOptions. 
- - Represents options during extension metainformation generation for Asciidoc""" - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - -EXT_NAME_DECOMPOSE_RE = re.compile(r'[A-Z]+_(?P[A-Z]+)_(?P[\w_]+)') - - -@total_ordering -class Extension: - def __init__(self, - generator, # needed for logging and API conventions - filename, - name, - number, - ext_type, - requires, - requiresCore, - contact, - promotedTo, - deprecatedBy, - obsoletedBy, - provisional, - revision, - specialuse ): - self.generator = generator - self.conventions = generator.genOpts.conventions - self.filename = filename - self.name = name - self.number = number - self.ext_type = ext_type - self.requires = requires - self.requiresCore = requiresCore - self.contact = contact - self.promotedTo = promotedTo - self.deprecatedBy = deprecatedBy - self.obsoletedBy = obsoletedBy - self.provisional = provisional - self.revision = revision - self.specialuse = specialuse - - self.deprecationType = None - self.supercedingAPIVersion = None - self.supercedingExtension = None - - if self.promotedTo is not None and self.deprecatedBy is not None and self.obsoletedBy is not None: - self.generator.logMsg('warn', 'All \'promotedto\', \'deprecatedby\' and \'obsoletedby\' attributes used on extension ' + self.name + '! Ignoring \'promotedto\' and \'deprecatedby\'.') - elif self.promotedTo is not None and self.deprecatedBy is not None: - self.generator.logMsg('warn', 'Both \'promotedto\' and \'deprecatedby\' attributes used on extension ' + self.name + '! Ignoring \'deprecatedby\'.') - elif self.promotedTo is not None and self.obsoletedBy is not None: - self.generator.logMsg('warn', 'Both \'promotedto\' and \'obsoletedby\' attributes used on extension ' + self.name + '! Ignoring \'promotedto\'.') - elif self.deprecatedBy is not None and self.obsoletedBy is not None: - self.generator.logMsg('warn', 'Both \'deprecatedby\' and \'obsoletedby\' attributes used on extension ' + self.name + '! 
Ignoring \'deprecatedby\'.') - - supercededBy = None - if self.promotedTo is not None: - self.deprecationType = 'promotion' - supercededBy = promotedTo - elif self.deprecatedBy is not None: - self.deprecationType = 'deprecation' - supercededBy = deprecatedBy - elif self.obsoletedBy is not None: - self.deprecationType = 'obsoletion' - supercededBy = obsoletedBy - - if supercededBy is not None: - if supercededBy == '' and not self.deprecationType == 'promotion': - pass # supercedingAPIVersion, supercedingExtension is None - elif supercededBy.startswith(self.conventions.api_version_prefix): - self.supercedingAPIVersion = supercededBy - elif supercededBy.startswith(self.conventions.api_prefix): - self.supercedingExtension = supercededBy - else: - self.generator.logMsg('error', 'Unrecognized ' + self.deprecationType + ' attribute value \'' + supercededBy + '\'!') - - match = EXT_NAME_DECOMPOSE_RE.match(self.name) - self.vendor = match.group('tag') - self.bare_name = match.group('name') - - def __str__(self): - return self.name - def __eq__(self, other): - return self.name == other.name - def __ne__(self, other): - return self.name != other.name - - def __lt__(self, other): - self_is_KHR = self.name.startswith(self.conventions.KHR_prefix) - self_is_EXT = self.name.startswith(self.conventions.EXT_prefix) - other_is_KHR = other.name.startswith(self.conventions.KHR_prefix) - other_is_EXT = other.name.startswith(self.conventions.EXT_prefix) - - swap = False - if self_is_KHR and not other_is_KHR: - return not swap - if other_is_KHR and not self_is_KHR: - return swap - if self_is_EXT and not other_is_EXT: - return not swap - if other_is_EXT and not self_is_EXT: - return swap - - return self.name < other.name - - def typeToStr(self): - if self.ext_type == 'instance': - return 'Instance extension' - if self.ext_type == 'device': - return 'Device extension' - - if self.ext_type is not None: - self.generator.logMsg('warn', 'The type attribute of ' + self.name + ' extension is 
neither \'instance\' nor \'device\'. That is invalid (at the time this script was written).') - else: # should be unreachable - self.generator.logMsg('error', 'Logic error in typeToStr(): Missing type attribute!') - return None - - def specLink(self, xrefName, xrefText, isRefpage = False): - """Generate a string containing a link to a specification anchor in - asciidoctor markup form. - - - xrefName - anchor name in the spec - - xrefText - text to show for the link, or None - - isRefpage = True if generating a refpage include, False if - generating a specification extension appendix include""" - - if isRefpage: - # Always link into API spec - specURL = self.conventions.specURL('api') - return 'link:{}#{}[{}^]'.format(specURL, xrefName, xrefText) - else: - return '<<' + xrefName + ', ' + xrefText + '>>' - - def conditionalLinkCoreAPI(self, apiVersion, linkSuffix, isRefpage): - versionMatch = re.match(self.conventions.api_version_prefix + r'(\d+)_(\d+)', apiVersion) - major = versionMatch.group(1) - minor = versionMatch.group(2) - - dottedVersion = major + '.' 
+ minor - - xrefName = 'versions-' + dottedVersion + linkSuffix - xrefText = self.conventions.api_name() + ' ' + dottedVersion - - doc = 'ifdef::' + apiVersion + '[]\n' - doc += ' ' + self.specLink(xrefName, xrefText, isRefpage) + '\n' - doc += 'endif::' + apiVersion + '[]\n' - doc += 'ifndef::' + apiVersion + '[]\n' - doc += ' ' + self.conventions.api_name() + ' ' + dottedVersion + '\n' - doc += 'endif::' + apiVersion + '[]\n' - - return doc - - def conditionalLinkExt(self, extName, indent = ' '): - doc = 'ifdef::' + extName + '[]\n' - doc += indent + self.conventions.formatExtension(extName) + '\n' - doc += 'endif::' + extName + '[]\n' - doc += 'ifndef::' + extName + '[]\n' - doc += indent + '`' + extName + '`\n' - doc += 'endif::' + extName + '[]\n' - - return doc - - def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file): - ext = next(x for x in extensionsList if x.name == succeededBy) - - if ext.deprecationType: - if ext.deprecationType == 'promotion': - if ext.supercedingAPIVersion: - write(' ** Which in turn was _promoted_ to\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-promotions', isRefpage), file=file) - else: # ext.supercedingExtension - write(' ** Which in turn was _promoted_ to extension\n' + ext.conditionalLinkExt(ext.supercedingExtension), file=file) - ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, file) - elif ext.deprecationType == 'deprecation': - if ext.supercedingAPIVersion: - write(' ** Which in turn was _deprecated_ by\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-new-feature', isRefpage), file=file) - elif ext.supercedingExtension: - write(' ** Which in turn was _deprecated_ by\n' + ext.conditionalLinkExt(ext.supercedingExtension) + ' extension', file=file) - ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, file) - else: - write(' ** Which in turn was _deprecated_ without replacement', file=file) - elif ext.deprecationType == 'obsoletion': - if 
ext.supercedingAPIVersion: - write(' ** Which in turn was _obsoleted_ by\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-new-feature', isRefpage), file=file) - elif ext.supercedingExtension: - write(' ** Which in turn was _obsoleted_ by\n' + ext.conditionalLinkExt(ext.supercedingExtension) + ' extension', file=file) - ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, file) - else: - write(' ** Which in turn was _obsoleted_ without replacement', file=file) - else: # should be unreachable - self.generator.logMsg('error', 'Logic error in resolveDeprecationChain(): deprecationType is neither \'promotion\', \'deprecation\' nor \'obsoletion\'!') - - - def writeTag(self, tag, value, isRefpage, fp): - """Write a tag and (if non-None) a tag value to a file. - - - tag - string tag name - - value - tag value, or None - - isRefpage - controls style in which the tag is marked up - - fp - open file pointer to write to""" - - if isRefpage: - # Use subsection headers for the tag name - tagPrefix = '== ' - tagSuffix = '' - else: - # Use an bolded item list for the tag name - tagPrefix = '*' - tagSuffix = '*::' - - write(tagPrefix + tag + tagSuffix, file=fp) - if value is not None: - write(value, file=fp) - - if isRefpage: - write('', file=fp) - - def makeMetafile(self, extensionsList, isRefpage = False): - """Generate a file containing extension metainformation in - asciidoctor markup form. 
- - - extensionsList - list of extensions spec is being generated against - - isRefpage - True if generating a refpage include, False if - generating a specification extension appendix include""" - - if isRefpage: - filename = self.filename.replace('meta/', 'meta/refpage.') - else: - filename = self.filename - - fp = self.generator.newFile(filename) - - if not isRefpage: - write('[[' + self.name + ']]', file=fp) - write('=== ' + self.name, file=fp) - write('', file=fp) - - self.writeTag('Name String', '`' + self.name + '`', isRefpage, fp) - self.writeTag('Extension Type', self.typeToStr(), isRefpage, fp) - - self.writeTag('Registered Extension Number', self.number, isRefpage, fp) - self.writeTag('Revision', self.revision, isRefpage, fp) - - # Only API extension dependencies are coded in XML, others are explicit - self.writeTag('Extension and Version Dependencies', None, isRefpage, fp) - - write(' * Requires ' + self.conventions.api_name() + ' ' + self.requiresCore, file=fp) - if self.requires: - for dep in self.requires.split(','): - write(' * Requires', self.conventions.formatExtension(dep), - file=fp) - if self.provisional == 'true': - write(' * *This is a _provisional_ extension and must: be used with caution.', file=fp) - write(' See the ' + - self.specLink(xrefName = 'boilerplate-provisional-header', - xrefText = 'description', - isRefpage = isRefpage) + - ' of provisional header files for enablement and stability details.*', file=fp) - write('', file=fp) - - if self.deprecationType: - self.writeTag('Deprecation state', None, isRefpage, fp) - - if self.deprecationType == 'promotion': - if self.supercedingAPIVersion: - write(' * _Promoted_ to\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-promotions', isRefpage), file=fp) - else: # ext.supercedingExtension - write(' * _Promoted_ to\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension', file=fp) - self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, 
fp) - elif self.deprecationType == 'deprecation': - if self.supercedingAPIVersion: - write(' * _Deprecated_ by\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-new-features', isRefpage), file=fp) - elif self.supercedingExtension: - write(' * _Deprecated_ by\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension' , file=fp) - self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) - else: - write(' * _Deprecated_ without replacement' , file=fp) - elif self.deprecationType == 'obsoletion': - if self.supercedingAPIVersion: - write(' * _Obsoleted_ by\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-new-features', isRefpage), file=fp) - elif self.supercedingExtension: - write(' * _Obsoleted_ by\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension' , file=fp) - self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) - else: - # TODO: Does not make sense to retroactively ban use of extensions from 1.0. - # Needs some tweaks to the semantics and this message, when such extension(s) occur. 
- write(' * _Obsoleted_ without replacement' , file=fp) - else: # should be unreachable - self.generator.logMsg('error', 'Logic error in makeMetafile(): deprecationType is neither \'promotion\', \'deprecation\' nor \'obsoletion\'!') - write('', file=fp) - - if self.specialuse is not None: - specialuses = self.specialuse.split(',') - if len(specialuses) > 1: - header = 'Special Uses' - else: - header = 'Special Use' - self.writeTag(header, None, isRefpage, fp) - - for use in specialuses: - # Each specialuse attribute value expands an asciidoctor - # attribute of the same name, instead of using the shorter, - # and harder to understand attribute - write('* {}'.format( - self.specLink( - xrefName = self.conventions.special_use_section_anchor, - xrefText = '{' + use + '}', - isRefpage = isRefpage)), file=fp) - write('', file=fp) - - if self.conventions.write_contacts: - self.writeTag('Contact', None, isRefpage, fp) - - contacts = self.contact.split(',') - for contact in contacts: - contactWords = contact.strip().split() - name = ' '.join(contactWords[:-1]) - handle = contactWords[-1] - if handle.startswith('gitlab:'): - prettyHandle = 'icon:gitlab[alt=GitLab, role="red"]' + handle.replace('gitlab:@', '') - elif handle.startswith('@'): - issuePlaceholderText = '[' + self.name + '] ' + handle - issuePlaceholderText += '%0A<>' - trackerLink = 'link:++https://github.com/KhronosGroup/Vulkan-Docs/issues/new?body=' + issuePlaceholderText + '++' - prettyHandle = trackerLink + '[icon:github[alt=GitHub,role="black"]' + handle[1:] + ', window=_blank]' - else: - prettyHandle = handle - - write(' * ' + name + ' ' + prettyHandle, file=fp) - write('', file=fp) - - # Check if a proposal document for this extension exists in the - # current repository, and link to the same document (parameterized - # by a URL prefix attribute) if it does. 
- # The assumption is that a proposal document for an extension - # VK_name will be located in 'proposals/VK_name.asciidoc' relative - # to the repository root, and that this script will be invoked from - # the repository root. - path = 'proposals/{}.asciidoc'.format(self.name) - if os.path.exists(path) and os.access(path, os.R_OK): - self.writeTag('Extension Proposal', - 'link:{{specRepositoryURL}}/{}[{}]'.format(path, self.name), isRefpage, fp) - - fp.close() - -class ExtensionMetaDocOutputGenerator(OutputGenerator): - """ExtensionMetaDocOutputGenerator - subclass of OutputGenerator. - - Generates AsciiDoc includes with metainformation for the API extension - appendices. The fields used from tags in the API XML are: - - - name extension name string - - number extension number (optional) - - contact name and GitHub login or email address (optional) - - type 'instance' | 'device' (optional) - - requires list of comma-separated required API extensions (optional) - - requiresCore required core version of API (optional) - - promotedTo extension or API version it was promoted to - - deprecatedBy extension or API version which deprecated this extension, - or empty string if deprecated without replacement - - obsoletedBy extension or API version which obsoleted this extension, - or empty string if obsoleted without replacement - - provisional 'true' if this extension is released provisionally""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.extensions = [] - # List of strings containing all vendor tags - self.vendor_tags = [] - self.file_suffix = '' - - def newFile(self, filename): - self.logMsg('diag', '# Generating include file:', filename) - fp = open(filename, 'w', encoding='utf-8') - write(self.genOpts.conventions.warning_comment, file=fp) - return fp - - def beginFile(self, genOpts): - OutputGenerator.beginFile(self, genOpts) - - self.directory = self.genOpts.directory - self.file_suffix = self.genOpts.conventions.file_suffix - 
- # Iterate over all 'tag' Elements and add the names of all the valid vendor - # tags to the list - root = self.registry.tree.getroot() - for tag in root.findall('tags/tag'): - self.vendor_tags.append(tag.get('name')) - - # Create subdirectory, if needed - self.makeDir(self.directory) - - def conditionalExt(self, extName, content, ifdef = None, condition = None): - doc = '' - - innerdoc = 'ifdef::' + extName + '[]\n' - innerdoc += content + '\n' - innerdoc += 'endif::' + extName + '[]\n' - - if ifdef: - if ifdef == 'ifndef': - if condition: - doc += 'ifndef::' + condition + '[]\n' - doc += innerdoc - doc += 'endif::' + condition + '[]\n' - else: # no condition is as if condition is defined; "nothing" is always defined :p - pass # so no output - elif ifdef == 'ifdef': - if condition: - doc += 'ifdef::' + condition + '+' + extName + '[]\n' - doc += content + '\n' # does not include innerdoc; the ifdef was merged with the one above - doc += 'endif::' + condition + '+' + extName + '[]\n' - else: # no condition is as if condition is defined; "nothing" is always defined :p - doc += innerdoc - else: # should be unreachable - raise RuntimeError('Should be unreachable: ifdef is neither \'ifdef \' nor \'ifndef\'!') - else: - doc += innerdoc - - return doc - - def makeExtensionInclude(self, ext): - return self.conventions.extension_include_string(ext) - - def endFile(self): - self.extensions.sort() - - # Generate metadoc extension files, in refpage and non-refpage form - for ext in self.extensions: - ext.makeMetafile(self.extensions, isRefpage = False) - if self.conventions.write_refpage_include: - ext.makeMetafile(self.extensions, isRefpage = True) - - # Generate list of promoted extensions - promotedExtensions = {} - for ext in self.extensions: - if ext.deprecationType == 'promotion' and ext.supercedingAPIVersion: - promotedExtensions.setdefault(ext.supercedingAPIVersion, []).append(ext) - - for coreVersion, extensions in promotedExtensions.items(): - 
promoted_extensions_fp = self.newFile(self.directory + '/promoted_extensions_' + coreVersion + self.file_suffix) - - for ext in extensions: - indent = '' - write(' * {blank}\n+\n' + ext.conditionalLinkExt(ext.name, indent), file=promoted_extensions_fp) - - promoted_extensions_fp.close() - - # Re-sort to match earlier behavior - # TODO: Remove this extra sort when re-arranging section order OK. - - def makeSortKey(ext): - name = ext.name.lower() - prefixes = self.conventions.extension_index_prefixes - for i, prefix in enumerate(prefixes): - if ext.name.startswith(prefix): - return (i, name) - return (len(prefixes), name) - - self.extensions.sort(key=makeSortKey) - - # Generate include directives for the extensions appendix, grouping - # extensions by status (current, deprecated, provisional, etc.) - with self.newFile(self.directory + '/current_extensions_appendix' + self.file_suffix) as current_extensions_appendix_fp, \ - self.newFile(self.directory + '/deprecated_extensions_appendix' + self.file_suffix) as deprecated_extensions_appendix_fp, \ - self.newFile(self.directory + '/current_extension_appendices' + self.file_suffix) as current_extension_appendices_fp, \ - self.newFile(self.directory + '/current_extension_appendices_toc' + self.file_suffix) as current_extension_appendices_toc_fp, \ - self.newFile(self.directory + '/deprecated_extension_appendices' + self.file_suffix) as deprecated_extension_appendices_fp, \ - self.newFile(self.directory + '/deprecated_extension_appendices_toc' + self.file_suffix) as deprecated_extension_appendices_toc_fp, \ - self.newFile(self.directory + '/deprecated_extensions_guard_macro' + self.file_suffix) as deprecated_extensions_guard_macro_fp, \ - self.newFile(self.directory + '/provisional_extensions_appendix' + self.file_suffix) as provisional_extensions_appendix_fp, \ - self.newFile(self.directory + '/provisional_extension_appendices' + self.file_suffix) as provisional_extension_appendices_fp, \ - self.newFile(self.directory + 
'/provisional_extension_appendices_toc' + self.file_suffix) as provisional_extension_appendices_toc_fp, \ - self.newFile(self.directory + '/provisional_extensions_guard_macro' + self.file_suffix) as provisional_extensions_guard_macro_fp: - - write('', file=current_extensions_appendix_fp) - write('include::deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=current_extensions_appendix_fp) - write('', file=current_extensions_appendix_fp) - write('ifndef::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) - write('[[extension-appendices-list]]', file=current_extensions_appendix_fp) - write('== List of Extensions', file=current_extensions_appendix_fp) - write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) - write('ifdef::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) - write('[[extension-appendices-list]]', file=current_extensions_appendix_fp) - write('== List of Current Extensions', file=current_extensions_appendix_fp) - write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) - write('', file=current_extensions_appendix_fp) - write('include::current_extension_appendices_toc' + self.file_suffix + '[]', file=current_extensions_appendix_fp) - write('\n<<<\n', file=current_extensions_appendix_fp) - write('include::current_extension_appendices' + self.file_suffix + '[]', file=current_extensions_appendix_fp) - - write('', file=deprecated_extensions_appendix_fp) - write('include::deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) - write('', file=deprecated_extensions_appendix_fp) - write('ifdef::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) - write('[[deprecated-extension-appendices-list]]', file=deprecated_extensions_appendix_fp) - write('== List of Deprecated Extensions', file=deprecated_extensions_appendix_fp) - write('include::deprecated_extension_appendices_toc' + self.file_suffix + '[]', 
file=deprecated_extensions_appendix_fp) - write('\n<<<\n', file=deprecated_extensions_appendix_fp) - write('include::deprecated_extension_appendices' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) - write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) - - # add include guards to allow multiple includes - write('ifndef::DEPRECATED_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD[]', file=deprecated_extensions_guard_macro_fp) - write(':DEPRECATED_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD:\n', file=deprecated_extensions_guard_macro_fp) - write('ifndef::PROVISIONAL_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD[]', file=provisional_extensions_guard_macro_fp) - write(':PROVISIONAL_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD:\n', file=provisional_extensions_guard_macro_fp) - - write('', file=provisional_extensions_appendix_fp) - write('include::provisional_extensions_guard_macro' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) - write('', file=provisional_extensions_appendix_fp) - write('ifdef::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) - write('[[provisional-extension-appendices-list]]', file=provisional_extensions_appendix_fp) - write('== List of Provisional Extensions', file=provisional_extensions_appendix_fp) - write('include::provisional_extension_appendices_toc' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) - write('\n<<<\n', file=provisional_extensions_appendix_fp) - write('include::provisional_extension_appendices' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) - write('endif::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) - - for ext in self.extensions: - include = self.makeExtensionInclude(ext) - link = ' * ' + self.conventions.formatExtension(ext.name) - if ext.provisional == 'true': - write(self.conditionalExt(ext.name, include), file=provisional_extension_appendices_fp) - write(self.conditionalExt(ext.name, link), 
file=provisional_extension_appendices_toc_fp) - write(self.conditionalExt(ext.name, ':HAS_PROVISIONAL_EXTENSIONS:'), file=provisional_extensions_guard_macro_fp) - elif ext.deprecationType is None: - write(self.conditionalExt(ext.name, include), file=current_extension_appendices_fp) - write(self.conditionalExt(ext.name, link), file=current_extension_appendices_toc_fp) - else: - condition = ext.supercedingAPIVersion if ext.supercedingAPIVersion else ext.supercedingExtension # potentially None too - - write(self.conditionalExt(ext.name, include, 'ifndef', condition), file=current_extension_appendices_fp) - write(self.conditionalExt(ext.name, link, 'ifndef', condition), file=current_extension_appendices_toc_fp) - - write(self.conditionalExt(ext.name, include, 'ifdef', condition), file=deprecated_extension_appendices_fp) - write(self.conditionalExt(ext.name, link, 'ifdef', condition), file=deprecated_extension_appendices_toc_fp) - - write(self.conditionalExt(ext.name, ':HAS_DEPRECATED_EXTENSIONS:', 'ifdef', condition), file=deprecated_extensions_guard_macro_fp) - - write('endif::DEPRECATED_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD[]', file=deprecated_extensions_guard_macro_fp) - write('endif::PROVISIONAL_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD[]', file=provisional_extensions_guard_macro_fp) - - OutputGenerator.endFile(self) - - def beginFeature(self, interface, emit): - # Start processing in superclass - OutputGenerator.beginFeature(self, interface, emit) - - if interface.tag != 'extension': - self.logMsg('diag', 'beginFeature: ignoring non-extension feature', self.featureName) - return - - # These attributes must exist - name = self.featureName - number = self.getAttrib(interface, 'number') - ext_type = self.getAttrib(interface, 'type') - revision = self.getSpecVersion(interface, name) - - # These attributes are optional - OPTIONAL = False - requires = self.getAttrib(interface, 'requires', OPTIONAL) - requiresCore = self.getAttrib(interface, 'requiresCore', OPTIONAL, '1.0') # 
TODO update this line with update_version.py - contact = self.getAttrib(interface, 'contact', OPTIONAL) - promotedTo = self.getAttrib(interface, 'promotedto', OPTIONAL) - deprecatedBy = self.getAttrib(interface, 'deprecatedby', OPTIONAL) - obsoletedBy = self.getAttrib(interface, 'obsoletedby', OPTIONAL) - provisional = self.getAttrib(interface, 'provisional', OPTIONAL, 'false') - specialuse = self.getAttrib(interface, 'specialuse', OPTIONAL) - - filename = self.directory + '/' + name + self.file_suffix - - extdata = Extension( - generator = self, - filename = filename, - name = name, - number = number, - ext_type = ext_type, - requires = requires, - requiresCore = requiresCore, - contact = contact, - promotedTo = promotedTo, - deprecatedBy = deprecatedBy, - obsoletedBy = obsoletedBy, - provisional = provisional, - revision = revision, - specialuse = specialuse) - self.extensions.append(extdata) - - - def endFeature(self): - # Finish processing in superclass - OutputGenerator.endFeature(self) - - def getAttrib(self, elem, attribute, required=True, default=None): - """Query an attribute from an element, or return a default value - - - elem - element to query - - attribute - attribute name - - required - whether attribute must exist - - default - default value if attribute not present""" - attrib = elem.get(attribute, default) - if required and (attrib is None): - name = elem.get('name', 'UNKNOWN') - self.logMsg('error', 'While processing \'' + self.featureName + ', <' + elem.tag + '> \'' + name + '\' does not contain required attribute \'' + attribute + '\'') - return attrib - - def numbersToWords(self, name): - allowlist = ['WIN32', 'INT16', 'D3D1'] - - # temporarily replace allowlist items - for i, w in enumerate(allowlist): - name = re.sub(w, '{' + str(i) + '}', name) - - name = re.sub(r'(?<=[A-Z])(\d+)(?![A-Z])', r'_\g<1>', name) - - # undo allowlist substitution - for i, w in enumerate(allowlist): - name = re.sub('\\{' + str(i) + '}', w, name) - - return name - 
- def getSpecVersion(self, elem, extname, default=None): - """Determine the extension revision from the EXTENSION_NAME_SPEC_VERSION - enumerant. - - - elem - element to query - - extname - extension name from the 'name' attribute - - default - default value if SPEC_VERSION token not present""" - # The literal enumerant name to match - versioningEnumName = self.numbersToWords(extname.upper()) + '_SPEC_VERSION' - - for enum in elem.findall('./require/enum'): - enumName = self.getAttrib(enum, 'name') - if enumName == versioningEnumName: - return self.getAttrib(enum, 'value') - - #if not found: - for enum in elem.findall('./require/enum'): - enumName = self.getAttrib(enum, 'name') - if enumName.find('SPEC_VERSION') != -1: - self.logMsg('diag', 'Missing ' + versioningEnumName + '! Potential misnamed candidate ' + enumName + '.') - return self.getAttrib(enum, 'value') - - self.logMsg('error', 'Missing ' + versioningEnumName + '!') - return default diff --git a/xml/genRef.py b/xml/genRef.py deleted file mode 100644 index 87d8d7a60..000000000 --- a/xml/genRef.py +++ /dev/null @@ -1,1019 +0,0 @@ -#!/usr/bin/python3 -# -# Copyright 2016-2023 The Khronos Group Inc. -# -# SPDX-License-Identifier: Apache-2.0 - -# genRef.py - create API ref pages from spec source files -# -# Usage: genRef.py files - -import argparse -import io -import os -import re -import sys -from collections import OrderedDict -from reflib import (findRefs, fixupRefs, loadFile, logDiag, logWarn, - printPageInfo, setLogFile) -from reg import Registry -from clconventions import OpenCLConventions as APIConventions - - -def makeExtensionInclude(name): - """Return an include command, given an extension name.""" - return 'include::{}/refpage.{}{}[]'.format( - conventions.specification_path, - name, - conventions.file_suffix) - - -def makeAPIInclude(type, name): - """Return an include command for a generated API interface - - type - type of the API, e.g. 
'flags', 'handles', etc - - name - name of the API""" - - return 'include::{}/api/{}/{}{}\n'.format( - conventions.refpage_generated_include_path, - type, name, conventions.file_suffix) - - -def isextension(name): - """Return True if name is an API extension name (ends with an upper-case - author ID). - - This assumes that author IDs are at least two characters.""" - return name[-2:].isalpha() and name[-2:].isupper() - - -def printCopyrightSourceComments(fp): - """Print Khronos CC-BY copyright notice on open file fp. - - Writes an asciidoc comment block, which copyrights the source - file.""" - print('// Copyright 2014-2023 The Khronos Group, Inc.', file=fp) - print('//', file=fp) - # This works around constraints of the 'reuse' tool - print('// SPDX' + '-License-Identifier: CC-BY-4.0', file=fp) - print('', file=fp) - - -def printFooter(fp): - """Print footer material at the end of each refpage on open file fp. - - If generating separate refpages, adds the copyright. - If generating the single combined refpage, just add a separator.""" - - print('ifdef::doctype-manpage[]', - '== Copyright', - '', - 'include::{config}/copyright-ccby.txt[]', - 'endif::doctype-manpage[]', - '', - 'ifndef::doctype-manpage[]', - '<<<', - 'endif::doctype-manpage[]', - '', - sep='\n', file=fp) - - -def macroPrefix(name): - """Add a spec asciidoc macro prefix to an API name, depending on its type - (protos, structs, enums, etc.). 
- - If the name is not recognized, use the generic link macro 'reflink:'.""" - if name in api.basetypes: - return 'basetype:' + name - if name in api.defines: - return 'dlink:' + name - if name in api.enums: - return 'elink:' + name - if name in api.flags: - return 'elink:' + name - if name in api.funcpointers: - return 'tlink:' + name - if name in api.handles: - return 'slink:' + name - if name in api.protos: - return 'flink:' + name - if name in api.structs: - return 'slink:' + name - if name == 'TBD': - return 'No cross-references are available' - return 'reflink:' + name - - -def seeAlsoList(apiName, explicitRefs=None, apiAliases=[]): - """Return an asciidoc string with a list of 'See Also' references for the - API entity 'apiName', based on the relationship mapping in the api module. - - 'explicitRefs' is a list of additional cross-references. - - If apiAliases is not None, it is a list of aliases of apiName whose - cross-references will also be included. - - If no relationships are available, return None.""" - - refs = set(()) - - # apiName and its aliases are treated equally - allApis = apiAliases.copy() - allApis.append(apiName) - - # Add all the implicit references to refs - for name in allApis: - if name in api.mapDict: - refs.update(api.mapDict[name]) - - # Add all the explicit references - if explicitRefs is not None: - if isinstance(explicitRefs, str): - explicitRefs = explicitRefs.split() - refs.update(name for name in explicitRefs) - - # Add extensions / core versions based on dependencies - for name in allApis: - if name in api.requiredBy: - for (base,dependency) in api.requiredBy[name]: - refs.add(base) - if dependency is not None: - refs.add(dependency) - - if len(refs) == 0: - return None - else: - return ', '.join(macroPrefix(name) for name in sorted(refs)) + '\n' - - -def remapIncludes(lines, baseDir, specDir): - """Remap include directives in a list of lines so they can be extracted to a - different directory. - - Returns remapped lines. 
- - - lines - text to remap - - baseDir - target directory - - specDir - source directory""" - # This should be compiled only once - includePat = re.compile(r'^include::(?P.*)\[\]') - - newLines = [] - for line in lines: - matches = includePat.search(line) - if matches is not None: - path = matches.group('path') - - if path[0] != '{': - # Relative path to include file from here - incPath = specDir + '/' + path - # Remap to be relative to baseDir - newPath = os.path.relpath(incPath, baseDir) - newLine = 'include::' + newPath + '[]\n' - logDiag('remapIncludes: remapping', line, '->', newLine) - newLines.append(newLine) - else: - # An asciidoctor variable starts the path. - # This must be an absolute path, not needing to be rewritten. - newLines.append(line) - else: - newLines.append(line) - return newLines - - -def refPageShell(pageName, pageDesc, fp, head_content = None, sections=None, tail_content=None, man_section=3): - """Generate body of a reference page. - - - pageName - string name of the page - - pageDesc - string short description of the page - - fp - file to write to - - head_content - text to include before the sections - - sections - iterable returning (title,body) for each section. 
- - tail_content - text to include after the sections - - man_section - Unix man page section""" - - printCopyrightSourceComments(fp) - - print(':data-uri:', - ':icons: font', - conventions.extra_refpage_headers, - '', - sep='\n', file=fp) - - s = '{}({})'.format(pageName, man_section) - print('= ' + s, - '', - sep='\n', file=fp) - if pageDesc.strip() == '': - pageDesc = 'NO SHORT DESCRIPTION PROVIDED' - logWarn('refPageHead: no short description provided for', pageName) - - print('== Name', - '{} - {}'.format(pageName, pageDesc), - '', - sep='\n', file=fp) - - if head_content is not None: - print(head_content, - '', - sep='\n', file=fp) - - if sections is not None: - for title, content in sections.items(): - print('== {}'.format(title), - '', - content, - '', - sep='\n', file=fp) - - if tail_content is not None: - print(tail_content, - '', - sep='\n', file=fp) - - -def refPageHead(pageName, pageDesc, specText, fieldName, fieldText, descText, fp): - """Generate header of a reference page. - - - pageName - string name of the page - - pageDesc - string short description of the page - - specType - string containing 'spec' field from refpage open block, or None. - Used to determine containing spec name and URL. 
- - specText - string that goes in the "C Specification" section - - fieldName - string heading an additional section following specText, if not None - - fieldText - string that goes in the additional section - - descText - string that goes in the "Description" section - - fp - file to write to""" - sections = OrderedDict() - - if specText is not None: - sections['C Specification'] = specText - - if fieldName is not None: - sections[fieldName] = fieldText - - if descText is None or descText.strip() == '': - logWarn('refPageHead: no description provided for', pageName) - - if descText is not None: - sections['Description'] = descText - - refPageShell(pageName, pageDesc, fp, head_content=None, sections=sections) - - -def refPageTail(pageName, - specType=None, - specAnchor=None, - seeAlso=None, - fp=None, - auto=False): - """Generate end boilerplate of a reference page. - - - pageName - name of the page - - specType - None or the 'spec' attribute from the refpage block, - identifying the specification name and URL this refpage links to. - - specAnchor - None or the 'anchor' attribute from the refpage block, - identifying the anchor in the specification this refpage links to. If - None, the pageName is assumed to be a valid anchor.""" - - specName = conventions.api_name(specType) - specURL = conventions.specURL(specType) - if specAnchor is None: - specAnchor = pageName - - if seeAlso is None: - seeAlso = 'No cross-references are available\n' - - notes = [ - 'For more information, see the {}#{}[{} Specification^]'.format( - specURL, specAnchor, specName), - '', - ] - - if auto: - notes.extend(( - 'This page is a generated document.', - 'Fixes and changes should be made to the generator scripts, ' - 'not directly.', - )) - else: - notes.extend(( - 'This page is extracted from the ' + specName + ' Specification. 
', - 'Fixes and changes should be made to the Specification, ' - 'not directly.', - )) - - print('== See Also', - '', - seeAlso, - '', - sep='\n', file=fp) - - print('== Document Notes', - '', - '\n'.join(notes), - '', - sep='\n', file=fp) - - printFooter(fp) - - -def xrefRewriteInitialize(): - """Initialize substitution patterns for asciidoctor xrefs.""" - - global refLinkPattern, refLinkSubstitute - global refLinkTextPattern, refLinkTextSubstitute - global specLinkPattern, specLinkSubstitute - - # These are xrefs to Vulkan API entities, rewritten to link to refpages - # The refLink variants are for xrefs with only an anchor and no text. - # The refLinkText variants are for xrefs with both anchor and text - refLinkPattern = re.compile(r'<<([Vv][Kk][^>,]+)>>') - refLinkSubstitute = r'link:\1.html[\1^]' - - refLinkTextPattern = re.compile(r'<<([Vv][Kk][^>,]+)[,]?[ \t\n]*([^>,]*)>>') - refLinkTextSubstitute = r'link:\1.html[\2^]' - - # These are xrefs to other anchors, rewritten to link to the spec - specLinkPattern = re.compile(r'<<([^>,]+)[,]?[ \t\n]*([^>,]*)>>') - - # Unfortunately, specLinkSubstitute depends on the link target, - # so can't be constructed in advance. - specLinkSubstitute = None - - -def xrefRewrite(text, specURL): - """Rewrite asciidoctor xrefs in text to resolve properly in refpages. - Xrefs which are to Vulkan refpages are rewritten to link to those - refpages. The remainder are rewritten to generate external links into - the supplied specification document URL. 
- - - text - string to rewrite, or None - - specURL - URL to target - - Returns rewritten text, or None, respectively""" - - global refLinkPattern, refLinkSubstitute - global refLinkTextPattern, refLinkTextSubstitute - global specLinkPattern, specLinkSubstitute - - specLinkSubstitute = r'link:{}#\1[\2^]'.format(specURL) - - if text is not None: - text, _ = refLinkPattern.subn(refLinkSubstitute, text) - text, _ = refLinkTextPattern.subn(refLinkTextSubstitute, text) - text, _ = specLinkPattern.subn(specLinkSubstitute, text) - - return text - -def emitPage(baseDir, specDir, pi, file): - """Extract a single reference page into baseDir. - - - baseDir - base directory to emit page into - - specDir - directory extracted page source came from - - pi - pageInfo for this page relative to file - - file - list of strings making up the file, indexed by pi""" - pageName = baseDir + '/' + pi.name + '.txt' - - # Add a dictionary entry for this page - global genDict - genDict[pi.name] = None - logDiag('emitPage:', pageName) - - # Short description - if pi.desc is None: - pi.desc = '(no short description available)' - - # Member/parameter section label and text, if there is one - field = None - fieldText = None - - if pi.type != 'freeform' and pi.type != 'spirv': - if pi.include is None: - # Not sure how this happens yet - logWarn('emitPage:', pageName, 'INCLUDE is None, no page generated') - return - - # Specification text from beginning to just before the parameter - # section. This covers the description, the prototype, the version - # note, and any additional version note text. If a parameter section - # is absent then go a line beyond the include. 
- remap_end = pi.include + 1 if pi.param is None else pi.param - lines = remapIncludes(file[pi.begin:remap_end], baseDir, specDir) - specText = ''.join(lines) - - if pi.param is not None: - if pi.type == 'structs': - field = 'Members' - elif pi.type in ['protos', 'funcpointers']: - field = 'Parameters' - else: - logWarn('emitPage: unknown field type:', pi.type, - 'for', pi.name) - lines = remapIncludes(file[pi.param:pi.body], baseDir, specDir) - fieldText = ''.join(lines) - - # Description text - if pi.body != pi.include: - lines = remapIncludes(file[pi.body:pi.end + 1], baseDir, specDir) - descText = ''.join(lines) - else: - descText = None - logWarn('emitPage: INCLUDE == BODY, so description will be empty for', pi.name) - if pi.begin != pi.include: - logWarn('emitPage: Note: BEGIN != INCLUDE, so the description might be incorrectly located before the API include!') - else: - specText = None - descText = ''.join(file[pi.begin:pi.end + 1]) - - # Rewrite asciidoctor xrefs to resolve properly in refpages - specURL = conventions.specURL(pi.spec) - - specText = xrefRewrite(specText, specURL) - fieldText = xrefRewrite(fieldText, specURL) - descText = xrefRewrite(descText, specURL) - - fp = open(pageName, 'w', encoding='utf-8') - refPageHead(pi.name, - pi.desc, - specText, - field, fieldText, - descText, - fp) - refPageTail(pageName=pi.name, - specType=pi.spec, - specAnchor=pi.anchor, - seeAlso=seeAlsoList(pi.name, pi.refs, pi.alias.split()), - fp=fp, - auto=False) - fp.close() - - -def autoGenEnumsPage(baseDir, pi, file): - """Autogenerate a single reference page in baseDir. - - Script only knows how to do this for /enums/ pages, at present. 
- - - baseDir - base directory to emit page into - - pi - pageInfo for this page relative to file - - file - list of strings making up the file, indexed by pi""" - pageName = baseDir + '/' + pi.name + '.txt' - fp = open(pageName, 'w', encoding='utf-8') - - # Add a dictionary entry for this page - global genDict - genDict[pi.name] = None - logDiag('autoGenEnumsPage:', pageName) - - # Short description - if pi.desc is None: - pi.desc = '(no short description available)' - - # Description text. Allow for the case where an enum definition - # is not embedded. - if not pi.embed: - embedRef = '' - else: - embedRef = ''.join(( - ' * The reference page for ', - macroPrefix(pi.embed), - ', where this interface is defined.\n')) - - txt = ''.join(( - 'For more information, see:\n\n', - embedRef, - ' * The See Also section for other reference pages using this type.\n', - ' * The ' + apiName + ' Specification.\n')) - - refPageHead(pi.name, - pi.desc, - ''.join(file[pi.begin:pi.include + 1]), - None, None, - txt, - fp) - refPageTail(pageName=pi.name, - specType=pi.spec, - specAnchor=pi.anchor, - seeAlso=seeAlsoList(pi.name, pi.refs, pi.alias.split()), - fp=fp, - auto=True) - fp.close() - - -# Pattern to break apart an API *Flags{authorID} name, used in -# autoGenFlagsPage. -flagNamePat = re.compile(r'(?P\w+)Flags(?P[A-Z]*)') - - -def autoGenFlagsPage(baseDir, flagName): - """Autogenerate a single reference page in baseDir for an API *Flags type. 
- - - baseDir - base directory to emit page into - - flagName - API *Flags name""" - pageName = baseDir + '/' + flagName + '.txt' - fp = open(pageName, 'w', encoding='utf-8') - - # Add a dictionary entry for this page - global genDict - genDict[flagName] = None - logDiag('autoGenFlagsPage:', pageName) - - # Short description - matches = flagNamePat.search(flagName) - if matches is not None: - name = matches.group('name') - author = matches.group('author') - logDiag('autoGenFlagsPage: split name into', name, 'Flags', author) - flagBits = name + 'FlagBits' + author - desc = 'Bitmask of ' + flagBits - else: - logWarn('autoGenFlagsPage:', pageName, 'does not end in "Flags{author ID}". Cannot infer FlagBits type.') - flagBits = None - desc = 'Unknown ' + apiName + ' flags type' - - # Description text - if flagBits is not None: - txt = ''.join(( - 'etext:' + flagName, - ' is a mask of zero or more elink:' + flagBits + '.\n', - 'It is used as a member and/or parameter of the structures and commands\n', - 'in the See Also section below.\n')) - else: - txt = ''.join(( - 'etext:' + flagName, - ' is an unknown ' + apiName + ' type, assumed to be a bitmask.\n')) - - refPageHead(flagName, - desc, - makeAPIInclude('flags', flagName), - None, None, - txt, - fp) - refPageTail(pageName=flagName, - specType=pi.spec, - specAnchor=pi.anchor, - seeAlso=seeAlsoList(flagName, None), - fp=fp, - auto=True) - fp.close() - - -def autoGenHandlePage(baseDir, handleName): - """Autogenerate a single handle page in baseDir for an API handle type. - - - baseDir - base directory to emit page into - - handleName - API handle name""" - # @@ Need to determine creation function & add handles/ include for the - # @@ interface in generator.py. 
- pageName = baseDir + '/' + handleName + '.txt' - fp = open(pageName, 'w', encoding='utf-8') - - # Add a dictionary entry for this page - global genDict - genDict[handleName] = None - logDiag('autoGenHandlePage:', pageName) - - # Short description - desc = apiName + ' object handle' - - descText = ''.join(( - 'sname:' + handleName, - ' is an object handle type, referring to an object used\n', - 'by the ' + apiName + ' implementation. These handles are created or allocated\n', - 'by the @@ TBD @@ function, and used by other ' + apiName + ' structures\n', - 'and commands in the See Also section below.\n')) - - refPageHead(handleName, - desc, - makeAPIInclude('handles', handleName), - None, None, - descText, - fp) - refPageTail(pageName=handleName, - specType=pi.spec, - specAnchor=pi.anchor, - seeAlso=seeAlsoList(handleName, None), - fp=fp, - auto=True) - fp.close() - - -def genRef(specFile, baseDir): - """Extract reference pages from a spec asciidoc source file. - - - specFile - filename to extract from - - baseDir - output directory to generate page in""" - file = loadFile(specFile) - if file is None: - return - - # Save the path to this file for later use in rewriting relative includes - specDir = os.path.dirname(os.path.abspath(specFile)) - - pageMap = findRefs(file, specFile) - logDiag(specFile + ': found', len(pageMap.keys()), 'potential pages') - - sys.stderr.flush() - - # Fix up references in pageMap - fixupRefs(pageMap, specFile, file) - - # Create each page, if possible - pages = {} - - for name in sorted(pageMap): - pi = pageMap[name] - - printPageInfo(pi, file) - - if pi.Warning: - logDiag('genRef:', pi.name + ':', pi.Warning) - - if pi.extractPage: - emitPage(baseDir, specDir, pi, file) - elif pi.type == 'enums': - autoGenEnumsPage(baseDir, pi, file) - elif pi.type == 'flags': - autoGenFlagsPage(baseDir, pi.name) - else: - # Don't extract this page - logWarn('genRef: Cannot extract or autogenerate:', pi.name) - - pages[pi.name] = pi - for alias in 
pi.alias.split(): - pages[alias] = pi - - return pages - - -def genSinglePageRef(baseDir): - """Generate baseDir/apispec.txt, the single-page version of the ref pages. - - This assumes there's a page for everything in the api module dictionaries. - Extensions (KHR, EXT, etc.) are currently skipped""" - # Accumulate head of page - head = io.StringIO() - - printCopyrightSourceComments(head) - - print('= ' + apiName + ' API Reference Pages', - ':data-uri:', - ':icons: font', - ':doctype: book', - ':numbered!:', - ':max-width: 200', - ':data-uri:', - ':toc2:', - ':toclevels: 2', - '', - sep='\n', file=head) - - print('== Copyright', file=head) - print('', file=head) - print('include::{config}/copyright-ccby.txt[]', file=head) - print('', file=head) - # Inject the table of contents. Asciidoc really ought to be generating - # this for us. - - sections = [ - [api.protos, 'protos', apiName + ' Commands'], - [api.handles, 'handles', 'Object Handles'], - [api.structs, 'structs', 'Structures'], - [api.enums, 'enums', 'Enumerations'], - [api.flags, 'flags', 'Flags'], - [api.funcpointers, 'funcpointers', 'Function Pointer Types'], - [api.basetypes, 'basetypes', apiName + ' Scalar types'], - [api.defines, 'defines', 'C Macro Definitions'], - [extensions, 'extensions', apiName + ' Extensions'] - ] - - # Accumulate body of page - body = io.StringIO() - - for (apiDict, label, title) in sections: - # Add section title/anchor header to body - anchor = '[[' + label + ',' + title + ']]' - print(anchor, - '== ' + title, - '', - ':leveloffset: 2', - '', - sep='\n', file=body) - - if label == 'extensions': - # preserve order of extensions since we already sorted the way we want. 
- keys = apiDict.keys() - else: - keys = sorted(apiDict.keys()) - - for refPage in keys: - # Don't generate links for aliases, which are included with the - # aliased page - if refPage not in api.alias: - # Add page to body - if 'FlagBits' in refPage and conventions.unified_flag_refpages: - # OpenXR does not create separate ref pages for FlagBits: - # the FlagBits includes go in the Flags refpage. - # Previously the Vulkan script would only emit non-empty - # Vk*Flags pages, via the logic - # if refPage not in api.flags or api.flags[refPage] is not None - # emit page - # Now, all are emitted. - continue - else: - print('include::' + refPage + '.txt[]', file=body) - else: - # Alternatively, we could (probably should) link to the - # aliased refpage - logWarn('(Benign) Not including', refPage, - 'in single-page reference', - 'because it is an alias of', api.alias[refPage]) - - print('\n' + ':leveloffset: 0' + '\n', file=body) - - # Write head and body to the output file - pageName = baseDir + '/apispec.txt' - fp = open(pageName, 'w', encoding='utf-8') - - print(head.getvalue(), file=fp, end='') - print(body.getvalue(), file=fp, end='') - - head.close() - body.close() - fp.close() - - -def genExtension(baseDir, extpath, name, info): - """Generate refpage, and add dictionary entry for an extension - - - baseDir - output directory to generate page in - - extpath - None, or path to per-extension specification sources if - those are to be included in extension refpages - - name - extension name - - info - Element from XML""" - - # Add a dictionary entry for this page - global genDict - genDict[name] = None - declares = [] - elem = info.elem - - # Type of extension (instance, device, etc.) 
- ext_type = elem.get('type') - - # Autogenerate interfaces from entry - for required in elem.find('require'): - req_name = required.get('name') - if not req_name: - # This isn't what we're looking for - continue - if req_name.endswith('_SPEC_VERSION') or req_name.endswith('_EXTENSION_NAME'): - # Don't link to spec version or extension name - those ref pages aren't created. - continue - - if required.get('extends'): - # These are either extensions of enumerated types, or const enum - # values: neither of which get a ref page - although we could - # include the enumerated types in the See Also list. - continue - - if req_name not in genDict: - logWarn('ERROR: {} (in extension {}) does not have a ref page.'.format(req_name, name)) - - declares.append(req_name) - - # import pdb - # pdb.set_trace() - - appbody = None - if extpath is not None: - appfp = open('{}/{}.txt'.format(extpath, name), 'r', encoding='utf-8') - if appfp is not None: - appbody = appfp.read() - - # Transform internal links to crosslinks - specURL = conventions.specURL() - appbody = xrefRewrite(appbody, specURL) - else: - logWarn('Cannot find extension appendix for', name) - - # Fall through to autogenerated page - extpath = None - appbody = None - appfp.close() - - # Include the extension appendix without an extra title - # head_content = 'include::{{appendices}}/{}.txt[]'.format(name) - - # Write the extension refpage - pageName = baseDir + '/' + name + '.txt' - logDiag('genExtension:', pageName) - fp = open(pageName, 'w', encoding='utf-8') - - # There are no generated titled sections - sections = None - - # 'See link:{html_spec_relative}#%s[ %s] in the main specification for complete information.' 
% ( - # name, name) - refPageShell(name, - "{} extension".format(ext_type), - fp, - appbody, - sections=sections) - refPageTail(pageName=name, - specType=None, - specAnchor=name, - seeAlso=seeAlsoList(name, declares), - fp=fp, - auto=True) - fp.close() - - -if __name__ == '__main__': - global genDict, extensions, conventions, apiName - genDict = {} - extensions = OrderedDict() - conventions = APIConventions() - apiName = conventions.api_name('api') - - parser = argparse.ArgumentParser() - - parser.add_argument('-diag', action='store', dest='diagFile', - help='Set the diagnostic file') - parser.add_argument('-warn', action='store', dest='warnFile', - help='Set the warning file') - parser.add_argument('-log', action='store', dest='logFile', - help='Set the log file for both diagnostics and warnings') - parser.add_argument('-genpath', action='store', - default='gen', - help='Path to directory containing generated files') - parser.add_argument('-basedir', action='store', dest='baseDir', - default=None, - help='Set the base directory in which pages are generated') - parser.add_argument('-noauto', action='store_true', - help='Don\'t generate inferred ref pages automatically') - parser.add_argument('files', metavar='filename', nargs='*', - help='a filename to extract ref pages from') - parser.add_argument('--version', action='version', version='%(prog)s 1.0') - parser.add_argument('-extension', action='append', - default=[], - help='Specify an extension or extensions to add to targets') - parser.add_argument('-rewrite', action='store', - default=None, - help='Name of output file to write Apache mod_rewrite directives to') - parser.add_argument('-toc', action='store', - default=None, - help='Name of output file to write an alphabetical TOC to') - parser.add_argument('-registry', action='store', - default=conventions.registry_path, - help='Use specified registry file instead of default') - parser.add_argument('-extpath', action='store', - default=None, - help='Use extension 
descriptions from this directory instead of autogenerating extension refpages') - - results = parser.parse_args() - - # Look for api.py in the specified directory - if results.genpath is not None: - sys.path.insert(0, results.genpath) - import api - - setLogFile(True, True, results.logFile) - setLogFile(True, False, results.diagFile) - setLogFile(False, True, results.warnFile) - - # Initialize static rewrite patterns for spec xrefs - xrefRewriteInitialize() - - if results.baseDir is None: - baseDir = results.genpath + '/ref' - else: - baseDir = results.baseDir - - # Dictionary of pages & aliases - pages = {} - - for file in results.files: - d = genRef(file, baseDir) - pages.update(d) - - # Now figure out which pages *weren't* generated from the spec. - # This relies on the dictionaries of API constructs in the api module. - - if not results.noauto: - registry = Registry() - registry.loadFile(results.registry) - - if conventions.write_refpage_include: - # Only extensions with a supported="..." attribute in this set - # will be considered for extraction/generation. - supported_strings = set((conventions.xml_api_name,)) - ext_names = set(k for k, v in registry.extdict.items() - if v.supported in supported_strings) - - desired_extensions = ext_names.intersection(set(results.extension)) - for prefix in conventions.extension_index_prefixes: - # Splits up into chunks, sorted within each chunk. - filtered_extensions = sorted( - [name for name in desired_extensions - if name.startswith(prefix) and name not in extensions]) - for name in filtered_extensions: - # logWarn('NOT autogenerating extension refpage for', name) - extensions[name] = None - genExtension(baseDir, results.extpath, name, registry.extdict[name]) - - # autoGenFlagsPage is no longer needed because they are added to - # the spec sources now. 
- # for page in api.flags: - # if page not in genDict: - # autoGenFlagsPage(baseDir, page) - - # autoGenHandlePage is no longer needed because they are added to - # the spec sources now. - # for page in api.structs: - # if typeCategory[page] == 'handle': - # autoGenHandlePage(baseDir, page) - - sections = [ - (api.flags, 'Flag Types'), - (api.enums, 'Enumerated Types'), - (api.structs, 'Structures'), - (api.protos, 'Prototypes'), - (api.funcpointers, 'Function Pointers'), - (api.basetypes, apiName + ' Scalar Types'), - (extensions, apiName + ' Extensions'), - ] - - # Summarize pages that weren't generated, for good or bad reasons - - for (apiDict, title) in sections: - # OpenXR was keeping a 'flagged' state which only printed out a - # warning for the first non-generated page, but was otherwise - # unused. This doesn't seem helpful. - for page in apiDict: - if page not in genDict: - # Page was not generated - why not? - if page in api.alias: - logWarn('(Benign, is an alias) Ref page for', title, page, 'is aliased into', api.alias[page]) - elif page in api.flags and api.flags[page] is None: - logWarn('(Benign, no FlagBits defined) No ref page generated for ', title, - page) - else: - # Could introduce additional logic to detect - # external types and not emit them. - logWarn('No ref page generated for ', title, page) - - genSinglePageRef(baseDir) - - if results.rewrite: - # Generate Apache rewrite directives for refpage aliases - fp = open(results.rewrite, 'w', encoding='utf-8') - - for page in sorted(pages): - p = pages[page] - rewrite = p.name - - if page != rewrite: - print('RewriteRule ^', page, '.html$ ', rewrite, '.html', - sep='', file=fp) - fp.close() - - if results.toc: - # Generate dynamic portion of refpage TOC - fp = open(results.toc, 'w', encoding='utf-8') - - # Run through dictionary of pages generating an TOC - print(12 * ' ', '
  • Alphabetic Contents', sep='', file=fp) - print(16 * ' ', '
      ', sep='', file=fp) - lastLetter = None - - for page in sorted(pages, key=str.upper): - p = pages[page] - letter = page[0:1].upper() - - if letter != lastLetter: - if lastLetter: - # End previous block - print(24 * ' ', '
    ', sep='', file=fp) - print(20 * ' ', '
  • ', sep='', file=fp) - # Start new block - print(20 * ' ', '
  • ', letter, sep='', file=fp) - print(24 * ' ', '
      ', sep='', file=fp) - lastLetter = letter - - # Add this page to the list - print(28 * ' ', '
    • ', page, '
    • ', - sep='', file=fp) - - if lastLetter: - # Close the final letter block - print(24 * ' ', '
    ', sep='', file=fp) - print(20 * ' ', '
  • ', sep='', file=fp) - - # Close the list - print(16 * ' ', '', sep='', file=fp) - print(12 * ' ', '', sep='', file=fp) - - # print('name {} -> page {}'.format(page, pages[page].name)) - - fp.close() diff --git a/xml/gen_dictionaries.py b/xml/gen_dictionaries.py deleted file mode 100644 index 069800dbb..000000000 --- a/xml/gen_dictionaries.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/python3 - -# Copyright 2019-2023 The Khronos Group Inc. -# SPDX-License-Identifier: Apache-2.0 - -from collections import OrderedDict - -import argparse -import sys -import urllib -import xml.etree.ElementTree as etree -import urllib.request - -def parse_xml(path): - file = urllib.request.urlopen(path) if path.startswith("http") else open(path, 'r') - with file: - tree = etree.parse(file) - return tree - -# File Header: -def GetHeader(): - return """// Copyright 2017-2023 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -""" - -# File Footer: -def GetFooter(): - return """ -""" - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument('-registry', action='store', - default='cl.xml', - help='Use specified registry file instead of cl.xml') - parser.add_argument('-o', action='store', dest='directory', - default='.', - help='Create target and related files in specified directory') - - args = parser.parse_args() - - linkFileName = args.directory + '/api-dictionary.asciidoc' - nolinkFileName = args.directory + '/api-dictionary-no-links.asciidoc' - typeFileName = args.directory + '/api-types.txt' - - specpath = args.registry - #specpath = "https://raw.githubusercontent.com/KhronosGroup/OpenCL-Registry/main/xml/cl.xml" - - print('Generating dictionaries from: ' + specpath) - - spec = parse_xml(specpath) - - linkFile = open(linkFileName, 'w') - nolinkFile = open(nolinkFileName, 'w') - linkFile.write( GetHeader() ) - nolinkFile.write( GetHeader() 
) - typeFile = open(typeFileName, 'w') - - # Generate the API functions dictionaries: - - numberOfFuncs = 0 - - # Add core API functions with and without links: - for feature in spec.findall('feature/require'): - for api in feature.findall('command'): - name = api.get('name') - #print('found api: ' + name) - - # Example with link: - # - # // clEnqueueNDRangeKernel - # :clEnqueueNDRangeKernel_label: pass:q[*clEnqueueNDRangeKernel*] - # :clEnqueueNDRangeKernel: <> - linkFile.write('// ' + name + '\n') - linkFile.write(':' + name + '_label: pass:q[*' + name + '*]\n') - linkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') - linkFile.write('\n') - - # Example without link: - # - # // clEnqueueNDRangeKernel - # :clEnqueueNDRangeKernel: pass:q[*clEnqueueNDRangeKernel*] - nolinkFile.write('// ' + name + '\n') - nolinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') - nolinkFile.write('\n') - - numberOfFuncs = numberOfFuncs + 1 - - # Add extension API functions without links: - for extension in spec.findall('extensions/extension/require'): - for api in extension.findall('command'): - name = api.get('name') - #print('found extension api: ' +name) - - # Example without link: - # - # // clGetGLObjectInfo - # :clGetGLObjectInfo: pass:q[*clGetGLObjectInfo*] - linkFile.write('// ' + name + '\n') - linkFile.write(':' + name + ': pass:q[*' + name + '*]\n') - linkFile.write('\n') - - nolinkFile.write('// ' + name + '\n') - nolinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') - nolinkFile.write('\n') - - numberOfFuncs = numberOfFuncs + 1 - - print('Found ' + str(numberOfFuncs) + ' API functions.') - - # Generate the API enums dictionaries: - - numberOfEnums = 0 - - for enums in spec.findall('enums'): - name = enums.get('name') - for enum in enums.findall('enum'): - name = enum.get('name') - #print('found enum: ' + name) - - # Create a variant of the name that precedes underscores with - # "zero width" spaces. 
This causes some long names to be - # broken at more intuitive places. - htmlName = name[:3] + name[3:].replace("_", "_") - otherName = name[:3] + name[3:].replace("_", "_​") - - # Example with link: - # - # // CL_MEM_READ_ONLY - #:CL_MEM_READ_ONLY_label: pass:q[`CL_MEM_READ_ONLY`] - #:CL_MEM_READ_ONLY: <> - #:CL_MEM_READ_ONLY_anchor: [[CL_MEM_READ_ONLY]]{CL_MEM_READ_ONLY} - linkFile.write('// ' + name + '\n') - linkFile.write('ifdef::backend-html5[]\n') - linkFile.write(':' + name + '_label: pass:q[`' + htmlName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write('ifndef::backend-html5[]\n') - linkFile.write(':' + name + '_label: pass:q[`' + otherName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') - linkFile.write(':' + name + '_anchor: [[' + name + ']]{' + name + '}\n') - linkFile.write('\n') - - # Example without link: - # - # // CL_MEM_READ_ONLY - #:CL_MEM_READ_ONLY: pass:q[`CL_MEM_READ_ONLY`] - #:CL_MEM_READ_ONLY_anchor: {CL_MEM_READ_ONLY} - nolinkFile.write('// ' + name + '\n') - nolinkFile.write('ifdef::backend-html5[]\n') - nolinkFile.write(':' + name + ': pass:q[`' + htmlName + '`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write('ifndef::backend-html5[]\n') - nolinkFile.write(':' + name + ': pass:q[`' + otherName + '`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write(':' + name + '_anchor: {' + name + '}\n') - nolinkFile.write('\n') - - numberOfEnums = numberOfEnums + 1 - - print('Found ' + str(numberOfEnums) + ' API enumerations.') - - # Generate the API types dictionaries: - - numberOfTypes = 0 - - for types in spec.findall('types'): - for type in types.findall('type'): - addLink = False - name = "" - category = type.get('category') - if category == 'basetype': - name = type.get('name') - elif category == 'struct': - addLink = True - name = type.get('name') - elif category == 'define': - name = type.find('name').text - else: - continue - - #print('found type: ' 
+name) - - # Create a variant of the name that precedes underscores with - # "zero width" spaces. This causes some long names to be - # broken at more intuitive places. - if name.endswith('_t'): - htmlName = name - otherName = name - else: - htmlName = name[:3] + name[3:].replace("_", "_") - otherName = name[:3] + name[3:].replace("_", "_​") - - # Some types can have spaces in the name (such as unsigned char), - # but Asciidoctor attributes cannot. So, replace spaces with - # underscores for the attribute name. - attribName = name.replace(" ", "_") - - # Append the type suffix for disambiguation, since asciidoctor - # attributes are not case-sensitive (currently). - attribName = attribName + "_TYPE" - - # Example with link: - # - # // cl_image_desc - # :cl_image_desc_TYPE_label: pass:q[`cl_image_desc`] - # :cl_image_desc_TYPE: <> - linkFile.write('// ' + name + '\n') - if addLink: - linkFile.write('ifdef::backend-html5[]\n') - linkFile.write(':' + attribName + '_label: pass:q[`' + htmlName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write('ifndef::backend-html5[]\n') - linkFile.write(':' + attribName + '_label: pass:q[`' + otherName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write(':' + attribName + ': <<' + name + ',{' + attribName + '_label}>>\n') - else: - linkFile.write('ifdef::backend-html5[]\n') - linkFile.write(':' + attribName + ': pass:q[`' + htmlName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write('ifndef::backend-html5[]\n') - linkFile.write(':' + attribName + ': pass:q[`' + otherName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write('\n') - - # // cl_image_desc - # :cl_image_desc_TYPE: pass:q[`cl_image_desc`] - nolinkFile.write('// ' + name + '\n') - nolinkFile.write('ifdef::backend-html5[]\n') - nolinkFile.write(':' + attribName + ': pass:q[`' + htmlName + '`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write('ifndef::backend-html5[]\n') - nolinkFile.write(':' + attribName + ': pass:q[`' + otherName + 
'`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write('\n') - - # Print the type list to a file for custom syntax highlighting. - # For this we only care about CL types, not base types. - if category != 'basetype': - typeFile.write(' ' + name + '\n') - - numberOfTypes = numberOfTypes + 1 - - print('Found ' + str(numberOfTypes) + ' API types.') - - linkFile.write( GetFooter() ) - linkFile.close() - nolinkFile.write( GetFooter() ) - nolinkFile.close() - typeFile.close() - - print('Successfully generated file: ' + linkFileName) - print('Successfully generated file: ' + nolinkFileName) - print('Successfully generated file: ' + typeFileName) - diff --git a/xml/gen_version_notes.py b/xml/gen_version_notes.py deleted file mode 100644 index 9fed05178..000000000 --- a/xml/gen_version_notes.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/python3 - -# Copyright 2019-2023 The Khronos Group Inc. -# SPDX-License-Identifier: Apache-2.0 - -from collections import OrderedDict - -import argparse -import sys -import os -import urllib -import xml.etree.ElementTree as etree -import urllib.request - - -def parse_xml(path): - file = urllib.request.urlopen(path) if path.startswith("http") else open( - path, 'r') - with file: - tree = etree.parse(file) - return tree - - -# File Header: -def GetHeader(): - return """// Copyright 2017-2023 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ -""" - - -# File Footer: -def GetFooter(): - return """ -""" - -def FullNote(name, added_in, deprecated_by): - # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in - # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. - if added_in == "1.0" and deprecated_by == None: - return "\n// Intentionally empty, %s has always been present." % name - if added_in != "1.0" and deprecated_by == None: - return "\nIMPORTANT: {%s} is <> version %s." 
% (name, added_in) - if added_in == "1.0" and deprecated_by != None: - return "\nIMPORTANT: {%s} is <> version %s." % (name, deprecated_by) - if added_in != "1.0" and deprecated_by != None: - return "\nIMPORTANT: {%s} is <> version %s and <> version %s." % (name, added_in, deprecated_by) - -def ShortNote(name, added_in, deprecated_by): - # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in - # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. - if added_in == "1.0" and deprecated_by == None: - return "// Intentionally empty, %s has always been present." % name - if added_in != "1.0" and deprecated_by == None: - return "<> version %s." % added_in - if added_in == "1.0" and deprecated_by != None: - return "<> version %s." % deprecated_by - if added_in != "1.0" and deprecated_by != None: - return "<> version %s and <> version %s." % (added_in, deprecated_by) - -# Find feature groups that are parents of a feature/require/${entry_type} -# hierarchy, and then find all the ${entry_type} within each hierarchy: -def process_xml(spec, entry_type, note_printer): - numberOfEntries = 0 - numberOfNewEntries = 0 - numberOfDeprecatedEntries = 0 - - for feature in spec.findall('.//feature/require/%s/../..' % entry_type): - for entry in feature.findall('.//%s' % entry_type): - name = entry.get('name') - - numberOfEntries += 1 - added_in = feature.get('number') - deprecated_by = None - - # All the groups that this specific API ${entry_type} belongs. - categories = spec.findall( - './/require[@comment]/%s[@name="%s"]/..' % (entry_type, name)) - for category in categories: - comment = category.get('comment') - if "deprecated in OpenCL" in comment: - words = comment.split(" ") - assert " ".join(words[-4:-1]) == "deprecated in OpenCL" - assert deprecated_by == None # Can't deprecate something twice. 
- deprecated_by = words[-1] - - versionFileName = os.path.join(args.directory, name + ".asciidoc") - with open(versionFileName, 'w') as versionFile: - versionFile.write(GetHeader()) - versionFile.write(note_printer(name, added_in, deprecated_by)) - versionFile.write(GetFooter()) - - numberOfNewEntries += 0 if added_in == "1.0" else 1 - numberOfDeprecatedEntries += 0 if deprecated_by == None else 1 - - print('Found ' + str(numberOfEntries) + ' API ' + entry_type + 's, ' - + str(numberOfNewEntries) + " newer than 1.0, " - + str(numberOfDeprecatedEntries) + " are deprecated.") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - '-registry', - action='store', - default='cl.xml', - help='Use specified registry file instead of cl.xml') - parser.add_argument( - '-o', - action='store', - dest='directory', - default='.', - help='Create target and related files in specified directory') - - args = parser.parse_args() - - specpath = args.registry - - print('Generating version notes from: ' + specpath) - - spec = parse_xml(specpath) - - # Generate the API functions dictionaries: - - process_xml(spec, "command", FullNote) - process_xml(spec, "enum", ShortNote) diff --git a/xml/gencl.py b/xml/gencl.py deleted file mode 100644 index eb77a8cdf..000000000 --- a/xml/gencl.py +++ /dev/null @@ -1,464 +0,0 @@ -#!/usr/bin/python3 -# -# Copyright 2013-2023 The Khronos Group Inc. 
-# -# SPDX-License-Identifier: Apache-2.0 - -import argparse -import pdb -import re -import sys -import time -import xml.etree.ElementTree as etree - -from cgenerator import CGeneratorOptions, COutputGenerator -from docgenerator import DocGeneratorOptions, DocOutputGenerator -from extensionmetadocgenerator import (ExtensionMetaDocGeneratorOptions, - ExtensionMetaDocOutputGenerator) - -from generator import write - - -from pygenerator import PyOutputGenerator -from reflib import logDiag, logWarn, setLogFile -from reg import Registry - -from clconventions import OpenCLConventions as APIConventions - - -# Simple timer functions -startTime = None - - -def startTimer(timeit): - global startTime - if timeit: - startTime = time.process_time() - - -def endTimer(timeit, msg): - global startTime - if timeit: - endTime = time.process_time() - logDiag(msg, endTime - startTime) - startTime = None - - -def makeREstring(strings, default=None, strings_are_regex=False): - """Turn a list of strings into a regexp string matching exactly those strings.""" - if strings or default is None: - if not strings_are_regex: - strings = (re.escape(s) for s in strings) - return '^(' + '|'.join(strings) + ')$' - return default - -def makeGenOpts(args): - """Returns a directory of [ generator function, generator options ] indexed - by specified short names. 
The generator options incorporate the following - parameters: - - args is an parsed argument object; see below for the fields that are used.""" - global genOpts - genOpts = {} - - # Default class of extensions to include, or None - defaultExtensions = args.defaultExtensions - - # Additional extensions to include (list of extensions) - extensions = args.extension - - # Extensions to remove (list of extensions) - removeExtensions = args.removeExtensions - - # Extensions to emit (list of extensions) - emitExtensions = args.emitExtensions - - # SPIR-V capabilities / features to emit (list of extensions & capabilities) - # emitSpirv = args.emitSpirv - - # Features to include (list of features) - features = args.feature - - # Whether to disable inclusion protect in headers - protect = args.protect - - # Output target directory - directory = args.directory - - # Path to generated files, particularly api.py - genpath = args.genpath - - # Generate MISRA C-friendly headers - misracstyle = args.misracstyle; - - # Generate MISRA C++-friendly headers - misracppstyle = args.misracppstyle; - - # Descriptive names for various regexp patterns used to select - # versions and extensions - allSpirv = allFeatures = allExtensions = r'.*' - - # Turn lists of names/patterns into matching regular expressions - addExtensionsPat = makeREstring(extensions, None) - removeExtensionsPat = makeREstring(removeExtensions, None) - emitExtensionsPat = makeREstring(emitExtensions, allExtensions) - # emitSpirvPat = makeREstring(emitSpirv, allSpirv) - featuresPat = makeREstring(features, allFeatures) - - # Copyright text prefixing all headers (list of strings). 
- # The SPDX formatting below works around constraints of the 'reuse' tool - prefixStrings = [ - '/*', - '** Copyright 2015-2023 The Khronos Group Inc.', - '**', - '** SPDX' + '-License-Identifier: Apache-2.0', - '*/', - '' - ] - - # Text specific to OpenCL headers - clPrefixStrings = [ - '/*', - '** This header is generated from the Khronos OpenCL XML API Registry.', - '**', - '*/', - '' - ] - - # Defaults for generating re-inclusion protection wrappers (or not) - protectFile = protect - - # An API style conventions object - conventions = APIConventions() - - # API include files for spec and ref pages - # Overwrites include subdirectories in spec source tree - # The generated include files do not include the calling convention - # macros (apientry etc.), unlike the header files. - # Because the 1.0 core branch includes ref pages for extensions, - # all the extension interfaces need to be generated, even though - # none are used by the core spec itself. - genOpts['apiinc'] = [ - DocOutputGenerator, - DocGeneratorOptions( - conventions = conventions, - filename = 'timeMarker', - directory = directory, - genpath = genpath, - apiname = 'opencl', - profile = None, - versions = featuresPat, - emitversions = featuresPat, - defaultExtensions = defaultExtensions, - addExtensions = addExtensionsPat, - removeExtensions = removeExtensionsPat, - emitExtensions = emitExtensionsPat, - prefixText = prefixStrings + clPrefixStrings, - apicall = '', - apientry = '', - apientryp = '*', - alignFuncParam = 0, - expandEnumerants = False) - ] - - # Python representation of API information, used by scripts that - # don't need to load the full XML. 
- genOpts['api.py'] = [ - PyOutputGenerator, - DocGeneratorOptions( - conventions = conventions, - filename = 'api.py', - directory = directory, - genpath = genpath, - apiname = 'opencl', - profile = None, - versions = featuresPat, - emitversions = featuresPat, - defaultExtensions = None, - addExtensions = addExtensionsPat, - removeExtensions = removeExtensionsPat, - emitExtensions = emitExtensionsPat, - reparentEnums = False) - ] - - # Extension metainformation for spec extension appendices - # Includes all extensions by default, but only so that the generated - # 'promoted_extensions_*' files refer to all extensions that were - # promoted to a core version. - genOpts['extinc'] = [ - ExtensionMetaDocOutputGenerator, - ExtensionMetaDocGeneratorOptions( - conventions = conventions, - filename = 'timeMarker', - directory = directory, - genpath = None, - apiname = 'opencl', - profile = None, - versions = featuresPat, - emitversions = None, - defaultExtensions = defaultExtensions, - addExtensions = addExtensionsPat, - removeExtensions = None, - emitExtensions = emitExtensionsPat) - ] - - # Platform extensions, in their own header files - # Each element of the platforms[] array defines information for - # generating a single platform: - # [0] is the generated header file name - # [1] is the set of platform extensions to generate - # [2] is additional extensions whose interfaces should be considered, - # but suppressed in the output, to avoid duplicate definitions of - # dependent types like VkDisplayKHR and VkSurfaceKHR which come from - # non-platform extensions. - - # Track all platform extensions, for exclusion from vulkan_core.h - allPlatformExtensions = [] - - # # Extensions suppressed for all platforms. - # # Covers common WSI extension types. 
- # commonSuppressExtensions = [ 'VK_KHR_display', 'VK_KHR_swapchain' ] - # - # platforms = [ - # [ 'vulkan_android.h', [ 'VK_KHR_android_surface', - # 'VK_ANDROID_external_memory_android_hardware_buffer' - # ], commonSuppressExtensions ], - # [ 'vulkan_fuchsia.h', [ 'VK_FUCHSIA_imagepipe_surface'], commonSuppressExtensions ], - # [ 'vulkan_ios.h', [ 'VK_MVK_ios_surface' ], commonSuppressExtensions ], - # [ 'vulkan_macos.h', [ 'VK_MVK_macos_surface' ], commonSuppressExtensions ], - # [ 'vulkan_vi.h', [ 'VK_NN_vi_surface' ], commonSuppressExtensions ], - # [ 'vulkan_wayland.h', [ 'VK_KHR_wayland_surface' ], commonSuppressExtensions ], - # [ 'vulkan_win32.h', [ 'VK_.*_win32(|_.*)' ], commonSuppressExtensions + [ 'VK_KHR_external_semaphore', 'VK_KHR_external_memory_capabilities', 'VK_KHR_external_fence', 'VK_KHR_external_fence_capabilities', 'VK_NV_external_memory_capabilities' ] ], - # [ 'vulkan_xcb.h', [ 'VK_KHR_xcb_surface' ], commonSuppressExtensions ], - # [ 'vulkan_xlib.h', [ 'VK_KHR_xlib_surface' ], commonSuppressExtensions ], - # [ 'vulkan_xlib_xrandr.h', [ 'VK_EXT_acquire_xlib_display' ], commonSuppressExtensions ], - # ] - # - # for platform in platforms: - # headername = platform[0] - # - # allPlatformExtensions += platform[1] - # - # addPlatformExtensionsRE = makeREstring(platform[1] + platform[2]) - # emitPlatformExtensionsRE = makeREstring(platform[1]) - # - # opts = CGeneratorOptions( - # filename = headername, - # directory = directory, - # apiname = 'vulkan', - # profile = None, - # versions = featuresPat, - # emitversions = None, - # defaultExtensions = None, - # addExtensions = addPlatformExtensionsRE, - # removeExtensions = None, - # emitExtensions = emitPlatformExtensionsRE, - # prefixText = prefixStrings + clPrefixStrings, - # genFuncPointers = True, - # protectFile = protectFile, - # protectFeature = False, - # protectProto = '#ifndef', - # protectProtoStr = 'VK_NO_PROTOTYPES', - # apicall = 'VKAPI_ATTR ', - # apientry = 'VKAPI_CALL ', - # 
apientryp = 'VKAPI_PTR *', - # alignFuncParam = 0) - # - # genOpts[headername] = [ COutputGenerator, opts ] - - # Header for core API + extensions. - # To generate just the core API, - # change to 'defaultExtensions = None' below. - # - # By default this adds all enabled, non-platform extensions. - # It removes all platform extensions (from the platform headers options - # constructed above) as well as any explicitly specified removals. - - removeExtensionsPat = makeREstring( - allPlatformExtensions + removeExtensions, None, strings_are_regex=True) - - genOpts['cl.h'] = [ - COutputGenerator, - CGeneratorOptions( - conventions = conventions, - filename = 'cl.h', - directory = directory, - genpath = None, - apiname = 'opencl', - profile = None, - versions = featuresPat, - emitversions = featuresPat, - defaultExtensions = defaultExtensions, - addExtensions = None, - removeExtensions = removeExtensionsPat, - emitExtensions = emitExtensionsPat, - prefixText = prefixStrings + clPrefixStrings, - genFuncPointers = False, - protectFile = protectFile, - protectFeature = False, - protectProto = '#ifndef', - protectProtoStr = 'CL_NO_PROTOTYPES', - apicall = 'CL_API_ENTRY ', - apientry = 'CL_API_CALL ', - apientryp = 'CL_API_CALL *', - alignFuncParam = 0, - misracstyle = misracstyle, - misracppstyle = misracppstyle) - ] - -def genTarget(args): - """Create an API generator and corresponding generator options based on - the requested target and command line options. - - This is encapsulated in a function so it can be profiled and/or timed. 
- The args parameter is an parsed argument object containing the following - fields that are used: - - - target - target to generate - - directory - directory to generate it in - - protect - True if re-inclusion wrappers should be created - - extensions - list of additional extensions to include in generated interfaces""" - - # Create generator options with parameters specified on command line - makeGenOpts(args) - - # pdb.set_trace() - - # Select a generator matching the requested target - if args.target in genOpts: - createGenerator = genOpts[args.target][0] - options = genOpts[args.target][1] - - logDiag('* Building', options.filename) - logDiag('* options.versions =', options.versions) - logDiag('* options.emitversions =', options.emitversions) - logDiag('* options.defaultExtensions =', options.defaultExtensions) - logDiag('* options.addExtensions =', options.addExtensions) - logDiag('* options.removeExtensions =', options.removeExtensions) - logDiag('* options.emitExtensions =', options.emitExtensions) - - gen = createGenerator(errFile=errWarn, - warnFile=errWarn, - diagFile=diag) - return (gen, options) - else: - logErr('No generator options for unknown target:', args.target) - return None - - -# -feature name -# -extension name -# For both, "name" may be a single name, or a space-separated list -# of names, or a regular expression. 
-if __name__ == '__main__': - parser = argparse.ArgumentParser() - - parser.add_argument('-defaultExtensions', action='store', - default='opencl', - help='Specify a single class of extensions to add to targets') - parser.add_argument('-extension', action='append', - default=[], - help='Specify an extension or extensions to add to targets') - parser.add_argument('-removeExtensions', action='append', - default=[], - help='Specify an extension or extensions to remove from targets') - parser.add_argument('-emitExtensions', action='append', - default=[], - help='Specify an extension or extensions to emit in targets') - - - - parser.add_argument('-feature', action='append', - default=[], - help='Specify a core API feature name or names to add to targets') - parser.add_argument('-debug', action='store_true', - help='Enable debugging') - parser.add_argument('-dump', action='store_true', - help='Enable dump to stderr') - parser.add_argument('-diagfile', action='store', - default=None, - help='Write diagnostics to specified file') - parser.add_argument('-errfile', action='store', - default=None, - help='Write errors and warnings to specified file instead of stderr') - parser.add_argument('-noprotect', dest='protect', action='store_false', - help='Disable inclusion protection in output headers') - parser.add_argument('-profile', action='store_true', - help='Enable profiling') - parser.add_argument('-registry', action='store', - default='cl.xml', - help='Use specified registry file instead of cl.xml') - parser.add_argument('-time', action='store_true', - help='Enable timing') - parser.add_argument('-validate', action='store_true', - help='Validate the registry properties and exit') - parser.add_argument('-genpath', action='store', default='gen', - help='Path to generated files') - parser.add_argument('-o', action='store', dest='directory', - default='.', - help='Create target and related files in specified directory') - parser.add_argument('target', metavar='target', 
nargs='?', - help='Specify target') - parser.add_argument('-quiet', action='store_true', default=True, - help='Suppress script output during normal execution.') - parser.add_argument('-verbose', action='store_false', dest='quiet', default=True, - help='Enable script output during normal execution.') - parser.add_argument('-misracstyle', dest='misracstyle', action='store_true', - help='generate MISRA C-friendly headers') - parser.add_argument('-misracppstyle', dest='misracppstyle', action='store_true', - help='generate MISRA C++-friendly headers') - - args = parser.parse_args() - - # This splits arguments which are space-separated lists - args.feature = [name for arg in args.feature for name in arg.split()] - args.extension = [name for arg in args.extension for name in arg.split()] - - # create error/warning & diagnostic files - if args.errfile: - errWarn = open(args.errfile, 'w', encoding='utf-8') - else: - errWarn = sys.stderr - - if args.diagfile: - diag = open(args.diagfile, 'w', encoding='utf-8') - else: - diag = None - - (gen, options) = (None, None) - if not args.validate: - # Create the API generator & generator options - (gen, options) = genTarget(args) - - # Create the registry object with the specified generator and generator - # options. The options are set before XML loading as they may affect it. 
- reg = Registry(gen, options) - - # Parse the specified registry XML into an ElementTree object - startTimer(args.time) - tree = etree.parse(args.registry) - endTimer(args.time, '* Time to make ElementTree =') - - # Load the XML tree into the registry object - startTimer(args.time) - reg.loadElementTree(tree) - endTimer(args.time, '* Time to parse ElementTree =') - - if args.validate: - success = reg.validateRegistry() - sys.exit(0 if success else 1) - - if args.dump: - logDiag('* Dumping registry to regdump.txt') - reg.dumpReg(filehandle=open('regdump.txt', 'w', encoding='utf-8')) - - # Finally, use the output generator to create the requested target - if args.debug: - pdb.run('reg.apiGen()') - else: - startTimer(args.time) - reg.apiGen() - endTimer(args.time, '* Time to generate ' + options.filename + ' =') - - if not args.quiet: - logDiag('* Generated', options.filename) diff --git a/xml/generator.py b/xml/generator.py deleted file mode 100644 index c7c460d95..000000000 --- a/xml/generator.py +++ /dev/null @@ -1,1186 +0,0 @@ -#!/usr/bin/python3 -i -# -# Copyright 2013-2023 The Khronos Group Inc. -# -# SPDX-License-Identifier: Apache-2.0 -"""Base class for source/header/doc generators, as well as some utility functions.""" - -from __future__ import unicode_literals - -import io -import os -import pdb -import re -import shutil -import sys -import tempfile -try: - from pathlib import Path -except ImportError: - from pathlib2 import Path - -from spec_tools.util import getElemName, getElemType - - -def write(*args, **kwargs): - file = kwargs.pop('file', sys.stdout) - end = kwargs.pop('end', '\n') - file.write(' '.join(str(arg) for arg in args)) - file.write(end) - - -def noneStr(s): - """Return string argument, or "" if argument is None. - - Used in converting etree Elements into text. 
- s - string to convert""" - if s: - return s - return "" - - -def enquote(s): - """Return string argument with surrounding quotes, - for serialization into Python code.""" - if s: - return "'{}'".format(s) - return None - - -def regSortCategoryKey(feature): - """Sort key for regSortFeatures. - Sorts by category of the feature name string: - - - Core API features (those defined with a `` tag) - - ARB/KHR/OES (Khronos extensions) - - other (EXT/vendor extensions)""" - - if feature.elem.tag == 'feature': - return 0 - if (feature.category == 'ARB' - or feature.category == 'KHR' - or feature.category == 'OES'): - return 1 - - return 2 - - -def regSortOrderKey(feature): - """Sort key for regSortFeatures - key is the sortorder attribute.""" - - # print("regSortOrderKey {} -> {}".format(feature.name, feature.sortorder)) - return feature.sortorder - - -def regSortFeatureVersionKey(feature): - """Sort key for regSortFeatures - key is the feature version. - `` elements all have version number 0.""" - - return float(feature.versionNumber) - - -def regSortExtensionNumberKey(feature): - """Sort key for regSortFeatures - key is the extension number. - `` elements all have extension number 0.""" - - return int(feature.number) - - -def regSortFeatures(featureList): - """Default sort procedure for features. - - - Sorts by explicit sort order (default 0) relative to other features - - then by feature category ('feature' or 'extension'), - - then by version number (for features) - - then by extension number (for extensions)""" - featureList.sort(key=regSortExtensionNumberKey) - featureList.sort(key=regSortFeatureVersionKey) - featureList.sort(key=regSortCategoryKey) - featureList.sort(key=regSortOrderKey) - - -class GeneratorOptions: - """Base class for options used during header/documentation production. 
- - These options are target language independent, and used by - Registry.apiGen() and by base OutputGenerator objects.""" - - def __init__(self, - conventions=None, - filename=None, - directory='.', - genpath=None, - apiname=None, - profile=None, - versions='.*', - emitversions='.*', - defaultExtensions=None, - addExtensions=None, - removeExtensions=None, - emitExtensions=None, - emitSpirv=None, - reparentEnums=True, - sortProcedure=regSortFeatures): - """Constructor. - - Arguments: - - - conventions - may be mandatory for some generators: - an object that implements ConventionsBase - - filename - basename of file to generate, or None to write to stdout. - - directory - directory in which to generate files - - genpath - path to previously generated files, such as api.py - - apiname - string matching `` 'apiname' attribute, e.g. 'gl'. - - profile - string specifying API profile , e.g. 'core', or None. - - versions - regex matching API versions to process interfaces for. - Normally `'.*'` or `'[0-9][.][0-9]'` to match all defined versions. - - emitversions - regex matching API versions to actually emit - interfaces for (though all requested versions are considered - when deciding which interfaces to generate). For GL 4.3 glext.h, - this might be `'1[.][2-5]|[2-4][.][0-9]'`. - - defaultExtensions - If not None, a string which must in its - entirety match the pattern in the "supported" attribute of - the ``. Defaults to None. Usually the same as apiname. - - addExtensions - regex matching names of additional extensions - to include. Defaults to None. - - removeExtensions - regex matching names of extensions to - remove (after defaultExtensions and addExtensions). Defaults - to None. - - emitExtensions - regex matching names of extensions to actually emit - interfaces for (though all requested versions are considered when - deciding which interfaces to generate). - to None. 
- - emitSpirv - regex matching names of extensions and capabilities - to actually emit interfaces for. - - reparentEnums - move elements which extend an enumerated - type from or elements to the target - element. This is required for almost all purposes, but the - InterfaceGenerator relies on the list of interfaces in the - or being complete. Defaults to True. - - sortProcedure - takes a list of FeatureInfo objects and sorts - them in place to a preferred order in the generated output. - Default is core API versions, ARB/KHR/OES extensions, all other - extensions, by core API version number or extension number in each - group. - - The regex patterns can be None or empty, in which case they match - nothing.""" - self.conventions = conventions - """may be mandatory for some generators: - an object that implements ConventionsBase""" - - self.filename = filename - "basename of file to generate, or None to write to stdout." - - self.genpath = genpath - """path to previously generated files, such as api.py""" - - self.directory = directory - "directory in which to generate filename" - - self.apiname = apiname - "string matching `` 'apiname' attribute, e.g. 'gl'." - - self.profile = profile - "string specifying API profile , e.g. 'core', or None." - - self.versions = self.emptyRegex(versions) - """regex matching API versions to process interfaces for. - Normally `'.*'` or `'[0-9][.][0-9]'` to match all defined versions.""" - - self.emitversions = self.emptyRegex(emitversions) - """regex matching API versions to actually emit - interfaces for (though all requested versions are considered - when deciding which interfaces to generate). For GL 4.3 glext.h, - this might be `'1[.][2-5]|[2-4][.][0-9]'`.""" - - self.defaultExtensions = defaultExtensions - """If not None, a string which must in its - entirety match the pattern in the "supported" attribute of - the ``. Defaults to None. 
Usually the same as apiname.""" - - self.addExtensions = self.emptyRegex(addExtensions) - """regex matching names of additional extensions - to include. Defaults to None.""" - - self.removeExtensions = self.emptyRegex(removeExtensions) - """regex matching names of extensions to - remove (after defaultExtensions and addExtensions). Defaults - to None.""" - - self.emitExtensions = self.emptyRegex(emitExtensions) - """regex matching names of extensions to actually emit - interfaces for (though all requested versions are considered when - deciding which interfaces to generate).""" - - self.emitSpirv = self.emptyRegex(emitSpirv) - """regex matching names of extensions and capabilities - to actually emit interfaces for.""" - - self.reparentEnums = reparentEnums - """boolean specifying whether to remove elements from - or when extending an type.""" - - self.sortProcedure = sortProcedure - """takes a list of FeatureInfo objects and sorts - them in place to a preferred order in the generated output. - Default is core API versions, ARB/KHR/OES extensions, all - other extensions, alphabetically within each group.""" - - self.codeGenerator = False - """True if this generator makes compilable code""" - - def emptyRegex(self, pat): - """Substitute a regular expression which matches no version - or extension names for None or the empty string.""" - if not pat: - return '_nomatch_^' - - return pat - - -class OutputGenerator: - """Generate specified API interfaces in a specific style, such as a C header. - - Base class for generating API interfaces. - Manages basic logic, logging, and output file control. - Derived classes actually generate formatted output. 
- """ - - # categoryToPath - map XML 'category' to include file directory name - categoryToPath = { - 'bitmask': 'flags', - 'enum': 'enums', - 'funcpointer': 'funcpointers', - 'handle': 'handles', - 'define': 'defines', - 'basetype': 'basetypes', - } - - def __init__(self, errFile=sys.stderr, warnFile=sys.stderr, diagFile=sys.stdout): - """Constructor - - - errFile, warnFile, diagFile - file handles to write errors, - warnings, diagnostics to. May be None to not write.""" - self.outFile = None - self.errFile = errFile - self.warnFile = warnFile - self.diagFile = diagFile - # Internal state - self.featureName = None - self.genOpts = None - self.registry = None - self.featureDictionary = {} - # Used for extension enum value generation - self.extBase = 1000000000 - self.extBlockSize = 1000 - self.madeDirs = {} - - # API dictionary, which may be loaded by the beginFile method of - # derived generators. - self.apidict = None - - def logMsg(self, level, *args): - """Write a message of different categories to different - destinations. - - - `level` - - 'diag' (diagnostic, voluminous) - - 'warn' (warning) - - 'error' (fatal error - raises exception after logging) - - - `*args` - print()-style arguments to direct to corresponding log""" - if level == 'error': - strfile = io.StringIO() - write('ERROR:', *args, file=strfile) - if self.errFile is not None: - write(strfile.getvalue(), file=self.errFile) - raise UserWarning(strfile.getvalue()) - elif level == 'warn': - if self.warnFile is not None: - write('WARNING:', *args, file=self.warnFile) - elif level == 'diag': - if self.diagFile is not None: - write('DIAG:', *args, file=self.diagFile) - else: - raise UserWarning( - '*** FATAL ERROR in Generator.logMsg: unknown level:' + level) - - def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): - """Parse and convert an `` tag into a value. - - Returns a list: - - - first element - integer representation of the value, or None - if needsNum is False. 
The value must be a legal number - if needsNum is True. - - second element - string representation of the value - - There are several possible representations of values. - - - A 'value' attribute simply contains the value. - - A 'bitpos' attribute defines a value by specifying the bit - position which is set in that value. - - An 'offset','extbase','extends' triplet specifies a value - as an offset to a base value defined by the specified - 'extbase' extension name, which is then cast to the - typename specified by 'extends'. This requires probing - the registry database, and imbeds knowledge of the - API extension enum scheme in this function. - - An 'alias' attribute contains the name of another enum - which this is an alias of. The other enum must be - declared first when emitting this enum.""" - name = elem.get('name') - numVal = None - if 'value' in elem.keys(): - value = elem.get('value') - # print('About to translate value =', value, 'type =', type(value)) - if needsNum: - numVal = int(value, 0) - # If there's a non-integer, numeric 'type' attribute (e.g. 'u' or - # 'ull'), append it to the string value. 
- # t = enuminfo.elem.get('type') - # if t is not None and t != '' and t != 'i' and t != 's': - # value += enuminfo.type - if forceSuffix: - if bitwidth == 64: - value = value + 'ULL' - else: - value = value + 'U' - self.logMsg('diag', 'Enum', name, '-> value [', numVal, ',', value, ']') - return [numVal, value] - if 'bitpos' in elem.keys(): - value = elem.get('bitpos') - bitpos = int(value, 0) - numVal = 1 << bitpos - value = '0x%08x' % numVal - if bitwidth == 64: - value = value + 'ULL' - elif forceSuffix: - value = value + 'U' - self.logMsg('diag', 'Enum', name, '-> bitpos [', numVal, ',', value, ']') - return [numVal, value] - if 'offset' in elem.keys(): - # Obtain values in the mapping from the attributes - enumNegative = False - offset = int(elem.get('offset'), 0) - extnumber = int(elem.get('extnumber'), 0) - extends = elem.get('extends') - if 'dir' in elem.keys(): - enumNegative = True - self.logMsg('diag', 'Enum', name, 'offset =', offset, - 'extnumber =', extnumber, 'extends =', extends, - 'enumNegative =', enumNegative) - # Now determine the actual enumerant value, as defined - # in the "Layers and Extensions" appendix of the spec. - numVal = self.extBase + (extnumber - 1) * self.extBlockSize + offset - if enumNegative: - numVal *= -1 - value = '%d' % numVal - # More logic needed! - self.logMsg('diag', 'Enum', name, '-> offset [', numVal, ',', value, ']') - return [numVal, value] - if 'alias' in elem.keys(): - return [None, elem.get('alias')] - return [None, None] - - def checkDuplicateEnums(self, enums): - """Check enumerated values for duplicates. - - - enums - list of `` Elements - - returns the list with duplicates stripped""" - # Dictionaries indexed by name and numeric value. 
- # Entries are [ Element, numVal, strVal ] matching name or value - - nameMap = {} - valueMap = {} - - stripped = [] - for elem in enums: - name = elem.get('name') - (numVal, strVal) = self.enumToValue(elem, True) - - if name in nameMap: - # Duplicate name found; check values - (name2, numVal2, strVal2) = nameMap[name] - - # Duplicate enum values for the same name are benign. This - # happens when defining the same enum conditionally in - # several extension blocks. - if (strVal2 == strVal or (numVal is not None - and numVal == numVal2)): - True - # self.logMsg('info', 'checkDuplicateEnums: Duplicate enum (' + name + - # ') found with the same value:' + strVal) - else: - self.logMsg('warn', 'checkDuplicateEnums: Duplicate enum (' + name - + ') found with different values:' + strVal - + ' and ' + strVal2) - - # Don't add the duplicate to the returned list - continue - elif numVal in valueMap: - # Duplicate value found (such as an alias); report it, but - # still add this enum to the list. - (name2, numVal2, strVal2) = valueMap[numVal] - - msg = 'Two enums found with the same value: {} = {} = {}'.format( - name, name2.get('name'), strVal) - self.logMsg('error', msg) - - # Track this enum to detect followon duplicates - nameMap[name] = [elem, numVal, strVal] - if numVal is not None: - valueMap[numVal] = [elem, numVal, strVal] - - # Add this enum to the list - stripped.append(elem) - - # Return the list - return stripped - - def misracstyle(self): - return False; - - def misracppstyle(self): - return False; - - def buildEnumCDecl(self, expand, groupinfo, groupName): - """Generate the C declaration for an enum""" - groupElem = groupinfo.elem - - # Determine the required bit width for the enum group. - # 32 is the default, which generates C enum types for the values. 
- bitwidth = 32 - - # If the constFlagBits preference is set, 64 is the default for bitmasks - if self.genOpts.conventions.constFlagBits and groupElem.get('type') == 'bitmask': - bitwidth = 64 - - # Check for an explicitly defined bitwidth, which will override any defaults. - if groupElem.get('bitwidth'): - try: - bitwidth = int(groupElem.get('bitwidth')) - except ValueError as ve: - self.logMsg('error', 'Invalid value for bitwidth attribute (', groupElem.get('bitwidth'), ') for ', groupName, ' - must be an integer value\n') - exit(1) - - usebitmask = False - usedefine = False - - # Bitmask flags can be generated as either "static const uint{32,64}_t" values, - # or as 32-bit C enums. 64-bit types must use uint64_t values. - if groupElem.get('type') == 'bitmask': - if bitwidth > 32 or self.misracppstyle(): - usebitmask = True - if self.misracstyle(): - usedefine = True - - if usedefine or usebitmask: - # Validate the bitwidth and generate values appropriately - if bitwidth > 64: - self.logMsg('error', 'Invalid value for bitwidth attribute (', groupElem.get('bitwidth'), ') for bitmask type ', groupName, ' - must be less than or equal to 64\n') - exit(1) - else: - return self.buildEnumCDecl_BitmaskOrDefine(groupinfo, groupName, bitwidth, usedefine) - else: - # Validate the bitwidth and generate values appropriately - if bitwidth > 32: - self.logMsg('error', 'Invalid value for bitwidth attribute (', groupElem.get('bitwidth'), ') for enum type ', groupName, ' - must be less than or equal to 32\n') - exit(1) - else: - return self.buildEnumCDecl_Enum(expand, groupinfo, groupName) - - def buildEnumCDecl_BitmaskOrDefine(self, groupinfo, groupName, bitwidth, usedefine): - """Generate the C declaration for an "enum" that is actually a - set of flag bits""" - groupElem = groupinfo.elem - flagTypeName = groupElem.get('name') - - # Prefix - body = "// Flag bits for " + flagTypeName + "\n" - - if bitwidth == 64: - body += "typedef VkFlags64 %s;\n" % flagTypeName; - else: - body 
+= "typedef VkFlags %s;\n" % flagTypeName; - - # Maximum allowable value for a flag (unsigned 64-bit integer) - maxValidValue = 2**(64) - 1 - minValidValue = 0 - - # Get a list of nested 'enum' tags. - enums = groupElem.findall('enum') - - # Check for and report duplicates, and return a list with them - # removed. - enums = self.checkDuplicateEnums(enums) - - # Accumulate non-numeric enumerant values separately and append - # them following the numeric values, to allow for aliases. - # NOTE: this doesn't do a topological sort yet, so aliases of - # aliases can still get in the wrong order. - aliasText = '' - - # Loop over the nested 'enum' tags. - for elem in enums: - # Convert the value to an integer and use that to track min/max. - # Values of form -(number) are accepted but nothing more complex. - # Should catch exceptions here for more complex constructs. Not yet. - (numVal, strVal) = self.enumToValue(elem, True, bitwidth, True) - name = elem.get('name') - - # Range check for the enum value - if numVal is not None and (numVal > maxValidValue or numVal < minValidValue): - self.logMsg('error', 'Allowable range for flag types in C is [', minValidValue, ',', maxValidValue, '], but', name, 'flag has a value outside of this (', strVal, ')\n') - exit(1) - - decl = self.genRequirements(name, mustBeFound = False) - - if self.isEnumRequired(elem): - protect = elem.get('protect') - if protect is not None: - body += '#ifdef {}\n'.format(protect) - - if usedefine: - decl += "#define {} {}\n".format(name, strVal) - elif self.misracppstyle(): - decl += "static constexpr {} {} {{{}}};\n".format(flagTypeName, name, strVal) - else: - # Some C compilers only allow initializing a 'static const' variable with a literal value. - # So initializing an alias from another 'static const' value would fail to compile. - # Work around this by chasing the aliases to get the actual value. 
- while numVal is None: - alias = self.registry.tree.find("enums/enum[@name='" + strVal + "']") - (numVal, strVal) = self.enumToValue(alias, True, bitwidth, True) - decl += "static const {} {} = {};\n".format(flagTypeName, name, strVal) - - if numVal is not None: - body += decl - else: - aliasText += decl - - if protect is not None: - body += '#endif\n' - - # Now append the non-numeric enumerant values - body += aliasText - - # Postfix - - return ("bitmask", body) - - def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): - """Generate the C declaration for an enumerated type""" - groupElem = groupinfo.elem - - # Break the group name into prefix and suffix portions for range - # enum generation - expandName = re.sub(r'([0-9]+|[a-z_])([A-Z0-9])', r'\1_\2', groupName).upper() - expandPrefix = expandName - expandSuffix = '' - expandSuffixMatch = re.search(r'[A-Z][A-Z]+$', groupName) - if expandSuffixMatch: - expandSuffix = '_' + expandSuffixMatch.group() - # Strip off the suffix from the prefix - expandPrefix = expandName.rsplit(expandSuffix, 1)[0] - - # Prefix - body = ["typedef enum %s {" % groupName] - - # @@ Should use the type="bitmask" attribute instead - isEnum = ('FLAG_BITS' not in expandPrefix) - - # Allowable range for a C enum - which is that of a signed 32-bit integer - maxValidValue = 2**(32 - 1) - 1 - minValidValue = (maxValidValue * -1) - 1 - - - # Get a list of nested 'enum' tags. - enums = groupElem.findall('enum') - - # Check for and report duplicates, and return a list with them - # removed. - enums = self.checkDuplicateEnums(enums) - - # Loop over the nested 'enum' tags. Keep track of the minimum and - # maximum numeric values, if they can be determined; but only for - # core API enumerants, not extension enumerants. This is inferred - # by looking for 'extends' attributes. - minName = None - - # Accumulate non-numeric enumerant values separately and append - # them following the numeric values, to allow for aliases. 
- # NOTE: this doesn't do a topological sort yet, so aliases of - # aliases can still get in the wrong order. - aliasText = [] - - for elem in enums: - # Convert the value to an integer and use that to track min/max. - # Values of form -(number) are accepted but nothing more complex. - # Should catch exceptions here for more complex constructs. Not yet. - (numVal, strVal) = self.enumToValue(elem, True) - name = elem.get('name') - - # Extension enumerants are only included if they are required - if self.isEnumRequired(elem): - decl = '' - - protect = elem.get('protect') - if protect is not None: - decl += '#ifdef {}\n'.format(protect) - - # Indent requirements comment, if there is one - requirements = self.genRequirements(name, mustBeFound = False) - if requirements != '': - requirements = ' ' + requirements - decl += requirements - decl += ' {} = {},'.format(name, strVal) - - if protect is not None: - decl += '\n#endif' - - if numVal is not None: - body.append(decl) - else: - aliasText.append(decl) - - # Range check for the enum value - if numVal is not None and (numVal > maxValidValue or numVal < minValidValue): - self.logMsg('error', 'Allowable range for C enum types is [', minValidValue, ',', maxValidValue, '], but', name, 'has a value outside of this (', strVal, ')\n') - exit(1) - - # Don't track min/max for non-numbers (numVal is None) - if isEnum and numVal is not None and elem.get('extends') is None: - if minName is None: - minName = maxName = name - minValue = maxValue = numVal - elif numVal < minValue: - minName = name - minValue = numVal - elif numVal > maxValue: - maxName = name - maxValue = numVal - - # Now append the non-numeric enumerant values - body.extend(aliasText) - - # Generate min/max value tokens - legacy use case. 
- if isEnum and expand: - body.extend((" {}_BEGIN_RANGE{} = {},".format(expandPrefix, expandSuffix, minName), - " {}_END_RANGE{} = {},".format( - expandPrefix, expandSuffix, maxName), - " {}_RANGE_SIZE{} = ({} - {} + 1),".format(expandPrefix, expandSuffix, maxName, minName))) - - # Generate a range-padding value to ensure the enum is 32 bits, but - # only in code generators, so it doesn't appear in documentation - if (self.genOpts.codeGenerator or - self.conventions.generate_max_enum_in_docs): - body.append(" {}_MAX_ENUM{} = 0x7FFFFFFF".format( - expandPrefix, expandSuffix)) - - # Postfix - body.append("} %s;" % groupName) - - # Determine appropriate section for this declaration - if groupElem.get('type') == 'bitmask': - section = 'bitmask' - else: - section = 'group' - - return (section, '\n'.join(body)) - - def buildConstantCDecl(self, enuminfo, name, alias): - """Generate the C declaration for a constant (a single - value). - - tags may specify their values in several ways, but are - usually just integers or floating-point numbers.""" - - (_, strVal) = self.enumToValue(enuminfo.elem, False) - - if self.misracppstyle() and enuminfo.elem.get('type') and not alias: - # Generate e.g.: static constexpr uint32_t x = ~static_cast(1U); - # This appeases MISRA "underlying type" rules. 
- typeStr = enuminfo.elem.get('type'); - invert = '~' in strVal - number = strVal.strip("()~UL") - if typeStr != "float": - number += 'U' - strVal = "~" if invert else "" - strVal += "static_cast<" + typeStr + ">(" + number + ")" - body = 'static constexpr ' + typeStr.ljust(9) + name.ljust(33) + ' {' + strVal + '};' - elif enuminfo.elem.get('type') and not alias: - # Generate e.g.: #define x (~0ULL) - typeStr = enuminfo.elem.get('type'); - invert = '~' in strVal - paren = '(' in strVal - number = strVal.strip("()~UL") - if typeStr != "float": - if typeStr == "uint64_t": - number += 'ULL' - else: - number += 'U' - strVal = "~" if invert else "" - strVal += number - if paren: - strVal = "(" + strVal + ")"; - body = '#define ' + name.ljust(33) + ' ' + strVal; - else: - body = '#define ' + name.ljust(33) + ' ' + strVal - - return body - - def makeDir(self, path): - """Create a directory, if not already done. - - Generally called from derived generators creating hierarchies.""" - self.logMsg('diag', 'OutputGenerator::makeDir(' + path + ')') - if path not in self.madeDirs: - # This can get race conditions with multiple writers, see - # https://stackoverflow.com/questions/273192/ - if not os.path.exists(path): - os.makedirs(path) - self.madeDirs[path] = None - - def beginFile(self, genOpts): - """Start a new interface file - - - genOpts - GeneratorOptions controlling what's generated and how""" - self.genOpts = genOpts - self.should_insert_may_alias_macro = \ - self.genOpts.conventions.should_insert_may_alias_macro(self.genOpts) - - # Try to import the API dictionary, api.py, if it exists. Nothing in - # api.py cannot be extracted directly from the XML, and in the - # future we should do that. - if self.genOpts.genpath is not None: - try: - sys.path.insert(0, self.genOpts.genpath) - import api - self.apidict = api - except ImportError: - self.apidict = None - - self.conventions = genOpts.conventions - - # Open a temporary file for accumulating output. 
- if self.genOpts.filename is not None: - self.outFile = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', newline='\n', delete=False) - else: - self.outFile = sys.stdout - - def endFile(self): - if self.errFile: - self.errFile.flush() - if self.warnFile: - self.warnFile.flush() - if self.diagFile: - self.diagFile.flush() - self.outFile.flush() - if self.outFile != sys.stdout and self.outFile != sys.stderr: - self.outFile.close() - - # On successfully generating output, move the temporary file to the - # target file. - if self.genOpts.filename is not None: - if sys.platform == 'win32': - directory = Path(self.genOpts.directory) - if not Path.exists(directory): - os.makedirs(directory) - shutil.copy(self.outFile.name, self.genOpts.directory + '/' + self.genOpts.filename) - os.remove(self.outFile.name) - self.genOpts = None - - def beginFeature(self, interface, emit): - """Write interface for a feature and tag generated features as having been done. - - - interface - element for the `` / `` to generate - - emit - actually write to the header only when True""" - self.emit = emit - self.featureName = interface.get('name') - # If there's an additional 'protect' attribute in the feature, save it - self.featureExtraProtect = interface.get('protect') - - def endFeature(self): - """Finish an interface file, closing it when done. - - Derived classes responsible for emitting feature""" - self.featureName = None - self.featureExtraProtect = None - - def genRequirements(self, name, mustBeFound = True): - """Generate text showing what core versions and extensions introduce - an API. This exists in the base Generator class because it's used by - the shared enumerant-generating interfaces (buildEnumCDecl, etc.). - Here it returns an empty string for most generators, but can be - overridden by e.g. DocGenerator. - - - name - name of the API - - mustBeFound - If True, when requirements for 'name' cannot be - determined, a warning comment is generated. 
- """ - - return '' - - def validateFeature(self, featureType, featureName): - """Validate we're generating something only inside a `` tag""" - if self.featureName is None: - raise UserWarning('Attempt to generate', featureType, - featureName, 'when not in feature') - - def genType(self, typeinfo, name, alias): - """Generate interface for a type - - - typeinfo - TypeInfo for a type - - Extend to generate as desired in your derived class.""" - self.validateFeature('type', name) - - def genStruct(self, typeinfo, typeName, alias): - """Generate interface for a C "struct" type. - - - typeinfo - TypeInfo for a type interpreted as a struct - - Extend to generate as desired in your derived class.""" - self.validateFeature('struct', typeName) - - # The mixed-mode tags may contain no-op tags. - # It is convenient to remove them here where all output generators - # will benefit. - for member in typeinfo.elem.findall('.//member'): - for comment in member.findall('comment'): - member.remove(comment) - - def genGroup(self, groupinfo, groupName, alias): - """Generate interface for a group of enums (C "enum") - - - groupinfo - GroupInfo for a group. - - Extend to generate as desired in your derived class.""" - - self.validateFeature('group', groupName) - - def genEnum(self, enuminfo, typeName, alias): - """Generate interface for an enum (constant). - - - enuminfo - EnumInfo for an enum - - name - enum name - - Extend to generate as desired in your derived class.""" - self.validateFeature('enum', typeName) - - def genCmd(self, cmd, cmdinfo, alias): - """Generate interface for a command. - - - cmdinfo - CmdInfo for a command - - Extend to generate as desired in your derived class.""" - self.validateFeature('command', cmdinfo) - - def genSpirv(self, spirv, spirvinfo, alias): - """Generate interface for a spirv element. 
- - - spirvinfo - SpirvInfo for a command - - Extend to generate as desired in your derived class.""" - return - - def makeProtoName(self, name, tail): - """Turn a `` `` into C-language prototype - and typedef declarations for that name. - - - name - contents of `` tag - - tail - whatever text follows that tag in the Element""" - return self.genOpts.apientry + name + tail - - def makeTypedefName(self, name, tail): - """Make the function-pointer typedef name for a command.""" - return '(' + self.genOpts.apientryp + 'PFN_' + name + tail + ')' - - def makeCParamDecl(self, param, aligncol): - """Return a string which is an indented, formatted - declaration for a `` or `` block (e.g. function parameter - or structure/union member). - - - param - Element (`` or ``) to format - - aligncol - if non-zero, attempt to align the nested `` element - at this column""" - indent = ' ' - paramdecl = indent - prefix = noneStr(param.text) - - for elem in param: - text = noneStr(elem.text) - tail = noneStr(elem.tail) - - if self.should_insert_may_alias_macro and self.genOpts.conventions.is_voidpointer_alias(elem.tag, text, tail): - # OpenXR-specific macro insertion - but not in apiinc for the spec - tail = self.genOpts.conventions.make_voidpointer_alias(tail) - if elem.tag == 'name' and aligncol > 0: - self.logMsg('diag', 'Aligning parameter', elem.text, 'to column', self.genOpts.alignFuncParam) - # Align at specified column, if possible - paramdecl = paramdecl.rstrip() - oldLen = len(paramdecl) - # This works around a problem where very long type names - - # longer than the alignment column - would run into the tail - # text. - paramdecl = paramdecl.ljust(aligncol - 1) + ' ' - newLen = len(paramdecl) - self.logMsg('diag', 'Adjust length of parameter decl from', oldLen, 'to', newLen, ':', paramdecl) - - if (self.misracppstyle() and prefix.find('const ') != -1): - # Change pointer type order from e.g. "const void *" to "void const *". 
- # If the string starts with 'const', reorder it to be after the first type. - paramdecl += prefix.replace('const ', '') + text + ' const' + tail - else: - paramdecl += prefix + text + tail - - # Clear prefix for subsequent iterations - prefix = '' - - # If prefix was originally non-empty and the param has no elements - # (e.g. is nothing but text), preserve it. - paramdecl = paramdecl + prefix - - if aligncol == 0: - # Squeeze out multiple spaces other than the indentation - paramdecl = indent + ' '.join(paramdecl.split()) - return paramdecl - - def getCParamTypeLength(self, param): - """Return the length of the type field is an indented, formatted - declaration for a `` or `` block (e.g. function parameter - or structure/union member). - - - param - Element (`` or ``) to identify""" - - # Allow for missing tag - newLen = 0 - paramdecl = ' ' + noneStr(param.text) - for elem in param: - text = noneStr(elem.text) - tail = noneStr(elem.tail) - - if self.should_insert_may_alias_macro and self.genOpts.conventions.is_voidpointer_alias(elem.tag, text, tail): - # OpenXR-specific macro insertion - tail = self.genOpts.conventions.make_voidpointer_alias(tail) - if elem.tag == 'name': - # Align at specified column, if possible - newLen = len(paramdecl.rstrip()) - self.logMsg('diag', 'Identifying length of', elem.text, 'as', newLen) - paramdecl += text + tail - - return newLen - - def getMaxCParamTypeLength(self, info): - """Return the length of the longest type field for a member/parameter. - - - info - TypeInfo or CommandInfo. 
- """ - lengths = (self.getCParamTypeLength(member) - for member in info.getMembers()) - return max(lengths) - - def getHandleParent(self, typename): - """Get the parent of a handle object.""" - info = self.registry.typedict.get(typename) - if info is None: - return None - - elem = info.elem - if elem is not None: - return elem.get('parent') - - return None - - def iterateHandleAncestors(self, typename): - """Iterate through the ancestors of a handle type.""" - current = self.getHandleParent(typename) - while current is not None: - yield current - current = self.getHandleParent(current) - - def getHandleAncestors(self, typename): - """Get the ancestors of a handle object.""" - return list(self.iterateHandleAncestors(typename)) - - def getTypeCategory(self, typename): - """Get the category of a type.""" - info = self.registry.typedict.get(typename) - if info is None: - return None - - elem = info.elem - if elem is not None: - return elem.get('category') - return None - - def isStructAlwaysValid(self, structname): - """Try to do check if a structure is always considered valid (i.e. there's no rules to its acceptance).""" - # A conventions object is required for this call. 
- if not self.conventions: - raise RuntimeError("To use isStructAlwaysValid, be sure your options include a Conventions object.") - - if self.conventions.type_always_valid(structname): - return True - - category = self.getTypeCategory(structname) - if self.conventions.category_requires_validation(category): - return False - - info = self.registry.typedict.get(structname) - assert(info is not None) - - members = info.getMembers() - - for member in members: - member_name = getElemName(member) - if member_name in (self.conventions.structtype_member_name, - self.conventions.nextpointer_member_name): - return False - - if member.get('noautovalidity'): - return False - - member_type = getElemType(member) - - if member_type in ('void', 'char') or self.paramIsArray(member) or self.paramIsPointer(member): - return False - - if self.conventions.type_always_valid(member_type): - continue - - member_category = self.getTypeCategory(member_type) - - if self.conventions.category_requires_validation(member_category): - return False - - if member_category in ('struct', 'union'): - if self.isStructAlwaysValid(member_type) is False: - return False - - return True - - def isEnumRequired(self, elem): - """Return True if this `` element is - required, False otherwise - - - elem - `` element to test""" - required = elem.get('required') is not None - self.logMsg('diag', 'isEnumRequired:', elem.get('name'), - '->', required) - return required - - # @@@ This code is overridden by equivalent code now run in - # @@@ Registry.generateFeature - - required = False - - extname = elem.get('extname') - if extname is not None: - # 'supported' attribute was injected when the element was - # moved into the group in Registry.parseTree() - if self.genOpts.defaultExtensions == elem.get('supported'): - required = True - elif re.match(self.genOpts.addExtensions, extname) is not None: - required = True - elif elem.get('version') is not None: - required = re.match(self.genOpts.emitversions, 
elem.get('version')) is not None - else: - required = True - - return required - - def makeCDecls(self, cmd): - """Return C prototype and function pointer typedef for a - `` Element, as a two-element list of strings. - - - cmd - Element containing a `` tag""" - proto = cmd.find('proto') - params = cmd.findall('param') - # Begin accumulating prototype and typedef strings - pdecl = self.genOpts.apicall - tdecl = 'typedef ' - - # Insert the function return type/name. - # For prototypes, add APIENTRY macro before the name - # For typedefs, add (APIENTRY *) around the name and - # use the PFN_cmdnameproc naming convention. - # Done by walking the tree for element by element. - # etree has elem.text followed by (elem[i], elem[i].tail) - # for each child element and any following text - # Leading text - pdecl += noneStr(proto.text) - tdecl += noneStr(proto.text) - # For each child element, if it's a wrap in appropriate - # declaration. Otherwise append its contents and tail contents. - for elem in proto: - text = noneStr(elem.text) - tail = noneStr(elem.tail) - if elem.tag == 'name': - pdecl += self.makeProtoName(text, tail) - tdecl += self.makeTypedefName(text, tail) - else: - pdecl += text + tail - tdecl += text + tail - - if self.genOpts.alignFuncParam == 0: - # Squeeze out multiple spaces - there is no indentation - pdecl = ' '.join(pdecl.split()) - tdecl = ' '.join(tdecl.split()) - - # Now add the parameter declaration list, which is identical - # for prototypes and typedefs. Concatenate all the text from - # a node without the tags. No tree walking required - # since all tags are ignored. 
- # Uses: self.indentFuncProto - # self.indentFuncPointer - # self.alignFuncParam - n = len(params) - # Indented parameters - if n > 0: - indentdecl = '(\n' - indentdecl += ',\n'.join(self.makeCParamDecl(p, self.genOpts.alignFuncParam) - for p in params) - indentdecl += ');' - else: - indentdecl = '(void);' - # Non-indented parameters - paramdecl = '(' - if n > 0: - paramnames = [] - if self.misracppstyle(): - for p in params: - param = '' - firstIter = True; - for t in p.itertext(): - if (firstIter): - prefix = t - firstIter = False - else: - # Change pointer type order from e.g. "const void *" to "void const *". - # If the string starts with 'const', reorder it to be after the first type. - if (prefix.find('const ') != -1): - param += prefix.replace('const ', '') + t + ' const ' - else: - param += prefix + t - # Clear prefix for subsequent iterations - prefix = '' - paramnames.append(param); - else: - paramnames = (''.join(t for t in p.itertext()) - for p in params) - paramdecl += ', '.join(paramnames) - else: - paramdecl += 'void' - paramdecl += ");" - return [pdecl + indentdecl, tdecl + paramdecl] - - def newline(self): - """Print a newline to the output file (utility function)""" - write('', file=self.outFile) - - def setRegistry(self, registry): - self.registry = registry diff --git a/xml/pygenerator.py b/xml/pygenerator.py deleted file mode 100644 index b2e76e66d..000000000 --- a/xml/pygenerator.py +++ /dev/null @@ -1,365 +0,0 @@ -#!/usr/bin/python3 -i -# -# Copyright 2013-2023 The Khronos Group Inc. -# -# SPDX-License-Identifier: Apache-2.0 - -import sys -from generator import OutputGenerator, enquote, noneStr, write -import pprint - -class PyOutputGenerator(OutputGenerator): - """PyOutputGenerator - subclass of OutputGenerator. - Generates Python data structures describing API names and relationships. - Similar to DocOutputGenerator, but writes a single file.""" - - def apiName(self, name): - """Return True if name is in the reserved API namespace. 
- - Delegates to the conventions object. """ - return self.genOpts.conventions.is_api_name(name) - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - # Track features being generated - self.features = [] - - # Reverse map from interface names to features requiring them - self.apimap = {} - - def beginFile(self, genOpts): - OutputGenerator.beginFile(self, genOpts) - # - # Dictionaries are keyed by the name of the entity (e.g. - # self.structs is keyed by structure names). Values are - # the names of related entities (e.g. structs contain - # a list of type names of members, enums contain a list - # of enumerants belong to the enumerated type, etc.), or - # just None if there are no directly related entities. - # - # Collect the mappings, then emit the Python script in endFile - self.basetypes = {} - self.consts = {} - self.enums = {} - self.flags = {} - self.funcpointers = {} - self.protos = {} - self.structs = {} - self.handles = {} - self.defines = {} - self.alias = {} - # Dictionary containing the type of a type name - # (e.g. the string name of the dictionary with its contents). - self.typeCategory = {} - self.mapDict = {} - - def addInterfaceMapping(self, api, feature, required): - """Add a reverse mapping in self.apimap from an API to a feature - requiring that API. - - - api - name of the API - - feature - name of the feature requiring it - - required - None, or an additional feature dependency within - 'feature' """ - - # Each entry in self.apimap contains one or more - # ( feature, required ) tuples. - deps = ( feature, required ) - - if api in self.apimap: - self.apimap[api].append(deps) - else: - self.apimap[api] = [ deps ] - - def mapInterfaceKeys(self, feature, key): - """Construct reverse mapping of APIs to features requiring them in - self.apimap. 
- - - feature - name of the feature being generated - - key - API category - 'define', 'basetype', etc.""" - - dict = self.featureDictionary[feature][key] - - if dict: - # Not clear why handling of command vs. type APIs is different - - # see interfacedocgenerator.py, which this was based on. - if key == 'command': - for required in dict: - for api in dict[required]: - self.addInterfaceMapping(api, feature, required) - else: - for required in dict: - for parent in dict[required]: - for api in dict[required][parent]: - self.addInterfaceMapping(api, feature, required) - - def mapInterfaces(self, feature): - """Construct reverse mapping of APIs to features requiring them in - self.apimap. - - - feature - name of the feature being generated""" - - # Map each category of interface - self.mapInterfaceKeys(feature, 'basetype') - self.mapInterfaceKeys(feature, 'bitmask') - self.mapInterfaceKeys(feature, 'command') - self.mapInterfaceKeys(feature, 'define') - self.mapInterfaceKeys(feature, 'enum') - self.mapInterfaceKeys(feature, 'enumconstant') - self.mapInterfaceKeys(feature, 'funcpointer') - self.mapInterfaceKeys(feature, 'handle') - self.mapInterfaceKeys(feature, 'include') - self.mapInterfaceKeys(feature, 'struct') - self.mapInterfaceKeys(feature, 'union') - - def endFile(self): - # Print out all the dictionaries as Python strings. 
- # Could just print(dict) but that's not human-readable - dicts = ( [ self.basetypes, 'basetypes' ], - [ self.consts, 'consts' ], - [ self.enums, 'enums' ], - [ self.flags, 'flags' ], - [ self.funcpointers, 'funcpointers' ], - [ self.protos, 'protos' ], - [ self.structs, 'structs' ], - [ self.handles, 'handles' ], - [ self.defines, 'defines' ], - [ self.typeCategory, 'typeCategory' ], - [ self.alias, 'alias' ] ) - for (entry_dict, name) in dicts: - write(name + ' = {}', file=self.outFile) - for key in sorted(entry_dict.keys()): - write(name + '[' + enquote(key) + '] = ', entry_dict[key], - file=self.outFile) - - # Dictionary containing the relationships of a type - # (e.g. a dictionary with each related type as keys). - write('mapDict = {}', file=self.outFile) - - # Could just print(self.mapDict), but prefer something - # human-readable and stable-ordered - for baseType in sorted(self.mapDict.keys()): - write('mapDict[' + enquote(baseType) + '] = ', file=self.outFile, end='') - pprint.pprint(self.mapDict[baseType], self.outFile) - - # Generate feature <-> interface mappings - for feature in self.features: - self.mapInterfaces(feature) - - # Write out the reverse map from APIs to requiring features - write('requiredBy = {}', file=self.outFile) - - for api in sorted(self.apimap): - # Construct list of requirements as Python list arguments - ##reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in self.apimap[api]) - ##write('requiredBy[{}] = ( {} )'.format(enquote(api), reqs), file=self.outFile) - - # Ideally these would be sorted by dep[0] as well - reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in self.apimap[api]) - write('requiredBy[{}] = {}'.format(enquote(api), pprint.saferepr(self.apimap[api])), file=self.outFile) - - OutputGenerator.endFile(self) - - def beginFeature(self, interface, emit): - # Start processing in superclass - OutputGenerator.beginFeature(self, interface, emit) - - # Add this feature to 
the list being tracked - self.features.append( self.featureName ) - - def endFeature(self): - # Finish processing in superclass - OutputGenerator.endFeature(self) - - def addName(self, entry_dict, name, value): - """Add a string entry to the dictionary, quoting it so it gets printed - out correctly in self.endFile().""" - entry_dict[name] = enquote(value) - - def addMapping(self, baseType, refType): - """Add a mapping between types to mapDict. - - Only include API types, so we don't end up with a lot of useless uint32_t and void types.""" - if not self.apiName(baseType) or not self.apiName(refType): - self.logMsg('diag', 'PyOutputGenerator::addMapping: IGNORE map from', baseType, '<->', refType) - return - - self.logMsg('diag', 'PyOutputGenerator::addMapping: map from', - baseType, '<->', refType) - - if baseType not in self.mapDict: - baseDict = {} - self.mapDict[baseType] = baseDict - else: - baseDict = self.mapDict[baseType] - if refType not in self.mapDict: - refDict = {} - self.mapDict[refType] = refDict - else: - refDict = self.mapDict[refType] - - baseDict[refType] = None - refDict[baseType] = None - - def genType(self, typeinfo, name, alias): - """Generate type. - - - For 'struct' or 'union' types, defer to genStruct() to - add to the dictionary. - - For 'bitmask' types, add the type name to the 'flags' dictionary, - with the value being the corresponding 'enums' name defining - the acceptable flag bits. - - For 'enum' types, add the type name to the 'enums' dictionary, - with the value being '@STOPHERE@' (because this case seems - never to happen). - - For 'funcpointer' types, add the type name to the 'funcpointers' - dictionary. 
- - For 'handle' and 'define' types, add the handle or #define name - to the 'struct' dictionary, because that's how the spec sources - tag these types even though they aren't structs.""" - OutputGenerator.genType(self, typeinfo, name, alias) - typeElem = typeinfo.elem - # If the type is a struct type, traverse the embedded tags - # generating a structure. Otherwise, emit the tag text. - category = typeElem.get('category') - - # Add a typeCategory{} entry for the category of this type. - self.addName(self.typeCategory, name, category) - - if category in ('struct', 'union'): - self.genStruct(typeinfo, name, alias) - else: - if alias: - # Add name -> alias mapping - self.addName(self.alias, name, alias) - - # Always emit an alias (?!) - count = 1 - - # May want to only emit full type definition when not an alias? - else: - # Extract the type name - # (from self.genOpts). Copy other text through unchanged. - # If the resulting text is an empty string, don't emit it. - count = len(noneStr(typeElem.text)) - for elem in typeElem: - count += len(noneStr(elem.text)) + len(noneStr(elem.tail)) - - if count > 0: - if category == 'bitmask': - requiredEnum = typeElem.get('requires') - self.addName(self.flags, name, requiredEnum) - - # This happens when the Flags type is defined, but no - # FlagBits are defined yet. - if requiredEnum is not None: - self.addMapping(name, requiredEnum) - elif category == 'enum': - # This case does not seem to come up. It nominally would - # result from - # , - # but the output generator doesn't emit them directly. - self.logMsg('warn', 'PyOutputGenerator::genType: invalid \'enum\' category for name:', name) - elif category == 'funcpointer': - self.funcpointers[name] = None - elif category == 'handle': - self.handles[name] = None - elif category == 'define': - self.defines[name] = None - elif category == 'basetype': - # Don't add an entry for base types that are not API types - # e.g. 
an API Bool type gets an entry, uint32_t does not - if self.apiName(name): - self.basetypes[name] = None - self.addName(self.typeCategory, name, 'basetype') - else: - self.logMsg('diag', 'PyOutputGenerator::genType: unprocessed type:', name, 'category:', category) - else: - self.logMsg('diag', 'PyOutputGenerator::genType: unprocessed type:', name) - - def genStruct(self, typeinfo, typeName, alias): - """Generate struct (e.g. C "struct" type). - - Add the struct name to the 'structs' dictionary, with the - value being an ordered list of the struct member names.""" - OutputGenerator.genStruct(self, typeinfo, typeName, alias) - - if alias: - # Add name -> alias mapping - self.addName(self.alias, typeName, alias) - else: - # May want to only emit definition on this branch - True - - members = [member.text for member in typeinfo.elem.findall('.//member/name')] - self.structs[typeName] = members - memberTypes = [member.text for member in typeinfo.elem.findall('.//member/type')] - for member_type in memberTypes: - self.addMapping(typeName, member_type) - - def genGroup(self, groupinfo, groupName, alias): - """Generate group (e.g. C "enum" type). - - These are concatenated together with other types. - - - Add the enum type name to the 'enums' dictionary, with - the value being an ordered list of the enumerant names. - - Add each enumerant name to the 'consts' dictionary, with - the value being the enum type the enumerant is part of.""" - OutputGenerator.genGroup(self, groupinfo, groupName, alias) - groupElem = groupinfo.elem - - if alias: - # Add name -> alias mapping - self.addName(self.alias, groupName, alias) - else: - # May want to only emit definition on this branch - True - - # Loop over the nested 'enum' tags. 
- enumerants = [elem.get('name') for elem in groupElem.findall('enum')] - for name in enumerants: - self.addName(self.consts, name, groupName) - self.enums[groupName] = enumerants - - def genEnum(self, enuminfo, name, alias): - """Generate enumerant (compile-time constants). - - - Add the constant name to the 'consts' dictionary, with the - value being None to indicate that the constant isn't - an enumeration value.""" - OutputGenerator.genEnum(self, enuminfo, name, alias) - - if name not in self.consts: - # Add a typeCategory{} entry for the category of this type. - self.addName(self.typeCategory, name, 'consts') - self.consts[name] = None - # Otherwise, don't add it to the consts dictionary because it's - # already present. This happens due to the generator 'reparentEnums' - # parameter being False, so each extension enum appears in both the - # type and in the or it originally - # came from. - - def genCmd(self, cmdinfo, name, alias): - """Generate command. - - - Add the command name to the 'protos' dictionary, with the - value being an ordered list of the parameter names.""" - OutputGenerator.genCmd(self, cmdinfo, name, alias) - - if alias: - # Add name -> alias mapping - self.addName(self.alias, name, alias) - else: - # May want to only emit definition on this branch - True - - # Add a typeCategory{} entry for the category of this type. - self.addName(self.typeCategory, name, 'protos') - - params = [param.text for param in cmdinfo.elem.findall('param/name')] - self.protos[name] = params - paramTypes = [param.text for param in cmdinfo.elem.findall('param/type')] - for param_type in paramTypes: - self.addMapping(name, param_type) diff --git a/xml/realign.py b/xml/realign.py deleted file mode 100644 index b59865b3d..000000000 --- a/xml/realign.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/python3 -# -# Copyright 2013-2023 The Khronos Group Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -# Usage: realign [infile] > outfile -# Used to realign XML tags in the Vulkan registry after it's operated on by -# some other filter, since whitespace inside a tag isn't part of the -# internal representation. - -import copy, sys, string, re - -def realignXML(fp): - patterns = [ - [ '(^ *\ 1): - realignXML(open(sys.argv[1], 'r', encoding='utf-8')) - else: - realignXML(sys.stdin) diff --git a/xml/reflib.py b/xml/reflib.py deleted file mode 100644 index 426a1811b..000000000 --- a/xml/reflib.py +++ /dev/null @@ -1,663 +0,0 @@ -#!/usr/bin/python3 -# -# Copyright 2016-2023 The Khronos Group Inc. -# -# SPDX-License-Identifier: Apache-2.0 - -# Utility functions for automatic ref page generation and other script stuff - -import io -import re -import sys -import subprocess - -# global errFile, warnFile, diagFile - -errFile = sys.stderr -warnFile = sys.stdout -diagFile = None -logSourcefile = None -logProcname = None -logLine = None - -def unescapeQuotes(s): - """Remove \' escape sequences in a string (refpage description)""" - return s.replace('\\\'', '\'') - -def write(*args, **kwargs ): - file = kwargs.pop('file',sys.stdout) - end = kwargs.pop('end','\n') - file.write(' '.join(str(arg) for arg in args)) - file.write(end) - -def setLogSourcefile(filename): - """Metadata which may be printed (if not None) for diagnostic messages""" - global logSourcefile - logSourcefile = filename - -def setLogProcname(procname): - global logProcname - logProcname = procname - -def setLogLine(line): - global logLine - logLine = line - -def logHeader(severity): - """Generate prefix for a diagnostic line using metadata and severity""" - global logSourcefile, logProcname, logLine - - msg = severity + ': ' - if logProcname: - msg = msg + ' in ' + logProcname - if logSourcefile: - msg = msg + ' for ' + logSourcefile - if logLine: - msg = msg + ' line ' + str(logLine) - return msg + ' ' - -def setLogFile(setDiag, setWarn, filename): - """Set the file 
handle to log either or both warnings and diagnostics to. - - - setDiag and setWarn are True if the corresponding handle is to be set. - - filename is None for no logging, '-' for stdout, or a pathname.""" - global diagFile, warnFile - - if filename is None: - return - - if filename == '-': - fp = sys.stdout - else: - fp = open(filename, 'w', encoding='utf-8') - - if setDiag: - diagFile = fp - if setWarn: - warnFile = fp - -def logDiag(*args, **kwargs): - file = kwargs.pop('file', diagFile) - end = kwargs.pop('end','\n') - if file is not None: - file.write(logHeader('DIAG') + ' '.join(str(arg) for arg in args)) - file.write(end) - -def logWarn(*args, **kwargs): - file = kwargs.pop('file', warnFile) - end = kwargs.pop('end','\n') - if file is not None: - file.write(logHeader('WARN') + ' '.join(str(arg) for arg in args)) - file.write(end) - -def logErr(*args, **kwargs): - file = kwargs.pop('file', errFile) - end = kwargs.pop('end','\n') - - strfile = io.StringIO() - strfile.write(logHeader('ERROR') + ' '.join(str(arg) for arg in args)) - strfile.write(end) - - if file is not None: - file.write(strfile.getvalue()) - sys.exit(1) - -def isempty(s): - """Return True if s is nothing but white space, False otherwise""" - return len(''.join(s.split())) == 0 - -class pageInfo: - """Information about a ref page relative to the file it's extracted from.""" - def __init__(self): - self.extractPage = True - """True if page should be extracted""" - - self.Warning = None - """string warning if page is suboptimal or can't be generated""" - - self.embed = False - """False or the name of the ref page this include is embedded within""" - - self.type = None - """'structs', 'protos', 'funcpointers', 'flags', 'enums'""" - - self.name = None - """struct/proto/enumerant/etc. 
name""" - - self.desc = None - """short description of ref page""" - - self.begin = None - """index of first line of the page (heuristic or // refBegin)""" - - self.include = None - """index of include:: line defining the page""" - - self.param = None - """index of first line of parameter/member definitions""" - - self.body = None - """index of first line of body text""" - - self.validity = None - """index of validity include""" - - self.end = None - """index of last line of the page (heuristic validity include, or // refEnd)""" - - self.alias = '' - """aliases of this name, if supplied, or ''""" - - self.refs = '' - """cross-references on // refEnd line, if supplied""" - - self.spec = None - """'spec' attribute in refpage open block, if supplied, or None for the default ('api') type""" - - self.anchor = None - """'anchor' attribute in refpage open block, if supplied, or inferred to be the same as the 'name'""" - -def printPageInfoField(desc, line, file): - """Print a single field of a pageInfo struct, possibly None. 
- - - desc - string description of field - - line - field value or None - - file - indexed by line""" - if line is not None: - logDiag(desc + ':', line + 1, '\t-> ', file[line], end='') - else: - logDiag(desc + ':', line) - -def printPageInfo(pi, file): - """Print out fields of a pageInfo struct - - - pi - pageInfo - - file - indexed by pageInfo""" - logDiag('TYPE: ', pi.type) - logDiag('NAME: ', pi.name) - logDiag('WARNING:', pi.Warning) - logDiag('EXTRACT:', pi.extractPage) - logDiag('EMBED: ', pi.embed) - logDiag('DESC: ', pi.desc) - printPageInfoField('BEGIN ', pi.begin, file) - printPageInfoField('INCLUDE ', pi.include, file) - printPageInfoField('PARAM ', pi.param, file) - printPageInfoField('BODY ', pi.body, file) - printPageInfoField('VALIDITY', pi.validity, file) - printPageInfoField('END ', pi.end, file) - logDiag('REFS: "' + pi.refs + '"') - -def prevPara(file, line): - """Go back one paragraph from the specified line and return the line number - of the first line of that paragraph. - - Paragraphs are delimited by blank lines. It is assumed that the - current line is the first line of a paragraph. - - - file is an array of strings - - line is the starting point (zero-based)""" - # Skip over current paragraph - while (line >= 0 and not isempty(file[line])): - line = line - 1 - # Skip over white space - while (line >= 0 and isempty(file[line])): - line = line - 1 - # Skip to first line of previous paragraph - while (line >= 1 and not isempty(file[line-1])): - line = line - 1 - return line - -def nextPara(file, line): - """Go forward one paragraph from the specified line and return the line - number of the first line of that paragraph. - - Paragraphs are delimited by blank lines. It is assumed that the - current line is standalone (which is bogus). 
- - - file is an array of strings - - line is the starting point (zero-based)""" - maxLine = len(file) - 1 - # Skip over current paragraph - while (line != maxLine and not isempty(file[line])): - line = line + 1 - # Skip over white space - while (line != maxLine and isempty(file[line])): - line = line + 1 - return line - -def lookupPage(pageMap, name): - """Return (creating if needed) the pageInfo entry in pageMap for name""" - if name not in pageMap: - pi = pageInfo() - pi.name = name - pageMap[name] = pi - else: - pi = pageMap[name] - return pi - -def loadFile(filename): - """Load a file into a list of strings. Return the list or None on failure""" - try: - fp = open(filename, 'r', encoding='utf-8') - except: - logWarn('Cannot open file', filename, ':', sys.exc_info()[0]) - return None - - file = fp.readlines() - fp.close() - - return file - -def clampToBlock(line, minline, maxline): - """Clamp a line number to be in the range [minline,maxline]. - - If the line number is None, just return it. - If minline is None, don't clamp to that value.""" - if line is None: - return line - if minline and line < minline: - return minline - if line > maxline: - return maxline - - return line - -def fixupRefs(pageMap, specFile, file): - """Fill in missing fields in pageInfo structures, to the extent they can be - inferred. - - - pageMap - dictionary of pageInfo structures - - specFile - filename - - file - list of strings making up the file, indexed by pageInfo""" - # All potential ref pages are now in pageMap. Process them to - # identify actual page start/end/description boundaries, if - # not already determined from the text. - for name in sorted(pageMap.keys()): - pi = pageMap[name] - - # # If nothing is found but an include line with no begin, validity, - # # or end, this is not intended as a ref page (yet). Set the begin - # # line to the include line, so autogeneration can at least - # # pull the include out, but mark it not to be extracted. 
- # # Examples include the host sync table includes in - # # chapters/fundamentals.txt and the table of Vk*Flag types in - # # appendices/boilerplate.txt. - # if pi.begin is None and pi.validity is None and pi.end is None: - # pi.begin = pi.include - # pi.extractPage = False - # pi.Warning = 'No begin, validity, or end lines identified' - # continue - - # Using open block delimiters, ref pages must *always* have a - # defined begin and end. If either is undefined, that's fatal. - if pi.begin is None: - pi.extractPage = False - pi.Warning = 'Can\'t identify begin of ref page open block' - continue - - if pi.end is None: - pi.extractPage = False - pi.Warning = 'Can\'t identify end of ref page open block' - continue - - # If there's no description of the page, infer one from the type - if pi.desc is None: - if pi.type is not None: - # pi.desc = pi.type[0:len(pi.type)-1] + ' (no short description available)' - pi.Warning = 'No short description available; could infer from the type and name' - else: - pi.extractPage = False - pi.Warning = 'No short description available, cannot infer from the type' - continue - - # Try to determine where the parameter and body sections of the page - # begin. funcpointer, proto, and struct pages infer the location of - # the parameter and body sections. Other pages infer the location of - # the body, but have no parameter sections. - if pi.include is not None: - if pi.type in ['funcpointers', 'protos', 'structs']: - pi.param = nextPara(file, pi.include) - if pi.body is None: - pi.body = nextPara(file, pi.param) - else: - if pi.body is None: - pi.body = nextPara(file, pi.include) - else: - pi.Warning = 'Page does not have an API definition include::' - - # It's possible for the inferred param and body lines to run past - # the end of block, if, for example, there is no parameter section. 
- pi.param = clampToBlock(pi.param, pi.include, pi.end) - pi.body = clampToBlock(pi.body, pi.param, pi.end) - - # We can get to this point with .include, .param, and .validity - # all being None, indicating those sections weren't found. - - logDiag('fixupRefs: after processing,', pi.name, 'looks like:') - printPageInfo(pi, file) - - # Now that all the valid pages have been found, try to make some - # inferences about invalid pages. - # - # If a reference without a .end is entirely inside a valid reference, - # then it's intentionally embedded - may want to create an indirect - # page that links into the embedding page. This is done by a very - # inefficient double loop, but the loop depth is small. - for name in sorted(pageMap.keys()): - pi = pageMap[name] - - if pi.end is None: - for embedName in sorted(pageMap.keys()): - logDiag('fixupRefs: comparing', pi.name, 'to', embedName) - embed = pageMap[embedName] - # Don't check embeddings which are themselves invalid - if not embed.extractPage: - logDiag('Skipping check for embedding in:', embed.name) - continue - if embed.begin is None or embed.end is None: - logDiag('fixupRefs:', name + ':', - 'can\'t compare to unanchored ref:', embed.name, - 'in', specFile, 'at line', pi.include ) - printPageInfo(pi, file) - printPageInfo(embed, file) - # If an embed is found, change the error to a warning - elif (pi.include is not None and pi.include >= embed.begin and - pi.include <= embed.end): - logDiag('fixupRefs: Found embed for:', name, - 'inside:', embedName, - 'in', specFile, 'at line', pi.include ) - pi.embed = embed.name - pi.Warning = 'Embedded in definition for ' + embed.name - break - else: - logDiag('fixupRefs: No embed match for:', name, - 'inside:', embedName, 'in', specFile, - 'at line', pi.include) - - -# Patterns used to recognize interesting lines in an asciidoc source file. -# These patterns are only compiled once. 
-INCSVAR_DEF = re.compile(r':INCS-VAR: (?P.*)') -endifPat = re.compile(r'^endif::(?P[\w_+,]+)\[\]') -beginPat = re.compile(r'^\[open,(?Prefpage=.*)\]') -# attribute key/value pairs of an open block -attribStr = r"([a-z]+)='([^'\\]*(?:\\.[^'\\]*)*)'" -attribPat = re.compile(attribStr) -bodyPat = re.compile(r'^// *refBody') -errorPat = re.compile(r'^// *refError') - -# This regex transplanted from check_spec_links -# It looks for either OpenXR or Vulkan generated file conventions, and for -# the api/validity include (generated_type), protos/struct/etc path -# (category), and API name (entity_name). It could be put into the API -# conventions object. -INCLUDE = re.compile( - r'include::(?P((../){1,4}|\{INCS-VAR\}/|\{generated\}/)(generated/)?)(?P[\w]+)/(?P\w+)/(?P[^./]+).txt[\[][\]]') - - -def findRefs(file, filename): - """Identify reference pages in a list of strings, returning a dictionary of - pageInfo entries for each one found, or None on failure.""" - setLogSourcefile(filename) - setLogProcname('findRefs') - - # To reliably detect the open blocks around reference pages, we must - # first detect the '[open,refpage=...]' markup delimiting the block; - # skip past the '--' block delimiter on the next line; and identify the - # '--' block delimiter closing the page. - # This can't be done solely with pattern matching, and requires state to - # track 'inside/outside block'. 
- # When looking for open blocks, possible states are: - # 'outside' - outside a block - # 'start' - have found the '[open...]' line - # 'inside' - have found the following '--' line - openBlockState = 'outside' - - # Dictionary of interesting line numbers and strings related to an API - # name - pageMap = {} - - numLines = len(file) - line = 0 - - # Track the pageInfo object corresponding to the current open block - pi = None - incsvar = None - - while (line < numLines): - setLogLine(line) - - # Look for a file-wide definition - matches = INCSVAR_DEF.match(file[line]) - if matches: - incsvar = matches.group('value') - logDiag('Matched INCS-VAR definition:', incsvar) - - line = line + 1 - continue - - # Perform INCS-VAR substitution immediately. - if incsvar and '{INCS-VAR}' in file[line]: - newLine = file[line].replace('{INCS-VAR}', incsvar) - logDiag('PERFORMING SUBSTITUTION', file[line], '->', newLine) - file[line] = newLine - - # Only one of the patterns can possibly match. Add it to - # the dictionary for that name. - - # [open,refpage=...] 
starting a refpage block - matches = beginPat.search(file[line]) - if matches is not None: - logDiag('Matched open block pattern') - attribs = matches.group('attribs') - - # If the previous open block wasn't closed, raise an error - if openBlockState != 'outside': - logErr('Nested open block starting at line', line, 'of', - filename) - - openBlockState = 'start' - - # Parse the block attributes - matches = attribPat.findall(attribs) - - # Extract each attribute - name = None - desc = None - refpage_type = None - spec_type = None - anchor = None - alias = None - xrefs = None - - for (key,value) in matches: - logDiag('got attribute', key, '=', value) - if key == 'refpage': - name = value - elif key == 'desc': - desc = unescapeQuotes(value) - elif key == 'type': - refpage_type = value - elif key == 'spec': - spec_type = value - elif key == 'anchor': - anchor = value - elif key == 'alias': - alias = value - elif key == 'xrefs': - xrefs = value - else: - logWarn('unknown open block attribute:', key) - - if name is None or desc is None or refpage_type is None: - logWarn('missing one or more required open block attributes:' - 'refpage, desc, or type') - # Leave pi is None so open block delimiters are ignored - else: - pi = lookupPage(pageMap, name) - pi.desc = desc - # Must match later type definitions in interface/validity includes - pi.type = refpage_type - pi.spec = spec_type - pi.anchor = anchor - if alias: - pi.alias = alias - if xrefs: - pi.refs = xrefs - logDiag('open block for', name, 'added DESC =', desc, - 'TYPE =', refpage_type, 'ALIAS =', alias, - 'XREFS =', xrefs, 'SPEC =', spec_type, - 'ANCHOR =', anchor) - - line = line + 1 - continue - - # '--' starting or ending and open block - if file[line].rstrip() == '--': - if openBlockState == 'outside': - # Only refpage open blocks should use -- delimiters - logWarn('Unexpected double-dash block delimiters') - elif openBlockState == 'start': - # -- delimiter following [open,refpage=...] 
- openBlockState = 'inside' - - if pi is None: - logWarn('no pageInfo available for opening -- delimiter') - else: - pi.begin = line + 1 - logDiag('opening -- delimiter: added BEGIN =', pi.begin) - elif openBlockState == 'inside': - # -- delimiter ending an open block - if pi is None: - logWarn('no pageInfo available for closing -- delimiter') - else: - pi.end = line - 1 - logDiag('closing -- delimiter: added END =', pi.end) - - openBlockState = 'outside' - pi = None - else: - logWarn('unknown openBlockState:', openBlockState) - - line = line + 1 - continue - - matches = INCLUDE.search(file[line]) - if matches is not None: - # Something got included, not sure what yet. - gen_type = matches.group('generated_type') - refpage_type = matches.group('category') - name = matches.group('entity_name') - - # This will never match in OpenCL - if gen_type == 'validity': - logDiag('Matched validity pattern') - if pi is not None: - if pi.type and refpage_type != pi.type: - logWarn('ERROR: pageMap[' + name + '] type:', - pi.type, 'does not match type:', refpage_type) - pi.type = refpage_type - pi.validity = line - logDiag('added TYPE =', pi.type, 'VALIDITY =', pi.validity) - else: - logWarn('validity include:: line NOT inside block') - - line = line + 1 - continue - - if gen_type == 'api': - logDiag('Matched include pattern') - if pi is not None: - if pi.include is not None: - logDiag('found multiple includes for this block') - if pi.type and refpage_type != pi.type: - logWarn('ERROR: pageMap[' + name + '] type:', - pi.type, 'does not match type:', refpage_type) - pi.type = refpage_type - pi.include = line - logDiag('added TYPE =', pi.type, 'INCLUDE =', pi.include) - else: - logWarn('interface include:: line NOT inside block') - - line = line + 1 - continue - - logDiag('ignoring unrecognized include line ', matches.group()) - - # Vulkan 1.1 markup allows the last API include construct to be - # followed by an asciidoctor endif:: construct (and also preceded, - # at some 
distance). - # This looks for endif:: immediately following an include:: line - # and, if found, moves the include boundary to this line. - matches = endifPat.search(file[line]) - if matches is not None and pi is not None: - if pi.include == line - 1: - logDiag('Matched endif pattern following include; moving include') - pi.include = line - else: - logDiag('Matched endif pattern (not following include)') - - line = line + 1 - continue - - matches = bodyPat.search(file[line]) - if matches is not None: - logDiag('Matched // refBody pattern') - if pi is not None: - pi.body = line - logDiag('added BODY =', pi.body) - else: - logWarn('// refBody line NOT inside block') - - line = line + 1 - continue - - # OpenCL spec uses // refError to tag "validity" (Errors) language, - # instead of /validity/ includes. - matches = errorPat.search(file[line]) - if matches is not None: - logDiag('Matched // refError pattern') - if pi is not None: - pi.validity = line - logDiag('added VALIDITY (refError) =', pi.validity) - else: - logWarn('// refError line NOT inside block') - - line = line + 1 - continue - - line = line + 1 - continue - - if pi is not None: - logErr('Unclosed open block at EOF!') - - setLogSourcefile(None) - setLogProcname(None) - setLogLine(None) - - return pageMap - - -def getBranch(): - """Determine current git branch - - Returns (branch name, ''), or (None, stderr output) if the branch name - can't be determined""" - - command = [ 'git', 'symbolic-ref', '--short', 'HEAD' ] - results = subprocess.run(command, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - - # git command failed - if len(results.stderr) > 0: - return (None, results.stderr) - - # Remove newline from output and convert to a string - branch = results.stdout.rstrip().decode() - if len(branch) > 0: - # Strip trailing newline - branch = results.stdout.decode()[0:-1] - - return (branch, '') diff --git a/xml/reg.py b/xml/reg.py deleted file mode 100644 index d78ecde89..000000000 --- a/xml/reg.py +++ 
/dev/null @@ -1,1397 +0,0 @@ -#!/usr/bin/python3 -i -# -# Copyright 2013-2023 The Khronos Group Inc. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Types and classes for manipulating an API registry.""" - -import copy -import re -import sys -import xml.etree.ElementTree as etree -from collections import defaultdict, namedtuple -from generator import OutputGenerator, GeneratorOptions, write -import pdb - -def apiNameMatch(str, supported): - """Return whether a required api name matches a pattern specified for an - XML 'api' attribute or 'supported' attribute. - - - str - api name such as 'vulkan' or 'openxr' - - supported - comma-separated list of XML API names""" - - return (str is not None and str in supported.split(',')) - - -def matchAPIProfile(api, profile, elem): - """Return whether an API and profile - being generated matches an element's profile - - - api - string naming the API to match - - profile - string naming the profile to match - - elem - Element which (may) have 'api' and 'profile' - attributes to match to. - - If a tag is not present in the Element, the corresponding API - or profile always matches. - - Otherwise, the tag must exactly match the API or profile. - - Thus, if 'profile' = core: - - - `` with no attribute will match - - `` will match - - `` will not match - - Possible match conditions: - - ``` - Requested Element - Profile Profile - --------- -------- - None None Always matches - 'string' None Always matches - None 'string' Does not match. Can't generate multiple APIs - or profiles, so if an API/profile constraint - is present, it must be asked for explicitly. - 'string' 'string' Strings must match - ``` - - ** In the future, we will allow regexes for the attributes, - not just strings, so that `api="^(gl|gles2)"` will match. 
Even - this isn't really quite enough, we might prefer something - like `"gl(core)|gles1(common-lite)"`.""" - # Match 'api', if present - elem_api = elem.get('api') - if elem_api: - if api is None: - raise UserWarning("No API requested, but 'api' attribute is present with value '" - + elem_api + "'") - elif api != elem_api: - # Requested API doesn't match attribute - return False - elem_profile = elem.get('profile') - if elem_profile: - if profile is None: - raise UserWarning("No profile requested, but 'profile' attribute is present with value '" - + elem_profile + "'") - elif profile != elem_profile: - # Requested profile doesn't match attribute - return False - return True - - -class BaseInfo: - """Base class for information about a registry feature - (type/group/enum/command/API/extension). - - Represents the state of a registry feature, used during API generation. - """ - - def __init__(self, elem): - self.required = False - """should this feature be defined during header generation - (has it been removed by a profile or version)?""" - - self.declared = False - "has this feature been defined already?" - - self.elem = elem - "etree Element for this feature" - - def resetState(self): - """Reset required/declared to initial values. Used - prior to generating a new API interface.""" - self.required = False - self.declared = False - - def compareKeys(self, info, key, required = False): - """Return True if self.elem and info.elem have the same attribute - value for key. - If 'required' is not True, also returns True if neither element - has an attribute value for key.""" - - if required and key not in self.elem.keys(): - return False - return self.elem.get(key) == info.elem.get(key) - - def compareElem(self, info, infoName): - """Return True if self.elem and info.elem have the same definition. 
- info - the other object - infoName - 'type' / 'group' / 'enum' / 'command' / 'feature' / - 'extension'""" - - if infoName == 'enum': - if self.compareKeys(info, 'extends'): - # Either both extend the same type, or no type - if (self.compareKeys(info, 'value', required = True) or - self.compareKeys(info, 'bitpos', required = True)): - # If both specify the same value or bit position, - # they're equal - return True - elif (self.compareKeys(info, 'extnumber') and - self.compareKeys(info, 'offset') and - self.compareKeys(info, 'dir')): - # If both specify the same relative offset, they're equal - return True - elif (self.compareKeys(info, 'alias')): - # If both are aliases of the same value - return True - else: - return False - else: - # The same enum can't extend two different types - return False - else: - # Non-s should never be redefined - return False - - -class TypeInfo(BaseInfo): - """Registry information about a type. No additional state - beyond BaseInfo is required.""" - - def __init__(self, elem): - BaseInfo.__init__(self, elem) - self.additionalValidity = [] - self.removedValidity = [] - - def getMembers(self): - """Get a collection of all member elements for this type, if any.""" - return self.elem.findall('member') - - def resetState(self): - BaseInfo.resetState(self) - self.additionalValidity = [] - self.removedValidity = [] - - -class GroupInfo(BaseInfo): - """Registry information about a group of related enums - in an block, generally corresponding to a C "enum" type.""" - - def __init__(self, elem): - BaseInfo.__init__(self, elem) - - -class EnumInfo(BaseInfo): - """Registry information about an enum""" - - def __init__(self, elem): - BaseInfo.__init__(self, elem) - self.type = elem.get('type') - """numeric type of the value of the tag - ( '' for GLint, 'u' for GLuint, 'ull' for GLuint64 )""" - if self.type is None: - self.type = '' - - -class CmdInfo(BaseInfo): - """Registry information about a command""" - - def __init__(self, elem): - 
BaseInfo.__init__(self, elem) - self.additionalValidity = [] - self.removedValidity = [] - - def getParams(self): - """Get a collection of all param elements for this command, if any.""" - return self.elem.findall('param') - - def resetState(self): - BaseInfo.resetState(self) - self.additionalValidity = [] - self.removedValidity = [] - - -class FeatureInfo(BaseInfo): - """Registry information about an API - or .""" - - def __init__(self, elem): - BaseInfo.__init__(self, elem) - self.name = elem.get('name') - "feature name string (e.g. 'VK_KHR_surface')" - - self.emit = False - "has this feature been defined already?" - - self.sortorder = int(elem.get('sortorder', 0)) - """explicit numeric sort key within feature and extension groups. - Defaults to 0.""" - - # Determine element category (vendor). Only works - # for elements. - if elem.tag == 'feature': - # Element category (vendor) is meaningless for - self.category = 'VERSION' - """category, e.g. VERSION or khr/vendor tag""" - - self.version = elem.get('name') - """feature name string""" - - self.versionNumber = elem.get('number') - """versionNumber - API version number, taken from the 'number' - attribute of . Extensions do not have API version - numbers and are assigned number 0.""" - - self.number = "0" - self.supported = None - else: - # Extract vendor portion of __ - self.category = self.name.split('_', 2)[1] - self.version = "0" - self.versionNumber = "0" - self.number = elem.get('number') - """extension number, used for ordering and for assigning - enumerant offsets. 
features do not have extension - numbers and are assigned number 0.""" - - # If there's no 'number' attribute, use 0, so sorting works - if self.number is None: - self.number = 0 - self.supported = elem.get('supported') - -class SpirvInfo(BaseInfo): - """Registry information about an API - or .""" - - def __init__(self, elem): - BaseInfo.__init__(self, elem) - -class Registry: - """Object representing an API registry, loaded from an XML file.""" - - def __init__(self, gen=None, genOpts=None): - if gen is None: - # If not specified, give a default object so messaging will work - self.gen = OutputGenerator() - else: - self.gen = gen - "Output generator used to write headers / messages" - - if genOpts is None: - self.genOpts = GeneratorOptions() - else: - self.genOpts = genOpts - "Options controlling features to write and how to format them" - - self.gen.registry = self - self.gen.genOpts = self.genOpts - self.gen.genOpts.registry = self - - self.tree = None - "ElementTree containing the root ``" - - self.typedict = {} - "dictionary of TypeInfo objects keyed by type name" - - self.groupdict = {} - "dictionary of GroupInfo objects keyed by group name" - - self.enumdict = {} - "dictionary of EnumInfo objects keyed by enum name" - - self.cmddict = {} - "dictionary of CmdInfo objects keyed by command name" - - self.apidict = {} - "dictionary of FeatureInfo objects for `` elements keyed by API name" - - self.extensions = [] - "list of `` Elements" - - self.extdict = {} - "dictionary of FeatureInfo objects for `` elements keyed by extension name" - - self.spirvextdict = {} - "dictionary of FeatureInfo objects for `` elements keyed by spirv extension name" - - self.spirvcapdict = {} - "dictionary of FeatureInfo objects for `` elements keyed by spirv capability name" - - self.emitFeatures = False - """True to actually emit features for a version / extension, - or False to just treat them as emitted""" - - self.breakPat = None - "regexp pattern to break on when generating 
names" - # self.breakPat = re.compile('VkFenceImportFlagBits.*') - - self.requiredextensions = [] # Hack - can remove it after validity generator goes away - - # ** Global types for automatic source generation ** - # Length Member data - self.commandextensiontuple = namedtuple('commandextensiontuple', - ['command', # The name of the command being modified - 'value', # The value to append to the command - 'extension']) # The name of the extension that added it - self.validextensionstructs = defaultdict(list) - self.commandextensionsuccesses = [] - self.commandextensionerrors = [] - - self.filename = None - - def loadElementTree(self, tree): - """Load ElementTree into a Registry object and parse it.""" - self.tree = tree - self.parseTree() - - def loadFile(self, file): - """Load an API registry XML file into a Registry object and parse it""" - self.filename = file - self.tree = etree.parse(file) - self.parseTree() - - def setGenerator(self, gen): - """Specify output generator object. - - `None` restores the default generator.""" - self.gen = gen - self.gen.setRegistry(self) - - def addElementInfo(self, elem, info, infoName, dictionary): - """Add information about an element to the corresponding dictionary. - - Intended for internal use only. - - - elem - ``/``/``/``/``/``/``/`` Element - - info - corresponding {Type|Group|Enum|Cmd|Feature|Spirv}Info object - - infoName - 'type' / 'group' / 'enum' / 'command' / 'feature' / 'extension' / 'spirvextension' / 'spirvcapability' - - dictionary - self.{type|group|enum|cmd|api|ext|spirvext|spirvcap}dict - - If the Element has an 'api' attribute, the dictionary key is the - tuple (name,api). If not, the key is the name. 
'name' is an - attribute of the Element""" - # self.gen.logMsg('diag', 'Adding ElementInfo.required =', - # info.required, 'name =', elem.get('name')) - api = elem.get('api') - if api: - key = (elem.get('name'), api) - else: - key = elem.get('name') - if key in dictionary: - if not dictionary[key].compareElem(info, infoName): - self.gen.logMsg('warn', 'Attempt to redefine', key, - '(this should not happen)') - else: - True - else: - dictionary[key] = info - - def lookupElementInfo(self, fname, dictionary): - """Find a {Type|Enum|Cmd}Info object by name. - - Intended for internal use only. - - If an object qualified by API name exists, use that. - - - fname - name of type / enum / command - - dictionary - self.{type|enum|cmd}dict""" - key = (fname, self.genOpts.apiname) - if key in dictionary: - # self.gen.logMsg('diag', 'Found API-specific element for feature', fname) - return dictionary[key] - if fname in dictionary: - # self.gen.logMsg('diag', 'Found generic element for feature', fname) - return dictionary[fname] - - return None - - def breakOnName(self, regexp): - """Specify a feature name regexp to break on when generating features.""" - self.breakPat = re.compile(regexp) - - def parseTree(self): - """Parse the registry Element, once created""" - # This must be the Element for the root - self.reg = self.tree.getroot() - - # Create dictionary of registry types from toplevel tags - # and add 'name' attribute to each tag (where missing) - # based on its element. - # - # There's usually one block; more are OK - # Required attributes: 'name' or nested tag contents - self.typedict = {} - for type_elem in self.reg.findall('types/type'): - # If the doesn't already have a 'name' attribute, set - # it from contents of its tag. - if type_elem.get('name') is None: - type_elem.set('name', type_elem.find('name').text) - self.addElementInfo(type_elem, TypeInfo(type_elem), 'type', self.typedict) - - # Create dictionary of registry enum groups from tags. 
- # - # Required attributes: 'name'. If no name is given, one is - # generated, but that group can't be identified and turned into an - # enum type definition - it's just a container for tags. - self.groupdict = {} - for group in self.reg.findall('enums'): - self.addElementInfo(group, GroupInfo(group), 'group', self.groupdict) - - # Create dictionary of registry enums from tags - # - # tags usually define different namespaces for the values - # defined in those tags, but the actual names all share the - # same dictionary. - # Required attributes: 'name', 'value' - # For containing which have type="enum" or type="bitmask", - # tag all contained s are required. This is a stopgap until - # a better scheme for tagging core and extension enums is created. - self.enumdict = {} - for enums in self.reg.findall('enums'): - required = (enums.get('type') is not None) - for enum in enums.findall('enum'): - enumInfo = EnumInfo(enum) - enumInfo.required = required - self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) - - # Create dictionary of registry commands from tags - # and add 'name' attribute to each tag (where missing) - # based on its element. - # - # There's usually only one block; more are OK. - # Required attributes: 'name' or tag contents - self.cmddict = {} - # List of commands which alias others. Contains - # [ aliasName, element ] - # for each alias - cmdAlias = [] - for cmd in self.reg.findall('commands/command'): - # If the doesn't already have a 'name' attribute, set - # it from contents of its tag. - name = cmd.get('name') - if name is None: - name = cmd.set('name', cmd.find('proto/name').text) - ci = CmdInfo(cmd) - self.addElementInfo(cmd, ci, 'command', self.cmddict) - alias = cmd.get('alias') - if alias: - cmdAlias.append([name, alias, cmd]) - - # Now loop over aliases, injecting a copy of the aliased command's - # Element with the aliased prototype name replaced with the command - # name - if it exists. 
- for (name, alias, cmd) in cmdAlias: - if alias in self.cmddict: - aliasInfo = self.cmddict[alias] - cmdElem = copy.deepcopy(aliasInfo.elem) - cmdElem.find('proto/name').text = name - cmdElem.set('name', name) - cmdElem.set('alias', alias) - ci = CmdInfo(cmdElem) - # Replace the dictionary entry for the CmdInfo element - self.cmddict[name] = ci - - # @ newString = etree.tostring(base, encoding="unicode").replace(aliasValue, aliasName) - # @elem.append(etree.fromstring(replacement)) - else: - self.gen.logMsg('warn', 'No matching found for command', - cmd.get('name'), 'alias', alias) - - # Create dictionaries of API and extension interfaces - # from toplevel and tags. - self.apidict = {} - for feature in self.reg.findall('feature'): - featureInfo = FeatureInfo(feature) - self.addElementInfo(feature, featureInfo, 'feature', self.apidict) - - # Add additional enums defined only in tags - # to the corresponding enumerated type. - # When seen here, the element, processed to contain the - # numeric enum value, is added to the corresponding - # element, as well as adding to the enum dictionary. It is no - # longer removed from the element it is introduced in. - # Instead, generateRequiredInterface ignores elements - # that extend enumerated types. - # - # For tags which are actually just constants, if there's - # no 'extends' tag but there is a 'value' or 'bitpos' tag, just - # add an EnumInfo record to the dictionary. That works because - # output generation of constants is purely dependency-based, and - # doesn't need to iterate through the XML tags. 
- for elem in feature.findall('require'): - for enum in elem.findall('enum'): - addEnumInfo = False - groupName = enum.get('extends') - if groupName is not None: - # self.gen.logMsg('diag', 'Found extension enum', - # enum.get('name')) - # Add version number attribute to the element - enum.set('version', featureInfo.version) - # Look up the GroupInfo with matching groupName - if groupName in self.groupdict: - # self.gen.logMsg('diag', 'Matching group', - # groupName, 'found, adding element...') - gi = self.groupdict[groupName] - gi.elem.append(copy.deepcopy(enum)) - else: - self.gen.logMsg('warn', 'NO matching group', - groupName, 'for enum', enum.get('name'), 'found.') - addEnumInfo = True - elif enum.get('value') or enum.get('bitpos') or enum.get('alias'): - # self.gen.logMsg('diag', 'Adding extension constant "enum"', - # enum.get('name')) - addEnumInfo = True - if addEnumInfo: - enumInfo = EnumInfo(enum) - self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) - - self.extensions = self.reg.findall('extensions/extension') - self.extdict = {} - for feature in self.extensions: - featureInfo = FeatureInfo(feature) - self.addElementInfo(feature, featureInfo, 'extension', self.extdict) - - # Add additional enums defined only in tags - # to the corresponding core type. - # Algorithm matches that of enums in a "feature" tag as above. - # - # This code also adds a 'extnumber' attribute containing the - # extension number, used for enumerant value calculation. - for elem in feature.findall('require'): - for enum in elem.findall('enum'): - addEnumInfo = False - groupName = enum.get('extends') - if groupName is not None: - # self.gen.logMsg('diag', 'Found extension enum', - # enum.get('name')) - - # Add block's extension number attribute to - # the element unless specified explicitly, such - # as when redefining an enum in another extension. 
- extnumber = enum.get('extnumber') - if not extnumber: - enum.set('extnumber', featureInfo.number) - - enum.set('extname', featureInfo.name) - enum.set('supported', featureInfo.supported) - # Look up the GroupInfo with matching groupName - if groupName in self.groupdict: - # self.gen.logMsg('diag', 'Matching group', - # groupName, 'found, adding element...') - gi = self.groupdict[groupName] - gi.elem.append(copy.deepcopy(enum)) - else: - self.gen.logMsg('warn', 'NO matching group', - groupName, 'for enum', enum.get('name'), 'found.') - addEnumInfo = True - elif enum.get('value') or enum.get('bitpos') or enum.get('alias'): - # self.gen.logMsg('diag', 'Adding extension constant "enum"', - # enum.get('name')) - addEnumInfo = True - if addEnumInfo: - enumInfo = EnumInfo(enum) - self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) - - # Construct a "validextensionstructs" list for parent structures - # based on "structextends" tags in child structures - disabled_types = [] - for disabled_ext in self.reg.findall('extensions/extension[@supported="disabled"]'): - for type_elem in disabled_ext.findall("*/type"): - disabled_types.append(type_elem.get('name')) - for type_elem in self.reg.findall('types/type'): - if type_elem.get('name') not in disabled_types: - parentStructs = type_elem.get('structextends') - if parentStructs is not None: - for parent in parentStructs.split(','): - # self.gen.logMsg('diag', type.get('name'), 'extends', parent) - self.validextensionstructs[parent].append(type_elem.get('name')) - # Sort the lists so they don't depend on the XML order - for parent in self.validextensionstructs: - self.validextensionstructs[parent].sort() - - # Parse out all spirv tags in dictionaries - # Use addElementInfo to catch duplicates - for spirv in self.reg.findall('spirvextensions/spirvextension'): - spirvInfo = SpirvInfo(spirv) - self.addElementInfo(spirv, spirvInfo, 'spirvextension', self.spirvextdict) - for spirv in 
self.reg.findall('spirvcapabilities/spirvcapability'): - spirvInfo = SpirvInfo(spirv) - self.addElementInfo(spirv, spirvInfo, 'spirvcapability', self.spirvcapdict) - - def dumpReg(self, maxlen=120, filehandle=sys.stdout): - """Dump all the dictionaries constructed from the Registry object. - - Diagnostic to dump the dictionaries to specified file handle (default stdout). - Truncates type / enum / command elements to maxlen characters (default 120)""" - write('***************************************', file=filehandle) - write(' ** Dumping Registry contents **', file=filehandle) - write('***************************************', file=filehandle) - write('// Types', file=filehandle) - for name in self.typedict: - tobj = self.typedict[name] - write(' Type', name, '->', etree.tostring(tobj.elem)[0:maxlen], file=filehandle) - write('// Groups', file=filehandle) - for name in self.groupdict: - gobj = self.groupdict[name] - write(' Group', name, '->', etree.tostring(gobj.elem)[0:maxlen], file=filehandle) - write('// Enums', file=filehandle) - for name in self.enumdict: - eobj = self.enumdict[name] - write(' Enum', name, '->', etree.tostring(eobj.elem)[0:maxlen], file=filehandle) - write('// Commands', file=filehandle) - for name in self.cmddict: - cobj = self.cmddict[name] - write(' Command', name, '->', etree.tostring(cobj.elem)[0:maxlen], file=filehandle) - write('// APIs', file=filehandle) - for key in self.apidict: - write(' API Version ', key, '->', - etree.tostring(self.apidict[key].elem)[0:maxlen], file=filehandle) - write('// Extensions', file=filehandle) - for key in self.extdict: - write(' Extension', key, '->', - etree.tostring(self.extdict[key].elem)[0:maxlen], file=filehandle) - write('// SPIR-V', file=filehandle) - for key in self.spirvextdict: - write(' SPIR-V Extension', key, '->', - etree.tostring(self.spirvextdict[key].elem)[0:maxlen], file=filehandle) - for key in self.spirvcapdict: - write(' SPIR-V Capability', key, '->', - 
etree.tostring(self.spirvcapdict[key].elem)[0:maxlen], file=filehandle) - - def markTypeRequired(self, typename, required): - """Require (along with its dependencies) or remove (but not its dependencies) a type. - - - typename - name of type - - required - boolean (to tag features as required or not) - """ - self.gen.logMsg('diag', 'tagging type:', typename, '-> required =', required) - # Get TypeInfo object for tag corresponding to typename - typeinfo = self.lookupElementInfo(typename, self.typedict) - if typeinfo is not None: - if required: - # Tag type dependencies in 'alias' and 'required' attributes as - # required. This does not un-tag dependencies in a - # tag. See comments in markRequired() below for the reason. - for attrib_name in ['requires', 'alias']: - depname = typeinfo.elem.get(attrib_name) - if depname: - self.gen.logMsg('diag', 'Generating dependent type', - depname, 'for', attrib_name, 'type', typename) - # Don't recurse on self-referential structures. - if typename != depname: - self.markTypeRequired(depname, required) - else: - self.gen.logMsg('diag', 'type', typename, 'is self-referential') - # Tag types used in defining this type (e.g. in nested - # tags) - # Look for in entire tree, - # not just immediate children - for subtype in typeinfo.elem.findall('.//type'): - self.gen.logMsg('diag', 'markRequired: type requires dependent ', subtype.text) - if typename != subtype.text: - self.markTypeRequired(subtype.text, required) - else: - self.gen.logMsg('diag', 'type', typename, 'is self-referential') - # Tag enums used in defining this type, for example in - # member[MEMBER_SIZE] - for subenum in typeinfo.elem.findall('.//enum'): - self.gen.logMsg('diag', 'markRequired: type requires dependent ', subenum.text) - self.markEnumRequired(subenum.text, required) - # Tag type dependency in 'bitvalues' attributes as - # required. 
This ensures that the bit values for a flag - # are emitted - depType = typeinfo.elem.get('bitvalues') - if depType: - self.gen.logMsg('diag', 'Generating bitflag type', - depType, 'for type', typename) - self.markTypeRequired(depType, required) - group = self.lookupElementInfo(depType, self.groupdict) - if group is not None: - group.flagType = typeinfo - - typeinfo.required = required - elif '.h' not in typename: - self.gen.logMsg('warn', 'type:', typename, 'IS NOT DEFINED') - - def markEnumRequired(self, enumname, required): - """Mark an enum as required or not. - - - enumname - name of enum - - required - boolean (to tag features as required or not)""" - - self.gen.logMsg('diag', 'tagging enum:', enumname, '-> required =', required) - enum = self.lookupElementInfo(enumname, self.enumdict) - if enum is not None: - # If the enum is part of a group, and is being removed, then - # look it up in that tag and remove it there, so that it - # isn't visible to generators (which traverse the tag - # elements themselves). - # This isn't the most robust way of doing this, since a removed - # enum that's later required again will no longer have a group - # element, but it makes the change non-intrusive on generator - # code. 
- if required is False: - groupName = enum.elem.get('extends') - if groupName is not None: - # Look up the Info with matching groupName - if groupName in self.groupdict: - gi = self.groupdict[groupName] - gienum = gi.elem.find("enum[@name='" + enumname + "']") - if gienum is not None: - # Remove copy of this enum from the group - gi.elem.remove(gienum) - else: - self.gen.logMsg('warn', 'Cannot remove enum', - enumname, 'not found in group', - groupName) - else: - self.gen.logMsg('warn', 'Cannot remove enum', - enumname, 'from nonexistent group', - groupName) - - enum.required = required - # Tag enum dependencies in 'alias' attribute as required - depname = enum.elem.get('alias') - if depname: - self.gen.logMsg('diag', 'Generating dependent enum', - depname, 'for alias', enumname, 'required =', enum.required) - self.markEnumRequired(depname, required) - else: - self.gen.logMsg('warn', 'enum:', enumname, 'IS NOT DEFINED') - - def markCmdRequired(self, cmdname, required): - """Mark a command as required or not. - - - cmdname - name of command - - required - boolean (to tag features as required or not)""" - self.gen.logMsg('diag', 'tagging command:', cmdname, '-> required =', required) - cmd = self.lookupElementInfo(cmdname, self.cmddict) - if cmd is not None: - cmd.required = required - # Tag command dependencies in 'alias' attribute as required - depname = cmd.elem.get('alias') - if depname: - self.gen.logMsg('diag', 'Generating dependent command', - depname, 'for alias', cmdname) - self.markCmdRequired(depname, required) - # Tag all parameter types of this command as required. - # This DOES NOT remove types of commands in a - # tag, because many other commands may use the same type. - # We could be more clever and reference count types, - # instead of using a boolean. 
- if required: - # Look for in entire tree, - # not just immediate children - for type_elem in cmd.elem.findall('.//type'): - self.gen.logMsg('diag', 'markRequired: command implicitly requires dependent type', type_elem.text) - self.markTypeRequired(type_elem.text, required) - else: - self.gen.logMsg('warn', 'command:', cmdname, 'IS NOT DEFINED') - - def markRequired(self, featurename, feature, required): - """Require or remove features specified in the Element. - - - featurename - name of the feature - - feature - Element for `` or `` tag - - required - boolean (to tag features as required or not)""" - self.gen.logMsg('diag', 'markRequired (feature = , required =', required, ')') - - # Loop over types, enums, and commands in the tag - # @@ It would be possible to respect 'api' and 'profile' attributes - # in individual features, but that's not done yet. - for typeElem in feature.findall('type'): - self.markTypeRequired(typeElem.get('name'), required) - for enumElem in feature.findall('enum'): - self.markEnumRequired(enumElem.get('name'), required) - for cmdElem in feature.findall('command'): - self.markCmdRequired(cmdElem.get('name'), required) - - # Extensions may need to extend existing commands or other items in the future. - # So, look for extend tags. 
- for extendElem in feature.findall('extend'): - extendType = extendElem.get('type') - if extendType == 'command': - commandName = extendElem.get('name') - successExtends = extendElem.get('successcodes') - if successExtends is not None: - for success in successExtends.split(','): - self.commandextensionsuccesses.append(self.commandextensiontuple(command=commandName, - value=success, - extension=featurename)) - errorExtends = extendElem.get('errorcodes') - if errorExtends is not None: - for error in errorExtends.split(','): - self.commandextensionerrors.append(self.commandextensiontuple(command=commandName, - value=error, - extension=featurename)) - else: - self.gen.logMsg('warn', 'extend type:', extendType, 'IS NOT SUPPORTED') - - def getAlias(self, elem, dict): - """Check for an alias in the same require block. - - - elem - Element to check for an alias""" - - # Try to find an alias - alias = elem.get('alias') - if alias is None: - name = elem.get('name') - typeinfo = self.lookupElementInfo(name, dict) - alias = typeinfo.elem.get('alias') - - return alias - - def checkForCorrectionAliases(self, alias, require, tag): - """Check for an alias in the same require block. - - - alias - String name of the alias - - require - `` block from the registry - - tag - tag to look for in the require block""" - - if alias and require.findall(tag + "[@name='" + alias + "']"): - return True - - return False - - def fillFeatureDictionary(self, interface, featurename, api, profile): - """Capture added interfaces for a `` or ``. 
- - - interface - Element for `` or ``, containing - `` and `` tags - - featurename - name of the feature - - api - string specifying API name being generated - - profile - string specifying API profile being generated""" - - # Explicitly initialize known types - errors for unhandled categories - self.gen.featureDictionary[featurename] = { - "enumconstant": {}, - "command": {}, - "enum": {}, - "struct": {}, - "handle": {}, - "basetype": {}, - "include": {}, - "define": {}, - "bitmask": {}, - "union": {}, - "funcpointer": {}, - } - - # marks things that are required by this version/profile - for require in interface.findall('require'): - if matchAPIProfile(api, profile, require): - - # Determine the required extension or version needed for a require block - # Assumes that only one of these is specified - required_key = require.get('feature') - if required_key is None: - required_key = require.get('extension') - - # Loop over types, enums, and commands in the tag - for typeElem in require.findall('type'): - typename = typeElem.get('name') - typeinfo = self.lookupElementInfo(typename, self.typedict) - - if typeinfo: - # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. 
- alias = self.getAlias(typeElem, self.typedict) - if not self.checkForCorrectionAliases(alias, require, 'type'): - # Resolve the type info to the actual type, so we get an accurate read for 'structextends' - while alias: - typeinfo = self.lookupElementInfo(alias, self.typedict) - alias = typeinfo.elem.get('alias') - - typecat = typeinfo.elem.get('category') - typeextends = typeinfo.elem.get('structextends') - if not required_key in self.gen.featureDictionary[featurename][typecat]: - self.gen.featureDictionary[featurename][typecat][required_key] = {} - if not typeextends in self.gen.featureDictionary[featurename][typecat][required_key]: - self.gen.featureDictionary[featurename][typecat][required_key][typeextends] = [] - self.gen.featureDictionary[featurename][typecat][required_key][typeextends].append(typename) - - for enumElem in require.findall('enum'): - enumname = enumElem.get('name') - typeinfo = self.lookupElementInfo(enumname, self.enumdict) - - # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. - alias = self.getAlias(enumElem, self.enumdict) - if not self.checkForCorrectionAliases(alias, require, 'enum'): - enumextends = enumElem.get('extends') - if not required_key in self.gen.featureDictionary[featurename]['enumconstant']: - self.gen.featureDictionary[featurename]['enumconstant'][required_key] = {} - if not enumextends in self.gen.featureDictionary[featurename]['enumconstant'][required_key]: - self.gen.featureDictionary[featurename]['enumconstant'][required_key][enumextends] = [] - self.gen.featureDictionary[featurename]['enumconstant'][required_key][enumextends].append(enumname) - - for cmdElem in require.findall('command'): - - # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. 
- alias = self.getAlias(cmdElem, self.cmddict) - if not self.checkForCorrectionAliases(alias, require, 'command'): - if not required_key in self.gen.featureDictionary[featurename]['command']: - self.gen.featureDictionary[featurename]['command'][required_key] = [] - self.gen.featureDictionary[featurename]['command'][required_key].append(cmdElem.get('name')) - - - def requireAndRemoveFeatures(self, interface, featurename, api, profile): - """Process `` and `` tags for a `` or ``. - - - interface - Element for `` or ``, containing - `` and `` tags - - featurename - name of the feature - - api - string specifying API name being generated - - profile - string specifying API profile being generated""" - # marks things that are required by this version/profile - for feature in interface.findall('require'): - if matchAPIProfile(api, profile, feature): - self.markRequired(featurename, feature, True) - # marks things that are removed by this version/profile - for feature in interface.findall('remove'): - if matchAPIProfile(api, profile, feature): - self.markRequired(featurename, feature, False) - - def assignAdditionalValidity(self, interface, api, profile): - # Loop over all usage inside all tags. - for feature in interface.findall('require'): - if matchAPIProfile(api, profile, feature): - for v in feature.findall('usage'): - if v.get('command'): - self.cmddict[v.get('command')].additionalValidity.append(copy.deepcopy(v)) - if v.get('struct'): - self.typedict[v.get('struct')].additionalValidity.append(copy.deepcopy(v)) - - # Loop over all usage inside all tags. 
- for feature in interface.findall('remove'): - if matchAPIProfile(api, profile, feature): - for v in feature.findall('usage'): - if v.get('command'): - self.cmddict[v.get('command')].removedValidity.append(copy.deepcopy(v)) - if v.get('struct'): - self.typedict[v.get('struct')].removedValidity.append(copy.deepcopy(v)) - - def generateFeature(self, fname, ftype, dictionary): - """Generate a single type / enum group / enum / command, - and all its dependencies as needed. - - - fname - name of feature (``/``/``) - - ftype - type of feature, 'type' | 'enum' | 'command' - - dictionary - of *Info objects - self.{type|enum|cmd}dict""" - - self.gen.logMsg('diag', 'generateFeature: generating', ftype, fname) - f = self.lookupElementInfo(fname, dictionary) - if f is None: - # No such feature. This is an error, but reported earlier - self.gen.logMsg('diag', 'No entry found for feature', fname, - 'returning!') - return - - # If feature isn't required, or has already been declared, return - if not f.required: - self.gen.logMsg('diag', 'Skipping', ftype, fname, '(not required)') - return - if f.declared: - self.gen.logMsg('diag', 'Skipping', ftype, fname, '(already declared)') - return - # Always mark feature declared, as though actually emitted - f.declared = True - - # Determine if this is an alias, and of what, if so - alias = f.elem.get('alias') - if alias: - self.gen.logMsg('diag', fname, 'is an alias of', alias) - - # Pull in dependent declaration(s) of the feature. - # For types, there may be one type in the 'requires' attribute of - # the element, one in the 'alias' attribute, and many in - # embedded and tags within the element. - # For commands, there may be many in tags within the element. - # For enums, no dependencies are allowed (though perhaps if you - # have a uint64 enum, it should require that type). 
- genProc = None - followupFeature = None - if ftype == 'type': - genProc = self.gen.genType - - # Generate type dependencies in 'alias' and 'requires' attributes - if alias: - self.generateFeature(alias, 'type', self.typedict) - requires = f.elem.get('requires') - if requires: - self.gen.logMsg('diag', 'Generating required dependent type', - requires) - self.generateFeature(requires, 'type', self.typedict) - - # Generate types used in defining this type (e.g. in nested - # tags) - # Look for in entire tree, - # not just immediate children - for subtype in f.elem.findall('.//type'): - self.gen.logMsg('diag', 'Generating required dependent ', - subtype.text) - self.generateFeature(subtype.text, 'type', self.typedict) - - # Generate enums used in defining this type, for example in - # member[MEMBER_SIZE] - for subtype in f.elem.findall('.//enum'): - self.gen.logMsg('diag', 'Generating required dependent ', - subtype.text) - self.generateFeature(subtype.text, 'enum', self.enumdict) - - # If the type is an enum group, look up the corresponding - # group in the group dictionary and generate that instead. - if f.elem.get('category') == 'enum': - self.gen.logMsg('diag', 'Type', fname, 'is an enum group, so generate that instead') - group = self.lookupElementInfo(fname, self.groupdict) - if alias is not None: - # An alias of another group name. - # Pass to genGroup with 'alias' parameter = aliased name - self.gen.logMsg('diag', 'Generating alias', fname, - 'for enumerated type', alias) - # Now, pass the *aliased* GroupInfo to the genGroup, but - # with an additional parameter which is the alias name. - genProc = self.gen.genGroup - f = self.lookupElementInfo(alias, self.groupdict) - elif group is None: - self.gen.logMsg('warn', 'Skipping enum type', fname, - ': No matching enumerant group') - return - else: - genProc = self.gen.genGroup - f = group - - # @ The enum group is not ready for generation. 
At this - # @ point, it contains all tags injected by - # @ tags without any verification of whether - # @ they're required or not. It may also contain - # @ duplicates injected by multiple consistent - # @ definitions of an . - - # @ Pass over each enum, marking its enumdict[] entry as - # @ required or not. Mark aliases of enums as required, - # @ too. - - enums = group.elem.findall('enum') - - self.gen.logMsg('diag', 'generateFeature: checking enums for group', fname) - - # Check for required enums, including aliases - # LATER - Check for, report, and remove duplicates? - enumAliases = [] - for elem in enums: - name = elem.get('name') - - required = False - - extname = elem.get('extname') - version = elem.get('version') - if extname is not None: - # 'supported' attribute was injected when the element was - # moved into the group in Registry.parseTree() - if self.genOpts.defaultExtensions == elem.get('supported'): - required = True - elif re.match(self.genOpts.addExtensions, extname) is not None: - required = True - elif version is not None: - required = re.match(self.genOpts.emitversions, version) is not None - else: - required = True - - self.gen.logMsg('diag', '* required =', required, 'for', name) - if required: - # Mark this element as required (in the element, not the EnumInfo) - elem.set('required', 'true') - # If it's an alias, track that for later use - enumAlias = elem.get('alias') - if enumAlias: - enumAliases.append(enumAlias) - for elem in enums: - name = elem.get('name') - if name in enumAliases: - elem.set('required', 'true') - self.gen.logMsg('diag', '* also need to require alias', name) - if f.elem.get('category') == 'bitmask': - followupFeature = f.elem.get('bitvalues') - elif ftype == 'command': - # Generate command dependencies in 'alias' attribute - if alias: - self.generateFeature(alias, 'command', self.cmddict) - - genProc = self.gen.genCmd - for type_elem in f.elem.findall('.//type'): - depname = type_elem.text - self.gen.logMsg('diag', 
'Generating required parameter type', - depname) - self.generateFeature(depname, 'type', self.typedict) - elif ftype == 'enum': - # Generate enum dependencies in 'alias' attribute - if alias: - self.generateFeature(alias, 'enum', self.enumdict) - genProc = self.gen.genEnum - - # Actually generate the type only if emitting declarations - if self.emitFeatures: - self.gen.logMsg('diag', 'Emitting', ftype, 'decl for', fname) - genProc(f, fname, alias) - else: - self.gen.logMsg('diag', 'Skipping', ftype, fname, - '(should not be emitted)') - - if followupFeature: - self.gen.logMsg('diag', 'Generating required bitvalues ', - followupFeature) - self.generateFeature(followupFeature, "type", self.typedict) - - def generateRequiredInterface(self, interface): - """Generate all interfaces required by an API version or extension. - - - interface - Element for `` or ``""" - - # Loop over all features inside all tags. - for features in interface.findall('require'): - for t in features.findall('type'): - self.generateFeature(t.get('name'), 'type', self.typedict) - for e in features.findall('enum'): - # If this is an enum extending an enumerated type, don't - # generate it - this has already been done in reg.parseTree, - # by copying this element into the enumerated type. 
- enumextends = e.get('extends') - if not enumextends: - self.generateFeature(e.get('name'), 'enum', self.enumdict) - for c in features.findall('command'): - self.generateFeature(c.get('name'), 'command', self.cmddict) - - def generateSpirv(self, spirv, dictionary): - if spirv is None: - self.gen.logMsg('diag', 'No entry found for element', name, - 'returning!') - return - - name = spirv.elem.get('name') - # No known alias for spirv elements - alias = None - if spirv.emit: - genProc = self.gen.genSpirv - genProc(spirv, name, alias) - - def apiGen(self): - """Generate interface for specified versions using the current - generator and generator options""" - - self.gen.logMsg('diag', '*******************************************') - self.gen.logMsg('diag', ' Registry.apiGen file:', self.genOpts.filename, - 'api:', self.genOpts.apiname, - 'profile:', self.genOpts.profile) - self.gen.logMsg('diag', '*******************************************') - - # Reset required/declared flags for all features - self.apiReset() - - # Compile regexps used to select versions & extensions - regVersions = re.compile(self.genOpts.versions) - regEmitVersions = re.compile(self.genOpts.emitversions) - regAddExtensions = re.compile(self.genOpts.addExtensions) - regRemoveExtensions = re.compile(self.genOpts.removeExtensions) - regEmitExtensions = re.compile(self.genOpts.emitExtensions) - regEmitSpirv = re.compile(self.genOpts.emitSpirv) - - # Get all matching API feature names & add to list of FeatureInfo - # Note we used to select on feature version attributes, not names. - features = [] - apiMatch = False - for key in self.apidict: - fi = self.apidict[key] - api = fi.elem.get('api') - if apiNameMatch(self.genOpts.apiname, api): - apiMatch = True - if regVersions.match(fi.name): - # Matches API & version #s being generated. Mark for - # emission and add to the features[] list . - # @@ Could use 'declared' instead of 'emit'? 
- fi.emit = (regEmitVersions.match(fi.name) is not None) - features.append(fi) - if not fi.emit: - self.gen.logMsg('diag', 'NOT tagging feature api =', api, - 'name =', fi.name, 'version =', fi.version, - 'for emission (does not match emitversions pattern)') - else: - self.gen.logMsg('diag', 'Including feature api =', api, - 'name =', fi.name, 'version =', fi.version, - 'for emission (matches emitversions pattern)') - else: - self.gen.logMsg('diag', 'NOT including feature api =', api, - 'name =', fi.name, 'version =', fi.version, - '(does not match requested versions)') - else: - self.gen.logMsg('diag', 'NOT including feature api =', api, - 'name =', fi.name, - '(does not match requested API)') - if not apiMatch: - self.gen.logMsg('warn', 'No matching API versions found!') - - # Get all matching extensions, in order by their extension number, - # and add to the list of features. - # Start with extensions tagged with 'api' pattern matching the API - # being generated. Add extensions matching the pattern specified in - # regExtensions, then remove extensions matching the pattern - # specified in regRemoveExtensions - for (extName, ei) in sorted(self.extdict.items(), key=lambda x: x[1].number if x[1].number is not None else '0'): - extName = ei.name - include = False - - # Include extension if defaultExtensions is not None and is - # exactly matched by the 'supported' attribute. - if apiNameMatch(self.genOpts.defaultExtensions, - ei.elem.get('supported')): - self.gen.logMsg('diag', 'Including extension', - extName, "(defaultExtensions matches the 'supported' attribute)") - include = True - - # Include additional extensions if the extension name matches - # the regexp specified in the generator options. This allows - # forcing extensions into an interface even if they're not - # tagged appropriately in the registry. - # However we still respect the 'supported' attribute. 
- if regAddExtensions.match(extName) is not None: - if not apiNameMatch(self.genOpts.apiname, ei.elem.get('supported')): - self.gen.logMsg('diag', 'NOT including extension', - extName, '(matches explicitly requested, but does not match the \'supported\' attribute)') - include = False - else: - self.gen.logMsg('diag', 'Including extension', - extName, '(matches explicitly requested extensions to add)') - include = True - # Remove extensions if the name matches the regexp specified - # in generator options. This allows forcing removal of - # extensions from an interface even if they're tagged that - # way in the registry. - if regRemoveExtensions.match(extName) is not None: - self.gen.logMsg('diag', 'Removing extension', - extName, '(matches explicitly requested extensions to remove)') - include = False - - # If the extension is to be included, add it to the - # extension features list. - if include: - ei.emit = (regEmitExtensions.match(extName) is not None) - features.append(ei) - if not ei.emit: - self.gen.logMsg('diag', 'NOT tagging extension', - extName, - 'for emission (does not match emitextensions pattern)') - - # Hack - can be removed when validity generator goes away - # (Jon) I'm not sure what this does, or if it should respect - # the ei.emit flag above. 
- self.requiredextensions.append(extName) - else: - self.gen.logMsg('diag', 'NOT including extension', - extName, '(does not match api attribute or explicitly requested extensions)') - - # Add all spirv elements to list - # generators decide to emit them all or not - # Currently no filtering as no client of these elements needs filtering - spirvexts = [] - for key in self.spirvextdict: - si = self.spirvextdict[key] - si.emit = (regEmitSpirv.match(key) is not None) - spirvexts.append(si) - spirvcaps = [] - for key in self.spirvcapdict: - si = self.spirvcapdict[key] - si.emit = (regEmitSpirv.match(key) is not None) - spirvcaps.append(si) - - # Sort the features list, if a sort procedure is defined - if self.genOpts.sortProcedure: - self.genOpts.sortProcedure(features) - # print('sortProcedure ->', [f.name for f in features]) - - # Pass 1: loop over requested API versions and extensions tagging - # types/commands/features as required (in an block) or no - # longer required (in an block). It is possible to remove - # a feature in one version and restore it later by requiring it in - # a later version. - # If a profile other than 'None' is being generated, it must - # match the profile attribute (if any) of the and - # tags. - self.gen.logMsg('diag', 'PASS 1: TAG FEATURES') - for f in features: - self.gen.logMsg('diag', 'PASS 1: Tagging required and removed features for', - f.name) - self.fillFeatureDictionary(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) - self.requireAndRemoveFeatures(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) - self.assignAdditionalValidity(f.elem, self.genOpts.apiname, self.genOpts.profile) - - # Pass 2: loop over specified API versions and extensions printing - # declarations for required things which haven't already been - # generated. 
- self.gen.logMsg('diag', 'PASS 2: GENERATE INTERFACES FOR FEATURES') - self.gen.beginFile(self.genOpts) - for f in features: - self.gen.logMsg('diag', 'PASS 2: Generating interface for', - f.name) - emit = self.emitFeatures = f.emit - if not emit: - self.gen.logMsg('diag', 'PASS 2: NOT declaring feature', - f.elem.get('name'), 'because it is not tagged for emission') - # Generate the interface (or just tag its elements as having been - # emitted, if they haven't been). - self.gen.beginFeature(f.elem, emit) - self.generateRequiredInterface(f.elem) - self.gen.endFeature() - # Generate spirv elements - for s in spirvexts: - self.generateSpirv(s, self.spirvextdict) - for s in spirvcaps: - self.generateSpirv(s, self.spirvcapdict) - self.gen.endFile() - - def apiReset(self): - """Reset type/enum/command dictionaries before generating another API. - - Use between apiGen() calls to reset internal state.""" - for datatype in self.typedict: - self.typedict[datatype].resetState() - for enum in self.enumdict: - self.enumdict[enum].resetState() - for cmd in self.cmddict: - self.cmddict[cmd].resetState() - for cmd in self.apidict: - self.apidict[cmd].resetState() - - def __validateStructLimittypes(self, struct): - """Validate 'limittype' attributes for a single struct.""" - limittypeDiags = namedtuple('limittypeDiags', ['missing', 'invalid']) - badFields = defaultdict(lambda : limittypeDiags(missing=[], invalid=[])) - validLimittypes = { 'min', 'max', 'bitmask', 'range', 'struct', 'noauto' } - for member in struct.getMembers(): - memberName = member.findtext('name') - if memberName in ['sType', 'pNext']: - continue - limittype = member.get('limittype') - if not limittype: - badFields[struct.elem.get('name')].missing.append(memberName) - elif limittype == 'struct': - typeName = member.findtext('type') - memberType = self.typedict[typeName] - badFields.update(self.__validateStructLimittypes(memberType)) - elif limittype not in validLimittypes: - 
badFields[struct.elem.get('name')].invalid.append(memberName) - return badFields - - def __validateLimittype(self): - """Validate 'limittype' attributes.""" - self.gen.logMsg('diag', 'VALIDATING LIMITTYPE ATTRIBUTES') - badFields = self.__validateStructLimittypes(self.typedict['VkPhysicalDeviceProperties2']) - for featStructName in self.validextensionstructs['VkPhysicalDeviceProperties2']: - featStruct = self.typedict[featStructName] - badFields.update(self.__validateStructLimittypes(featStruct)) - - if badFields: - self.gen.logMsg('diag', 'SUMMARY OF FIELDS WITH INCORRECT LIMITTYPES') - for key in sorted(badFields.keys()): - diags = badFields[key] - if diags.missing: - self.gen.logMsg('diag', ' ', key, 'missing limittype:', ', '.join(badFields[key].missing)) - if diags.invalid: - self.gen.logMsg('diag', ' ', key, 'invalid limittype:', ', '.join(badFields[key].invalid)) - return False - return True - - def validateRegistry(self): - """Validate properties of the registry.""" - return self.__validateLimittype() From 3855094768a2dce03a5aeb1e36d841bee3607adc Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Mon, 13 Nov 2023 13:19:23 +0200 Subject: [PATCH 011/190] rename more occurances of "promise" to "assert" --- xml/cl.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xml/cl.xml b/xml/cl.xml index 1f4a450fd..d0982f205 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -252,7 +252,7 @@ server's OpenCL/api-docs repository. typedef cl_bitfield cl_device_fp_atomic_capabilities_ext; typedef cl_uint cl_image_requirements_info_ext; typedef cl_bitfield cl_platform_command_buffer_capabilities_khr; - typedef cl_bitfield cl_mutable_dispatch_promises_khr + typedef cl_bitfield cl_mutable_dispatch_asserts_khr Structure types @@ -1783,7 +1783,7 @@ server's OpenCL/api-docs repository. - + @@ -7282,7 +7282,7 @@ server's OpenCL/api-docs repository. 
- + From 50bfadb7fe7d7c474afdeee1d8a365a7b48b9596 Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Tue, 16 Jan 2024 11:27:09 +0200 Subject: [PATCH 012/190] Update cl_khr_command_buffer_mutable_dispatch.asciidoc --- ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index a8b234df7..9aafb3b3d 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -975,13 +975,3 @@ non-trivial deep copying of the underlying objects contained in the command-buffer. As a result of this new entry-point being an additive change to the specification it is omitted, and if its functionality has demand later, it may be a introduced as a stand alone extension. --- - -. Introduce a `CL_MUTABLE_DISPATCH_ADDITIONAL_WORK_GROUPS_KHR` capability to - allow the number of work-groups in kernel execution to be increased during - update. -+ --- -*Resolved*: Can be included in the final release of the extension if there is -implementation coverage. --- From 2eb8460598643a8eb07952682d9c71c8dfd65ace Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Tue, 6 Feb 2024 11:32:21 +0200 Subject: [PATCH 013/190] Update cl_khr_command_buffer_mutable_dispatch.asciidoc replace error with undefined behavior --- ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index 9aafb3b3d..af9680bb3 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -337,6 +337,11 @@ description of property values. 
{CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} An assertion by the user that the number of work-groups of any ND-range kernel recorded in this command buffer will not be updated beyond the number defined when the ND-range kernel was recorded. + If the user's update to the values of _local_work_size_ and/or _global_work_size_ result in an increase + in the number of work-groups in the ND-range over the number specified when the ND-range kernel was + recorded, the behavior is undefined. + If the user updates _local_work_size_ to be _NULL_, the assertion is ignored. + |==== ==== Modifications to clCommandNDRangeKernelKHR @@ -561,10 +566,6 @@ the array violates the defined conditions: * {CL_INVALID_VALUE} if _type_ is not {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. -* {CL_INVALID_OPERATION} if {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} is specified, - and values of _local_work_size_ and/or _global_work_size_ result in an increase to the number of work- - groups in the ND-range over the number specified when the ND-range kernel was recorded. - * {CL_INVALID_OPERATION} if the values of _local_work_size_ and/or _global_work_size_ result in a change to work-group uniformity. 
From 39bdaeb09a263818ea9ae4a5d6c1880382df2f9a Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Wed, 7 Feb 2024 11:29:14 +0200 Subject: [PATCH 014/190] add CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR --- ...r_command_buffer_mutable_dispatch.asciidoc | 20 ++++++++++++++++--- xml/cl.xml | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index af9680bb3..0e8cf61ff 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -235,6 +235,9 @@ CL_INVALID_MUTABLE_COMMAND_KHR -1141 // Accepted values for the param_name parameter to clGetDeviceInfo CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 +/* cl_command_buffer_properties_khr */ +#define CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 + // Property to cl_ndrange_kernel_command_properties_khr CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 CL_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 @@ -330,7 +333,18 @@ description of property values. command-buffer, by default command-buffers are immutable. If set, commands in the command-buffer may be updated via {clUpdateMutableCommandsKHR}. -| {CL_MUTABLE_DISPATCH_ASSERTS_KHR} +|==== + +Add a {CL_COMMAND_BUFFER_ASSERTS_KHR} property to the +<> table. + +[cols=",,",options="header",] +|==== +| *Recording Properties* +| *Property Value* +| *Description* + +| {CL_COMMAND_BUFFER_ASSERTS_KHR} | {cl_mutable_dispatch_asserts_khr_TYPE} | This is a bitfield and can be set to a combination of the following values: @@ -340,8 +354,8 @@ description of property values. If the user's update to the values of _local_work_size_ and/or _global_work_size_ result in an increase in the number of work-groups in the ND-range over the number specified when the ND-range kernel was recorded, the behavior is undefined. - If the user updates _local_work_size_ to be _NULL_, the assertion is ignored. 
- + If the user updates _local_work_size_ to be _NULL_, the behavior is undefined. + |==== ==== Modifications to clCommandNDRangeKernelKHR diff --git a/xml/cl.xml b/xml/cl.xml index d0982f205..35f2faedd 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1783,7 +1783,7 @@ server's OpenCL/api-docs repository. - + From df6029f5e5d63d2edd177cc84652df61da036355 Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Wed, 7 Feb 2024 11:49:00 +0200 Subject: [PATCH 015/190] fixing mix-up in enum values --- ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 2 +- xml/cl.xml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index 0e8cf61ff..2b7949f35 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -240,7 +240,7 @@ CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 // Property to cl_ndrange_kernel_command_properties_khr CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 -CL_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 +CL_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B8 // Bits for cl_mutable_dispatch_fields_khr bitfield CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (0x1 << 0) diff --git a/xml/cl.xml b/xml/cl.xml index 35f2faedd..4912779d7 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1784,7 +1784,8 @@ server's OpenCL/api-docs repository. 
- + + From 65b173edb337317e1c1200f48c64ad952ccc386a Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Thu, 8 Feb 2024 13:43:27 +0200 Subject: [PATCH 016/190] return errors in non-update APIs --- ...r_command_buffer_mutable_dispatch.asciidoc | 22 ++++++++++++++++--- xml/cl.xml | 1 + 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index 2b7949f35..8ba601ba3 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -236,7 +236,7 @@ CL_INVALID_MUTABLE_COMMAND_KHR -1141 CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 /* cl_command_buffer_properties_khr */ -#define CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 +CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 // Property to cl_ndrange_kernel_command_properties_khr CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 @@ -354,10 +354,14 @@ Add a {CL_COMMAND_BUFFER_ASSERTS_KHR} property to the If the user's update to the values of _local_work_size_ and/or _global_work_size_ result in an increase in the number of work-groups in the ND-range over the number specified when the ND-range kernel was recorded, the behavior is undefined. - If the user updates _local_work_size_ to be _NULL_, the behavior is undefined. |==== +===== Additional Errors + +* {CL_INVALID_VALUE} if _properties_ has a {CL_COMMAND_BUFFER_ASSERTS_KHR} property with + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, but _local_work_size_ is `NULL`. + ==== Modifications to clCommandNDRangeKernelKHR ===== Properties Parameter @@ -438,7 +442,6 @@ in the table below. defined as the product for each _i_ from _0_ to _work_dim - 1_ of _ceil((global_work_size[i] - global_work_offset[i])/local_work_size[i])_ (if _global_work_offset_ is NULL, _global_work_offset[i]_ should be replaced with _0_). 
- In case _local_work_size_ is NULL, the effect of this flag is undefined. |==== ===== Mutable Handle Parameter @@ -464,6 +467,11 @@ Is replaced with {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} for the device associated with _command_queue_. If _command_queue_ is `NULL`, the device associated with _command_buffer_ must report support for these properties. + +The following error condition is added: + +* {CL_INVALID_VALUE} if _properties_ has a {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} property with + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, but _local_work_size_ is `NULL`. [[mutable-commands]] ==== New Section in the OpenCL API specification 5.X.5 - Mutable Commands: @@ -528,6 +536,14 @@ state of all commands is known, rather than iteratively updating each command individually. ==== +[NOTE] +==== +if the command buffer has been created with {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, or +the updated ND-range command has been recorded with this flag, and the ND-range parameters are updated so +that the new number of work-groups exceeds the number when the ND-range command was recorded, the behavior +is undefined. +==== + _command_buffer_ Refers to a valid command-buffer object. _mutable_config_ Is a pointer to a diff --git a/xml/cl.xml b/xml/cl.xml index 4912779d7..817e4644c 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -7153,6 +7153,7 @@ server's OpenCL/api-docs repository.
    + From 980a2dce3b4d0343a1ab9358c37b06a667c13194 Mon Sep 17 00:00:00 2001 From: Aharon Abramson Date: Sun, 11 Feb 2024 13:30:46 +0200 Subject: [PATCH 017/190] spaces Co-authored-by: Sun Serega --- xml/cl.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xml/cl.xml b/xml/cl.xml index 817e4644c..5797c97b5 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1784,7 +1784,7 @@ server's OpenCL/api-docs repository. - +
    From 23777a31772984c36508b46a9882156c8a562861 Mon Sep 17 00:00:00 2001 From: Aharon Abramson Date: Sun, 11 Feb 2024 13:31:18 +0200 Subject: [PATCH 018/190] Update ext/cl_khr_command_buffer_mutable_dispatch.asciidoc Co-authored-by: Ewan Crawford --- ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index 8ba601ba3..d1c76a7b9 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -538,7 +538,7 @@ individually. [NOTE] ==== -if the command buffer has been created with {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, or +If the command buffer has been created with {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, or the updated ND-range command has been recorded with this flag, and the ND-range parameters are updated so that the new number of work-groups exceeds the number when the ND-range command was recorded, the behavior is undefined. From df7f8490e1d0d73dd00d425fb0c9273649bc6bd3 Mon Sep 17 00:00:00 2001 From: aharon-abramson Date: Sun, 11 Feb 2024 13:37:13 +0200 Subject: [PATCH 019/190] update the formula for number of WGs --- ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index d1c76a7b9..0de7cb03a 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -440,8 +440,7 @@ in the table below. An assertion by the user that the number of work-groups of this ND-range kernel will not be updated beyond the number defined when the ND-range kernel was recorded. 
The number of work-groups is defined as the product for each _i_ from _0_ to _work_dim - 1_ of - _ceil((global_work_size[i] - global_work_offset[i])/local_work_size[i])_ (if - _global_work_offset_ is NULL, _global_work_offset[i]_ should be replaced with _0_). + _ceil(global_work_size[i]/local_work_size[i])_. |==== ===== Mutable Handle Parameter From e89bf7f8410948b05977712f375927dd4aef1621 Mon Sep 17 00:00:00 2001 From: Aharon Abramson Date: Mon, 12 Feb 2024 09:35:58 +0200 Subject: [PATCH 020/190] Update xml/cl.xml Co-authored-by: Sun Serega --- xml/cl.xml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xml/cl.xml b/xml/cl.xml index 5797c97b5..0a36a34b6 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -7321,7 +7321,10 @@ server's OpenCL/api-docs repository. - + + + + From f6486595d2dfa04aa6f88f3da952eb330eaaf47f Mon Sep 17 00:00:00 2001 From: joshqti <127994991+joshqti@users.noreply.github.com> Date: Tue, 7 Nov 2023 07:22:16 -0800 Subject: [PATCH 021/190] Remove minor TODO comment (#957) (#988) Delete obsolete comment in cl_khr_semaphore. Issue --- ext/cl_khr_semaphore.asciidoc | 1 - 1 file changed, 1 deletion(-) diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc index 751732420..e0aff5357 100644 --- a/ext/cl_khr_semaphore.asciidoc +++ b/ext/cl_khr_semaphore.asciidoc @@ -155,7 +155,6 @@ CL_SEMAPHORE_PROPERTIES_KHR 0x203B CL_SEMAPHORE_PAYLOAD_KHR 0x203C ---- -// TODO: We don't need an enum assigned for CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR and should just use 0. // TODO: Do we need to define CL_SEMAPHORE_DEVICE_HANDLE_LIST here or should it be in the external semaphore spec instead? 
New attributes that can be passed as part of {cl_semaphore_info_khr_TYPE} or {cl_semaphore_properties_khr_TYPE}: From e5d39eb1587e400dfddf11880db03468b3ed71bc Mon Sep 17 00:00:00 2001 From: Jon Leech <4693344+oddhack@users.noreply.github.com> Date: Tue, 7 Nov 2023 09:12:05 -0800 Subject: [PATCH 022/190] Use hexapdf instead of ghostscript for PDF optimization (#991) * Use hexapdf instead of ghostscript for PDF optimization Resulting PDFs tend to be considerably smaller, and also runs about 15% faster when doing a full PDF build (2:39 vs. 3:06 on my machine). The hexapdf tool does need to be installed in the build environment - it is in the khronosgroup/docker-images:asciidoctor-spec Docker image. * Add hexapdf to Travis environment. --- .travis.yml | 1 + Makefile | 77 +++++++++-------------------------------------------- README.adoc | 10 ++++--- 3 files changed, 20 insertions(+), 68 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0b0580d64..fc2b142cd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,7 @@ before_install: - gem install coderay -v 1.1.1 - gem install rouge -v 3.19.0 - gem install ttfunk -v 1.5.1 + - gem install hexapdf -v 0.27.0 - gem install asciidoctor-pdf -v 1.5.0 - gem install asciidoctor-mathematical -v 0.3.5 diff --git a/Makefile b/Makefile index a6243f410..85d2ca239 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,6 @@ RM = rm -f RMRF = rm -rf MKDIR = mkdir -p CP = cp -GS_EXISTS := $(shell command -v gs 2> /dev/null) GITHEAD = ./.git/logs/HEAD # Target directories for output files @@ -163,6 +162,10 @@ icdinst: icdinsthtml icdinstpdf html: apihtml envhtml exthtml extensionshtml cxxhtml chtml icdinsthtml +# PDF optimizer - usage $(OPTIMIZEPDF) in.pdf out.pdf +# OPTIMIZEPDFOPTS=--compress-pages is slightly better, but much slower +OPTIMIZEPDF = hexapdf optimize $(OPTIMIZEPDFOPTS) + pdf: apipdf envpdf extpdf extensionspdf cxxpdf cpdf icdinstpdf # Spec targets. 
@@ -192,13 +195,7 @@ $(PDFDIR)/$(APISPEC).pdf: $(APISPECSRC) $(QUIET)$(MKDIR) $(PDFDIR) $(QUIET)$(MKDIR) $(PDFMATHDIR) $(QUIET)$(ASCIIDOCTOR) -b pdf $(ADOCOPTS) $(ADOCPDFOPTS) -o $@ $(APISPEC).txt -ifndef GS_EXISTS - $(QUIET) echo "Warning: Ghostscript not installed, skipping pdf optimization" -else - $(QUIET)$(CURDIR)/config/optimize-pdf $@ - $(QUIET)rm $@ - $(QUIET)mv $(PDFDIR)/$(APISPEC)-optimized.pdf $@ -endif + $(QUIET)$(OPTIMIZEPDF) $@ $@.out.pdf && mv $@.out.pdf $@ # Environment spec @@ -218,13 +215,7 @@ $(PDFDIR)/$(ENVSPEC).pdf: $(ENVSPECSRC) $(QUIET)$(MKDIR) $(PDFDIR) $(QUIET)$(MKDIR) $(PDFMATHDIR) $(QUIET)$(ASCIIDOCTOR) -b pdf $(ADOCOPTS) $(ADOCPDFOPTS) -o $@ $(ENVSPEC).txt -ifndef GS_EXISTS - $(QUIET) echo "Warning: Ghostscript not installed, skipping pdf optimization" -else - $(QUIET)$(CURDIR)/config/optimize-pdf $@ - $(QUIET)rm $@ - $(QUIET)mv $(PDFDIR)/$(ENVSPEC)-optimized.pdf $@ -endif + $(QUIET)$(OPTIMIZEPDF) $@ $@.out.pdf && mv $@.out.pdf $@ # Extensions spec EXTSPEC = OpenCL_Ext @@ -242,13 +233,7 @@ $(PDFDIR)/$(EXTSPEC).pdf: $(EXTSPECSRC) $(QUIET)$(MKDIR) $(PDFDIR) $(QUIET)$(MKDIR) $(PDFMATHDIR) $(QUIET)$(ASCIIDOCTOR) -b pdf $(ADOCOPTS) $(ADOCPDFOPTS) -o $@ $(EXTSPEC).txt -ifndef GS_EXISTS - $(QUIET) echo "Warning: Ghostscript not installed, skipping pdf optimization" -else - $(QUIET)$(CURDIR)/config/optimize-pdf $@ - $(QUIET)rm $@ - $(QUIET)mv $(PDFDIR)/$(EXTSPEC)-optimized.pdf $@ -endif + $(QUIET)$(OPTIMIZEPDF) $@ $@.out.pdf && mv $@.out.pdf $@ # Individual extensions spec(s) EXTDIR = extensions @@ -280,13 +265,7 @@ $(PDFDIR)/$(EXTENSIONSSPEC).pdf: $(EXTENSIONSSPECSRC) $(GENDEPENDS) $(QUIET)$(MKDIR) $(PDFDIR) $(QUIET)$(MKDIR) $(PDFMATHDIR) $(QUIET)$(ASCIIDOCTOR) -b pdf $(ADOCOPTS) $(ADOCPDFOPTS) -o $@ $(EXTDIR)/$(EXTENSIONSSPEC).txt -ifndef GS_EXISTS - $(QUIET) echo "Warning: Ghostscript not installed, skipping pdf optimization" -else - $(QUIET)$(CURDIR)/config/optimize-pdf $@ - $(QUIET)rm $@ - $(QUIET)mv 
$(PDFDIR)/$(EXTENSIONSSPEC)-optimized.pdf $@ -endif + $(QUIET)$(OPTIMIZEPDF) $@ $@.out.pdf && mv $@.out.pdf $@ # Language Extensions spec CEXTDOC = OpenCL_LangExt @@ -304,13 +283,7 @@ $(PDFDIR)/$(CEXTDOC).pdf: $(CEXTDOCSRC) $(QUIET)$(MKDIR) $(PDFDIR) $(QUIET)$(MKDIR) $(PDFMATHDIR) $(QUIET)$(ASCIIDOCTOR) -b pdf $(ADOCOPTS) $(ADOCPDFOPTS) -o $@ $(CEXTDOC).txt -ifndef GS_EXISTS - $(QUIET) echo "Warning: Ghostscript not installed, skipping pdf optimization" -else - $(QUIET)$(CURDIR)/config/optimize-pdf $@ - $(QUIET)rm $@ - $(QUIET)mv $(PDFDIR)/$(CEXTDOC)-optimized.pdf $@ -endif + $(QUIET)$(OPTIMIZEPDF) $@ $@.out.pdf && mv $@.out.pdf $@ # C++ (cxx) spec CXXSPEC = OpenCL_Cxx @@ -328,13 +301,7 @@ $(PDFDIR)/$(CXXSPEC).pdf: $(CXXSPECSRC) $(QUIET)$(MKDIR) $(PDFDIR) $(QUIET)$(MKDIR) $(PDFMATHDIR) $(QUIET)$(ASCIIDOCTOR) -b pdf $(ADOCOPTS) $(ADOCPDFOPTS) -o $@ $(CXXSPEC).txt -ifndef GS_EXISTS - $(QUIET) echo "Warning: Ghostscript not installed, skipping pdf optimization" -else - $(QUIET)$(CURDIR)/config/optimize-pdf $@ - $(QUIET)rm $@ - $(QUIET)mv $(PDFDIR)/$(CXXSPEC)-optimized.pdf $@ -endif + $(QUIET)$(OPTIMIZEPDF) $@ $@.out.pdf && mv $@.out.pdf $@ # C spec CSPEC = OpenCL_C @@ -352,13 +319,7 @@ $(PDFDIR)/$(CSPEC).pdf: $(CSPECSRC) $(QUIET)$(MKDIR) $(PDFDIR) $(QUIET)$(MKDIR) $(PDFMATHDIR) $(QUIET)$(ASCIIDOCTOR) -b pdf $(ADOCOPTS) $(ADOCPDFOPTS) -o $@ $(CSPEC).txt -ifndef GS_EXISTS - $(QUIET) echo "Warning: Ghostscript not installed, skipping pdf optimization" -else - $(QUIET)$(CURDIR)/config/optimize-pdf $@ - $(QUIET)rm $@ - $(QUIET)mv $(PDFDIR)/$(CSPEC)-optimized.pdf $@ -endif + $(QUIET)$(OPTIMIZEPDF) $@ $@.out.pdf && mv $@.out.pdf $@ # C++ for OpenCL doc CXX4OPENCLDOC = CXX_for_OpenCL @@ -376,13 +337,7 @@ $(PDFDIR)/$(CXX4OPENCLDOC).pdf: $(CXX4OPENCLDOCSRC) $(QUIET)$(MKDIR) $(PDFDIR) $(QUIET)$(MKDIR) $(PDFMATHDIR) $(QUIET)$(ASCIIDOCTOR) -b pdf $(CXX4OPENCL_ADOCOPTS) $(ADOCPDFOPTS) -o $@ $(CXX4OPENCLDOC).txt -ifndef GS_EXISTS - $(QUIET) echo "Warning: Ghostscript not installed, 
skipping pdf optimization" -else - $(QUIET)$(CURDIR)/config/optimize-pdf $@ - $(QUIET)rm $@ - $(QUIET)mv $(PDFDIR)/$(CXX4OPENCLDOC)-optimized.pdf $@ -endif + $(QUIET)$(OPTIMIZEPDF) $@ $@.out.pdf && mv $@.out.pdf $@ # ICD installation guidelines ICDINSTSPEC = OpenCL_ICD_Installation @@ -400,13 +355,7 @@ $(PDFDIR)/$(ICDINSTSPEC).pdf: $(ICDINSTSPECSRC) $(QUIET)$(MKDIR) $(PDFDIR) $(QUIET)$(MKDIR) $(PDFMATHDIR) $(QUIET)$(ASCIIDOCTOR) -b pdf $(ADOCOPTS) $(ADOCPDFOPTS) -o $@ $(ICDINSTSPEC).txt -ifndef GS_EXISTS - $(QUIET) echo "Warning: Ghostscript not installed, skipping pdf optimization" -else - $(QUIET)$(CURDIR)/config/optimize-pdf $@ - $(QUIET)rm $@ - $(QUIET)mv $(PDFDIR)/$(ICDINSTSPEC)-optimized.pdf $@ -endif + $(QUIET)$(OPTIMIZEPDF) $@ $@.out.pdf && mv $@.out.pdf $@ # Clean generated and output files diff --git a/README.adoc b/README.adoc index a71ccdb2f..410abb0bc 100644 --- a/README.adoc +++ b/README.adoc @@ -246,6 +246,8 @@ scheme. This section describes the software components used by the OpenCL spec toolchain. +The specified versions are known to work. +Later compatible versions will probably work as well. Before building the OpenCL specs, you must install the following tools: @@ -260,10 +262,6 @@ Before building the OpenCL specs, you must install the following tools: Any version supporting the following operations should work: ** `git symbolic-ref --short HEAD` ** `git log -1 --format="%H"` - * Ghostscript (ghostscript, version: 9.10). - This is for the PDF build, and it can still progress without it. - Ghostscript is used to optimize the size of the PDF, so will be a lot - smaller if it is included. * ttf Fonts. These are needed the PDF build for latexmath rendering. See https://github.com/asciidoctor/asciidoctor-mathematical/blob/master/README.md#dependencies[Font Dependencies for asciidoctor-mathematical]. @@ -277,6 +275,7 @@ parts you don't use) completely before trying to install. 
* Asciidoctor (asciidoctor, version: 2.0.16) * Coderay (coderay, version: 1.1.1) + * hexapdf (version: 0.27.0) * rouge (rouge, version 3.19.0) * ttfunk (ttfunk, version: 1.5.1) * Asciidoctor PDF (asciidoctor-pdf, version: 1.5.0) @@ -444,6 +443,7 @@ echo "2.3.3" > ~/.rbenv/version gem install asciidoctor -v 2.0.16 gem install coderay -v 1.1.1 +gem install hexapdf -v 0.27.0 gem install rouge -v 3.19.0 gem install ttfunk -v 1.5.1 gem install asciidoctor-pdf -v 1.5.0 @@ -656,6 +656,7 @@ command, once the platform is set up: ---- gem install asciidoctor -v 2.0.16 gem install coderay -v 1.1.1 +gem install hexapdf -v 0.27.0 gen install rouge -v 3.19.0 gem install ttfunk -v 1.5.1 @@ -687,6 +688,7 @@ by Khronos. [[history]] == Revision History + * 2023-11-05 - Add hexapdf, remove ghostscript * 2020-03-13 - Updated package versions to match Travis build. * 2019-06-20 - Add directions for publishing OpenCL 2.2 reference pages, generated from the spec sources in this repository, in the From ceb87e7511ee73971f132d850cb3eb94bbb37f6e Mon Sep 17 00:00:00 2001 From: joshqti <127994991+joshqti@users.noreply.github.com> Date: Tue, 14 Nov 2023 10:36:06 -0800 Subject: [PATCH 023/190] cl_khr_semaphore: Enforce one device semaphores (#973) (#996) * cl_khr_semaphore: Enforce one device semaphores (#973) Only permit semaphores to be associated with a single device. Add an error code for invalid use. * Changes wording according to review comments * Change error code to CL_INVALID_PROPERTY if a context is multi-device, and no device is specified. --- ext/cl_khr_semaphore.asciidoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc index e0aff5357..3c6c9da15 100644 --- a/ext/cl_khr_semaphore.asciidoc +++ b/ext/cl_khr_semaphore.asciidoc @@ -257,10 +257,10 @@ Following new properties are added to the list of possible supported properties | Specifies the type of semaphore to create. 
This property is always required. | {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} | {cl_device_id_TYPE}[] - | Specifies the list of OpenCL devices (terminated with {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR}) to associate with the semaphore. + | Specifies the list of OpenCL devices (terminated with {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR}) to associate with the semaphore. Only a single device is permitted in the list. |==== -If {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not specified as part of _sema_props_, the semaphore object created by {clCreateSemaphoreWithPropertiesKHR} is by default accessible to all devices in the _context_. +If {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not specified as part of _sema_props_, the semaphore object created by {clCreateSemaphoreWithPropertiesKHR} is by default accessible to all devices in the _context_. For a multi-device context {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} must be specified in _sema_props_. _errcode_ret_ returns an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. @@ -268,8 +268,8 @@ _errcode_ret_ returns an appropriate error code. If _errcode_ret_ is `NULL`, no Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: * {CL_INVALID_CONTEXT} if _context_ is not a valid context. -* {CL_INVALID_PROPERTY} if a property name in _sema_props_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than once. -* {CL_INVALID_DEVICE} if {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is specified as part of _sema_props_, but it does not identify a valid device or if a device identified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not one of the devices within _context_. +* {CL_INVALID_PROPERTY} if a property name in _sema_props_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than once. 
Additionally, this error is returned if _context_ is a multi-device context and _sema_props_ does not specify {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR}. +* {CL_INVALID_DEVICE} if {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is specified as part of _sema_props_, but it does not identify exactly one valid device or if a device identified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not one of the devices within _context_. * {CL_INVALID_VALUE} ** if _sema_props_ is `NULL`, or ** if _sema_props_ do not specify pairs for minimum set of properties (i.e. {CL_SEMAPHORE_TYPE_KHR}) required for successful creation of a {cl_semaphore_khr_TYPE}, or From 981ba9564a3ec5aa73cb65a4a3297f1f92ccd29e Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 15 Nov 2023 15:05:31 -0800 Subject: [PATCH 024/190] move the layers spec to the extensions directory (#999) Since the layers spec is not published in the OpenCL extension spec and is instead published on the OpenCL registry similar to EXT and vendor extensions, it makes more sense to put it in the extensions directory. --- {ext => extensions}/cl_loader_layers.asciidoc | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {ext => extensions}/cl_loader_layers.asciidoc (100%) diff --git a/ext/cl_loader_layers.asciidoc b/extensions/cl_loader_layers.asciidoc similarity index 100% rename from ext/cl_loader_layers.asciidoc rename to extensions/cl_loader_layers.asciidoc From ea1c145d2095f48236ac48445ffababe11dbb41a Mon Sep 17 00:00:00 2001 From: joshqti <127994991+joshqti@users.noreply.github.com> Date: Tue, 21 Nov 2023 21:05:25 -0800 Subject: [PATCH 025/190] cl_semaphore_khr: Query if semaphore is exportable (#997) * cl_semaphore_khr: Query if semaphore is exportable Add query to clGetSemaphoreInfoKHR that returns CL_TRUE if a semaphore is exportable. * Change extension version to 0.9.1 * Add missing brackets around return types.
--- ext/cl_khr_external_semaphore.asciidoc | 11 +++++++++++ ext/cl_khr_semaphore.asciidoc | 2 -- xml/cl.xml | 5 ++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ext/cl_khr_external_semaphore.asciidoc b/ext/cl_khr_external_semaphore.asciidoc index 10cdaa60b..5cc6fe347 100644 --- a/ext/cl_khr_external_semaphore.asciidoc +++ b/ext/cl_khr_external_semaphore.asciidoc @@ -40,6 +40,7 @@ Other related extensions define specific external semaphores that may be importe |==== | *Date* | *Version* | *Description* | 2021-09-10 | 0.9.0 | Initial version (provisional). +| 2023-11-16 | 0.9.1 | Added CL_SEMAPHORE_EXPORTABLE_KHR. |==== NOTE: This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. If you have feedback please create an issue on https://github.com/KhronosGroup/OpenCL-Docs/ @@ -129,6 +130,13 @@ CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x203F CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR 0 ---- +The following new attribute can be passed as part of {cl_semaphore_info_khr_TYPE}: + +[source] +---- +CL_SEMAPHORE_EXPORTABLE_KHR 0x2054 +---- + External semaphore handle type added by `cl_khr_external_semaphore_dx_fence`: [source] @@ -234,6 +242,9 @@ Add to the list of supported _param_names_ by {clGetSemaphoreInfoKHR}: | Returns the list of external semaphore handle types that may be used for exporting. The size of this query may be 0 indicating that this semaphore does not support any handle types for exporting. +| {CL_SEMAPHORE_EXPORTABLE_KHR} + | {cl_bool_TYPE} + | Returns {CL_TRUE} if the semaphore is exportable and {CL_FALSE} otherwise.
|==== === Exporting semaphore external handles diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc index 3c6c9da15..e6cfd4ab7 100644 --- a/ext/cl_khr_semaphore.asciidoc +++ b/ext/cl_khr_semaphore.asciidoc @@ -155,8 +155,6 @@ CL_SEMAPHORE_PROPERTIES_KHR 0x203B CL_SEMAPHORE_PAYLOAD_KHR 0x203C ---- -// TODO: Do we need to define CL_SEMAPHORE_DEVICE_HANDLE_LIST here or should it be in the external semaphore spec instead? - New attributes that can be passed as part of {cl_semaphore_info_khr_TYPE} or {cl_semaphore_properties_khr_TYPE}: [source] diff --git a/xml/cl.xml b/xml/cl.xml index 0a36a34b6..b64d8dff4 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1854,7 +1854,7 @@ server's OpenCL/api-docs repository. - + @@ -6991,6 +6991,9 @@ server's OpenCL/api-docs repository. + + + From a164c7ff48b51ce5f81ff11912c0767d03a7aa2f Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 28 Nov 2023 04:22:16 -0800 Subject: [PATCH 026/190] remove TODO comment in semaphore spec (#1012) The default behavior when the device handle list is not specified is now properly described, so the TODO comment can be removed. --- ext/cl_khr_semaphore.asciidoc | 2 -- 1 file changed, 2 deletions(-) diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc index e6cfd4ab7..ff4297b32 100644 --- a/ext/cl_khr_semaphore.asciidoc +++ b/ext/cl_khr_semaphore.asciidoc @@ -239,8 +239,6 @@ include::{generated}/api/protos/clCreateSemaphoreWithPropertiesKHR.txt[] _context_ identifies a valid OpenCL context that the created {cl_semaphore_khr_TYPE} will belong to. -// TODO: Do we want the same "all devices in the context" behavior if CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR is not specified? - _sema_props_ specifies additional semaphore properties in the form list of pairs terminated with 0. {CL_SEMAPHORE_TYPE_KHR} must be part of the list of properties specified by _sema_props_. 
From 92e08baa6a698fa2358540f55e1e73eef5ed583b Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 28 Nov 2023 04:37:09 -0800 Subject: [PATCH 027/190] document USM error conditions for clSetKernelExecInfo (#974) --- extensions/cl_intel_unified_shared_memory.asciidoc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/extensions/cl_intel_unified_shared_memory.asciidoc b/extensions/cl_intel_unified_shared_memory.asciidoc index f591bc247..33381e294 100644 --- a/extensions/cl_intel_unified_shared_memory.asciidoc +++ b/extensions/cl_intel_unified_shared_memory.asciidoc @@ -788,6 +788,13 @@ The new _param_name_ values described below may be used with the existing *clSet |==== +The following errors may be returned by *clSetKernelExecInfo* for these new _param_name_ values: + +* `CL_INVALID_OPERATION` if _param_name_ is `CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL` and no devices in the context associated with _kernel_ support Unified Shared Memory. +* `CL_INVALID_OPERATION` if _param_name_ is `CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL` and no devices in the context associated with _kernel_ support host Unified Shared Memory allocations. +* `CL_INVALID_OPERATION` if _param_name_ is `CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL` and no devices in the context associated with _kernel_ support device Unified Shared Memory allocations. +* `CL_INVALID_OPERATION` if _param_name_ is `CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL` and no devices in the context associated with _kernel_ support shared Unified Shared Memory allocations. + ==== Filling and Copying Unified Shared Memory The function @@ -1281,6 +1288,7 @@ Note that there is no similar SVM "rect" memcpy. |S|2020-08-26|Maciej Dziuban|Added initial placement flags for shared allocations. |1.0.0|2021-11-07|Ben Ashbaugh|Added version and other minor updates prior to posting on the OpenCL registry. 
|1.0.0|2022-11-08|Ben Ashbaugh|Added new issues regarding error behavior for clSetKernelArgMemPointerINTEL and rect copies. +|1.0.1|2023-08-28|Ben Ashbaugh|Documented error conditions for clSetKernelExecInfo. |======================================== //************************************************************************ From 0cd45cb95c98bff247663f78149683a5771c7f10 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 28 Nov 2023 04:47:44 -0800 Subject: [PATCH 028/190] add description metadata (#1000) --- OpenCL_API.txt | 4 ++++ OpenCL_C.txt | 3 +++ OpenCL_Env.txt | 3 +++ OpenCL_Ext.txt | 3 +++ 4 files changed, 13 insertions(+) diff --git a/OpenCL_API.txt b/OpenCL_API.txt index cc0092ee9..db6d507fe 100644 --- a/OpenCL_API.txt +++ b/OpenCL_API.txt @@ -19,6 +19,10 @@ Khronos{R} OpenCL Working Group :docinfo: shared-header :docinfodir: config :title-logo-image: image:images/OpenCL.png[top="25%",width="55%"] +:description: OpenCL(TM) is an open, royalty-free standard for cross-platform \ +parallel programming of diverse accelerators. \ +This document describes the OpenCL API. + // Various special / math symbols. This is easier to edit with than Unicode. include::config/attribs.txt[] diff --git a/OpenCL_C.txt b/OpenCL_C.txt index c1a7e2510..1492dc238 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -20,6 +20,9 @@ Khronos{R} OpenCL Working Group :docinfo: shared-header :docinfodir: config :title-logo-image: image:images/OpenCL.png[top="25%",width="55%"] +:description: OpenCL(TM) is an open, royalty-free standard for cross-platform \ +parallel programming of diverse accelerators. \ +This document describes the OpenCL C language. // Various special / math symbols. This is easier to edit with than Unicode. 
include::config/attribs.txt[] diff --git a/OpenCL_Env.txt b/OpenCL_Env.txt index 7d10a347d..debc6b13a 100644 --- a/OpenCL_Env.txt +++ b/OpenCL_Env.txt @@ -18,6 +18,9 @@ Khronos{R} OpenCL Working Group :docinfo: shared-header :docinfodir: config :title-logo-image: image:images/OpenCL.png[top="25%",width="55%"] +:description: OpenCL(TM) is an open, royalty-free standard for cross-platform \ +parallel programming of diverse accelerators. \ +This document describes the OpenCL SPIR-V environment. // Various special / math symbols. This is easier to edit with than Unicode. include::config/attribs.txt[] diff --git a/OpenCL_Ext.txt b/OpenCL_Ext.txt index 0fe4b83c3..cedcd485a 100644 --- a/OpenCL_Ext.txt +++ b/OpenCL_Ext.txt @@ -21,6 +21,9 @@ ifndef::backend-html5[:toclevels: 2] :docinfo: shared-header :docinfodir: config :title-logo-image: image:images/OpenCL.png[top="25%",width="55%"] +:description: OpenCL(TM) is an open, royalty-free standard for cross-platform \ +parallel programming of diverse accelerators. \ +This document describes OpenCL extensions. // Various special / math symbols. This is easier to edit with than Unicode. 
include::config/attribs.txt[] From 6134e8d8c458f1540972bc499b0ddef6b7ff4bcf Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 28 Nov 2023 04:53:18 -0800 Subject: [PATCH 029/190] consistently put newer versions at the bottom of version history tables (#1010) --- ext/cl_khr_external_memory.asciidoc | 6 +++--- ext/cl_khr_integer_dot_product.asciidoc | 2 +- ext/cl_khr_semaphore.asciidoc | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ext/cl_khr_external_memory.asciidoc b/ext/cl_khr_external_memory.asciidoc index 43a780de2..01f7330d1 100644 --- a/ext/cl_khr_external_memory.asciidoc +++ b/ext/cl_khr_external_memory.asciidoc @@ -30,10 +30,10 @@ Other related extensions define specific external memory types that may be impor [cols="1,1,3",options="header",] |==== | *Date* | *Version* | *Description* -| 2023-08-29 | 0.9.3 | Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_HANDLE_TYPES_KHR} (provisional). -| 2023-08-01 | 0.9.2 | Changed device handle list enum to the memory-specific {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). -| 2023-05-04 | 0.9.1 | Clarified device handle list enum cannot be specified without an external memory handle (provisional). | 2021-09-10 | 0.9.0 | Initial version (provisional). +| 2023-05-04 | 0.9.1 | Clarified device handle list enum cannot be specified without an external memory handle (provisional). +| 2023-08-01 | 0.9.2 | Changed device handle list enum to the memory-specific {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). +| 2023-08-29 | 0.9.3 | Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_HANDLE_TYPES_KHR} (provisional). |==== NOTE: This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. 
If you have feedback please create an issue on https://github.com/KhronosGroup/OpenCL-Docs/ diff --git a/ext/cl_khr_integer_dot_product.asciidoc b/ext/cl_khr_integer_dot_product.asciidoc index c124fec12..d75742f10 100644 --- a/ext/cl_khr_integer_dot_product.asciidoc +++ b/ext/cl_khr_integer_dot_product.asciidoc @@ -19,8 +19,8 @@ functions to compute the dot product of vectors of integers. [cols="1,1,3",options="header",] |==== | *Date* | *Version* | *Description* -| 2021-06-23 | 2.0.0 | All 8-bit support is mandatory, added 8-bit acceleration properties. | 2021-06-17 | 1.0.0 | Initial version. +| 2021-06-23 | 2.0.0 | All 8-bit support is mandatory, added 8-bit acceleration properties. |==== ==== Dependencies diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc index ff4297b32..9246b9ce6 100644 --- a/ext/cl_khr_semaphore.asciidoc +++ b/ext/cl_khr_semaphore.asciidoc @@ -37,8 +37,8 @@ In particular, this extension defines: [cols="1,1,3",options="header",] |==== | *Date* | *Version* | *Description* -| 2023-08-01 | 0.9.1 | Changed device handle list enum to the semaphore-specific {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} (provisional). | 2021-09-10 | 0.9.0 | Initial version (provisional). +| 2023-08-01 | 0.9.1 | Changed device handle list enum to the semaphore-specific {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} (provisional). |==== NOTE: This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. 
If you have feedback please create an issue on https://github.com/KhronosGroup/OpenCL-Docs/ From a40434d0f12e6a867d60cc2835881a7d8b6147e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 29 Nov 2023 18:44:43 +0000 Subject: [PATCH 030/190] Reserve enum for cl_ext_yuv_images (#1022) Change-Id: I942c3ce47284e7aea93edc02cf0f327af95e4ed9 --- xml/cl.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xml/cl.xml b/xml/cl.xml index b64d8dff4..404a47e1d 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1567,8 +1567,7 @@ server's OpenCL/api-docs repository. - - + From 502e3c0c938e33c0b296dd93888dabf05d5f479c Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 29 Nov 2023 10:45:05 -0800 Subject: [PATCH 031/190] allow clSetCommandQueueProperty to return an error for non-OpenCL 1.0 devices (#980) This follows the same pattern and text used by clSetProgramReleaseCallback, which may return an error for non-OpenCL 2.2 devices. --- api/opencl_runtime_layer.asciidoc | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index ed59544f0..861b1c0ad 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -428,25 +428,28 @@ include::{generated}/api/version-notes/clSetCommandQueueProperty.asciidoc[] changed by {clSetCommandQueueProperty}. If _old_properties_ is `NULL`, it is ignored. -[NOTE] -==== -Changing the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} command-queue property -will cause the OpenCL implementation to block until all previously queued -commands in _command_queue_ have completed. This can be an expensive operation -and therefore changes to this property should only be done when absolutely -necessary. 
-==== - // refError +{clSetCommandQueueProperty} may unconditionally return an error if no +devices in the context associated with _command_queue_ support modifying +the properties of a command-queue. +Support for modifying the properties of a command-queue is required only for +OpenCL 1.0 devices. + {clSetCommandQueueProperty} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. + * {CL_INVALID_OPERATION} if no devices in the context associated with + _command_queue_ support modifying the properties of a command-queue. * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_QUEUE_PROPERTIES} if values specified in _properties_ are valid but are not supported by the device. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- @@ -5672,14 +5675,14 @@ object is deleted. This provides a mechanism for an application to be notified when destructors for program scope global variables are complete. +// refError + {clSetProgramReleaseCallback} may unconditionally return an error if no devices in the context associated with _program_ support destructors for program scope global variables. Support for constructors and destructors for program scope global variables is required only for OpenCL 2.2 devices. -// refError - {clSetProgramReleaseCallback} returns {CL_SUCCESS} if the function is executed successfully. 
Otherwise, it returns one of the following errors: From 3300536d54e09ea8f95e5e9d2fdb45fd1d46e323 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 29 Nov 2023 10:46:13 -0800 Subject: [PATCH 032/190] clarify the free function for clEnqueueSVMFree must be thread-safe (#1016) --- api/opencl_platform_layer.asciidoc | 2 +- api/opencl_runtime_layer.asciidoc | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index f34098091..abac6b88a 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -1837,7 +1837,7 @@ include::{generated}/api/version-notes/clCreateContext.asciidoc[] at runtime in this context. This callback function may be called asynchronously by the OpenCL implementation. - It is the applications responsibility to ensure that the callback function + It is the application's responsibility to ensure that the callback function is thread-safe. If _pfn_notify_ is `NULL`, no callback function is registered. * _user_data_ will be passed as the _user_data_ argument when _pfn_notify_ is diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 861b1c0ad..75c6728ae 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -4534,6 +4534,10 @@ include::{generated}/api/version-notes/clEnqueueSVMFree.asciidoc[] function returns. * _pfn_free_func_ specifies the callback function to be called to free the SVM pointers. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. _pfn_free_func_ takes four arguments: _queue_ which is the command-queue in which {clEnqueueSVMFree} was enqueued, the count and list of SVM pointers to free and _user_data_ which is a pointer to user specified data. 
@@ -5663,7 +5667,7 @@ include::{generated}/api/version-notes/clSetProgramReleaseCallback.asciidoc[] ** _user_data_ is a pointer to user supplied data. * _user_data_ will be passed as the _user_data_ argument when _pfn_notify_ is called. - user data can be `NULL`. + _user_data_ can be `NULL`. Each call to {clSetProgramReleaseCallback} registers the specified callback function on a callback stack associated with _program_. @@ -5816,10 +5820,10 @@ include::{generated}/api/version-notes/clBuildProgram.asciidoc[] has completed. This callback function may be called asynchronously by the OpenCL implementation. - It is the applications responsibility to ensure that the callback function + It is the application's responsibility to ensure that the callback function is thread-safe. - ** _user_data_ will be passed as an argument when _pfn_notify_ is called. - _user_data_ can be `NULL`. + * _user_data_ will be passed as an argument when _pfn_notify_ is called. + _user_data_ can be `NULL`. The program executable is built from the program source or binary for all the devices, or a specific device(s) in the OpenCL context associated with @@ -5954,10 +5958,10 @@ include::{generated}/api/version-notes/clCompileProgram.asciidoc[] compiler has completed. This callback function may be called asynchronously by the OpenCL implementation. - It is the applications responsibility to ensure that the callback function + It is the application's responsibility to ensure that the callback function is thread-safe. - ** _user_data_ will be passed as an argument when _pfn_notify_ is called. - _user_data_ can be `NULL`. + * _user_data_ will be passed as an argument when _pfn_notify_ is called. + _user_data_ can be `NULL`. The pre-processor runs before the program sources are compiled. 
The compiled binary is built for all devices associated with _program_ or @@ -6097,8 +6101,8 @@ include::{generated}/api/version-notes/clLinkProgram.asciidoc[] The notification routine is a callback function that an application can register and which will be called when the program executable has been built (successfully or unsuccessfully). - ** _user_data_ will be passed as an argument when _pfn_notify_ is called. - _user_data_ can be `NULL`. + * _user_data_ will be passed as an argument when _pfn_notify_ is called. + _user_data_ can be `NULL`. If _pfn_notify_ is not `NULL`, {clLinkProgram} does not need to wait for the linker to complete, and can return immediately once the linking operation can @@ -6109,7 +6113,7 @@ Any state changes of the program object that result from calling {clLinkProgram} (e.g. link status or log) will be observable from this callback function. This callback function may be called asynchronously by the OpenCL implementation. -It is the applications responsibility to ensure that the callback function +It is the application's responsibility to ensure that the callback function is thread-safe. If _pfn_notify_ is `NULL`, {clLinkProgram} does not return until the linker @@ -8861,7 +8865,7 @@ include::{generated}/api/version-notes/clSetEventCallback.asciidoc[] the application. This callback function may be called asynchronously by the OpenCL implementation. - It is the applications responsibility to ensure that the callback function + It is the application's responsibility to ensure that the callback function is thread-safe. The parameters to this callback function are: ** _event_ is the event object for which the callback function is invoked. 
From d65ddf8e5effafbea1dee7bc9c2658effaeb59c9 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 29 Nov 2023 10:46:49 -0800 Subject: [PATCH 033/190] use title case for more chapter headings (#1011) --- ext/cl_khr_3d_image_writes.asciidoc | 4 ++-- ext/cl_khr_async_work_group_copy_fence.asciidoc | 4 ++-- ext/cl_khr_byte_addressable_store.asciidoc | 4 ++-- ext/cl_khr_create_command_queue.asciidoc | 4 ++-- ext/cl_khr_d3d10_sharing.asciidoc | 4 ++-- ext/cl_khr_d3d11_sharing.asciidoc | 4 ++-- ext/cl_khr_depth_images.asciidoc | 4 ++-- ext/cl_khr_device_enqueue_local_arg_types.asciidoc | 4 ++-- ext/cl_khr_device_uuid.asciidoc | 4 ++-- ext/cl_khr_dx9_media_sharing.asciidoc | 4 ++-- ext/cl_khr_egl_event.asciidoc | 4 ++-- ext/cl_khr_egl_image.asciidoc | 4 ++-- ext/cl_khr_expect_assume.asciidoc | 2 +- ext/cl_khr_extended_async_copies.asciidoc | 4 ++-- ext/cl_khr_extended_versioning.asciidoc | 4 ++-- ext/cl_khr_fp16.asciidoc | 4 ++-- ext/cl_khr_fp64.asciidoc | 4 ++-- ext/cl_khr_gl_depth_images.asciidoc | 4 ++-- ext/cl_khr_gl_event.asciidoc | 4 ++-- ext/cl_khr_gl_msaa_sharing.asciidoc | 4 ++-- ext/cl_khr_gl_sharing__context.asciidoc | 4 ++-- ext/cl_khr_gl_sharing__memobjs.asciidoc | 4 ++-- ext/cl_khr_icd.asciidoc | 4 ++-- ext/cl_khr_il_program.asciidoc | 4 ++-- ext/cl_khr_image2d_from_buffer.asciidoc | 4 ++-- ext/cl_khr_initialize_memory.asciidoc | 4 ++-- ext/cl_khr_int32_atomics.asciidoc | 4 ++-- ext/cl_khr_int64_atomics.asciidoc | 4 ++-- ext/cl_khr_mipmap_image.asciidoc | 4 ++-- ext/cl_khr_pci_bus_info.asciidoc | 2 +- ext/cl_khr_priority_hints.asciidoc | 4 ++-- ext/cl_khr_select_fprounding_mode.asciidoc | 4 ++-- ext/cl_khr_spir.asciidoc | 4 ++-- ext/cl_khr_srgb_image_writes.asciidoc | 4 ++-- ext/cl_khr_subgroup_extensions.asciidoc | 4 ++-- ext/cl_khr_subgroup_named_barrier.asciidoc | 4 ++-- ext/cl_khr_subgroups.asciidoc | 4 ++-- ext/cl_khr_terminate_context.asciidoc | 4 ++-- ext/cl_khr_throttle_hints.asciidoc | 4 ++-- 39 files changed, 76 insertions(+), 76 deletions(-) 
diff --git a/ext/cl_khr_3d_image_writes.asciidoc b/ext/cl_khr_3d_image_writes.asciidoc index 211555705..f3a9c07d9 100644 --- a/ext/cl_khr_3d_image_writes.asciidoc +++ b/ext/cl_khr_3d_image_writes.asciidoc @@ -11,9 +11,9 @@ This extension adds built-in functions that allow a kernel to write to 3D image This extension became a core feature in OpenCL 2.0. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_async_work_group_copy_fence.asciidoc b/ext/cl_khr_async_work_group_copy_fence.asciidoc index c2b20af0a..420e4afba 100644 --- a/ext/cl_khr_async_work_group_copy_fence.asciidoc +++ b/ext/cl_khr_async_work_group_copy_fence.asciidoc @@ -8,9 +8,9 @@ This section describes the *cl_khr_async_work_group_copy_fence* extension. The extension adds a new built-in function to OpenCL C to establish a memory synchronization ordering of asynchronous copies. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_byte_addressable_store.asciidoc b/ext/cl_khr_byte_addressable_store.asciidoc index 11b7bd88a..357756527 100644 --- a/ext/cl_khr_byte_addressable_store.asciidoc +++ b/ext/cl_khr_byte_addressable_store.asciidoc @@ -11,9 +11,9 @@ With this extension, applications are able to read from and write to pointers to This extension became a core feature in OpenCL 1.1. 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_create_command_queue.asciidoc b/ext/cl_khr_create_command_queue.asciidoc index c8876239f..84b7ca311 100644 --- a/ext/cl_khr_create_command_queue.asciidoc +++ b/ext/cl_khr_create_command_queue.asciidoc @@ -27,9 +27,9 @@ Applications that only target OpenCL 2.x devices should use the core OpenCL 2.x {clCreateCommandQueueWithProperties} API instead of this extension API. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_d3d10_sharing.asciidoc b/ext/cl_khr_d3d10_sharing.asciidoc index 05a8ab0fe..3e8cb1557 100644 --- a/ext/cl_khr_d3d10_sharing.asciidoc +++ b/ext/cl_khr_d3d10_sharing.asciidoc @@ -12,9 +12,9 @@ This section describes the *cl_khr_d3d10_sharing* extension. The goal of this extension is to provide interoperability between OpenCL and Direct3D 10. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_d3d11_sharing.asciidoc b/ext/cl_khr_d3d11_sharing.asciidoc index 200ad0799..db190c244 100644 --- a/ext/cl_khr_d3d11_sharing.asciidoc +++ b/ext/cl_khr_d3d11_sharing.asciidoc @@ -12,9 +12,9 @@ This section describes the *cl_khr_d3d11_sharing* extension. The goal of this extension is to provide interoperability between OpenCL and Direct3D 11. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_depth_images.asciidoc b/ext/cl_khr_depth_images.asciidoc index c066cc6a7..665ba8467 100644 --- a/ext/cl_khr_depth_images.asciidoc +++ b/ext/cl_khr_depth_images.asciidoc @@ -11,9 +11,9 @@ This extension adds support for depth images. This extension became a core feature in OpenCL 2.0. 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_device_enqueue_local_arg_types.asciidoc b/ext/cl_khr_device_enqueue_local_arg_types.asciidoc index 88e26e8de..b7775aae0 100644 --- a/ext/cl_khr_device_enqueue_local_arg_types.asciidoc +++ b/ext/cl_khr_device_enqueue_local_arg_types.asciidoc @@ -11,9 +11,9 @@ requiring arguments to blocks to be pointers to void in local memory. The name of this extension is *cl_khr_device_enqueue_local_arg_types*. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_device_uuid.asciidoc b/ext/cl_khr_device_uuid.asciidoc index 6dbb2b1c4..fe98af0db 100644 --- a/ext/cl_khr_device_uuid.asciidoc +++ b/ext/cl_khr_device_uuid.asciidoc @@ -12,9 +12,9 @@ This extension adds the ability to query a universally unique identifier The UUIDs returned by the query may be used to identify drivers and devices across processes or APIs. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_dx9_media_sharing.asciidoc b/ext/cl_khr_dx9_media_sharing.asciidoc index 83827464b..30610072b 100644 --- a/ext/cl_khr_dx9_media_sharing.asciidoc +++ b/ext/cl_khr_dx9_media_sharing.asciidoc @@ -20,9 +20,9 @@ Note that OpenCL memory objects may be created from the adapter media surface if and only if the OpenCL context has been created from that adapter. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_egl_event.asciidoc b/ext/cl_khr_egl_event.asciidoc index d8d6a5630..70dafeb0e 100644 --- a/ext/cl_khr_egl_event.asciidoc +++ b/ext/cl_khr_egl_event.asciidoc @@ -15,9 +15,9 @@ between the two APIs. 
The companion *EGL_KHR_cl_event* extension provides the complementary functionality of creating an EGL sync object from an OpenCL event object. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_egl_image.asciidoc b/ext/cl_khr_egl_image.asciidoc index 17c32d5cf..2d1a8a75a 100644 --- a/ext/cl_khr_egl_image.asciidoc +++ b/ext/cl_khr_egl_image.asciidoc @@ -12,9 +12,9 @@ This section describes the *cl_khr_egl_image* extension. This extension provides a mechanism to creating OpenCL memory objects from from EGLImages. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_expect_assume.asciidoc b/ext/cl_khr_expect_assume.asciidoc index df8cfdcc8..f4af54bff 100644 --- a/ext/cl_khr_expect_assume.asciidoc +++ b/ext/cl_khr_expect_assume.asciidoc @@ -16,7 +16,7 @@ These functions are not required for functional correctness. The initial version of this extension extends the OpenCL SPIR-V environment to support new instructions for offline compilation tool chains. Similar functionality may be provided by some OpenCL C online compilation tool chains, but formal support in OpenCL C is not required by the initial version of the extension. -=== General information +=== General Information ==== Name Strings diff --git a/ext/cl_khr_extended_async_copies.asciidoc b/ext/cl_khr_extended_async_copies.asciidoc index 28a82538c..6dd262198 100644 --- a/ext/cl_khr_extended_async_copies.asciidoc +++ b/ext/cl_khr_extended_async_copies.asciidoc @@ -12,9 +12,9 @@ to support more patterns: 1. for async copy between 2D source and 2D destination. 2. for async copy between 3D source and 3D destination. 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_extended_versioning.asciidoc b/ext/cl_khr_extended_versioning.asciidoc index e62e4c08d..984bc2f38 100644 --- a/ext/cl_khr_extended_versioning.asciidoc +++ b/ext/cl_khr_extended_versioning.asciidoc @@ -17,7 +17,7 @@ Extended versioning was promoted to a core feature in OpenCL 3.0, however note that the query for {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} was replaced by the query for {CL_DEVICE_OPENCL_C_ALL_VERSIONS}. -=== General information +=== General Information ==== Name Strings @@ -30,7 +30,7 @@ Ben Ashbaugh, Intel + Alastair Murray, Codeplay Software Ltd. + Einar Hov, Arm Ltd. -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_fp16.asciidoc b/ext/cl_khr_fp16.asciidoc index a2fb42cb2..c6233a695 100644 --- a/ext/cl_khr_fp16.asciidoc +++ b/ext/cl_khr_fp16.asciidoc @@ -9,9 +9,9 @@ This section describes the *cl_khr_fp16* extension. This extension adds support for half scalar and vector types as built-in types that can be used for arithmetic operations, conversions etc. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_fp64.asciidoc b/ext/cl_khr_fp64.asciidoc index 4cf9d679d..cb3a45fac 100644 --- a/ext/cl_khr_fp64.asciidoc +++ b/ext/cl_khr_fp64.asciidoc @@ -8,9 +8,9 @@ This section describes the *cl_khr_fp64* extension. This extension became an optional core feature in OpenCL 1.2. 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_gl_depth_images.asciidoc b/ext/cl_khr_gl_depth_images.asciidoc index f713577ac..d50e38260 100644 --- a/ext/cl_khr_gl_depth_images.asciidoc +++ b/ext/cl_khr_gl_depth_images.asciidoc @@ -12,9 +12,9 @@ cl_khr_gl_sharing_extension) defined in Objects>> to allow an OpenCL image to be created from an OpenGL depth or depth-stencil texture. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_gl_event.asciidoc b/ext/cl_khr_gl_event.asciidoc index 68ea4e53c..97df4a870 100644 --- a/ext/cl_khr_gl_event.asciidoc +++ b/ext/cl_khr_gl_event.asciidoc @@ -20,9 +20,9 @@ In addition, this extension modifies the behavior of guarantee synchronization with an OpenGL context bound in the same thread as the OpenCL context. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_gl_msaa_sharing.asciidoc b/ext/cl_khr_gl_msaa_sharing.asciidoc index 6c2958591..1418443d7 100644 --- a/ext/cl_khr_gl_msaa_sharing.asciidoc +++ b/ext/cl_khr_gl_msaa_sharing.asciidoc @@ -15,9 +15,9 @@ MSAA) texture (color or depth). This extension name is *cl_khr_gl_msaa_sharing*. This extension requires *cl_khr_gl_depth_images*. 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_gl_sharing__context.asciidoc b/ext/cl_khr_gl_sharing__context.asciidoc index 4237fa300..300bd070e 100644 --- a/ext/cl_khr_gl_sharing__context.asciidoc +++ b/ext/cl_khr_gl_sharing__context.asciidoc @@ -18,9 +18,9 @@ may be used to share OpenGL buffer, texture, and renderbuffer objects with the O An OpenGL implementation supporting buffer objects and sharing of texture and buffer object images with OpenCL is required by this extension. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_gl_sharing__memobjs.asciidoc b/ext/cl_khr_gl_sharing__memobjs.asciidoc index f8a919a52..b9b5d1761 100644 --- a/ext/cl_khr_gl_sharing__memobjs.asciidoc +++ b/ext/cl_khr_gl_sharing__memobjs.asciidoc @@ -19,9 +19,9 @@ Any supported OpenGL object defined within the associated OpenGL context or share group object may be shared, with the exception of the default OpenGL objects (i.e. objects named zero), which may not be shared. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_icd.asciidoc b/ext/cl_khr_icd.asciidoc index c50afb124..816a9300e 100644 --- a/ext/cl_khr_icd.asciidoc +++ b/ext/cl_khr_icd.asciidoc @@ -20,9 +20,9 @@ This is a platform extension, so if this extension is supported by an implementation, the string *cl_khr_icd* will be present in the {CL_PLATFORM_EXTENSIONS} string. 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_il_program.asciidoc b/ext/cl_khr_il_program.asciidoc index 0742baa0b..721c7eccd 100644 --- a/ext/cl_khr_il_program.asciidoc +++ b/ext/cl_khr_il_program.asciidoc @@ -14,9 +14,9 @@ the OpenCL environment may be found in the OpenCL SPIR-V Environment Specificati This functionality described by this extension is a core feature in OpenCL 2.1. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_image2d_from_buffer.asciidoc b/ext/cl_khr_image2d_from_buffer.asciidoc index f7f13ada2..4c08840c9 100644 --- a/ext/cl_khr_image2d_from_buffer.asciidoc +++ b/ext/cl_khr_image2d_from_buffer.asciidoc @@ -11,9 +11,9 @@ This extension allows a 2D image to be created from an existing OpenCL buffer me This extension became a core feature in OpenCL 2.0. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_initialize_memory.asciidoc b/ext/cl_khr_initialize_memory.asciidoc index dbd4c42ed..b2730b913 100644 --- a/ext/cl_khr_initialize_memory.asciidoc +++ b/ext/cl_khr_initialize_memory.asciidoc @@ -19,9 +19,9 @@ This extension adds support for initializing local and private memory before a kernel begins execution. This extension name is *cl_khr_initialize_memory*. 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_int32_atomics.asciidoc b/ext/cl_khr_int32_atomics.asciidoc index 6d2b3de86..cf5657073 100644 --- a/ext/cl_khr_int32_atomics.asciidoc +++ b/ext/cl_khr_int32_atomics.asciidoc @@ -10,9 +10,9 @@ These extensions allow atomic operations to be performed on 32-bit signed and un These extensions became core features in OpenCL 1.1, except the built-in atomic function names are changed to use the **atomic_** prefix instead of **atom_** and the volatile qualifier was added to the pointer parameter _p_. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_int64_atomics.asciidoc b/ext/cl_khr_int64_atomics.asciidoc index 95af963a7..f2875a562 100644 --- a/ext/cl_khr_int64_atomics.asciidoc +++ b/ext/cl_khr_int64_atomics.asciidoc @@ -7,9 +7,9 @@ This section describes the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions. These extensions allow atomic operations to be performed on 64-bit signed and unsigned integers in global and local memory. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_mipmap_image.asciidoc b/ext/cl_khr_mipmap_image.asciidoc index b88ef2e2d..d5a270da5 100644 --- a/ext/cl_khr_mipmap_image.asciidoc +++ b/ext/cl_khr_mipmap_image.asciidoc @@ -17,9 +17,9 @@ be used to write a mip-mapped image in an OpenCL C program. If the *cl_khr_mipmap_image_writes* extension is supported by the OpenCL device, the *cl_khr_mipmap_image* extension must also be supported. 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_pci_bus_info.asciidoc b/ext/cl_khr_pci_bus_info.asciidoc index d144a8134..3b92c929a 100644 --- a/ext/cl_khr_pci_bus_info.asciidoc +++ b/ext/cl_khr_pci_bus_info.asciidoc @@ -18,7 +18,7 @@ extension string for each individual OpenCL device for which they intend to issue the new query for and should not have any assumptions about the availability of the extension on any given platform. -=== General information +=== General Information ==== Name Strings diff --git a/ext/cl_khr_priority_hints.asciidoc b/ext/cl_khr_priority_hints.asciidoc index fbffaf13c..5da9a971c 100644 --- a/ext/cl_khr_priority_hints.asciidoc +++ b/ext/cl_khr_priority_hints.asciidoc @@ -12,9 +12,9 @@ It is expected that the the user guides associated with each implementation which supports this extension will describe the scheduling behavior guarantees. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_select_fprounding_mode.asciidoc b/ext/cl_khr_select_fprounding_mode.asciidoc index 5c1934a12..1432e07e7 100644 --- a/ext/cl_khr_select_fprounding_mode.asciidoc +++ b/ext/cl_khr_select_fprounding_mode.asciidoc @@ -10,9 +10,9 @@ It allows an application to specify the rounding mode for an instruction or grou **This extension was deprecated in OpenCL 1.1 and its use is not recommended.** -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_spir.asciidoc b/ext/cl_khr_spir.asciidoc index 4790158b9..f7c999307 100644 --- a/ext/cl_khr_spir.asciidoc +++ b/ext/cl_khr_spir.asciidoc @@ -15,9 +15,9 @@ This extension has been superseded by the SPIR-V intermediate representation, which is supported by the *cl_khr_il_program* extension, and is a core feature 
in OpenCL 2.1. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_srgb_image_writes.asciidoc b/ext/cl_khr_srgb_image_writes.asciidoc index 07c7da406..357d7d5e0 100644 --- a/ext/cl_khr_srgb_image_writes.asciidoc +++ b/ext/cl_khr_srgb_image_writes.asciidoc @@ -13,9 +13,9 @@ The sRGB image formats that may be written to will be returned by {clGetSupporte When the image is an sRGB image, the *write_imagef* built-in function will perform the linear to sRGB conversion. Only the R, G, and B components are converted from linear to sRGB; the A component is written as-is. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_subgroup_extensions.asciidoc b/ext/cl_khr_subgroup_extensions.asciidoc index a3f7f0bce..9f131cb0d 100644 --- a/ext/cl_khr_subgroup_extensions.asciidoc +++ b/ext/cl_khr_subgroup_extensions.asciidoc @@ -28,9 +28,9 @@ The functionality added by these extensions includes: This section describes changes to the OpenCL C Language for these extensions. There are no new API functions or enums added by these extensions. -=== General information +=== General Information -==== Version history +==== Version History For all of the extensions described in this section: diff --git a/ext/cl_khr_subgroup_named_barrier.asciidoc b/ext/cl_khr_subgroup_named_barrier.asciidoc index f04b7dec1..7140e7d8e 100644 --- a/ext/cl_khr_subgroup_named_barrier.asciidoc +++ b/ext/cl_khr_subgroup_named_barrier.asciidoc @@ -14,9 +14,9 @@ sub-groups named barriers in the SPIR-V intermediate representation, and to the OpenCL {cpp} specification for descriptions of the sub-group named barrier built-in functions in the OpenCL {cpp} kernel language. 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_subgroups.asciidoc b/ext/cl_khr_subgroups.asciidoc index 91b93c868..8ea3cdd4a 100644 --- a/ext/cl_khr_subgroups.asciidoc +++ b/ext/cl_khr_subgroups.asciidoc @@ -16,9 +16,9 @@ Sub-groups were promoted to a core feature in OpenCL 2.1, however note that: * The sub-group OpenCL C built-in functions described by this extension must still be accessed as an OpenCL C extension in OpenCL 2.1. * Sub-group independent forward progress is an optional device property in OpenCL 2.1, see {CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS}. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_terminate_context.asciidoc b/ext/cl_khr_terminate_context.asciidoc index 250d4020a..4b3a7f816 100644 --- a/ext/cl_khr_terminate_context.asciidoc +++ b/ext/cl_khr_terminate_context.asciidoc @@ -27,9 +27,9 @@ terminate an OpenCL context and adds an API to terminate a context. The extension name is *cl_khr_terminate_context*. -=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== diff --git a/ext/cl_khr_throttle_hints.asciidoc b/ext/cl_khr_throttle_hints.asciidoc index 728d8c441..ff734cae5 100644 --- a/ext/cl_khr_throttle_hints.asciidoc +++ b/ext/cl_khr_throttle_hints.asciidoc @@ -17,9 +17,9 @@ For example, a task may have high priority ({CL_QUEUE_PRIORITY_HIGH_KHR}) but should at the same time be executed at an optimized throttle setting ({CL_QUEUE_THROTTLE_LOW_KHR}). 
-=== General information +=== General Information -==== Version history +==== Version History [cols="1,1,3",options="header",] |==== From 21943846498be7792b7f6c370573c1333d671d72 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 29 Nov 2023 10:47:32 -0800 Subject: [PATCH 034/190] clarify async copies and wait group events must be convergent (#1015) --- OpenCL_C.txt | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 1492dc238..a25d94924 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -6382,6 +6382,23 @@ The OpenCL C programming language implements the <> that provide asynchronous copies between `global` and local memory and a prefetch from `global` memory. +The async copy and wait group events functions are performed by all work-items +in a work-group and therefore must be encountered by all work-items in a +work-group executing the kernel with the same argument values, otherwise the +results are undefined. +This rule applies to ND-ranges implemented with uniform and non-uniform +work-groups. + +If an async copy or wait group events function is inside a conditional statement +then all work-items in the work-group must enter the conditional if any +work-item in the work-group enters the conditional statement and executes the +async copy or wait group events function. + +If an async copy or wait group events function is inside a loop then all +work-items in the work-group must execute the async copy or wait group events +function on each iteration of the loop if any work-item executes the async copy +or wait group events function on that iteration. + We use the generic type name `gentype` to indicate the built-in data types `char`, `char__n__`, `uchar`, `uchar__n__`, `short`, `short__n__`, `ushort`, `ushort__n__`, `int`, `int__n__`, `uint`, @@ -6402,13 +6419,6 @@ _n_ is 2, 3 footnote:[{fn-vec3-async-copy}], 4, 8, or 16. 
const {local} gentype *_src_, size_t _num_gentypes_, event_t _event_) | Perform an async copy of _num_gentypes_ gentype elements from _src_ to _dst_. - The async copy is performed by all work-items in a work-group and this - built-in function must therefore be encountered by all work-items in a - work-group executing the kernel with the same argument values; - otherwise the results are undefined. - This rule applies to ND-ranges implemented with uniform and - non-uniform work-groups. - Returns an event object that can be used by *wait_group_events* to wait for the async copy to finish. The _event_ argument can also be used to associate the @@ -6436,12 +6446,6 @@ _n_ is 2, 3 footnote:[{fn-vec3-async-copy}], 4, 8, or 16. element read from _src_. The _dst_stride_ is the stride in elements for each `gentype` element written to _dst_. - The async gather is performed by all work-items in a work-group. - This built-in function must therefore be encountered by all work-items - in a work-group executing the kernel with the same argument values; - otherwise the results are undefined. - This rule applies to ND-ranges implemented with uniform and - non-uniform work-groups Returns an event object that can be used by *wait_group_events* to wait for the async copy to finish. @@ -6470,12 +6474,6 @@ _n_ is 2, 3 footnote:[{fn-vec3-async-copy}], 4, 8, or 16. to complete. The event objects specified in _event_list_ will be released after the wait is performed. - - This function must be encountered by all work-items in a work-group - executing the kernel with the same _num_events_ and event objects - specified in _event_list_; otherwise the results are undefined. 
- This rule applies to ND-ranges implemented with uniform and - non-uniform work-groups | | | void **prefetch**(const {global} gentype *_p_, size_t _num_gentypes_) | Prefetch `_num_gentypes_ * sizeof(gentype)` bytes into the global From 8cd620684bf8e7d59e4f410c577aada548c6ab55 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 29 Nov 2023 10:47:59 -0800 Subject: [PATCH 035/190] add old command queue APIs to the reference page table of contents (#985) --- man/toctail | 2 ++ 1 file changed, 2 insertions(+) diff --git a/man/toctail b/man/toctail index 3e96a3d03..82092a1ce 100644 --- a/man/toctail +++ b/man/toctail @@ -39,10 +39,12 @@
    • Command-Queues
    • From 1320de77e4928f02f767de346d89be8bf4a482dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 29 Nov 2023 18:48:39 +0000 Subject: [PATCH 036/190] Reserve enums for cl_ext_image_drm_format_modifier (#1020) Change-Id: Id096c7ab542d6fe04a5f7ae7cdbd031755ca15c8 --- xml/cl.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xml/cl.xml b/xml/cl.xml index 404a47e1d..5ca7af53c 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -2241,7 +2241,8 @@ server's OpenCL/api-docs repository. - + + From ef2e9489fe27987fd15bb841b25c07fca49ff7b2 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 29 Nov 2023 10:49:38 -0800 Subject: [PATCH 037/190] fix version note links in the reference pages (#982) * fix version note links in the reference pages * add config files with and without links * fix comment as per review Co-authored-by: Alastair Murray --------- Co-authored-by: Alastair Murray --- OpenCL_API.txt | 3 +++ config/version-full-links.asciidoc | 13 +++++++++++++ config/version-local-links.asciidoc | 13 +++++++++++++ scripts/clconventions.py | 1 + scripts/gen_version_notes.py | 12 ++++++------ 5 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 config/version-full-links.asciidoc create mode 100644 config/version-local-links.asciidoc diff --git a/OpenCL_API.txt b/OpenCL_API.txt index db6d507fe..9cdac6798 100644 --- a/OpenCL_API.txt +++ b/OpenCL_API.txt @@ -30,6 +30,9 @@ include::config/attribs.txt[] // Attributes that are shared by OpenCL specifications. include::config/opencl.asciidoc[] +// Attributes for version notes, with local links. +include::config/version-local-links.asciidoc[] + // Formatting and links for API functions and enums. include::api/dictionary.asciidoc[] diff --git a/config/version-full-links.asciidoc b/config/version-full-links.asciidoc new file mode 100644 index 000000000..884a50c14 --- /dev/null +++ b/config/version-full-links.asciidoc @@ -0,0 +1,13 @@ +// Copyright 2023 The Khronos Group. 
This work is licensed under a +// Creative Commons Attribution 4.0 International License; see +// http://creativecommons.org/licenses/by/4.0/ + +// Attributes for version notes, with full links. + +// "missing before" +:missing_before_label: pass:q[missing before] +:missing_before: link:https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#unified-spec[{missing_before_label}^] + +// "deprecated by" +:deprecated_by_label: pass:q[deprecated by] +:deprecated_by: link:https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#unified-spec[{deprecated_by_label}^] diff --git a/config/version-local-links.asciidoc b/config/version-local-links.asciidoc new file mode 100644 index 000000000..d2f3b8029 --- /dev/null +++ b/config/version-local-links.asciidoc @@ -0,0 +1,13 @@ +// Copyright 2023 The Khronos Group. This work is licensed under a +// Creative Commons Attribution 4.0 International License; see +// http://creativecommons.org/licenses/by/4.0/ + +// Attributes for version notes, with local links. 
+ +// "missing before" +:missing_before_label: pass:q[missing before] +:missing_before: <> + +// "deprecated by" +:deprecated_by_label: pass:q[deprecated by] +:deprecated_by: <> diff --git a/scripts/clconventions.py b/scripts/clconventions.py index f4df49d2d..aae61110c 100644 --- a/scripts/clconventions.py +++ b/scripts/clconventions.py @@ -201,6 +201,7 @@ def extra_refpage_headers(self): """Return any extra text to add to refpage headers.""" return 'include::{config}/attribs.txt[]\n' + \ 'include::{config}/opencl.asciidoc[]\n' + \ + 'include::{config}/version-full-links.asciidoc[]\n' + \ 'include::{apispec}/footnotes.asciidoc[]\n' + \ 'include::{cspec}/footnotes.asciidoc[]\n' + \ 'include::{cspec}/feature-dictionary.asciidoc[]\n' + \ diff --git a/scripts/gen_version_notes.py b/scripts/gen_version_notes.py index 9fed05178..030d9f948 100755 --- a/scripts/gen_version_notes.py +++ b/scripts/gen_version_notes.py @@ -40,11 +40,11 @@ def FullNote(name, added_in, deprecated_by): if added_in == "1.0" and deprecated_by == None: return "\n// Intentionally empty, %s has always been present." % name if added_in != "1.0" and deprecated_by == None: - return "\nIMPORTANT: {%s} is <> version %s." % (name, added_in) + return "\nIMPORTANT: {%s} is {missing_before} version %s." % (name, added_in) if added_in == "1.0" and deprecated_by != None: - return "\nIMPORTANT: {%s} is <> version %s." % (name, deprecated_by) + return "\nIMPORTANT: {%s} is {deprecated_by} version %s." % (name, deprecated_by) if added_in != "1.0" and deprecated_by != None: - return "\nIMPORTANT: {%s} is <> version %s and <> version %s." % (name, added_in, deprecated_by) + return "\nIMPORTANT: {%s} is {missing_before} version %s and {deprecated_by} version %s." 
% (name, added_in, deprecated_by) def ShortNote(name, added_in, deprecated_by): # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in @@ -52,11 +52,11 @@ def ShortNote(name, added_in, deprecated_by): if added_in == "1.0" and deprecated_by == None: return "// Intentionally empty, %s has always been present." % name if added_in != "1.0" and deprecated_by == None: - return "<> version %s." % added_in + return "{missing_before} version %s." % added_in if added_in == "1.0" and deprecated_by != None: - return "<> version %s." % deprecated_by + return "{deprecated_by} version %s." % deprecated_by if added_in != "1.0" and deprecated_by != None: - return "<> version %s and <> version %s." % (added_in, deprecated_by) + return "{missing_before} version %s and {deprecated_by} version %s." % (added_in, deprecated_by) # Find feature groups that are parents of a feature/require/${entry_type} # hierarchy, and then find all the ${entry_type} within each hierarchy: From e523cee730da4035e1e4343e6837096b2e7e58f5 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Thu, 30 Nov 2023 12:01:03 -0700 Subject: [PATCH 038/190] cl_khr_external_semaphore_khr: semaphore re-import (#939) * cl_khr_external_semaphore_khr Add clImportSemaphoreSyncFD API call. Sync fd semaphores must re-import the sync_fd after every wait. Add an API call to make this re-import possible, without creating a new OpenCL semaphores. See issue #888. 
* Revised to be general * Removed sync_fd reference * Cleaned up re-import mechanism * Move re-import back to sync_fd extension * Moved version to 0.9.1 * Bump version to 0.9.2 and re-name import to re-import --------- Co-authored-by: Joshua Kelly --- ext/cl_khr_external_semaphore.asciidoc | 48 +++++++++++++++++++++++--- xml/cl.xml | 15 ++++++++ 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/ext/cl_khr_external_semaphore.asciidoc b/ext/cl_khr_external_semaphore.asciidoc index 5cc6fe347..bce72bbda 100644 --- a/ext/cl_khr_external_semaphore.asciidoc +++ b/ext/cl_khr_external_semaphore.asciidoc @@ -41,6 +41,7 @@ Other related extensions define specific external semaphores that may be importe | *Date* | *Version* | *Description* | 2021-09-10 | 0.9.0 | Initial version (provisional). | 2023-11-16 | 0.9.1 | Added CL_SEMAPHORE_EXPORTABLE_KHR. +| 2023-11-21 | 0.9.2 | Added re-import function call to cl_khr_external_semaphore_sync_fd |==== NOTE: This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. 
If you have feedback please create an issue on https://github.com/KhronosGroup/OpenCL-Docs/ @@ -91,6 +92,13 @@ Vivek Kini, NVIDIA + typedef cl_uint cl_external_semaphore_handle_type_khr; ---- +The `cl_khr_external_semaphore_sync_fd` extension adds: + +[source] +---- +typedef cl_properties cl_semaphore_reimport_properties_khr; +---- + === New API Functions [source] @@ -104,6 +112,15 @@ cl_int clGetSemaphoreHandleForTypeKHR( size_t *handle_size_ret); ---- +The `cl_khr_external_semaphore_sync_fd` extension adds: + +---- +cl_int clReImportSemaphoreSyncFdKHR( + cl_semaphore_khr sema_object, + cl_semaphore_reimport_properties_khr *reimport_props, + int fd); +---- + === New API Enums Accepted value for the _param_name_ parameter to {clGetPlatformInfo} to query external semaphore handle types that may be imported or exported by all devices in an OpenCL platform: @@ -378,6 +395,27 @@ Transference and permanence properties for handle types added by `cl_khr_externa For these extensions, importing a semaphore payload from a file descriptor transfers ownership of the file descriptor from the application to the OpenCL implementation. The application must not perform any operations on the file descriptor after a successful import. +A handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} may be re-imported into an existing semaphore using {clReImportSemaphoreSyncFdKHR}: + +include::{generated}/api/protos/clReImportSemaphoreSyncFdKHR.txt[] + +_sema_object_ specifies a valid semaphore object with importable properties. + +_reimport_props_ must be `NULL`; it is reserved for future use. + +_fd_ is the external file descriptor handle to import. + +Calling {clReImportSemaphoreSyncFdKHR} is equivalent to destroying _sema_object_ and re-creating it with the original _sema_props_ +from {clCreateSemaphoreWithPropertiesKHR}, except a handle specified by _fd_ will be imported. +The semaphore _sema_object_ must have originally imported an external handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR}. 
+ +* {CL_INVALID_SEMAPHORE_KHR} +** if _sema_object_ is not a valid semaphore +* {CL_INVALID_SEMAPHORE_KHR} if a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} handle was not imported when _sema_object_ was created. +* {CL_INVALID_VALUE} if _fd_ is invalid. +* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. + ==== NT Handle Types The `cl_khr_external_semaphore_dx_fence` extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: @@ -487,11 +525,11 @@ int fd = getFdForExternalSemaphore(); // Create clSema of type cl_semaphore_khr usable only on device 1 // assuming the semaphore was imported from the same device. cl_semaphore_properties_khr sema_props[] = { - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, (cl_semaphore_properties_khr)fd, - (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, (cl_semaphore_properties_khr)devices[1], CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, 0 @@ -559,9 +597,9 @@ clCreateContext(..., 2, devices, ...); // Create clSema of type cl_semaphore_khr usable only on device 1 cl_semaphore_properties_khr sema_props[] = { - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, + 
(cl_semaphore_properties_khr)CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR, (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, diff --git a/xml/cl.xml b/xml/cl.xml index 5ca7af53c..6f7df66ef 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -232,6 +232,7 @@ server's OpenCL/api-docs repository. typedef cl_bitfield cl_device_feature_capabilities_intel; typedef cl_bitfield cl_device_integer_dot_product_capabilities_khr; typedef cl_properties cl_semaphore_properties_khr; + typedef cl_properties cl_semaphore_reimport_properties_khr; typedef cl_uint cl_semaphore_info_khr; typedef cl_uint cl_semaphore_type_khr; typedef cl_ulong cl_semaphore_payload_khr; @@ -2696,6 +2697,12 @@ server's OpenCL/api-docs repository. void* handle_ptr size_t* handle_size_ret + + cl_int clReImportSemaphoreSyncFdKHR + cl_semaphore_khr sema_object + cl_semaphore_reimport_properties_khr* reimport_props + int fd + cl_int clEnqueueAcquireExternalMemObjectsKHR cl_command_queue command_queue @@ -6991,6 +6998,8 @@ server's OpenCL/api-docs repository. + + @@ -7018,9 +7027,15 @@ server's OpenCL/api-docs repository. 
+ + + + + + From 6bbad0245bdfbad26fedc99af796725e91f3a38e Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 30 Nov 2023 12:00:57 -0800 Subject: [PATCH 039/190] update the provisional extension notification text (#1002) * refactor the provisional notice into a separate file * correct and update the provisional notice text --- ext/cl_khr_command_buffer.asciidoc | 4 ++-- ext/cl_khr_command_buffer_multi_device.asciidoc | 2 ++ ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 2 ++ ext/cl_khr_external_memory.asciidoc | 2 +- ext/cl_khr_external_semaphore.asciidoc | 2 +- ext/cl_khr_semaphore.asciidoc | 2 +- ext/provisional_notice.asciidoc | 12 ++++++++++++ 7 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 ext/provisional_notice.asciidoc diff --git a/ext/cl_khr_command_buffer.asciidoc b/ext/cl_khr_command_buffer.asciidoc index 5b7bf45ca..a7943c30d 100644 --- a/ext/cl_khr_command_buffer.asciidoc +++ b/ext/cl_khr_command_buffer.asciidoc @@ -25,6 +25,8 @@ This extension adds the ability to record and replay buffers of OpenCL commands. | 2023-05-11 | 0.9.4 | Add clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR command entries (provisional). |==== +include::provisional_notice.asciidoc[] + ==== Dependencies This extension is written against the OpenCL Specification version 3.0.6. @@ -2016,5 +2018,3 @@ Add to Table 37, _Event Command Types_: -- *UNRESOLVED* -- - -NOTE: This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. 
If you have feedback please create an issue on https://github.com/KhronosGroup/OpenCL-Docs/ diff --git a/ext/cl_khr_command_buffer_multi_device.asciidoc b/ext/cl_khr_command_buffer_multi_device.asciidoc index a2d0faea2..0e6fc023b 100644 --- a/ext/cl_khr_command_buffer_multi_device.asciidoc +++ b/ext/cl_khr_command_buffer_multi_device.asciidoc @@ -23,6 +23,8 @@ providing execution of heterogeneous task graphs from command-queues associated | 2024-04-30 | 0.9.1 | Added clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR as affected functions (provisional). |==== +include::provisional_notice.asciidoc[] + ==== Dependencies This extension requires the `cl_khr_command_buffer` extension version 0.9.3. diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index 0de7cb03a..dc75f9a9f 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -23,6 +23,8 @@ commands between command-buffer enqueues. | 2023-11-07 | 0.9.1 | Add type cl_mutable_dispatch_asserts_khr and its possible values (provisional). |==== +include::provisional_notice.asciidoc[] + ==== Dependencies This extension requires the `cl_khr_command_buffer` extension version 0.9.0. diff --git a/ext/cl_khr_external_memory.asciidoc b/ext/cl_khr_external_memory.asciidoc index 01f7330d1..57c8867f6 100644 --- a/ext/cl_khr_external_memory.asciidoc +++ b/ext/cl_khr_external_memory.asciidoc @@ -36,7 +36,7 @@ Other related extensions define specific external memory types that may be impor | 2023-08-29 | 0.9.3 | Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_HANDLE_TYPES_KHR} (provisional). |==== -NOTE: This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. 
If you have feedback please create an issue on https://github.com/KhronosGroup/OpenCL-Docs/ +include::provisional_notice.asciidoc[] ==== Dependencies diff --git a/ext/cl_khr_external_semaphore.asciidoc b/ext/cl_khr_external_semaphore.asciidoc index bce72bbda..f5198a123 100644 --- a/ext/cl_khr_external_semaphore.asciidoc +++ b/ext/cl_khr_external_semaphore.asciidoc @@ -44,7 +44,7 @@ Other related extensions define specific external semaphores that may be importe | 2023-11-21 | 0.9.2 | Added re-import function call to cl_khr_external_semaphore_sync_fd |==== -NOTE: This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. If you have feedback please create an issue on https://github.com/KhronosGroup/OpenCL-Docs/ +include::provisional_notice.asciidoc[] ==== Dependencies diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc index 9246b9ce6..158a24d58 100644 --- a/ext/cl_khr_semaphore.asciidoc +++ b/ext/cl_khr_semaphore.asciidoc @@ -41,7 +41,7 @@ In particular, this extension defines: | 2023-08-01 | 0.9.1 | Changed device handle list enum to the semaphore-specific {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} (provisional). |==== -NOTE: This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. 
If you have feedback please create an issue on https://github.com/KhronosGroup/OpenCL-Docs/ +include::provisional_notice.asciidoc[] ==== Dependencies diff --git a/ext/provisional_notice.asciidoc b/ext/provisional_notice.asciidoc new file mode 100644 index 000000000..ddbf779e0 --- /dev/null +++ b/ext/provisional_notice.asciidoc @@ -0,0 +1,12 @@ +// Copyright 2023 The Khronos Group. This work is licensed under a +// Creative Commons Attribution 4.0 International License; see +// http://creativecommons.org/licenses/by/4.0/ + +[NOTE] +==== +This is a provisional OpenCL extension specification that has been Ratified under the Khronos Intellectual Property Framework. +It is being made publicly available as a provisional extension to enable review and feedback from the community. +While it is a provisional extension, features may be added, removed, or changed in non-backward-compatible ways. + +If you have feedback please create an issue on: https://github.com/KhronosGroup/OpenCL-Docs/ +==== \ No newline at end of file From ed633243a6e7065364f3cdd2e5f858df3d002d48 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 5 Dec 2023 08:47:17 -0800 Subject: [PATCH 040/190] add links to change log PRs (#1017) --- api/appendix_e.asciidoc | 8 ++++---- config/opencl.asciidoc | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index a43c8b47b..4d5f6731c 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -564,9 +564,9 @@ Changes from *v3.0.12*: Changes from *v3.0.13*: - * Corrected the precision for `cross` and `dot` to be based on `HALF_EPSILON` in `cl_khr_fp16`, see GitHub #893. - * Added a context query for command-buffers to `cl_khr_command_buffer`, see GitHub #899. - * Updated the semaphore wait and signal rules for binary semaphores in `cl_khr_semaphore`, see GitHub #882. 
- * Removed redundant error conditions from `cl_khr_external_semaphore` and `cl_khr_external_memory`, see GitHub #903 and #904. + * Corrected the precision for `cross` and `dot` to be based on `HALF_EPSILON` in `cl_khr_fp16`, see {khronos-opencl-pr}/893[#893]. + * Added a context query for command-buffers to `cl_khr_command_buffer`, see {khronos-opencl-pr}/899[#899]. + * Updated the semaphore wait and signal rules for binary semaphores in `cl_khr_semaphore`, see {khronos-opencl-pr}/882[#882]. + * Removed redundant error conditions from `cl_khr_external_semaphore` and `cl_khr_external_memory`, see {khronos-opencl-pr}/903[#903] and {khronos-opencl-pr}/904[#904]. * Added new extension: ** `cl_khr_command_buffer_multi_device` (provisional) diff --git a/config/opencl.asciidoc b/config/opencl.asciidoc index 8083875c5..9d233e95e 100644 --- a/config/opencl.asciidoc +++ b/config/opencl.asciidoc @@ -4,6 +4,9 @@ // Attributes that are shared by OpenCL specifications. +:khronos-opencl-repo: https://github.com/KhronosGroup/OpenCL-Docs +:khronos-opencl-pr: {khronos-opencl-repo}/pull + :blank: pass:[ +] :pp: ++ :cpp: pass:[C++] From 1c4a9eb6d9e8273ca3a490ce9efd9abf70727a75 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 5 Dec 2023 08:54:33 -0800 Subject: [PATCH 041/190] include dictionaries before footnotes for reference pages (#1018) --- scripts/clconventions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/clconventions.py b/scripts/clconventions.py index aae61110c..5b849892b 100644 --- a/scripts/clconventions.py +++ b/scripts/clconventions.py @@ -202,10 +202,10 @@ def extra_refpage_headers(self): return 'include::{config}/attribs.txt[]\n' + \ 'include::{config}/opencl.asciidoc[]\n' + \ 'include::{config}/version-full-links.asciidoc[]\n' + \ - 'include::{apispec}/footnotes.asciidoc[]\n' + \ - 'include::{cspec}/footnotes.asciidoc[]\n' + \ + 'include::{generated}/api/api-dictionary-no-links.asciidoc[]\n' + \ 
'include::{cspec}/feature-dictionary.asciidoc[]\n' + \ - 'include::{generated}/api/api-dictionary-no-links.asciidoc[]' + 'include::{apispec}/footnotes.asciidoc[]\n' + \ + 'include::{cspec}/footnotes.asciidoc[]\n' @property def extension_index_prefixes(self): From 4744432b6c7cb8ed521a1356eef0c8db6befca84 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 5 Dec 2023 09:16:27 -0800 Subject: [PATCH 042/190] remove unnecessary rounding mode text for geometric and common functions (#1027) Without knowing how these functions are implemented any statements about rounding modes or contractions are unnecessary and confusing. We have defined error bounds for these functions, and as long as an implementation meets these error bounds it should be considered correct. --- OpenCL_C.txt | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index a25d94924..dfea8980a 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -5473,11 +5473,6 @@ We use the generic type name `gentyped` to indicate that the function can take `double`, `double2`, `double3`, `double4`, `double8` or `double16` as the type for the arguments. -The built-in common functions are implemented using the round to nearest -even rounding mode. -The built-in common functions may be implemented using contractions such -as *mad* or *fma*. - [[table-builtin-common]] .Built-in Scalar and Vector Argument Common Functions [cols=",",] @@ -5560,11 +5555,6 @@ The description is per-component. `double` footnote:[{fn-double-supported}], `double2`, `double3`, or `double4`. -The built-in geometric functions are implemented using the round to nearest -even rounding mode. -The built-in geometric functions may be implemented using contractions such -as *mad* or *fma*. 
- [[table-builtin-geometric]] .Built-in Scalar and Vector Argument Geometric Functions [cols=",",] From 8deed7d3623b669538843bd22e2eea64a45d23bc Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 5 Dec 2023 09:32:04 -0800 Subject: [PATCH 043/190] clarify the user function for clEnqueueNativeKernel must be thread safe (#1026) --- api/opencl_runtime_layer.asciidoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 75c6728ae..04140e567 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -8303,6 +8303,8 @@ include::{generated}/api/version-notes/clEnqueueNativeKernel.asciidoc[] {CL_DEVICE_EXECUTION_CAPABILITIES} as specified in the <> table. * _user_func_ is a pointer to a host-callable user function. + It is the application's responsibility to ensure that the host-callable user + function is thread-safe. * _args_ is a pointer to the args list that _user_func_ should be called with. * _cb_args_ is the size in bytes of the args list that _args_ points to. * _num_mem_objects_ is the number of buffer objects that are passed in _args_. 
From 9f60d2440af20ab014bb54cff40bb47a7b3dc52e Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 5 Dec 2023 09:35:48 -0800 Subject: [PATCH 044/190] fix the link appearance in the online reference pages (#987) * fix the link appearance in the online reference pages * revert a few unintended extension changes --- OpenCL_C.txt | 109 ++++++++++++----------- api/embedded_profile.asciidoc | 2 +- api/opencl_platform_layer.asciidoc | 12 +-- api/opencl_runtime_layer.asciidoc | 46 +++++----- ext/cl_khr_create_command_queue.asciidoc | 2 +- ext/cl_khr_semaphore.asciidoc | 10 +-- ext/cl_khr_subgroups.asciidoc | 2 +- 7 files changed, 96 insertions(+), 87 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index dfea8980a..fccd80798 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -605,9 +605,9 @@ constructed from the built-in <>, <> data types are supported, with specified <>. -The following tables describe the other built-in data types in OpenCL -described in <> and the corresponding data type -available to the application: +The following tables describe the other built-in data types in OpenCL described +in <> and the corresponding +data type available to the application: [cols=",",] |==== @@ -639,8 +639,9 @@ available to the application: The data type names described in the following table are reserved and cannot be used by applications as type names. -The vector data type names defined in <>, but -where _n_ is any value other than 2, 3, 4, 8 and 16, are also reserved. +The vector data type names defined in <>, but where _n_ is any value other than 2, 3, 4, 8 and 16, +are also reserved. [[table-reserved-types]] .Reserved Data Types @@ -1031,8 +1032,10 @@ The following names are reserved for use as keywords in OpenCL C and shall not be used otherwise. * Names reserved as keywords by C99. - * OpenCL C data types defined in <>, - <>, and <>. + * OpenCL C data types defined in <>, + <>, and + <>. 
* Address space qualifiers: `{global}`, `global`, `{local}`, `local`, `{constant}`, `constant`, `{private}`, and `private`. `{generic}` and `generic` are reserved for future use. @@ -1050,8 +1053,8 @@ not be used otherwise. === Implicit Conversions Implicit conversions between scalar built-in types defined in -<> (except `void` and `half` -footnote:[{fn-cl_khr_fp16}]) are supported. +<> (except `void` and +`half` footnote:[{fn-cl_khr_fp16}]) are supported. When an implicit conversion is done, it is not just a re-interpretation of the expression's value but a conversion of that value to an equivalent value in the new type. @@ -1073,8 +1076,8 @@ Implicit conversions for pointer types follow the rules described in the === Explicit Casts Standard typecasts for built-in scalar data types defined in -<> will perform appropriate conversion (except -`void` and `half` footnote:[{fn-cl_khr_fp16}]). +<> will perform +appropriate conversion (except `void` and `half` footnote:[{fn-cl_khr_fp16}]). In the example below: [source,opencl_c] @@ -1374,12 +1377,12 @@ u.d = 1.0; // u.u contains 0x3ff00000 (big endian) or 0 [open,refpage='as_typen',desc='Reinterpreting Types',type='freeform',spec='clang',anchor='reinterpreting-types-using-as_type-and-as_typen',xrefs='convert_T scalarDataTypes vectorDataTypes'] -- -All data types described in <> and -<> (except `bool`, `void`, and `half` -footnote:[{fn-cl_khr_fp16}]) may be also reinterpreted as another data type of -the same size using the *as_type*() operator for scalar data types and the -*as_type__n__*() operator footnote:[{fn-reinterpret-vector-types}] for vector -data types. +All data types described in <> and <> (except +`bool`, `void`, and `half` footnote:[{fn-cl_khr_fp16}]) may be also +reinterpreted as another data type of the same size using the *as_type*() +operator for scalar data types and the *as_type__n__*() operator +footnote:[{fn-reinterpret-vector-types}] for vector data types. 
When the operand and result type contain the same number of elements, the bits in the operand shall be returned directly without modification as the new type. @@ -1804,7 +1807,8 @@ If all three expressions are scalar values, the C99 rules for ternary operator are followed. If the result is a vector value, then this is equivalent to calling *select*(_exp3_, _exp2_, _exp1_). -The *select* function is described in <>. +The *select* function is described in <>. The second and third expressions can be any type, as long their types match, or there is an <> that can be applied to one of the expressions to make their types match, or one is a @@ -1950,10 +1954,10 @@ operator (*=*), like * _lvalue_ = _expression_ The assignment operator stores the value of _expression_ into _lvalue_. -The _expression_ and _lvalue_ must have the same type, or the expression -must have a type in <>, in which case an -implicit conversion will be done on the expression before the assignment is -done. +The _expression_ and _lvalue_ must have the same type, or the expression must +have a type in <>, in +which case an implicit conversion will be done on the expression before the +assignment is done. If _expression_ is a scalar type and _lvalue_ is a vector type, the scalar is converted to the element type used by the vector operand. @@ -2827,12 +2831,12 @@ to OpenCL C with the generic address space. *Clause 6.2.5 - Types*: If address space qualifier on type T is omitted refer to -<>. +<>. *Clause 6.3.2.3 - Pointers* -Conversions between disjoint address spaces are disallowed in OpenCL -(<>). +Conversions between disjoint address spaces are disallowed in OpenCL, +refer to <>. *Clause 6.5.8 - Relational operators*: @@ -3127,7 +3131,7 @@ representation of the computational _width_ of the `{kernel}`, and should serve as the basis for calculating processor bandwidth utilization when the compiler is looking to autovectorize the code. 
In the `+__attribute__((vec_type_hint()))+` qualifier is one of -the built-in vector types listed in <> or the +the built-in vector types listed in <> or the constituent scalar element types. If `vec_type_hint ()` is not specified, the kernel is assumed to have the `+__attribute__((vec_type_hint(int)))+` qualifier. @@ -3385,9 +3389,9 @@ do_proc (__global char *pA, short b, . A function in an OpenCL program cannot be called `main`. . Implicit function declaration is not supported. . Program scope variables can be defined with any valid OpenCL C data type - except for those in <>. Such program scope - variables may be of any user-defined type, or a pointer to a user-defined - type. + except for those in <>. + Such program scope variables may be of any user-defined type, or a pointer + to a user-defined type. + In the presence of shared virtual memory, these pointers or pointer members should work as expected as long as they are shared virtual memory @@ -4578,8 +4582,9 @@ The built-in math functions are not affected by the prevailing rounding mode in the calling environment, and always return the same value as they would if called with the round to nearest even rounding mode. -The <> describes the list of built-in -math functions that can take scalar or vector arguments. +The <> +table describes the list of built-in math functions that can take scalar or +vector arguments. We use the generic type name `gentype` to indicate that the function can take `float`, `float2`, `float3`, `float4`, `float8`, `float16`, `double` footnote:double-supported[{fn-double-supported}], `double2`, @@ -4925,16 +4930,15 @@ all arguments and the return type, unless otherwise specified. The following table describes the following functions: - * A subset of functions from <> that are defined with - the half_ prefix . + * A subset of functions from <> that are defined with the half_ prefix . These functions are implemented with a minimum of 10-bits of accuracy, i.e. 
the maximum error value \<= 8192 ulp. - * A subset of functions from <> that are defined with - the native_ prefix. + * A subset of functions from <> that are defined with the native_ prefix. These functions may map to one or more native device instructions and will typically have better performance compared to the corresponding - functions (without the `+native_+` prefix) described in - <>. + functions (without the `+native_+` prefix). The accuracy (and in some cases the input range(s)) of these functions is implementation-defined. * `+half_+` and `+native_+` functions for following basic operations: @@ -5629,9 +5633,10 @@ operators (*<*, *\<=*, *>*, *>=*, *!=*, *==*) can be used with scalar and vector built-in types and produce a scalar or vector signed integer result respectively. -The functions described in the <> can -be used with built-in scalar or vector types as arguments and return a scalar or -vector integer result footnote:[{fn-floating-point-exception-nans}]. +The functions described in the <> table can be used with built-in scalar or vector +types as arguments and return a scalar or vector integer result +footnote:[{fn-floating-point-exception-nans}]. The argument type `gentype` refers to the following built-in types: `char`, `char__n__`, `uchar`, `uchar__n__`, `short`, `short__n__`, `ushort`, `ushort__n__`, `int`, `int__n__`, `uint`, `uint__n__`, `long` @@ -6103,11 +6108,13 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, The results of vector data load and store functions are undefined if the address being read from or written to is not correctly aligned as described -in <>. +in <>. The pointer argument p can be a pointer to `global`, `local`, or `private` -memory for store functions described in <>. +memory for store functions described in <>. The pointer argument p can be a pointer to `global`, `local`, `constant`, or -`private` memory for load functions described in <>. +`private` memory for load functions described in +<>. 
[NOTE] ==== @@ -9835,9 +9842,9 @@ For query functions this may be `read_only`, `write_only` or `read_write`. |==== The values returned by *get_image_channel_data_type* and -*get_image_channel_order* as specified in <> with the -`CLK_` prefixes correspond to the `CL_` prefixes used to describe the -<> and +*get_image_channel_order* as specified in <> with the `CLK_` prefixes correspond to the `CL_` prefixes used +to describe the <> and <> in the <>. For example, both `CL_UNORM_INT8` and `CLK_UNORM_INT8` refer to an image @@ -11611,10 +11618,12 @@ as ULP values if the `-cl-unsafe-math-optimizations` compiler option is specified when compiling or building an OpenCL program. For derived implementations, the operations used in the derivation may themselves be relaxed according to the following table. -The minimum accuracy of math functions not defined in the following table -when the `-cl-unsafe-math-optimizations` compiler option is specified is as defined -in <> when operating in the full profile, and as -defined in <> when operating in the embedded profile. +The minimum accuracy of math functions not defined in the following table when +the `-cl-unsafe-math-optimizations` compiler option is specified is as defined +in <> when operating in the full profile, and as defined in +<> when operating in the +embedded profile. The reference value used to compute the ULP value of an arithmetic operation is the infinitely precise result. 0 ulp is used for math functions that do not require rounding. diff --git a/api/embedded_profile.asciidoc b/api/embedded_profile.asciidoc index 2830fcecc..46dc54515 100644 --- a/api/embedded_profile.asciidoc +++ b/api/embedded_profile.asciidoc @@ -150,7 +150,7 @@ profile are listed in the <> table. 
[[embedded-device-queries-table]] -.List of supported param_names by <> for embedded profile +.List of supported param_names by {clGetDeviceInfo} for embedded profile [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Device Info | Return Type | Description diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index abac6b88a..125f21e73 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -75,7 +75,7 @@ The information that can be queried using {clGetPlatformInfo} is specified in the <> table. [[platform-queries-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetPlatformInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Platform Info | Return Type | Description @@ -210,7 +210,7 @@ include::{generated}/api/version-notes/clGetDeviceIDs.asciidoc[] If _num_devices_ is `NULL`, this argument is ignored. [[device-types-table]] -.List of supported device_types by <> +.List of supported device_types by {clGetDeviceIDs} [width="100%",cols="<50%,<50%",options="header"] |==== | Device Type | Description @@ -331,7 +331,7 @@ device except for the following queries: * {CL_DEVICE_REFERENCE_COUNT} [[device-queries-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetDeviceInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Device Info | Return Type | Description @@ -1618,7 +1618,7 @@ When a command-queue is created against a sub-device, the commands enqueued on the queue are executed only on the sub-device. [[sub-device-partition-table]] -.List of supported partition schemes by <> +.List of supported partition schemes by {clCreateSubDevices} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Partition Property | Partition Value | Description @@ -1859,7 +1859,7 @@ command-queues, memory, program and kernel objects and for executing kernels on one or more devices specified in the context. 
[[context-properties-table]] -.List of supported context creation properties by <> +.List of supported context creation properties by {clCreateContext} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Context Property | Property Value | Description @@ -2070,7 +2070,7 @@ _param_value_ by {clGetContextInfo} is described in the <> table. [[context-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetContextInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Context Info | Return Type | Description diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 04140e567..d7a4de7a3 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -55,7 +55,7 @@ Also see extension *cl_khr_create_command_queue*. If _errcode_ret_ is `NULL`, no error code is returned. [[queue-properties-table]] -.List of supported queue creation properties by <> +.List of supported queue creation properties by {clCreateCommandQueueWithProperties} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Queue Property | Property Value | Description @@ -154,7 +154,7 @@ include::{generated}/api/version-notes/clCreateCommandQueue.asciidoc[] command-queue properties will be used. [[legacy-queue-properties-table]] -.List of supported `cl_command_queue_property` values by <> +.List of supported `cl_command_queue_property` values by {clCreateCommandQueue} [width="100%",cols="<50%,<50%",options="header"] |==== | Command-Queue Properties | Description @@ -321,7 +321,7 @@ _param_value_ by {clGetCommandQueueInfo} is described in the <> table. 
[[command-queue-param-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetCommandQueueInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Queue Info | Return Type | Description @@ -491,7 +491,7 @@ include::{generated}/api/version-notes/clCreateBufferWithProperties.asciidoc[] If _errcode_ret_ is `NULL`, no error code is returned. The alignment requirements for data stored in buffer objects are described -in <>. +in <>. If {clCreateBuffer} or {clCreateBufferWithProperties} is called with {CL_MEM_USE_HOST_PTR} set in its _flags_ argument, the contents of the @@ -682,7 +682,7 @@ include::{generated}/api/version-notes/clCreateSubBuffer.asciidoc[] <> table. [[subbuffer-create-info-table]] -.List of supported buffer creation types by <> +.List of supported buffer creation types by {clCreateSubBuffer} [width="100%",cols="<50%,<50%",options="header"] |==== | Buffer Creation Type | Description @@ -1634,7 +1634,7 @@ include::{generated}/api/version-notes/clCreateImageWithProperties.asciidoc[] If _errcode_ret_ is `NULL`, no error code is returned. The alignment requirements for data stored in image objects are described -in <>. +in <>. For all image types except {CL_MEM_OBJECT_IMAGE1D_BUFFER}, if the value specified for _flags_ is 0, the default is used which is {CL_MEM_READ_WRITE}. @@ -2541,7 +2541,7 @@ image and a `write_only` image parameter, or to a `read_write` image parameter and any other image parameter. 
[[image-format-mapping-table]] -.Mapping from format flags passed to <> to OpenCL kernel language image access qualifiers +.Mapping from format flags passed to {clGetSupportedImageFormats} to OpenCL kernel language image access qualifiers [width="100%",cols="<50%,<50%",options="header"] |==== | Access Qualifier | Memory Flags @@ -2617,7 +2617,7 @@ include::{generated}/api/version-notes/clEnqueueWriteImage.asciidoc[] * _ptr_ is the pointer to a buffer in host memory where image data is to be read from or to be written to. The alignment requirements for ptr are specified in - <>. + <>. * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to complete before this particular command can be executed. If _event_wait_list_ is `NULL`, then this particular command does not wait @@ -3418,7 +3418,7 @@ include::{generated}/api/version-notes/clGetImageInfo.asciidoc[] If _param_value_size_ret_ is `NULL`, it is ignored. [[image-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetImageInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Image Info | Return type | Description @@ -3642,7 +3642,7 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host. [[pipe-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetPipeInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Pipe Info | Return type | Description @@ -4019,7 +4019,7 @@ include::{generated}/api/version-notes/clEnqueueMigrateMemObjects.asciidoc[] to an element of the _event_wait_list_ array. 
[[migration-flags-table]] -.List of supported migration flags by <> +.List of supported migration flags by {clEnqueueMigrateMemObjects} [width="100%",cols="<50%,<50%",options="header"] |==== | Memory Migration Flags | Description @@ -4123,7 +4123,7 @@ include::{generated}/api/version-notes/clGetMemObjectInfo.asciidoc[] If _param_value_size_ret_ is `NULL`, it is ignored. [[mem-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetMemObjectInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Memory Object Info | Return type | Description @@ -5075,7 +5075,7 @@ include::{generated}/api/version-notes/clCreateSamplerWithProperties.asciidoc[] supported sampler properties will be used. [[sampler-properties-table]] -.List of supported sampler creation properties by <> +.List of supported sampler creation properties by {clCreateSamplerWithProperties} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Sampler Property | Property Value | Description @@ -5277,7 +5277,7 @@ include::{generated}/api/version-notes/clGetSamplerInfo.asciidoc[] If _param_value_size_ret_ is `NULL`, it is ignored. [[sampler-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetSamplerInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Sampler Info | Return Type | Description @@ -6603,7 +6603,7 @@ include::{generated}/api/version-notes/clGetProgramInfo.asciidoc[] If _param_value_size_ret_ is `NULL`, it is ignored. [[program-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetProgramInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Program Info | Return Type | Description @@ -6812,7 +6812,7 @@ include::{generated}/api/version-notes/clGetProgramBuildInfo.asciidoc[] If _param_value_size_ret_ is `NULL`, it is ignored. 
[[program-build-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetProgramBuildInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Program Build Info | Return Type | Description @@ -7321,7 +7321,7 @@ include::{generated}/api/version-notes/clSetKernelExecInfo.asciidoc[] by _param_name_ are specified. [[kernel-exec-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clSetKernelExecInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Kernel Exec Info | Type | Description @@ -7515,7 +7515,7 @@ include::{generated}/api/version-notes/clGetKernelInfo.asciidoc[] If _param_value_size_ret_ is `NULL`, it is ignored. [[kernel-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetKernelInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Kernel Info | Return Type | Description @@ -7614,7 +7614,7 @@ include::{generated}/api/version-notes/clGetKernelWorkGroupInfo.asciidoc[] If _param_value_size_ret_ is `NULL`, it is ignored. [[kernel-workgroup-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetKernelWorkGroupInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Kernel Work-group Info | Return Type | Description @@ -7751,7 +7751,7 @@ Also see extension *cl_khr_subgroups*. If _param_value_size_ret_ is `NULL`, it is ignored. [[kernel-sub-group-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetKernelSubGroupInfo} [width="100%",cols="<25%,<25%,<25%,<25%",options="header"] |==== | Kernel Sub-group Info | Input Type | Return Type | Description @@ -7892,7 +7892,7 @@ program executable was built with the `-cl-kernel-arg-info option` specified in options argument to {clBuildProgram} or {clCompileProgram}. 
[[kernel-argument-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetKernelArgInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Kernel Arg Info | Return Type | Description @@ -8605,7 +8605,7 @@ include::{generated}/api/version-notes/clGetEventInfo.asciidoc[] If _param_value_size_ret_ is `NULL`, it is ignored. [[event-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetEventInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Event Info | Return Type | Description @@ -9328,7 +9328,7 @@ include::{generated}/api/version-notes/clGetEventProfilingInfo.asciidoc[] If _param_value_size_ret_ is `NULL`, it is ignored. [[event-profiling-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetEventProfilingInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== | Event Profiling Info | Return Type | Description diff --git a/ext/cl_khr_create_command_queue.asciidoc b/ext/cl_khr_create_command_queue.asciidoc index 84b7ca311..54fa4eb75 100644 --- a/ext/cl_khr_create_command_queue.asciidoc +++ b/ext/cl_khr_create_command_queue.asciidoc @@ -105,7 +105,7 @@ default value will be used. _properties_ can be NULL in which case the default values for supported command-queue properties will be used. [caption="Table X.Y "] -.List of supported param_names by <> +.List of supported param_names by {clCreateCommandQueueWithPropertiesKHR} |======================================================================= |*Queue Properties* |*Property Value* |*Description* diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc index 158a24d58..b323a031b 100644 --- a/ext/cl_khr_semaphore.asciidoc +++ b/ext/cl_khr_semaphore.asciidoc @@ -382,11 +382,11 @@ include::{generated}/api/protos/clGetSemaphoreInfoKHR.txt[] _sema_object_ specifies the semaphore object being queried. 
-_param_name_ is a constant that specifies the semaphore information to query, and must be one of the values shown in <>. +_param_name_ is a constant that specifies the semaphore information to query, and must be one of the values shown in the <> table. -_param_value_ is a pointer to memory where the result of the query is returned as described in <>. If _param_value_ is `NULL`, it is ignored. +_param_value_ is a pointer to memory where the result of the query is returned as described in the <> table. If _param_value_ is `NULL`, it is ignored. -_param_value_size_ specifies the size in bytes of memory pointed to _param_value_. This size must be greater than or equal to the size of the return type described in table <>. +_param_value_size_ specifies the size in bytes of memory pointed to by _param_value_. This size must be greater than or equal to the size of the return type described in the <> table. _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_value_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -434,8 +434,8 @@ Otherwise, it returns one of the following errors: * {CL_INVALID_SEMAPHORE_KHR} ** if _sema_object_ is not a valid semaphore * {CL_INVALID_VALUE} -** if _param_name_ is not one of the attribute defined in table <> or -** if _param_value_size_ is less than the size of Return Type of the corresponding _param_name_ attribute as defined in table <>. +** if _param_name_ is not one of the attributes defined in the <> table or +** if _param_value_size_ is less than the size of Return Type of the corresponding _param_name_ attribute as defined in the <> table. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
diff --git a/ext/cl_khr_subgroups.asciidoc b/ext/cl_khr_subgroups.asciidoc index 8ea3cdd4a..8dcd49429 100644 --- a/ext/cl_khr_subgroups.asciidoc +++ b/ext/cl_khr_subgroups.asciidoc @@ -91,7 +91,7 @@ queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. [[cl_khr_subgroups-kernel-sub-group-info-table]] -.List of supported param_names by <> +.List of supported param_names by {clGetKernelSubGroupInfoKHR} [width="100%",cols="<25%,<25%,<25%,<25%",options="header"] |==== | Kernel Sub-group Info | Input Type | Return Type | Description From bee46e8234f602f8e82a0524ec92746cf9c14045 Mon Sep 17 00:00:00 2001 From: jebasamuelimg <152629973+jebasamuelimg@users.noreply.github.com> Date: Tue, 12 Dec 2023 17:01:37 +0000 Subject: [PATCH 045/190] cl_img_mem_properties: Memory hint flags update (#1025) * Update cl.xml cl_img_mem_properties: Added hint mem alloc flags for CPU and GPU. * Update cl.xml * Update cl.xml * Update cl.xml Removing unused bitfield definition. * Renaming enum CL_DEVICE_MEMORY_CAPABILITIES_IMG * Update cl_img_mem_properties.asciidoc. Memory flag extensions and deviceInfo query extensions Added extension to memory allocation flags and deviceInfo property to query device memory capabilities. --- extensions/cl_img_mem_properties.asciidoc | 51 ++++++++++++++++++++--- xml/cl.xml | 23 +++++++--- 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/extensions/cl_img_mem_properties.asciidoc b/extensions/cl_img_mem_properties.asciidoc index fcf401040..235695fc3 100644 --- a/extensions/cl_img_mem_properties.asciidoc +++ b/extensions/cl_img_mem_properties.asciidoc @@ -14,12 +14,13 @@ include::../config/attribs.txt[] Imagination Technologies Developer Forum: + https://forums.imgtec.com/ -Jeremy Kemp, Imagination Technologies (Jeremy.Kemp 'at' imgtec.com) +Jeba Samuel, Imagination Technologies (Jeba.Samuels 'at' imgtec.com) == Contributors Jeremy Kemp, Imagination Technologies. + -Anitha Raj, Imagination Technologies. 
+Anitha Raj, Imagination Technologies. + +Jeba Samuel, Imagination Technologies. == Notice @@ -32,7 +33,7 @@ Shipping == Version Built On: {docdate} + -Version: 1.0.0 +Version: 1.1.1 == Dependencies @@ -40,7 +41,8 @@ This extension requires OpenCL 3.0 or later. == Overview -This extension provides additional _properties_ that can be passed to *clCreateBufferWithProperties*. +This extension provides additional _properties_ that can be passed to *clCreateBufferWithProperties* and *clGetDeviceInfo*. +This extension can be used to query additional information about Imagination OpenCL device memory. The additional information may be useful to allocate memory objects in different types of memory regions supported by the device. == New API Types @@ -62,6 +64,12 @@ Accepted values for `cl_mem_alloc_flags_img`: [source,c] ---- #define CL_MEM_ALLOC_RELAX_REQUIREMENTS_IMG (1 << 0) +#define CL_MEM_ALLOC_GPU_WRITE_COMBINE_IMG (1 << 1) +#define CL_MEM_ALLOC_GPU_CACHED_IMG (1 << 2) +#define CL_MEM_ALLOC_CPU_LOCAL_IMG (1 << 3) +#define CL_MEM_ALLOC_GPU_LOCAL_IMG (1 << 4) +#define CL_MEM_ALLOC_GPU_PRIVATE_IMG (1 << 5) + ---- == Modifications to the OpenCL API Specification @@ -81,6 +89,14 @@ Add Table: List of supported _properties_ when passed to *clCreateBufferWithProp | `cl_mem_alloc_flags_img` | `CL_MEM_ALLOC_RELAX_REQUIREMENTS_IMG` - On platforms with limited amounts of global memory available it may be desirable to request an allocation larger than the maximum amount reported by the implementation via `CL_DEVICE_MAX_MEM_ALLOC_SIZE` if there is sufficient knowledge about the entire system. This property allows an application to request buffer objects that are larger than `CL_DEVICE_MAX_MEM_ALLOC_SIZE`. +`CL_MEM_ALLOC_GPU_WRITE_COMBINE_IMG` - The GPU device memory backing this allocation will be allocated with the GPU Write Combine flag. +`CL_MEM_ALLOC_GPU_CACHED_IMG` - The GPU device memory backing this allocation will be allocated with the GPU Cached flag. 
+`CL_MEM_ALLOC_GPU_WRITE_COMBINE_IMG` and `CL_MEM_ALLOC_GPU_CACHED_IMG` are mutually exclusive. +`CL_MEM_ALLOC_CPU_LOCAL_IMG` - The GPU device memory backing this allocation will come from physical memory accessible to both the CPU and GPU. +`CL_MEM_ALLOC_GPU_LOCAL_IMG` - The GPU device memory backing this allocation will come from physical memory accessible to both the CPU and GPU. +`CL_MEM_ALLOC_GPU_PRIVATE_IMG` - The GPU device memory backing this allocation will come from physical memory accessible to the GPU only. +`CL_MEM_ALLOC_CPU_LOCAL_IMG`, `CL_MEM_ALLOC_GPU_LOCAL_IMG` and `CL_MEM_ALLOC_GPU_PRIVATE_IMG` are mutually exclusive. + |==== (Replace the following error value) :: + @@ -89,6 +105,29 @@ This property allows an application to request buffer objects that are larger th with + * `CL_INVALID_BUFFER_SIZE` if _size_ is 0. +(Add the following error values) :: +* `CL_INVALID_VALUE` if both `CL_MEM_ALLOC_GPU_WRITE_COMBINE_IMG` and `CL_MEM_ALLOC_GPU_CACHED_IMG` are set. +* `CL_INVALID_VALUE` if both `CL_MEM_ALLOC_CPU_LOCAL_IMG` and `CL_MEM_ALLOC_GPU_PRIVATE_IMG` are set. +* `CL_INVALID_VALUE` if both `CL_MEM_ALLOC_CPU_LOCAL_IMG` and `CL_MEM_ALLOC_GPU_LOCAL_IMG` are set. +* `CL_INVALID_VALUE` if both `CL_MEM_ALLOC_GPU_LOCAL_IMG` and `CL_MEM_ALLOC_GPU_PRIVATE_IMG` are set. +* `CL_INVALID_VALUE` if `CL_MEM_ALLOC_CPU_LOCAL_IMG` and `CL_MEM_ALLOC_GPU_LOCAL_IMG` and `CL_MEM_ALLOC_GPU_PRIVATE_IMG` are set. + +-- + +(Modify Section 4.2, *Querying Devices*) :: ++ +-- +Add Table: List of supported param names by *clGetDeviceInfo* :: ++ +[cols="1,1,4",options="header",width = "90%"] +|==== +| clDeviceInfo +| Return Type +| Description + +| `CL_DEVICE_MEMORY_CAPABILITIES_IMG` +| `cl_mem_alloc_flags_img` +| Allocation flags describing the memory region capabilities supported by the device. -- == Revision History @@ -99,4 +138,6 @@ with |==== | Version | Date | Author | Changes | 1.0.0 | 2020-08-18 | Jeremy Kemp | Initial revision. 
-|==== \ No newline at end of file +| 1.1.0 | 2022-01-04 | Jeremy Kemp | Added internal values for cl_mem_alloc_flags_img (1 << 1 and 1 << 2). Corrected a typo. +| 1.1.1 | 2023-11-16 | Jeba Samuel | Added internal values for cl_mem_alloc_flags_img (1 << 3, 1 << 4 and 1 << 5). The flags provide heap hints for the local memory regions of the CPU and GPU and the scratchpad memory region of the GPU. The extension also allows querying the supported memory regions. +|==== diff --git a/xml/cl.xml b/xml/cl.xml index 6f7df66ef..f40253c8d 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1239,8 +1239,12 @@ server's OpenCL/api-docs repository. + + + + + - @@ -2093,10 +2097,11 @@ server's OpenCL/api-docs repository. - - - - + + + + + @@ -6806,6 +6811,14 @@ server's OpenCL/api-docs repository. + + + + + + + + From c5ca092f3b7f4e2e75ed648fea946f94f5e6568b Mon Sep 17 00:00:00 2001 From: Sun Serega Date: Tue, 12 Dec 2023 19:08:43 +0200 Subject: [PATCH 046/190] Remove `cl_semaphore_import_properties_khr` (#1034) --- xml/cl.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/xml/cl.xml b/xml/cl.xml index f40253c8d..61f6c4bb4 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -7011,8 +7011,6 @@ server's OpenCL/api-docs repository. - - From 6c54e1141c949378422ff5ed4e5ccfd300752424 Mon Sep 17 00:00:00 2001 From: Sun Serega Date: Tue, 12 Dec 2023 19:09:08 +0200 Subject: [PATCH 047/190] fix ptr (#1033) --- xml/cl.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xml/cl.xml b/xml/cl.xml index 61f6c4bb4..2a478c364 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -2705,7 +2705,7 @@ server's OpenCL/api-docs repository. 
cl_int clReImportSemaphoreSyncFdKHR cl_semaphore_khr sema_object - cl_semaphore_reimport_properties_khr* reimport_props + cl_semaphore_reimport_properties_khr* reimport_props int fd From 4b156fc8d9d90eb392333e8a532abcf8f8dc2dae Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 12 Dec 2023 09:11:45 -0800 Subject: [PATCH 048/190] add the unbreakable attribute to generated source blocks (#1032) --- scripts/docgenerator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/docgenerator.py b/scripts/docgenerator.py index 073552534..ea4339f10 100644 --- a/scripts/docgenerator.py +++ b/scripts/docgenerator.py @@ -218,7 +218,7 @@ def writeInclude(self, directory, basename, contents): index_terms.append(basename) write('indexterm:[{}]'.format(','.join(index_terms)), file=fp) - write('[source,opencl]', file=fp) + write('[source%unbreakable,opencl]', file=fp) write('----', file=fp) write(contents, file=fp) write('----', file=fp) @@ -233,7 +233,7 @@ def writeInclude(self, directory, basename, contents): # Asciidoc anchor write(self.genOpts.conventions.warning_comment, file=fp) write('// Include this no-xref version without cross reference id for multiple includes of same file', file=fp) - write('[source,opencl]', file=fp) + write('[source,%unbreakable,opencl]', file=fp) write('----', file=fp) write(contents, file=fp) write('----', file=fp) From b00c47f95aa788ff58c869925c3019386ddf8c6a Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 12 Dec 2023 09:25:14 -0800 Subject: [PATCH 049/190] update the spec change log with changes from 3.0.14 (#1031) * initial change log for changes to 3.0.14 * minor updates and wordsmithing --- api/appendix_e.asciidoc | 30 ++++++++++++++++++++++++++++++ c/appendix_a.asciidoc | 7 +++++++ env/appendix_a.asciidoc | 4 ++++ 3 files changed, 41 insertions(+) diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index 4d5f6731c..3704f052f 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ 
-570,3 +570,33 @@ Changes from *v3.0.13*: * Removed redundant error conditions from `cl_khr_external_semaphore` and `cl_khr_external_memory`, see {khronos-opencl-pr}/903[#903] and {khronos-opencl-pr}/904[#904]. * Added new extension: ** `cl_khr_command_buffer_multi_device` (provisional) + +Changes from *v3.0.14*: + + * Clarified which error code should be returned when calling {clCreateBuffer} with a pointer to an SVM allocation that is too small, see {khronos-opencl-pr}/879[#879]. + * Improved capitalization and hyphenation consistency throughout the specs, see {khronos-opencl-pr}/902[#902]. + * Clarified that SVM is optional for all OpenCL 3.0 devices, see {khronos-opencl-pr}/913[#913]. + * Clarified that {clSetCommandQueueProperty} is only required for OpenCL 1.0 devices and may return an error otherwise, see {khronos-opencl-pr}/980[#980]. + * Clarified that the application must ensure the free function passed to {clEnqueueSVMFree} is thread safe, see {khronos-opencl-pr}/1016[#1016]. + * Clarified that the application must ensure the user function passed to {clEnqueueNativeKernel} is thread safe, see {khronos-opencl-pr}/1026[#1026]. + * `cl_khr_command_buffer` (provisional): + ** Removed the "invalid" command buffer state, see {khronos-opencl-pr}/885[#885]. + ** Added support for recording SVM memory copies and memory fills in a command buffer, see {khronos-opencl-pr}/915[#915]. + * `cl_khr_command_buffer_multi_device` (provisional): + ** Clarified that the sync devices query should only return root devices, see {khronos-opencl-pr}/925[#925]. + * `cl_khr_external_memory` (provisional): + ** Disallowed specifying a device handle list without also specifying an external memory handle, see {khronos-opencl-pr}/922[#922]. + ** Added a query to determine the handle types an implementation will assume have a linear memory layout, see {khronos-opencl-pr}/940[#940]. + ** Added an external memory-specific device handle list enum, see {khronos-opencl-pr}/956[#956]. 
+ ** Clarified that implementations may acquire information about an image from an external memory handle when the image is created, see {khronos-opencl-pr}/970[#970]. + * `cl_khr_external_semaphore` (provisional): + ** Added the ability to re-import "sync fd" handles into an existing semaphore, see {khronos-opencl-pr}/939[#939]. + ** Clarified that a semaphore may only export one handle type, and that a semaphore created from an external handle cannot also export a handle, see {khronos-opencl-pr}/975[#975]. + ** Clarified that `cl_khr_external_semaphore` requires support for `cl_khr_semaphore`, see {khronos-opencl-pr}/976[#976]. + ** Added a query to determine if a semaphore may export an external handle, see {khronos-opencl-pr}/997[#997]. + * `cl_khr_semaphore` (provisional): + ** Added a semaphore-specific device handle list enum, see {khronos-opencl-pr}/956[#956]. + ** Restricted semaphores to a single associated device, see {khronos-opencl-pr}/996[#996]. + * `cl_khr_subgroup_rotate`: + ** Clarified that only rotating within a subgroup is supported, see {khronos-opencl-pr}/967[#967]. + diff --git a/c/appendix_a.asciidoc b/c/appendix_a.asciidoc index 4b57a1578..fb691fb77 100644 --- a/c/appendix_a.asciidoc +++ b/c/appendix_a.asciidoc @@ -36,3 +36,10 @@ Changes from *v3.0.6*: Changes from *v3.0.7*: * Clarified optionality support for double-precision literals. + +Changes from *v3.0.14*: + + * Improved capitalization and hyphenation consistency throughout the specs, see {khronos-opencl-pr}/902[#902]. + * Clarified that the *nextafter* built-in function works with all floating-point types, see {khronos-opencl-pr}/953[#953]. + * Clarified that the async copy and wait group events built-in functions must be called within converged control flow, see {khronos-opencl-pr}/1015[#1015]. + * Removed unnecessary rounding mode text from the descriptions of the geometric and common functions, see {khronos-opencl-pr}/1027[#1027]. 
diff --git a/env/appendix_a.asciidoc b/env/appendix_a.asciidoc index 3273e8b5a..e216b72f1 100644 --- a/env/appendix_a.asciidoc +++ b/env/appendix_a.asciidoc @@ -39,3 +39,7 @@ Changes from *v3.0.6*: Changes from *v3.0.8*: * Clarified that some OpenCL `khr` extensions also require SPIR-V extensions. + +Changes from *v3.0.14*: + + * Fixed several numerical compliance bugs, see {khronos-opencl-pr}/937[#937]. From b5ea70b797c0f8871111dee15118a926adbb3211 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 3 Jan 2024 06:39:44 -0800 Subject: [PATCH 050/190] update the spec source copyright dates to 2024 (#1035) --- CXX_for_OpenCL.txt | 2 +- Makefile | 2 +- OpenCL_API.txt | 2 +- OpenCL_C.txt | 2 +- OpenCL_Cxx.txt | 2 +- OpenCL_Env.txt | 2 +- OpenCL_Ext.txt | 2 +- OpenCL_ICD_Installation.txt | 2 +- OpenCL_LangExt.txt | 2 +- api/acknowledgements.asciidoc | 2 +- api/appendix_a.asciidoc | 2 +- api/appendix_b.asciidoc | 2 +- api/appendix_c.asciidoc | 2 +- api/appendix_d.asciidoc | 2 +- api/appendix_e.asciidoc | 2 +- api/appendix_f.asciidoc | 2 +- api/appendix_g.asciidoc | 2 +- api/appendix_h.asciidoc | 2 +- api/dictionary.asciidoc | 2 +- api/embedded_profile.asciidoc | 2 +- api/footnotes.asciidoc | 2 +- api/glossary.asciidoc | 2 +- api/introduction.asciidoc | 2 +- api/opencl_architecture.asciidoc | 2 +- api/opencl_assoc_spec.asciidoc | 2 +- api/opencl_platform_layer.asciidoc | 2 +- api/opencl_runtime_layer.asciidoc | 2 +- c/appendix_a.asciidoc | 2 +- c/feature-dictionary.asciidoc | 2 +- c/footnotes.asciidoc | 2 +- config/copyright-ccby.txt | 2 +- config/katex_replace.rb | 2 +- config/katex_replace/extension.rb | 2 +- config/opencl.asciidoc | 2 +- config/rouge_opencl.rb | 2 +- config/spec-macros.rb | 2 +- config/spec-macros/extension.rb | 2 +- config/version-full-links.asciidoc | 2 +- config/version-local-links.asciidoc | 2 +- copyrights-ccby.txt | 2 +- copyrights.txt | 2 +- cxx/acknowledgements.txt | 2 +- cxx/annotation.txt | 2 +- cxx/compiler_options.txt | 2 +- 
cxx/generic_type_name_notation.txt | 2 +- cxx/image_addressing_and_filtering.txt | 2 +- cxx/lang/address_spaces.txt | 2 +- cxx/lang/attribute_qualifiers.txt | 2 +- cxx/lang/builtin_data_types.txt | 2 +- cxx/lang/expressions.txt | 2 +- cxx/lang/implicit_type_conversions.txt | 2 +- cxx/lang/kernel_functions.txt | 2 +- cxx/lang/keywords.txt | 2 +- cxx/lang/lang.txt | 2 +- cxx/lang/preprocessor.txt | 2 +- cxx/lang/restrictions.txt | 2 +- cxx/numerical_compliance/edge_case_behavior.txt | 2 +- cxx/numerical_compliance/floating_point_exceptions.txt | 2 +- cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt | 2 +- cxx/numerical_compliance/numerical_compliance.txt | 2 +- cxx/numerical_compliance/relative_error_as_ulps.txt | 2 +- cxx/numerical_compliance/rounding_modes.txt | 2 +- cxx/stdlib/address_spaces.txt | 2 +- cxx/stdlib/array.txt | 2 +- cxx/stdlib/atomic_operations.txt | 2 +- cxx/stdlib/common.txt | 2 +- cxx/stdlib/conversions.txt | 2 +- cxx/stdlib/definitions.txt | 2 +- cxx/stdlib/device_enqueue.txt | 2 +- cxx/stdlib/general_utilities.txt | 2 +- cxx/stdlib/geometric.txt | 2 +- cxx/stdlib/half_wrapper.txt | 2 +- cxx/stdlib/images_and_samplers.txt | 2 +- cxx/stdlib/integer.txt | 2 +- cxx/stdlib/iterator.txt | 2 +- cxx/stdlib/limits.txt | 2 +- cxx/stdlib/marker_types.txt | 2 +- cxx/stdlib/math.txt | 2 +- cxx/stdlib/math_constants.txt | 2 +- cxx/stdlib/pipes.txt | 2 +- cxx/stdlib/printf.txt | 2 +- cxx/stdlib/range.txt | 2 +- cxx/stdlib/reinterpreting_data.txt | 2 +- cxx/stdlib/relational.txt | 2 +- cxx/stdlib/specialization_constants.txt | 2 +- cxx/stdlib/stdlib.txt | 2 +- cxx/stdlib/synchronization.txt | 2 +- cxx/stdlib/tuple.txt | 2 +- cxx/stdlib/type_traits.txt | 2 +- cxx/stdlib/vector_data_load_and_store.txt | 2 +- cxx/stdlib/vector_iterator.txt | 2 +- cxx/stdlib/vector_utilities.txt | 2 +- cxx/stdlib/vector_wrapper.txt | 2 +- cxx/stdlib/work_group.txt | 2 +- cxx/stdlib/work_item.txt | 2 +- cxx4opencl/acknowledgements.txt | 2 +- 
cxx4opencl/address_spaces.txt | 2 +- cxx4opencl/cxxcasts.txt | 2 +- cxx4opencl/diff2cxx.txt | 2 +- cxx4opencl/diff2openclc.txt | 2 +- cxx4opencl/intro.txt | 2 +- cxx4opencl/kernel.txt | 2 +- cxx4opencl/references.txt | 2 +- env/appendix_a.asciidoc | 2 +- env/common_properties.asciidoc | 2 +- env/dictionary.asciidoc | 2 +- env/extensions.asciidoc | 2 +- env/image_addressing_and_filtering.asciidoc | 2 +- env/introduction.asciidoc | 2 +- env/numerical_compliance.asciidoc | 2 +- env/references.asciidoc | 2 +- env/required_capabilities.asciidoc | 2 +- env/validation_rules.asciidoc | 2 +- ext/cl_khr_3d_image_writes.asciidoc | 2 +- ext/cl_khr_async_work_group_copy_fence.asciidoc | 2 +- ext/cl_khr_byte_addressable_store.asciidoc | 2 +- ext/cl_khr_command_buffer.asciidoc | 2 +- ext/cl_khr_command_buffer_multi_device.asciidoc | 2 +- ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 2 +- ext/cl_khr_create_command_queue.asciidoc | 2 +- ext/cl_khr_d3d10_sharing.asciidoc | 2 +- ext/cl_khr_d3d11_sharing.asciidoc | 2 +- ext/cl_khr_depth_images.asciidoc | 2 +- ext/cl_khr_device_enqueue_local_arg_types.asciidoc | 2 +- ext/cl_khr_device_uuid.asciidoc | 2 +- ext/cl_khr_dx9_media_sharing.asciidoc | 2 +- ext/cl_khr_egl_event.asciidoc | 2 +- ext/cl_khr_egl_image.asciidoc | 2 +- ext/cl_khr_expect_assume.asciidoc | 2 +- ext/cl_khr_extended_async_copies.asciidoc | 2 +- ext/cl_khr_extended_bit_ops.asciidoc | 2 +- ext/cl_khr_extended_versioning.asciidoc | 2 +- ext/cl_khr_external_memory.asciidoc | 2 +- ext/cl_khr_external_semaphore.asciidoc | 2 +- ext/cl_khr_fp16.asciidoc | 2 +- ext/cl_khr_fp64.asciidoc | 2 +- ext/cl_khr_gl_depth_images.asciidoc | 2 +- ext/cl_khr_gl_event.asciidoc | 2 +- ext/cl_khr_gl_msaa_sharing.asciidoc | 2 +- ext/cl_khr_gl_sharing__context.asciidoc | 2 +- ext/cl_khr_gl_sharing__memobjs.asciidoc | 2 +- ext/cl_khr_icd.asciidoc | 2 +- ext/cl_khr_il_program.asciidoc | 2 +- ext/cl_khr_image2d_from_buffer.asciidoc | 2 +- ext/cl_khr_initialize_memory.asciidoc | 2 +- 
ext/cl_khr_int32_atomics.asciidoc | 2 +- ext/cl_khr_int64_atomics.asciidoc | 2 +- ext/cl_khr_integer_dot_product.asciidoc | 2 +- ext/cl_khr_mipmap_image.asciidoc | 2 +- ext/cl_khr_pci_bus_info.asciidoc | 2 +- ext/cl_khr_priority_hints.asciidoc | 2 +- ext/cl_khr_select_fprounding_mode.asciidoc | 2 +- ext/cl_khr_semaphore.asciidoc | 2 +- ext/cl_khr_spir.asciidoc | 2 +- ext/cl_khr_srgb_image_writes.asciidoc | 2 +- ext/cl_khr_subgroup_named_barrier.asciidoc | 2 +- ext/cl_khr_subgroup_rotate.asciidoc | 2 +- ext/cl_khr_subgroups.asciidoc | 2 +- ext/cl_khr_suggested_local_work_size.asciidoc | 2 +- ext/cl_khr_terminate_context.asciidoc | 2 +- ext/cl_khr_throttle_hints.asciidoc | 2 +- ext/cl_khr_work_group_uniform_arithmetic.asciidoc | 2 +- ext/deprecated_extensions.asciidoc | 2 +- ext/dictionary.asciidoc | 2 +- ext/index.asciidoc | 2 +- ext/introduction.asciidoc | 2 +- ext/provisional_notice.asciidoc | 2 +- ext/quick_reference.asciidoc | 2 +- ext/spirv_extensions.asciidoc | 2 +- ext/to_core_features.asciidoc | 2 +- extensions/cl_arm_controlled_kernel_termination.asciidoc | 2 +- extensions/cl_arm_printf.asciidoc | 2 +- extensions/cl_arm_protected_memory_allocation.asciidoc | 2 +- extensions/cl_arm_scheduling_controls.asciidoc | 2 +- extensions/cl_ext_cxx_for_opencl.asciidoc | 2 +- extensions/cl_ext_float_atomics.asciidoc | 4 ++-- extensions/cl_ext_image_from_buffer.asciidoc | 2 +- extensions/cl_ext_image_raw10_raw12.asciidoc | 2 +- extensions/cl_extension_template.asciidoc | 4 ++-- extensions/cl_img_cached_allocations.asciidoc | 2 +- extensions/cl_img_generate_mipmap.asciidoc | 2 +- extensions/cl_img_mem_properties.asciidoc | 2 +- extensions/cl_img_use_gralloc_ptr.asciidoc | 2 +- extensions/cl_img_yuv_image.asciidoc | 2 +- extensions/cl_intel_bfloat16_conversions.asciidoc | 2 +- extensions/cl_intel_command_queue_families.asciidoc | 2 +- extensions/cl_intel_create_buffer_with_properties.asciidoc | 2 +- extensions/cl_intel_device_attribute_query.asciidoc | 2 +- 
extensions/cl_intel_mem_alloc_buffer_location.asciidoc | 2 +- extensions/cl_intel_mem_channel_property.asciidoc | 2 +- extensions/cl_intel_mem_force_host_memory.asciidoc | 2 +- extensions/cl_intel_packed_yuv.asciidoc | 2 +- extensions/cl_intel_planar_yuv.asciidoc | 2 +- extensions/cl_intel_program_scope_host_pipe.asciidoc | 2 +- extensions/cl_intel_required_subgroup_size.asciidoc | 2 +- extensions/cl_intel_sharing_format_query.asciidoc | 2 +- .../cl_intel_spirv_device_side_avc_motion_estimation.asciidoc | 2 +- extensions/cl_intel_spirv_media_block_io.asciidoc | 2 +- extensions/cl_intel_spirv_subgroups.asciidoc | 2 +- extensions/cl_intel_split_work_group_barrier.asciidoc | 2 +- .../cl_intel_subgroup_matrix_multiply_accumulate.asciidoc | 2 +- ...l_intel_subgroup_split_matrix_multiply_accumulate.asciidoc | 2 +- extensions/cl_intel_subgroups.asciidoc | 2 +- extensions/cl_intel_subgroups_char.asciidoc | 2 +- extensions/cl_intel_subgroups_long.asciidoc | 2 +- extensions/cl_intel_subgroups_short.asciidoc | 2 +- extensions/cl_intel_unified_shared_memory.asciidoc | 2 +- extensions/cl_loader_info.asciidoc | 4 ++-- extensions/cl_loader_layers.asciidoc | 2 +- extensions/cl_pocl_content_size.asciidoc | 2 +- extensions/extensions.txt | 2 +- langext/acknowledgements.txt | 2 +- langext/intro.txt | 2 +- langext/variadic_macro.txt | 2 +- man/static/EXTENSION.txt | 2 +- man/static/abstractDataTypes.txt | 2 +- man/static/clCreateEventFromEGLSyncKHR.txt | 2 +- man/static/clCreateEventFromGLsyncKHR.txt | 2 +- man/static/clCreateFromD3D10BufferKHR.txt | 2 +- man/static/clCreateFromD3D10Texture2DKHR.txt | 2 +- man/static/clCreateFromD3D10Texture3DKHR.txt | 2 +- man/static/clCreateFromD3D11BufferKHR.txt | 2 +- man/static/clCreateFromD3D11Texture2DKHR.txt | 2 +- man/static/clCreateFromD3D11Texture3DKHR.txt | 2 +- man/static/clCreateFromDX9MediaSurfaceKHR.txt | 2 +- man/static/clCreateFromEGLImageKHR.txt | 2 +- man/static/clCreateFromGLBuffer.txt | 2 +- 
man/static/clCreateFromGLRenderbuffer.txt | 2 +- man/static/clCreateFromGLTexture.txt | 2 +- man/static/clEnqueueAcquireD3D10ObjectsKHR.txt | 2 +- man/static/clEnqueueAcquireD3D11ObjectsKHR.txt | 2 +- man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt | 2 +- man/static/clEnqueueAcquireEGLObjectsKHR.txt | 2 +- man/static/clEnqueueAcquireGLObjects.txt | 2 +- man/static/clEnqueueReleaseD3D10ObjectsKHR.txt | 2 +- man/static/clEnqueueReleaseD3D11ObjectsKHR.txt | 2 +- man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt | 2 +- man/static/clEnqueueReleaseEGLObjectsKHR.txt | 2 +- man/static/clEnqueueReleaseGLObjects.txt | 2 +- man/static/clGetDeviceIDsFromD3D10KHR.txt | 2 +- man/static/clGetDeviceIDsFromD3D11KHR.txt | 2 +- man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt | 2 +- man/static/clGetExtensionFunctionAddressForPlatform.txt | 2 +- man/static/clGetGLContextInfoKHR.txt | 2 +- man/static/clGetGLObjectInfo.txt | 2 +- man/static/clGetGLTextureInfo.txt | 2 +- man/static/clIcdGetPlatformIDsKHR.txt | 2 +- man/static/clTerminateContextKHR.txt | 2 +- man/static/cl_khr_3d_image_writes.txt | 2 +- man/static/cl_khr_byte_addressable_store.txt | 2 +- man/static/cl_khr_d3d10_sharing.txt | 2 +- man/static/cl_khr_d3d11_sharing.txt | 2 +- man/static/cl_khr_depth_images.txt | 2 +- man/static/cl_khr_device_enqueue_local_arg_types.txt | 2 +- man/static/cl_khr_dx9_media_sharing.txt | 2 +- man/static/cl_khr_egl_event.txt | 2 +- man/static/cl_khr_egl_image.txt | 2 +- man/static/cl_khr_fp16.txt | 2 +- man/static/cl_khr_fp64.txt | 2 +- man/static/cl_khr_gl_depth_images.txt | 2 +- man/static/cl_khr_gl_event.txt | 2 +- man/static/cl_khr_gl_msaa_sharing.txt | 2 +- man/static/cl_khr_gl_sharing.txt | 2 +- man/static/cl_khr_global_int32_base_atomics.txt | 2 +- man/static/cl_khr_global_int32_extended_atomics.txt | 2 +- man/static/cl_khr_icd.txt | 2 +- man/static/cl_khr_il_program.txt | 2 +- man/static/cl_khr_image2d_from_buffer.txt | 2 +- man/static/cl_khr_initialize_memory.txt | 2 +- 
man/static/cl_khr_int64_base_atomics.txt | 2 +- man/static/cl_khr_int64_extended_atomics.txt | 2 +- man/static/cl_khr_local_int32_base_atomics.txt | 2 +- man/static/cl_khr_local_int32_extended_atomics.txt | 2 +- man/static/cl_khr_mipmap_image.txt | 2 +- man/static/cl_khr_priority_hints.txt | 2 +- man/static/cl_khr_spir.txt | 2 +- man/static/cl_khr_srgb_image_writes.txt | 2 +- man/static/cl_khr_subgroups.txt | 2 +- man/static/cl_khr_terminate_context.txt | 2 +- man/static/cl_khr_throttle_hints.txt | 2 +- man/static/convert_T.txt | 2 +- man/static/deadLinks.txt | 2 +- man/static/enums.txt | 2 +- man/static/footer.txt | 2 +- man/static/intro.txt | 2 +- scripts/cgenerator.py | 2 +- scripts/checklinks.py | 2 +- scripts/clconventions.py | 2 +- scripts/conventions.py | 2 +- scripts/docgenerator.py | 2 +- scripts/extensionmetadocgenerator.py | 2 +- scripts/genRef.py | 4 ++-- scripts/gen_dictionaries.py | 4 ++-- scripts/gen_version_notes.py | 4 ++-- scripts/gencl.py | 4 ++-- scripts/generator.py | 2 +- scripts/pygenerator.py | 2 +- scripts/realign.py | 2 +- scripts/reflib.py | 2 +- scripts/reg.py | 2 +- scripts/spec_tools/util.py | 2 +- xml/Makefile | 2 +- xml/cl.xml | 2 +- xml/registry.rnc | 2 +- 304 files changed, 311 insertions(+), 311 deletions(-) diff --git a/CXX_for_OpenCL.txt b/CXX_for_OpenCL.txt index 3d3ea0140..e182fd9ee 100644 --- a/CXX_for_OpenCL.txt +++ b/CXX_for_OpenCL.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/Makefile b/Makefile index 85d2ca239..47848ed20 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2013-2023 The Khronos Group Inc. +# Copyright (c) 2013-2024 The Khronos Group Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/OpenCL_API.txt b/OpenCL_API.txt index 9cdac6798..0df01b799 100644 --- a/OpenCL_API.txt +++ b/OpenCL_API.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_C.txt b/OpenCL_C.txt index fccd80798..2bdf6887e 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_Cxx.txt b/OpenCL_Cxx.txt index ea50fba12..bf0d71f76 100644 --- a/OpenCL_Cxx.txt +++ b/OpenCL_Cxx.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_Env.txt b/OpenCL_Env.txt index debc6b13a..96b07f4ee 100644 --- a/OpenCL_Env.txt +++ b/OpenCL_Env.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_Ext.txt b/OpenCL_Ext.txt index cedcd485a..a2e983ea3 100644 --- a/OpenCL_Ext.txt +++ b/OpenCL_Ext.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_ICD_Installation.txt b/OpenCL_ICD_Installation.txt index d1cacaf8c..9032b77ab 100644 --- a/OpenCL_ICD_Installation.txt +++ b/OpenCL_ICD_Installation.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_LangExt.txt b/OpenCL_LangExt.txt index 4c2ed8e73..acb91342b 100644 --- a/OpenCL_LangExt.txt +++ b/OpenCL_LangExt.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/acknowledgements.asciidoc b/api/acknowledgements.asciidoc index 202e440b2..6df49ab06 100644 --- a/api/acknowledgements.asciidoc +++ b/api/acknowledgements.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2016-2023 The Khronos Group. This work is licensed under a +// Copyright 2016-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/appendix_a.asciidoc b/api/appendix_a.asciidoc index bd91f6a32..c2fc908df 100644 --- a/api/appendix_a.asciidoc +++ b/api/appendix_a.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/appendix_b.asciidoc b/api/appendix_b.asciidoc index 1c9d66eae..2759903f1 100644 --- a/api/appendix_b.asciidoc +++ b/api/appendix_b.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2016-2023 The Khronos Group. This work is licensed under a +// Copyright 2016-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/appendix_c.asciidoc b/api/appendix_c.asciidoc index 292bce396..a8b236c21 100644 --- a/api/appendix_c.asciidoc +++ b/api/appendix_c.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2016-2023 The Khronos Group. This work is licensed under a +// Copyright 2016-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/appendix_d.asciidoc b/api/appendix_d.asciidoc index 8cc9b627c..aad002ee8 100644 --- a/api/appendix_d.asciidoc +++ b/api/appendix_d.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index 3704f052f..8cc593172 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/appendix_f.asciidoc b/api/appendix_f.asciidoc index 22dd6a307..57b6c2435 100644 --- a/api/appendix_f.asciidoc +++ b/api/appendix_f.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/appendix_g.asciidoc b/api/appendix_g.asciidoc index 85364782d..fe0850ce4 100644 --- a/api/appendix_g.asciidoc +++ b/api/appendix_g.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/appendix_h.asciidoc b/api/appendix_h.asciidoc index 8123a1a8d..eda297c9a 100644 --- a/api/appendix_h.asciidoc +++ b/api/appendix_h.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2023 The Khronos Group. This work is licensed under a +// Copyright 2020-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/dictionary.asciidoc b/api/dictionary.asciidoc index b1cdce171..2f85dfdd5 100644 --- a/api/dictionary.asciidoc +++ b/api/dictionary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/embedded_profile.asciidoc b/api/embedded_profile.asciidoc index 46dc54515..e80243c8e 100644 --- a/api/embedded_profile.asciidoc +++ b/api/embedded_profile.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index c1cd80dc4..4af8a24c9 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/glossary.asciidoc b/api/glossary.asciidoc index a73a2d4c9..dbe50cd01 100644 --- a/api/glossary.asciidoc +++ b/api/glossary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/introduction.asciidoc b/api/introduction.asciidoc index 9f12bf0f8..74fea0ce9 100644 --- a/api/introduction.asciidoc +++ b/api/introduction.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index ee54ac655..a3b42ee28 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/opencl_assoc_spec.asciidoc b/api/opencl_assoc_spec.asciidoc index 21b13368f..aa19b1add 100644 --- a/api/opencl_assoc_spec.asciidoc +++ b/api/opencl_assoc_spec.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 125f21e73..6ab8b4779 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index d7a4de7a3..3c7fa3123 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/c/appendix_a.asciidoc b/c/appendix_a.asciidoc index fb691fb77..711fe7c39 100644 --- a/c/appendix_a.asciidoc +++ b/c/appendix_a.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/c/feature-dictionary.asciidoc b/c/feature-dictionary.asciidoc index 17c8cd792..ce7767038 100644 --- a/c/feature-dictionary.asciidoc +++ b/c/feature-dictionary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/c/footnotes.asciidoc b/c/footnotes.asciidoc index 2a8d1bc61..6047fd0a2 100644 --- a/c/footnotes.asciidoc +++ b/c/footnotes.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/config/copyright-ccby.txt b/config/copyright-ccby.txt index 38ddf68d6..7a63dbaa6 100644 --- a/config/copyright-ccby.txt +++ b/config/copyright-ccby.txt @@ -1,3 +1,3 @@ -Copyright 2014-2023 The Khronos Group Inc. +Copyright 2014-2024 The Khronos Group Inc. SPDX-License-Identifier: CC-BY-4.0 diff --git a/config/katex_replace.rb b/config/katex_replace.rb index 8452d705f..12465a460 100644 --- a/config/katex_replace.rb +++ b/config/katex_replace.rb @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2023 The Khronos Group Inc. 
+# Copyright (c) 2016-2024 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/katex_replace/extension.rb b/config/katex_replace/extension.rb index ce0aa20f3..24f0e95d6 100644 --- a/config/katex_replace/extension.rb +++ b/config/katex_replace/extension.rb @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2023 The Khronos Group Inc. +# Copyright (c) 2016-2024 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/opencl.asciidoc b/config/opencl.asciidoc index 9d233e95e..db190b495 100644 --- a/config/opencl.asciidoc +++ b/config/opencl.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/config/rouge_opencl.rb b/config/rouge_opencl.rb index 0b3ec645f..e7c26d47b 100644 --- a/config/rouge_opencl.rb +++ b/config/rouge_opencl.rb @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # # frozen_string_literal: true -# Copyright (c) 2011-2023 The Khronos Group, Inc. +# Copyright (c) 2011-2024 The Khronos Group, Inc. # SPDX-License-Identifier: Apache-2.0 #puts "Loading rouge_opencl extensions for source code highlighting..." diff --git a/config/spec-macros.rb b/config/spec-macros.rb index 032ad440d..5fc043436 100644 --- a/config/spec-macros.rb +++ b/config/spec-macros.rb @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2023 The Khronos Group Inc. +# Copyright (c) 2016-2024 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/config/spec-macros/extension.rb b/config/spec-macros/extension.rb index 8e3d61a86..cce81f9a9 100644 --- a/config/spec-macros/extension.rb +++ b/config/spec-macros/extension.rb @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2023 The Khronos Group Inc. +# Copyright (c) 2016-2024 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/version-full-links.asciidoc b/config/version-full-links.asciidoc index 884a50c14..f7e506950 100644 --- a/config/version-full-links.asciidoc +++ b/config/version-full-links.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2023 The Khronos Group. This work is licensed under a +// Copyright 2023-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/config/version-local-links.asciidoc b/config/version-local-links.asciidoc index d2f3b8029..7ce8377e4 100644 --- a/config/version-local-links.asciidoc +++ b/config/version-local-links.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2023 The Khronos Group. This work is licensed under a +// Copyright 2023-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/copyrights-ccby.txt b/copyrights-ccby.txt index c6536480b..d6b85b3e5 100644 --- a/copyrights-ccby.txt +++ b/copyrights-ccby.txt @@ -1,4 +1,4 @@ -Copyright 2019-2023 The Khronos Group. +Copyright 2019-2024 The Khronos Group. Khronos licenses this file to you under the Creative Commons Attribution 4.0 International (CC BY 4.0) License (the "License"); you may not use this file diff --git a/copyrights.txt b/copyrights.txt index 078a93b57..11333319c 100644 --- a/copyrights.txt +++ b/copyrights.txt @@ -1,4 +1,4 @@ -Copyright 2008-2023 The Khronos Group Inc. +Copyright 2008-2024 The Khronos Group Inc. 
This Specification is protected by copyright laws and contains material proprietary to Khronos. Except as described by these terms, it or any components may not be reproduced, republished, diff --git a/cxx/acknowledgements.txt b/cxx/acknowledgements.txt index 0800d1e87..e00801c8c 100644 --- a/cxx/acknowledgements.txt +++ b/cxx/acknowledgements.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/annotation.txt b/cxx/annotation.txt index 780572b50..d3f95b606 100644 --- a/cxx/annotation.txt +++ b/cxx/annotation.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/compiler_options.txt b/cxx/compiler_options.txt index bc7d17f39..5f601f00c 100644 --- a/cxx/compiler_options.txt +++ b/cxx/compiler_options.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/generic_type_name_notation.txt b/cxx/generic_type_name_notation.txt index 936dc9224..578c677ae 100644 --- a/cxx/generic_type_name_notation.txt +++ b/cxx/generic_type_name_notation.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/image_addressing_and_filtering.txt b/cxx/image_addressing_and_filtering.txt index 5b7c09c5a..7b27f3d04 100644 --- a/cxx/image_addressing_and_filtering.txt +++ b/cxx/image_addressing_and_filtering.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/address_spaces.txt b/cxx/lang/address_spaces.txt index 04a32913c..dbaab05ef 100644 --- a/cxx/lang/address_spaces.txt +++ b/cxx/lang/address_spaces.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/attribute_qualifiers.txt b/cxx/lang/attribute_qualifiers.txt index b5b0f8d76..0eb344836 100644 --- a/cxx/lang/attribute_qualifiers.txt +++ b/cxx/lang/attribute_qualifiers.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/builtin_data_types.txt b/cxx/lang/builtin_data_types.txt index f59e9f315..7925ac28d 100644 --- a/cxx/lang/builtin_data_types.txt +++ b/cxx/lang/builtin_data_types.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/expressions.txt b/cxx/lang/expressions.txt index 417a29918..10dfde9f0 100644 --- a/cxx/lang/expressions.txt +++ b/cxx/lang/expressions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/implicit_type_conversions.txt b/cxx/lang/implicit_type_conversions.txt index 27c1ffc40..695cdfb8b 100644 --- a/cxx/lang/implicit_type_conversions.txt +++ b/cxx/lang/implicit_type_conversions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/kernel_functions.txt b/cxx/lang/kernel_functions.txt index 68dcd0e89..9efb5251b 100644 --- a/cxx/lang/kernel_functions.txt +++ b/cxx/lang/kernel_functions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/keywords.txt b/cxx/lang/keywords.txt index 6047add1e..5381756e6 100644 --- a/cxx/lang/keywords.txt +++ b/cxx/lang/keywords.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/lang.txt b/cxx/lang/lang.txt index a0788034b..78360fa63 100644 --- a/cxx/lang/lang.txt +++ b/cxx/lang/lang.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/preprocessor.txt b/cxx/lang/preprocessor.txt index e3262e293..95880a19b 100644 --- a/cxx/lang/preprocessor.txt +++ b/cxx/lang/preprocessor.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/restrictions.txt b/cxx/lang/restrictions.txt index a27335102..cb212b30c 100644 --- a/cxx/lang/restrictions.txt +++ b/cxx/lang/restrictions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/edge_case_behavior.txt b/cxx/numerical_compliance/edge_case_behavior.txt index 984612344..814e112b8 100644 --- a/cxx/numerical_compliance/edge_case_behavior.txt +++ b/cxx/numerical_compliance/edge_case_behavior.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/floating_point_exceptions.txt b/cxx/numerical_compliance/floating_point_exceptions.txt index a1be11110..f2a043c0c 100644 --- a/cxx/numerical_compliance/floating_point_exceptions.txt +++ b/cxx/numerical_compliance/floating_point_exceptions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt b/cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt index 1244c5d87..08d11b047 100644 --- a/cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt +++ b/cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/numerical_compliance.txt b/cxx/numerical_compliance/numerical_compliance.txt index 24d49dbb5..d2606a35d 100644 --- a/cxx/numerical_compliance/numerical_compliance.txt +++ b/cxx/numerical_compliance/numerical_compliance.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/relative_error_as_ulps.txt b/cxx/numerical_compliance/relative_error_as_ulps.txt index ea4fd919a..cc4ad9de1 100644 --- a/cxx/numerical_compliance/relative_error_as_ulps.txt +++ b/cxx/numerical_compliance/relative_error_as_ulps.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/rounding_modes.txt b/cxx/numerical_compliance/rounding_modes.txt index a2e4ccc04..f607a6774 100644 --- a/cxx/numerical_compliance/rounding_modes.txt +++ b/cxx/numerical_compliance/rounding_modes.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/address_spaces.txt b/cxx/stdlib/address_spaces.txt index 2140c660d..83688504e 100644 --- a/cxx/stdlib/address_spaces.txt +++ b/cxx/stdlib/address_spaces.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/array.txt b/cxx/stdlib/array.txt index 40fb19d39..2483fa04e 100644 --- a/cxx/stdlib/array.txt +++ b/cxx/stdlib/array.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/atomic_operations.txt b/cxx/stdlib/atomic_operations.txt index 7450c87a8..5d326c8b8 100644 --- a/cxx/stdlib/atomic_operations.txt +++ b/cxx/stdlib/atomic_operations.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/common.txt b/cxx/stdlib/common.txt index bf7620eb2..c0b932bff 100644 --- a/cxx/stdlib/common.txt +++ b/cxx/stdlib/common.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/conversions.txt b/cxx/stdlib/conversions.txt index 068e7c277..032690e0e 100644 --- a/cxx/stdlib/conversions.txt +++ b/cxx/stdlib/conversions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/definitions.txt b/cxx/stdlib/definitions.txt index b3a2c6513..f167cb110 100644 --- a/cxx/stdlib/definitions.txt +++ b/cxx/stdlib/definitions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/device_enqueue.txt b/cxx/stdlib/device_enqueue.txt index 13b1b055a..e3dd94167 100644 --- a/cxx/stdlib/device_enqueue.txt +++ b/cxx/stdlib/device_enqueue.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/general_utilities.txt b/cxx/stdlib/general_utilities.txt index 730791de6..bc6bf3c8d 100644 --- a/cxx/stdlib/general_utilities.txt +++ b/cxx/stdlib/general_utilities.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/geometric.txt b/cxx/stdlib/geometric.txt index 6bb3c5a26..18ed5108d 100644 --- a/cxx/stdlib/geometric.txt +++ b/cxx/stdlib/geometric.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/half_wrapper.txt b/cxx/stdlib/half_wrapper.txt index 2ad117ab8..0ad1e5ec6 100644 --- a/cxx/stdlib/half_wrapper.txt +++ b/cxx/stdlib/half_wrapper.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/images_and_samplers.txt b/cxx/stdlib/images_and_samplers.txt index fed213459..601ec04c1 100644 --- a/cxx/stdlib/images_and_samplers.txt +++ b/cxx/stdlib/images_and_samplers.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/integer.txt b/cxx/stdlib/integer.txt index f7aae2143..fff5e91f3 100644 --- a/cxx/stdlib/integer.txt +++ b/cxx/stdlib/integer.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/iterator.txt b/cxx/stdlib/iterator.txt index d7fa39e25..b2592b5cb 100644 --- a/cxx/stdlib/iterator.txt +++ b/cxx/stdlib/iterator.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/limits.txt b/cxx/stdlib/limits.txt index c7d453a1b..60bdad986 100644 --- a/cxx/stdlib/limits.txt +++ b/cxx/stdlib/limits.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/marker_types.txt b/cxx/stdlib/marker_types.txt index ca42eb40d..fb09da812 100644 --- a/cxx/stdlib/marker_types.txt +++ b/cxx/stdlib/marker_types.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/math.txt b/cxx/stdlib/math.txt index 487ee2c99..6cce28211 100644 --- a/cxx/stdlib/math.txt +++ b/cxx/stdlib/math.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/math_constants.txt b/cxx/stdlib/math_constants.txt index c37215324..63c01f976 100644 --- a/cxx/stdlib/math_constants.txt +++ b/cxx/stdlib/math_constants.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/pipes.txt b/cxx/stdlib/pipes.txt index 37d301144..9ec6d157b 100644 --- a/cxx/stdlib/pipes.txt +++ b/cxx/stdlib/pipes.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/printf.txt b/cxx/stdlib/printf.txt index c92748beb..a4479dc25 100644 --- a/cxx/stdlib/printf.txt +++ b/cxx/stdlib/printf.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/range.txt b/cxx/stdlib/range.txt index 6f4d766ef..d2172d2bc 100644 --- a/cxx/stdlib/range.txt +++ b/cxx/stdlib/range.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/reinterpreting_data.txt b/cxx/stdlib/reinterpreting_data.txt index 1541d2bce..6fd5b96c9 100644 --- a/cxx/stdlib/reinterpreting_data.txt +++ b/cxx/stdlib/reinterpreting_data.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/relational.txt b/cxx/stdlib/relational.txt index e074a0785..80d0fd596 100644 --- a/cxx/stdlib/relational.txt +++ b/cxx/stdlib/relational.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/specialization_constants.txt b/cxx/stdlib/specialization_constants.txt index 646c2b086..f5a24ab54 100644 --- a/cxx/stdlib/specialization_constants.txt +++ b/cxx/stdlib/specialization_constants.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/stdlib.txt b/cxx/stdlib/stdlib.txt index 05f22cd15..1b61c1c36 100644 --- a/cxx/stdlib/stdlib.txt +++ b/cxx/stdlib/stdlib.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/synchronization.txt b/cxx/stdlib/synchronization.txt index 5748b3581..08e433e8d 100644 --- a/cxx/stdlib/synchronization.txt +++ b/cxx/stdlib/synchronization.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/tuple.txt b/cxx/stdlib/tuple.txt index 9c226f826..6e492a286 100644 --- a/cxx/stdlib/tuple.txt +++ b/cxx/stdlib/tuple.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/type_traits.txt b/cxx/stdlib/type_traits.txt index fc6aacc57..ea2e6b7af 100644 --- a/cxx/stdlib/type_traits.txt +++ b/cxx/stdlib/type_traits.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/vector_data_load_and_store.txt b/cxx/stdlib/vector_data_load_and_store.txt index 5a6ec7c72..eb5cf60f6 100644 --- a/cxx/stdlib/vector_data_load_and_store.txt +++ b/cxx/stdlib/vector_data_load_and_store.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/vector_iterator.txt b/cxx/stdlib/vector_iterator.txt index b18b85b62..710a243fe 100644 --- a/cxx/stdlib/vector_iterator.txt +++ b/cxx/stdlib/vector_iterator.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/vector_utilities.txt b/cxx/stdlib/vector_utilities.txt index 401d1c147..87c2979b4 100644 --- a/cxx/stdlib/vector_utilities.txt +++ b/cxx/stdlib/vector_utilities.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/vector_wrapper.txt b/cxx/stdlib/vector_wrapper.txt index a6a0b11bf..480da4ba4 100644 --- a/cxx/stdlib/vector_wrapper.txt +++ b/cxx/stdlib/vector_wrapper.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/work_group.txt b/cxx/stdlib/work_group.txt index 41e0a1a45..f496a85a6 100644 --- a/cxx/stdlib/work_group.txt +++ b/cxx/stdlib/work_group.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/work_item.txt b/cxx/stdlib/work_item.txt index d5d6de3fb..ca561d406 100644 --- a/cxx/stdlib/work_item.txt +++ b/cxx/stdlib/work_item.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/acknowledgements.txt b/cxx4opencl/acknowledgements.txt index 380fd5f74..932bad1aa 100644 --- a/cxx4opencl/acknowledgements.txt +++ b/cxx4opencl/acknowledgements.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/address_spaces.txt b/cxx4opencl/address_spaces.txt index d3bf10e7b..c36384e71 100644 --- a/cxx4opencl/address_spaces.txt +++ b/cxx4opencl/address_spaces.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/cxxcasts.txt b/cxx4opencl/cxxcasts.txt index 775b7c309..b23e31133 100644 --- a/cxx4opencl/cxxcasts.txt +++ b/cxx4opencl/cxxcasts.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/diff2cxx.txt b/cxx4opencl/diff2cxx.txt index 17493a033..3b8821d2c 100644 --- a/cxx4opencl/diff2cxx.txt +++ b/cxx4opencl/diff2cxx.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/diff2openclc.txt b/cxx4opencl/diff2openclc.txt index f5853c3a3..a8ea5ae8a 100644 --- a/cxx4opencl/diff2openclc.txt +++ b/cxx4opencl/diff2openclc.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/intro.txt b/cxx4opencl/intro.txt index dbd16f12b..9c6593894 100644 --- a/cxx4opencl/intro.txt +++ b/cxx4opencl/intro.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/kernel.txt b/cxx4opencl/kernel.txt index 1f3398b6c..d7ccd2331 100644 --- a/cxx4opencl/kernel.txt +++ b/cxx4opencl/kernel.txt @@ -1,4 +1,4 @@ -// Copyright 2021-2023 The Khronos Group. This work is licensed under a +// Copyright 2021-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/references.txt b/cxx4opencl/references.txt index 24fc3610c..ca89409ee 100644 --- a/cxx4opencl/references.txt +++ b/cxx4opencl/references.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/appendix_a.asciidoc b/env/appendix_a.asciidoc index e216b72f1..10ae42123 100644 --- a/env/appendix_a.asciidoc +++ b/env/appendix_a.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/common_properties.asciidoc b/env/common_properties.asciidoc index 3ca25a0cb..ce5acf67f 100644 --- a/env/common_properties.asciidoc +++ b/env/common_properties.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/dictionary.asciidoc b/env/dictionary.asciidoc index 58a5564ea..bdf9c23c3 100644 --- a/env/dictionary.asciidoc +++ b/env/dictionary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/extensions.asciidoc b/env/extensions.asciidoc index 4ad2adfb6..f4666f293 100644 --- a/env/extensions.asciidoc +++ b/env/extensions.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/image_addressing_and_filtering.asciidoc b/env/image_addressing_and_filtering.asciidoc index bf4858fdc..f230d61ba 100644 --- a/env/image_addressing_and_filtering.asciidoc +++ b/env/image_addressing_and_filtering.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/introduction.asciidoc b/env/introduction.asciidoc index 1f5a9cd2c..90962b5bf 100644 --- a/env/introduction.asciidoc +++ b/env/introduction.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/numerical_compliance.asciidoc b/env/numerical_compliance.asciidoc index 5d883a757..994475e46 100644 --- a/env/numerical_compliance.asciidoc +++ b/env/numerical_compliance.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/references.asciidoc b/env/references.asciidoc index 7f513a6f6..f5dabc473 100644 --- a/env/references.asciidoc +++ b/env/references.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/required_capabilities.asciidoc b/env/required_capabilities.asciidoc index 90ecafffa..f5314c3e9 100644 --- a/env/required_capabilities.asciidoc +++ b/env/required_capabilities.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/validation_rules.asciidoc b/env/validation_rules.asciidoc index 028e65363..698729c35 100644 --- a/env/validation_rules.asciidoc +++ b/env/validation_rules.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_3d_image_writes.asciidoc b/ext/cl_khr_3d_image_writes.asciidoc index f3a9c07d9..2ebfa10e4 100644 --- a/ext/cl_khr_3d_image_writes.asciidoc +++ b/ext/cl_khr_3d_image_writes.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_async_work_group_copy_fence.asciidoc b/ext/cl_khr_async_work_group_copy_fence.asciidoc index 420e4afba..5a2656c65 100644 --- a/ext/cl_khr_async_work_group_copy_fence.asciidoc +++ b/ext/cl_khr_async_work_group_copy_fence.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_byte_addressable_store.asciidoc b/ext/cl_khr_byte_addressable_store.asciidoc index 357756527..0386a9818 100644 --- a/ext/cl_khr_byte_addressable_store.asciidoc +++ b/ext/cl_khr_byte_addressable_store.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_command_buffer.asciidoc b/ext/cl_khr_command_buffer.asciidoc index a7943c30d..2c543eea6 100644 --- a/ext/cl_khr_command_buffer.asciidoc +++ b/ext/cl_khr_command_buffer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_command_buffer_multi_device.asciidoc b/ext/cl_khr_command_buffer_multi_device.asciidoc index 0e6fc023b..cd1c638a7 100644 --- a/ext/cl_khr_command_buffer_multi_device.asciidoc +++ b/ext/cl_khr_command_buffer_multi_device.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index dc75f9a9f..642fb0540 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_create_command_queue.asciidoc b/ext/cl_khr_create_command_queue.asciidoc index 54fa4eb75..de1cf6e4f 100644 --- a/ext/cl_khr_create_command_queue.asciidoc +++ b/ext/cl_khr_create_command_queue.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. 
This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_d3d10_sharing.asciidoc b/ext/cl_khr_d3d10_sharing.asciidoc index 3e8cb1557..25e89a890 100644 --- a/ext/cl_khr_d3d10_sharing.asciidoc +++ b/ext/cl_khr_d3d10_sharing.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_d3d11_sharing.asciidoc b/ext/cl_khr_d3d11_sharing.asciidoc index db190c244..6d14a3f50 100644 --- a/ext/cl_khr_d3d11_sharing.asciidoc +++ b/ext/cl_khr_d3d11_sharing.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_depth_images.asciidoc b/ext/cl_khr_depth_images.asciidoc index 665ba8467..c11c695ee 100644 --- a/ext/cl_khr_depth_images.asciidoc +++ b/ext/cl_khr_depth_images.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_device_enqueue_local_arg_types.asciidoc b/ext/cl_khr_device_enqueue_local_arg_types.asciidoc index b7775aae0..19f341989 100644 --- a/ext/cl_khr_device_enqueue_local_arg_types.asciidoc +++ b/ext/cl_khr_device_enqueue_local_arg_types.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. 
This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_device_uuid.asciidoc b/ext/cl_khr_device_uuid.asciidoc index fe98af0db..e4005dcaf 100644 --- a/ext/cl_khr_device_uuid.asciidoc +++ b/ext/cl_khr_device_uuid.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_dx9_media_sharing.asciidoc b/ext/cl_khr_dx9_media_sharing.asciidoc index 30610072b..a350af40a 100644 --- a/ext/cl_khr_dx9_media_sharing.asciidoc +++ b/ext/cl_khr_dx9_media_sharing.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_egl_event.asciidoc b/ext/cl_khr_egl_event.asciidoc index 70dafeb0e..d04fb0d8b 100644 --- a/ext/cl_khr_egl_event.asciidoc +++ b/ext/cl_khr_egl_event.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_egl_image.asciidoc b/ext/cl_khr_egl_image.asciidoc index 2d1a8a75a..da56b9ea6 100644 --- a/ext/cl_khr_egl_image.asciidoc +++ b/ext/cl_khr_egl_image.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_expect_assume.asciidoc b/ext/cl_khr_expect_assume.asciidoc index f4af54bff..274d73b1c 100644 --- a/ext/cl_khr_expect_assume.asciidoc +++ b/ext/cl_khr_expect_assume.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_extended_async_copies.asciidoc b/ext/cl_khr_extended_async_copies.asciidoc index 6dd262198..9bde9244c 100644 --- a/ext/cl_khr_extended_async_copies.asciidoc +++ b/ext/cl_khr_extended_async_copies.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_extended_bit_ops.asciidoc b/ext/cl_khr_extended_bit_ops.asciidoc index a9568a762..b65194cf9 100644 --- a/ext/cl_khr_extended_bit_ops.asciidoc +++ b/ext/cl_khr_extended_bit_ops.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_extended_versioning.asciidoc b/ext/cl_khr_extended_versioning.asciidoc index 984bc2f38..115f5ce7c 100644 --- a/ext/cl_khr_extended_versioning.asciidoc +++ b/ext/cl_khr_extended_versioning.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_external_memory.asciidoc b/ext/cl_khr_external_memory.asciidoc index 57c8867f6..a2dda4883 100644 --- a/ext/cl_khr_external_memory.asciidoc +++ b/ext/cl_khr_external_memory.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2023 The Khronos Group. This work is licensed under a +// Copyright 2021-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_external_semaphore.asciidoc b/ext/cl_khr_external_semaphore.asciidoc index f5198a123..064ebd754 100644 --- a/ext/cl_khr_external_semaphore.asciidoc +++ b/ext/cl_khr_external_semaphore.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2023 The Khronos Group. This work is licensed under a +// Copyright 2021-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_fp16.asciidoc b/ext/cl_khr_fp16.asciidoc index c6233a695..595ff95c5 100644 --- a/ext/cl_khr_fp16.asciidoc +++ b/ext/cl_khr_fp16.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_fp64.asciidoc b/ext/cl_khr_fp64.asciidoc index cb3a45fac..467830cd6 100644 --- a/ext/cl_khr_fp64.asciidoc +++ b/ext/cl_khr_fp64.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_gl_depth_images.asciidoc b/ext/cl_khr_gl_depth_images.asciidoc index d50e38260..c958da903 100644 --- a/ext/cl_khr_gl_depth_images.asciidoc +++ b/ext/cl_khr_gl_depth_images.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_gl_event.asciidoc b/ext/cl_khr_gl_event.asciidoc index 97df4a870..d5c3b686f 100644 --- a/ext/cl_khr_gl_event.asciidoc +++ b/ext/cl_khr_gl_event.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_gl_msaa_sharing.asciidoc b/ext/cl_khr_gl_msaa_sharing.asciidoc index 1418443d7..91fad53bb 100644 --- a/ext/cl_khr_gl_msaa_sharing.asciidoc +++ b/ext/cl_khr_gl_msaa_sharing.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_gl_sharing__context.asciidoc b/ext/cl_khr_gl_sharing__context.asciidoc index 300bd070e..ac0cc1388 100644 --- a/ext/cl_khr_gl_sharing__context.asciidoc +++ b/ext/cl_khr_gl_sharing__context.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_gl_sharing__memobjs.asciidoc b/ext/cl_khr_gl_sharing__memobjs.asciidoc index b9b5d1761..2de4b2927 100644 --- a/ext/cl_khr_gl_sharing__memobjs.asciidoc +++ b/ext/cl_khr_gl_sharing__memobjs.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_icd.asciidoc b/ext/cl_khr_icd.asciidoc index 816a9300e..2298e6cb3 100644 --- a/ext/cl_khr_icd.asciidoc +++ b/ext/cl_khr_icd.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_il_program.asciidoc b/ext/cl_khr_il_program.asciidoc index 721c7eccd..05201a14f 100644 --- a/ext/cl_khr_il_program.asciidoc +++ b/ext/cl_khr_il_program.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_image2d_from_buffer.asciidoc b/ext/cl_khr_image2d_from_buffer.asciidoc index 4c08840c9..ce4e7f21f 100644 --- a/ext/cl_khr_image2d_from_buffer.asciidoc +++ b/ext/cl_khr_image2d_from_buffer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_initialize_memory.asciidoc b/ext/cl_khr_initialize_memory.asciidoc index b2730b913..29a078bf5 100644 --- a/ext/cl_khr_initialize_memory.asciidoc +++ b/ext/cl_khr_initialize_memory.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_int32_atomics.asciidoc b/ext/cl_khr_int32_atomics.asciidoc index cf5657073..f6b79ae81 100644 --- a/ext/cl_khr_int32_atomics.asciidoc +++ b/ext/cl_khr_int32_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_int64_atomics.asciidoc b/ext/cl_khr_int64_atomics.asciidoc index f2875a562..ebed85229 100644 --- a/ext/cl_khr_int64_atomics.asciidoc +++ b/ext/cl_khr_int64_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_integer_dot_product.asciidoc b/ext/cl_khr_integer_dot_product.asciidoc index d75742f10..9ed542cd0 100644 --- a/ext/cl_khr_integer_dot_product.asciidoc +++ b/ext/cl_khr_integer_dot_product.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2023 The Khronos Group. This work is licensed under a +// Copyright 2020-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_mipmap_image.asciidoc b/ext/cl_khr_mipmap_image.asciidoc index d5a270da5..c7a435922 100644 --- a/ext/cl_khr_mipmap_image.asciidoc +++ b/ext/cl_khr_mipmap_image.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_pci_bus_info.asciidoc b/ext/cl_khr_pci_bus_info.asciidoc index 3b92c929a..fc724ca1a 100644 --- a/ext/cl_khr_pci_bus_info.asciidoc +++ b/ext/cl_khr_pci_bus_info.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_priority_hints.asciidoc b/ext/cl_khr_priority_hints.asciidoc index 5da9a971c..bdbcfe402 100644 --- a/ext/cl_khr_priority_hints.asciidoc +++ b/ext/cl_khr_priority_hints.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_select_fprounding_mode.asciidoc b/ext/cl_khr_select_fprounding_mode.asciidoc index 1432e07e7..c1285bd2c 100644 --- a/ext/cl_khr_select_fprounding_mode.asciidoc +++ b/ext/cl_khr_select_fprounding_mode.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc index b323a031b..ff885700c 100644 --- a/ext/cl_khr_semaphore.asciidoc +++ b/ext/cl_khr_semaphore.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2023 The Khronos Group. This work is licensed under a +// Copyright 2021-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_spir.asciidoc b/ext/cl_khr_spir.asciidoc index f7c999307..d5d0d47fd 100644 --- a/ext/cl_khr_spir.asciidoc +++ b/ext/cl_khr_spir.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_srgb_image_writes.asciidoc b/ext/cl_khr_srgb_image_writes.asciidoc index 357d7d5e0..63c7444f7 100644 --- a/ext/cl_khr_srgb_image_writes.asciidoc +++ b/ext/cl_khr_srgb_image_writes.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_subgroup_named_barrier.asciidoc b/ext/cl_khr_subgroup_named_barrier.asciidoc index 7140e7d8e..08636a76e 100644 --- a/ext/cl_khr_subgroup_named_barrier.asciidoc +++ b/ext/cl_khr_subgroup_named_barrier.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_subgroup_rotate.asciidoc b/ext/cl_khr_subgroup_rotate.asciidoc index b6670a66c..337f98887 100644 --- a/ext/cl_khr_subgroup_rotate.asciidoc +++ b/ext/cl_khr_subgroup_rotate.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2022-2023 The Khronos Group. This work is licensed under a +// Copyright 2022-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_subgroups.asciidoc b/ext/cl_khr_subgroups.asciidoc index 8dcd49429..ae479e9b7 100644 --- a/ext/cl_khr_subgroups.asciidoc +++ b/ext/cl_khr_subgroups.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_suggested_local_work_size.asciidoc b/ext/cl_khr_suggested_local_work_size.asciidoc index cd7e50fb3..97bef879a 100644 --- a/ext/cl_khr_suggested_local_work_size.asciidoc +++ b/ext/cl_khr_suggested_local_work_size.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_terminate_context.asciidoc b/ext/cl_khr_terminate_context.asciidoc index 4b3a7f816..9a7717883 100644 --- a/ext/cl_khr_terminate_context.asciidoc +++ b/ext/cl_khr_terminate_context.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_throttle_hints.asciidoc b/ext/cl_khr_throttle_hints.asciidoc index ff734cae5..8b19ce69c 100644 --- a/ext/cl_khr_throttle_hints.asciidoc +++ b/ext/cl_khr_throttle_hints.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/cl_khr_work_group_uniform_arithmetic.asciidoc b/ext/cl_khr_work_group_uniform_arithmetic.asciidoc index 8ed278fe9..097f0aed2 100644 --- a/ext/cl_khr_work_group_uniform_arithmetic.asciidoc +++ b/ext/cl_khr_work_group_uniform_arithmetic.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2022-2023 The Khronos Group. This work is licensed under a +// Copyright 2022-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/deprecated_extensions.asciidoc b/ext/deprecated_extensions.asciidoc index 812669ba3..200eda14b 100644 --- a/ext/deprecated_extensions.asciidoc +++ b/ext/deprecated_extensions.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/dictionary.asciidoc b/ext/dictionary.asciidoc index 58a5564ea..bdf9c23c3 100644 --- a/ext/dictionary.asciidoc +++ b/ext/dictionary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/index.asciidoc b/ext/index.asciidoc index 83e33f880..5064392a6 100644 --- a/ext/index.asciidoc +++ b/ext/index.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/introduction.asciidoc b/ext/introduction.asciidoc index 1cfd7f21a..1580441f5 100644 --- a/ext/introduction.asciidoc +++ b/ext/introduction.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/provisional_notice.asciidoc b/ext/provisional_notice.asciidoc index ddbf779e0..0cc0eb0d0 100644 --- a/ext/provisional_notice.asciidoc +++ b/ext/provisional_notice.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2023 The Khronos Group. This work is licensed under a +// Copyright 2023-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index 1305e53ae..00757cdbf 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/spirv_extensions.asciidoc b/ext/spirv_extensions.asciidoc index 5de65410f..bffbf1f8d 100644 --- a/ext/spirv_extensions.asciidoc +++ b/ext/spirv_extensions.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/to_core_features.asciidoc b/ext/to_core_features.asciidoc index 606b4a835..c82ac7639 100644 --- a/ext/to_core_features.asciidoc +++ b/ext/to_core_features.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/extensions/cl_arm_controlled_kernel_termination.asciidoc b/extensions/cl_arm_controlled_kernel_termination.asciidoc index 77bfcc141..174f496c6 100644 --- a/extensions/cl_arm_controlled_kernel_termination.asciidoc +++ b/extensions/cl_arm_controlled_kernel_termination.asciidoc @@ -21,7 +21,7 @@ Anastasia Stulova, Arm Ltd. + == Notice -Copyright (c) 2021-2023 Arm Ltd. +Copyright (c) 2021-2024 Arm Ltd. == Status diff --git a/extensions/cl_arm_printf.asciidoc b/extensions/cl_arm_printf.asciidoc index 5fff49c74..9af0add7f 100644 --- a/extensions/cl_arm_printf.asciidoc +++ b/extensions/cl_arm_printf.asciidoc @@ -24,7 +24,7 @@ Kevin Petit, Arm Ltd. + == Notice -Copyright (c) 2014-2023 Arm Ltd. +Copyright (c) 2014-2024 Arm Ltd. 
== Status diff --git a/extensions/cl_arm_protected_memory_allocation.asciidoc b/extensions/cl_arm_protected_memory_allocation.asciidoc index b9797206b..cc4f43ab3 100644 --- a/extensions/cl_arm_protected_memory_allocation.asciidoc +++ b/extensions/cl_arm_protected_memory_allocation.asciidoc @@ -19,7 +19,7 @@ Kevin Petit, Arm Ltd. + == Notice -Copyright (c) 2021-2023 Arm Ltd. +Copyright (c) 2021-2024 Arm Ltd. == Status diff --git a/extensions/cl_arm_scheduling_controls.asciidoc b/extensions/cl_arm_scheduling_controls.asciidoc index 42a2a3ee0..ef07f8e5f 100644 --- a/extensions/cl_arm_scheduling_controls.asciidoc +++ b/extensions/cl_arm_scheduling_controls.asciidoc @@ -22,7 +22,7 @@ Radek Szymanski, Arm Ltd. + == Notice -Copyright (c) 2020-2023 Arm Ltd. +Copyright (c) 2020-2024 Arm Ltd. == Status diff --git a/extensions/cl_ext_cxx_for_opencl.asciidoc b/extensions/cl_ext_cxx_for_opencl.asciidoc index 2438f6069..12bd4406f 100644 --- a/extensions/cl_ext_cxx_for_opencl.asciidoc +++ b/extensions/cl_ext_cxx_for_opencl.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/extensions/cl_ext_float_atomics.asciidoc b/extensions/cl_ext_float_atomics.asciidoc index 40b2a2662..ad4ab26fc 100644 --- a/extensions/cl_ext_float_atomics.asciidoc +++ b/extensions/cl_ext_float_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ @@ -74,7 +74,7 @@ Ruihao Zhang, Qualcomm == Notice -Copyright (c) 2021-2023 The Khronos Group Inc. +Copyright (c) 2021-2024 The Khronos Group Inc. 
== Status diff --git a/extensions/cl_ext_image_from_buffer.asciidoc b/extensions/cl_ext_image_from_buffer.asciidoc index 9c73c5c0b..1ef094c5d 100644 --- a/extensions/cl_ext_image_from_buffer.asciidoc +++ b/extensions/cl_ext_image_from_buffer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/extensions/cl_ext_image_raw10_raw12.asciidoc b/extensions/cl_ext_image_raw10_raw12.asciidoc index 5fcc21796..d90173f7a 100644 --- a/extensions/cl_ext_image_raw10_raw12.asciidoc +++ b/extensions/cl_ext_image_raw10_raw12.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/extensions/cl_extension_template.asciidoc b/extensions/cl_extension_template.asciidoc index dde9417e5..2020299c7 100644 --- a/extensions/cl_extension_template.asciidoc +++ b/extensions/cl_extension_template.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ @@ -93,7 +93,7 @@ the time of their contribution, one person per line. == Notice -Copyright (c) 2023 Some Company. Copyright terms at: + +Copyright (c) 2023-2024 Some Company. 
Copyright terms at: + http://link/copyright.html **** diff --git a/extensions/cl_img_cached_allocations.asciidoc b/extensions/cl_img_cached_allocations.asciidoc index 52f88df04..0faf142df 100644 --- a/extensions/cl_img_cached_allocations.asciidoc +++ b/extensions/cl_img_cached_allocations.asciidoc @@ -24,7 +24,7 @@ Jeremy Kemp, Imagination Technologies. == Notice -Copyright (c) 2020-2023 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_generate_mipmap.asciidoc b/extensions/cl_img_generate_mipmap.asciidoc index aa3c1019f..6ea7bd256 100644 --- a/extensions/cl_img_generate_mipmap.asciidoc +++ b/extensions/cl_img_generate_mipmap.asciidoc @@ -26,7 +26,7 @@ Jeremy Kemp, Imagination Technologies. == Notice -Copyright (c) 2020-2023 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_mem_properties.asciidoc b/extensions/cl_img_mem_properties.asciidoc index 235695fc3..94f015945 100644 --- a/extensions/cl_img_mem_properties.asciidoc +++ b/extensions/cl_img_mem_properties.asciidoc @@ -24,7 +24,7 @@ Jeba Samuel, Imagination Technologies. == Notice -Copyright (c) 2020-2023 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_use_gralloc_ptr.asciidoc b/extensions/cl_img_use_gralloc_ptr.asciidoc index 86937c514..cf05ec786 100644 --- a/extensions/cl_img_use_gralloc_ptr.asciidoc +++ b/extensions/cl_img_use_gralloc_ptr.asciidoc @@ -25,7 +25,7 @@ Jeremy Kemp, Imagination Technologies. == Notice -Copyright (c) 2020-2023 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. 
== Status diff --git a/extensions/cl_img_yuv_image.asciidoc b/extensions/cl_img_yuv_image.asciidoc index ed0d8a551..d33c2fd0f 100644 --- a/extensions/cl_img_yuv_image.asciidoc +++ b/extensions/cl_img_yuv_image.asciidoc @@ -25,7 +25,7 @@ Jeremy Kemp, Imagination Technologies. == Notice -Copyright (c) 2020-2023 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_intel_bfloat16_conversions.asciidoc b/extensions/cl_intel_bfloat16_conversions.asciidoc index afbf2aa9f..5262b6a83 100644 --- a/extensions/cl_intel_bfloat16_conversions.asciidoc +++ b/extensions/cl_intel_bfloat16_conversions.asciidoc @@ -23,7 +23,7 @@ Lukasz Towarek, Intel == Notice -Copyright (c) 2022-2023 Intel Corporation. All rights reserved. +Copyright (c) 2022-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_command_queue_families.asciidoc b/extensions/cl_intel_command_queue_families.asciidoc index 2ad16ce9d..9e967f76c 100644 --- a/extensions/cl_intel_command_queue_families.asciidoc +++ b/extensions/cl_intel_command_queue_families.asciidoc @@ -34,7 +34,7 @@ Michal Mrozek, Intel + == Notice -Copyright (c) 2021-2023 Intel Corporation. All rights reserved. +Copyright (c) 2021-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_create_buffer_with_properties.asciidoc b/extensions/cl_intel_create_buffer_with_properties.asciidoc index 05e220e01..4948de276 100644 --- a/extensions/cl_intel_create_buffer_with_properties.asciidoc +++ b/extensions/cl_intel_create_buffer_with_properties.asciidoc @@ -32,7 +32,7 @@ Ben Ashbaugh, Intel == Notice -Copyright (c) 2020-2023 Intel Corporation. All rights reserved. +Copyright (c) 2020-2024 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_device_attribute_query.asciidoc b/extensions/cl_intel_device_attribute_query.asciidoc index 2e40e444d..71fdcfa8d 100644 --- a/extensions/cl_intel_device_attribute_query.asciidoc +++ b/extensions/cl_intel_device_attribute_query.asciidoc @@ -44,7 +44,7 @@ Rafik Saliev, Intel == Notice -Copyright (c) 2021-2023 Intel Corporation. All rights reserved. +Copyright (c) 2021-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_mem_alloc_buffer_location.asciidoc b/extensions/cl_intel_mem_alloc_buffer_location.asciidoc index 212aaec2f..f079a9b22 100644 --- a/extensions/cl_intel_mem_alloc_buffer_location.asciidoc +++ b/extensions/cl_intel_mem_alloc_buffer_location.asciidoc @@ -40,7 +40,7 @@ Contributors Notice ------ -Copyright (c) 2020-2023 Intel Corporation. All rights reserved. +Copyright (c) 2020-2024 Intel Corporation. All rights reserved. Status ------ diff --git a/extensions/cl_intel_mem_channel_property.asciidoc b/extensions/cl_intel_mem_channel_property.asciidoc index a7338dcb8..2336c763b 100644 --- a/extensions/cl_intel_mem_channel_property.asciidoc +++ b/extensions/cl_intel_mem_channel_property.asciidoc @@ -37,7 +37,7 @@ Contributors Notice ------ -Copyright (c) 2020-2023 Intel Corporation. All rights reserved. +Copyright (c) 2020-2024 Intel Corporation. All rights reserved. Status ------ diff --git a/extensions/cl_intel_mem_force_host_memory.asciidoc b/extensions/cl_intel_mem_force_host_memory.asciidoc index c337cab21..474852870 100644 --- a/extensions/cl_intel_mem_force_host_memory.asciidoc +++ b/extensions/cl_intel_mem_force_host_memory.asciidoc @@ -30,7 +30,7 @@ Filip Hazubski, Intel == Notice -Copyright (c) 2020-2023 Intel Corporation. All rights reserved. +Copyright (c) 2020-2024 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_packed_yuv.asciidoc b/extensions/cl_intel_packed_yuv.asciidoc index a87608419..8395af491 100644 --- a/extensions/cl_intel_packed_yuv.asciidoc +++ b/extensions/cl_intel_packed_yuv.asciidoc @@ -30,7 +30,7 @@ Ben Ashbaugh, Intel == Notice -Copyright (c) 2021-2023 Intel Corporation. All rights reserved. +Copyright (c) 2021-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_planar_yuv.asciidoc b/extensions/cl_intel_planar_yuv.asciidoc index f9d7743ab..07f4388e8 100644 --- a/extensions/cl_intel_planar_yuv.asciidoc +++ b/extensions/cl_intel_planar_yuv.asciidoc @@ -34,7 +34,7 @@ Biju George, Intel == Notice -Copyright (c) 2021-2023 Intel Corporation. All rights reserved. +Copyright (c) 2021-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_program_scope_host_pipe.asciidoc b/extensions/cl_intel_program_scope_host_pipe.asciidoc index c8412cf69..51a3badfc 100644 --- a/extensions/cl_intel_program_scope_host_pipe.asciidoc +++ b/extensions/cl_intel_program_scope_host_pipe.asciidoc @@ -31,7 +31,7 @@ Zibai Wang, Intel + == Notice -Copyright (c) 2023 Intel Corporation. All rights reserved. +Copyright (c) 2023-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_required_subgroup_size.asciidoc b/extensions/cl_intel_required_subgroup_size.asciidoc index 200c1e5c7..c30323730 100644 --- a/extensions/cl_intel_required_subgroup_size.asciidoc +++ b/extensions/cl_intel_required_subgroup_size.asciidoc @@ -39,7 +39,7 @@ Ben Ashbaugh, Intel == Notice -Copyright (c) 2018-2023 Intel Corporation. All rights reserved. +Copyright (c) 2018-2024 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_sharing_format_query.asciidoc b/extensions/cl_intel_sharing_format_query.asciidoc index 08a13aea7..c1b8e0164 100644 --- a/extensions/cl_intel_sharing_format_query.asciidoc +++ b/extensions/cl_intel_sharing_format_query.asciidoc @@ -26,7 +26,7 @@ Pawel Wilma, Intel == Notice -Copyright (c) 2021-2023 Intel Corporation. All rights reserved. +Copyright (c) 2021-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_spirv_device_side_avc_motion_estimation.asciidoc b/extensions/cl_intel_spirv_device_side_avc_motion_estimation.asciidoc index 007df51a1..9101fd86b 100644 --- a/extensions/cl_intel_spirv_device_side_avc_motion_estimation.asciidoc +++ b/extensions/cl_intel_spirv_device_side_avc_motion_estimation.asciidoc @@ -29,7 +29,7 @@ Biju George, Intel == Notice -Copyright (c) 2018-2023 Intel Corporation. All rights reserved. +Copyright (c) 2018-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_spirv_media_block_io.asciidoc b/extensions/cl_intel_spirv_media_block_io.asciidoc index 474985c43..322cbafc8 100644 --- a/extensions/cl_intel_spirv_media_block_io.asciidoc +++ b/extensions/cl_intel_spirv_media_block_io.asciidoc @@ -30,7 +30,7 @@ Pawel Jurek, Intel == Notice -Copyright (c) 2018-2023 Intel Corporation. All rights reserved. +Copyright (c) 2018-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_spirv_subgroups.asciidoc b/extensions/cl_intel_spirv_subgroups.asciidoc index fd188226b..c2f3faf38 100644 --- a/extensions/cl_intel_spirv_subgroups.asciidoc +++ b/extensions/cl_intel_spirv_subgroups.asciidoc @@ -31,7 +31,7 @@ Mariusz Merecki, Intel == Notice -Copyright (c) 2018-2023 Intel Corporation. All rights reserved. +Copyright (c) 2018-2024 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_split_work_group_barrier.asciidoc b/extensions/cl_intel_split_work_group_barrier.asciidoc index 97a456b96..ed2d1ee73 100644 --- a/extensions/cl_intel_split_work_group_barrier.asciidoc +++ b/extensions/cl_intel_split_work_group_barrier.asciidoc @@ -24,7 +24,7 @@ John Pennycook, Intel == Notice -Copyright (c) 2022-2023 Intel Corporation. All rights reserved. +Copyright (c) 2022-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc b/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc index 384427e64..d6f492bab 100644 --- a/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc +++ b/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc @@ -28,7 +28,7 @@ Lukasz Towarek, Intel == Notice -Copyright (c) 2022-2023 Intel Corporation. All rights reserved. +Copyright (c) 2022-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroup_split_matrix_multiply_accumulate.asciidoc b/extensions/cl_intel_subgroup_split_matrix_multiply_accumulate.asciidoc index 13eb0e22f..630078a46 100644 --- a/extensions/cl_intel_subgroup_split_matrix_multiply_accumulate.asciidoc +++ b/extensions/cl_intel_subgroup_split_matrix_multiply_accumulate.asciidoc @@ -25,7 +25,7 @@ Lukasz Towarek, Intel == Notice -Copyright (c) 2022-2023 Intel Corporation. All rights reserved. +Copyright (c) 2022-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroups.asciidoc b/extensions/cl_intel_subgroups.asciidoc index 109dbc367..b17b9afe5 100644 --- a/extensions/cl_intel_subgroups.asciidoc +++ b/extensions/cl_intel_subgroups.asciidoc @@ -42,7 +42,7 @@ Biju George, Intel == Notice -Copyright (c) 2018-2023 Intel Corporation. All rights reserved. +Copyright (c) 2018-2024 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_subgroups_char.asciidoc b/extensions/cl_intel_subgroups_char.asciidoc index ca16380b1..1658cfacb 100644 --- a/extensions/cl_intel_subgroups_char.asciidoc +++ b/extensions/cl_intel_subgroups_char.asciidoc @@ -33,7 +33,7 @@ Konrad Trifunovic, Intel == Notice -Copyright (c) 2020-2023 Intel Corporation. All rights reserved. +Copyright (c) 2020-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroups_long.asciidoc b/extensions/cl_intel_subgroups_long.asciidoc index 121b8e51e..b010c4e5d 100644 --- a/extensions/cl_intel_subgroups_long.asciidoc +++ b/extensions/cl_intel_subgroups_long.asciidoc @@ -30,7 +30,7 @@ Konrad Trifunovic, Intel == Notice -Copyright (c) 2020-2023 Intel Corporation. All rights reserved. +Copyright (c) 2020-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroups_short.asciidoc b/extensions/cl_intel_subgroups_short.asciidoc index 32df2692d..c9d75d3ac 100644 --- a/extensions/cl_intel_subgroups_short.asciidoc +++ b/extensions/cl_intel_subgroups_short.asciidoc @@ -29,7 +29,7 @@ Insoo Woo, Intel == Notice -Copyright (c) 2018-2023 Intel Corporation. All rights reserved. +Copyright (c) 2018-2024 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_unified_shared_memory.asciidoc b/extensions/cl_intel_unified_shared_memory.asciidoc index 33381e294..c28d5303a 100644 --- a/extensions/cl_intel_unified_shared_memory.asciidoc +++ b/extensions/cl_intel_unified_shared_memory.asciidoc @@ -43,7 +43,7 @@ Lukasz Towarek, Intel == Notice -Copyright (c) 2021-2023 Intel Corporation. All rights reserved. +Copyright (c) 2021-2024 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_loader_info.asciidoc b/extensions/cl_loader_info.asciidoc index e9338afd9..3702583b7 100644 --- a/extensions/cl_loader_info.asciidoc +++ b/extensions/cl_loader_info.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ @@ -29,7 +29,7 @@ Brice Videau, Argonne National Laboratory == Notice -Copyright (c) 2023 The Khronos Group Inc. +Copyright (c) 2023-2024 The Khronos Group Inc. == Status diff --git a/extensions/cl_loader_layers.asciidoc b/extensions/cl_loader_layers.asciidoc index 405cde40c..c286706de 100644 --- a/extensions/cl_loader_layers.asciidoc +++ b/extensions/cl_loader_layers.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2023 The Khronos Group. This work is licensed under a +// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/extensions/cl_pocl_content_size.asciidoc b/extensions/cl_pocl_content_size.asciidoc index 59d2edc26..613e58f14 100644 --- a/extensions/cl_pocl_content_size.asciidoc +++ b/extensions/cl_pocl_content_size.asciidoc @@ -31,7 +31,7 @@ Jan Solanti, Tampere University == Notice -Copyright (c) 2020-2023 Tampere University +Copyright (c) 2020-2024 Tampere University == Status diff --git a/extensions/extensions.txt b/extensions/extensions.txt index df0d0d5e9..acf843e84 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -1,4 +1,4 @@ -// Copyright 2018-2023 The Khronos Group. This work is licensed under a +// Copyright 2018-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/langext/acknowledgements.txt b/langext/acknowledgements.txt index ed44a42c9..9c7d379e4 100644 --- a/langext/acknowledgements.txt +++ b/langext/acknowledgements.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/langext/intro.txt b/langext/intro.txt index ee1e9e6aa..361ddbe4d 100644 --- a/langext/intro.txt +++ b/langext/intro.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/langext/variadic_macro.txt b/langext/variadic_macro.txt index d70ca426f..97da9739b 100644 --- a/langext/variadic_macro.txt +++ b/langext/variadic_macro.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2023 The Khronos Group. This work is licensed under a +// Copyright 2019-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/man/static/EXTENSION.txt b/man/static/EXTENSION.txt index 7717f1de7..b24743843 100644 --- a/man/static/EXTENSION.txt +++ b/man/static/EXTENSION.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/abstractDataTypes.txt b/man/static/abstractDataTypes.txt index 22db20e93..1bbc80324 100644 --- a/man/static/abstractDataTypes.txt +++ b/man/static/abstractDataTypes.txt @@ -1,4 +1,4 @@ -// Copyright 2021-2023 The Khronos Group Inc. +// Copyright 2021-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateEventFromEGLSyncKHR.txt b/man/static/clCreateEventFromEGLSyncKHR.txt index 5ca992e16..f067fc3d2 100644 --- a/man/static/clCreateEventFromEGLSyncKHR.txt +++ b/man/static/clCreateEventFromEGLSyncKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateEventFromGLsyncKHR.txt b/man/static/clCreateEventFromGLsyncKHR.txt index fe940860a..1cbc618ae 100644 --- a/man/static/clCreateEventFromGLsyncKHR.txt +++ b/man/static/clCreateEventFromGLsyncKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromD3D10BufferKHR.txt b/man/static/clCreateFromD3D10BufferKHR.txt index c159c45d5..445c42aa3 100644 --- a/man/static/clCreateFromD3D10BufferKHR.txt +++ b/man/static/clCreateFromD3D10BufferKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromD3D10Texture2DKHR.txt b/man/static/clCreateFromD3D10Texture2DKHR.txt index 166628166..a8f420d81 100644 --- a/man/static/clCreateFromD3D10Texture2DKHR.txt +++ b/man/static/clCreateFromD3D10Texture2DKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromD3D10Texture3DKHR.txt b/man/static/clCreateFromD3D10Texture3DKHR.txt index 96573556a..d8f0ab1c2 100644 --- a/man/static/clCreateFromD3D10Texture3DKHR.txt +++ b/man/static/clCreateFromD3D10Texture3DKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromD3D11BufferKHR.txt b/man/static/clCreateFromD3D11BufferKHR.txt index d2d1f0520..e3a007aa3 100644 --- a/man/static/clCreateFromD3D11BufferKHR.txt +++ b/man/static/clCreateFromD3D11BufferKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromD3D11Texture2DKHR.txt b/man/static/clCreateFromD3D11Texture2DKHR.txt index 6752471d9..1ff783c0e 100644 --- a/man/static/clCreateFromD3D11Texture2DKHR.txt +++ b/man/static/clCreateFromD3D11Texture2DKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromD3D11Texture3DKHR.txt b/man/static/clCreateFromD3D11Texture3DKHR.txt index 67be0d944..ccbec7667 100644 --- a/man/static/clCreateFromD3D11Texture3DKHR.txt +++ b/man/static/clCreateFromD3D11Texture3DKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromDX9MediaSurfaceKHR.txt b/man/static/clCreateFromDX9MediaSurfaceKHR.txt index 84978b616..af3a7a4bd 100644 --- a/man/static/clCreateFromDX9MediaSurfaceKHR.txt +++ b/man/static/clCreateFromDX9MediaSurfaceKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromEGLImageKHR.txt b/man/static/clCreateFromEGLImageKHR.txt index 0cb3eb7f6..99e85f0dd 100644 --- a/man/static/clCreateFromEGLImageKHR.txt +++ b/man/static/clCreateFromEGLImageKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromGLBuffer.txt b/man/static/clCreateFromGLBuffer.txt index 9ab18e5d3..8518310ab 100644 --- a/man/static/clCreateFromGLBuffer.txt +++ b/man/static/clCreateFromGLBuffer.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromGLRenderbuffer.txt b/man/static/clCreateFromGLRenderbuffer.txt index 6dab6eeb9..a79e8e4dd 100644 --- a/man/static/clCreateFromGLRenderbuffer.txt +++ b/man/static/clCreateFromGLRenderbuffer.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clCreateFromGLTexture.txt b/man/static/clCreateFromGLTexture.txt index 223f83ac5..6cffe9407 100644 --- a/man/static/clCreateFromGLTexture.txt +++ b/man/static/clCreateFromGLTexture.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt b/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt index 3edd98f6d..946322777 100644 --- a/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt +++ b/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt b/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt index 86d85e75d..34822411e 100644 --- a/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt +++ b/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt b/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt index 3855667de..e2750b64c 100644 --- a/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt +++ b/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueAcquireEGLObjectsKHR.txt b/man/static/clEnqueueAcquireEGLObjectsKHR.txt index b267e957b..a2f84a60c 100644 --- a/man/static/clEnqueueAcquireEGLObjectsKHR.txt +++ b/man/static/clEnqueueAcquireEGLObjectsKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueAcquireGLObjects.txt b/man/static/clEnqueueAcquireGLObjects.txt index 331c4c282..d989fd3de 100644 --- a/man/static/clEnqueueAcquireGLObjects.txt +++ b/man/static/clEnqueueAcquireGLObjects.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt b/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt index ab902f4dc..7be974afd 100644 --- a/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt +++ b/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. 
+// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt b/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt index e32cedcb9..fedb562e3 100644 --- a/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt +++ b/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt b/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt index 8f8e81918..c5648dada 100644 --- a/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt +++ b/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueReleaseEGLObjectsKHR.txt b/man/static/clEnqueueReleaseEGLObjectsKHR.txt index 66bdd3557..cddd6dcfa 100644 --- a/man/static/clEnqueueReleaseEGLObjectsKHR.txt +++ b/man/static/clEnqueueReleaseEGLObjectsKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clEnqueueReleaseGLObjects.txt b/man/static/clEnqueueReleaseGLObjects.txt index 84a06e69b..8f6ee5dbd 100644 --- a/man/static/clEnqueueReleaseGLObjects.txt +++ b/man/static/clEnqueueReleaseGLObjects.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clGetDeviceIDsFromD3D10KHR.txt b/man/static/clGetDeviceIDsFromD3D10KHR.txt index 4e98f2873..80941348e 100644 --- a/man/static/clGetDeviceIDsFromD3D10KHR.txt +++ b/man/static/clGetDeviceIDsFromD3D10KHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clGetDeviceIDsFromD3D11KHR.txt b/man/static/clGetDeviceIDsFromD3D11KHR.txt index 74f334a4f..cd6e0f60d 100644 --- a/man/static/clGetDeviceIDsFromD3D11KHR.txt +++ b/man/static/clGetDeviceIDsFromD3D11KHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt b/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt index 10a426a54..58b265d30 100644 --- a/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt +++ b/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clGetExtensionFunctionAddressForPlatform.txt b/man/static/clGetExtensionFunctionAddressForPlatform.txt index ec41e87d9..0004c2a2d 100644 --- a/man/static/clGetExtensionFunctionAddressForPlatform.txt +++ b/man/static/clGetExtensionFunctionAddressForPlatform.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clGetGLContextInfoKHR.txt b/man/static/clGetGLContextInfoKHR.txt index 874f918d9..df2534e1d 100644 --- a/man/static/clGetGLContextInfoKHR.txt +++ b/man/static/clGetGLContextInfoKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. 
+// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clGetGLObjectInfo.txt b/man/static/clGetGLObjectInfo.txt index 2911c0586..1d7b7bbc3 100644 --- a/man/static/clGetGLObjectInfo.txt +++ b/man/static/clGetGLObjectInfo.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clGetGLTextureInfo.txt b/man/static/clGetGLTextureInfo.txt index f18502559..dffef2e65 100644 --- a/man/static/clGetGLTextureInfo.txt +++ b/man/static/clGetGLTextureInfo.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clIcdGetPlatformIDsKHR.txt b/man/static/clIcdGetPlatformIDsKHR.txt index 3bc34e24b..7c937659b 100644 --- a/man/static/clIcdGetPlatformIDsKHR.txt +++ b/man/static/clIcdGetPlatformIDsKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clTerminateContextKHR.txt b/man/static/clTerminateContextKHR.txt index b40cafae8..8b0abcbcd 100644 --- a/man/static/clTerminateContextKHR.txt +++ b/man/static/clTerminateContextKHR.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_3d_image_writes.txt b/man/static/cl_khr_3d_image_writes.txt index c9ef19a4d..474126f40 100644 --- a/man/static/cl_khr_3d_image_writes.txt +++ b/man/static/cl_khr_3d_image_writes.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_byte_addressable_store.txt b/man/static/cl_khr_byte_addressable_store.txt index 5aa97454d..9265f1aa4 100644 --- a/man/static/cl_khr_byte_addressable_store.txt +++ b/man/static/cl_khr_byte_addressable_store.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_d3d10_sharing.txt b/man/static/cl_khr_d3d10_sharing.txt index c2cfbea35..11ee4ced7 100644 --- a/man/static/cl_khr_d3d10_sharing.txt +++ b/man/static/cl_khr_d3d10_sharing.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_d3d11_sharing.txt b/man/static/cl_khr_d3d11_sharing.txt index 933287f88..edf6faede 100644 --- a/man/static/cl_khr_d3d11_sharing.txt +++ b/man/static/cl_khr_d3d11_sharing.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_depth_images.txt b/man/static/cl_khr_depth_images.txt index 9f5874894..e333c9600 100644 --- a/man/static/cl_khr_depth_images.txt +++ b/man/static/cl_khr_depth_images.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_device_enqueue_local_arg_types.txt b/man/static/cl_khr_device_enqueue_local_arg_types.txt index e83d192f2..88c465fd2 100644 --- a/man/static/cl_khr_device_enqueue_local_arg_types.txt +++ b/man/static/cl_khr_device_enqueue_local_arg_types.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_dx9_media_sharing.txt b/man/static/cl_khr_dx9_media_sharing.txt index 822b6f659..84f892362 100644 --- a/man/static/cl_khr_dx9_media_sharing.txt +++ b/man/static/cl_khr_dx9_media_sharing.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_egl_event.txt b/man/static/cl_khr_egl_event.txt index fbd984c98..e182c857a 100644 --- a/man/static/cl_khr_egl_event.txt +++ b/man/static/cl_khr_egl_event.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_egl_image.txt b/man/static/cl_khr_egl_image.txt index 26cc5f33c..d6609c90d 100644 --- a/man/static/cl_khr_egl_image.txt +++ b/man/static/cl_khr_egl_image.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_fp16.txt b/man/static/cl_khr_fp16.txt index f000476a2..fbc273ce2 100644 --- a/man/static/cl_khr_fp16.txt +++ b/man/static/cl_khr_fp16.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_fp64.txt b/man/static/cl_khr_fp64.txt index b40162ec7..3d567e34e 100644 --- a/man/static/cl_khr_fp64.txt +++ b/man/static/cl_khr_fp64.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_gl_depth_images.txt b/man/static/cl_khr_gl_depth_images.txt index 59073ebf8..6b8486f09 100644 --- a/man/static/cl_khr_gl_depth_images.txt +++ b/man/static/cl_khr_gl_depth_images.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_gl_event.txt b/man/static/cl_khr_gl_event.txt index 93d39c83f..8e5e37a80 100644 --- a/man/static/cl_khr_gl_event.txt +++ b/man/static/cl_khr_gl_event.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_gl_msaa_sharing.txt b/man/static/cl_khr_gl_msaa_sharing.txt index 14d496a6b..e87bb2f87 100644 --- a/man/static/cl_khr_gl_msaa_sharing.txt +++ b/man/static/cl_khr_gl_msaa_sharing.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_gl_sharing.txt b/man/static/cl_khr_gl_sharing.txt index 5663ef162..3525731ff 100644 --- a/man/static/cl_khr_gl_sharing.txt +++ b/man/static/cl_khr_gl_sharing.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_global_int32_base_atomics.txt b/man/static/cl_khr_global_int32_base_atomics.txt index 8a7c6f870..81a45a93c 100644 --- a/man/static/cl_khr_global_int32_base_atomics.txt +++ b/man/static/cl_khr_global_int32_base_atomics.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_global_int32_extended_atomics.txt b/man/static/cl_khr_global_int32_extended_atomics.txt index 4fdddd9db..e4ce63f1a 100644 --- a/man/static/cl_khr_global_int32_extended_atomics.txt +++ b/man/static/cl_khr_global_int32_extended_atomics.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_icd.txt b/man/static/cl_khr_icd.txt index b2f8790ea..df5349f2d 100644 --- a/man/static/cl_khr_icd.txt +++ b/man/static/cl_khr_icd.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_il_program.txt b/man/static/cl_khr_il_program.txt index feba40726..96c03dfe1 100644 --- a/man/static/cl_khr_il_program.txt +++ b/man/static/cl_khr_il_program.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_image2d_from_buffer.txt b/man/static/cl_khr_image2d_from_buffer.txt index 9543c6e71..2c15ac8f5 100644 --- a/man/static/cl_khr_image2d_from_buffer.txt +++ b/man/static/cl_khr_image2d_from_buffer.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_initialize_memory.txt b/man/static/cl_khr_initialize_memory.txt index d808d0779..3b9a900cf 100644 --- a/man/static/cl_khr_initialize_memory.txt +++ b/man/static/cl_khr_initialize_memory.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_int64_base_atomics.txt b/man/static/cl_khr_int64_base_atomics.txt index ba9448f19..8c448ce9b 100644 --- a/man/static/cl_khr_int64_base_atomics.txt +++ b/man/static/cl_khr_int64_base_atomics.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_int64_extended_atomics.txt b/man/static/cl_khr_int64_extended_atomics.txt index 88e0f6d61..e3dccdde7 100644 --- a/man/static/cl_khr_int64_extended_atomics.txt +++ b/man/static/cl_khr_int64_extended_atomics.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_local_int32_base_atomics.txt b/man/static/cl_khr_local_int32_base_atomics.txt index 587444673..7a18190bf 100644 --- a/man/static/cl_khr_local_int32_base_atomics.txt +++ b/man/static/cl_khr_local_int32_base_atomics.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_local_int32_extended_atomics.txt b/man/static/cl_khr_local_int32_extended_atomics.txt index 5abbf1879..dec50fa86 100644 --- a/man/static/cl_khr_local_int32_extended_atomics.txt +++ b/man/static/cl_khr_local_int32_extended_atomics.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_mipmap_image.txt b/man/static/cl_khr_mipmap_image.txt index a77f529f9..67058baa4 100644 --- a/man/static/cl_khr_mipmap_image.txt +++ b/man/static/cl_khr_mipmap_image.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_priority_hints.txt b/man/static/cl_khr_priority_hints.txt index 0ea6a76c4..776ad08f0 100644 --- a/man/static/cl_khr_priority_hints.txt +++ b/man/static/cl_khr_priority_hints.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_spir.txt b/man/static/cl_khr_spir.txt index ae177c0a0..eb4d7f840 100644 --- a/man/static/cl_khr_spir.txt +++ b/man/static/cl_khr_spir.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_srgb_image_writes.txt b/man/static/cl_khr_srgb_image_writes.txt index fd386abf2..e567116dc 100644 --- a/man/static/cl_khr_srgb_image_writes.txt +++ b/man/static/cl_khr_srgb_image_writes.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_subgroups.txt b/man/static/cl_khr_subgroups.txt index ea8775b78..64a9075f6 100644 --- a/man/static/cl_khr_subgroups.txt +++ b/man/static/cl_khr_subgroups.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_terminate_context.txt b/man/static/cl_khr_terminate_context.txt index 59654afe3..d20eec864 100644 --- a/man/static/cl_khr_terminate_context.txt +++ b/man/static/cl_khr_terminate_context.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/cl_khr_throttle_hints.txt b/man/static/cl_khr_throttle_hints.txt index 6e19f8a35..8463c503a 100644 --- a/man/static/cl_khr_throttle_hints.txt +++ b/man/static/cl_khr_throttle_hints.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/convert_T.txt b/man/static/convert_T.txt index 97d86f223..4fd29230e 100644 --- a/man/static/convert_T.txt +++ b/man/static/convert_T.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/deadLinks.txt b/man/static/deadLinks.txt index c799d90c8..38a3d92e3 100644 --- a/man/static/deadLinks.txt +++ b/man/static/deadLinks.txt @@ -1,4 +1,4 @@ -// Copyright 2021-2023 The Khronos Group Inc. +// Copyright 2021-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/enums.txt b/man/static/enums.txt index 884db11ba..f9778cfb2 100644 --- a/man/static/enums.txt +++ b/man/static/enums.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2023 The Khronos Group Inc. +// Copyright 2014-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/footer.txt b/man/static/footer.txt index f65ba1ffa..d40bf0d75 100644 --- a/man/static/footer.txt +++ b/man/static/footer.txt @@ -1,4 +1,4 @@ -// Copyright 2016-2023 The Khronos Group Inc. +// Copyright 2016-2024 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 ifdef::doctype-manpage[] diff --git a/man/static/intro.txt b/man/static/intro.txt index 516a5519e..cbfd26dea 100644 --- a/man/static/intro.txt +++ b/man/static/intro.txt @@ -1,4 +1,4 @@ -// Copyright 2007-2023 The Khronos Group Inc. +// Copyright 2007-2024 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/scripts/cgenerator.py b/scripts/cgenerator.py index 4b2a8f1e2..d4cab2b0b 100644 --- a/scripts/cgenerator.py +++ b/scripts/cgenerator.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -i # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/checklinks.py b/scripts/checklinks.py index 94b650dad..3b8ee644c 100755 --- a/scripts/checklinks.py +++ b/scripts/checklinks.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 import argparse diff --git a/scripts/clconventions.py b/scripts/clconventions.py index 5b849892b..dc4d95fe8 100644 --- a/scripts/clconventions.py +++ b/scripts/clconventions.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -i # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 # Working-group-specific style conventions, diff --git a/scripts/conventions.py b/scripts/conventions.py index 6b6b23d14..34fa2ea52 100644 --- a/scripts/conventions.py +++ b/scripts/conventions.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -i # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/docgenerator.py b/scripts/docgenerator.py index ea4339f10..4ebabd870 100644 --- a/scripts/docgenerator.py +++ b/scripts/docgenerator.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -i # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. 
# # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/extensionmetadocgenerator.py b/scripts/extensionmetadocgenerator.py index d6243889d..957cb5a49 100644 --- a/scripts/extensionmetadocgenerator.py +++ b/scripts/extensionmetadocgenerator.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -i # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/genRef.py b/scripts/genRef.py index 87d8d7a60..ed7f2580c 100755 --- a/scripts/genRef.py +++ b/scripts/genRef.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 # -# Copyright 2016-2023 The Khronos Group Inc. +# Copyright 2016-2024 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 @@ -51,7 +51,7 @@ def printCopyrightSourceComments(fp): Writes an asciidoc comment block, which copyrights the source file.""" - print('// Copyright 2014-2023 The Khronos Group, Inc.', file=fp) + print('// Copyright 2014-2024 The Khronos Group, Inc.', file=fp) print('//', file=fp) # This works around constraints of the 'reuse' tool print('// SPDX' + '-License-Identifier: CC-BY-4.0', file=fp) diff --git a/scripts/gen_dictionaries.py b/scripts/gen_dictionaries.py index 069800dbb..4d750876a 100755 --- a/scripts/gen_dictionaries.py +++ b/scripts/gen_dictionaries.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2019-2023 The Khronos Group Inc. +# Copyright 2019-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 from collections import OrderedDict @@ -19,7 +19,7 @@ def parse_xml(path): # File Header: def GetHeader(): - return """// Copyright 2017-2023 The Khronos Group. This work is licensed under a + return """// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/scripts/gen_version_notes.py b/scripts/gen_version_notes.py index 030d9f948..5843fe1da 100755 --- a/scripts/gen_version_notes.py +++ b/scripts/gen_version_notes.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2019-2023 The Khronos Group Inc. +# Copyright 2019-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 from collections import OrderedDict @@ -23,7 +23,7 @@ def parse_xml(path): # File Header: def GetHeader(): - return """// Copyright 2017-2023 The Khronos Group. This work is licensed under a + return """// Copyright 2017-2024 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ """ diff --git a/scripts/gencl.py b/scripts/gencl.py index eb77a8cdf..df7c74765 100755 --- a/scripts/gencl.py +++ b/scripts/gencl.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 @@ -109,7 +109,7 @@ def makeGenOpts(args): # The SPDX formatting below works around constraints of the 'reuse' tool prefixStrings = [ '/*', - '** Copyright 2015-2023 The Khronos Group Inc.', + '** Copyright 2015-2024 The Khronos Group Inc.', '**', '** SPDX' + '-License-Identifier: Apache-2.0', '*/', diff --git a/scripts/generator.py b/scripts/generator.py index c7c460d95..a5b648b98 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -i # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. 
# # SPDX-License-Identifier: Apache-2.0 """Base class for source/header/doc generators, as well as some utility functions.""" diff --git a/scripts/pygenerator.py b/scripts/pygenerator.py index b2e76e66d..da8cd7d69 100644 --- a/scripts/pygenerator.py +++ b/scripts/pygenerator.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -i # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/realign.py b/scripts/realign.py index b59865b3d..495cb74ba 100755 --- a/scripts/realign.py +++ b/scripts/realign.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 # Usage: realign [infile] > outfile diff --git a/scripts/reflib.py b/scripts/reflib.py index 426a1811b..535683ae8 100644 --- a/scripts/reflib.py +++ b/scripts/reflib.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 # -# Copyright 2016-2023 The Khronos Group Inc. +# Copyright 2016-2024 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/reg.py b/scripts/reg.py index d78ecde89..1b1173e78 100755 --- a/scripts/reg.py +++ b/scripts/reg.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -i # -# Copyright 2013-2023 The Khronos Group Inc. +# Copyright 2013-2024 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/spec_tools/util.py b/scripts/spec_tools/util.py index 4e42e239d..3dde0bd42 100644 --- a/scripts/spec_tools/util.py +++ b/scripts/spec_tools/util.py @@ -1,6 +1,6 @@ """Utility functions not closely tied to other spec_tools types.""" # Copyright (c) 2018-2019 Collabora, Ltd. -# Copyright (c) 2013-2023 The Khronos Group Inc. +# Copyright (c) 2013-2024 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/xml/Makefile b/xml/Makefile index 0c4e7345e..28df30091 100644 --- a/xml/Makefile +++ b/xml/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2013-2023 The Khronos Group Inc. +# Copyright (c) 2013-2024 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/xml/cl.xml b/xml/cl.xml index 2a478c364..3691b12ab 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1,7 +1,7 @@ -Copyright 2013-2023 The Khronos Group Inc. +Copyright 2013-2024 The Khronos Group Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/xml/registry.rnc b/xml/registry.rnc index d2055e79f..f29ba8d5b 100644 --- a/xml/registry.rnc +++ b/xml/registry.rnc @@ -1,4 +1,4 @@ -# Copyright (c) 2013-2023 The Khronos Group Inc. +# Copyright (c) 2013-2024 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 8ca1abc62acd0924b24628537d6d7c8c07a0134f Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 8 Jan 2024 08:36:47 -0800 Subject: [PATCH 051/190] add missing curly brace for asciidoctor attribute (#1038) --- api/opencl_runtime_layer.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 3c7fa3123..da5612e1b 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -366,7 +366,7 @@ include::{generated}/api/version-notes/CL_QUEUE_PROPERTIES_ARRAY.asciidoc[] additional properties. 
If _command_queue_ was created using {clCreateCommandQueue}, or if the - _properties_ argument specified in clCreateCommandQueueWithProperties} + _properties_ argument specified in {clCreateCommandQueueWithProperties} was `NULL`, the implementation must return _param_value_size_ret_ equal to 0, indicating that there are no properties to be returned. From 6a7874a368326070213cede6f044c1b6d44684a3 Mon Sep 17 00:00:00 2001 From: paulfradgley <39525348+paulfradgley@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:36:31 +0000 Subject: [PATCH 052/190] Add CL_CANCELLED_IMG error code for cl_img_cancel_command extension. (#1042) * Remove unnecessary RESERVED_IMG enums and add CL_CANCELLED_IMG error code for cl_img_cancel_commands * Re-add RESERVED enum names, and change CL_CANCELLED_IMG enum value to -1126 --- xml/cl.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xml/cl.xml b/xml/cl.xml index 3691b12ab..34c0fd6d6 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -674,7 +674,8 @@ server's OpenCL/api-docs repository. 
- + + From 4e2460f27d1b5422ab7735fb54ebc1f75ff67389 Mon Sep 17 00:00:00 2001 From: paulfradgley <39525348+paulfradgley@users.noreply.github.com> Date: Fri, 26 Jan 2024 19:11:11 +0000 Subject: [PATCH 053/190] Upload cl_img_cancel_command asciidoc specification (#1046) * Upload cl_img_cancel_command asciidoc specification * Fix copyright message --- extensions/cl_img_cancel_command.asciidoc | 96 +++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 extensions/cl_img_cancel_command.asciidoc diff --git a/extensions/cl_img_cancel_command.asciidoc b/extensions/cl_img_cancel_command.asciidoc new file mode 100644 index 000000000..9b9599268 --- /dev/null +++ b/extensions/cl_img_cancel_command.asciidoc @@ -0,0 +1,96 @@ + += cl_img_cancel_command + +== Name Strings + +`cl_img_cancel_command` + +== Version History + +[cols="1,1,3",options="header",] +|==== +| *Date* | *Version* | *Description* +| 2023-07-05 | 1.0.0 | Initial revision. +|==== + +== Contacts + +Imagination Technologies Developer Forum: + +https://forums.imgtec.com/ + +Paul Fradgley, Imagination Technologies (paul.fradgley 'at' imgtec.com) + +== Contributors + +Paul Fradgley, Imagination Technologies. + +== Notice + +Copyright (c) 2023-2024 Imagination Technologies Ltd. All Rights Reserved. + +== Status + +Shipping. + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +Requires OpenCL version 3.0 or later. + +This extension is written against the wording of the OpenCL 3.0 Specification. + +== Overview + +This extension adds the functionality to instruct the OpenCL implementation that an incomplete OpenCL command no longer needs to be executed. 
+ +== New API Functions + +[source] +---- +cl_int clCancelCommandsIMG( + const cl_event *event_list, + size_t num_events_in_list) +---- + +== New API Enums + +[source,opencl] +---- +CL_CANCELLED_IMG -1126 +---- + +== Modifications to the OpenCL API Specification + +(Add Section 5.16, *Cancelling Queued Commands*) :: ++ + +The function + +[source] +---- +cl_int clCancelCommandsIMG( + const cl_event *event_list, + size_t num_events_in_list); +---- +is used to inform the OpenCL implementation that a list of commands that were previously enqueued are no longer required. + +Any commands belonging to events in the _event_list_ that are in the `CL_QUEUED` state will not be executed. These events will be set to the `CL_CANCELLED_IMG` state. + +Any commands belonging to events in the _event_list_ that are in the `CL_SUBMITTED` state might not be executed. These events will be set to the `CL_CANCELLED_IMG` state. + +Any commands belonging to events in the _event_list_ that are in the `CL_RUNNING`, `CL_COMPLETE` or an error state will not be affected. + +Any other command in the `CL_QUEUED` state that has a `CL_CANCELLED_IMG` event in its event_wait_list will not be executed. The events belonging to these commands will also be set to the `CL_CANCELLED_IMG` state. + +_event_list_ and _num_events_in_list_ specify events that belong to commands that no longer need to be executed. +If _event_list_ is `NULL`, _num_events_in_list_ must be 0. +If _event_list_ is not `NULL`, the list of events pointed to by _event_list_ must be valid and _num_events_in_list_ must be greater than 0. + +*clCancelCommandsIMG* returns `CL_SUCCESS` if the function is executed successfully, otherwise it returns one of the following errors: + +* `CL_INVALID_VALUE` if _event_list_ is `NULL` and _num_events_in_list_ is greater than 0. +* `CL_INVALID_VALUE` if _event_list_ is not `NULL` and _num_events_in_list_ is 0. 
+* `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. +* `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. + +== Issues From b349171c91e8748c50a297350a2b796348ae8c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Mon, 29 Jan 2024 16:51:25 +0000 Subject: [PATCH 054/190] Fix unterminated table in IMG extension (#1051) Change-Id: Ib3e14b09c052930da79c74b2e09654c89df93d45 Signed-off-by: Kevin Petit --- extensions/cl_img_mem_properties.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions/cl_img_mem_properties.asciidoc b/extensions/cl_img_mem_properties.asciidoc index 94f015945..2e3b4bb37 100644 --- a/extensions/cl_img_mem_properties.asciidoc +++ b/extensions/cl_img_mem_properties.asciidoc @@ -128,6 +128,7 @@ Add Table: List of supported param name by *clGetDeviceInfo* :: | `CL_DEVICE_MEMORY_CAPABILITIES_IMG` | `cl_mem_alloc_flags_img` | Allocation flags describing the memory region capabilities by the device. +|==== -- == Revision History From 0e4680022943bee2dbdd9dbccffed0f9ed8754f7 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 30 Jan 2024 09:26:21 -0800 Subject: [PATCH 055/190] clarify that 16-bit and 64-bit floats can be passed as kernel arguments (#1049) --- env/common_properties.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env/common_properties.asciidoc b/env/common_properties.asciidoc index ce5acf67f..271d50b9c 100644 --- a/env/common_properties.asciidoc +++ b/env/common_properties.asciidoc @@ -370,7 +370,7 @@ Allowed types for OpenCL kernel arguments are: For *OpTypeInt* parameters, supported _Widths_ are 8, 16, 32, and 64, and must have no signedness semantics. -For *OpTypeFloat* parameters, _Width_ must be 32. +For *OpTypeFloat* parameters, supported _Widths_ are 16, 32, and 64. 
For *OpTypeStruct* parameters, supported structure _Member Types_ are: From 6a17a4f48799740f75b7c4e5752c24a244c418ce Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 30 Jan 2024 09:27:15 -0800 Subject: [PATCH 056/190] fixes derived formula for atanh (#1048) Asciidoctor treats words surrounded by double parentheses as index terms, so we need to escape the first parenthesis so the derived formula for atanh is properly preserved. --- OpenCL_C.txt | 2 +- env/numerical_compliance.asciidoc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 2bdf6887e..d016927e2 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -11677,7 +11677,7 @@ requires>> support for OpenCL C 2.0 or newer. | Defined for _x_ in the domain (-1, 1). For _x_ in [-2^-10^, 2^-10^], derived implementations may implement as _x_. For _x_ outside of [-2^-10^, 2^-10^], derived implementations may implement as - 0.5f * *log*((1.0f + _x_) / (1.0f - _x_)). + 0.5f * *log*\((1.0f + _x_) / (1.0f - _x_)). For non-derived implementations, the error is {leq} 8192 ulp. | *atanpi*(_x_) diff --git a/env/numerical_compliance.asciidoc b/env/numerical_compliance.asciidoc index 994475e46..ee339a948 100644 --- a/env/numerical_compliance.asciidoc +++ b/env/numerical_compliance.asciidoc @@ -1443,7 +1443,7 @@ profile. | Defined for _x_ in the domain (-1, 1). For _x_ in [-2^-10^, 2^-10^], derived implementations may implement as _x_. For _x_ outside of [-2^-10^, 2^-10^], derived implementations may implement as - 0.5f * *log*((1.0f + _x_) / (1.0f - _x_)). + 0.5f * *log*\((1.0f + _x_) / (1.0f - _x_)). For non-derived implementations, the error is {leq} 8192 ulp. 
| *OpExtInst* *atanpi* From 75cb453337f807c45487fdd95310789569c7fddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Fri, 2 Feb 2024 23:32:04 +0000 Subject: [PATCH 057/190] Remove redundant error condition in cl_khr_semaphore (#1052) * Remove redundant error condition in cl_khr_semaphore This case is already (better) covered by the conditions for CL_INVALID_CONTEXT. Signed-off-by: Kevin Petit Change-Id: Ibb22aaba04772042e84464487b3528305c0e2809 * remote stray or's Change-Id: I78918127b4818236b4cdf3ed569d1dfef7eace8c --------- Signed-off-by: Kevin Petit --- ext/cl_khr_semaphore.asciidoc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc index ff885700c..95c41522a 100644 --- a/ext/cl_khr_semaphore.asciidoc +++ b/ext/cl_khr_semaphore.asciidoc @@ -60,6 +60,7 @@ Carsten Rohde, NVIDIA + Christoph Kubisch, NVIDIA + Debalina Bhattacharjee, NVIDIA + Faith Ekstrand, INTEL + +Gorazd Sumkovski, ARM + James Jones, NVIDIA + Jeremy Kemp, IMAGINATION + Joshua Kelly, QUALCOMM + @@ -309,8 +310,7 @@ Otherwise, it returns one of the following errors: * {CL_INVALID_COMMAND_QUEUE} ** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not same as one of the devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _sema_objects_, or -** if one or more of _sema_objects_ belong to a context that does not contain a device associated with _command_queue_. +** if the device associated with _command_queue_ is not same as one of the devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _sema_objects_. * {CL_INVALID_VALUE} if _num_sema_objects_ is 0. * {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by _sema_objects_ is not valid. 
* {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and any of the semaphore objects in _sema_objects_ are not the same or if the context associated with _command_queue_ and that associated with events in _event_wait_list_ are not the same. @@ -360,8 +360,7 @@ Otherwise, it returns one of the following errors: * {CL_INVALID_COMMAND_QUEUE} ** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not same as one of the devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _sema_objects_, or -** if one or more of _sema_objects_ belong to a context that does not contain a device associated with _command_queue_. +** if the device associated with _command_queue_ is not same as one of the devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _sema_objects_. * {CL_INVALID_VALUE} if _num_sema_objects_ is 0 * {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by _sema_objects_ is not valid. * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and any of the semaphore objects in _sema_objects_ are not the same or if the context associated with _command_queue_ and that associated with events in _event_wait_list_ are not the same. 
From 750bfe4c3b521f3b16abb0dc9bcc4ef93d22c8e5 Mon Sep 17 00:00:00 2001 From: Sun Serega Date: Thu, 15 Feb 2024 17:53:20 +0200 Subject: [PATCH 058/190] Fix `cl_khr_command_buffer_mutable_dispatch` extension (#1059) * fix found at the end of https://github.com/KhronosGroup/OpenCL-Docs/pull/992 * Update xml/cl.xml Co-authored-by: Ben Ashbaugh --------- Co-authored-by: Ben Ashbaugh --- ext/cl_khr_command_buffer_mutable_dispatch.asciidoc | 6 +++--- xml/cl.xml | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index 642fb0540..c5ab15504 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -337,7 +337,7 @@ description of property values. |==== -Add a {CL_COMMAND_BUFFER_ASSERTS_KHR} property to the +Add a {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property to the <> table. [cols=",,",options="header",] @@ -346,7 +346,7 @@ Add a {CL_COMMAND_BUFFER_ASSERTS_KHR} property to the | *Property Value* | *Description* -| {CL_COMMAND_BUFFER_ASSERTS_KHR} +| {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} | {cl_mutable_dispatch_asserts_khr_TYPE} | This is a bitfield and can be set to a combination of the following values: @@ -361,7 +361,7 @@ Add a {CL_COMMAND_BUFFER_ASSERTS_KHR} property to the ===== Additional Errors -* {CL_INVALID_VALUE} if _properties_ has a {CL_COMMAND_BUFFER_ASSERTS_KHR} property with +* {CL_INVALID_VALUE} if _properties_ has a {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property with {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, but _local_work_size_ is `NULL`. ==== Modifications to clCommandNDRangeKernelKHR diff --git a/xml/cl.xml b/xml/cl.xml index 34c0fd6d6..1f543939a 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -7183,7 +7183,6 @@ server's OpenCL/api-docs repository. - @@ -7352,7 +7351,7 @@ server's OpenCL/api-docs repository. 
- + From df459c0a7ce98ccd3327083301c7e85c4b110cfe Mon Sep 17 00:00:00 2001 From: Sun Serega Date: Tue, 27 Feb 2024 02:49:05 +0100 Subject: [PATCH 059/190] Add `cl_img_cancel_command` extension to XML (#1056) * Add `cl_img_cancel_command` to XML * also require the command --- xml/cl.xml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/xml/cl.xml b/xml/cl.xml index 1f543939a..44aac6105 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -4305,6 +4305,11 @@ server's OpenCL/api-docs repository. void* param_value size_t* param_value_size_ret + + cl_int clCancelCommandsIMG + const cl_event* event_list + size_t num_events_in_list + @@ -7410,5 +7415,13 @@ server's OpenCL/api-docs repository. + + + + + + + + From 60069a1c2ead9f4d7c23bbb0097d5b1831e732de Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 27 Feb 2024 07:25:04 -0800 Subject: [PATCH 060/190] add USM host mem alloc size issue (#1058) * add USM host mem alloc size issue * fix asciidoc list markup --- .../cl_intel_unified_shared_memory.asciidoc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/extensions/cl_intel_unified_shared_memory.asciidoc b/extensions/cl_intel_unified_shared_memory.asciidoc index c28d5303a..934dbafa8 100644 --- a/extensions/cl_intel_unified_shared_memory.asciidoc +++ b/extensions/cl_intel_unified_shared_memory.asciidoc @@ -1260,6 +1260,24 @@ This would be a fairly straightforward addition if it is useful. Note that there is no similar SVM "rect" memcpy. -- +. Should there be an upper limit on the size of an allocation using *clHostMemAllocINTEL*? +If so, what should the upper limit be? ++ +-- +*UNRESOLVED*: +The upper limit is currently defined by `CL_DEVICE_MAX_MEM_ALLOC_SIZE` and if the allocation size exceeds this value then *clHostMemAllocINTEL* returns `CL_INVALID_BUFFER_SIZE`. + +This behavior is consistent with *clSVMAlloc* (although *clSVMAlloc* does not return an error code it is specified to return a `NULL` pointer in this case) and *clCreateBuffer*. 
+However, because *clHostMemAllocINTEL* is intended to allocate host memory, some implementations are able to support larger allocation sizes using *clHostMemAllocINTEL*. + +Possible resolutions: + +* Add a new query representing the maximum host memory allocation size supported by the device, e.g. `CL_DEVICE_MAX_HOST_MEM_ALLOC_SIZE_INTEL`. +For some devices, this query will return the same value as `CL_DEVICE_MAX_MEM_ALLOC_SIZE`, but for other devices this query will return a larger value. +* Relax the error behavior so implementations may return `CL_INVALID_BUFFER_SIZE`, but they would not be required to return an error if they support larger allocation sizes. +* Do nothing and keep the existing error behavior. +-- + == Revision History [cols="5,15,15,70"] From 29e7d9b39641d8d0d41a25323caa953883c5a990 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 27 Feb 2024 08:23:38 -0800 Subject: [PATCH 061/190] clarify conditions for CL_INVALID_PLATFORM (#1063) Clarifies the conditions when clCreateContext and clCreateContextFromType should return CL_INVALID_PLATFORM. --- api/opencl_platform_layer.asciidoc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 6ab8b4779..3aeba7375 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -1894,9 +1894,9 @@ to {CL_SUCCESS} if the context is created successfully. Otherwise, it returns a `NULL` value with the following error values returned in _errcode_ret_: - * {CL_INVALID_PLATFORM} if _properties_ is `NULL` and no platform could be - selected or if platform value specified in _properties_ is not a valid - platform. + * {CL_INVALID_PLATFORM} if no platform is specified in _properties_ and no + platform could be selected, or if the platform specified in _properties_ is + not a valid platform. 
* {CL_INVALID_PROPERTY} if context property name in _properties_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than @@ -1962,9 +1962,9 @@ is set to {CL_SUCCESS} if the context is created successfully. Otherwise, it returns a `NULL` value with the following error values returned in _errcode_ret_: - * {CL_INVALID_PLATFORM} if _properties_ is `NULL` and no platform could be - selected or if platform value specified in _properties_ is not a valid - platform. + * {CL_INVALID_PLATFORM} if no platform is specified in _properties_ and no + platform could be selected, or if the platform specified in _properties_ is + not a valid platform. * {CL_INVALID_PROPERTY} if context property name in _properties_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than From 8dd7a6c2277ecd173b3acf3366538eda0b75f757 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 27 Feb 2024 08:24:40 -0800 Subject: [PATCH 062/190] fix a few notes in the spec for consistency (#1070) --- OpenCL_C.txt | 4 ++-- api/embedded_profile.asciidoc | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index d016927e2..2eb53ea47 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -51,11 +51,11 @@ include::copyrights.txt[] = The OpenCL C Programming Language [NOTE] --- +==== This document starts at chapter 6 to keep the section numbers historically consistent with previous versions of the OpenCL and OpenCL C Programming Language specifications. --- +==== This section describes the OpenCL C programming language. 
The OpenCL C programming language may be used to write kernels that execute diff --git a/api/embedded_profile.asciidoc b/api/embedded_profile.asciidoc index e80243c8e..cc53176c3 100644 --- a/api/embedded_profile.asciidoc +++ b/api/embedded_profile.asciidoc @@ -81,7 +81,6 @@ Edge case behavior and accuracy rules are described in the OpenCL C and OpenCL SPIR-V Environment specifications. [NOTE] -.Note ==== If addition, subtraction and multiplication have default round to zero rounding mode, then *fract*, *fma* and *fdim* shall produce the correctly From ce6f8a42c83f2e59058e0ef8ae3b478a769ecdf5 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 29 Feb 2024 17:10:07 -0800 Subject: [PATCH 063/190] clarify clSetEventCallback behavior for command errors (#1071) The callback registered for CL_COMPLETE will be called when the command completes successfully or when the command is abnormally terminated. This behavior used to be documented in a footnote but it is belongs in the main spec text instead. --- api/footnotes.asciidoc | 4 ---- api/opencl_runtime_layer.asciidoc | 35 +++++++++++++++++-------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index 4af8a24c9..66da48e80 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -43,10 +43,6 @@ Either of these choices would mean that no big swap would need to occur in hardw The OpenCL specification does not describe the order of precedence for error codes returned by API calls. \ ] -:fn-event-callback-complete: pass:n[ \ -The callback function registered for a _command_exec_callback_type_ value of {CL_COMPLETE} will be called when the command has completed successfully or is abnormally terminated. \ -] - :fn-event-status-order: pass:n[ \ The error code values are negative, and event state values are positive. 
\ The event state values are ordered from the largest value {CL_QUEUED} for the first or initial state to the smallest value ({CL_COMPLETE} or negative integer value) for the last or complete state. \ diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index da5612e1b..18902d1b0 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -8852,17 +8852,11 @@ include::{generated}/api/version-notes/clSetEventCallback.asciidoc[] * _event_ is a valid event object. * _command_exec_callback_type_ specifies the command execution status for which the callback is registered. - The command execution callback values for which a callback can be registered - are: {CL_SUBMITTED}, {CL_RUNNING}, or - {CL_COMPLETE} footnote:[{fn-event-callback-complete}]. - There is no guarantee that the callback functions registered for various - execution status values for an event will be called in the exact order that - the execution status of a command changes. - Furthermore, it should be noted that receiving a call back for an event with - a status other than {CL_COMPLETE}, in no way implies that the memory model or - execution model as defined by the OpenCL specification has changed. - For example, it is not valid to assume that a corresponding memory transfer - has completed unless the event is in a state {CL_COMPLETE}. + The command execution status types for which a callback can be registered + are {CL_SUBMITTED}, {CL_RUNNING}, or {CL_COMPLETE}. + The callback function registered for a _command_exec_callback_type_ value of + {CL_COMPLETE} will be called when the command has completed successfully or + is abnormally terminated. * _pfn_event_notify_ is the event callback function that can be registered by the application. This callback function may be called asynchronously by the OpenCL @@ -8884,19 +8878,28 @@ include::{generated}/api/version-notes/clSetEventCallback.asciidoc[] called. _user_data_ can be `NULL`. 
-The registered callback function will be called when the execution status of -command associated with _event_ changes to an execution status equal to or -past the status specified by _command_exec_status_. - Each call to {clSetEventCallback} registers the specified user callback function on a callback stack associated with _event_. The order in which the registered user callback functions are called is undefined. +The registered callback function will be called when the execution status of the +command associated with _event_ changes to an execution status equal to or past +the status specified by _command_exec_callback_type_, or for the execution status +{CL_COMPLETE}, if the command is abnormally terminated. +There is no guarantee that the callback functions registered for various command +execution status values for an event will be called in the exact order that the +execution status of a command changes. +Furthermore, it should be noted that calling a callback for an event execution +status other than {CL_COMPLETE} in no way implies that the memory model or +execution model as defined by the OpenCL specification has changed. For example, +it is not valid to assume that a corresponding memory transfer has completed +unless the event is in the state {CL_COMPLETE}. + All callbacks registered for an event object must be called before the event object is destroyed. -Callbacks should return promptly. +Callbacks should return promptly. Behavior is undefined when calling expensive system routines, OpenCL APIs to create contexts or command-queues, or blocking OpenCL APIs in an event callback. 
Rather than calling a blocking OpenCL API in an event callback, applications From b6e245700a52cddeacb658dee85b45e8d983cca5 Mon Sep 17 00:00:00 2001 From: Yuki K Date: Fri, 1 Mar 2024 10:17:57 +0900 Subject: [PATCH 064/190] Fix markups of parameter names ``obj`` and ``value`` (#1054) In other part of the document, parameter names are enclosed in ``_``, so these parameters also should be marked up the same way. --- OpenCL_C.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 2eb53ea47..eace43b92 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -6629,7 +6629,7 @@ operation, constitutes a data-race. -- The `atomic_init` function non-atomically initializes the atomic object -pointed to by obj to the value value. +pointed to by _obj_ to the value _value_. [source,opencl_c] ---------- From 1b80a478e809d1597123886c19ca2875a5b07f9e Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 3 Mar 2024 08:29:47 -0800 Subject: [PATCH 065/190] fix a few more broken refpage links for CL_VERSION_X_Y macros (#1074) --- OpenCL_C.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index eace43b92..ab33bdf11 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -3405,7 +3405,7 @@ supported with `{global}` address space qualifier. 
[[preprocessor-directives-and-macros]] == Preprocessor Directives and Macros -[open,refpage='preprocessorDirectives',desc='Preprocessor Directives and Macros',type='freeform',spec='clang',anchor='preprocessor-directives-and-macros',xrefs='clBuildProgram mathConstants EXTENSION FP_CONTRACT',alias='CL_VERSION_1_0 CL_VERSION_1_1 CL_VERSION_1_2'] +[open,refpage='preprocessorDirectives',desc='Preprocessor Directives and Macros',type='freeform',spec='clang',anchor='preprocessor-directives-and-macros',xrefs='clBuildProgram mathConstants EXTENSION FP_CONTRACT',alias='CL_VERSION_1_0 CL_VERSION_1_1 CL_VERSION_1_2 CL_VERSION_2_0 CL_VERSION_2_1 CL_VERSION_2_2 CL_VERSION_3_0'] -- The preprocessing directives defined by the C99 specification are supported. From 32522cb32e9cae64430ef0e6c9cf7ae5643db857 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 3 Mar 2024 08:53:13 -0800 Subject: [PATCH 066/190] fix a few minor issues in the extensions spec (#1062) * fix unterminated open block * fix missing word in assume linear images enum * tidy up mutable dispatch extension Use consistent comment style in new enum description Use asciidoctor attribute for mutable dispatch asserts type Rearrange enums slightly to group by use * move the description of the no additional work-groups error condition This error condition is returned by clCommandNDRangeKernelKHR, not clCreateCommandBufferKHR. --- ...r_command_buffer_mutable_dispatch.asciidoc | 35 ++++++++++--------- ext/cl_khr_external_memory.asciidoc | 2 +- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc index c5ab15504..b189c9c88 100644 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -20,7 +20,7 @@ commands between command-buffer enqueues. |==== | *Date* | *Version* | *Description* | 2022-08-31 | 0.9.0 | First assigned version (provisional). 
-| 2023-11-07 | 0.9.1 | Add type cl_mutable_dispatch_asserts_khr and its possible values (provisional). +| 2023-11-07 | 0.9.1 | Add type {cl_mutable_dispatch_asserts_khr_TYPE} and its possible values (provisional). |==== include::provisional_notice.asciidoc[] @@ -105,7 +105,7 @@ typedef cl_uint cl_mutable_command_info_khr; // Identifies the type of a structure to allow structure pointer chains typedef cl_uint cl_command_buffer_structure_type_khr; -// Bitfield covering certain asserts by the user to the implementation, enabling possible optimizations +// Bitfield describing mutable-dispatch assertions, enabling possible optimizations typedef cl_bitfield cl_mutable_dispatch_asserts_khr; ---- @@ -237,10 +237,13 @@ CL_INVALID_MUTABLE_COMMAND_KHR -1141 // Accepted values for the param_name parameter to clGetDeviceInfo CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 -/* cl_command_buffer_properties_khr */ +// Accepted command buffer property to clCreateCommandBufferKHR CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 -// Property to cl_ndrange_kernel_command_properties_khr +// Bits for cl_command_buffer_flags_khr +CL_COMMAND_BUFFER_MUTABLE_KHR (0x1 << 1) + +// Accepted ND-range kernel command properties to clCommandNDRangeKernelKHR CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 CL_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B8 @@ -251,6 +254,9 @@ CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR (0x1 << 2) CL_MUTABLE_DISPATCH_ARGUMENTS_KHR (0x1 << 3) CL_MUTABLE_DISPATCH_EXEC_INFO_KHR (0x1 << 4) +// Bits for cl_mutable_dispatch_asserts_khr bitfield +CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR (0x1 << 0) + // cl_mutable_command_info_khr CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR 0x12A0 CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR 0x12A1 @@ -261,12 +267,6 @@ CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR 0x12A5 CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR 0x12A6 CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR 0x12A7 CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR 0x12AD - -// Bits for cl_command_buffer_flags_khr 
-CL_COMMAND_BUFFER_MUTABLE_KHR (0x1 << 1) - -// Bits for cl_mutable_dispatch_asserts_khr bitfield -CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR (0x1 << 0) ---- Enum values for {cl_command_buffer_structure_type_khr_TYPE} allowing the structure @@ -359,11 +359,6 @@ Add a {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property to the |==== -===== Additional Errors - -* {CL_INVALID_VALUE} if _properties_ has a {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property with - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, but _local_work_size_ is `NULL`. - ==== Modifications to clCommandNDRangeKernelKHR ===== Properties Parameter @@ -471,8 +466,13 @@ Is replaced with The following error condition is added: -* {CL_INVALID_VALUE} if _properties_ has a {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} property with - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, but _local_work_size_ is `NULL`. +* {CL_INVALID_VALUE} if _command_buffer_ was created with the + {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property with + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and + _local_work_size_ is `NULL`, or if _properties_ includes the + {CL_MUTABLE_DISPATCH_ASSERTS_KHR} property with + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and + _local_work_size_ is `NULL`. [[mutable-commands]] ==== New Section in the OpenCL API specification 5.X.5 - Mutable Commands: @@ -1007,3 +1007,4 @@ non-trivial deep copying of the underlying objects contained in the command-buffer. As a result of this new entry-point being an additive change to the specification it is omitted, and if its functionality has demand later, it may be a introduced as a stand alone extension. 
+-- diff --git a/ext/cl_khr_external_memory.asciidoc b/ext/cl_khr_external_memory.asciidoc index a2dda4883..3eeae8908 100644 --- a/ext/cl_khr_external_memory.asciidoc +++ b/ext/cl_khr_external_memory.asciidoc @@ -33,7 +33,7 @@ Other related extensions define specific external memory types that may be impor | 2021-09-10 | 0.9.0 | Initial version (provisional). | 2023-05-04 | 0.9.1 | Clarified device handle list enum cannot be specified without an external memory handle (provisional). | 2023-08-01 | 0.9.2 | Changed device handle list enum to the memory-specific {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). -| 2023-08-29 | 0.9.3 | Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_HANDLE_TYPES_KHR} (provisional). +| 2023-08-29 | 0.9.3 | Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} (provisional). |==== include::provisional_notice.asciidoc[] From f4910b160b7072c6f40b2b6f66d2218de3aa1320 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 3 Mar 2024 09:10:41 -0800 Subject: [PATCH 067/190] add the numerical value of the image channel order and data type (#1050) This may make it easier to extend these tables or enable additional simplifications in the future. --- env/common_properties.asciidoc | 142 ++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 45 deletions(-) diff --git a/env/common_properties.asciidoc b/env/common_properties.asciidoc index 271d50b9c..99007b77f 100644 --- a/env/common_properties.asciidoc +++ b/env/common_properties.asciidoc @@ -211,70 +211,99 @@ The following table describes how the results of the SPIR-V channel orders. 
.Image Channel Order mapping -[cols="1,1",options="header"] +[cols="1,5,5",options="header"] |==== -| *SPIR-V Image Channel Order* +2+| *SPIR-V Image Channel Order* | *OpenCL Image Channel Order* -| `R` +| 0 +| *R* | `CL_R` -| `A` +| 1 +| *A* | `CL_A` -| `RG` +| 2 +| *RG* | `CL_RG` -| `RA` +| 3 +| *RA* | `CL_RA` -| `RGB` +| 4 +| *RGB* | `CL_RGB` -| `RGBA` +| 5 +| *RGBA* | `CL_RGBA` -| `BGRA` +| 6 +| *BGRA* | `CL_BGRA` -| `ARGB` +| 7 +| *ARGB* | `CL_ARGB` -| `Intensity` +| 8 +| *Intensity* | `CL_INTENSITY` -| `Luminance` +| 9 +| *Luminance* | `CL_LUMINANCE` -| `Rx` +| 10 +| *Rx* | `CL_Rx` -| `RGx` +| 11 +| *RGx* | `CL_RGx` -| `RGBx` +| 12 +| *RGBx* | `CL_RGBx` -| `Depth` +| 13 +| *Depth* | `CL_DEPTH` -| `DepthStencil` +| 14 +| *DepthStencil* | `CL_DEPTH_STENCIL` -| `sRGB` +| 15 +| *sRGB* | `CL_sRGB` -| `sRGBA` +| 16 +| *sRGBx* +| `CL_sRGBx` + +| 17 +| *sRGBA* | `CL_sRGBA` -| `sBGRA` +| 18 +| *sBGRA* | `CL_sBGRA` -| `sRGBx` -| `CL_sRGBx` +| 19 +| *ABGR* +| `CL_ABGR` |==== +[NOTE] +-- +The SPIR-V Image Channel Orders are enumerated in the same order as the +OpenCL Channel Order enums to enable simple conversion between the two. +-- + === Image Channel Data Type Mapping The following table describes how the results of the SPIR-V @@ -282,64 +311,87 @@ The following table describes how the results of the SPIR-V channel data types. 
.Image Channel Data Type mapping -[cols="1,1",options="header"] +[cols="1,5,5",options="header"] |==== -| *SPIR-V Image Channel Data Type* +2+| *SPIR-V Image Channel Data Type* | *OpenCL Image Channel Data Type* -| `SnormInt8` +| 0 +| *SnormInt8* | `CL_SNORM_INT8` -| `SnormInt16` +| 1 +| *SnormInt16* | `CL_SNORM_INT16` -| `UnormInt8` +| 2 +| *UnormInt8* | `CL_UNORM_INT8` -| `UnormInt16` +| 3 +| *UnormInt16* | `CL_UNORM_INT16` -| `UnormInt24` -| `CL_UNORM_INT24` - -| `UnormShort565` +| 4 +| *UnormShort565* | `CL_UNORM_SHORT_565` -| `UnormShort555` +| 5 +| *UnormShort555* | `CL_UNORM_SHORT_555` -| `UnormInt101010` +| 6 +| *UnormInt101010* | `CL_UNORM_INT_101010` -| `UnormInt101010_2` -| `CL_UNORM_INT_101010_2` - -| `SignedInt8` +| 7 +| *SignedInt8* | `CL_SIGNED_INT8` -| `SignedInt16` +| 8 +| *SignedInt16* | `CL_SIGNED_INT16` -| `SignedInt32` +| 9 +| *SignedInt32* | `CL_SIGNED_INT32` -| `UnsignedInt8` +| 10 +| *UnsignedInt8* | `CL_UNSIGNED_INT8` -| `UnsignedInt16` +| 11 +| *UnsignedInt16* | `CL_UNSIGNED_INT16` -| `UnsignedInt32` +| 12 +| *UnsignedInt32* | `CL_UNSIGNED_INT32` -| `HalfFloat` +| 13 +| *HalfFloat* | `CL_HALF_FLOAT` -| `Float` +| 14 +| *Float* | `CL_FLOAT` +| 15 +| *UnormInt24* +| `CL_UNORM_INT24` + +| 16 +| *UnormInt101010_2* +| `CL_UNORM_INT_101010_2` + |==== +[NOTE] +-- +The SPIR-V Image Channel Data Types are enumerated in the same order as the +OpenCL Channel Data Type enums to enable simple conversion between the two. 
+-- + === Kernels An *OpFunction* in a SPIR-V module that is identified with *OpEntryPoint* From 759841f4d9c328fe389e3fdf411fae7ba7d74c6d Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 3 Mar 2024 09:34:47 -0800 Subject: [PATCH 068/190] publish cl_intel_subgroup_local_block_io (#1028) --- .../cl_intel_subgroup_local_block_io.asciidoc | 480 ++++++++++++++++++ 1 file changed, 480 insertions(+) create mode 100644 extensions/cl_intel_subgroup_local_block_io.asciidoc diff --git a/extensions/cl_intel_subgroup_local_block_io.asciidoc b/extensions/cl_intel_subgroup_local_block_io.asciidoc new file mode 100644 index 000000000..b6a393fe0 --- /dev/null +++ b/extensions/cl_intel_subgroup_local_block_io.asciidoc @@ -0,0 +1,480 @@ +:data-uri: +:sectanchors: +:icons: font +:source-highlighter: coderay +// TODO: try rouge? + += cl_intel_subgroup_local_block_io + +== Name Strings + +`cl_intel_subgroup_local_block_io` + +== Contact + +Ben Ashbaugh, Intel (ben 'dot' ashbaugh 'at' intel 'dot' com) + +== Contributors + +// spell-checker: disable +Ben Ashbaugh, Intel +// spell-checker: enable + +== Notice + +Copyright (c) 2023 Intel Corporation. All rights reserved. + +== Status + +Shipping + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +OpenCL 1.2 and support for `cl_intel_subgroups` is required. +This extension is written against version 8 of the `cl_intel_subgroups` specification. +This extension interacts with the `cl_intel_subgroups_char`, `cl_intel_subgroups_short`, `cl_intel_subgroups_long`, and `cl_intel_spirv_subgroups` extensions. + +This extension requires OpenCL support for SPIR-V, either via OpenCL 2.1 or newer, or via the `cl_khr_il_program` extension. 
+ +== Overview + +This extension extends the subgroup block read and write functions defined by `cl_intel_subgroups` (and, when supported, `cl_intel_subgroups_char`, `cl_intel_subgroups_short`, and `cl_intel_subgroups_long`) to support reading from and writing to pointers to the `+__local+` memory address space in addition to pointers to the `+__global+` memory address space. + +== New API Functions + +None. + +== New API Enums + +None. + +== New OpenCL C Functions + +Add variants of the `uint` subgroup block read and write functions that support loading from and storing to pointers to the `+__local+` address space: + +[source] +---- +uint intel_sub_group_block_read_ui( const __local uint* p ) +uint2 intel_sub_group_block_read_ui2( const __local uint* p ) +uint4 intel_sub_group_block_read_ui4( const __local uint* p ) +uint8 intel_sub_group_block_read_ui8( const __local uint* p ) + +void intel_sub_group_block_write_ui( __local uint* p, uint data ) +void intel_sub_group_block_write_ui2( __local uint* p, uint2 data ) +void intel_sub_group_block_write_ui4( __local uint* p, uint4 data ) +void intel_sub_group_block_write_ui8( __local uint* p, uint8 data ) +---- + +For naming consistency, also add un-suffixed aliases of the `uint` functions as originally described in the `cl_intel_subgroups` extension: + +[source] +---- +uint intel_sub_group_block_read( const __local uint* p ) +uint2 intel_sub_group_block_read2( const __local uint* p ) +uint4 intel_sub_group_block_read4( const __local uint* p ) +uint8 intel_sub_group_block_read8( const __local uint* p ) + +void intel_sub_group_block_write( __local uint* p, uint data ) +void intel_sub_group_block_write2( __local uint* p, uint2 data ) +void intel_sub_group_block_write4( __local uint* p, uint4 data ) +void intel_sub_group_block_write8( __local uint* p, uint8 data ) +---- + +If `cl_intel_subgroups_char` is supported, add variants of the `uchar` subgroup block read and write functions that support loading from and storing to 
pointers to the `+__local+` address space: + +[source] +---- +uchar intel_sub_group_block_read_uc( const __local uchar* p ) +uchar2 intel_sub_group_block_read_uc2( const __local uchar* p ) +uchar4 intel_sub_group_block_read_uc4( const __local uchar* p ) +uchar8 intel_sub_group_block_read_uc8( const __local uchar* p ) +uchar16 intel_sub_group_block_read_uc16( const __local uchar* p ) + +void intel_sub_group_block_write_uc( __local uchar* p, uchar data ) +void intel_sub_group_block_write_uc2( __local uchar* p, uchar2 data ) +void intel_sub_group_block_write_uc4( __local uchar* p, uchar4 data ) +void intel_sub_group_block_write_uc8( __local uchar* p, uchar8 data ) +void intel_sub_group_block_write_uc16( __local uchar* p, uchar16 data ) +---- + +If `cl_intel_subgroups_short` is supported, add variants of the `ushort` subgroup block read and write functions that support loading from and storing to pointers to the `+__local+` address space: + +[source] +---- +ushort intel_sub_group_block_read_us( const __local ushort* p ) +ushort2 intel_sub_group_block_read_us2( const __local ushort* p ) +ushort4 intel_sub_group_block_read_us4( const __local ushort* p ) +ushort8 intel_sub_group_block_read_us8( const __local ushort* p ) + +void intel_sub_group_block_write_us( __local ushort* p, ushort data ) +void intel_sub_group_block_write_us2( __local ushort* p, ushort2 data ) +void intel_sub_group_block_write_us4( __local ushort* p, ushort4 data ) +void intel_sub_group_block_write_us8( __local ushort* p, ushort8 data ) +---- + +If `cl_intel_subgroups_long` is supported, add variants of the `ulong` subgroup block read and write functions that support loading from and storing to pointers to the `+__local+` address space: + +[source] +---- +ulong intel_sub_group_block_read_ul( const __local ulong* p ) +ulong2 intel_sub_group_block_read_ul2( const __local ulong* p ) +ulong4 intel_sub_group_block_read_ul4( const __local ulong* p ) +ulong8 intel_sub_group_block_read_ul8( const __local 
ulong* p ) + +void intel_sub_group_block_write_ul( __local ulong* p, ulong data ) +void intel_sub_group_block_write_ul2( __local ulong* p, ulong2 data ) +void intel_sub_group_block_write_ul4( __local ulong* p, ulong4 data ) +void intel_sub_group_block_write_ul8( __local ulong* p, ulong8 data ) +---- + +== Modifications to the OpenCL C Specification + +=== Modifications to Section 6.13.X "Sub Group Read and Write Functions" + +This section was added by the `cl_intel_subgroups` extension. + +Add versions of the 32-bit block read and write functions that support loading from and storing to pointers to the `+__local+` address space: + +[cols="5a,4",options="header"] +|================================== +|*Function* +|*Description* + +|[source,opencl_c] +---- +uint intel_sub_group_block_read( + const __global uint* p ) +uint2 intel_sub_group_block_read2( + const __global uint* p ) +uint4 intel_sub_group_block_read4( + const __global uint* p ) +uint8 intel_sub_group_block_read8( + const __global uint* p ) + +uint intel_sub_group_block_read_ui( + const __global uint* p ) +uint2 intel_sub_group_block_read_ui2( + const __global uint* p ) +uint4 intel_sub_group_block_read_ui4( + const __global uint* p ) +uint8 intel_sub_group_block_read_ui8( + const __global uint* p ) + +uint intel_sub_group_block_read( + const __local uint* p ) +uint2 intel_sub_group_block_read2( + const __local uint* p ) +uint4 intel_sub_group_block_read4( + const __local uint* p ) +uint8 intel_sub_group_block_read8( + const __local uint* p ) + +uint intel_sub_group_block_read_ui( + const __local uint* p ) +uint2 intel_sub_group_block_read_ui2( + const __local uint* p ) +uint4 intel_sub_group_block_read_ui4( + const __local uint* p ) +uint8 intel_sub_group_block_read_ui8( + const __local uint* p ) +---- + +| Reads 1, 2, 4, or 8 uints of data for each work item in the subgroup from the specified pointer as a block operation... 
+ +|[source,opencl_c] +---- +void intel_sub_group_block_write( + __global uint* p, uint data ) +void intel_sub_group_block_write2( + __global uint* p, uint2 data ) +void intel_sub_group_block_write4( + __global uint* p, uint4 data ) +void intel_sub_group_block_write8( + __global uint* p, uint8 data ) + +void intel_sub_group_block_write_ui( + __global uint* p, uint data ) +void intel_sub_group_block_write_ui2( + __global uint* p, uint2 data ) +void intel_sub_group_block_write_ui4( + __global uint* p, uint4 data ) +void intel_sub_group_block_write_ui8( + __global uint* p, uint8 data ) + +void intel_sub_group_block_write( + __local uint* p, uint data ) +void intel_sub_group_block_write2( + __local uint* p, uint2 data ) +void intel_sub_group_block_write4( + __local uint* p, uint4 data ) +void intel_sub_group_block_write8( + __local uint* p, uint8 data ) + +void intel_sub_group_block_write_ui( + __local uint* p, uint data ) +void intel_sub_group_block_write_ui2( + __local uint* p, uint2 data ) +void intel_sub_group_block_write_ui4( + __local uint* p, uint4 data ) +void intel_sub_group_block_write_ui8( + __local uint* p, uint8 data ) +---- + +| Writes 1, 2, 4, or 8 uints of data for each work item in the subgroup to the specified pointer as a block operation... 
+ +|================================== + +If `cl_intel_subgroups_char` is supported, add versions of the 8-bit block read and write functions that support loading from and storing to pointers to the `+__local+` address space: + +[cols="5a,4",options="header"] +|================================== +|*Function* +|*Description* + +|[source,opencl_c] +---- +uchar intel_sub_group_block_read_uc( + const __global uchar* p ) +uchar2 intel_sub_group_block_read_uc2( + const __global uchar* p ) +uchar4 intel_sub_group_block_read_uc4( + const __global uchar* p ) +uchar8 intel_sub_group_block_read_uc8( + const __global uchar* p ) +uchar16 intel_sub_group_block_read_uc16( + const __global uchar* p ) + +uchar intel_sub_group_block_read_uc( + const __local uchar* p ) +uchar2 intel_sub_group_block_read_uc2( + const __local uchar* p ) +uchar4 intel_sub_group_block_read_uc4( + const __local uchar* p ) +uchar8 intel_sub_group_block_read_uc8( + const __local uchar* p ) +uchar16 intel_sub_group_block_read_uc16( + const __local uchar* p ) +---- + +| Reads 1, 2, 4, 8, or 16 uchars of data for each work item in the subgroup from the specified pointer as a block operation... 
+ +|[source,opencl_c] +---- +void intel_sub_group_block_write_uc( + __global uchar* p, uchar data ) +void intel_sub_group_block_write_uc2( + __global uchar* p, uchar2 data ) +void intel_sub_group_block_write_uc4( + __global uchar* p, uchar4 data ) +void intel_sub_group_block_write_uc8( + __global uchar* p, uchar8 data ) +void intel_sub_group_block_write_uc16( + __global uchar* p, uchar16 data ) + +void intel_sub_group_block_write_uc( + __local uchar* p, uchar data ) +void intel_sub_group_block_write_uc2( + __local uchar* p, uchar2 data ) +void intel_sub_group_block_write_uc4( + __local uchar* p, uchar4 data ) +void intel_sub_group_block_write_uc8( + __local uchar* p, uchar8 data ) +void intel_sub_group_block_write_uc16( + __local uchar* p, uchar16 data ) +---- + +| Writes 1, 2, 4, 8, or 16 uchars of data for each work item in the subgroup to the specified pointer as a block operation... + +|================================== + +If `cl_intel_subgroups_short` is supported, add versions of the 16-bit block read and write functions that support loading from and storing to pointers to the `+__local+` address space: + +[cols="5a,4",options="header"] +|================================== +|*Function* +|*Description* + +|[source,opencl_c] +---- +ushort intel_sub_group_block_read_us( + const __global ushort* p ) +ushort2 intel_sub_group_block_read_us2( + const __global ushort* p ) +ushort4 intel_sub_group_block_read_us4( + const __global ushort* p ) +ushort8 intel_sub_group_block_read_us8( + const __global ushort* p ) + +ushort intel_sub_group_block_read_us( + const __local ushort* p ) +ushort2 intel_sub_group_block_read_us2( + const __local ushort* p ) +ushort4 intel_sub_group_block_read_us4( + const __local ushort* p ) +ushort8 intel_sub_group_block_read_us8( + const __local ushort* p ) +---- + +| Reads 1, 2, 4, or 8 ushorts of data for each work item in the subgroup from the specified pointer as a block operation... 
+ +|[source,opencl_c] +---- +void intel_sub_group_block_write_us( + __global ushort* p, ushort data ) +void intel_sub_group_block_write_us2( + __global ushort* p, ushort2 data ) +void intel_sub_group_block_write_us4( + __global ushort* p, ushort4 data ) +void intel_sub_group_block_write_us8( + __global ushort* p, ushort8 data ) + +void intel_sub_group_block_write_us( + __local ushort* p, ushort data ) +void intel_sub_group_block_write_us2( + __local ushort* p, ushort2 data ) +void intel_sub_group_block_write_us4( + __local ushort* p, ushort4 data ) +void intel_sub_group_block_write_us8( + __local ushort* p, ushort8 data ) +---- + +| Writes 1, 2, 4, or 8 ushorts of data for each work item in the subgroup to the specified pointer as a block operation... + +|================================== + +If `cl_intel_subgroups_long` is supported, add versions of the 64-bit block read and write functions that support loading from and storing to pointers to the `+__local+` address space: + +[cols="5a,4",options="header"] +|================================== +|*Function* +|*Description* + +|[source,opencl_c] +---- +ulong intel_sub_group_block_read_ul( + const __global ulong* p ) +ulong2 intel_sub_group_block_read_ul2( + const __global ulong* p ) +ulong4 intel_sub_group_block_read_ul4( + const __global ulong* p ) +ulong8 intel_sub_group_block_read_ul8( + const __global ulong* p ) + +ulong intel_sub_group_block_read_ul( + const __local ulong* p ) +ulong2 intel_sub_group_block_read_ul2( + const __local ulong* p ) +ulong4 intel_sub_group_block_read_ul4( + const __local ulong* p ) +ulong8 intel_sub_group_block_read_ul8( + const __local ulong* p ) +---- + +| Reads 1, 2, 4, or 8 ulongs of data for each work item in the subgroup from the specified pointer as a block operation... 
+ +|[source,opencl_c] +---- +void intel_sub_group_block_write_ul( + __global ulong* p, ulong data ) +void intel_sub_group_block_write_ul2( + __global ulong* p, ulong2 data ) +void intel_sub_group_block_write_ul4( + __global ulong* p, ulong4 data ) +void intel_sub_group_block_write_ul8( + __global ulong* p, ulong8 data ) + +void intel_sub_group_block_write_ul( + __local ulong* p, ulong data ) +void intel_sub_group_block_write_ul2( + __local ulong* p, ulong2 data ) +void intel_sub_group_block_write_ul4( + __local ulong* p, ulong4 data ) +void intel_sub_group_block_write_ul8( + __local ulong* p, ulong8 data ) +---- + +| Writes 1, 2, 4, or 8 ulongs of data for each work item in the subgroup to the specified pointer as a block operation... + +|================================== + +=== Modifications to Section 6.13.X.1 "Restrictions" + +This section was added by the `cl_intel_subgroups` extension. + +Change the description of the first section to: The following restrictions apply to the subgroup buffer block read and write functions that accept pointers to `+__global+` memory... + +Insert a section between the restrictions on subgroup buffer block read and write functions that accept pointers to `+__global+` memory and the restrictions on subgroup image block read and write functions: + +The following restrictions apply to the subgroup buffer block read and write functions that accept pointers to `+__local+` memory: + +* The pointer `p` must be 128-bit (16-byte) aligned for both reads and writes. + +== Modifications to the OpenCL SPIR-V Environment Specification + +=== Modifications to Section 7.1.X.2 "Block IO Instructions" + +This section was added by the `cl_intel_spirv_subgroups` extension. 
+ +Add to the validation rules for _Ptr_: + +Additionally, if the OpenCL environment supports the extension `cl_intel_subgroup_local_block_io`, for _Ptr_ valid _Storage Classes_ are: + +* *Workgroup* (equivalent to the `local` address space) + +=== Modifications to Section 7.1.X.3 "Notes and Restrictions" + +This section was added by the `cl_intel_spirv_subgroups` extension. + +Change the description of the restrictions on *SubgroupBufferBlockIOINTEL* instructions to: The following restrictions apply to the *SubgroupBufferBlockIOINTEL* instructions when the pointer operand _Ptr_ is a pointer to the *CrossWorkgroup* _Storage Class_... + +Insert a section between the restrictions on *SubgroupBufferBlockIOINTEL* instructions when the pointer operand _Ptr_ is a pointer to the *CrossWorkgroup* _Storage Class_ and restrictions on *SubgroupImageBlockIOINTEL* instructions: + +The following restrictions apply to the *SubgroupBufferBlockIOINTEL* instructions when the pointer operand _Ptr_ is a pointer to the *Workgroup* _Storage Class_: + +* The pointer _Ptr_ must be 128-bit (16-byte) aligned for both reads and writes. + +== Issues + +. What should this extension be called? ++ +-- +*RESOLVED*: `cl_intel_subgroup_local_block_io` +-- + +. Do we need un-suffixed aliases of the 32-bit subgroup block read and write functions? ++ +-- +*RESOLVED*: Yes, this extension describes both suffixed functions and their un-suffixed aliases. + +As background: + +The 32-bit subgroup block read and write functions were originally un-suffixed in `cl_intel_subgroups`. + +When we extended the subgroup block read and write functions for other types in `cl_intel_subgroups_short` (and, eventually, `cl_intel_subgroups_char` and `cl_intel_subgroups_long`), we added suffixed aliases for consistency with the suffixed functions added to support the other types. + +For consistency with `cl_intel_subgroups` we should include both the un-suffixed and suffixed versions of the 32-bit functions. 
+-- + +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1.0.0|2023-11-29|Ben Ashbaugh|*Initial revision for publication* +|======================================== + +//************************************************************************ +//Other formatting suggestions: +// +//* Use *bold* text for host APIs, or [source] syntax highlighting. +//* Use `mono` text for device APIs, or [source] syntax highlighting. +//* Use `mono` text for extension names, types, or enum values. +//* Use _italics_ for parameters. +//************************************************************************ From 1efdf51676405e9a7686678ba02f71e98405d844 Mon Sep 17 00:00:00 2001 From: Alastair Murray Date: Tue, 5 Mar 2024 07:07:32 +0000 Subject: [PATCH 069/190] Building individual extensions depends on generated files (#1076) Fix an occasional parallel build failure. I don't think this is a new issue, just it rarely occurs and only on a clean build. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 47848ed20..239a8e4e3 100644 --- a/Makefile +++ b/Makefile @@ -238,7 +238,7 @@ $(PDFDIR)/$(EXTSPEC).pdf: $(EXTSPECSRC) # Individual extensions spec(s) EXTDIR = extensions EXTENSIONSSPEC = extensions -EXTENSIONSSPECSRC = $(EXTDIR)/$(EXTENSIONSSPEC).txt \ +EXTENSIONSSPECSRC = $(EXTDIR)/$(EXTENSIONSSPEC).txt ${GENDEPENDS} \ $(shell grep ^include:: $(EXTDIR)/$(EXTENSIONSSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) # Included extension documents From 5aeda6ac5acca05f281f20ed7ea03b7984c2eba3 Mon Sep 17 00:00:00 2001 From: Alastair Murray Date: Wed, 6 Mar 2024 10:35:04 +0000 Subject: [PATCH 070/190] Remove CRLF line endings from two core spec files (#1079) All other text files in the repository use LF line endings. 
The presence of the carriage returns in the files means that on macOS the shell code that automatically builds up the Makefile dependencies from the include lines in Asciidoc was not working as xargs did not behave as intended. Note that various Intel vendor extensions also use CRLF line endings but this PR does not touch them, they do not contain includes so do not actually cause any problems. --- OpenCL_ICD_Installation.txt | 252 +-- env/image_addressing_and_filtering.asciidoc | 2104 +++++++++---------- 2 files changed, 1178 insertions(+), 1178 deletions(-) diff --git a/OpenCL_ICD_Installation.txt b/OpenCL_ICD_Installation.txt index 9032b77ab..072e706f4 100644 --- a/OpenCL_ICD_Installation.txt +++ b/OpenCL_ICD_Installation.txt @@ -1,126 +1,126 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -= OpenCL^(TM)^ ICD Installation Guidelines -:R: pass:q,r[^(R)^] -Khronos{R} OpenCL Working Group -:data-uri: -:icons: font -:toc2: -:toclevels: 2 -:max-width: 100% -:numbered: -:imagewidth: 800 -:fullimagewidth: width="800" -:source-highlighter: rouge -:rouge-style: github -:docinfo: shared-header -:docinfodir: config -:title-logo-image: image:images/OpenCL.png[top="25%",width="55%"] - - -// Various special / math symbols. This is easier to edit with than Unicode. -include::config/attribs.txt[] - -// Attributes that are shared by OpenCL specifications. -include::config/opencl.asciidoc[] - -// type of the source code in the document -:language: {basebackend@docbook:c++:cpp} - -include::copyrights.txt[] - -<<< - -== Introduction - -The OpenCL Installable Client Driver (ICD) is a mechanism to allow OpenCL implementations from multiple vendors to coexist on a system. 
A vendor OpenCL implementation is an OpenCL Installable Client Driver if it implements the extension `cl_khr_icd`, which is described in the OpenCL extension registry: - -https://www.khronos.org/registry/OpenCL/extensions/khr/cl_khr_icd.txt - -The ICD loader library is a shared resource that discovers and enumerates all OpenCL ICDs. It will typically be installed by an installer from one of the vendors. - -In order to prevent conflicts between vendor installers it is necessary to have strict guidelines for installation and uninstallation of the ICD loader library and associated system configuration. - -== General Guidelines - -Vendor installers MUST install and uninstall their ICD-compliant implementations in such a way that the installer: - -. Installs its own ICD loader library if and only if the existing ICD loader library is older than the one being installed. -. Does not remove ICD loader library at uninstall if other implementations exist. -. Does not cause existing installations to become inoperable or unusable in any way. This includes, but is not limited to, WHQL and similar signed package certification check failures. -. Does not manipulate the vendor enumeration order within the ICD loader library except to add (or remove) the new vendor implementation. - -=== Compatibility With Non-ICD implementations - -Because the ICD loader library and a non-ICD OpenCL implementation are likely to share the same library file name, behavior is undefined if the ICD loader library is installed on a system with an existing non-ICD OpenCL implementation, or if a non-ICD OpenCL implementation is installed on a system with an existing ICD loader library. In particular, in this scenario the non-ICD OpenCL implementation, or the ICD OpenCL implementation, or both, may cease to function correctly. - -== Windows ICD Installation and Uninstallation - -On Windows, the ICD loader library is `OpenCL.dll`. 
- -In general, Windows Vendor installers MUST follow the guidelines described here: - -* https://msdn.microsoft.com/en-us/library/ms954376.aspx - -If the Windows Vendor installer is using the _Windows Installer_ then many of the steps below will happen automatically. - -=== Windows ICD Installation - -. Vendor MAY include `OpenCL.dll` file in its vendor package. - -. IF Vendor includes `OpenCL.dll` in the manifest of the signed vendor package, then Vendor MUST NOT include `OpenCL.dll` in the manifest of the signed vendor package to map to either of the following paths: -.. `%WINDIR%\system32\OpenCL.dll` -.. `%WINDIR%\SysWOW64\OpenCL.dll` - -+ -Vendor MAY include `OpenCL.dll` in the manifest of a signed package provided that a vendor specific directory is used, such as `%PROGRAMFILES%\\OpenCL`. - -. Vendor MUST check for existing OpenCL installations before installing `OpenCL.dll`. -.. Vendor SHALL check the version of `OpenCL.dll` located in -... `%WINDIR%\System32\` -... `%WINDIR%\SysWOW64\` -.. IF `OpenCL.dll` is not present, install ICD in 3.a.i and/or 3.a.ii as appropriate. -.. IF version of installed `OpenCL.dll` < vendor `OpenCL.dll` version, then replace the installed `OpenCL.dll` in 3.a.i and/or 3.a.ii as appropriate. -.. IF version of installed `OpenCL.dll` >= vendor `OpenCL.dll` version, then vendor MUST NOT modify the installed `OpenCL.dll`. - -+ -Versioning of `OpenCL.dll` is described in a later section. - -. Vendor MUST accurately increment the reference count for `OpenCL.dll`. -.. IF Vendor does not use the Windows Installer, the Vendor installer MUST increment the reference count under the registry key: - - HKLM\SOFTWARE\Microsoft\Windows\Current Version\SharedDLLs - -=== Windows ICD Uninstallation - -Uninstalling `OpenCL.dll` should be straightforward since it is reference counted as a shared component. - -. Vendor MUST accurately decrement the reference count for `OpenCL.dll` and delete it when the reference count reaches zero. 
- -Note that older installers that do not comply with these guidelines may not check the reference count when uninstalling and hence may erroneously uninstall `OpenCL.dll` while it is still in use by another OpenCL implementation. If this occurs, reinstalling the other OpenCL implementation will usually fix the issue. - -=== OpenCL.dll Versioning - -The `OpenCL.dll` has a FileVersion string of the form “x.y.z.0”. The parts *_x_* and *_y_* denote the OpenCL major and minor version (2.2 at the time of writing this document). The third part *_z_* is a revision number which will be incremented for every change made to the ICD loader sources. - -For same version of OpenCL, higher *_z_* value means a later revision. For different versions of OpenCL a higher OpenCL version means a later revision, irrespective of the value of *_z_*. - -If a given OpenCL.dll file does not have a valid FileVersion string or if the FileVersion string is absent then the version should be considered to be "0.0.0.0". - -== Android ICD Installation - -On Android, the ICD loader library is `libOpenCL.so`. - -=== Target Device Filesystem - -. Vendor MUST install `libOpenCL.so` to reside directly within the directory `/vendor/lib/` which is one of the paths searched by the dynamic loader on an Android system. - -Usually an Android system will have a single-vendor OpenCL installation, so the need to overwrite `libOpenCL.so` should not arise. - -=== Android SDK/NDK - -Vendors should package the `libOpenCL.so` stub for linking to user applications in their Android SDK/NDK and either configure the default environment, or provide instructions for configuring the build environment, or both. - -Typically a vendor should put `libOpenCL.so` inside a directory within the Android SDK/NDK package provided by the vendor for application development on the vendor’s device. The path to this directory should be added to LIBPATH in the default environment of the IDE (e.g. Eclipse) and other build configurations (e.g. 
Makefiles) in the SDK/NDK. The path should also be mentioned in the vendor documentation to allow application developers to write their own Makefiles or other build systems. +// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Creative Commons Attribution 4.0 International License; see +// http://creativecommons.org/licenses/by/4.0/ + += OpenCL^(TM)^ ICD Installation Guidelines +:R: pass:q,r[^(R)^] +Khronos{R} OpenCL Working Group +:data-uri: +:icons: font +:toc2: +:toclevels: 2 +:max-width: 100% +:numbered: +:imagewidth: 800 +:fullimagewidth: width="800" +:source-highlighter: rouge +:rouge-style: github +:docinfo: shared-header +:docinfodir: config +:title-logo-image: image:images/OpenCL.png[top="25%",width="55%"] + + +// Various special / math symbols. This is easier to edit with than Unicode. +include::config/attribs.txt[] + +// Attributes that are shared by OpenCL specifications. +include::config/opencl.asciidoc[] + +// type of the source code in the document +:language: {basebackend@docbook:c++:cpp} + +include::copyrights.txt[] + +<<< + +== Introduction + +The OpenCL Installable Client Driver (ICD) is a mechanism to allow OpenCL implementations from multiple vendors to coexist on a system. A vendor OpenCL implementation is an OpenCL Installable Client Driver if it implements the extension `cl_khr_icd`, which is described in the OpenCL extension registry: + +https://www.khronos.org/registry/OpenCL/extensions/khr/cl_khr_icd.txt + +The ICD loader library is a shared resource that discovers and enumerates all OpenCL ICDs. It will typically be installed by an installer from one of the vendors. + +In order to prevent conflicts between vendor installers it is necessary to have strict guidelines for installation and uninstallation of the ICD loader library and associated system configuration. + +== General Guidelines + +Vendor installers MUST install and uninstall their ICD-compliant implementations in such a way that the installer: + +. 
Installs its own ICD loader library if and only if the existing ICD loader library is older than the one being installed. +. Does not remove ICD loader library at uninstall if other implementations exist. +. Does not cause existing installations to become inoperable or unusable in any way. This includes, but is not limited to, WHQL and similar signed package certification check failures. +. Does not manipulate the vendor enumeration order within the ICD loader library except to add (or remove) the new vendor implementation. + +=== Compatibility With Non-ICD implementations + +Because the ICD loader library and a non-ICD OpenCL implementation are likely to share the same library file name, behavior is undefined if the ICD loader library is installed on a system with an existing non-ICD OpenCL implementation, or if a non-ICD OpenCL implementation is installed on a system with an existing ICD loader library. In particular, in this scenario the non-ICD OpenCL implementation, or the ICD OpenCL implementation, or both, may cease to function correctly. + +== Windows ICD Installation and Uninstallation + +On Windows, the ICD loader library is `OpenCL.dll`. + +In general, Windows Vendor installers MUST follow the guidelines described here: + +* https://msdn.microsoft.com/en-us/library/ms954376.aspx + +If the Windows Vendor installer is using the _Windows Installer_ then many of the steps below will happen automatically. + +=== Windows ICD Installation + +. Vendor MAY include `OpenCL.dll` file in its vendor package. + +. IF Vendor includes `OpenCL.dll` in the manifest of the signed vendor package, then Vendor MUST NOT include `OpenCL.dll` in the manifest of the signed vendor package to map to either of the following paths: +.. `%WINDIR%\system32\OpenCL.dll` +.. `%WINDIR%\SysWOW64\OpenCL.dll` + ++ +Vendor MAY include `OpenCL.dll` in the manifest of a signed package provided that a vendor specific directory is used, such as `%PROGRAMFILES%\\OpenCL`. + +. 
Vendor MUST check for existing OpenCL installations before installing `OpenCL.dll`. +.. Vendor SHALL check the version of `OpenCL.dll` located in +... `%WINDIR%\System32\` +... `%WINDIR%\SysWOW64\` +.. IF `OpenCL.dll` is not present, install ICD in 3.a.i and/or 3.a.ii as appropriate. +.. IF version of installed `OpenCL.dll` < vendor `OpenCL.dll` version, then replace the installed `OpenCL.dll` in 3.a.i and/or 3.a.ii as appropriate. +.. IF version of installed `OpenCL.dll` >= vendor `OpenCL.dll` version, then vendor MUST NOT modify the installed `OpenCL.dll`. + ++ +Versioning of `OpenCL.dll` is described in a later section. + +. Vendor MUST accurately increment the reference count for `OpenCL.dll`. +.. IF Vendor does not use the Windows Installer, the Vendor installer MUST increment the reference count under the registry key: + + HKLM\SOFTWARE\Microsoft\Windows\Current Version\SharedDLLs + +=== Windows ICD Uninstallation + +Uninstalling `OpenCL.dll` should be straightforward since it is reference counted as a shared component. + +. Vendor MUST accurately decrement the reference count for `OpenCL.dll` and delete it when the reference count reaches zero. + +Note that older installers that do not comply with these guidelines may not check the reference count when uninstalling and hence may erroneously uninstall `OpenCL.dll` while it is still in use by another OpenCL implementation. If this occurs, reinstalling the other OpenCL implementation will usually fix the issue. + +=== OpenCL.dll Versioning + +The `OpenCL.dll` has a FileVersion string of the form “x.y.z.0”. The parts *_x_* and *_y_* denote the OpenCL major and minor version (2.2 at the time of writing this document). The third part *_z_* is a revision number which will be incremented for every change made to the ICD loader sources. + +For same version of OpenCL, higher *_z_* value means a later revision. 
For different versions of OpenCL a higher OpenCL version means a later revision, irrespective of the value of *_z_*. + +If a given OpenCL.dll file does not have a valid FileVersion string or if the FileVersion string is absent then the version should be considered to be "0.0.0.0". + +== Android ICD Installation + +On Android, the ICD loader library is `libOpenCL.so`. + +=== Target Device Filesystem + +. Vendor MUST install `libOpenCL.so` to reside directly within the directory `/vendor/lib/` which is one of the paths searched by the dynamic loader on an Android system. + +Usually an Android system will have a single-vendor OpenCL installation, so the need to overwrite `libOpenCL.so` should not arise. + +=== Android SDK/NDK + +Vendors should package the `libOpenCL.so` stub for linking to user applications in their Android SDK/NDK and either configure the default environment, or provide instructions for configuring the build environment, or both. + +Typically a vendor should put `libOpenCL.so` inside a directory within the Android SDK/NDK package provided by the vendor for application development on the vendor’s device. The path to this directory should be added to LIBPATH in the default environment of the IDE (e.g. Eclipse) and other build configurations (e.g. Makefiles) in the SDK/NDK. The path should also be mentioned in the vendor documentation to allow application developers to write their own Makefiles or other build systems. diff --git a/env/image_addressing_and_filtering.asciidoc b/env/image_addressing_and_filtering.asciidoc index f230d61ba..1098aba22 100644 --- a/env/image_addressing_and_filtering.asciidoc +++ b/env/image_addressing_and_filtering.asciidoc @@ -1,1052 +1,1052 @@ -// Copyright 2018-2024 The Khronos Group. 
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[image_addressing_and_filtering]] -== Image Addressing and Filtering - -This section describes how image operations behave in an OpenCL environment. - -[[image-coordinates]] -=== Image Coordinates - -Let `w~t~`, `h~t~` and `d~t~` be the width, height (or image array size for a 1D image array) and depth (or image array size for a 2D image array) of the image in pixels. -Let `coord.xy` (also referred to as `(s,t)`) or `coord.xyz` (also referred to as `(s,t,r)`) be the coordinates specified to an image read instruction (such as *OpImageRead*) or an image write instruction (such as *OpImageWrite*). - -If image coordinates specified to an image read instruction are normalized (as specified in the sampler), the `s`, `t`, and `r` coordinate values are multiplied by `w~t~`, `h~t~` and `d~t~` respectively to generate the unnormalized coordinate values. -For image arrays, the image array coordinate (i.e. `t` if it is a 1D image array or `r` if it is a 2D image array) specified to the image read instruction must always be the unnormalized image coordinate value. - -Image coordinates specified to an image write instruction are always unnormalized image coordinate values. - -Let `(u,v,w)` represent the unnormalized image coordinate values. - -If values in `(s,t,r)` or `(u,v,w)` are INF or NaN, the behavior of the image read instruction or image write instruction is undefined. - -[[addressing-and-filter-modes]] -=== Addressing and Filter Modes - -After generating the image coordinate `(u,v,w)` we apply the appropriate addressing and filter mode to generate the appropriate sample locations to read from the image. 
- -[[clamp-addressing]] -==== Clamp and None Addressing Modes - -We first describe how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is `CL_ADDRESS_CLAMP`, `CL_ADDRESS_CLAMP_TO_EDGE`, or `CL_ADDRESS_NONE`. - -[[clamp-nearest_filtering]] -===== Nearest Filtering - -When the filter mode is `CL_FILTER_NEAREST`, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. -The image element location `(i,j,k)` is computed as: - -[latexmath] -++++ -\begin{aligned} -i & = address\_mode((int)floor(u))\\ -j & = address\_mode((int)floor(v))\\ -k & = address\_mode((int)floor(w)) -\end{aligned} -++++ - -For a 3D image, the image element at location `(i,j,k)` becomes the color value. -For a 2D image, the image element at location `(i,j)` becomes the color value. - -The below table describes the `address_mode` function. - -[[addressing_modes_to_generate_texel_location]] -.Addressing Modes to Generate Texel Location -[width="100%",cols="50%,50%",options="header"] -|==== -a|*Addressing Mode* -a|*Result of _address_mode(coord)_* - -a|`CL_ADDRESS_CLAMP` -a|_clamp (coord, -1, size)_ - -a|`CL_ADDRESS_CLAMP_TO_EDGE` -a|_clamp (coord, 0, size - 1)_ - -a|`CL_ADDRESS_NONE` -a|_coord_ -|==== - -The size term in the table above is `w~t~` for u, `h~t~` for v and `d~t~` for w. - -The clamp function used in the table above is defined as: - -[latexmath] -++++ -\begin{aligned} -clamp(a, b, c) & = return (a < b) ? b : ((a > c) ? c : a) -\end{aligned} -++++ - -If the addressing mode is `CL_ADDRESS_CLAMP` or `CL_ADDRESS_CLAMP_TO_EDGE`, and the selected texel location `(i,j,k)` refers to a location outside the image, the border color is used as the color value for the texel. 
- -Otherwise, if the addressing mode is `CL_ADDRESS_NONE` and the selected texel location `(i,j,k)` refers to a location outside the image, the color value for the texel is undefined. - -[[clamp-linear-filtering]] -===== Linear Filtering - -When the filter mode is `CL_FILTER_LINEAR`, a 2 x 2 square of image elements (for a 2D image) or a 2 x 2 x 2 cube of image elements (for a 3D image) is selected. -This 2 x 2 square or 2 x 2 x 2 cube is obtained as follows. - -Let: - -[latexmath] -++++ -\begin{aligned} -i0 & = address\_mode((int)floor(u - 0.5))\\ -j0 & = address\_mode((int)floor(v - 0.5))\\ -k0 & = address\_mode((int)floor(w - 0.5))\\ -i1 & = address\_mode((int)floor(u - 0.5) + 1)\\ -j1 & = address\_mode((int)floor(v - 0.5) + 1)\\ -k1 & = address\_mode((int)floor(w - 0.5) + 1)\\ -a & = frac(u - 0.5)\\ -b & = frac(v - 0.5)\\ -c & = frac(w - 0.5) -\end{aligned} -++++ - -The frac function determines the fractional part of x and is computed as: - -[latexmath] -++++ -\begin{aligned} -frac(x) & = x - floor(x) -\end{aligned} -++++ - -For a 3D image, the color value is computed as: - -[latexmath] -++++ -\begin{array}{rcl} -T &=& (1 - a) \times (1 - b) \times (1 - c) \times T_{i0j0k0}\\ - & & {} + a \times (1 - b) \times (1 - c) \times T_{i1j0k0}\\ - & & {} + (1 - a) \times b \times (1 - c) \times T_{i0j1k0}\\ - & & {} + a \times b \times (1 - c) \times T_{i1j1k0}\\ - & & {} + (1 - a) \times (1 - b) \times c \times T_{i0j0k1}\\ - & & {} + a \times (1 - b) \times c \times T_{i1j0k1}\\ - & & {} + (1 - a) \times b \times c \times T_{i0j1k1}\\ - & & {} + a \times b \times c \times T_{i1j1k1} -\end{array} -++++ - -where `T~ijk~` is the image element at location `(i,j,k)` in the 3D image.
- -For a 2D image, the color value is computed as: - -[latexmath] -++++ -\begin{array}{rcl} -T &=& (1 - a) \times (1 - b) \times T_{i0j0}\\ - & & {} + a \times (1 - b) \times T_{i1j0}\\ - & & {} + (1 - a) \times b \times T_{i0j1}\\ - & & {} + a \times b \times T_{i1j1} -\end{array} -++++ - -where `T~ij~` is the image element at location `(i,j)` in the 2D image. - -If the addressing mode is `CL_ADDRESS_CLAMP` or `CL_ADDRESS_CLAMP_TO_EDGE`, and any of the selected `T~ijk~` or `T~ij~` refers to a location outside the image, the border color is used as the image element. - -Otherwise, if the addressing mode is `CL_ADDRESS_NONE`, and any of the selected `T~ijk~` or `T~ij~` refers to a location outside the image, the color value is undefined. - -If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT`, and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. - -[[repeat-addressing]] -==== Repeat Addressing Mode - -We now discuss how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is `CL_ADDRESS_REPEAT`. - -[[repeat-nearest-filtering]] -===== Nearest Filtering - -When filter mode is `CL_FILTER_NEAREST`, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. -The image element location `(i,j,k)` is computed as: - -[latexmath] -++++ -\begin{array}{l} -u = (s - floor(s)) \times w_t\\ -i = (int)floor(u)\\ -if\ (i > w_t - 1)\\ -\qquad i = i - w_t\\ -v = (t - floor(t)) \times h_t\\ -j = (int)floor(v)\\ -if\ (j > h_t - 1)\\ -\qquad j = j - h_t\\ -w = (r - floor(r)) \times d_t\\ -k = (int)floor(w)\\ -if\ (k > d_t - 1)\\ -\qquad k = k - d_t -\end{array} -++++ - -For a 3D image, the image element at location (i, j, k) becomes the color value. -For a 2D image, the image element at location (i, j) becomes the color value. 
- -[[repeat-linear-filtering]] -===== Linear Filtering - -When filter mode is `CL_FILTER_LINEAR`, a 2 x 2 square of image elements for a 2D image or a 2 x 2 x 2 cube of image elements for a 3D image is selected. -This 2 x 2 square or 2 x 2 x 2 cube is obtained as follows. - -Let - -[latexmath] -++++ -\begin{array}{l} -u = (s - floor(s)) \times w_t\\ -i0 = (int)floor(u - 0.5)\\ -i1 = i0 + 1\\ -if (i0 < 0)\\ -i0 = w_t + i0\\ -if\ (i1 > w_t - 1)\\ -\qquad i1 = i1 - w_t\\ -v = (t - floor(t)) \times h_t\\ -j0 = (int)floor(v - 0.5)\\ -j1 = j0 + 1\\ -if (j0 < 0)\\ -j0 = h_t + j0\\ -if\ (j1 > h_t - 1)\\ -\qquad j1 = j1 - h_t\\ -w = (r - floor(r)) \times d_t\\ -k0 = (int)floor(w - 0.5)\\ -k1 = k0 + 1\\ -if (k0 < 0)\\ -\qquad k0 = d_t + k0\\ -if\ (k1 > d_t - 1)\\ -\qquad k1 = k1 - d_t\\ -a = frac(u - 0.5)\\ -b = frac(v - 0.5)\\ -c = frac(w - 0.5) -\end{array} -++++ - -For a 3D image, the color value is computed as: - -[latexmath] -++++ -\begin{array}{rcl} -T &=& (1 - a) \times (1 - b) \times (1 - c) \times T_{i0j0k0}\\ - & & {} + a \times (1 - b) \times (1 - c) \times T_{i1j0k0}\\ - & & {} + (1 - a) \times b \times (1 - c) \times T_{i0j1k0}\\ - & & {} + a \times b \times (1 - c) \times T_{i1j1k0}\\ - & & {} + (1 - a) \times (1 - b) \times c \times T_{i0j0k1}\\ - & & {} + a \times (1 - b) \times c \times T_{i1j0k1}\\ - & & {} + (1 - a) \times b \times c \times T_{i0j1k1}\\ - & & {} + a \times b \times c \times T_{i1j1k1} -\end{array} -++++ - -where `T~ijk~` is the image element at location `(i,j,k)` in the 3D image. - -For a 2D image, the color value is computed as: - -[latexmath] -++++ -\begin{array}{rcl} -T &=&(1 - a) \times (1 - b) \times T_{i0j0}\\ - & & {} + a \times (1 - b) \times T_{i1j0}\\ - & & {} + (1 - a) \times b \times T_{i0j1}\\ - & & {} + a \times b \times T_{i1j1} -\end{array} -++++ - -where `T~ij~` is the image element at location `(i,j)` in the 2D image. 
- -If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT`, and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. - -[[mirrored-repeat-addressing]] -==== Mirrored Repeat Addressing Mode - -We now discuss how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is `CL_ADDRESS_MIRRORED_REPEAT`. -The `CL_ADDRESS_MIRRORED_REPEAT` addressing mode causes the image to be read as if it is tiled at every integer seam, with the interpretation of the image data flipped at each integer crossing. - -[[mirrored-repeat-nearest-filtering]] -===== Nearest Filtering - -When filter mode is `CL_FILTER_NEAREST`, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. -The image element location `(i,j,k)` is computed as: - -[latexmath] -++++ -\begin{array}{rcl} -s' &=& 2.0f \times rint(0.5f \times s)\\ -s' &=& fabs(s - s')\\ -u &=& s' \times w_t\\ -i &=& (int)floor(u)\\ -i &=& min(i, w_t - 1)\\ -t' &=& 2.0f \times rint(0.5f \times t)\\ -t' &=& fabs(t - t')\\ -v &=& t' \times h_t\\ -j &=& (int)floor(v)\\ -j &=& min(j, h_t - 1)\\ -r' &=& 2.0f \times rint(0.5f \times r)\\ -r' &=& fabs(r - r')\\ -w &=& r' \times d_t\\ -k &=& (int)floor(w)\\ -k &=& min(k, d_t - 1) -\end{array} -++++ - -For a 3D image, the image element at location (i, j, k) becomes the color value. -For a 2D image, the image element at location (i, j) becomes the color value. - -[[mirrored-repeat-linear-filtering]] -===== Linear Filtering - -When filter mode is `CL_FILTER_LINEAR`, a 2 x 2 square of image elements for a 2D image or a 2 x 2 x 2 cube of image elements for a 3D image is selected. -This 2 x 2 square or 2 x 2 x 2 cube is obtained as follows.
- -Let - -[latexmath] -++++ -\begin{array}{rcl} -s' &=& 2.0f \times rint(0.5f \times s)\\ -s' &=& fabs(s - s')\\ -u &=& s' \times w_t\\ -i0 &=& (int)floor(u - 0.5f)\\ -i1 &=& i0 + 1\\ -i0 &=& max(i0, 0)\\ -i1 &=& min(i1, w_t - 1)\\ -t' &=& 2.0f \times rint(0.5f \times t)\\ -t' &=& fabs(t - t')\\ -v &=& t' \times h_t\\ -j0 &=& (int)floor(v - 0.5f)\\ -j1 &=& j0 + 1\\ -j0 &=& max(j0, 0)\\ -j1 &=& min(j1, h_t - 1)\\ -r' &=& 2.0f \times rint(0.5f \times r)\\ -r' &=& fabs(r - r')\\ -w &=& r' \times d_t\\ -k0 &=& (int)floor(w - 0.5f)\\ -k1 &=& k0 + 1\\ -k0 &=& max(k0, 0)\\ -k1 &=& min(k1, d_t - 1)\\ -a &=& frac(u - 0.5)\\ -b &=& frac(v - 0.5)\\ -c &=& frac(w - 0.5) -\end{array} -++++ - -For a 3D image, the color value is computed as: - -[latexmath] -++++ -\begin{array}{rcl} -T &=& (1 - a) \times (1 - b) \times (1 - c) \times T_{i0j0k0}\\ - & & {} + a \times (1 - b) \times (1 - c) \times T_{i1j0k0}\\ - & & {} + (1 - a) \times b \times (1 - c) \times T_{i0j1k0}\\ - & & {} + a \times b \times (1 - c) \times T_{i1j1k0}\\ - & & {} + (1 - a) \times (1 - b) \times c \times T_{i0j0k1}\\ - & & {} + a \times (1 - b) \times c \times T_{i1j0k1}\\ - & & {} + (1 - a) \times b \times c \times T_{i0j1k1}\\ - & & {} + a \times b \times c \times T_{i1j1k1} -\end{array} -++++ - -where `T~ijk~` is the image element at location `(i,j,k)` in the 3D image. - -For a 2D image, the color value is computed as: - -[latexmath] -++++ -\begin{array}{rcl} -T &=& (1 - a) \times (1 - b) \times T_{i0j0}\\ - & & {} + a \times (1 - b) \times T_{i1j0}\\ - & & {} + (1 - a) \times b \times T_{i0j1}\\ - & & {} + a \times b \times T_{i1j1} -\end{array} -++++ - -where `T~ij~` is the image element at location `(i,j)` in the 2D image. - -For a 1D image, the color value is computed as: - -[latexmath] -++++ -\begin{array}{rcl} -T &=& (1 - a) \times T_{i0} + a \times T_{i1} -\end{array} -++++ - -where `T~i~` is the image element at location `(i)` in the 1D image.
- -If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT` and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. - -[[precision-of-addressing-and-filter-modes]] -=== Precision of Addressing and Filter Modes - -If the sampler is specified as using unnormalized coordinates (floating-point or integer coordinates), filter mode set to `CL_FILTER_NEAREST` and addressing mode set to one of the following modes - `CL_ADDRESS_CLAMP`, `CL_ADDRESS_CLAMP_TO_EDGE` or `CL_ADDRESS_NONE` - the location of the image element in the image given by `(i,j,k)` will be computed without any loss of precision. - -For all other sampler combinations of normalized or unnormalized coordinates, filter modes, and addressing modes, the relative error or precision of the addressing mode calculations and the image filter operation are not defined. -To ensure precision of image addressing and filter calculations across any OpenCL device for these sampler combinations, developers may unnormalize the image coordinate in the kernel, and then implement the linear filter in the kernel with appropriate read image instructions with a sampler that uses unnormalized coordinates, filter mode set to `CL_FILTER_NEAREST`, addressing mode set to `CL_ADDRESS_CLAMP`, `CL_ADDRESS_CLAMP_TO_EDGE` or `CL_ADDRESS_NONE`, and finally performing the interpolation of color values read from the image to generate the filtered color value. - -[[conversion-rules]] -=== Conversion Rules - -In this section we discuss conversion rules that are applied when reading and writing images in a kernel. - -[[conversion-rules-for-normalized-integer-channel-data-types]] -==== Conversion Rules for Normalized Integer Channel Data Types - -In this section we discuss converting normalized integer channel data types to half-precision and single-precision floating-point values and vice-versa. 
- -[[converting-normalized-integer-channel-data-types-to-half-precision-floating-point-values]] -===== Converting Normalized Integer Channel Data Types to Half Precision Floating-point Values - -For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized half precision floating-point values in the range [0.0h ... 1.0h]. - -For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit signed integer to normalized half precision floating-point values in the range [-1.0h ... 1.0h]. - -These conversions are performed as follows: - -* `CL_UNORM_INT8` (8-bit unsigned integer) -> `half` -+ -[latexmath] -++++ -normalized\_half\_value(x)=round\_to\_half(\frac{x}{255}) -++++ - -* `CL_UNORM_INT_101010` (10-bit unsigned integer) -> `half` -+ -[latexmath] -++++ -normalized\_half\_value(x)=round\_to\_half(\frac{x}{1023}) -++++ - -* `CL_UNORM_INT16` (16-bit unsigned integer) -> `half` -+ -[latexmath] -++++ -normalized\_half\_value(x)=round\_to\_half(\frac{x}{65535}) -++++ - -* `CL_SNORM_INT8` (8-bit signed integer) -> `half` -+ -[latexmath] -++++ -normalized\_half\_value(x)=max(-1.0h, round\_to\_half(\frac{x}{127})) -++++ - -* `CL_SNORM_INT16` (16-bit signed integer) -> `half` -+ -[latexmath] -++++ -normalized\_half\_value(x)=max(-1.0h, round\_to\_half(\frac{x}{32767})) -++++ - -The precision of the above conversions is \<= 1.5 ulp except for the following cases: - -For `CL_UNORM_INT8`: - - * 0 must convert to 0.0h, and - * 255 must convert to 1.0h - -For `CL_UNORM_INT_101010`: - - * 0 must convert to 0.0h, and - * 1023 must convert to 1.0h - -For `CL_UNORM_INT16`: - - * 0 must convert to 0.0h, and - * 65535 must convert to 1.0h - -For `CL_SNORM_INT8`: - - * -128 and -127 must convert to -1.0h, - * 0 must convert to 0.0h, and - * 127 
must convert to 1.0h - -For `CL_SNORM_INT16`: - - * -32768 and -32767 must convert to -1.0h, - * 0 must convert to 0.0h, and - * 32767 must convert to 1.0h - -[[converting-half-precision-floating-point-values-to-normalized-integer-channel-data-types]] -===== Converting Half Precision Floating-point Values to Normalized Integer Channel Data Types - -For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image write instructions will convert the half precision floating-point color value to an 8-bit or 16-bit unsigned integer. - -For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image write instructions will convert the half precision floating-point color value to an 8-bit or 16-bit signed integer. - -OpenCL implementations may choose to approximate the rounding mode used in the conversions described below. -When approximate rounding is used instead of the preferred rounding, the result of the conversion must satisfy the bound given below. 
- -The conversions from half precision floating-point values to normalized integer values are performed as follows: - - * `half` -> `CL_UNORM_INT8` (8-bit unsigned integer) -+ -[latexmath] -++++ -\begin{aligned} -& f(x)=max(0,min(255,255 \times x))\\ -\\ -& f_{preferred}(x) = -\begin{cases} - round\_to\_nearest\_even\_uint8(f(x)) & x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & x = \infty \text{ or } x = NaN -\end{cases}\\ -& f_{approx}(x) = -\begin{cases} - round\_to\_impl\_uint8(f(x)) & x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & x = \infty \text{ or } x = NaN -\end{cases}\\ -\\ -& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN -\end{aligned} -++++ - - * `half` -> `CL_UNORM_INT16` (16-bit unsigned integer) -+ -[latexmath] -++++ -\begin{aligned} -& f(x)=max(0,min(65535,65535 \times x))\\ -\\ -& f_{preferred}(x) = -\begin{cases} - round\_to\_nearest\_even\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -& f_{approx}(x) = -\begin{cases} - round\_to\_impl\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -\\ -& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN -\end{aligned} -++++ - - * `half` -> `CL_SNORM_INT8` (8-bit signed integer) -+ -[latexmath] -++++ -\begin{aligned} -& f(x)=max(-128,min(127,127 \times x))\\ -\\ -& f_{preferred}(x) = -\begin{cases} - round\_to\_nearest\_even\_int8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -& f_{approx}(x) = -\begin{cases} - round\_to\_impl\_int8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -\\ -& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty
\text{ and } x \neq NaN -\end{aligned} -++++ - - * `half` -> `CL_SNORM_INT16` (16-bit signed integer) -+ -[latexmath] -++++ -\begin{aligned} -& f(x)=max(-32768,min(32767,32767 \times x))\\ -\\ -& f_{preferred}(x) = -\begin{cases} - round\_to\_nearest\_even\_int16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -& f_{approx}(x) = -\begin{cases} - round\_to\_impl\_int16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -\\ -& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN -\end{aligned} -++++ - -[[converting-normalized-integer-channel-data-types-to-floating-point-values]] -===== Converting Normalized Integer Channel Data Types to Floating-point Values - -For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized floating-point values in the range [0.0f ... 1.0f]. - -For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit signed integer to normalized floating-point values in the range [-1.0f ... 1.0f]. 
- -These conversions are performed as follows: - - * `CL_UNORM_INT8` (8-bit unsigned integer) -> `float` -+ -[latexmath] -++++ -normalized\_float\_value(x)=round\_to\_float(\frac{x}{255}) -++++ - - * `CL_UNORM_INT_101010` (10-bit unsigned integer) -> `float` -+ -[latexmath] -++++ -normalized\_float\_value(x)=round\_to\_float(\frac{x}{1023}) -++++ - - * `CL_UNORM_INT16` (16-bit unsigned integer) -> `float` -+ -[latexmath] -++++ -normalized\_float\_value(x)=round\_to\_float(\frac{x}{65535}) -++++ - - * `CL_SNORM_INT8` (8-bit signed integer) -> `float` -+ -[latexmath] -++++ -normalized\_float\_value(x)=max(-1.0f, round\_to\_float(\frac{x}{127})) -++++ - - * `CL_SNORM_INT16` (16-bit signed integer) -> `float` -+ -[latexmath] -++++ -normalized\_float\_value(x)=max(-1.0f, round\_to\_float(\frac{x}{32767})) -++++ - -The precision of the above conversions is \<= 1.5 ulp except for the following cases. - -For `CL_UNORM_INT8`: - - * 0 must convert to 0.0f, and - * 255 must convert to 1.0f - -For `CL_UNORM_INT_101010`: - - * 0 must convert to 0.0f, and - * 1023 must convert to 1.0f - -For `CL_UNORM_INT16`: - - * 0 must convert to 0.0f, and - * 65535 must convert to 1.0f - -For `CL_SNORM_INT8`: - - * -128 and -127 must convert to -1.0f, - * 0 must convert to 0.0f, and - * 127 must convert to 1.0f - -For `CL_SNORM_INT16`: - - * -32768 and -32767 must convert to -1.0f, - * 0 must convert to 0.0f, and - * 32767 must convert to 1.0f - -[[converting-floating-point-values-to-normalized-integer-channel-data-types]] -===== Converting Floating-point Values to Normalized Integer Channel Data Types - -For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image write instructions will convert the floating-point color value to an 8-bit or 16-bit unsigned integer. 
- -For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image write instructions will convert the floating-point color value to an 8-bit or 16-bit signed integer. - -OpenCL implementations may choose to approximate the rounding mode used in the conversions described below. -When approximate rounding is used instead of the preferred rounding, the result of the conversion must satisfy the bound given below. - -The conversions from half precision floating-point values to normalized integer values are performed is as follows: - - * `float` -> `CL_UNORM_INT8` (8-bit unsigned integer) -+ -[latexmath] -++++ -\begin{aligned} -& f(x)=max(0,min(255,255 \times x))\\ -\\ -& f_{preferred}(x) = -\begin{cases} - round\_to\_nearest\_even\_uint8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -& f_{approx}(x) = -\begin{cases} - round\_to\_impl\_uint8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -\\ -& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN -\end{aligned} -++++ - - * `float` -> `CL_UNORM_INT_101010` (10-bit unsigned integer) -+ -[latexmath] -++++ -\begin{aligned} -& f(x)=max(0,min(1023,1023 \times x))\\ -\\ -& f_{preferred}(x) = -\begin{cases} - round\_to\_nearest\_even\_uint10(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -& f_{approx}(x) = -\begin{cases} - round\_to\_impl\_uint10(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -\\ -& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN -\end{aligned} -++++ - - * `float` -> `CL_UNORM_INT16` (16-bit unsigned integer) -+ -[latexmath] -++++ -\begin{aligned} -& f(x)=max(0,min(65535,65535 
\times x))\\ -\\ -& f_{preferred}(x) = -\begin{cases} - round\_to\_nearest\_even\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -& f_{approx}(x) = -\begin{cases} - round\_to\_impl\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -\\ -& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN -\end{aligned} -++++ - - * `float` -> `CL_SNORM_INT8` (8-bit signed integer) -+ -[latexmath] -++++ -\begin{aligned} -& f(x)=max(-128,min(127,127 \times x))\\ -\\ -& f_{preferred}(x) = -\begin{cases} - round\_to\_nearest\_even\_uint8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -& f_{approx}(x) = -\begin{cases} - round\_to\_impl\_uint8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -\\ -& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN -\end{aligned} -++++ - - * `float` -> `CL_SNORM_INT16` (16-bit signed integer) -+ -[latexmath] -++++ -\begin{aligned} -& f(x)=max(-32768,min(32767,32767 \times x))\\ -\\ -& f_{preferred}(x) = -\begin{cases} - round\_to\_nearest\_even\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -& f_{approx}(x) = -\begin{cases} - round\_to\_impl\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ - \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN -\end{cases}\\ -\\ -& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN -\end{aligned} -++++ - -[[conversion-rules-for-half-precision-floating-point-channel-data-type]] -==== Conversion Rules for Half Precision Floating-point Channel Data Type - -For images 
created with a channel data type of `CL_HALF_FLOAT`, the conversions of half to float and half to half are lossless. -Conversions from float to half round the mantissa using the round to nearest even or round to zero rounding mode. -Denormalized numbers for the half data type which may be generated when converting a float to a half may be flushed to zero. -A float NaN must be converted to an appropriate NaN in the half type. -A float INF must be converted to an appropriate INF in the half type. - -[[conversion-rules-for-floating-point-channel-data-type]] -==== Conversion Rules for Floating-point Channel Data Type - -The following rules apply for reading and writing images created with channel data type of `CL_FLOAT`. - -* NaNs may be converted to a NaN value(s) supported by the device. -* Denorms can be flushed to zero. -* All other values must be preserved. - -[[conversion-rules-for-signed-and-unsigned-8-bit-16-bit-and-32-bit-integer-channel-data-types]] -==== Conversion Rules for Signed and Unsigned 8-bit, 16-bit and 32-bit Integer Channel Data Types - -For images created with image channel data type of `CL_SIGNED_INT8`, `CL_SIGNED_INT16` and `CL_SIGNED_INT32`, image read instructions will return the unmodified integer values stored in the image at specified location. - -Likewise, for images created with image channel data type of `CL_UNSIGNED_INT8`, `CL_UNSIGNED_INT16` and `CL_UNSIGNED_INT32`, image read instructions will return the unmodified unsigned integer values stored in the image at specified location. 
- -Image write instructions will perform one of the following conversions: - -* 32 bit signed integer -> `CL_SIGNED_INT8` (8-bit signed integer): -+ -[latexmath] -++++ -int8\_value(x) = clamp(x, -128, 127) -++++ - -* 32 bit signed integer -> `CL_SIGNED_INT16` (16-bit signed integer): -+ -[latexmath] -++++ -int16\_value(x) = clamp(x, -32768, 32767) -++++ - -* 32 bit signed integer -> `CL_SIGNED_INT32` (32-bit signed integer): -+ -[latexmath] -++++ -int32\_value(x) = x \quad \text{(no conversion)} -++++ - -* 32 bit unsigned integer -> `CL_UNSIGNED_INT8` (8-bit unsigned integer): -+ -[latexmath] -++++ -uint8\_value(x) = clamp(x, 0, 255) -++++ - -* 32 bit unsigned integer -> `CL_UNSIGNED_INT16` (16-bit unsigned integer): -+ -[latexmath] -++++ -uint16\_value(x) = clamp(x, 0, 65535) -++++ - -* 32 bit unsigned integer -> `CL_UNSIGNED_INT32` (32-bit unsigned integer): -+ -[latexmath] -++++ -uint32\_value(x) = x \quad \text{(no conversion)} -++++ - -The conversions described in this section must be correctly saturated. - -[[conversion-rules-for-srgba-and-sbgra-images]] -==== Conversion Rules for sRGBA and sBGRA Images - -Standard RGB data, which roughly displays colors in a linear ramp of luminosity levels such that an average observer, under average viewing conditions, can view them as perceptually equal steps on an average display. -All 0s maps to 0.0f, and all 1s maps to 1.0f. -The sequence of unsigned integer encodings between all 0s and all 1s represent a nonlinear progression in the floating-point interpretation of the numbers between 0.0f to 1.0f. -For more detail, see the <>. - -Conversion from sRGB space is automatically done the image read instruction if the image channel order is one of the sRGB values described above. -When reading from an sRGB image, the conversion from sRGB to linear RGB is performed before filtering is applied. -If the format has an alpha channel, the alpha data is stored in linear color space. 
-Conversion to sRGB space is automatically done by the image write instruction if the image channel order is one of the sRGB values described above and the device supports writing to sRGB images. - -If the format has an alpha channel, the alpha data is stored in linear color space. - -1. The following process is used by image read instructions to convert a normalized 8-bit unsigned integer sRGB color value x to a floating-point linear RGB color value y: -a. Convert a normalized 8-bit unsigned integer sRGB value x to a floating-point sRGB value r as per rules described in <> section. -+ -[latexmath] -++++ -r=normalized\_float\_value(x) -++++ - -b. Convert a floating-point sRGB value r to a floating-point linear RGB color value y: -+ -[latexmath] -++++ -\begin{aligned} -& c_{linear}(x) = -\begin{cases} - \frac{r}{12.92} & \quad r \geq 0 \text{ and } r \leq 0.04045\\ - (\frac{r + 0.055}{1.055})^{2.4} & \quad r > 0.04045 \text{ and } \leq 1 -\end{cases}\\ -\\ -& y = c_{linear}(r) -\end{aligned} -++++ - -2. The following process is used by image write instructions to convert a linear RGB floating-point color value y to a normalized 8-bit unsigned integer sRGB value x: -a. Convert a floating-point linear RGB value y to a normalized floating-point sRGB value r: -+ -[latexmath] -++++ -\begin{aligned} -& c_{linear}(x) = -\begin{cases} - 0 & \quad y \geq NaN \text{ or } y < 0\\ - 12.92 \times y & \quad y \geq 0 \text{ and } y < 0.0031308\\ - 1.055 \times y^{(\frac{1}{2.4})} & \quad y \geq 0.0031308 \text{ and } y \leq 1\\ - 1 & \quad y > 1 -\end{cases}\\ -\\ -& r = c_{sRGB}(y) -\end{aligned} -++++ - -b. Convert a normalized floating-point sRGB value r to a normalized 8-bit unsigned integer sRGB value x as per rules described in <> section. 
-+ -[latexmath] -++++ -\begin{aligned} -& g(r) = -\begin{cases} - f_{preferred}(r) & \quad \text{if rounding mode is round to even}\\ - f_{approx}(r) & \quad \text{if implementation-defined rounding mode} -\end{cases}\\ -\\ -& x = g(r) -\end{aligned} -++++ - -The accuracy required when converting a normalized 8-bit unsigned integer sRGB color value x to a floating-point linear RGB color value y is given by: -[latexmath] -++++ -|x-255 \times c_{sRGB}(y)|\leq 0.5 -++++ - -The accuracy required when converting a linear RGB floating-point color value y to a normalized 8-bit unsigned integer sRGB value x is given by: -[latexmath] -++++ -|x-255 \times c_{sRGB}(y)|\leq 0.6 -++++ - -[[selecting-an-image-from-an-image-array]] -=== Selecting an Image from an Image Array - -Let `(u,v,w)` represent the unnormalized image coordinate values for reading from and/or writing to a 2D image in a 2D image array. - -When read using a sampler, the 2D image layer selected is computed as: - -[latexmath] -++++ -layer = clamp(rint(w), 0, d_t - 1) -++++ - -otherwise the layer selected is computed as: - -[latexmath] -++++ -layer = w -++++ - -(since w is already an integer) and the result is undefined if w is not one of the integers 0, 1, ... `d~t~` - 1. - -Let `(u,v)` represent the unnormalized image coordinate values for reading from and/or writing to a 1D image in a 1D image array. - -When read using a sampler, the 1D image layer selected is computed as: - -[latexmath] -++++ -layer = clamp(rint(v), 0, h_t - 1) -++++ - -otherwise the layer selected is computed as: - -[latexmath] -++++ -layer = v -++++ - -(since v is already an integer) and the result is undefined if v is not one of the integers 0, 1, ... `h~t~` - 1. 
- -=== Data Format for Reading and Writing Images - -This section describes how image element data is returned by an -image read instruction or passed as the _Texel_ data that is -written by an image write instruction: - -For the following image channel orders, the data is a four -component vector type: - -._Mapping Image Data to Vector Components_ -[cols=",",options="header",] -|==== -|*Image Channel Order* -|*Components* - -|`R`, `Rx` -|(R, 0, 0, 1) - -|`A` -|(0, 0, 0, A) - -|`RG`, `RGx` -|(R, G, 0, 1) - -|`RGB`, `RGBx`, `sRGB`, `sRGBx` -|(R, G, B, 1) - -|`RGBA`, `BGRA`, `ARGB`, `ABGR`, `sRGBA`, `sBGRA` -|(R, G, B, A) - -|`Intensity` -|(I, I, I, I) - -|`Luminance` -|(L, L, L, 1) - -|==== - -For the following image channel orders, the data is a scalar type: - -._Scalar Image Data_ -[cols=",",options="header",] -|==== -|*Image Channel Order* -|*Scalar Value* - -|`Depth` -|D - -|`DepthStencil` -|D - -|==== - -The following table describes the mapping from image channel data type -to the data vector component type or scalar type: - -._Image Data Types_ -[cols=",",options="header",] -|==== -|*Image Channel Order* -|*Data Type* - -|`SnormInt8`, -`SnormInt16`, + -`UnormInt8`, -`UnormInt16`, + -`UnormShort565`, -`UnormShort555`, + -`UnormInt101010`, -`UnormInt101010_2`, + -`UnormInt24`, + -`HalfFloat`, + -`Float` -|*OpTypeFloat*, with _Width_ equal to 16 or 32. - -|`SignedInt8`, -`SignedInt16`, -`SignedInt32`, + -`UnsignedInt8`, -`UnsignedInt16`, -`UnsignedInt32` -|*OpTypeInt*, with _Width_ equal to 32. - -|==== - - -=== Sampled and Sampler-less Reads - -SPIR-V instructions that read from an image without a sampler (such as *OpImageRead*) behave exactly the same as the corresponding image read instruction with a sampler that has _Sampler Filter Mode_ set to *Nearest*, *Non-Normalized* coordinates, and _Sampler Addressing Mode_ set to *None*. - -There is one exception for cases where the image being read has _Image Format_ equal to a floating-point type (such as *R32f*). 
-In this exceptional case, when channel data values are denormalized, the non-sampler image read instruction may return the denormalized data, while the sampler image read instruction may flush denormalized channel data values to zero. -The coordinates must be between 0 and image size in that dimension, non inclusive. +// Copyright 2018-2024 The Khronos Group. This work is licensed under a +// Creative Commons Attribution 4.0 International License; see +// http://creativecommons.org/licenses/by/4.0/ + +[[image_addressing_and_filtering]] +== Image Addressing and Filtering + +This section describes how image operations behave in an OpenCL environment. + +[[image-coordinates]] +=== Image Coordinates + +Let `w~t~`, `h~t~` and `d~t~` be the width, height (or image array size for a 1D image array) and depth (or image array size for a 2D image array) of the image in pixels. +Let `coord.xy` (also referred to as `(s,t)`) or `coord.xyz` (also referred to as `(s,t,r)`) be the coordinates specified to an image read instruction (such as *OpImageRead*) or an image write instruction (such as *OpImageWrite*). + +If image coordinates specified to an image read instruction are normalized (as specified in the sampler), the `s`, `t`, and `r` coordinate values are multiplied by `w~t~`, `h~t~` and `d~t~` respectively to generate the unnormalized coordinate values. +For image arrays, the image array coordinate (i.e. `t` if it is a 1D image array or `r` if it is a 2D image array) specified to the image read instruction must always be the unnormalized image coordinate value. + +Image coordinates specified to an image write instruction are always unnormalized image coordinate values. + +Let `(u,v,w)` represent the unnormalized image coordinate values. + +If values in `(s,t,r)` or `(u,v,w)` are INF or NaN, the behavior of the image read instruction or image write instruction is undefined. 
+ +[[addressing-and-filter-modes]] +=== Addressing and Filter Modes + +After generating the image coordinate `(u,v,w)` we apply the appropriate addressing and filter mode to generate the appropriate sample locations to read from the image. + +[[clamp-addressing]] +==== Clamp and None Addressing Modes + +We first describe how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is `CL_ADDRESS_CLAMP`, `CL_ADDRESS_CLAMP_TO_EDGE`, or `CL_ADDRESS_NONE`. + +[[clamp-nearest_filtering]] +===== Nearest Filtering + +When the filter mode is `CL_FILTER_NEAREST`, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. +The image element location `(i,j,k)` is computed as: + +[latexmath] +++++ +\begin{aligned} +i & = address\_mode((int)floor(u))\\ +j & = address\_mode((int)floor(v))\\ +k & = address\_mode((int)floor(w)) +\end{aligned} +++++ + +For a 3D image, the image element at location `(i,j,k)` becomes the color value. +For a 2D image, the image element at location `(i,j)` becomes the color value. + +The below table describes the `address_mode` function. + +[[addressing_modes_to_generate_texel_location]] +.Addressing Modes to Generate Texel Location +[width="100%",cols="50%,50%",options="header"] +|==== +a|*Addressing Mode* +a|*Result of _address_mode(coord)_* + +a|`CL_ADDRESS_CLAMP` +a|_clamp (coord, -1, size)_ + +a|`CL_ADDRESS_CLAMP_TO_EDGE` +a|_clamp (coord, 0, size - 1)_ + +a|`CL_ADDRESS_NONE` +a|_coord_ +|==== + +The size term in the table above is `w~t~` for u, `h~t~` for v and `d~t~` for w. + +The clamp function used in the table above is defined as: + +[latexmath] +++++ +\begin{aligned} +clamp(a, b, c) & = return (a < b) ? b : ((a > c) ? 
c : a) +\end{aligned} +++++ + +If the addressing mode is `CL_ADDRESS_CLAMP` or `CL_ADDRESS_CLAMP_TO_EDGE`, and the selected texel location `(i,j,k)` refers to a location outside the image, the border color is used as the color value for the texel. + +Otherwise, if the addressing mode is `CL_ADDRESS_NONE` and the selected texel location `(i,j,k)` refers to a location outside the image, the color value for the texel is undefined. + +[[clamp-linear-filtering]] +===== Linear Filtering + +When the filter mode is `CL_FILTER_LINEAR`, a 2 x 2 square of image elements (for a 2D image) or a 2 x 2 x 2 cube of image elements (for a 3D image) is selected. +This 2 x 2 square or 2 x 2 x 2 cube is obtained as follows. + +Let: + +[latexmath] +++++ +\begin{aligned} +i0 & = address\_mode((int)floor(u - 0.5))\\ +j0 & = address\_mode((int)floor(v - 0.5))\\ +k0 & = address\_mode((int)floor(w - 0.5))\\ +i1 & = address\_mode((int)floor(u - 0.5) + 1)\\ +j1 & = address\_mode((int)floor(v - 0.5) + 1)\\ +k1 & = address\_mode((int)floor(w - 0.5) + 1)\\ +a & = frac(u - 0.5)\\ +b & = frac(v - 0.5)\\ +c & = frac(w - 0.5) +\end{aligned} +++++ + +The frac function determines the fractional part of x and is computed as: + +[latexmath] +++++ +\begin{aligned} +frac(x) & = x - floor(x) +\end{aligned} +++++ + +For a 3D image, the color value is computed as: + +[latexmath] +++++ +\begin{array}{rcl} +T &=& (1 - a) \times (1 - b) \times (1 - c) \times T_{i0j0k0}\\ + & & {} + a \times (1 - b) \times (1 - c) \times T_{i1j0k0}\\ + & & {} + (1 - a) \times b \times (1 - c) \times T_{i0j1k0}\\ + & & {} + a \times b \times (1 - c) \times T_{i1j1k0}\\ + & & {} + (1 - a) \times (1 - b) \times c \times T_{i0j0k1}\\ + & & {} + a \times (1 - b) \times c \times T_{i1j0k1}\\ + & & {} + (1 - a) \times b \times c \times T_{i0j1k1}\\ + & & {} + a \times b \times c \times T_{i1j1k1} +\end{array} +++++ + +where `T~ijk~` is the image element at location `(i,j,k)` in the 3D image. 
+ +For a 2D image, the color value is computed as: + +[latexmath] +++++ +\begin{array}{rcl} +T &=& (1 - a) \times (1 - b) \times T_{i0j0}\\ + & & {} + a \times (1 - b) \times T_{i1j0}\\ + & & {} + (1 - a) \times b \times T_{i0j1}\\ + & & {} + a \times b \times T_{i1j1} +\end{array} +++++ + +where `T~ij~` is the image element at location `(i,j)` in the 2D image. + +If the addressing mode is `CL_ADDRESS_CLAMP` or `CL_ADDRESS_CLAMP_TO_EDGE`, and any of the selected `T~ijk~` or `T~ij~` refers to a location outside the image, the border color is used as the image element. + +Otherwise, if the addressing mode is `CL_ADDRESS_NONE`, and any of the selected `T~ijk~` or `T~ij~` refers to a location outside the image, the color value is undefined. + +If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT`, and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. + +[[repeat-addressing]] +==== Repeat Addressing Mode + +We now discuss how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is `CL_ADDRESS_REPEAT`. + +[[repeat-nearest-filtering]] +===== Nearest Filtering + +When filter mode is `CL_FILTER_NEAREST`, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. +The image element location `(i,j,k)` is computed as: + +[latexmath] +++++ +\begin{array}{l} +u = (s - floor(s)) \times w_t\\ +i = (int)floor(u)\\ +if\ (i > w_t - 1)\\ +\qquad i = i - w_t\\ +v = (t - floor(t)) \times h_t\\ +j = (int)floor(v)\\ +if\ (j > h_t - 1)\\ +\qquad j = j - h_t\\ +w = (r - floor(r)) \times d_t\\ +k = (int)floor(w)\\ +if\ (k > d_t - 1)\\ +\qquad k = k - d_t +\end{array} +++++ + +For a 3D image, the image element at location (i, j, k) becomes the color value. +For a 2D image, the image element at location (i, j) becomes the color value. 
+ +[[repeat-linear-filtering]] +===== Linear Filtering + +When filter mode is `CL_FILTER_LINEAR`, a 2 x 2 square of image elements for a 2D image or a 2 x 2 x 2 cube of image elements for a 3D image is selected. +This 2 x 2 square or 2 x 2 x 2 cube is obtained as follows. + +Let + +[latexmath] +++++ +\begin{array}{l} +u = (s - floor(s)) \times w_t\\ +i0 = (int)floor(u - 0.5)\\ +i1 = i0 + 1\\ +if\ (i0 < 0)\\ +\qquad i0 = w_t + i0\\ +if\ (i1 > w_t - 1)\\ +\qquad i1 = i1 - w_t\\ +v = (t - floor(t)) \times h_t\\ +j0 = (int)floor(v - 0.5)\\ +j1 = j0 + 1\\ +if\ (j0 < 0)\\ +\qquad j0 = h_t + j0\\ +if\ (j1 > h_t - 1)\\ +\qquad j1 = j1 - h_t\\ +w = (r - floor(r)) \times d_t\\ +k0 = (int)floor(w - 0.5)\\ +k1 = k0 + 1\\ +if\ (k0 < 0)\\ +\qquad k0 = d_t + k0\\ +if\ (k1 > d_t - 1)\\ +\qquad k1 = k1 - d_t\\ +a = frac(u - 0.5)\\ +b = frac(v - 0.5)\\ +c = frac(w - 0.5) +\end{array} +++++ + +For a 3D image, the color value is computed as: + +[latexmath] +++++ +\begin{array}{rcl} +T &=& (1 - a) \times (1 - b) \times (1 - c) \times T_{i0j0k0}\\ + & & {} + a \times (1 - b) \times (1 - c) \times T_{i1j0k0}\\ + & & {} + (1 - a) \times b \times (1 - c) \times T_{i0j1k0}\\ + & & {} + a \times b \times (1 - c) \times T_{i1j1k0}\\ + & & {} + (1 - a) \times (1 - b) \times c \times T_{i0j0k1}\\ + & & {} + a \times (1 - b) \times c \times T_{i1j0k1}\\ + & & {} + (1 - a) \times b \times c \times T_{i0j1k1}\\ + & & {} + a \times b \times c \times T_{i1j1k1} +\end{array} +++++ + +where `T~ijk~` is the image element at location `(i,j,k)` in the 3D image. + +For a 2D image, the color value is computed as: + +[latexmath] +++++ +\begin{array}{rcl} +T &=&(1 - a) \times (1 - b) \times T_{i0j0}\\ + & & {} + a \times (1 - b) \times T_{i1j0}\\ + & & {} + (1 - a) \times b \times T_{i0j1}\\ + & & {} + a \times b \times T_{i1j1} +\end{array} +++++ + +where `T~ij~` is the image element at location `(i,j)` in the 2D image. 
+ +If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT`, and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. + +[[mirrored-repeat-addressing]] +==== Mirrored Repeat Addressing Mode + +We now discuss how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is `CL_ADDRESS_MIRRORED_REPEAT`. +The `CL_ADDRESS_MIRRORED_REPEAT` addressing mode causes the image to be read as if it is tiled at every integer seam, with the interpretation of the image data flipped at each integer crossing. + +[[mirrored-repeat-nearest-filtering]] +===== Nearest Filtering + +When filter mode is `CL_FILTER_NEAREST`, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. +The image element location `(i,j,k)` is computed as: + +[latexmath] +++++ +\begin{array}{rcl} +s' &=& 2.0f \times rint(0.5f \times s)\\ +s' &=& fabs(s - s')\\ +u &=& s' \times w_t\\ +i &=& (int)floor(u)\\ +i &=& min(i, w_t - 1)\\ +t' &=& 2.0f \times rint(0.5f \times t)\\ +t' &=& fabs(t - t')\\ +v &=& t' \times h_t\\ +j &=& (int)floor(v)\\ +j &=& min(j, h_t - 1)\\ +r' &=& 2.0f \times rint(0.5f \times r)\\ +r' &=& fabs(r - r')\\ +w &=& r' \times d_t\\ +k &=& (int)floor(w)\\ +k &=& min(k, d_t - 1) +\end{array} +++++ + +For a 3D image, the image element at location (i, j, k) becomes the color value. +For a 2D image, the image element at location (i, j) becomes the color value. + +[[mirrored-repeat-linear-filtering]] +===== Linear Filtering + +When filter mode is `CL_FILTER_LINEAR`, a 2 x 2 square of image elements for a 2D image or a 2 x 2 x 2 cube of image elements for a 3D image is selected. +This 2 x 2 square or 2 x 2 x 2 cube is obtained as follows. 
+ +Let + +[latexmath] +++++ +\begin{array}{rcl} +s' &=& 2.0f \times rint(0.5f \times s)\\ +s' &=& fabs(s - s')\\ +u &=& s' \times w_t\\ +i0 &=& (int)floor(u - 0.5f)\\ +i1 &=& i0 + 1\\ +i0 &=& max(i0, 0)\\ +i1 &=& min(i1, w_t - 1)\\ +t' &=& 2.0f \times rint(0.5f \times t)\\ +t' &=& fabs(t - t')\\ +v &=& t' \times h_t\\ +j0 &=& (int)floor(v - 0.5f)\\ +j1 &=& j0 + 1\\ +j0 &=& max(j0, 0)\\ +j1 &=& min(j1, h_t - 1)\\ +r' &=& 2.0f \times rint(0.5f \times r)\\ +r' &=& fabs(r - r')\\ +w &=& r' \times d_t\\ +k0 &=& (int)floor(w - 0.5f)\\ +k1 &=& k0 + 1\\ +k0 &=& max(k0, 0)\\ +k1 &=& min(k1, d_t - 1)\\ +a &=& frac(u - 0.5)\\ +b &=& frac(v - 0.5)\\ +c &=& frac(w - 0.5) +\end{array} +++++ + +For a 3D image, the color value is computed as: + +[latexmath] +++++ +\begin{array}{rcl} +T &=& (1 - a) \times (1 - b) \times (1 - c) \times T_{i0j0k0}\\ + & & {} + a \times (1 - b) \times (1 - c) \times T_{i1j0k0}\\ + & & {} + (1 - a) \times b \times (1 - c) \times T_{i0j1k0}\\ + & & {} + a \times b \times (1 - c) \times T_{i1j1k0}\\ + & & {} + (1 - a) \times (1 - b) \times c \times T_{i0j0k1}\\ + & & {} + a \times (1 - b) \times c \times T_{i1j0k1}\\ + & & {} + (1 - a) \times b \times c \times T_{i0j1k1}\\ + & & {} + a \times b \times c \times T_{i1j1k1} +\end{array} +++++ + +where `T~ijk~` is the image element at location `(i,j,k)` in the 3D image. + +For a 2D image, the color value is computed as: + +[latexmath] +++++ +\begin{array}{rcl} +T &=& (1 - a) \times (1 - b) \times T_{i0j0}\\ + & & {} + a \times (1 - b) \times T_{i1j0}\\ + & & {} + (1 - a) \times b \times T_{i0j1}\\ + & & {} + a \times b \times T_{i1j1} +\end{array} +++++ + +where `T~ij~` is the image element at location `(i,j)` in the 2D image. + +For a 1D image, the color value is computed as: + +[latexmath] +++++ +\begin{array}{rcl} +T &=& (1 - a) \times T_{i0} + a \times T_{i1} +\end{array} +++++ + +where `T~i~` is the image element at location `(i)` in the 1D image. 
+ +If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT` and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. + +[[precision-of-addressing-and-filter-modes]] +=== Precision of Addressing and Filter Modes + +If the sampler is specified as using unnormalized coordinates (floating-point or integer coordinates), filter mode set to `CL_FILTER_NEAREST` and addressing mode set to one of the following modes - `CL_ADDRESS_CLAMP`, `CL_ADDRESS_CLAMP_TO_EDGE` or `CL_ADDRESS_NONE` - the location of the image element in the image given by `(i,j,k)` will be computed without any loss of precision. + +For all other sampler combinations of normalized or unnormalized coordinates, filter modes, and addressing modes, the relative error or precision of the addressing mode calculations and the image filter operation are not defined. +To ensure precision of image addressing and filter calculations across any OpenCL device for these sampler combinations, developers may unnormalize the image coordinate in the kernel, and then implement the linear filter in the kernel with appropriate read image instructions with a sampler that uses unnormalized coordinates, filter mode set to `CL_FILTER_NEAREST`, addressing mode set to `CL_ADDRESS_CLAMP`, `CL_ADDRESS_CLAMP_TO_EDGE` or `CL_ADDRESS_NONE`, and finally performing the interpolation of color values read from the image to generate the filtered color value. + +[[conversion-rules]] +=== Conversion Rules + +In this section we discuss conversion rules that are applied when reading and writing images in a kernel. + +[[conversion-rules-for-normalized-integer-channel-data-types]] +==== Conversion Rules for Normalized Integer Channel Data Types + +In this section we discuss converting normalized integer channel data types to half-precision and single-precision floating-point values and vice-versa. 
+ +[[converting-normalized-integer-channel-data-types-to-half-precision-floating-point-values]] +===== Converting Normalized Integer Channel Data Types to Half Precision Floating-point Values + +For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized half precision floating-point values in the range [0.0h ... 1.0h]. + +For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit signed integer to normalized half precision floating-point values in the range [-1.0h ... 1.0h]. + +These conversions are performed as follows: + +* `CL_UNORM_INT8` (8-bit unsigned integer) -> `half` ++ +[latexmath] +++++ +normalized\_half\_value(x)=round\_to\_half(\frac{x}{255}) +++++ + +* `CL_UNORM_INT_101010` (10-bit unsigned integer) -> `half` ++ +[latexmath] +++++ +normalized\_half\_value(x)=round\_to\_half(\frac{x}{1023}) +++++ + +* `CL_UNORM_INT16` (16-bit unsigned integer) -> `half` ++ +[latexmath] +++++ +normalized\_half\_value(x)=round\_to\_half(\frac{x}{65535}) +++++ + +* `CL_SNORM_INT8` (8-bit signed integer) -> `half` ++ +[latexmath] +++++ +normalized\_half\_value(x)=max(-1.0h, round\_to\_half(\frac{x}{127})) +++++ + +* `CL_SNORM_INT16` (16-bit signed integer) -> `half` ++ +[latexmath] +++++ +normalized\_half\_value(x)=max(-1.0h, round\_to\_half(\frac{x}{32767})) +++++ + +The precision of the above conversions is \<= 1.5 ulp except for the following cases: + +For `CL_UNORM_INT8`: + + * 0 must convert to 0.0h, and + * 255 must convert to 1.0h + +For `CL_UNORM_INT_101010`: + + * 0 must convert to 0.0h, and + * 1023 must convert to 1.0h + +For `CL_UNORM_INT16`: + + * 0 must convert to 0.0h, and + * 65535 must convert to 1.0h + +For `CL_SNORM_INT8`: + + * -128 and -127 must convert to -1.0h, + * 0 must convert to 0.0h, and + * 127 
must convert to 1.0h + +For `CL_SNORM_INT16`: + + * -32768 and -32767 must convert to -1.0h, + * 0 must convert to 0.0h, and + * 32767 must convert to 1.0h + +[[converting-half-precision-floating-point-values-to-normalized-integer-channel-data-types]] +===== Converting Half Precision Floating-point Values to Normalized Integer Channel Data Types + +For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image write instructions will convert the half precision floating-point color value to an 8-bit or 16-bit unsigned integer. + +For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image write instructions will convert the half precision floating-point color value to an 8-bit or 16-bit signed integer. + +OpenCL implementations may choose to approximate the rounding mode used in the conversions described below. +When approximate rounding is used instead of the preferred rounding, the result of the conversion must satisfy the bound given below. 
+ +The conversions from half precision floating-point values to normalized integer values are performed as follows: + + * `half` -> `CL_UNORM_INT8` (8-bit unsigned integer) ++ +[latexmath] +++++ +\begin{aligned} +& f(x)=max(0,min(255,255 \times x))\\ +\\ +& f_{preferred}(x) = +\begin{cases} + round\_to\_nearest\_even\_uint8(f(x)) & x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & x = \infty \text{ or } x = NaN +\end{cases}\\ +& f_{approx}(x) = +\begin{cases} + round\_to\_impl\_uint8(f(x)) & x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & x = \infty \text{ or } x = NaN +\end{cases}\\ +\\ +& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN +\end{aligned} +++++ + + * `half` -> `CL_UNORM_INT16` (16-bit unsigned integer) ++ +[latexmath] +++++ +\begin{aligned} +& f(x)=max(0,min(65535,65535 \times x))\\ +\\ +& f_{preferred}(x) = +\begin{cases} + round\_to\_nearest\_even\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +& f_{approx}(x) = +\begin{cases} + round\_to\_impl\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +\\ +& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN +\end{aligned} +++++ + + * `half` -> `CL_SNORM_INT8` (8-bit signed integer) ++ +[latexmath] +++++ +\begin{aligned} +& f(x)=max(-128,min(127,127 \times x))\\ +\\ +& f_{preferred}(x) = +\begin{cases} + round\_to\_nearest\_even\_int8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +& f_{approx}(x) = +\begin{cases} + round\_to\_impl\_int8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +\\ +& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty
\text{ and } x \neq NaN +\end{aligned} +++++ + + * `half` -> `CL_SNORM_INT16` (16-bit signed integer) ++ +[latexmath] +++++ +\begin{aligned} +& f(x)=max(-32768,min(32767,32767 \times x))\\ +\\ +& f_{preferred}(x) = +\begin{cases} + round\_to\_nearest\_even\_int16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +& f_{approx}(x) = +\begin{cases} + round\_to\_impl\_int16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +\\ +& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN +\end{aligned} +++++ + +[[converting-normalized-integer-channel-data-types-to-floating-point-values]] +===== Converting Normalized Integer Channel Data Types to Floating-point Values + +For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized floating-point values in the range [0.0f ... 1.0f]. + +For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit signed integer to normalized floating-point values in the range [-1.0f ... 1.0f]. 
+ +These conversions are performed as follows: + + * `CL_UNORM_INT8` (8-bit unsigned integer) -> `float` ++ +[latexmath] +++++ +normalized\_float\_value(x)=round\_to\_float(\frac{x}{255}) +++++ + + * `CL_UNORM_INT_101010` (10-bit unsigned integer) -> `float` ++ +[latexmath] +++++ +normalized\_float\_value(x)=round\_to\_float(\frac{x}{1023}) +++++ + + * `CL_UNORM_INT16` (16-bit unsigned integer) -> `float` ++ +[latexmath] +++++ +normalized\_float\_value(x)=round\_to\_float(\frac{x}{65535}) +++++ + + * `CL_SNORM_INT8` (8-bit signed integer) -> `float` ++ +[latexmath] +++++ +normalized\_float\_value(x)=max(-1.0f, round\_to\_float(\frac{x}{127})) +++++ + + * `CL_SNORM_INT16` (16-bit signed integer) -> `float` ++ +[latexmath] +++++ +normalized\_float\_value(x)=max(-1.0f, round\_to\_float(\frac{x}{32767})) +++++ + +The precision of the above conversions is \<= 1.5 ulp except for the following cases. + +For `CL_UNORM_INT8`: + + * 0 must convert to 0.0f, and + * 255 must convert to 1.0f + +For `CL_UNORM_INT_101010`: + + * 0 must convert to 0.0f, and + * 1023 must convert to 1.0f + +For `CL_UNORM_INT16`: + + * 0 must convert to 0.0f, and + * 65535 must convert to 1.0f + +For `CL_SNORM_INT8`: + + * -128 and -127 must convert to -1.0f, + * 0 must convert to 0.0f, and + * 127 must convert to 1.0f + +For `CL_SNORM_INT16`: + + * -32768 and -32767 must convert to -1.0f, + * 0 must convert to 0.0f, and + * 32767 must convert to 1.0f + +[[converting-floating-point-values-to-normalized-integer-channel-data-types]] +===== Converting Floating-point Values to Normalized Integer Channel Data Types + +For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image write instructions will convert the floating-point color value to an 8-bit or 16-bit unsigned integer. 
+ +For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image write instructions will convert the floating-point color value to an 8-bit or 16-bit signed integer. + +OpenCL implementations may choose to approximate the rounding mode used in the conversions described below. +When approximate rounding is used instead of the preferred rounding, the result of the conversion must satisfy the bound given below. + +The conversions from floating-point values to normalized integer values are performed as follows: + + * `float` -> `CL_UNORM_INT8` (8-bit unsigned integer) ++ +[latexmath] +++++ +\begin{aligned} +& f(x)=max(0,min(255,255 \times x))\\ +\\ +& f_{preferred}(x) = +\begin{cases} + round\_to\_nearest\_even\_uint8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +& f_{approx}(x) = +\begin{cases} + round\_to\_impl\_uint8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +\\ +& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN +\end{aligned} +++++ + + * `float` -> `CL_UNORM_INT_101010` (10-bit unsigned integer) ++ +[latexmath] +++++ +\begin{aligned} +& f(x)=max(0,min(1023,1023 \times x))\\ +\\ +& f_{preferred}(x) = +\begin{cases} + round\_to\_nearest\_even\_uint10(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +& f_{approx}(x) = +\begin{cases} + round\_to\_impl\_uint10(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +\\ +& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN +\end{aligned} +++++ + + * `float` -> `CL_UNORM_INT16` (16-bit unsigned integer) ++ +[latexmath] +++++ +\begin{aligned} +& f(x)=max(0,min(65535,65535
\times x))\\ +\\ +& f_{preferred}(x) = +\begin{cases} + round\_to\_nearest\_even\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +& f_{approx}(x) = +\begin{cases} + round\_to\_impl\_uint16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +\\ +& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN +\end{aligned} +++++ + + * `float` -> `CL_SNORM_INT8` (8-bit signed integer) ++ +[latexmath] +++++ +\begin{aligned} +& f(x)=max(-128,min(127,127 \times x))\\ +\\ +& f_{preferred}(x) = +\begin{cases} + round\_to\_nearest\_even\_int8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +& f_{approx}(x) = +\begin{cases} + round\_to\_impl\_int8(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +\\ +& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN +\end{aligned} +++++ + + * `float` -> `CL_SNORM_INT16` (16-bit signed integer) ++ +[latexmath] +++++ +\begin{aligned} +& f(x)=max(-32768,min(32767,32767 \times x))\\ +\\ +& f_{preferred}(x) = +\begin{cases} + round\_to\_nearest\_even\_int16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +& f_{approx}(x) = +\begin{cases} + round\_to\_impl\_int16(f(x)) & \quad x \neq \infty \text{ and } x \neq NaN\\ + \text{implementation-defined} & \quad x = \infty \text{ or } x = NaN +\end{cases}\\ +\\ +& |f(x) - f_{approx}(x)|\leq 0.6, x \neq \infty \text{ and } x \neq NaN +\end{aligned} +++++ + +[[conversion-rules-for-half-precision-floating-point-channel-data-type]] +==== Conversion Rules for Half Precision Floating-point Channel Data Type + +For images
created with a channel data type of `CL_HALF_FLOAT`, the conversions of half to float and half to half are lossless. +Conversions from float to half round the mantissa using the round to nearest even or round to zero rounding mode. +Denormalized numbers for the half data type which may be generated when converting a float to a half may be flushed to zero. +A float NaN must be converted to an appropriate NaN in the half type. +A float INF must be converted to an appropriate INF in the half type. + +[[conversion-rules-for-floating-point-channel-data-type]] +==== Conversion Rules for Floating-point Channel Data Type + +The following rules apply for reading and writing images created with channel data type of `CL_FLOAT`. + +* NaNs may be converted to a NaN value(s) supported by the device. +* Denorms can be flushed to zero. +* All other values must be preserved. + +[[conversion-rules-for-signed-and-unsigned-8-bit-16-bit-and-32-bit-integer-channel-data-types]] +==== Conversion Rules for Signed and Unsigned 8-bit, 16-bit and 32-bit Integer Channel Data Types + +For images created with image channel data type of `CL_SIGNED_INT8`, `CL_SIGNED_INT16` and `CL_SIGNED_INT32`, image read instructions will return the unmodified integer values stored in the image at specified location. + +Likewise, for images created with image channel data type of `CL_UNSIGNED_INT8`, `CL_UNSIGNED_INT16` and `CL_UNSIGNED_INT32`, image read instructions will return the unmodified unsigned integer values stored in the image at specified location. 
+ +Image write instructions will perform one of the following conversions: + +* 32 bit signed integer -> `CL_SIGNED_INT8` (8-bit signed integer): ++ +[latexmath] +++++ +int8\_value(x) = clamp(x, -128, 127) +++++ + +* 32 bit signed integer -> `CL_SIGNED_INT16` (16-bit signed integer): ++ +[latexmath] +++++ +int16\_value(x) = clamp(x, -32768, 32767) +++++ + +* 32 bit signed integer -> `CL_SIGNED_INT32` (32-bit signed integer): ++ +[latexmath] +++++ +int32\_value(x) = x \quad \text{(no conversion)} +++++ + +* 32 bit unsigned integer -> `CL_UNSIGNED_INT8` (8-bit unsigned integer): ++ +[latexmath] +++++ +uint8\_value(x) = clamp(x, 0, 255) +++++ + +* 32 bit unsigned integer -> `CL_UNSIGNED_INT16` (16-bit unsigned integer): ++ +[latexmath] +++++ +uint16\_value(x) = clamp(x, 0, 65535) +++++ + +* 32 bit unsigned integer -> `CL_UNSIGNED_INT32` (32-bit unsigned integer): ++ +[latexmath] +++++ +uint32\_value(x) = x \quad \text{(no conversion)} +++++ + +The conversions described in this section must be correctly saturated. + +[[conversion-rules-for-srgba-and-sbgra-images]] +==== Conversion Rules for sRGBA and sBGRA Images + +Standard RGB data roughly displays colors in a linear ramp of luminosity levels such that an average observer, under average viewing conditions, can view them as perceptually equal steps on an average display. +All 0s maps to 0.0f, and all 1s maps to 1.0f. +The sequence of unsigned integer encodings between all 0s and all 1s represents a nonlinear progression in the floating-point interpretation of the numbers between 0.0f and 1.0f. +For more detail, see the <>. + +Conversion from sRGB space is automatically done by the image read instruction if the image channel order is one of the sRGB values described above. +When reading from an sRGB image, the conversion from sRGB to linear RGB is performed before filtering is applied. +If the format has an alpha channel, the alpha data is stored in linear color space.
+Conversion to sRGB space is automatically done by the image write instruction if the image channel order is one of the sRGB values described above and the device supports writing to sRGB images. + +If the format has an alpha channel, the alpha data is stored in linear color space. + +1. The following process is used by image read instructions to convert a normalized 8-bit unsigned integer sRGB color value x to a floating-point linear RGB color value y: +a. Convert a normalized 8-bit unsigned integer sRGB value x to a floating-point sRGB value r as per rules described in <<converting-normalized-integer-channel-data-types-to-floating-point-values,Converting Normalized Integer Channel Data Types to Floating-point Values>> section. ++ +[latexmath] +++++ +r=normalized\_float\_value(x) +++++ + +b. Convert a floating-point sRGB value r to a floating-point linear RGB color value y: ++ +[latexmath] +++++ +\begin{aligned} +& c_{linear}(r) = +\begin{cases} + \frac{r}{12.92} & \quad r \geq 0 \text{ and } r \leq 0.04045\\ + (\frac{r + 0.055}{1.055})^{2.4} & \quad r > 0.04045 \text{ and } r \leq 1 +\end{cases}\\ +\\ +& y = c_{linear}(r) +\end{aligned} +++++ + +2. The following process is used by image write instructions to convert a linear RGB floating-point color value y to a normalized 8-bit unsigned integer sRGB value x: +a. Convert a floating-point linear RGB value y to a normalized floating-point sRGB value r: ++ +[latexmath] +++++ +\begin{aligned} +& c_{sRGB}(y) = +\begin{cases} + 0 & \quad y = NaN \text{ or } y < 0\\ + 12.92 \times y & \quad y \geq 0 \text{ and } y < 0.0031308\\ + 1.055 \times y^{(\frac{1}{2.4})} - 0.055 & \quad y \geq 0.0031308 \text{ and } y \leq 1\\ + 1 & \quad y > 1 +\end{cases}\\ +\\ +& r = c_{sRGB}(y) +\end{aligned} +++++ + +b. Convert a normalized floating-point sRGB value r to a normalized 8-bit unsigned integer sRGB value x as per rules described in <<converting-floating-point-values-to-normalized-integer-channel-data-types,Converting Floating-point Values to Normalized Integer Channel Data Types>> section.
++ +[latexmath] +++++ +\begin{aligned} +& g(r) = +\begin{cases} + f_{preferred}(r) & \quad \text{if rounding mode is round to even}\\ + f_{approx}(r) & \quad \text{if implementation-defined rounding mode} +\end{cases}\\ +\\ +& x = g(r) +\end{aligned} +++++ + +The accuracy required when converting a normalized 8-bit unsigned integer sRGB color value x to a floating-point linear RGB color value y is given by: +[latexmath] +++++ +|x-255 \times c_{sRGB}(y)|\leq 0.5 +++++ + +The accuracy required when converting a linear RGB floating-point color value y to a normalized 8-bit unsigned integer sRGB value x is given by: +[latexmath] +++++ +|x-255 \times c_{sRGB}(y)|\leq 0.6 +++++ + +[[selecting-an-image-from-an-image-array]] +=== Selecting an Image from an Image Array + +Let `(u,v,w)` represent the unnormalized image coordinate values for reading from and/or writing to a 2D image in a 2D image array. + +When read using a sampler, the 2D image layer selected is computed as: + +[latexmath] +++++ +layer = clamp(rint(w), 0, d_t - 1) +++++ + +otherwise the layer selected is computed as: + +[latexmath] +++++ +layer = w +++++ + +(since w is already an integer) and the result is undefined if w is not one of the integers 0, 1, ... `d~t~` - 1. + +Let `(u,v)` represent the unnormalized image coordinate values for reading from and/or writing to a 1D image in a 1D image array. + +When read using a sampler, the 1D image layer selected is computed as: + +[latexmath] +++++ +layer = clamp(rint(v), 0, h_t - 1) +++++ + +otherwise the layer selected is computed as: + +[latexmath] +++++ +layer = v +++++ + +(since v is already an integer) and the result is undefined if v is not one of the integers 0, 1, ... `h~t~` - 1. 
+ +=== Data Format for Reading and Writing Images + +This section describes how image element data is returned by an +image read instruction or passed as the _Texel_ data that is +written by an image write instruction: + +For the following image channel orders, the data is a four +component vector type: + +._Mapping Image Data to Vector Components_ +[cols=",",options="header",] +|==== +|*Image Channel Order* +|*Components* + +|`R`, `Rx` +|(R, 0, 0, 1) + +|`A` +|(0, 0, 0, A) + +|`RG`, `RGx` +|(R, G, 0, 1) + +|`RGB`, `RGBx`, `sRGB`, `sRGBx` +|(R, G, B, 1) + +|`RGBA`, `BGRA`, `ARGB`, `ABGR`, `sRGBA`, `sBGRA` +|(R, G, B, A) + +|`Intensity` +|(I, I, I, I) + +|`Luminance` +|(L, L, L, 1) + +|==== + +For the following image channel orders, the data is a scalar type: + +._Scalar Image Data_ +[cols=",",options="header",] +|==== +|*Image Channel Order* +|*Scalar Value* + +|`Depth` +|D + +|`DepthStencil` +|D + +|==== + +The following table describes the mapping from image channel data type +to the data vector component type or scalar type: + +._Image Data Types_ +[cols=",",options="header",] +|==== +|*Image Channel Order* +|*Data Type* + +|`SnormInt8`, +`SnormInt16`, + +`UnormInt8`, +`UnormInt16`, + +`UnormShort565`, +`UnormShort555`, + +`UnormInt101010`, +`UnormInt101010_2`, + +`UnormInt24`, + +`HalfFloat`, + +`Float` +|*OpTypeFloat*, with _Width_ equal to 16 or 32. + +|`SignedInt8`, +`SignedInt16`, +`SignedInt32`, + +`UnsignedInt8`, +`UnsignedInt16`, +`UnsignedInt32` +|*OpTypeInt*, with _Width_ equal to 32. + +|==== + + +=== Sampled and Sampler-less Reads + +SPIR-V instructions that read from an image without a sampler (such as *OpImageRead*) behave exactly the same as the corresponding image read instruction with a sampler that has _Sampler Filter Mode_ set to *Nearest*, *Non-Normalized* coordinates, and _Sampler Addressing Mode_ set to *None*. + +There is one exception for cases where the image being read has _Image Format_ equal to a floating-point type (such as *R32f*). 
+In this exceptional case, when channel data values are denormalized, the non-sampler image read instruction may return the denormalized data, while the sampler image read instruction may flush denormalized channel data values to zero. +The coordinates must be between 0 and image size in that dimension, non inclusive. From ea38dac5a614d677f0f423a3122f1908618b5682 Mon Sep 17 00:00:00 2001 From: Jon Leech <4693344+oddhack@users.noreply.github.com> Date: Wed, 20 Mar 2024 13:39:38 -0700 Subject: [PATCH 071/190] Add framework for extensions in API spec (#950) * Add framework for extensions in API spec and examples for cl_khr_integer_dot_product and cl_khr_command_buffer. * Add uncommitted file * Complete adding cl_khr_integer_dot_product to API and C specs * Add external memory extensions * Bring cl_khr_command_buffer up to date with latest provisional updates * Add cl_khr_semaphore. * Add build script * Working snapshots api/draft/ -> working versions of extension specs api/draft/Snapshot/ -> latest snapshotted versions of ext/ Initially, these were identical to the published extension specs. This was followed by a lot of markup cleanup and reorganization, moving sections around to separate spec changes from extension appendix / descriptions, adding refpage block headers, cleaning up C function tables, making table headings uniform, using Title Case consistently in section headers, and marking all khr extensions Ratified. * Update NOTES * Use hexapdf instead of ghostscript for PDF optimization Resulting PDFs tend to be considerably smaller, and also runs about 15% faster when doing a full PDF build (2:39 vs. 3:06 on my machine). The hexapdf tool does need to be installed in the build environment - it is in the khronosgroup/docker-images:asciidoctor-spec Docker image. * Roughly sync with Vulkan scripts, then add CL customizations. Add a workaround for safely importing api.py, so that a missing api.py + empty 'api/' directory don't cause exceptions. 
Update XML schema to sync with Vulkan insofar as possible. * Replace extension names with links to extension appendices * Add framework for extension metadata includes in build and scripts. Add depends / promotedto / supersededby / ratified attributes to XML based on comments in extension specs and feedback from Ben, and start converting extension appendices to use metadata includes. Add missing OpenCL C extensions to cl.xml. This might impact downstream header generation - to be checked. Minor updates to metadocgenerator and conventions for OpenCL * Split documents containing multiple extensions into separate files, and split all extension documents into appendix and body (in draft/app/) files. Move appendices into API spec directory, leaving the bodies to be integrated in api/draft/. * Script fixes for extension/version link markup, and add provisional tags to XML * Remove workarea files not needed in this branch * Change to SPDX ID * Add cl_khr_external_semaphore* extension language And use Title Case consistently throughout the spec * Add cl_khr_subgroup* extensions, mostly to OpenCL C spec, and extension framework in the C spec * Add cl_khr_{global,local}_int32_{base,extended}_atomics extensions Really just improve language describing them, since all the functions were already in the OpenCL C spec as part of OpenCL 1.1. * Add cl_khr_fp64 extension language (virtually all of which was already present, but needed minor changes). 
* Add most remaining khr extensions to the API and C language specs: * cl_khr_async_work_group_copy_fence * cl_khr_device_enqueue_local_arg_types * cl_khr_device_uuid * cl_khr_extended_async_copies * cl_khr_extended_bit_ops * cl_khr_extended_versioning * cl_khr_gl_depth_images * cl_khr_gl_event * cl_khr_gl_msaa_sharing * cl_khr_gl_sharing * cl_khr_global_int32_base_atomics * cl_khr_global_int32_extended_atomics * cl_khr_icd * cl_khr_il_program * cl_khr_image2d_from_buffer * cl_khr_initialize_memory * cl_khr_int64_base_atomics * cl_khr_int64_extended_atomics * cl_khr_local_int32_base_atomics * cl_khr_local_int32_extended_atomics * cl_khr_mipmap_image * cl_khr_mipmap_image_writes * cl_khr_pci_bus_info * cl_khr_priority_hints * cl_khr_srgb_image_writes * cl_khr_suggested_local_work_size * cl_khr_terminate_context * cl_khr_throttle_hints * cl_khr_work_group_uniform_arithmetic There are about a dozen remaining extensions, mostly DX / EGL sharing related. * Add extension language for * cl_khr_create_command_queue * cl_khr_d3d10_sharing * cl_khr_d3d11_sharing * cl_khr_dx9_media_sharing * cl_khr_egl_event * cl_khr_egl_image * Fix SPDX typo * Fix dependency generation * Move asciidoc dependency generation into a separate shell script Executing the updated version under Travis CI (but not locally) was failing for no apparent reason. * Add new python dependency * Add extension language for * cl_khr_command_buffer * cl_khr_command_buffer_multi_device * cl_khr_command_buffer_mutable_dispatch * cl_khr_pci_bus_info * cl_khr_select_fprounding_mode * cl_khr_spir Also some minor cleanup / consistency edits in related markup. * Add cl_khr_fp16 and some missing cl_khr_depth_images extension language. Some table and section heading markup / capitalization cleanup. 
* Update api/appendix_e.asciidoc Co-authored-by: Ewan Crawford * Update api/appendix_e.asciidoc Co-authored-by: Ewan Crawford * Resolve https://github.com/KhronosGroup/OpenCL-Docs/pull/950#discussion_r1455084632 * Remove language in cl_khr_*_int32_*_atomics appendices saying that ''the volatile qualifier was added'' Closes #1013 * update * Various minor fixes to the build process Add 'makeSpec' script, replacing 'makeAll', and document it in README.adoc. Use `makeSpec -spec khr [make targets]` instead of `makeAll [make targets\` Remove static refpage source which is now extracted from the specification source, including various KHR extension API refpages and extension appendix refpages. Minor fixes to refpage extraction scripts. Remove version dependencies from `depends` attributes in cl.xml, which are not yet supported in the schema or the scripts which consume this attribute for `makeSpec`, and move them to `comment` attributes for the time being. * Minor script fixes. Try to build both core and khr targets in Travis CI. * Attempt to install needed python package * Bring extensions up to date with main at commit c66cb8209f0144fc2cf3819174433835730572eb All extensions in the API and C specs should now be consistent with the ext/ directory. * Cleanup and minor fixes / improvements Add links from the API spec extension appendices to the corresponding extension section of the C language spec. Markup fixes for a few asciidoc conditional typos. Add a few missing extension sections to the C language spec. Modify makeAll script to interpret '-spec all' as '-spec khr', since there are no extension appendices or other content for non-khr extensions at present, and building with all extensions causes problems in the document pipeline. 
* Removing separate OpenCL Extensions from the repository (#1081) Once we integrate all the khr extensions into the API and C specs, the separate documents under ext/ will serve no purpose and will have the risk of creating divergences between the sme content in the API / C specs and the separate extension spec. This removes the separate khr extension documents from the repo, and strips down the OpenCL Extension spec to just point to the corresponding extension appendix in the API spec. It will also serve as a forcing function for all future extension development to be done against the API / C specs. Any outstanding PRs against these documents will have to be rebased and the same edits reapplied to those specs, whether changes to published extensions or in-flight, but as yet unpublished khr extensions. Ideally this would merge into #950 before that is merged to main, if the WG can agree to that. It could also be retargeted to main instead, if this decision will take longer than the timeframe for merging #950. * Update XML to mark recently ratified extensions as non-provisional (#1084) Net effect is to rearrange the extension appendices so these extensions are no longer in a 'provisional' subsection, and to remove the generated comments about their being provisional from the extension refpages. 
--------- Co-authored-by: Ewan Crawford --- .travis.yml | 8 +- Makefile | 239 +- OpenCL_API.txt | 10 +- OpenCL_C.txt | 5628 +++++- OpenCL_Ext.txt | 72 +- README.adoc | 43 + api/acknowledgements.asciidoc | 5 +- api/appendix_a.asciidoc | 7 +- api/appendix_b.asciidoc | 9 +- api/appendix_c.asciidoc | 35 +- api/appendix_d.asciidoc | 5 +- api/appendix_e.asciidoc | 138 +- api/appendix_extensions.asciidoc | 37 + api/appendix_f.asciidoc | 12 +- api/appendix_g.asciidoc | 5 +- api/appendix_h.asciidoc | 14 +- api/cl_khr_3d_image_writes.asciidoc | 26 + ...l_khr_async_work_group_copy_fence.asciidoc | 29 + api/cl_khr_byte_addressable_store.asciidoc | 33 + api/cl_khr_command_buffer.asciidoc | 436 + ...l_khr_command_buffer_multi_device.asciidoc | 316 + ...r_command_buffer_mutable_dispatch.asciidoc | 378 + api/cl_khr_create_command_queue.asciidoc | 58 + api/cl_khr_d3d10_sharing.asciidoc | 136 + api/cl_khr_d3d11_sharing.asciidoc | 56 + api/cl_khr_depth_images.asciidoc | 25 + ...hr_device_enqueue_local_arg_types.asciidoc | 29 + api/cl_khr_device_uuid.asciidoc | 40 + api/cl_khr_dx9_media_sharing.asciidoc | 65 + api/cl_khr_egl_event.asciidoc | 72 + api/cl_khr_egl_image.asciidoc | 103 + api/cl_khr_expect_assume.asciidoc | 75 + api/cl_khr_extended_async_copies.asciidoc | 33 + api/cl_khr_extended_bit_ops.asciidoc | 33 + api/cl_khr_extended_versioning.asciidoc | 158 + api/cl_khr_external_memory.asciidoc | 316 + api/cl_khr_external_memory_dma_buf.asciidoc | 92 + api/cl_khr_external_memory_dx.asciidoc | 95 + api/cl_khr_external_memory_opaque_fd.asciidoc | 92 + api/cl_khr_external_memory_win32.asciidoc | 93 + api/cl_khr_external_semaphore.asciidoc | 289 + ...l_khr_external_semaphore_dx_fence.asciidoc | 49 + ..._khr_external_semaphore_opaque_fd.asciidoc | 49 + ...cl_khr_external_semaphore_sync_fd.asciidoc | 62 + api/cl_khr_external_semaphore_win32.asciidoc | 50 + api/cl_khr_fp16.asciidoc | 34 + api/cl_khr_fp64.asciidoc | 35 + api/cl_khr_gl_depth_images.asciidoc | 34 + 
api/cl_khr_gl_event.asciidoc | 109 + api/cl_khr_gl_msaa_sharing.asciidoc | 38 + api/cl_khr_gl_sharing.asciidoc | 242 + api/cl_khr_global_int32_base_atomics.asciidoc | 29 + ...khr_global_int32_extended_atomics.asciidoc | 31 + {ext => api}/cl_khr_icd.asciidoc | 207 +- api/cl_khr_il_program.asciidoc | 41 + api/cl_khr_image2d_from_buffer.asciidoc | 33 + api/cl_khr_initialize_memory.asciidoc | 46 + api/cl_khr_int64_base_atomics.asciidoc | 27 + api/cl_khr_int64_extended_atomics.asciidoc | 27 + api/cl_khr_integer_dot_product.asciidoc | 64 + api/cl_khr_local_int32_base_atomics.asciidoc | 29 + ..._khr_local_int32_extended_atomics.asciidoc | 31 + api/cl_khr_mipmap_image.asciidoc | 39 + api/cl_khr_mipmap_image_writes.asciidoc | 29 + api/cl_khr_pci_bus_info.asciidoc | 43 + api/cl_khr_priority_hints.asciidoc | 46 + api/cl_khr_select_fprounding_mode.asciidoc | 31 + api/cl_khr_semaphore.asciidoc | 262 + api/cl_khr_spir.asciidoc | 39 + api/cl_khr_srgb_image_writes.asciidoc | 33 + api/cl_khr_subgroup_ballot.asciidoc | 52 + api/cl_khr_subgroup_clustered_reduce.asciidoc | 46 + api/cl_khr_subgroup_extended_types.asciidoc | 50 + api/cl_khr_subgroup_named_barrier.asciidoc | 33 + ...r_subgroup_non_uniform_arithmetic.asciidoc | 71 + api/cl_khr_subgroup_non_uniform_vote.asciidoc | 45 + api/cl_khr_subgroup_rotate.asciidoc | 32 + api/cl_khr_subgroup_shuffle.asciidoc | 37 + api/cl_khr_subgroup_shuffle_relative.asciidoc | 38 + api/cl_khr_subgroups.asciidoc | 53 + api/cl_khr_suggested_local_work_size.asciidoc | 37 + api/cl_khr_terminate_context.asciidoc | 60 + api/cl_khr_throttle_hints.asciidoc | 45 + ...khr_work_group_uniform_arithmetic.asciidoc | 49 + api/dictionary.asciidoc | 5 +- api/embedded_profile.asciidoc | 5 +- api/footnotes.asciidoc | 11 +- api/glossary.asciidoc | 5 +- api/introduction.asciidoc | 5 +- api/opencl_architecture.asciidoc | 161 +- api/opencl_assoc_spec.asciidoc | 5 +- api/opencl_platform_layer.asciidoc | 1507 +- api/opencl_runtime_layer.asciidoc | 15334 +++++++++++----- 
api/provisional_notice.asciidoc | 11 + c/feature-dictionary.asciidoc | 16 + c/footnotes.asciidoc | 5 +- config/opencl.asciidoc | 1 + config/rouge_opencl.rb | 2 +- ext/cl_khr_3d_image_writes.asciidoc | 67 - ...l_khr_async_work_group_copy_fence.asciidoc | 57 - ext/cl_khr_byte_addressable_store.asciidoc | 22 - ext/cl_khr_command_buffer.asciidoc | 2020 -- ...l_khr_command_buffer_multi_device.asciidoc | 767 - ...r_command_buffer_mutable_dispatch.asciidoc | 1010 - ext/cl_khr_create_command_queue.asciidoc | 154 - ext/cl_khr_d3d10_sharing.asciidoc | 889 - ext/cl_khr_d3d11_sharing.asciidoc | 813 - ext/cl_khr_depth_images.asciidoc | 297 - ...hr_device_enqueue_local_arg_types.asciidoc | 52 - ext/cl_khr_device_uuid.asciidoc | 92 - ext/cl_khr_dx9_media_sharing.asciidoc | 737 - ext/cl_khr_egl_event.asciidoc | 211 - ext/cl_khr_egl_image.asciidoc | 432 - ext/cl_khr_expect_assume.asciidoc | 70 - ext/cl_khr_extended_async_copies.asciidoc | 182 - ext/cl_khr_extended_bit_ops.asciidoc | 131 - ext/cl_khr_extended_versioning.asciidoc | 283 - ext/cl_khr_external_memory.asciidoc | 608 - ext/cl_khr_external_semaphore.asciidoc | 662 - ext/cl_khr_fp16.asciidoc | 1928 -- ext/cl_khr_fp64.asciidoc | 1303 -- ext/cl_khr_gl_depth_images.asciidoc | 120 - ext/cl_khr_gl_event.asciidoc | 274 - ext/cl_khr_gl_msaa_sharing.asciidoc | 405 - ext/cl_khr_gl_sharing__context.asciidoc | 459 - ext/cl_khr_gl_sharing__memobjs.asciidoc | 778 - ext/cl_khr_il_program.asciidoc | 169 - ext/cl_khr_image2d_from_buffer.asciidoc | 66 - ext/cl_khr_initialize_memory.asciidoc | 71 - ext/cl_khr_int32_atomics.asciidoc | 236 - ext/cl_khr_int64_atomics.asciidoc | 155 - ext/cl_khr_integer_dot_product.asciidoc | 256 - ext/cl_khr_mipmap_image.asciidoc | 609 - ext/cl_khr_pci_bus_info.asciidoc | 75 - ext/cl_khr_priority_hints.asciidoc | 48 - ext/cl_khr_select_fprounding_mode.asciidoc | 66 - ext/cl_khr_semaphore.asciidoc | 634 - ext/cl_khr_spir.asciidoc | 127 - ext/cl_khr_srgb_image_writes.asciidoc | 24 - 
ext/cl_khr_subgroup_extensions.asciidoc | 1071 -- ext/cl_khr_subgroup_named_barrier.asciidoc | 45 - ext/cl_khr_subgroup_rotate.asciidoc | 121 - ext/cl_khr_subgroups.asciidoc | 426 - ext/cl_khr_suggested_local_work_size.asciidoc | 87 - ext/cl_khr_terminate_context.asciidoc | 141 - ext/cl_khr_throttle_hints.asciidoc | 51 - ...khr_work_group_uniform_arithmetic.asciidoc | 239 - ext/introduction.asciidoc | 50 +- ext/quick_reference.asciidoc | 155 +- .../cl_ext_image_requirements_info.asciidoc | 11 +- makeSpec | 154 + man/static/clCreateEventFromEGLSyncKHR.txt | 107 - man/static/clCreateEventFromGLsyncKHR.txt | 79 - man/static/clCreateFromD3D10BufferKHR.txt | 64 - man/static/clCreateFromD3D10Texture2DKHR.txt | 69 - man/static/clCreateFromD3D10Texture3DKHR.txt | 121 - man/static/clCreateFromD3D11BufferKHR.txt | 64 - man/static/clCreateFromD3D11Texture2DKHR.txt | 69 - man/static/clCreateFromD3D11Texture3DKHR.txt | 111 - man/static/clCreateFromDX9MediaSurfaceKHR.txt | 107 - man/static/clCreateFromEGLImageKHR.txt | 141 - man/static/clCreateFromGLBuffer.txt | 86 - man/static/clCreateFromGLRenderbuffer.txt | 86 - man/static/clCreateFromGLTexture.txt | 120 - .../clEnqueueAcquireD3D10ObjectsKHR.txt | 88 - .../clEnqueueAcquireD3D11ObjectsKHR.txt | 89 - .../clEnqueueAcquireDX9MediaSurfacesKHR.txt | 89 - man/static/clEnqueueAcquireEGLObjectsKHR.txt | 84 - man/static/clEnqueueAcquireGLObjects.txt | 117 - .../clEnqueueReleaseD3D10ObjectsKHR.txt | 86 - .../clEnqueueReleaseD3D11ObjectsKHR.txt | 86 - .../clEnqueueReleaseDX9MediaSurfacesKHR.txt | 88 - man/static/clEnqueueReleaseEGLObjectsKHR.txt | 91 - man/static/clEnqueueReleaseGLObjects.txt | 117 - man/static/clGetDeviceIDsFromD3D10KHR.txt | 108 - man/static/clGetDeviceIDsFromD3D11KHR.txt | 104 - .../clGetDeviceIDsFromDX9MediaAdapterKHR.txt | 111 - man/static/clGetGLContextInfoKHR.txt | 130 - man/static/clGetGLObjectInfo.txt | 63 - man/static/clGetGLTextureInfo.txt | 85 - man/static/clIcdGetPlatformIDsKHR.txt | 63 - 
man/static/clTerminateContextKHR.txt | 93 - man/static/cl_khr_3d_image_writes.txt | 35 - man/static/cl_khr_byte_addressable_store.txt | 35 - man/static/cl_khr_d3d10_sharing.txt | 48 - man/static/cl_khr_d3d11_sharing.txt | 75 - man/static/cl_khr_depth_images.txt | 35 - .../cl_khr_device_enqueue_local_arg_types.txt | 41 - man/static/cl_khr_dx9_media_sharing.txt | 46 - man/static/cl_khr_egl_event.txt | 53 - man/static/cl_khr_egl_image.txt | 45 - man/static/cl_khr_fp16.txt | 121 - man/static/cl_khr_fp64.txt | 39 - man/static/cl_khr_gl_depth_images.txt | 68 - man/static/cl_khr_gl_event.txt | 68 - man/static/cl_khr_gl_msaa_sharing.txt | 84 - man/static/cl_khr_gl_sharing.txt | 55 - .../cl_khr_global_int32_base_atomics.txt | 41 - .../cl_khr_global_int32_extended_atomics.txt | 41 - man/static/cl_khr_icd.txt | 126 - man/static/cl_khr_il_program.txt | 36 - man/static/cl_khr_image2d_from_buffer.txt | 35 - man/static/cl_khr_initialize_memory.txt | 62 - man/static/cl_khr_int64_base_atomics.txt | 39 - man/static/cl_khr_int64_extended_atomics.txt | 39 - .../cl_khr_local_int32_base_atomics.txt | 41 - .../cl_khr_local_int32_extended_atomics.txt | 41 - man/static/cl_khr_mipmap_image.txt | 63 - man/static/cl_khr_priority_hints.txt | 40 - man/static/cl_khr_spir.txt | 55 - man/static/cl_khr_srgb_image_writes.txt | 50 - man/static/cl_khr_subgroups.txt | 36 - man/static/cl_khr_terminate_context.txt | 54 - man/static/cl_khr_throttle_hints.txt | 43 - man/static/gl_formatsInc.txt | 74 - man/static/gl_lifetimeInc.txt | 18 - man/static/gl_sharingInc.txt | 27 - man/static/gl_syncInc.txt | 66 - man/static/sharingD3D10Inc.txt | 45 - man/static/sharingD3D11Inc.txt | 49 - man/static/sharingDX9Inc.txt | 77 - scripts/apiconventions.py | 13 + scripts/cgenerator.py | 143 +- scripts/clconventions.py | 135 +- scripts/docgenerator.py | 228 +- scripts/extdependency.py | 205 + scripts/extensionmetadocgenerator.py | 269 +- scripts/find_adoc_deps | 17 + scripts/genRef.py | 252 +- scripts/gen_dictionaries.py | 
5 +- scripts/gen_version_notes.py | 81 +- scripts/gencl.py | 113 +- scripts/generator.py | 324 +- scripts/parse_dependency.py | 403 + scripts/pygenerator.py | 369 +- scripts/reflib.py | 98 +- scripts/reg.py | 728 +- scripts/scriptgenerator.py | 390 + scripts/{ => spec_tools}/conventions.py | 207 +- scripts/spec_tools/util.py | 15 +- xml/cl.xml | 137 +- xml/registry.rnc | 194 +- 242 files changed, 26856 insertions(+), 31197 deletions(-) create mode 100644 api/appendix_extensions.asciidoc create mode 100644 api/cl_khr_3d_image_writes.asciidoc create mode 100644 api/cl_khr_async_work_group_copy_fence.asciidoc create mode 100644 api/cl_khr_byte_addressable_store.asciidoc create mode 100644 api/cl_khr_command_buffer.asciidoc create mode 100644 api/cl_khr_command_buffer_multi_device.asciidoc create mode 100644 api/cl_khr_command_buffer_mutable_dispatch.asciidoc create mode 100644 api/cl_khr_create_command_queue.asciidoc create mode 100644 api/cl_khr_d3d10_sharing.asciidoc create mode 100644 api/cl_khr_d3d11_sharing.asciidoc create mode 100644 api/cl_khr_depth_images.asciidoc create mode 100644 api/cl_khr_device_enqueue_local_arg_types.asciidoc create mode 100644 api/cl_khr_device_uuid.asciidoc create mode 100644 api/cl_khr_dx9_media_sharing.asciidoc create mode 100644 api/cl_khr_egl_event.asciidoc create mode 100644 api/cl_khr_egl_image.asciidoc create mode 100644 api/cl_khr_expect_assume.asciidoc create mode 100644 api/cl_khr_extended_async_copies.asciidoc create mode 100644 api/cl_khr_extended_bit_ops.asciidoc create mode 100644 api/cl_khr_extended_versioning.asciidoc create mode 100644 api/cl_khr_external_memory.asciidoc create mode 100644 api/cl_khr_external_memory_dma_buf.asciidoc create mode 100644 api/cl_khr_external_memory_dx.asciidoc create mode 100644 api/cl_khr_external_memory_opaque_fd.asciidoc create mode 100644 api/cl_khr_external_memory_win32.asciidoc create mode 100644 api/cl_khr_external_semaphore.asciidoc create mode 100644 
api/cl_khr_external_semaphore_dx_fence.asciidoc create mode 100644 api/cl_khr_external_semaphore_opaque_fd.asciidoc create mode 100644 api/cl_khr_external_semaphore_sync_fd.asciidoc create mode 100644 api/cl_khr_external_semaphore_win32.asciidoc create mode 100644 api/cl_khr_fp16.asciidoc create mode 100644 api/cl_khr_fp64.asciidoc create mode 100644 api/cl_khr_gl_depth_images.asciidoc create mode 100644 api/cl_khr_gl_event.asciidoc create mode 100644 api/cl_khr_gl_msaa_sharing.asciidoc create mode 100644 api/cl_khr_gl_sharing.asciidoc create mode 100644 api/cl_khr_global_int32_base_atomics.asciidoc create mode 100644 api/cl_khr_global_int32_extended_atomics.asciidoc rename {ext => api}/cl_khr_icd.asciidoc (58%) create mode 100644 api/cl_khr_il_program.asciidoc create mode 100644 api/cl_khr_image2d_from_buffer.asciidoc create mode 100644 api/cl_khr_initialize_memory.asciidoc create mode 100644 api/cl_khr_int64_base_atomics.asciidoc create mode 100644 api/cl_khr_int64_extended_atomics.asciidoc create mode 100644 api/cl_khr_integer_dot_product.asciidoc create mode 100644 api/cl_khr_local_int32_base_atomics.asciidoc create mode 100644 api/cl_khr_local_int32_extended_atomics.asciidoc create mode 100644 api/cl_khr_mipmap_image.asciidoc create mode 100644 api/cl_khr_mipmap_image_writes.asciidoc create mode 100644 api/cl_khr_pci_bus_info.asciidoc create mode 100644 api/cl_khr_priority_hints.asciidoc create mode 100644 api/cl_khr_select_fprounding_mode.asciidoc create mode 100644 api/cl_khr_semaphore.asciidoc create mode 100644 api/cl_khr_spir.asciidoc create mode 100644 api/cl_khr_srgb_image_writes.asciidoc create mode 100644 api/cl_khr_subgroup_ballot.asciidoc create mode 100644 api/cl_khr_subgroup_clustered_reduce.asciidoc create mode 100644 api/cl_khr_subgroup_extended_types.asciidoc create mode 100644 api/cl_khr_subgroup_named_barrier.asciidoc create mode 100644 api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc create mode 100644 
api/cl_khr_subgroup_non_uniform_vote.asciidoc create mode 100644 api/cl_khr_subgroup_rotate.asciidoc create mode 100644 api/cl_khr_subgroup_shuffle.asciidoc create mode 100644 api/cl_khr_subgroup_shuffle_relative.asciidoc create mode 100644 api/cl_khr_subgroups.asciidoc create mode 100644 api/cl_khr_suggested_local_work_size.asciidoc create mode 100644 api/cl_khr_terminate_context.asciidoc create mode 100644 api/cl_khr_throttle_hints.asciidoc create mode 100644 api/cl_khr_work_group_uniform_arithmetic.asciidoc create mode 100644 api/provisional_notice.asciidoc delete mode 100644 ext/cl_khr_3d_image_writes.asciidoc delete mode 100644 ext/cl_khr_async_work_group_copy_fence.asciidoc delete mode 100644 ext/cl_khr_byte_addressable_store.asciidoc delete mode 100644 ext/cl_khr_command_buffer.asciidoc delete mode 100644 ext/cl_khr_command_buffer_multi_device.asciidoc delete mode 100644 ext/cl_khr_command_buffer_mutable_dispatch.asciidoc delete mode 100644 ext/cl_khr_create_command_queue.asciidoc delete mode 100644 ext/cl_khr_d3d10_sharing.asciidoc delete mode 100644 ext/cl_khr_d3d11_sharing.asciidoc delete mode 100644 ext/cl_khr_depth_images.asciidoc delete mode 100644 ext/cl_khr_device_enqueue_local_arg_types.asciidoc delete mode 100644 ext/cl_khr_device_uuid.asciidoc delete mode 100644 ext/cl_khr_dx9_media_sharing.asciidoc delete mode 100644 ext/cl_khr_egl_event.asciidoc delete mode 100644 ext/cl_khr_egl_image.asciidoc delete mode 100644 ext/cl_khr_expect_assume.asciidoc delete mode 100644 ext/cl_khr_extended_async_copies.asciidoc delete mode 100644 ext/cl_khr_extended_bit_ops.asciidoc delete mode 100644 ext/cl_khr_extended_versioning.asciidoc delete mode 100644 ext/cl_khr_external_memory.asciidoc delete mode 100644 ext/cl_khr_external_semaphore.asciidoc delete mode 100644 ext/cl_khr_fp16.asciidoc delete mode 100644 ext/cl_khr_fp64.asciidoc delete mode 100644 ext/cl_khr_gl_depth_images.asciidoc delete mode 100644 ext/cl_khr_gl_event.asciidoc delete mode 100644 
ext/cl_khr_gl_msaa_sharing.asciidoc delete mode 100644 ext/cl_khr_gl_sharing__context.asciidoc delete mode 100644 ext/cl_khr_gl_sharing__memobjs.asciidoc delete mode 100644 ext/cl_khr_il_program.asciidoc delete mode 100644 ext/cl_khr_image2d_from_buffer.asciidoc delete mode 100644 ext/cl_khr_initialize_memory.asciidoc delete mode 100644 ext/cl_khr_int32_atomics.asciidoc delete mode 100644 ext/cl_khr_int64_atomics.asciidoc delete mode 100644 ext/cl_khr_integer_dot_product.asciidoc delete mode 100644 ext/cl_khr_mipmap_image.asciidoc delete mode 100644 ext/cl_khr_pci_bus_info.asciidoc delete mode 100644 ext/cl_khr_priority_hints.asciidoc delete mode 100644 ext/cl_khr_select_fprounding_mode.asciidoc delete mode 100644 ext/cl_khr_semaphore.asciidoc delete mode 100644 ext/cl_khr_spir.asciidoc delete mode 100644 ext/cl_khr_srgb_image_writes.asciidoc delete mode 100644 ext/cl_khr_subgroup_extensions.asciidoc delete mode 100644 ext/cl_khr_subgroup_named_barrier.asciidoc delete mode 100644 ext/cl_khr_subgroup_rotate.asciidoc delete mode 100644 ext/cl_khr_subgroups.asciidoc delete mode 100644 ext/cl_khr_suggested_local_work_size.asciidoc delete mode 100644 ext/cl_khr_terminate_context.asciidoc delete mode 100644 ext/cl_khr_throttle_hints.asciidoc delete mode 100644 ext/cl_khr_work_group_uniform_arithmetic.asciidoc create mode 100755 makeSpec delete mode 100644 man/static/clCreateEventFromEGLSyncKHR.txt delete mode 100644 man/static/clCreateEventFromGLsyncKHR.txt delete mode 100644 man/static/clCreateFromD3D10BufferKHR.txt delete mode 100644 man/static/clCreateFromD3D10Texture2DKHR.txt delete mode 100644 man/static/clCreateFromD3D10Texture3DKHR.txt delete mode 100644 man/static/clCreateFromD3D11BufferKHR.txt delete mode 100644 man/static/clCreateFromD3D11Texture2DKHR.txt delete mode 100644 man/static/clCreateFromD3D11Texture3DKHR.txt delete mode 100644 man/static/clCreateFromDX9MediaSurfaceKHR.txt delete mode 100644 man/static/clCreateFromEGLImageKHR.txt delete mode 100644 
man/static/clCreateFromGLBuffer.txt delete mode 100644 man/static/clCreateFromGLRenderbuffer.txt delete mode 100644 man/static/clCreateFromGLTexture.txt delete mode 100644 man/static/clEnqueueAcquireD3D10ObjectsKHR.txt delete mode 100644 man/static/clEnqueueAcquireD3D11ObjectsKHR.txt delete mode 100644 man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt delete mode 100644 man/static/clEnqueueAcquireEGLObjectsKHR.txt delete mode 100644 man/static/clEnqueueAcquireGLObjects.txt delete mode 100644 man/static/clEnqueueReleaseD3D10ObjectsKHR.txt delete mode 100644 man/static/clEnqueueReleaseD3D11ObjectsKHR.txt delete mode 100644 man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt delete mode 100644 man/static/clEnqueueReleaseEGLObjectsKHR.txt delete mode 100644 man/static/clEnqueueReleaseGLObjects.txt delete mode 100644 man/static/clGetDeviceIDsFromD3D10KHR.txt delete mode 100644 man/static/clGetDeviceIDsFromD3D11KHR.txt delete mode 100644 man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt delete mode 100644 man/static/clGetGLContextInfoKHR.txt delete mode 100644 man/static/clGetGLObjectInfo.txt delete mode 100644 man/static/clGetGLTextureInfo.txt delete mode 100644 man/static/clIcdGetPlatformIDsKHR.txt delete mode 100644 man/static/clTerminateContextKHR.txt delete mode 100644 man/static/cl_khr_3d_image_writes.txt delete mode 100644 man/static/cl_khr_byte_addressable_store.txt delete mode 100644 man/static/cl_khr_d3d10_sharing.txt delete mode 100644 man/static/cl_khr_d3d11_sharing.txt delete mode 100644 man/static/cl_khr_depth_images.txt delete mode 100644 man/static/cl_khr_device_enqueue_local_arg_types.txt delete mode 100644 man/static/cl_khr_dx9_media_sharing.txt delete mode 100644 man/static/cl_khr_egl_event.txt delete mode 100644 man/static/cl_khr_egl_image.txt delete mode 100644 man/static/cl_khr_fp16.txt delete mode 100644 man/static/cl_khr_fp64.txt delete mode 100644 man/static/cl_khr_gl_depth_images.txt delete mode 100644 man/static/cl_khr_gl_event.txt delete 
mode 100644 man/static/cl_khr_gl_msaa_sharing.txt delete mode 100644 man/static/cl_khr_gl_sharing.txt delete mode 100644 man/static/cl_khr_global_int32_base_atomics.txt delete mode 100644 man/static/cl_khr_global_int32_extended_atomics.txt delete mode 100644 man/static/cl_khr_icd.txt delete mode 100644 man/static/cl_khr_il_program.txt delete mode 100644 man/static/cl_khr_image2d_from_buffer.txt delete mode 100644 man/static/cl_khr_initialize_memory.txt delete mode 100644 man/static/cl_khr_int64_base_atomics.txt delete mode 100644 man/static/cl_khr_int64_extended_atomics.txt delete mode 100644 man/static/cl_khr_local_int32_base_atomics.txt delete mode 100644 man/static/cl_khr_local_int32_extended_atomics.txt delete mode 100644 man/static/cl_khr_mipmap_image.txt delete mode 100644 man/static/cl_khr_priority_hints.txt delete mode 100644 man/static/cl_khr_spir.txt delete mode 100644 man/static/cl_khr_srgb_image_writes.txt delete mode 100644 man/static/cl_khr_subgroups.txt delete mode 100644 man/static/cl_khr_terminate_context.txt delete mode 100644 man/static/cl_khr_throttle_hints.txt delete mode 100644 man/static/gl_formatsInc.txt delete mode 100644 man/static/gl_lifetimeInc.txt delete mode 100644 man/static/gl_sharingInc.txt delete mode 100644 man/static/gl_syncInc.txt delete mode 100644 man/static/sharingD3D10Inc.txt delete mode 100644 man/static/sharingD3D11Inc.txt delete mode 100644 man/static/sharingDX9Inc.txt create mode 100644 scripts/apiconventions.py create mode 100755 scripts/extdependency.py create mode 100755 scripts/find_adoc_deps create mode 100755 scripts/parse_dependency.py create mode 100644 scripts/scriptgenerator.py rename scripts/{ => spec_tools}/conventions.py (62%) diff --git a/.travis.yml b/.travis.yml index fc2b142cd..f8e77eddd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ rvm: - 2.7.1 before_install: - - sudo apt-get install -y libpango1.0-dev ghostscript fonts-lyx jing + - sudo apt-get install -y libpango1.0-dev ghostscript 
fonts-lyx jing libavalon-framework-java libbatik-java python3-pyparsing - gem install asciidoctor -v 2.0.16 - gem install coderay -v 1.1.1 - gem install rouge -v 3.19.0 @@ -17,17 +17,19 @@ before_install: - gem install hexapdf -v 0.27.0 - gem install asciidoctor-pdf -v 1.5.0 - gem install asciidoctor-mathematical -v 0.3.5 + - pip install pyparsing script: - git describe --tags --dirty - - make -O -j 5 api c env ext cxx4opencl manhtmlpages - make -C xml validate + - python3 makeSpec -clean -spec core OUTDIR=out.core -j 5 api c env ext cxx4opencl + - python3 makeSpec -clean -spec khr OUTDIR=out.khr -j 12 html manhtmlpages deploy: provider: releases api_key: $GH_TOKEN file_glob: true - file: out/pdf/* + file: out.*/pdf/* skip_cleanup: true on: tags: true diff --git a/Makefile b/Makefile index 239a8e4e3..8e171b81e 100644 --- a/Makefile +++ b/Makefile @@ -1,48 +1,56 @@ -# Copyright (c) 2013-2024 The Khronos Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 +# Copyright 2013-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +# OpenCL Specifications Makefile # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# To build the specifications / reference pages (refpages) with optional +# extensions included, set the $(EXTENSIONS) variable on the make +# command line to a space-separated list of extension names. 
+# $(EXTENSIONS) is converted into generator script +# arguments $(EXTOPTIONS) and into $(ATTRIBFILE) + +EXTS := $(sort $(EXTENSIONS)) +EXTOPTIONS := $(foreach ext,$(EXTS),-extension $(ext)) QUIET ?= +VERYQUIET ?= @ ASCIIDOCTOR ?= asciidoctor -XMLLINT ?= xmllint -DBLATEX ?= dblatex -DOS2UNIX ?= dos2unix RM = rm -f RMRF = rm -rf MKDIR = mkdir -p CP = cp GITHEAD = ./.git/logs/HEAD +# Where the repo root is +ROOTDIR = $(CURDIR) +# Where the spec files are +SPECDIR = $(CURDIR) + +# Path to scripts used in generation +SCRIPTS = $(ROOTDIR)/scripts +# Path to configs and asciidoc extensions used in generation +CONFIGS = $(ROOTDIR)/config + # Target directories for output files # HTMLDIR - 'html' target # PDFDIR - 'pdf' target # CHECKDIR - 'allchecks' target -OUTDIR := out -HTMLDIR := $(OUTDIR)/html -PDFDIR := $(OUTDIR)/pdf +OUTDIR = out +HTMLDIR = $(OUTDIR)/html +PDFDIR = $(OUTDIR)/pdf +PYAPIMAP = $(GENERATED)/apimap.py # PDF Equations are written to SVGs, this dictates the location to store those files (temporary) -PDFMATHDIR := $(OUTDIR)/equations_temp +PDFMATHDIR = $(OUTDIR)/equations_temp # Set VERBOSE to -v to see what asciidoc is doing. VERBOSE = # asciidoc attributes to set. 
# NOTEOPTS sets options controlling which NOTEs are generated -# ATTRIBOPTS sets the api revision and enables MathJax generation, and -# the path to generate include files +# ATTRIBOPTS sets the api revision and enables KaTeX generation # ADOCOPTS options for asciidoc->HTML5 output (book document type) -# ADOCMANOPTS options for asciidoc->HTML5 output (manpage document type) +# ADOCREFOPTS options for asciidoc->HTML5 output (manpage document type) # Currently unused in CL spec NOTEOPTS = -a editing-notes # Spell out RFC2822 format as not all date commands support -R @@ -79,31 +87,47 @@ CXX4OPENCL_DOCREVISION = DocRev2021.12 CXX4OPENCL_DOCREMARK = $(SPECREMARK) \ tag: $(SPECREVISION) +# Some of the attributes used in building spec documents: +# generated - absolute path to generated sources +# refprefix - controls which generated extension metafiles are +# included at build time. Must be empty for specification, +# 'refprefix.' for refpages (see ADOCREFOPTS below). COMMONATTRIBOPTS = -a revdate="$(SPECDATE)" \ -a stem=latexmath \ -a generated=$(GENERATED) \ - -a sectnumlevels=5 + -a sectnumlevels=5 \ + -a refprefix ATTRIBOPTS = -a revnumber="$(SPECREVISION)" \ -a revremark="$(SPECREMARK)" \ $(COMMONATTRIBOPTS) -CXX4OPENCL_ATTRIBOPTS = -a revnumber="$(CXX4OPENCL_DOCREVISION)" \ +CXX4OPENCL_ATTRIBOPTS = -a revnumber="$(CXX4OPENCL_DOCREVISION)" \ -a revremark="$(CXX4OPENCL_DOCREMARK)" \ $(COMMONATTRIBOPTS) -ADOCEXTS = -r $(CURDIR)/config/sectnumoffset-treeprocessor.rb \ - -r $(CURDIR)/config/spec-macros.rb \ - -r $(CURDIR)/config/rouge_opencl.rb +ADOCEXTS = -r $(CONFIGS)/sectnumoffset-treeprocessor.rb \ + -r $(CONFIGS)/spec-macros.rb \ + -r $(CONFIGS)/rouge_opencl.rb CXX4OPENCL_ADOCOPTS = -d book $(CXX4OPENCL_ATTRIBOPTS) $(NOTEOPTS) $(VERBOSE) $(ADOCEXTS) ADOCCOMMONOPTS = -a apispec="$(CURDIR)/api" \ - -a config="$(CURDIR)/config" \ + -a config="$(CONFIGS)" \ -a cspec="$(CURDIR)/c" \ -a images="$(CURDIR)/images" \ $(ATTRIBOPTS) $(NOTEOPTS) $(VERBOSE) $(ADOCEXTS) 
ADOCOPTS = -d book $(ADOCCOMMONOPTS) -ADOCMANOPTS = -d manpage $(ADOCCOMMONOPTS) + +# Asciidoctor options to build refpages +# +# ADOCREFOPTS *must* be placed after ADOCOPTS in the command line, so +# that it can override spec attribute values. +# +# cross-file-links makes custom macros link to other refpages +# refprefix includes the refpage (not spec) extension metadata. +# isrefpage is for refpage-specific content +ADOCREFOPTS = -a cross-file-links -a refprefix='refpage.' \ + -a isrefpage -d manpage # ADOCHTMLOPTS relies on the relative runtime path from the output HTML # file to the katex scripts being set with KATEXDIR. This is overridden @@ -111,9 +135,9 @@ ADOCMANOPTS = -d manpage $(ADOCCOMMONOPTS) # ADOCHTMLOPTS also relies on the absolute build-time path to the # 'stylesdir' containing our custom CSS. KATEXDIR = ../katex -ADOCHTMLEXTS = -r $(CURDIR)/config/katex_replace.rb +ADOCHTMLEXTS = -r $(CONFIGS)/katex_replace.rb ADOCHTMLOPTS = $(ADOCHTMLEXTS) -a katexpath=$(KATEXDIR) \ - -a stylesheet=khronos.css -a stylesdir=$(CURDIR)/config \ + -a stylesheet=khronos.css -a stylesdir=$(CONFIGS) \ -a sectanchors ADOCPDFEXTS = -r asciidoctor-pdf -r asciidoctor-mathematical --trace @@ -121,14 +145,21 @@ ADOCPDFOPTS = $(ADOCPDFEXTS) -a mathematical-format=svg \ -a imagesoutdir=$(PDFMATHDIR) # Where to put dynamically generated dependencies of the spec and other -# targets, from API XML. GENERATED and APIINCDIR specify the location of +# targets, from API XML. GENERATED and APIPATH specify the location of # the API interface includes. -# GENDEPENDS could have multiple dependencies. 
GENERATED = $(CURDIR)/generated REFPATH = $(GENERATED)/refpage -APIINCDIR = $(GENERATED)/api -VERSIONDIR = $(APIINCDIR)/version-notes -GENDEPENDS = $(APIINCDIR)/timeMarker +APIPATH = $(GENERATED)/api +METAPATH = $(GENERATED)/meta +VERSIONDIR = $(APIPATH)/version-notes +ATTRIBFILE = $(GENERATED)/specattribs.adoc + +# timeMarker is a proxy target created when many generated files are +# made at once +APIDEPEND = $(APIPATH)/timeMarker +METADEPEND = $(METAPATH)/timeMarker +# All generated dependencies +GENDEPENDS = $(APIDEPEND) $(METADEPEND) $(ATTRIBFILE) .PHONY: directories @@ -173,16 +204,22 @@ pdf: apipdf envpdf extpdf extensionspdf cxxpdf cpdf icdinstpdf # 'html' causing specs to *always* be regenerated. src: - @echo APISPECSRC = $(APISPECSRC) - @echo ENVSPECSRC = $(ENVSPECSRC) - @echo EXTSPECSRC = $(EXTSPECSRC) + @echo APISPECSRC = $(APISPECSRC) + @echo CSPECSRC = $(CSPECSRC) + @echo ENVSPECSRC = $(ENVSPECSRC) + @echo EXTSPECSRC = $(EXTSPECSRC) + @echo CEXTDOCSRC = $(CEXTDOCSRC) + @echo CXX4OPENCLDOCSRC = $(CXX4OPENCLDOCSRC) + @echo CXXSPECSRC = $(CXXSPECSRC) + @echo EXTENSIONSSPECSRC = $(EXTENSIONSSPECSRC) + @echo ICDINSTSPECSRC = $(ICDINSTSPECSRC) # API spec # Top-level spec source file APISPEC = OpenCL_API APISPECSRC = $(APISPEC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(APISPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(APISPEC).txt $(GENERATED)) apihtml: $(HTMLDIR)/$(APISPEC).html $(APISPECSRC) @@ -202,7 +239,7 @@ $(PDFDIR)/$(APISPEC).pdf: $(APISPECSRC) # Top-level spec source file ENVSPEC = OpenCL_Env ENVSPECSRC = $(ENVSPEC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(ENVSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(ENVSPEC).txt $(GENERATED)) envhtml: $(HTMLDIR)/$(ENVSPEC).html $(ENVSPECSRC) @@ -220,7 +257,7 @@ $(PDFDIR)/$(ENVSPEC).pdf: $(ENVSPECSRC) # Extensions spec EXTSPEC = OpenCL_Ext EXTSPECSRC = $(EXTSPEC).txt $(GENDEPENDS) \ - $(shell 
grep ^include:: $(EXTSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(EXTSPEC).txt $(GENERATED)) exthtml: $(HTMLDIR)/$(EXTSPEC).html $(EXTSPECSRC) @@ -239,12 +276,12 @@ $(PDFDIR)/$(EXTSPEC).pdf: $(EXTSPECSRC) EXTDIR = extensions EXTENSIONSSPEC = extensions EXTENSIONSSPECSRC = $(EXTDIR)/$(EXTENSIONSSPEC).txt ${GENDEPENDS} \ - $(shell grep ^include:: $(EXTDIR)/$(EXTENSIONSSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(EXTDIR)/$(EXTENSIONSSPEC).txt $(GENERATED)) # Included extension documents -EXTENSIONS := $(notdir $(wildcard $(EXTDIR)/[A-Za-z]*.asciidoc)) -EXTENSIONS_HTML = $(patsubst %.asciidoc,$(HTMLDIR)/%.html,$(EXTENSIONS)) -EXTENSIONS_PDF = $(patsubst %.asciidoc,$(PDFDIR)/%.pdf,$(EXTENSIONS)) +EXTDOCS := $(notdir $(wildcard $(EXTDIR)/[A-Za-z]*.asciidoc)) +EXTENSIONS_HTML = $(patsubst %.asciidoc,$(HTMLDIR)/%.html,$(EXTDOCS)) +EXTENSIONS_PDF = $(patsubst %.asciidoc,$(PDFDIR)/%.pdf,$(EXTDOCS)) extensionshtml: $(HTMLDIR)/$(EXTENSIONSSPEC).html $(EXTENSIONSSPECSRC) $(EXTENSIONS_HTML) @@ -270,7 +307,7 @@ $(PDFDIR)/$(EXTENSIONSSPEC).pdf: $(EXTENSIONSSPECSRC) $(GENDEPENDS) # Language Extensions spec CEXTDOC = OpenCL_LangExt CEXTDOCSRC = $(CEXTDOC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(CEXTDOC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(CEXTDOC).txt $(GENERATED)) cexthtml: $(HTMLDIR)/$(CEXTDOC).html $(CEXTDOCSRC) @@ -288,7 +325,7 @@ $(PDFDIR)/$(CEXTDOC).pdf: $(CEXTDOCSRC) # C++ (cxx) spec CXXSPEC = OpenCL_Cxx CXXSPECSRC = $(CXXSPEC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(CXXSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(CXXSPEC).txt $(GENERATED)) cxxhtml: $(HTMLDIR)/$(CXXSPEC).html $(CXXSPECSRC) @@ -306,7 +343,7 @@ $(PDFDIR)/$(CXXSPEC).pdf: $(CXXSPECSRC) # C spec CSPEC = OpenCL_C CSPECSRC = $(CSPEC).txt $(GENDEPENDS) \ - $(shell grep ^include:: 
$(CSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(CSPEC).txt $(GENERATED)) chtml: $(HTMLDIR)/$(CSPEC).html $(CSPECSRC) @@ -324,7 +361,7 @@ $(PDFDIR)/$(CSPEC).pdf: $(CSPECSRC) # C++ for OpenCL doc CXX4OPENCLDOC = CXX_for_OpenCL CXX4OPENCLDOCSRC = $(CXX4OPENCLDOC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(CXX4OPENCLDOC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(CXX4OPENCLDOC).txt $(GENERATED)) cxx4openclhtml: $(HTMLDIR)/$(CXX4OPENCLDOC).html $(CXX4OPENCLDOCSRC) @@ -342,7 +379,7 @@ $(PDFDIR)/$(CXX4OPENCLDOC).pdf: $(CXX4OPENCLDOCSRC) # ICD installation guidelines ICDINSTSPEC = OpenCL_ICD_Installation ICDINSTSPECSRC = $(ICDINSTSPEC).txt \ - $(shell grep ^include:: $(ICDINSTSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(ICDINSTSPEC).txt $(GENERATED)) icdinsthtml: $(HTMLDIR)/$(ICDINSTSPEC).html $(ICDINSTSPECSRC) @@ -359,18 +396,29 @@ $(PDFDIR)/$(ICDINSTSPEC).pdf: $(ICDINSTSPECSRC) # Clean generated and output files -clean: clean_html clean_pdf clean_generated +clean: clean_html clean_pdf clean_man clean_generated clean_html: - $(QUIET)$(RMRF) $(HTMLDIR) $(MANHTMLDIR) $(OUTDIR)/katex + $(QUIET)$(RMRF) $(HTMLDIR) $(OUTDIR)/katex clean_pdf: $(QUIET)$(RMRF) $(PDFDIR) $(PDFMATHDIR) +clean_man: + $(QUIET)$(RMRF) $(MANHTMLDIR) + +# Generated directories and files to remove +CLEAN_GEN_PATHS = \ + $(APIPATH) \ + $(METAPATH) \ + $(REFPATH) \ + $(GENERATED)/__pycache__ \ + $(PDFMATHDIR) \ + $(PYAPIMAP) \ + $(ATTRIBFILE) + clean_generated: - $(QUIET)$(RMRF) $(APIINCDIR)/* $(GENERATED)/api.py $($(REFPATH)/ - $(QUIET)$(RMRF) $(PDFMATHDIR) - $(QUIET)$(RMRF) $(GENERATED)/__pycache__ + $(QUIET)$(RMRF) $(CLEAN_GEN_PATHS) # Ref page targets for individual pages MANDIR := man @@ -396,22 +444,22 @@ MANSOURCES = $(filter-out $(REFPATH)/apispec.txt $(REFPATH)/footer.txt $(wildc # Should pass in $(EXTOPTIONS) to determine which pages 
to generate. # For now, all core and extension ref pages are extracted by genRef.py. ## Temporary - eventually should be all spec asciidoctor source files -SPECFILES = $(wildcard api/*.asciidoc) OpenCL_API.txt OpenCL_C.txt -SCRIPTS = scripts +SPECFILES = $(wildcard api/[A-Za-z]*.asciidoc) $(wildcard c/[A-Za-z]*.asciidoc) OpenCL_API.txt OpenCL_C.txt GENREF = $(SCRIPTS)/genRef.py LOGFILE = $(REFPATH)/refpage.log refpages: $(REFPATH)/apispec.txt -$(REFPATH)/apispec.txt: $(SPECFILES) $(GENREF) $(SCRIPTS)/reflib.py $(GENERATED)/api.py +$(REFPATH)/apispec.txt: $(SPECFILES) $(GENREF) $(SCRIPTS)/reflib.py $(PYAPIMAP) $(QUIET)$(MKDIR) $(REFPATH) $(PYTHON) $(GENREF) -genpath $(GENERATED) -basedir $(REFPATH) \ -rewrite $(REFPATH)/rewritebody -toc $(REFPATH)/tocbody \ - -log $(LOGFILE) $(SPECFILES) + -log $(LOGFILE) -extpath $(CURDIR)/api \ + $(EXTOPTIONS) $(SPECFILES) cat $(MANDIR)/tochead $(REFPATH)/tocbody $(MANDIR)/toctail > $(REFPATH)/toc.html (cat $(MANDIR)/rewritehead ; \ echo ; echo "# Aliases hard-coded in refpage markup" ; \ sort < $(REFPATH)/rewritebody) > $(REFPATH)/.htaccess - $(CP) $(MANDIR)/static/*.txt $(REFPATH) + echo $(CP) $(MANDIR)/static/*.txt $(REFPATH) # These targets are HTML5 ref pages # @@ -419,8 +467,15 @@ $(REFPATH)/apispec.txt: $(SPECFILES) $(GENREF) $(SCRIPTS)/reflib.py $(GENERATED) # actual list of man page sources isn't known until after # $(REFPATH)/apispec.txt is generated. $(GENDEPENDS) is generated before # running the recursive make, so it doesn't trigger twice +# $(SUBMAKEOPTIONS) suppresses the redundant "Entering / leaving" +# messages make normally prints out, similarly to suppressing make +# command output logging in the individual refpage actions below. 
+SUBMAKEOPTIONS = --no-print-directory manhtmlpages: $(REFPATH)/apispec.txt $(GENDEPENDS) - $(MAKE) -e buildmanpages + $(QUIET) echo "manhtmlpages: building HTML refpages with these options:" + $(QUIET) echo $(ASCIIDOCTOR) -b html5 $(ADOCOPTS) $(ADOCHTMLOPTS) \ + $(ADOCREFOPTS) -o REFPAGE.html REFPAGE.adoc + $(MAKE) $(SUBMAKEOPTIONS) -e buildmanpages $(CP) $(MANDIR)/*.html $(MANDIR)/*.css $(MANDIR)/*.gif $(MANHTMLDIR) $(CP) $(REFPATH)/.htaccess $(REFPATH)/*.html $(MANHTMLDIR) @@ -429,26 +484,31 @@ MANHTML = $(MANSOURCES:$(REFPATH)/%.txt=$(MANHTMLDIR)/%.html) buildmanpages: $(MANHTML) +# The refpage build process normally generates far too much output, so +# use VERYQUIET instead of QUIET $(MANHTMLDIR)/%.html: KATEXDIR = ../../katex $(MANHTMLDIR)/%.html: $(REFPATH)/%.txt $(MANCOPYRIGHT) $(GENDEPENDS) $(KATEXINST) - $(QUIET)$(MKDIR) $(MANHTMLDIR) - $(QUIET)$(ASCIIDOCTOR) -b html5 -a cross-file-links \ - $(ADOCMANOPTS) $(ADOCHTMLOPTS) -o $@ $< + $(VERYQUIET)echo "Building $@ from $< using default options" + $(VERYQUIET)$(MKDIR) $(MANHTMLDIR) + $(VERYQUIET)$(ASCIIDOCTOR) -b html5 $(ADOCOPTS) $(ADOCHTMLOPTS) \ + $(ADOCREFOPTS) -o $@ $< $(MANHTMLDIR)/intro.html: $(REFPATH)/intro.txt $(MANCOPYRIGHT) - $(QUIET)$(MKDIR) $(MANHTMLDIR) - $(QUIET)$(ASCIIDOCTOR) -b html5 -a cross-file-links \ - $(ADOCOPTS) $(ADOCHTMLOPTS) -o $@ $< + $(VERYQUIET)echo "Building $@ from $< using default options" + $(VERYQUIET)$(MKDIR) $(MANHTMLDIR) + $(VERYQUIET)$(ASCIIDOCTOR) -b html5 $(ADOCOPTS) $(ADOCHTMLOPTS) \ + $(ADOCREFOPTS) -o $@ $< # Targets generated from the XML and registry processing scripts -# api.py - Python encoding of the registry -# $(APIINCDIR)/timeMarker - proxy for 'apiinc' - generate API interfaces +# apimap.py - Python encoding of the registry +# apiinc / proxy $(APIDEPEND) - API interface include files in $(APIPATH) +# extinc / proxy $(METADEPEND) - extension appendix metadata include files in $(METAPATH) # # $(GENSCRIPTEXTRA) are extra options that can be passed to the # 
generation script, such as # '-diag diag' -REGISTRY = xml +REGISTRY = $(ROOTDIR)/xml APIXML = $(REGISTRY)/cl.xml GENSCRIPT = $(SCRIPTS)/gencl.py DICTSCRIPT = $(SCRIPTS)/gen_dictionaries.py @@ -456,14 +516,37 @@ VERSIONSCRIPT = $(SCRIPTS)/gen_version_notes.py GENSCRIPTOPTS = $(VERSIONOPTIONS) $(EXTOPTIONS) $(GENSCRIPTEXTRA) -registry $(APIXML) GENSCRIPTEXTRA = -$(GENERATED)/api.py: $(APIXML) $(GENSCRIPT) - $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(GENERATED) api.py +PYAPIMAP = $(GENERATED)/apimap.py + +scriptapi: pyapi -apiinc: $(APIINCDIR)/timeMarker +pyapi $(PYAPIMAP): $(APIXML) $(GENSCRIPT) + $(QUIET)$(MKDIR) $(GENERATED) + $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(GENERATED) apimap.py -$(APIINCDIR)/timeMarker: $(APIXML) $(DICTSCRIPT) $(GENSCRIPT) $(VERSIONSCRIPT) - $(QUIET)$(MKDIR) $(APIINCDIR) - $(QUIET)$(PYTHON) $(DICTSCRIPT) -registry $(APIXML) -o $(APIINCDIR) +apiinc: $(APIDEPEND) + +$(APIDEPEND): $(APIXML) $(DICTSCRIPT) $(GENSCRIPT) $(VERSIONSCRIPT) + $(QUIET)$(MKDIR) $(APIPATH) + $(QUIET)$(PYTHON) $(DICTSCRIPT) -registry $(APIXML) -o $(APIPATH) $(QUIET)$(MKDIR) $(VERSIONDIR) $(QUIET)$(PYTHON) $(VERSIONSCRIPT) -registry $(APIXML) -o $(VERSIONDIR) - $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(APIINCDIR) apiinc + $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(APIPATH) apiinc + +extinc: $(METADEPEND) + +$(METADEPEND): $(APIXML) $(GENSCRIPT) + $(QUIET)$(MKDIR) $(METAPATH) + $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(METAPATH) extinc + +# This generates a single file containing asciidoc attributes for each +# extension in the spec being built. 
+attribs: $(ATTRIBFILE) + +$(ATTRIBFILE): + for attrib in $(EXTS) ; do \ + echo ":$${attrib}:" ; \ + done > $@ + +# Debugging aid - generate all files from registry XML +generated: $(PYAPIMAP) $(GENDEPENDS) diff --git a/OpenCL_API.txt b/OpenCL_API.txt index 0df01b799..2be31d8e1 100644 --- a/OpenCL_API.txt +++ b/OpenCL_API.txt @@ -1,6 +1,9 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group +// SPDX-License-Identifier: CC-BY-4.0 + +// Extensions to enable +// Must be included before the header and attribs.txt +include::{generated}/specattribs.adoc[] = The OpenCL^(TM)^ Specification :R: pass:q,r[^(R)^] @@ -73,6 +76,7 @@ include::api/appendix_e.asciidoc[] include::api/appendix_f.asciidoc[] include::api/appendix_g.asciidoc[] include::api/appendix_h.asciidoc[] +include::api/appendix_extensions.asciidoc[] <<< diff --git a/OpenCL_C.txt b/OpenCL_C.txt index ab33bdf11..5e7776c9b 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -1,6 +1,9 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group. +// SPDX-License-Identifier: CC-BY-4.0 + +// Extensions to enable +// Must be included before the header and attribs.txt +include::{generated}/specattribs.adoc[] = The OpenCL^(TM)^ C Specification :R: pass:q,r[^(R)^] @@ -135,8 +138,7 @@ Feature macro identifiers are used as names of features in this document. .Optional features in OpenCL C 3.0 or newer and their predefined macros. 
[cols="1,1",options="header",] |==== -| *Feature Macro/Name* -| *Brief Description* +| Feature Macro/Name | Brief Description | {opencl_c_3d_image_writes} | The OpenCL C compiler supports built-in functions for writing to 3D image @@ -214,6 +216,19 @@ of work-items. | The OpenCL C compiler supports built-in functions that perform collective operations across a work-group. +ifdef::cl_khr_integer_dot_product[] +| {opencl_c_integer_dot_product_input_4x8bit_packed} + + (when the {cl_khr_integer_dot_product} extension macro is defined) + +| The OpenCL C compiler supports built-in functions that perform dot +products on 4x8 bit packed integer vectors + +| {opencl_c_integer_dot_product_input_4x8bit} + + (when the {cl_khr_integer_dot_product} extension macro is defined) +| The OpenCL C compiler supports built-in functions that perform dot +products on 4x8 bit integer vectors +endif::cl_khr_integer_dot_product[] + |==== In OpenCL C 3.0 or newer, feature macros must expand to the value `1` if the @@ -232,6 +247,7 @@ feature test macros because there is no guarantee that feature test macros will be defined and that if defined they will indicate the presence of the corresponding optional functionality. + [[extensions]] === Extensions @@ -251,6 +267,442 @@ can still be used as an extension, i.e. the same predefined extension macros are still valid in OpenCL C 3.0 or newer, however the use of feature macros is preferred whenever possible. + +ifdef::cl_khr_3d_image_writes[] +[[cl_khr_3d_image_writes,cl_khr_3d_image_writes]] +==== 3D Image Writes + +The `cl_khr_3d_image_writes` extension was promoted to OpenCL 2.0, and to +OpenCL 3.0 as the {opencl_c_3d_image_writes} feature. +The extension adds <> that allow a kernel to write to 3D image objects in addition to +2D image objects. 
+endif::cl_khr_3d_image_writes[] + + +ifdef::cl_khr_async_work_group_copy_fence[] +[[cl_khr_async_work_group_copy_fence,cl_khr_async_work_group_copy_fence]] +==== Async Work-group Copy Fence + +The `cl_khr_async_work_group_copy_fence` extension supports establishing a +memory synchronization ordering of asynchronous copies. +The extension provides the `async_work_group_copy_fence` function, as +described in the <> table. +endif::cl_khr_async_work_group_copy_fence[] + + +ifdef::cl_khr_byte_addressable_store[] +[[cl_khr_byte_addressable_store,cl_khr_byte_addressable_store]] +==== Byte-Addressable Storage + +The `cl_khr_byte_addressable_store` extension was promoted to OpenCL C 1.1. +The extension relaxes <> on pointers to `char`, `uchar`, +`char2`, `uchar2`, `short`, `ushort` and `half`, allowing applications to +read from and write to pointers to these types. +endif::cl_khr_byte_addressable_store[] + + +ifdef::cl_khr_depth_images[] +[[cl_khr_depth_images,cl_khr_depth_images]] +==== Depth Images + +The `cl_khr_depth_images` extension was promoted to OpenCL 2.0. +The extension provides new <>, as well as <>, +<>, +<>, and <> operating on those types. +endif::cl_khr_depth_images[] + + +ifdef::cl_khr_device_enqueue_local_arg_types[] +[[cl_khr_device_enqueue_local_arg_types,cl_khr_device_enqueue_local_arg_types]] +==== Device Enqueue Local Argument Types + +The `cl_khr_device_enqueue_local_arg_types` extension allows arguments to +blocks that are passed to the <> and to the <> to be pointers to any type (built-in or +user-defined) in local memory, instead of requiring arguments to blocks to +be pointers to `void` in local memory. +endif::cl_khr_device_enqueue_local_arg_types[] + + +ifdef::cl_khr_extended_async_copies[] +[[cl_khr_extended_async_copies,cl_khr_extended_async_copies]] +==== Extended Async Copy Functions + +The `cl_khr_extended_async_copies` extension provides additional +<> which interpret the +source and destination as 2D or 3D images.
+endif::cl_khr_extended_async_copies[] + + +ifdef::cl_khr_extended_bit_ops[] +[[cl_khr_extended_bit_ops,cl_khr_extended_bit_ops]] +==== Extended Bit Operations + +The `cl_khr_extended_bit_ops` extension provides additional +<> including bitfield +insert, bitfield extract, and bit reverse. +endif::cl_khr_extended_bit_ops[] + + +ifdef::cl_khr_fp16[] +[[cl_khr_fp16,cl_khr_fp16]] +==== Half-Precision Floating-Point + +The `cl_khr_fp16` extension was promoted to OpenCL C 1.2 as an optional +feature, and to OpenCL 3.0 as the optional {cl_khr_fp16} feature. +The extension provides 16-bit precision scalar and vector floating-point +data types and extends many functions to accept these types. +endif::cl_khr_fp16[] + + +ifdef::cl_khr_fp64[] +[[cl_khr_fp64,cl_khr_fp64]] +==== Double-Precision Floating-Point + +The `cl_khr_fp64` extension was promoted to OpenCL C 1.2 as an optional +feature, and to OpenCL 3.0 as the optional {opencl_c_fp64} feature. +The extension provides double-precision scalar and vector floating-point +data types and extends many functions to accept these types. +endif::cl_khr_fp64[] + + +ifdef::cl_khr_gl_msaa_sharing[] +[[cl_khr_gl_msaa_sharing,cl_khr_gl_msaa_sharing]] +==== Multi-Sample Shared OpenCL/OpenGL Images + +The `cl_khr_gl_msaa_sharing` extension adds support for multi-sample images +shared with OpenGL multi-sample textures. +The extension provides new <>, as well as <> and <> operating on those +types. +endif::cl_khr_gl_msaa_sharing[] + + +ifdef::cl_khr_global_int32_base_atomics[] +[[cl_khr_global_int32_base_atomics,cl_khr_global_int32_base_atomics]] +==== Global 32-Bit Base Atomics + +The `cl_khr_global_int32_base_atomics` extension was promoted to OpenCL C +1.1, with the supported functions renamed to use the **atomic_** prefix +rather than the **atom_** prefix. +The extension provides base atomic functions for {global} variables, as +described in the <> table.
+ +endif::cl_khr_global_int32_base_atomics[] + + +ifdef::cl_khr_global_int32_extended_atomics[] +[[cl_khr_global_int32_extended_atomics,cl_khr_global_int32_extended_atomics]] +==== Global 32-Bit Extended Atomics + +The `cl_khr_global_int32_extended_atomics` extension was promoted to OpenCL +C 1.1, with the supported functions renamed to use the **atomic_** prefix +rather than the **atom_** prefix. +The extension provides extended atomic functions for {global} variables, as +described in the <> table. + +endif::cl_khr_global_int32_extended_atomics[] + + +ifdef::cl_khr_initialize_memory[] +[[cl_khr_initialize_memory,cl_khr_initialize_memory]] +==== Initializing Memory + +The `cl_khr_initialize_memory` extension allows creating a context which +initializes specified types (local or private) of memory prior to the start +of kernel execution. + +There is one <> on the timing +of this initialization discussed in this document, although most of the +extension is defined by the OpenCL 3.0 API Specification. +endif::cl_khr_initialize_memory[] + + +ifdef::cl_khr_int64_base_atomics[] +[[cl_khr_int64_base_atomics,cl_khr_int64_base_atomics]] +==== 64-Bit Base Atomics + +The `cl_khr_int64_base_atomics` extension provides base atomic functions for +{global} and {local} 64-bit signed and unsigned integer variables, as +described in the <> table. +endif::cl_khr_int64_base_atomics[] + + +ifdef::cl_khr_int64_extended_atomics[] +[[cl_khr_int64_extended_atomics,cl_khr_int64_extended_atomics]] +==== 64-Bit Extended Atomics + +The `cl_khr_int64_extended_atomics` extension provides extended atomic functions for +{global} and {local} 64-bit signed and unsigned integer variables, as +described in the <> table. 
+endif::cl_khr_int64_extended_atomics[] + + +ifdef::cl_khr_integer_dot_product[] +[[cl_khr_integer_dot_product,cl_khr_integer_dot_product]] +==== Integer Dot Product + +The `cl_khr_integer_dot_product` extension adds support for SPIR-V +instructions and OpenCL C built-in functions to compute the dot product of +vectors of integers. +The extension provides new <> operating on these types. +endif::cl_khr_integer_dot_product[] + + +ifdef::cl_khr_local_int32_base_atomics[] +[[cl_khr_local_int32_base_atomics,cl_khr_local_int32_base_atomics]] +==== Local 32-Bit Base Atomics + +The `cl_khr_local_int32_base_atomics` extension was promoted to OpenCL C +1.1, with the supported functions renamed to use the **atomic_** prefix +rather than the **atom_** prefix. +The extension provides base atomic functions for {local} variables, as +described in the <> table. + +endif::cl_khr_local_int32_base_atomics[] + + +ifdef::cl_khr_local_int32_extended_atomics[] +[[cl_khr_local_int32_extended_atomics,cl_khr_local_int32_extended_atomics]] +==== Local 32-Bit Extended Atomics + +The `cl_khr_local_int32_extended_atomics` extension was promoted to OpenCL +C 1.1, with the supported functions renamed to use the **atomic_** prefix +rather than the **atom_** prefix. +The extension provides extended atomic functions for {local} variables, as +described in the <> table. + +endif::cl_khr_local_int32_extended_atomics[] + + +ifdef::cl_khr_mipmap_image[] +[[cl_khr_mipmap_image,cl_khr_mipmap_image]] +==== Mipmapped Image Reads and Queries + +The `cl_khr_mipmap_image` extension adds support for mipmap images. +The extension provides built-in <> and <> functions +operating on these images. +endif::cl_khr_mipmap_image[] + + +ifdef::cl_khr_mipmap_image_writes[] +[[cl_khr_mipmap_image_writes,cl_khr_mipmap_image_writes]] +==== Mipmapped Image Writes + +The `cl_khr_mipmap_image_writes` extension adds support for writing to +mipmap images, and requires support for the `<>` +extension macro. 
+The extension provides built-in <> functions operating on these images. +endif::cl_khr_mipmap_image_writes[] + + +ifdef::cl_khr_select_fprounding_mode[] +[[cl_khr_select_fprounding_mode,cl_khr_select_fprounding_mode]] +==== Select Floating-Point Rounding Mode + +The `cl_khr_select_fprounding_mode` extension allows <> for an instruction or group of +instructions in the program source by use of a *#pragma*. + +The extension was deprecated in OpenCL 1.1 and its use is not recommended. +endif::cl_khr_select_fprounding_mode[] + + +ifdef::cl_khr_srgb_image_writes[] +[[cl_khr_srgb_image_writes,cl_khr_srgb_image_writes]] +==== sRGB Image Write Functions + +The `cl_khr_srgb_image_writes` extension adds support for writing to sRGB +images using the <> +functions. Color space conversion is performed by the function. +endif::cl_khr_srgb_image_writes[] + + +ifdef::cl_khr_subgroups[] +[[cl_khr_subgroups,cl_khr_subgroups]] +==== Sub-Groups + +The `cl_khr_subgroups` extension was promoted to OpenCL C 2.1 as the +{opencl_c_subgroups} feature. +The extension provides the following functions: + + * <> + * <> + * <> + * <> + * <> + * The <> type and + <> +endif::cl_khr_subgroups[] + + +ifdef::cl_khr_subgroup_ballot[] +[[cl_khr_subgroup_ballot,cl_khr_subgroup_ballot]] +==== Sub-Group Ballots + +The `cl_khr_subgroup_ballot` extension adds the ability to collect and +operate on ballots from work items in a sub-group. +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_ballot[] + + +ifdef::cl_khr_subgroup_clustered_reduce[] +[[cl_khr_subgroup_clustered_reduce,cl_khr_subgroup_clustered_reduce]] +==== Clustered Reductions + +The `cl_khr_subgroup_clustered_reduce` extension adds support for clustered +reductions that operate on a subset of work items in the sub-group. 
+The extension provides the following functions: + + * <> + * <> + * <> + +endif::cl_khr_subgroup_clustered_reduce[] + + +ifdef::cl_khr_subgroup_extended_types[] +[[cl_khr_subgroup_extended_types,cl_khr_subgroup_extended_types]] +==== Sub-Group Extended Types + +The `cl_khr_subgroup_extended_types` extension adds <> to the existing +<>. + +endif::cl_khr_subgroup_extended_types[] + + +ifdef::cl_khr_subgroup_non_uniform_arithmetic[] +[[cl_khr_subgroup_non_uniform_arithmetic,cl_khr_subgroup_non_uniform_arithmetic]] +==== Built-in Non-Uniform Arithmetic Functions for Sub-Groups + +The `cl_khr_subgroup_non_uniform_arithmetic` extension adds the ability to +use some sub-group functions within non-uniform flow control, including +additional scan and reduction operators. + +The extension provides the following functions: + + * <> + * <> + * <> + +endif::cl_khr_subgroup_non_uniform_arithmetic[] + + +ifdef::cl_khr_subgroup_non_uniform_vote[] +[[cl_khr_subgroup_non_uniform_vote,cl_khr_subgroup_non_uniform_vote]] +==== Built-in Non-Uniform Vote and Election Functions for Sub-Groups + +The `cl_khr_subgroup_non_uniform_vote` extension adds the ability to elect a +single work item from a sub-group to perform a task and to hold votes among +work items in a sub-group. + +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_non_uniform_vote[] + + +ifdef::cl_khr_subgroup_rotate[] +[[cl_khr_subgroup_rotate,cl_khr_subgroup_rotate]] +==== Sub-Group Rotation + +The `cl_khr_subgroup_rotate` extension adds support for a new sub-group data +exchange operation that makes it possible to rotate values through the work +items in a sub-group. 
+ +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_rotate[] + + +ifdef::cl_khr_subgroup_shuffle[] +[[cl_khr_subgroup_shuffle,cl_khr_subgroup_shuffle]] +==== General Purpose Shuffles + +The `cl_khr_subgroup_shuffle` extension adds additional ways to exchange +data among work items in a sub-group. + +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_shuffle[] + + +ifdef::cl_khr_subgroup_shuffle_relative[] +[[cl_khr_subgroup_shuffle_relative,cl_khr_subgroup_shuffle_relative]] +==== Relative Shuffles + +The `cl_khr_subgroup_shuffle_relative` extension adds specialized ways to +exchange data among work items in a sub-group that may perform better on +some implementations. + +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_shuffle_relative[] + + +ifdef::cl_khr_work_group_uniform_arithmetic[] +[[cl_khr_work_group_uniform_arithmetic,cl_khr_work_group_uniform_arithmetic]] +==== Work-group Collective Uniform Arithmetic Functions + +The `cl_khr_work_group_uniform_arithmetic` extension adds additional +work-group collective functions, including work-group scans and reductions +for the following operators: + + * Logical operations (`and`, `or`, and `xor`). + * Bitwise operations (`and`, `or`, and `xor`). + * Integer multiplication (`mul`). + * Floating-point multiplication (`mul`). + +The extension provides the following functions: + + * <> + * <> + * <> +endif::cl_khr_work_group_uniform_arithmetic[] + + [[supported-data-types]] == Supported Data Types @@ -267,9 +719,9 @@ The following table describes the list of built-in scalar data types. [[table-builtin-scalar-types]] .Built-in Scalar Data Types -[cols=",",] +[cols=",",options="header",] |==== -| *Type* | *Description* +| Type | Description | `bool` footnote:[{fn-bool}] | A conditional data type which is either _true_ or _false_. 
The value _true_ expands to the integer constant 1 and the value @@ -296,15 +748,14 @@ The following table describes the list of built-in scalar data types. storage format. | `double` footnote:[{fn-double}] | A 64-bit floating-point number. - The `double` data type must conform to the IEEE 754 double precision + The `double` data type must conform to the IEEE 754 double-precision storage format. - <> support for OpenCL C 1.2 or newer. In - OpenCL C 3.0 it requires support of the {opencl_c_fp64} feature. - Also see extension *cl_khr_fp64*. + <> support for <>. | `half` | A 16-bit floating-point number. - The `half` data type must conform to the IEEE 754-2008 half precision + The `half` data type must conform to the IEEE 754-2008 half-precision storage format. | `size_t` footnote:size_t[{fn-size_t}] | The unsigned integer type of the result of the `sizeof` operator. @@ -325,21 +776,15 @@ The following table describes the list of built-in scalar data types. type that cannot be completed. |==== -If the double-precision floating-point extension *cl_khr_fp64* or the -{opencl_c_fp64} feature is not supported, implementations may -implicitly cast double-precision floating-point literals to -single-precision literals. The use of double-precision literals without -double-precision support should result in a diagnostic. - Most built-in scalar data types are also declared as appropriate types in the OpenCL API (and header files) that can be used by an application. 
The following table describes the built-in scalar data type in the OpenCL C programming language and the corresponding data type available to the application: -[cols=",",] +[cols=",",options="header",] |==== -| *Type in OpenCL Language* | *API type for application* +| Type in OpenCL Language | API type for application | `bool` | n/a | `char` | `cl_char` | `unsigned char`, `uchar` | `cl_uchar` @@ -361,6 +806,20 @@ application: -- +[[double-precision-support]] +==== Double-Precision Floating-Point Support + +Double-precision floating-point is supported if +ifdef::cl_khr_fp64[the `<>` extension macro is supported, or if] +OpenCL 1.2 or newer is supported. +In OpenCL 3.0, it also requires support for the {opencl_c_fp64} feature, + +If double-precision is not supported, implementations may +implicitly cast double-precision floating-point literals to +single-precision literals. The use of double-precision literals without +double-precision support should result in a diagnostic. + + [[the-half-data-type]] ==== The `half` Data Type @@ -381,6 +840,8 @@ Conversions from `float` to `half` correctly round the mantissa to 11 bits of precision. Conversions from `half` to `float` are lossless; all `half` numbers are exactly representable as `float` values. +Conversions from `double` to `half` are correctly rounded. +Conversions from `half` to `double` are lossless. The `half` data type can only be used to declare a pointer to a buffer that contains `half` values. @@ -449,9 +910,9 @@ The following table describes the list of built-in vector data types. [[table-builtin-vector-types]] .Built-in Vector Data Types -[cols=",",] +[cols=",",options="header",] |==== -| *Type* | *Description* +| Type | Description | `char__n__` | A vector of _n_ 8-bit signed two's complement integer values. | `uchar__n__` @@ -468,14 +929,17 @@ The following table describes the list of built-in vector data types. | A vector of _n_ 64-bit signed two's complement integer values. 
| `ulong__n__` footnote:long-vec[] | A vector of _n_ 64-bit unsigned integer values. +ifdef::cl_khr_fp16[] +| `half__n__` footnote:[{fn-half-supported}] + | A vector of _n_ 16-bit floating-point values. +endif::cl_khr_fp16[] | `float__n__` | A vector of _n_ 32-bit floating-point values. | `double__n__` footnote:[{fn-double-vec}] | A vector of _n_ 64-bit floating-point values. - <> support for OpenCL C 1.2 or newer. In - OpenCL C 3.0 it requires support of the {opencl_c_fp64} feature. - Also see extension *cl_khr_fp64*. + <> support for <>. |==== The built-in vector data types are also declared as appropriate types in the @@ -484,9 +948,9 @@ The following table describes the built-in vector data type in the OpenCL C programming language and the corresponding data type available to the application: -[cols=",",] +[cols=",",options="header",] |==== -| *Type in OpenCL Language* | *API type for application* +| Type in OpenCL Language | API type for application | `char__n__` | `cl_char__n__` | `uchar__n__` | `cl_uchar__n__` | `short__n__` | `cl_short__n__` @@ -495,6 +959,9 @@ application: | `uint__n__` | `cl_uint__n__` | `long__n__` | `cl_long__n__` | `ulong__n__` | `cl_ulong__n__` +ifdef::cl_khr_fp16[] +| `half__n__` | `cl_half__n__` +endif::cl_khr_fp16[] | `float__n__` | `cl_float__n__` | `double__n__` | `cl_double__n__` |==== @@ -512,9 +979,9 @@ OpenCL. [[table-other-builtin-types]] .Other Built-in Data Types -[cols=",",] +[cols=",",options="header",] |==== -| *Type* | *Description* +| Type | Description | `image2d_t` footnote:image-functions[{fn-image-functions}] | A 2D image. | `image3d_t` footnote:image-functions[] @@ -538,13 +1005,13 @@ OpenCL. | `image2d_depth_t` footnote:image-functions[] | A 2D depth image. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | `image2d_array_depth_t` footnote:image-functions[] | A 2D depth image array. 
- <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | `sampler_t` footnote:image-functions[] | A sampler type. | `queue_t` @@ -586,6 +1053,40 @@ OpenCL. These flags are described in detail in the <> section. +ifdef::cl_khr_gl_msaa_sharing[] +| `image2d_msaa_t` + | A 2D multi-sample color image. + Refer to the <> section for a detailed description + of the built-in functions that use this type. + + <> support for the + `<>` extension macro. +| `image2d_array_msaa_t` + | A 2D multi-sample color image array. + Refer to the <> section for a detailed description + of the built-in functions that use this type. + + <> support for the + `<>` extension macro. +| `image2d_msaa_depth_t` + | A 2D multi-sample depth image. + Refer to the <> section for a detailed description + of the built-in functions that use this type. + + <> support for the + `<>` extension macro. +| `image2d_array_msaa_depth_t` + | A 2D multi-sample depth image array. + Refer to the <> section for a detailed description + of the built-in functions that use this type. + + <> support for the + `<>` extension macro. +endif::cl_khr_gl_msaa_sharing[] |==== [NOTE] @@ -609,9 +1110,9 @@ The following tables describe the other built-in data types in OpenCL described in <> and the corresponding data type available to the application: -[cols=",",] +[cols=",",options="header",] |==== -| *Type in OpenCL C* | *API type for application* +| Type in OpenCL C | API type for application | `image2d_t` | `cl_mem` | `image3d_t` | `cl_mem` | `image2d_array_t` | `cl_mem` @@ -645,9 +1146,9 @@ are also reserved. [[table-reserved-types]] .Reserved Data Types -[cols=",",] +[cols=",",options="header",] |==== -| *Type* | *Description* +| Type | Description | `bool__n__` | A boolean vector. | `half__n__` @@ -674,7 +1175,7 @@ are also reserved. 
| An _n_ {times} _m_ matrix of single precision floating-point values stored in column-major order. | `double__n__x__m__` - | An _n_ {times} _m_ matrix of double precision floating-point values + | An _n_ {times} _m_ matrix of double-precision floating-point values stored in column-major order. | `long double`, `long double__n__` | A floating-point scalar and vector type with at least as much @@ -876,7 +1377,7 @@ The numeric indices that can be used are given in the table below: .Numeric indices for built-in vector data types [width="100%",cols="<34%,<66%",options="header"] |==== -| *Vector Components* | *Numeric indices that can be used* +| Vector Components | Numeric indices that can be used | 2-component | 0, 1 | 3-component | 0, 1, 2 | 4-component | 0, 1, 2, 3 @@ -1229,9 +1730,9 @@ following table. [[table-rounding-mode]] .Rounding Modes -[cols=",",] +[cols=",",options="header",] |==== -| *Modifier* | *Rounding Mode Description* +| Modifier | Rounding Mode Description | `_rte` | Round to nearest even | `_rtz` | Round toward zero | `_rtp` | Round toward positive infinity @@ -1248,6 +1749,7 @@ footnote:[{fn-float-conversion-rounding}] use the default rounding mode. The only default floating-point rounding mode supported is round to nearest even i.e the default rounding mode will be `_rte` for floating-point types. + [[out-of-range-behavior]] ==== Out-of-Range Behavior and Saturated Conversions @@ -1328,7 +1830,7 @@ float4 f = convert_float4_rtp( i ); [[reinterpreting-data-as-another-type]] -=== Reinterpreting Data As Another Type +=== Reinterpreting Data as Another Type It is frequently necessary to reinterpret bits in a data type as another data type in OpenCL. @@ -1355,7 +1857,7 @@ Examples: [source,opencl_c] ---------- -// d only if double precision is supported +// d only if double-precision is supported union { float f; uint u; double d; } u; u.u = 1; // u.f contains 2**-149. 
u.d is undefined -- @@ -1433,7 +1935,7 @@ short8 j = as_short8(i); float4 f; // Error. Result and operand have different sizes -double4 g = as_double4(f); // Only if double precision is supported. +double4 g = as_double4(f); // Only if double-precision is supported. float4 f; // Legal. g.xyz will have same values as f.xyz. g.w is undefined @@ -1647,16 +2149,21 @@ The result is a scalar signed integer of type `int` if the source operands are scalar and a vector signed integer type of the same size as the source operands if the source operands are vector types. Vector source operands of type `char__n__` and `uchar__n__` return a -`char__n__` result; vector source operands of type `short__n__` and +`char__n__` result; vector source operands of type +ifdef::cl_khr_fp16[] +`half__n__` footnote:[{fn-half-supported}], +endif::cl_khr_fp16[] +`short__n__` and `ushort__n__` return a `short__n__` result; vector source operands of type `int__n__`, `uint__n__` and `float__n__` return an `int__n__` result; vector source operands of type `long__n__`, `ulong__n__` and `double__n__` return a `long__n__` result. + For scalar types, the relational operators shall return 0 if the specified -relation is _false_ and 1 if the specified relation is _true_. +relation is _false_ and return 1 if the specified relation is _true_. For vector types, the relational operators shall return 0 if the specified -relation is _false_ and -1 (i.e. all bits set) if the specified relation is -_true_. +relation is _false_ and return -1 (i.e. all bits set) if the specified +relation is _true_. The relational operators always return 0 if either argument is not a number (NaN). -- @@ -1690,17 +2197,21 @@ The result is a scalar signed integer of type `int` if the source operands are scalar and a vector signed integer type of the same size as the source operands if the source operands are vector types.
Vector source operands of type `char__n__` and `uchar__n__` return a -`char__n__` result; vector source operands of type `short__n__` and +`char__n__` result; vector source operands of type +ifdef::cl_khr_fp16[] +`half__n__` footnote:[{fn-half-supported}], +endif::cl_khr_fp16[] +`short__n__` and `ushort__n__` return a `short__n__` result; vector source operands of type `int__n__`, `uint__n__` and `float__n__` return an `int__n__` result; vector source operands of type `long__n__`, `ulong__n__` and `double__n__` return a `long__n__` result. -For scalar types, the equality operators return 0 if the specified relation -is _false_ and return 1 if the specified relation is _true_. +For scalar types, the equality operators shall return 0 if the specified +relation is _false_ and return 1 if the specified relation is _true_. For vector types, the equality operators shall return 0 if the specified -relation is _false_ and -1 (i.e. all bits set) if the specified relation is -_true_. +relation is _false_ and return -1 (i.e. all bits set) if the specified +relation is _true_. The equality operator equal (*==*) returns 0 if one or both arguments are not a number (NaN). The equality operator not equal (*!=*) returns 1 (for scalar source @@ -1724,6 +2235,10 @@ vector operand. The scalar type is then widened to a vector that has the same number of components as the vector operand. The operation is done component-wise resulting in the same size vector. +ifdef::cl_khr_fp16[] +Vector source operands of type `half__n__` footnote:[{fn-half-supported}] +return a `short__n__` result. +endif::cl_khr_fp16[] -- @@ -1753,16 +2268,20 @@ The result is a scalar signed integer of type `int` if the source operands are scalar and a vector signed integer type of the same size as the source operands if the source operands are vector types.
Vector source operands of type `char__n__` and `uchar__n__` return a -`char__n__` result; vector source operands of type `short__n__` and +`char__n__` result; vector source operands of type +ifdef::cl_khr_fp16[] +`half__n__` footnote:[{fn-half-supported}], +endif::cl_khr_fp16[] +`short__n__` and `ushort__n__` return a `short__n__` result; vector source operands of type `int__n__`, `uint__n__` and `float__n__` return an `int__n__` result; vector source operands of type `long__n__`, `ulong__n__` and `double__n__` return a `long__n__` result. For scalar types, the logical operators shall return 0 if the result of the -operation is _false_ and 1 if the result is _true_. +operation is _false_ and return 1 if the result is _true_. For vector types, the logical operators shall return 0 if the result of the -operation is _false_ and -1 (i.e. all bits set) if the result is _true_. +operation is _false_ and return -1 (i.e. all bits set) if the result is _true_. -- @@ -1779,18 +2298,22 @@ The result is a scalar signed integer of type `int` if the source operands are scalar and a vector signed integer type of the same size as the source operands if the source operands are vector types. Vector source operands of type `char__n__` and `uchar__n__` return a -`char__n__` result; vector source operands of type `short__n__` and +`char__n__` result; vector source operands of type +ifdef::cl_khr_fp16[] +`half__n__` footnote:[{fn-half-supported}], +endif::cl_khr_fp16[] +`short__n__` and `ushort__n__` return a `short__n__` result; vector source operands of type `int__n__`, `uint__n__` and `float__n__` return an `int__n__` result; vector source operands of type `long__n__`, `ulong__n__` and `double__n__` return a `long__n__` result.
-For scalar types, the result of the logical unary operator is 0 if the value -of its operand compares unequal to 0, and 1 if the value of its operand +For scalar types, the logical unary operator shall return 0 if the value of +its operand compares unequal to 0, and return 1 if the value of its operand compares equal to 0. -For vector types, the unary operator shall return a 0 if the value of its -operand compares unequal to 0, and -1 (i.e. all bits set) if the value of -its operand compares equal to 0. +For vector types, the unary operator shall return 0 if the value of its +operand compares unequal to 0, and return -1 (i.e. all bits set) if the +value of its operand compares equal to 0. -- @@ -2304,7 +2827,7 @@ kernel void foo(int a) -- -=== Usage for declaration scopes and variable types +=== Usage for Declaration Scopes and Variable Types -- This section describes use of address space qualifiers with respect to declaration scopes or variable types. @@ -2512,10 +3035,7 @@ Qualifiers must be explicitly specified for: .Address space behavior [width="100%",cols="1,2,2,2",options="header"] |==== -| *Address Space* - | *Supported Usage* - | *Initialization* - | *Inference* +| Address Space | Supported Usage | Initialization | Inference | `{global}` | Program scope variables, for OpenCL C 2.0 or @@ -2578,7 +3098,7 @@ Qualifiers must be explicitly specified for: -- [[addr-spaces-conversions]] -=== Address space conversions +=== Address Space Conversions -- @@ -3064,7 +3584,7 @@ foo (read_only image2d_t imageA, } ---------- -imageA is a read-only 2D image object, and image is a write-only 2D image +`imageA` is a read-only 2D image object, and `imageB` is a write-only 2D image object. The sampler-less read image and write image built-ins can be used with image @@ -3179,7 +3699,7 @@ Advanced Vector Instructions (Intel^{reg}^ AVX) which implements a work-items to one thread, running a second work-item in the high half of the 256-bit AVX register. 
-As another example, a Power4 machine has two scalar double precision +As another example, a Power4 machine has two scalar double-precision floating-point units with an 6-cycle deep pipe. An autovectorizer for the Power4 machine might choose to interleave six kernels declared with the `+__attribute__(( vec_type_hint (double2)))+` @@ -3337,8 +3857,12 @@ address space qualifiers. floating-point arithmetic can be performed. . Whether or not irreducible control flow is illegal is implementation defined. - . The following restriction only applies to OpenCL C 1.0, also see the - *cl_khr_byte_addressable_store* extension. + . The following restriction only applies to +ifndef::cl_khr_byte_addressable_store[OpenCL C 1.0: +] +ifdef::cl_khr_byte_addressable_store[] + OpenCL C 1.0, and only if the `<>` + extension macro is not supported: + +endif::cl_khr_byte_addressable_store[] Built-in types that are less than 32-bits in size, i.e. `char`, `uchar`, `char2`, `uchar2`, `short`, `ushort`, and `half`, have the following restriction: @@ -3400,6 +3924,17 @@ Program scope variables can be declared with `{constant}` address space qualifiers or if {opencl_c_program_scope_global_variables} feature is supported with `{global}` address space qualifier. -- +ifdef::cl_khr_initialize_memory[] + . [[restrictions-initialize-memory]] The following restriction only + applies if the `<>` extension is supported: + + If the context is created with `CL_CONTEXT_MEMORY_INITIALIZE_KHR`, + appropriate memory locations as specified by the bit-field are + initialized with zeroes, prior to the start of execution of any kernel. + The driver chooses when, prior to kernel execution, the initialization of + local and/or private memory is performed. + The only requirement is there should be no values set from outside the + context, which can be read during a kernel execution. 
+endif::cl_khr_initialize_memory[] [[preprocessor-directives-and-macros]] @@ -3489,7 +4024,7 @@ The following predefined macro names are available. Used to determine the current rounding mode and is set to rte. Only affects the rounding mode of conversions to a float type. <> OpenCL C 1.1, along with the - *cl_khr_select_fprounding_mode* extension. + `<>` extension. `+__ENDIAN_LITTLE__+` :: Used to determine if the OpenCL device is a little endian architecture @@ -3897,7 +4432,7 @@ defined. [[specifying-attribute-for-unrolling-loops]] -=== Specifying Attribute For Unrolling Loops +=== Specifying Attribute for Unrolling Loops [open,refpage='attributes-loopUnroll',desc='Specifying Attribute For Unrolling Loops',type='freeform',spec='clang',anchor='specifying-attribute-for-unrolling-loops'] -- @@ -4376,7 +4911,6 @@ that operate on mixed scalar and vector types, however. [open,refpage='workItemFunctions',desc='Work-Item Functions',type='freeform',spec='clang',anchor='work-item-functions',xrefs='',alias='get_enqueued_local_size get_global_id get_global_linear_id get_global_offset get_global_size get_group_id get_local_id get_local_linear_id get_local_size get_num_groups get_work_dim'] -- - The following table describes the list of built-in work-item functions that can be used to query the number of dimensions, the global and local work size specified to *clEnqueueNDRangeKernel*, and the global and local @@ -4384,9 +4918,9 @@ identifier of each work-item when this kernel is being executed on a device. [[table-work-item-functions]] .Built-in Work-Item Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | uint *get_work_dim*() | Returns the number of dimensions in use. This is the value given to the _work_dim_ argument specified in @@ -4503,19 +5037,20 @@ identifier of each work-item when this kernel is being executed on a device. 
|==== NOTE: The functionality described in the following table <> support for OpenCL C 3.0 or newer and the {opencl_c_subgroups} -feature. +requires>> support for +ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. The following table describes the list of built-in work-item functions that can be used to query the size of a sub-group, number of sub-groups per work-group, and identifier of the sub-group within a work-group and work-item within a sub-group when this kernel is being executed on a device. -.Built-in Work-Item Functions for Sub-groups +[[table-subgroup-work-item-functions]] +.Built-in Work-Item Functions for Sub-Groups [cols="a,",options="header",] |==== -| *Function* -| *Description* +| Function | Description | uint *get_sub_group_size*() | Returns the number of work-items in the sub-group. @@ -4568,7 +5103,6 @@ sub-group when this kernel is being executed on a device. [open,refpage='mathFunctions',desc='Math Functions',type='freeform',spec='clang',anchor='math-functions',xrefs='commonFunctions integerFunctions',alias='acos acosh acospi asin asinh asinpi atan atan2 atan2pi atanh atanpi cbrt ceil copysign cos cosh cospi divide erf erfc exp exp10 exp2 expm1 fabs fdim floor fma fmax fmin fmod fract frexp half_cos half_divide half_exp half_exp10 half_exp2 half_log half_log10 half_log2 half_powr half_recip half_rsqrt half_sin half_sqrt half_tan hypot ilogb ldexp lgamma lgamma_r log log10 log1p log2 logb mad maxmag minmag modf nan native_cos native_divide native_exp native_exp10 native_exp2 native_log native_log10 native_log2 native_powr native_recip native_rsqrt native_sin native_sqrt native_tan nextafter pow pown powr recip remainder remquo rint rootn round rsqrt sin sincos sinh sinpi sqrt tan tanh tanpi tgamma trunc'] -- - The built-in math functions are categorized into the following: * A list of built-in functions that have scalar or vector argument @@ -4582,27 +5116,56 @@ The built-in 
math functions are not affected by the prevailing rounding mode in the calling environment, and always return the same value as they would if called with the round to nearest even rounding mode. -The <> -table describes the list of built-in math functions that can take scalar or -vector arguments. -We use the generic type name `gentype` to indicate that the function can take -`float`, `float2`, `float3`, `float4`, `float8`, `float16`, `double` -footnote:double-supported[{fn-double-supported}], `double2`, -`double3`, `double4`, `double8` or `double16` as the type for the arguments. -We use the generic type name `gentypef` to indicate that the function can -take `float`, `float2`, `float3`, `float4`, `float8`, or `float16` as the -type for the arguments. -We use the generic type name `gentyped` footnote:double-supported[] to -indicate that the function can take `double`, `double2`, `double3`, `double4`, -`double8` or `double16` as the type for the arguments. -For any specific use of a function, the actual type has to be the same for -all arguments and the return type, unless otherwise specified. +The <> table describes the list of built-in math functions that can +take scalar or vector arguments. + +The generic type name `gentype` indicates that the function can take any of + + * `float`, `float2`, `float3`, `float4`, `float8`, or `float16` + * `double` footnote:double-supported[{fn-double-supported}], `double2`, + `double3`, `double4`, `double8` or `double16` +ifdef::cl_khr_fp16[] + * `half` footnote:[{fn-half-supported}], `half2`, `half3`, `half4`, + `half8` or `half16` +endif::cl_khr_fp16[] + +as the type for the arguments. + +The generic type name `gentypef` indicates that the function can take any of + + * `float`, `float2`, `float3`, `float4`, `float8`, or `float16` + +as the type for the arguments. 
+ +The generic type name `gentyped` footnote:[{fn-double-supported}] indicates +that the function can take any of + + * `double`, `double2`, `double3`, `double4`, `double8` or `double16` + +as the type for the arguments. + +ifdef::cl_khr_fp16[] +The generic type name `gentypeh` footnote:[{fn-half-supported}] indicates +that the function can take any of + + * `half`, `half2`, `half3`, `half4`, `half8` or `half16` + +as the type for the arguments. + +NOTE: All functions taking or returning `half` types are supported only when +the `<>` extension macro is supported. +endif::cl_khr_fp16[] + +For any specific use of a function with `gentype*` arguments the actual type +has to be the same for all arguments and the return type, unless they are +explicitly specified as an actual type. [[table-builtin-math]] .Built-in Scalar and Vector Argument Math Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* |*Description* +| Function | Description | gentype *acos*(gentype) | Arc cosine function. Returns an angle in radians. | gentype *acosh*(gentype) @@ -4642,7 +5205,7 @@ all arguments and the return type, unless otherwise specified. | Complementary error function. | gentype *erf*(gentype) | Error function encountered in integrating the - http://mathworld.wolfram.com/NormalDistribution.html[_normal + https://mathworld.wolfram.com/NormalDistribution.html[_normal distribution_]. | gentype *exp*(gentype _x_) | Compute the base-_e_ exponential of _x_. @@ -4655,7 +5218,7 @@ all arguments and the return type, unless otherwise specified. | gentype *fabs*(gentype) | Compute absolute value of a floating-point number. | gentype *fdim*(gentype _x_, gentype _y_) - | _x_ - _y_ if _x_ > _y_, +0 if _x_ is less than or equal to y. + | _x_ - _y_ if _x_ > _y_, +0 if _x_ is less than or equal to _y_. | gentype *floor*(gentype) | Round to integral value using the round to negative infinity rounding mode. 
@@ -4667,12 +5230,16 @@ all arguments and the return type, unless otherwise specified. | gentype *fmax*(gentype _x_, gentype _y_) + gentypef *fmax*(gentypef _x_, float _y_) + gentyped *fmax*(gentyped _x_, double _y_) + +ifdef::cl_khr_fp16[gentypeh *fmax*(gentypeh _x_, half _y_)] | Returns _y_ if _x_ < _y_, otherwise it returns _x_. If one argument is a NaN, *fmax*() returns the other argument. If both arguments are NaNs, *fmax*() returns a NaN. | gentype *fmin*(gentype _x_, gentype _y_) + gentypef *fmin*(gentypef _x_, float _y_) + gentyped *fmin*(gentyped _x_, double _y_) + +ifdef::cl_khr_fp16[gentypeh *fmin*(gentypeh _x_, half _y_)] | Returns _y_ if _y_ < _x_, otherwise it returns _x_. If one argument is a NaN, *fmin*() returns the other argument. If both arguments are NaNs, *fmin*() returns a NaN. @@ -4682,26 +5249,48 @@ all arguments and the return type, unless otherwise specified. Returns _x_ - _y_ * *trunc*(_x_/_y_). | gentype *fract*(gentype _x_, {global} gentype _*iptr_) + gentype *fract*(gentype _x_, {local} gentype _*iptr_) + - gentype *fract*(gentype _x_, {private} gentype _*iptr_) + + gentype *fract*(gentype _x_, {private} gentype _*iptr_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: gentype *fract*(gentype _x_, gentype _*iptr_) +// TODO The fp16 extension uses the constant `0x1.ffcp-1f` below - unclear +// why, see the OpenCL-Docs issue. | Returns *fmin*(_x_ - *floor*(_x_), `0x1.fffffep-1f`). *floor*(x) is returned in _iptr_.
footnote:[{fn-fract-min}] +ifdef::cl_khr_fp16[] +| half__n__ **frexp**(half__n__ _x_, {global} int__n__ *exp) + + half **frexp**(half _x_, {global} int *exp) + + half__n__ **frexp**(half__n__ _x_, {local} int__n__ *exp) + + half **frexp**(half _x_, {local} int *exp) + + half__n__ **frexp**(half__n__ _x_, {private} int__n__ *exp) + + half **frexp**(half _x_, {private} int *exp) + + For OpenCL C 2.0, or OpenCL C 3.0 or newer with the + {opencl_c_generic_address_space} feature: + + half__n__ **frexp**(half__n__ _x_, int__n__ *exp) + + half **frexp**(half _x_, int *exp) + | Extract mantissa and exponent from _x_. + For each component the mantissa returned is a `half` with magnitude in + the interval [1/2, 1) or 0. + Each component of _x_ equals mantissa returned * 2__^exp^__. +endif::cl_khr_fp16[] | float__n__ **frexp**(float__n__ _x_, {global} int__n__ *exp) + - float **frexp**(float _x_, {global} int *exp) + + float **frexp**(float _x_, {global} int *exp) float__n__ **frexp**(float__n__ _x_, {local} int__n__ *exp) + - float **frexp**(float _x_, {local} int *exp) + + float **frexp**(float _x_, {local} int *exp) float__n__ **frexp**(float__n__ _x_, {private} int__n__ *exp) + - float **frexp**(float _x_, {private} int *exp) + + float **frexp**(float _x_, {private} int *exp) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **frexp**(float__n__ _x_, int__n__ *exp) + float **frexp**(float _x_, int *exp) @@ -4710,16 +5299,16 @@ all arguments and the return type, unless otherwise specified. in the interval [1/2, 1) or 0. Each component of _x_ equals mantissa returned * 2__^exp^__. 
| double__n__ **frexp**(double__n__ _x_, {global} int__n__ *exp) + - double **frexp**(double _x_, {global} int *exp) + + double **frexp**(double _x_, {global} int *exp) double__n__ **frexp**(double__n__ _x_, {local} int__n__ *exp) + - double **frexp**(double _x_, {local} int *exp) + + double **frexp**(double _x_, {local} int *exp) double__n__ **frexp**(double__n__ _x_, {private} int__n__ *exp) + - double **frexp**(double _x_, {private} int *exp) + + double **frexp**(double _x_, {private} int *exp) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: double__n__ **frexp**(double__n__ _x_, int__n__ *exp) + double **frexp**(double _x_, int *exp) @@ -4734,6 +5323,11 @@ all arguments and the return type, unless otherwise specified. int *ilogb*(float _x_) + int__n__ *ilogb*(double__n__ _x_) + int *ilogb*(double _x_) + +ifdef::cl_khr_fp16[] + int__n__ *ilogb*(half__n__ _x_) + + int *ilogb*(half _x_) +endif::cl_khr_fp16[] | Return the exponent as an integer value. | float__n__ *ldexp*(float__n__ _x_, int__n__ _k_) + float__n__ *ldexp*(float__n__ _x_, int _k_) + @@ -4741,31 +5335,55 @@ all arguments and the return type, unless otherwise specified. double__n__ *ldexp*(double__n__ _x_, int__n__ _k_) + double__n__ *ldexp*(double__n__ _x_, int _k_) + double *ldexp*(double _x_, int _k_) +ifdef::cl_khr_fp16[] + half__n__ *ldexp*(half__n__ _x_, int__n__ _k_) + + half__n__ *ldexp*(half__n__ _x_, int _k_) + + half *ldexp*(half _x_, int _k_) +endif::cl_khr_fp16[] | Multiply _x_ by 2 to the power _k_. 
| gentype *lgamma*(gentype _x_) + - float__n__ **lgamma_r**(float__n__ _x_, {global} int__n__ *_signp_) + float **lgamma_r**(float _x_, {global} int *_signp_) + double__n__ **lgamma_r**(double__n__ _x_, {global} int__n__ *_signp_) + - double **lgamma_r**(double _x_, {global} int *_signp_) + + double **lgamma_r**(double _x_, {global} int *_signp_) + +ifdef::cl_khr_fp16[] + half__n__ **lgamma_r**(half__n__ _x_, {global} int__n__ *_signp_) + + half **lgamma_r**(half _x_, {global} int *_signp_) + +endif::cl_khr_fp16[] float__n__ **lgamma_r**(float__n__ _x_, {local} int__n__ *_signp_) + float **lgamma_r**(float _x_, {local} int *_signp_) + double__n__ **lgamma_r**(double__n__ _x_, {local} int__n__ *_signp_) + - double **lgamma_r**(double _x_, {local} int *_signp_) + + double **lgamma_r**(double _x_, {local} int *_signp_) + +ifdef::cl_khr_fp16[] + half__n__ **lgamma_r**(half__n__ _x_, {local} int__n__ *_signp_) + + half **lgamma_r**(half _x_, {local} int *_signp_) + +endif::cl_khr_fp16[] float__n__ **lgamma_r**(float__n__ _x_, {private} int__n__ *_signp_) + float **lgamma_r**(float _x_, {private} int *_signp_) + double__n__ **lgamma_r**(double__n__ _x_, {private} int__n__ *_signp_) + - double **lgamma_r**(double _x_, {private} int *_signp_) + + double **lgamma_r**(double _x_, {private} int *_signp_) + +ifdef::cl_khr_fp16[] + half__n__ **lgamma_r**(half__n__ _x_, {private} int__n__ *_signp_) + + half **lgamma_r**(half _x_, {private} int *_signp_) + +endif::cl_khr_fp16[] For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **lgamma_r**(float__n__ _x_, int__n__ *_signp_) + float **lgamma_r**(float _x_, int *_signp_) + double__n__ **lgamma_r**(double__n__ _x_, int__n__ *_signp_) + double **lgamma_r**(double _x_, int *_signp_) + +ifdef::cl_khr_fp16[] + half__n__ **lgamma_r**(half__n__ _x_, int__n__ *_signp_) + + half **lgamma_r**(half _x_, int *_signp_) +endif::cl_khr_fp16[] | 
Log gamma function. Returns the natural logarithm of the absolute value of the gamma function. @@ -4784,11 +5402,12 @@ all arguments and the return type, unless otherwise specified. log__~r~__(\|_x_\|). | gentype *mad*(gentype _a_, gentype _b_, gentype _c_) | *mad* computes _a_ * _b_ + _c_. - The function may compute _a_ * _b_ + _c_ with reduced accuracy - in the embedded profile. See the OpenCL SPIR-V Environment Specification - for details. On some hardware the mad instruction may provide better - performance than expanded computation of _a_ * _b_ + _c_. - footnote:[{fn-mad-caution}] + The function may compute _a_ * _b_ + _c_ with reduced accuracy in the + embedded profile. + See the OpenCL SPIR-V Environment Specification for details. + On some hardware the mad instruction may provide better performance + than expanded computation of _a_ * _b_ + _c_. + footnote:[{fn-mad-caution}] | gentype *maxmag*(gentype _x_, gentype _y_) | Returns _x_ if \|_x_\| > \|_y_\|, _y_ if \|_y_\| > \|_x_\|, otherwise *fmax*(_x_, _y_). @@ -4801,10 +5420,10 @@ all arguments and the return type, unless otherwise specified. <> support for OpenCL C 1.1 or newer. | gentype *modf*(gentype _x_, {global} gentype _*iptr_) + gentype *modf*(gentype _x_, {local} gentype _*iptr_) + - gentype *modf*(gentype _x_, {private} gentype _*iptr_) + + gentype *modf*(gentype _x_, {private} gentype _*iptr_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: gentype *modf*(gentype _x_, gentype _*iptr_) | Decompose a floating-point number. @@ -4815,9 +5434,16 @@ all arguments and the return type, unless otherwise specified. float *nan*(uint _nancode_) + double__n__ *nan*(ulong__n__ _nancode_) + double *nan*(ulong _nancode_) + +ifdef::cl_khr_fp16[] + half__n__ *nan*(ushort__n__ _nancode_) + + half *nan*(ushort _nancode_) +endif::cl_khr_fp16[] | Returns a quiet NaN. 
The _nancode_ may be placed in the significand of the resulting NaN. | gentype *nextafter*(gentype _x_, gentype _y_) +// TODO shouldn't this be "next representable FP value of the precision of +// its arguments"? See the OpenCL-Docs issue. | Computes the next representable floating-point value following _x_ in the direction of _y_. Thus, if _y_ is less than _x_, *nextafter*() returns the largest @@ -4828,6 +5454,11 @@ all arguments and the return type, unless otherwise specified. float *pown*(float _x_, int _y_) + double__n__ *pown*(double__n__ _x_, int__n__ _y_) + double *pown*(double _x_, int _y_) + +ifdef::cl_khr_fp16[] + half__n__ *pown*(half__n__ _x_, int__n__ _y_) + + half *pown*(half _x_, int _y_) +endif::cl_khr_fp16[] | Compute _x_ to the power _y_, where _y_ is an integer. | gentype *powr*(gentype _x_, gentype _y_) | Compute _x_ to the power _y_, where _x_ is >= 0. @@ -4838,16 +5469,16 @@ all arguments and the return type, unless otherwise specified. one. If _r_ is zero, it is given the same sign as _x_. 
| float__n__ **remquo**(float__n__ _x_, float__n__ _y_, {global} int__n__ _*quo_) + - float **remquo**(float _x_, float _y_, {global} int _*quo_) + + float **remquo**(float _x_, float _y_, {global} int _*quo_) float__n__ **remquo**(float__n__ _x_, float__n__ _y_, {local} int__n__ _*quo_) + - float **remquo**(float _x_, float _y_, {local} int _*quo_) + + float **remquo**(float _x_, float _y_, {local} int _*quo_) float__n__ **remquo**(float__n__ _x_, float__n__ _y_, {private} int__n__ _*quo_) + - float **remquo**(float _x_, float _y_, {private} int _*quo_) + + float **remquo**(float _x_, float _y_, {private} int _*quo_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **remquo**(float__n__ _x_, float__n__ _y_, int__n__ _*quo_) + float **remquo**(float _x_, float _y_, int _*quo_) @@ -4861,16 +5492,16 @@ all arguments and the return type, unless otherwise specified. _x_/_y_, and gives that value the same sign as _x_/_y_. It stores this signed value in the object pointed to by _quo_. 
| double__n__ **remquo**(double__n__ _x_, double__n__ _y_, {global} int__n__ _*quo_) + - double **remquo**(double _x_, double _y_, {global} int _*quo_) + + double **remquo**(double _x_, double _y_, {global} int _*quo_) double__n__ **remquo**(double__n__ _x_, double__n__ _y_, {local} int__n__ _*quo_) + - double **remquo**(double _x_, double _y_, {local} int _*quo_) + + double **remquo**(double _x_, double _y_, {local} int _*quo_) double__n__ **remquo**(double__n__ _x_, double__n__ _y_, {private} int__n__ _*quo_) + - double **remquo**(double _x_, double _y_, {private} int _*quo_) + + double **remquo**(double _x_, double _y_, {private} int _*quo_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: double__n__ **remquo**(double__n__ _x_, double__n__ _y_, int__n__ _*quo_) + double **remquo**(double _x_, double _y_, int _*quo_) @@ -4883,6 +5514,31 @@ all arguments and the return type, unless otherwise specified. *remquo* also calculates the lower seven bits of the integral quotient _x_/_y_, and gives that value the same sign as _x_/_y_. It stores this signed value in the object pointed to by _quo_. +ifdef::cl_khr_fp16[] +| half__n__ **remquo**(half__n__ _x_, half__n__ _y_, {global} int__n__ _*quo_) + + half **remquo**(half _x_, half _y_, {global} int _*quo_) + + half__n__ **remquo**(half__n__ _x_, half__n__ _y_, {local} int__n__ _*quo_) + + half **remquo**(half _x_, half _y_, {local} int _*quo_) + + half__n__ **remquo**(half__n__ _x_, half__n__ _y_, {private} int__n__ _*quo_) + + half **remquo**(half _x_, half _y_, {private} int _*quo_) + + For OpenCL C 2.0, or OpenCL C 3.0 or newer with the + {opencl_c_generic_address_space} feature: + + half__n__ **remquo**(half__n__ _x_, half__n__ _y_, int__n__ _*quo_) + + half **remquo**(half _x_, half _y_, int _*quo_) + | The *remquo* function computes the value r such that _r_ = _x_ - + _k_*_y_, where _k_ is the integer nearest the exact value of _x_/_y_.
+ If there are two integers closest to _x_/_y_, _k_ shall be the even + one. + If _r_ is zero, it is given the same sign as _x_. + This is the same value that is returned by the *remainder* function. + *remquo* also calculates the lower seven bits of the integral quotient + _x_/_y_, and gives that value the same sign as _x_/_y_. + It stores this signed value in the object pointed to by _quo_. +endif::cl_khr_fp16[] | gentype *rint*(gentype) | Round to integral value (using round to nearest even rounding mode) in floating-point format. @@ -4891,6 +5547,11 @@ all arguments and the return type, unless otherwise specified. float *rootn*(float _x_, int _y_) + double__n__ *rootn*(double__n__ _x_, int__n__ _y_) + double *rootn*(double _x_, int _y_) + +ifdef::cl_khr_fp16[] + half__n__ *rootn*(half__n__ _x_, int__n__ _y_) + + half *rootn*(half _x_, int _y_) +endif::cl_khr_fp16[] | Compute _x_ to the power 1/_y_. | gentype *round*(gentype _x_) | Return the integral value nearest to _x_ rounding halfway cases away @@ -4901,13 +5562,13 @@ all arguments and the return type, unless otherwise specified. | Compute sine, where _x_ is an angle in radians. | gentype *sincos*(gentype _x_, {global} gentype _*cosval_) + gentype *sincos*(gentype _x_, {local} gentype _*cosval_) + - gentype *sincos*(gentype _x_, {private} gentype _*cosval_) + + gentype *sincos*(gentype _x_, {private} gentype _*cosval_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: gentype *sincos*(gentype _x_, gentype _*cosval_) - | Compute sine and cosine of x. + | Compute sine and cosine of _x_. The computed sine is the return value and computed cosine is returned in _cosval_, where _x_ is an angle in radians. 
| gentype *sinh*(gentype _x_) @@ -4948,11 +5609,17 @@ We use the generic type name `gentype` to indicate that the functions in the following table can take `float`, `float2`, `float3`, `float4`, `float8` or `float16` as the type for the arguments. +ifdef::cl_khr_fp16[] +NOTE: The use of `half` in this table does not refer to the argument and +return types, which are 32-bit floating-point values, but to the accuracy +requirements of the function results. +endif::cl_khr_fp16[] + [[table-builtin-half-native-math]] .Built-in Scalar and Vector _half_ and _native_ Math Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | gentype *half_cos*(gentype _x_) | Compute cosine. _x_ is an angle in radians, and must be in the range [-2^16^, +2^16^]. @@ -5051,13 +5718,13 @@ functions. [open,refpage='mathConstants',desc='Math Constants',type='freeform',spec='clang',anchor='table-builtin-half-native-math',xrefs='mathFunctions',alias='MAXFLOAT HUGE_VALF INFINITY NAN HUGE_VAL'] -- -The following symbolic constants are available. +The following constants are available. Their values are of type `float` and are accurate within the precision of a single precision floating-point number. -[cols=",",] +[cols=",",options="header",] |==== -| *Constant Name* | *Description* +| Constant Name | Description | `MAXFLOAT` | Value of maximum non-infinite single-precision floating-point number. | `HUGE_VALF` @@ -5071,13 +5738,12 @@ single precision floating-point number. | A constant expression of type `float` representing a quiet NaN. |==== -If double precision is supported by the device, e.g. 
for OpenCL C 3.0 or newer -the {opencl_c_fp64} feature macro is present, the following symbolic -constants will also be available: +If <>, then the following constants are also available: -[cols=",",] +[cols=",",options="header",] |==== -| *Constant Name* | *Description* +| Constant Name | Description | `HUGE_VAL` | A positive double constant expression. `HUGE_VAL` evaluates to +infinity. @@ -5087,11 +5753,10 @@ constants will also be available: [[floating-point-macros-and-pragmas]] -==== Floating-point macros and pragmas +==== Floating-point Macros and Pragmas [open,refpage='fpMacros',desc='Floating-Point Macros And Pragmas',type='freeform',spec='clang',anchor='floating-point-macros-and-pragmas',xrefs='integerMacros',alias='FP_CONTRACT FP_FAST_FMAF FP_FAST_FMA macroLimits'] -- - The `FP_CONTRACT` pragma can be used to allow (if the state is on) or disallow (if the state is off) the implementation to contract expressions. Each pragma can occur either outside external declarations or preceding all @@ -5143,9 +5808,9 @@ The following table describes the built-in macro names given above in the OpenCL C programming language and the corresponding macro names available to the application. -[cols=",",] +[cols=",",options="header",] |==== -| *Macro in OpenCL Language* | *Macro for application* +| Macro in OpenCL Language | Macro for application | `FLT_DIG` | `CL_FLT_DIG` | `FLT_MANT_DIG` | `CL_FLT_MANT_DIG` | `FLT_MAX_10_EXP` | `CL_FLT_MAX_10_EXP` @@ -5167,9 +5832,9 @@ The following constants are also available. They are of type `float` and are accurate within the precision of the `float` type. 
-[cols=",",] +[cols=",",options="header",] |==== -| *Constant* | *Description* +| Constant | Description | `M_E_F` | Value of _e_ | `M_LOG2E_F` | Value of log~2~e | `M_LOG10E_F` | Value of log~10~e @@ -5185,12 +5850,11 @@ They are of type `float` and are accurate within the precision of the | `M_SQRT1_2_F` | Value of 1 / {sqrt}2 |==== -If double precision is supported by the device, e.g. for OpenCL C 3.0 or newer -the {opencl_c_fp64} feature macro is present, then the following macros -and constants are also available: +If <>, then the following macros and constants are also available: The `FP_FAST_FMA` macro indicates whether the *fma*() family of functions -are fast compared with direct code for double precision floating-point. +are fast compared with direct code for double-precision floating-point. If defined, the `FP_FAST_FMA` macro shall indicate that the *fma*() function generally executes about as fast as, or faster than, a multiply and an add of `double` operands @@ -5216,9 +5880,9 @@ The following table describes the built-in macro names given above in the OpenCL C programming language and the corresponding macro names available to the application. -[cols=",",] +[cols=",",options="header",] |==== -| *Macro in OpenCL Language* | *Macro for application* +| Macro in OpenCL Language | Macro for application | `DBL_DIG` | `CL_DBL_DIG` | `DBL_MANT_DIG` | `CL_DBL_MANT_DIG` | `DBL_MAX_10_EXP` | `CL_DBL_MAX_10_EXP` @@ -5234,9 +5898,9 @@ The following constants are also available. They are of type ``double`` and are accurate within the precision of the double type. -[cols=",",] +[cols=",",options="header",] |==== -| *Constant* | *Description* +| Constant | Description | `M_E` | Value of _e_ | `M_LOG2E` | Value of log~2~e | `M_LOG10E` | Value of log~10~e @@ -5251,6 +5915,78 @@ double type. 
| `M_SQRT2` | Value of {sqrt}2 | `M_SQRT1_2` | Value of 1 / {sqrt}2 |==== + +ifdef::cl_khr_fp16[] +If the `<>` extension macro is supported, then the following +macros and constants are also available: + +The `FP_FAST_FMA_HALF` macro indicates whether the *fma*() family of +functions are fast compared with direct code for half-precision +floating-point. +If defined, the `FP_FAST_FMA_HALF` macro shall indicate that the *fma*() +function generally executes about as fast as, or faster than, a multiply and +an add of `half` operands. + +The macro names given in the following list must use the values specified. +These constant expressions are suitable for use in #if preprocessing +directives. + +[source,opencl_c] +---- +#define HALF_DIG 3 +#define HALF_MANT_DIG 11 +#define HALF_MAX_10_EXP +4 +#define HALF_MAX_EXP +16 +#define HALF_MIN_10_EXP -4 +#define HALF_MIN_EXP -13 +#define HALF_RADIX 2 +#define HALF_MAX 0x1.ffcp15h +#define HALF_MIN 0x1.0p-14h +#define HALF_EPSILON 0x1.0p-10h +---- + +The following table describes the built-in macro names given above in the +OpenCL C programming language and the corresponding macro names available to +the application. + +[cols=",",options="header",] +|==== +| Macro in OpenCL Language | Macro for application +| `HALF_DIG` | `CL_HALF_DIG` +| `HALF_MANT_DIG` | `CL_HALF_MANT_DIG` +| `HALF_MAX_10_EXP` | `CL_HALF_MAX_10_EXP` +| `HALF_MAX_EXP` | `CL_HALF_MAX_EXP` +| `HALF_MIN_10_EXP` | `CL_HALF_MIN_10_EXP` +| `HALF_MIN_EXP` | `CL_HALF_MIN_EXP` +| `HALF_RADIX` | `CL_HALF_RADIX` +| `HALF_MAX` | `CL_HALF_MAX` +| `HALF_MIN` | `CL_HALF_MIN` +| `HALF_EPSILON` | `CL_HALF_EPSILON` +|==== + +The following constants are also available. +They are of type `half` and are accurate within the precision of the `half` +type.
+ +[cols=",",options="header",] +|==== +| Constant | Description +| `M_E_H` | Value of _e_ +| `M_LOG2E_H` | Value of log~2~e +| `M_LOG10E_H` | Value of log~10~e +| `M_LN2_H` | Value of log~e~2 +| `M_LN10_H` | Value of log~e~10 +| `M_PI_H` | Value of {pi} +| `M_PI_2_H` | Value of {pi} / 2 +| `M_PI_4_H` | Value of {pi} / 4 +| `M_1_PI_H` | Value of 1 / {pi} +| `M_2_PI_H` | Value of 2 / {pi} +| `M_2_SQRTPI_H` | Value of 2 / {sqrt}{pi} +| `M_SQRT2_H` | Value of {sqrt}2 +| `M_SQRT1_2_H` | Value of 1 / {sqrt}2 +|==== +endif::cl_khr_fp16[] + -- @@ -5285,14 +6021,15 @@ For vector versions, `sgentype` is implicitly widened to `gentype` as described for <>. _n_ is 2, 3, 4, 8, or 16. -For any specific use of a function, the actual type has to be the same for -all arguments and the return type unless otherwise specified. +For any specific use of a function with `gentype*` arguments the actual type +has to be the same for all arguments and the return type, unless they are +explicitly specified as an actual type. [[table-builtin-functions]] .Built-in Scalar and Vector Integer Argument Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | ugentype *abs*(gentype _x_) | Returns \|x\|. | ugentype *abs_diff*(gentype _x_, gentype _y_) @@ -5323,19 +6060,70 @@ all arguments and the return type unless otherwise specified. type of _x_, if _x_ is a vector. <> support for OpenCL 2.0 or newer. +ifdef::cl_khr_integer_dot_product[] +| uint *dot*(uchar4 a, uchar4 b) + + int *dot*(char4 a, char4 b) + + int *dot*(uchar4 a, char4 b) + + int *dot*(char4 a, uchar4 b) + | `dot` returns the dot product of the two input vectors `a` and `b`. + The components of `a` and `b` are sign- or zero-extended to the width + of the destination type and the vectors with extended components are + multiplied component-wise. + All the components of the resulting vectors are added together to form + the final result.
+ + <> that the + {opencl_c_integer_dot_product_input_4x8bit} feature macro is defined. + +| uint *dot_acc_sat*(uchar4 a, uchar4 b, uint acc) + + int *dot_acc_sat*(char4 a, char4 b, int acc) + + int *dot_acc_sat*(uchar4 a, char4 b, int acc) + + int *dot_acc_sat*(char4 a, uchar4 b, int acc) + a| `dot_acc_sat` returns the saturating addition of the dot product of + the two input vectors `a` and `b` and the accumulator `acc`: +---- +product = dot(a,b); +result = add_sat(product, acc); +---- + +<> that the +{opencl_c_integer_dot_product_input_4x8bit} feature macro is defined. + +| uint *dot_4x8packed_uu_uint*(uint a, uint b) + + int *dot_4x8packed_ss_int*(uint a, uint b) + + int *dot_4x8packed_us_int*(uint a, uint b) + + int *dot_4x8packed_su_int*(uint a, uint b) + | Returns *dot* for 4x8 bit input vectors packed into a 32-bit word. + + <> that the + {opencl_c_integer_dot_product_input_4x8bit_packed} feature macro is + defined. + +| uint *dot_acc_sat_4x8packed_uu_uint*(uint a, uint b, uint acc) + + int *dot_acc_sat_4x8packed_ss_int*(uint a, uint b, int acc) + + int *dot_acc_sat_4x8packed_us_int*(uint a, uint b, int acc) + + int *dot_acc_sat_4x8packed_su_int*(uint a, uint b, int acc) + | Returns *dot_acc_sat* for 4x8 bit input vectors packed into a 32-bit + word. + + <> that the + {opencl_c_integer_dot_product_input_4x8bit_packed} feature macro is + defined. +endif::cl_khr_integer_dot_product[] + | gentype *mad_hi*(gentype _a_, gentype _b_, gentype _c_) | Returns *mul_hi*(_a_, _b_) + _c_. | gentype *mad_sat*(gentype _a_, gentype _b_, gentype _c_) | Returns _a_ * _b_ + _c_ and saturates the result. -| gentype *max*(gentype _x_, gentype _y_) + +| gentype *max*(gentype _x_, gentype _y_) - For OpenCL C 1.1 or newer: + + For OpenCL C 1.1 or newer: gentype *max*(gentype _x_, sgentype _y_) | Returns _y_ if _x_ < _y_, otherwise it returns _x_.
-| gentype *min*(gentype _x_, gentype _y_) + +| gentype *min*(gentype _x_, gentype _y_) - For OpenCL C 1.1 or newer: + + For OpenCL C 1.1 or newer: gentype *min*(gentype _x_, sgentype _y_) | Returns _y_ if _y_ < _x_, otherwise it returns _x_. @@ -5369,7 +6157,7 @@ all arguments and the return type unless otherwise specified. | _result_[i] = ((long)_hi_[i] << 32) \| _lo_[i] + _result_[i] = ((ulong)_hi_[i] << 32) \| _lo_[i] | gentype *popcount*(gentype _x_) - | Returns the number of non-zero bits in _x_. + + | Returns the number of non-zero bits in _x_. <> support for OpenCL C 1.2 or newer. |==== @@ -5382,9 +6170,9 @@ take `int`, `int2`, `int3`, `int4`, `int8`, `int16`, `uint`, `uint2`, [[table-builtin-fast-integer]] .Built-in 24-bit Integer Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | gentype *mad24*(gentype _x_, gentype _y_, gentype z) | Multipy two 24-bit integer values _x_ and _y_ and add the 32-bit integer result to the 32-bit integer _z_. @@ -5403,6 +6191,132 @@ take `int`, `int2`, `int3`, `int4`, `int8`, `int16`, `uint`, `uint2`, -- +ifdef::cl_khr_extended_bit_ops[] +[[extended-bit-operations]] +==== Extended Bit Operations + +[open,refpage='extendedBitOperations',desc='Extended Bit Operations',type='freeform',spec='clang',anchor='extended-bit-operations',xrefs='commonFunctions',alias='bitfield_insert bitfield_extract_signed bitfield_extract_unsigned bit_reverse'] +-- +If the `<>` extension macro is supported, the +functions described in the <> table can be used with built-in +scalar or vector integer types to perform extended bit operations. +The functions that operate on vector types operate component-wise. +The description is per-component. 
+ +In the table below, the generic type name `gentype` refers to the built-in +integer types `char`, `char__n__`, `uchar`, `uchar__n__`, `short`, +`short__n__`, `ushort`, `ushort__n__`, `int`, `int__n__`, `uint`, +`uint__n__`, `long`, `long__n__`, `ulong`, and `ulong__n__`. +The generic type name `igentype` refers to the built-in signed integer types +`char`, `char__n__`, `short`, `short__n__`, `int`, `int__n__`, `long`, and +`long__n__`. +The generic type name `ugentype` refers to the built-in unsigned integer +types `uchar`, `uchar__n__`, `ushort`, `ushort__n__`, `uint`, `uint__n__`, +`ulong`, and `ulong__n__`. +_n_ is 2, 3, 4, 8, or 16. + +[[table-builtin-extended-bit-operations]] +.Built-in Scalar and Vector Extended Bit Operations +[cols="1a,1", options="header"] +|=== +| Function | Description +a| +[source,opencl_c] +---- +gentype bitfield_insert( + gentype base, gentype insert, + uint offset, uint count) +---- + | Returns a copy of _base_, with a modified bitfield that comes from + _insert_. + + Any bits of the result value numbered outside [_offset_, _offset_ {plus} + _count_ - 1] (inclusive) will come from the corresponding bits in + _base_. + + Any bits of the result value numbered inside [_offset_, _offset_ {plus} + _count_ - 1] (inclusive) will come from the bits numbered [0, _count_ + - 1] (inclusive) of _insert_. + + _count_ is the number of bits to be modified. + If _count_ equals 0, the return value will be equal to _base_. + + If _count_ or _offset_ or _offset_ + _count_ is greater than number of + bits in `gentype` (for scalar types) or components of `gentype` (for + vector types), the result is undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +igentype bitfield_extract_signed( + gentype base, + uint offset, uint count) +---- + | Returns an extracted bitfield from _base_ with sign extension. + The type of the return value is always a signed type. 
+ + The bits of _base_ numbered in [_offset_, _offset_ + _count_ - 1] + (inclusive) are returned as the bits numbered in [0, _count_ - 1] + (inclusive) of the result. + The remaining bits in the result will be sign extended by replicating + the bit numbered _offset_ + _count_ - 1 of _base_. + + _count_ is the number of bits to be extracted. + If _count_ equals 0, the result is 0. + + If the _count_ or _offset_ or _offset_ + _count_ is greater than + number of bits in `gentype` (for scalar types) or components of + `gentype` (for vector types), the result is undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +ugentype bitfield_extract_unsigned( + gentype base, + uint offset, uint count) +---- + | Returns an extracted bitfield from _base_ with zero extension. + The type of the return value is always an unsigned type. + + The bits of _base_ numbered in [_offset_, _offset_ + _count_ - 1] + (inclusive) are returned as the bits numbered in [0, _count_ - 1] + (inclusive) of the result. + The remaining bits in the result will be zero. + + _count_ is the number of bits to be extracted. + If _count_ equals 0, the result is 0. + + If the _count_ or _offset_ or _offset_ + _count_ is greater than + number of bits in `gentype` (for scalar types) or components of + `gentype` (for vector types), the result is undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +gentype bit_reverse( + gentype base) +---- + | Returns the value of _base_ with reversed bits. + That is, the bit numbered _n_ of the result value will be taken from + the bit numbered _width_ - _n_ - 1 of _base_ (for scalar types) or a + component of _base_ (for vector types), where _width_ is number of + bits of `gentype` (for scalar types) or components of `gentype` (for + vector types). + + <> support for the + `<>` extension macro. 
+|=== +-- +endif::cl_khr_extended_bit_ops[] + + [[integer-macros]] ==== Integer Macros @@ -5435,9 +6349,9 @@ The following table describes the built-in macro names given above in the OpenCL C programming language and the corresponding macro names available to the application. -[cols=",",] +[cols=",",options="header",] |==== -| *Macro in OpenCL Language* | *Macro for application* +| Macro in OpenCL Language | Macro for application | `CHAR_BIT` | `CL_CHAR_BIT` | `CHAR_MAX` | `CL_CHAR_MAX` | `CHAR_MIN` | `CL_CHAR_MIN` @@ -5466,25 +6380,54 @@ The <> describes the list of built-in common functions. These all operate component-wise. The description is per-component. -We use the generic type name `gentype` to indicate that the function can take -`float`, `float2`, `float3`, `float4`, `float8`, `float16`, `double` -footnote:[{fn-double-supported}], `double2`, `double3`, `double4`, -`double8` or `double16` as the type for the arguments. -We use the generic type name `gentypef` to indicate that the function can -take `float`, `float2`, `float3`, `float4`, `float8`, or `float16` as the -type for the arguments. -We use the generic type name `gentyped` to indicate that the function can -take `double`, `double2`, `double3`, `double4`, `double8` or `double16` as -the type for the arguments. + +The generic type name `gentype` indicates that the function can take any of + + * `float`, `float2`, `float3`, `float4`, `float8`, or `float16` + * `double` footnote:double-supported[{fn-double-supported}], `double2`, + `double3`, `double4`, `double8` or `double16` +ifdef::cl_khr_fp16[] + * `half` footnote:[{fn-half-supported}], `half2`, `half3`, `half4`, + `half8` or `half16` +endif::cl_khr_fp16[] + +as the type for the arguments. + +The generic type name `gentypef` indicates that the function can take any of + + * `float`, `float2`, `float3`, `float4`, `float8`, or `float16` + +as the type for the arguments. 
+ +The generic type name `gentyped` footnote:[{fn-double-supported}] indicates +that the function can take any of + + * `double`, `double2`, `double3`, `double4`, `double8` or `double16` + +as the type for the arguments. + +ifdef::cl_khr_fp16[] +The generic type name `gentypeh` footnote:[{fn-half-supported}] indicates +that the function can take any of + + * `half`, `half2`, `half3`, `half4`, `half8` or `half16` + +as the type for the arguments. + +NOTE: All functions taking or returning `half` types are supported only when +the `<>` extension macro is supported. +endif::cl_khr_fp16[] [[table-builtin-common]] .Built-in Scalar and Vector Argument Common Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | gentype *clamp*(gentype _x_, gentype _minval_, gentype _maxval_) + gentypef *clamp*(gentypef _x_, float _minval_, float _maxval_) + gentyped *clamp*(gentyped _x_, double _minval_, double _maxval_) + +ifdef::cl_khr_fp16[gentypeh *clamp*(gentypeh _x_, half _minval_, half _maxval_)] | Returns *fmin*(*fmax*(_x_, _minval_), _maxval_). Results are undefined if _minval_ > _maxval_. | gentype *degrees*(gentype _radians_) @@ -5492,32 +6435,47 @@ the type for the arguments. | gentype *max*(gentype _x_, gentype _y_) + gentypef *max*(gentypef _x_, float _y_) + gentyped *max*(gentyped _x_, double _y_) + +ifdef::cl_khr_fp16[gentypeh *max*(gentypeh _x_, half _y_)] | Returns _y_ if _x_ < _y_, otherwise it returns _x_. If _x_ or _y_ are infinite or NaN, the return values are undefined. | gentype *min*(gentype _x_, gentype _y_) + gentypef *min*(gentypef _x_, float _y_) + gentyped *min*(gentyped _x_, double _y_) + +ifdef::cl_khr_fp16[gentypeh *min*(gentypeh _x_, half _y_)] | Returns _y_ if _y_ < _x_, otherwise it returns _x_. If _x_ or _y_ are infinite or NaN, the return values are undefined. 
| gentype *mix*(gentype _x_, gentype _y_, gentype _a_) + gentypef *mix*(gentypef _x_, gentypef _y_, float _a_) + gentyped *mix*(gentyped _x_, gentyped _y_, double _a_) - | Returns the linear blend of _x_ & _y_ implemented as: + +ifdef::cl_khr_fp16[gentypeh *mix*(gentypeh _x_, gentypeh _y_, half _a_)] + a| Returns the linear blend of _x_ and _y_ implemented as: _x_ + (_y_ - _x_) * _a_ _a_ must be a value in the range [0.0, 1.0]. If _a_ is not in the range [0.0, 1.0], the return values are undefined. + +ifdef::cl_khr_fp16[] +NOTE: The half-precision *mix* function can be implemented using +contractions such as *mad* or *fma*. +endif::cl_khr_fp16[] | gentype *radians*(gentype _degrees_) | Converts _degrees_ to radians, i.e. ({pi} / 180) * _degrees_. | gentype *step*(gentype _edge_, gentype _x_) + gentypef *step*(float _edge_, gentypef _x_) + gentyped *step*(double _edge_, gentyped _x_) + +ifdef::cl_khr_fp16[gentypeh *step*(half _edge_, gentypeh _x_)] | Returns 0.0 if _x_ < _edge_, otherwise it returns 1.0. | gentype *smoothstep*(gentype _edge0_, gentype _edge1_, gentype _x_) + gentypef *smoothstep*(float _edge0_, float _edge1_, gentypef _x_) + gentyped *smoothstep*(double _edge0_, double _edge1_, gentyped _x_) + +ifdef::cl_khr_fp16[gentypeh *smoothstep*(half _edge0_, half _edge1_, gentypeh _x_)] a| Returns 0.0 if _x_ \<= _edge0_ and 1.0 if _x_ >= _edge1_ and performs smooth Hermite interpolation between 0 and 1 when _edge0_ < _x_ < _edge1_. @@ -5536,6 +6494,10 @@ return t * t * (3 - 2 * t); Results are undefined if _edge0_ >= _edge1_ or if _x_, _edge0_ or _edge1_ is a NaN. +ifdef::cl_khr_fp16[] +NOTE: The half-precision *smoothstep* function can be implemented using +contractions such as *mad* or *fma*. +endif::cl_khr_fp16[] | gentype *sign*(gentype _x_) | Returns 1.0 if _x_ > 0, -0.0 if _x_ = -0.0, +0.0 if _x_ = +0.0, or -1.0 if _x_ < 0. @@ -5550,42 +6512,83 @@ a NaN.
[open,refpage='geometricFunctions',desc='Geometric Functions',type='freeform',spec='clang',anchor='geometric-functions',xrefs='integerFunctions',alias='cross dot distance length normalize fast_distance fast_length fast_normalize'] -- +// TODO It is not actually true that these functions operate component-wise - +// TODO in general they *combine* components. The <> describes the list of built-in geometric functions. These all operate component-wise. The description is per-component. -`float__n__` is `float`, `float2`, `float3`, or `float4` and `double__n__` is -`double` footnote:[{fn-double-supported}], `double2`, `double3`, or -`double4`. + +The generic type name `gentypef` indicates that the function can take any of + + * `float`, `float2`, `float3`, or `float4` + +as the type for the arguments. + +The generic type name `gentyped` footnote:[{fn-double-supported}] indicates +that the function can take any of + + * `double`, `double2`, `double3`, or `double4` + +as the type for the arguments. + +ifdef::cl_khr_fp16[] +The generic type name `gentypeh` footnote:[{fn-half-supported}] indicates +that the function can take any of + + * `half`, `half2`, `half3`, or `half4` + +as the type for the arguments. + +NOTE: All functions taking or returning `half` types are supported only when +the `<>` extension macro is supported. +endif::cl_khr_fp16[] + +For any specific use of a function with `gentype*` arguments the actual type +has to be the same for all arguments and the return type, unless they are +explicitly specified as an actual type.
[[table-builtin-geometric]] .Built-in Scalar and Vector Argument Geometric Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | float4 *cross*(float4 _p0_, float4 _p1_) + float3 *cross*(float3 _p0_, float3 _p1_) + double4 *cross*(double4 _p0_, double4 _p1_) + double3 *cross*(double3 _p0_, double3 _p1_) + +ifdef::cl_khr_fp16[] + half4 *cross*(half4 _p0_, half4 _p1_) + + half3 *cross*(half3 _p0_, half3 _p1_) +endif::cl_khr_fp16[] | Returns the cross product of _p0.xyz_ and _p1.xyz_. The _w_ component of `float4` result returned will be 0.0. -| float *dot*(float__n__ _p0_, float__n__ _p1_) + - double *dot*(double__n__ _p0_, double__n__ _p1_) - | Compute dot product. -| float *distance*(float__n__ _p0_, float__n__ _p1_) + - double *distance*(double__n__ _p0_, double__n__ _p1_) +| float *dot*(gentypef _p0_, gentypef _p1_) + + double *dot*(gentyped _p0_, gentyped _p1_) + +ifdef::cl_khr_fp16[half *dot*(gentypeh _p0_, gentypeh _p1_)] + | Compute the dot product of _p0_ and _p1_. +| float *distance*(gentypef _p0_, gentypef _p1_) + + double *distance*(gentyped _p0_, gentyped _p1_) + +ifdef::cl_khr_fp16[half *distance*(gentypeh _p0_, gentypeh _p1_)] | Returns the distance between _p0_ and _p1_. This is calculated as *length*(_p0_ - _p1_). -| float *length*(float__n__ _p_) + - double *length*(double__n__ _p_) +| float *length*(gentypef _p_) + + double *length*(gentyped _p_) + +ifdef::cl_khr_fp16[half *length*(gentypeh _p_)] | Return the length of vector _p_, i.e., {sqrt} __p.x__^2^ + _p.y_ ^2^ {plus} ... -| float__n__ *normalize*(float__n__ _p_) + - double__n__ *normalize*(double__n__ _p_) +| gentypef *normalize*(gentypef _p_) + + gentyped *normalize*(gentyped _p_) + +ifdef::cl_khr_fp16[gentypeh *normalize*(gentypeh _p_)] | Returns a vector in the same direction as _p_ but with a length of 1. 
| | -| float *fast_distance*(float__n__ _p0_, float__n__ _p1_) +| float *fast_distance*(float__n__ _p0_, float__n__ _p1_) | Returns *fast_length*(_p0_ - _p1_). | float *fast_length*(float__n__ _p_) | Returns the length of vector _p_ computed as: @@ -5627,7 +6630,6 @@ with the following exceptions: [open,refpage='relationalFunctions',desc='Relational Functions',type='freeform',spec='clang',anchor='relational-functions',xrefs='integerFunctions',alias='all any bitselect isequal isfinite isgreater isgreaterequal isinf isless islessequal islessgreater isnan isnormal isnotequal isordered isunordered select signbit'] -- - The <> and <> operators (*<*, *\<=*, *>*, *>=*, *!=*, *==*) can be used with scalar and vector built-in types and produce a scalar or vector signed integer result @@ -5669,89 +6671,159 @@ not a number (NaN) and the argument type is a vector. [[table-builtin-relational]] .Built-in Scalar and Vector Relational Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *isequal*(float _x_, float _y_) + int__n__ *isequal*(float__n__ _x_, float__n__ _y_) + int *isequal*(double _x_, double _y_) + long__n__ *isequal*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isequal*(half _x_, half _y_) + + short__n__ *isequal*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ == _y_. | int *isnotequal*(float _x_, float _y_) + int__n__ *isnotequal*(float__n__ _x_, float__n__ _y_) + int *isnotequal*(double _x_, double _y_) + long__n__ *isnotequal*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isnotequal*(half _x_, half _y_) + + short__n__ *isnotequal*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ != _y_.
| int *isgreater*(float _x_, float _y_) + int__n__ *isgreater*(float__n__ _x_, float__n__ _y_) + int *isgreater*(double _x_, double _y_) + long__n__ *isgreater*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isgreater*(half _x_, half _y_) + + short__n__ *isgreater*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ > _y_. | int *isgreaterequal*(float _x_, float _y_) + int__n__ *isgreaterequal*(float__n__ _x_, float__n__ _y_) + int *isgreaterequal*(double _x_, double _y_) + long__n__ *isgreaterequal*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isgreaterequal*(half _x_, half _y_) + + short__n__ *isgreaterequal*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ >= _y_. | int *isless*(float _x_, float _y_) + int__n__ *isless*(float__n__ _x_, float__n__ _y_) + int *isless*(double _x_, double _y_) + long__n__ *isless*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isless*(half _x_, half _y_) + + short__n__ *isless*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ < _y_. | int *islessequal*(float _x_, float _y_) + int__n__ *islessequal*(float__n__ _x_, float__n__ _y_) + int *islessequal*(double _x_, double _y_) + long__n__ *islessequal*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *islessequal*(half _x_, half _y_) + + short__n__ *islessequal*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ \<= _y_. 
| int *islessgreater*(float _x_, float _y_) + int__n__ *islessgreater*(float__n__ _x_, float__n__ _y_) + int *islessgreater*(double _x_, double _y_) + long__n__ *islessgreater*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *islessgreater*(half _x_, half _y_) + + short__n__ *islessgreater*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of (_x_ < _y_) \|\| (_x_ > _y_) . | | | int *isfinite*(float) + int__n__ *isfinite*(float__n__) + int *isfinite*(double) + long__n__ *isfinite*(double__n__) + +ifdef::cl_khr_fp16[] + int *isfinite*(half) + + short__n__ *isfinite*(half__n__) +endif::cl_khr_fp16[] | Test for finite value. | int *isinf*(float) + int__n__ *isinf*(float__n__) + int *isinf*(double) + long__n__ *isinf*(double__n__) + +ifdef::cl_khr_fp16[] + int *isinf*(half) + + short__n__ *isinf*(half__n__) +endif::cl_khr_fp16[] | Test for infinity value (positive or negative). | int *isnan*(float) + int__n__ *isnan*(float__n__) + int *isnan*(double) + long__n__ *isnan*(double__n__) + +ifdef::cl_khr_fp16[] + int *isnan*(half) + + short__n__ *isnan*(half__n__) +endif::cl_khr_fp16[] | Test for a NaN. | int *isnormal*(float) + int__n__ *isnormal*(float__n__) + int *isnormal*(double) + long__n__ *isnormal*(double__n__) -| Test for a normal value. + +ifdef::cl_khr_fp16[] + int *isnormal*(half) + + short__n__ *isnormal*(half__n__) +endif::cl_khr_fp16[] + | Test for a normal value. | int *isordered*(float _x_, float _y_) + int__n__ *isordered*(float__n__ _x_, float__n__ _y_) + int *isordered*(double _x_, double _y_) + long__n__ *isordered*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isordered*(half _x_, half _y_) + + short__n__ *isordered*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Test if arguments are ordered. - *isordered*() takes arguments _x_ and _y_, and returns the result - *isequal*(_x_, _x_) && *isequal*(_y_, _y_). 
+ *isordered*() takes arguments _x_ and _y_, and returns the result + *isequal*(_x_, _x_) && *isequal*(_y_, _y_). | int *isunordered*(float _x_, float _y_) + int__n__ *isunordered*(float__n__ _x_, float__n__ _y_) + int *isunordered*(double _x_, double _y_) + long__n__ *isunordered*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isunordered*(half _x_, half _y_) + + short__n__ *isunordered*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Test if arguments are unordered. *isunordered*() takes arguments _x_ and _y_, returning non-zero if _x_ or _y_ is NaN, and zero otherwise. -| int *signbit*(float) + - int__n__ *signbit*(float__n__) + - int *signbit*(double) + - long__n__ *signbit*(double__n__) +| int *signbit*(float _x_) + + int__n__ *signbit*(float__n__ _x_) + + int *signbit*(double _x_) + + long__n__ *signbit*(double__n__ _x_) + +ifdef::cl_khr_fp16[] + int *signbit*(half _x_) + + short__n__ *signbit*(half__n__ _x_) +endif::cl_khr_fp16[] | Test for sign bit. - The scalar version of the function returns a 1 if the sign bit in the - float is set else returns 0. + The scalar version of the function returns a 1 if the sign bit in _x_ + is set else returns 0. The vector version of the function returns the following for each - component in `float__n__`: -1 (i.e all bits set) if the sign bit in the - float is set else returns 0. + component in _x_: -1 (i.e. all bits set) if the sign bit in the component is + set else returns 0. | | | int *any*(igentype _x_) @@ -5790,62 +6862,79 @@ Scalar inputs to *all* are <> OpenCL C version [open,refpage='vectorDataLoadandStoreFunctions',desc='Vector Data Load and Store Functions',type='freeform',spec='clang',anchor='vector-data-load-and-store-functions',xrefs='',alias='vloadn vload_half vload_halfn vloada_halfn vstoren vstore_half vstore_halfn vstorea_halfn'] -- -The <> describes the list of supported -functions that allow you to read and write vector types from a pointer to -memory.
-We use the generic type `gentype` to indicate the built-in data types -`char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long` footnote:[{fn-int64-supported}], `ulong`, -`float` or `double` footnote:[{fn-double-supported}]. -We use the generic type name `gentype__n__` to represent n-element vectors -of `gentype` elements. -We use the type name `half__n__` to represent n-element vectors of half +The <> table describes the list of supported functions that allow you +to read and write vector types from a pointer to memory. + +The generic type name `gentype` indicates that the function can take any of + + * `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long` + footnote:[{fn-int64-supported}] or `ulong` + * `float` or `double` footnote:double-supported[{fn-double-supported}] +ifdef::cl_khr_fp16[] + * `half` footnote:[{fn-half-supported}] + + +NOTE: All functions taking or returning `half` types are supported only when +the `<>` extension macro is supported. +endif::cl_khr_fp16[] + +as the type for the arguments. + +The generic type name `gentype__n__` indicates an _n_-element vector of +`gentype` elements. + +The generic type name `half__n__` indicates an _n_-element vector of `half` elements. + The suffix _n_ is also used in the function names (i.e. *vload__n__*, -*vstore__n__* etc.), where _n_ = 2, 3 footnote:[{fn-vec3-vload-vstore}], 4, 8 or -16. +*vstore__n__* etc.), where _n_ = 2, 3 footnote:[{fn-vec3-vload-vstore}], 4, +8 or 16. 
[[table-vector-loadstore]] .Built-in Vector Data Load and Store Functions -[cols="7,3",] +[cols="7,3",options="header",] |==== -| *Function* | *Description* +| Function | Description | gentype__n__ **vload__n__**(size_t _offset_, const {global} gentype *_p_) + gentype__n__ **vload__n__**(size_t _offset_, const {local} gentype *_p_) + gentype__n__ **vload__n__**(size_t _offset_, const {constant} gentype *_p_) + - gentype__n__ **vload__n__**(size_t _offset_, const {private} gentype *_p_) + + gentype__n__ **vload__n__**(size_t _offset_, const {private} gentype *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: gentype__n__ **vload__n__**(size_t _offset_, const gentype *_p_) | Return `sizeof(gentype__n__)` bytes of data, where the first `(__n__ * sizeof(gentype))` bytes are read from the address computed as `(_p_ {plus} (_offset_ * _n_))`. The computed address must be 8-bit aligned if `gentype` is `char` or - `uchar`; 16-bit aligned if `gentype` is `short` or `ushort`; 32-bit - aligned if `gentype` is `int`, `uint`, or `float`; and 64-bit aligned - if `gentype` is `long` or `ulong`. + `uchar`; 16-bit aligned if `gentype` is +ifdef::cl_khr_fp16[`half`,] + `short` or `ushort`; 32-bit aligned if `gentype` is `int`, `uint`, or + `float`; and 64-bit aligned if `gentype` is `long` or `ulong`. 
| void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {global} gentype *_p_) + void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {local} gentype *_p_) + - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {private} gentype *_p_) + + void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {private} gentype *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore__n__**(gentype__n__ _data_, size_t _offset_, gentype *_p_) | Write `_n_ * sizeof(gentype)` bytes given by _data_ to the address computed as `(_p_ {plus} (_offset_ * _n_))`. The computed address must be 8-bit aligned if `gentype` is `char` or - `uchar`; 16-bit aligned if `gentype` is `short` or `ushort`; 32-bit - aligned if `gentype` is `int`, `uint`, or `float`; and 64-bit aligned - if `gentype` is `long` or `ulong`. + `uchar`; 16-bit aligned if `gentype` is +ifdef::cl_khr_fp16[`half`,] + `short` or `ushort`; 32-bit aligned if `gentype` is `int`, `uint`, or + `float`; and 64-bit aligned if `gentype` is `long` or `ulong`. | float **vload_half**(size_t _offset_, const {global} half *_p_) + float **vload_half**(size_t _offset_, const {local} half *_p_) + float **vload_half**(size_t _offset_, const {constant} half *_p_) + - float **vload_half**(size_t _offset_, const {private} half *_p_) + + float **vload_half**(size_t _offset_, const {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float **vload_half**(size_t _offset_, const half *_p_) | Read `sizeof(half)` bytes of data from the address computed as `(_p_ @@ -5857,10 +6946,10 @@ The suffix _n_ is also used in the function names (i.e. 
*vload__n__*, | float__n__ **vload_half__n__**(size_t _offset_, const {global} half *_p_) + float__n__ **vload_half__n__**(size_t _offset_, const {local} half *_p_) + float__n__ **vload_half__n__**(size_t _offset_, const {constant} half *_p_) + - float__n__ **vload_half__n__**(size_t _offset_, const {private} half *_p_) + + float__n__ **vload_half__n__**(size_t _offset_, const {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **vload_half__n__**(size_t _offset_, const half *_p_) | Read `(_n_ * sizeof(half))` bytes of data from the address computed as @@ -5873,22 +6962,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstore_half{rte}**(float _data_, size_t _offset_, {global} half *_p_) + void **vstore_half{rtz}**(float _data_, size_t _offset_, {global} half *_p_) + void **vstore_half{rtp}**(float _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtn}**(float _data_, size_t _offset_, {global} half *_p_) + + void **vstore_half{rtn}**(float _data_, size_t _offset_, {global} half *_p_) void **vstore_half**(float _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rte}**(float _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rtz}**(float _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rtp}**(float _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtn}**(float _data_, size_t _offset_, {local} half *_p_) + + void **vstore_half{rtn}**(float _data_, size_t _offset_, {local} half *_p_) void **vstore_half**(float _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rte}**(float _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rtz}**(float _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rtp}**(float _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtn}**(float _data_, 
size_t _offset_, {private} half *_p_) + + void **vstore_half{rtn}**(float _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore_half**(float _data_, size_t _offset_, half *_p_) + void **vstore_half{rte}**(float _data_, size_t _offset_, half *_p_) + @@ -5907,22 +6996,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstore_half__n__{rte}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstore_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstore_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + + void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {global} half *_p_) void **vstore_half__n__**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rte}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + + void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {local} half *_p_) void **vstore_half__n__**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rte}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + + void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {private} half *_p_) 
For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore_half__n__**(float__n__ _data_, size_t _offset_, half *_p_) + void **vstore_half__n__{rte}**(float__n__ _data_, size_t _offset_, half *_p_) + @@ -5942,22 +7031,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstore_half{rte}**(double _data_, size_t _offset_, {global} half *_p_) + void **vstore_half{rtz}**(double _data_, size_t _offset_, {global} half *_p_) + void **vstore_half{rtp}**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {global} half *_p_) + + void **vstore_half{rtn}**(double _data_, size_t _offset_, {global} half *_p_) void **vstore_half**(double _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rte}**(double _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rtz}**(double _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rtp}**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {local} half *_p_) + + void **vstore_half{rtn}**(double _data_, size_t _offset_, {local} half *_p_) void **vstore_half**(double _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rte}**(double _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rtz}**(double _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rtp}**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {private} half *_p_) + + void **vstore_half{rtn}**(double _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore_half**(double _data_, size_t _offset_, half *_p_) + void **vstore_half{rte}**(double _data_, 
size_t _offset_, half *_p_) + @@ -5976,22 +7065,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + + void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + + void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + + void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore_half__n__**(double__n__ _data_, size_t _offset_, half *_p_) + void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, half *_p_) + @@ -6009,10 +7098,10 @@ The suffix _n_ is also 
used in the function names (i.e. *vload__n__*, | float__n__ **vloada_half__n__**(size_t _offset_, const {global} half *_p_) + float__n__ **vloada_half__n__**(size_t _offset_, const {local} half *_p_) + float__n__ **vloada_half__n__**(size_t _offset_, const {constant} half *_p_) + - float__n__ **vloada_half__n__**(size_t _offset_, const {private} half *_p_) + + float__n__ **vloada_half__n__**(size_t _offset_, const {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **vloada_half__n__**(size_t _offset_, const half *_p_) | For n = 2, 4, 8 and 16, read `sizeof(half__n__)` bytes of data from @@ -6030,22 +7119,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstorea_half__n__{rte}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstorea_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstorea_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + + void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {global} half *_p_) void **vstorea_half__n__**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rte}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + + void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {local} half *_p_) void **vstorea_half__n__**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstorea_half__n__{rte}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void 
**vstorea_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstorea_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + + void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstorea_half__n__**(float__n__ _data_, size_t _offset_, half *_p_) + void **vstorea_half__n__{rte}**(float__n__ _data_, size_t _offset_, half *_p_) + @@ -6069,22 +7158,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + + void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + + void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, 
{private} half *_p_) + void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + + void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, half *_p_) + void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, half *_p_) + @@ -6140,20 +7229,20 @@ in a work-group. [[table-builtin-synchronization]] .Built-in Work-group Synchronization Functions -[cols="3,7",] +[cols="3,7",options="header",] |==== -| *Function* | *Description* +| Function | Description | void *barrier*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) - For OpenCL C 2.0 or newer, as an alias for *barrier*: + + For OpenCL C 2.0 or newer, as an alias for *barrier*: void *work_group_barrier*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) void *work_group_barrier*( + - cl_mem_fence_flags _flags_, + + cl_mem_fence_flags _flags_, memory_scope _scope_) | For these functions, if any work-item in a work-group encounters a barrier, the barrier must be encountered by all work-items in the @@ -6200,20 +7289,21 @@ in a work-group. -- NOTE: The functionality described in the following table <> support for OpenCL 3.0 or newer and the {opencl_c_subgroups} -feature. +requires>> support for +ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +OpenCL 3.0 or newer and the {opencl_c_subgroups} feature. The following table describes built-in functions to synchronize the work-items in a sub-group. 
-.Built-in Sub-group Synchronization Functions +[[table-synchronization-functions]] +.Built-in Sub-Group Synchronization Functions [cols="3,7",options="header",] |==== -| *Function* -| *Description* +| Function | Description | void **sub_group_barrier**( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) void **sub_group_barrier**( + cl_mem_fence_flags _flags_, + @@ -6277,12 +7367,12 @@ The OpenCL C programming language implements the following explicit memory fence [[table-builtin-explicit-memory-fences]] .Built-in Explicit Memory Fence Functions -[cols="3,7",] +[cols="3,7",options="header",] |==== -| *Function* | *Description* +| Function | Description | void *mem_fence*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) | Orders loads and stores of a work-item executing a kernel. This means that loads and stores preceding the *mem_fence* will be committed to memory @@ -6297,7 +7387,7 @@ The OpenCL C programming language implements the following explicit memory fence The value of _flags_ must be the same for all work-items in the work-group. | void *read_mem_fence*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) | Read memory barrier that orders only loads. @@ -6310,7 +7400,7 @@ The OpenCL C programming language implements the following explicit memory fence The value of _flags_ must be the same for all work-items in the work-group. | void *write_mem_fence*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) | Write memory barrier that orders only stores. @@ -6344,9 +7434,9 @@ types supported by OpenCL C or a user defined type. 
[[table-builtin-address-qualifier]] .Built-in Address Space Qualifier Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | global gentype * **to_global**(gentype *_ptr_) + const global gentype * **to_global**(const gentype *_ptr_) | Returns a pointer that points to a region in the `global` address @@ -6370,11 +7460,10 @@ types supported by OpenCL C or a user defined type. [[async-copies]] -=== Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch +=== Async Copies From Global to Local Memory, Local to Global Memory, and Prefetch -[open,refpage='asyncCopyFunctions',desc='Async Copy Functions',type='freeform',spec='clang',anchor='async-copies',xrefs='',alias='async_work_group_copy async_work_group_strided_copy prefetch wait_group_events'] +[open,refpage='asyncCopyFunctions',desc='Async Copy Functions',type='freeform',spec='clang',anchor='async-copies',xrefs='',alias='async_work_group_copy async_work_group_strided_copy prefetch async_work_group_copy_fence wait_group_events'] -- - The OpenCL C programming language implements the <> that provide asynchronous copies between `global` and local memory and a prefetch from `global` memory. @@ -6396,26 +7485,37 @@ work-items in the work-group must execute the async copy or wait group events function on each iteration of the loop if any work-item executes the async copy or wait group events function on that iteration. -We use the generic type name `gentype` to indicate the built-in data types `char`, -`char__n__`, `uchar`, `uchar__n__`, `short`, `short__n__`, -`ushort`, `ushort__n__`, `int`, `int__n__`, `uint`, -`uint__n__`, `long` footnote:[{fn-int64-supported}], `long__n__`, -`ulong`, `ulong__n__`, `float`, `float__n__`, `double` -footnote:[{fn-double-supported}], and `double__n__` as the type for -the arguments unless otherwise stated. 
+The generic type name `gentype` indicates that the function can take any of + + * `char`, `char__n__`, `uchar`, or `uchar__n__` + * `short`, `short__n__`, `ushort`, or `ushort__n__` + * `int`, `int__n__`, `uint`, or `uint__n__` + * `long` footnote:[{fn-int64-supported}], `long__n__`, `ulong`, or + `ulong__n__` + * `float` or `float__n__` + * `double` footnote:[{fn-double-supported}] or `double__n__` +ifdef::cl_khr_fp16[] + * `half` footnote:[{fn-half-supported}] or `half__n__` + +NOTE: All functions taking or returning `half` types are supported only when +the `<<cl_khr_fp16>>` extension macro is supported. +endif::cl_khr_fp16[] + +as the type for the arguments unless otherwise stated. _n_ is 2, 3 footnote:[{fn-vec3-async-copy}], 4, 8, or 16. [[table-builtin-async-copy]] .Built-in Async Copy and Prefetch Functions -[cols=",",] +[cols="1a,1",options="header",] |==== -| *Function* | *Description* +| Function | Description | event_t **async_work_group_copy**({local} gentype _*dst_, const {global} gentype *_src_, size_t _num_gentypes_, event_t _event_) + event_t **async_work_group_copy**({global} gentype _*dst_, const {local} gentype *_src_, size_t _num_gentypes_, event_t _event_) | Perform an async copy of _num_gentypes_ gentype elements from _src_ to _dst_. + Returns an event object that can be used by *wait_group_events* to wait for the async copy to finish. The _event_ argument can also be used to associate the @@ -6477,6 +7577,39 @@ _n_ is 2, 3 footnote:[{fn-vec3-async-copy}], 4, 8, or 16. cache. The prefetch instruction is applied to a work-item in a work-group and does not affect the functional behavior of the kernel. +ifdef::cl_khr_async_work_group_copy_fence[] +|[source,opencl_c] +---- +void async_work_group_copy_fence( + cl_mem_fence_flags flags) +---- + | Orders async copies produced by the work-items of a work-group + executing a kernel. 
+ Async copies preceding the *async_work_group_copy_fence* must complete + their access to the designated memory or memories, including both + reads-from and writes-to it, before async copies following the fence + are allowed to start accessing these memories. + In other words, every async copy preceding the + *async_work_group_copy_fence* must happen-before every async copy + following the fence, with respect to the designated memory or + memories. + + The _flags_ argument specifies the memory address space and can be set + to a combination of the following literal values: + + `CLK_LOCAL_MEM_FENCE` + + `CLK_GLOBAL_MEM_FENCE` + + The async fence is performed by all work-items in a work-group and + this built-in function must therefore be encountered by all work-items + in a work-group executing the kernel with the same argument values; + otherwise the results are undefined. + This rule applies to ND-ranges implemented with uniform and + non-uniform work-groups. + + <<unified-spec, Requires>> support for the + `<<cl_khr_async_work_group_copy_fence>>` extension macro. +endif::cl_khr_async_work_group_copy_fence[] |==== [NOTE] @@ -6488,6 +7621,176 @@ is undefined. -- +ifdef::cl_khr_extended_async_copies[] +[[extended-async-copies]] +==== Extended Async Copy Functions + +[open,refpage='extendedAsyncCopyFunctions',desc='Extended Async Copy Functions',type='freeform',spec='clang',anchor='extended-async-copies',xrefs='',alias='async_work_group_copy_2D2D async_work_group_copy_3D3D'] +-- +If the `<<cl_khr_extended_async_copies>>` extension macro is supported, +additional <<table-builtin-extended-async-copy, built-in functions>> are provided which interpret the source and destination as 2D or +3D data. + +[NOTE] +==== +<<table-builtin-async-copy, async_work_group_strided_copy>> is a special +case of *async_work_group_copy_2D2D*, namely one which copies a single +column to a single line or vice versa. 
+For example: + +`async_work_group_strided_copy(dst, src, num_gentypes, src_stride, event)` +is equal to `async_work_group_copy_2D2D(dst, 0, src, 0, sizeof(gentype), 1, +num_gentypes, src_stride, 1, event)` +==== + +The functions described in this section support arbitrary `gentype`-based +buffers by casting pointers to `void*`. + +These functions do not perform any implicit synchronization of source data +such as using a *barrier* before performing the copy. + +These functions are performed by all work-items in a work-group and must +therefore be encountered by all work-items in a work-group executing the +kernel with the same argument values; otherwise the results are undefined. + +The _src_offset_, _dst_offset_, _src_total_line_length_, +_dst_total_line_length_, _src_total_plane_area_ and _dst_total_plane_area_ +function arguments are expressed in elements. + +Both _src_total_line_length_ and _dst_total_line_length_ describe the +number of elements between the beginning of the current line and the +beginning of the next line. + +Both _src_total_plane_area_ and _dst_total_plane_area_ describe the +number of elements between the beginning of the current plane and the +beginning of the next plane. + +These functions return an event object that can be used by +*wait_group_events* to wait for the async copy to finish. +The _event_ argument can also be used to associate the async copy with a +previous async copy allowing an event to be shared by multiple async copies; +otherwise _event_ should be zero. +If the _event_ argument is non-zero, the event object supplied as the +_event_ argument will be returned. 
+ +[[table-builtin-extended-async-copy]] +.Built-in Extended Async Copy Functions +[cols="1a,1",options="header",] +|==== +| Function | Description +a| +[source,opencl_c] +---- +event_t async_work_group_copy_2D2D( + __local void *dst, + size_t dst_offset, + const __global void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t src_total_line_length, + size_t dst_total_line_length, + event_t event) + +event_t async_work_group_copy_2D2D( + __global void *dst, + size_t dst_offset, + const __local void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t src_total_line_length, + size_t dst_total_line_length, + event_t event) +---- + | Perform an async copy of (_num_elements_per_line_ * _num_lines_) + elements of size _num_bytes_per_element_ from (_src_ + (_src_offset_ * + _num_bytes_per_element_)) to (_dst_ + (_dst_offset_ * + _num_bytes_per_element_)). + All pointer arithmetic is performed with implicit casting to `char*` + by the implementation. + Each line contains _num_elements_per_line_ elements of size + _num_bytes_per_element_. + After each line of transfer, the _src_ address is incremented by + _src_total_line_length_ elements (i.e. _src_total_line_length_ * + _num_bytes_per_element_ bytes), and the _dst_ address is incremented + by _dst_total_line_length_ elements (i.e. _dst_total_line_length_ * + _num_bytes_per_element_ bytes), for the next line of transfer. + + The behavior of *async_work_group_copy_2D2D* is undefined if the + source or destination addresses exceed the upper bounds of the address + space during the copy. + + The behavior of *async_work_group_copy_2D2D* is also undefined if the + _src_total_line_length_ or _dst_total_line_length_ values are smaller + than _num_elements_per_line_, i.e. overlapping of lines is undefined. 
+a| +[source,opencl_c] +---- +event_t async_work_group_copy_3D3D( + __local void *dst, + size_t dst_offset, + const __global void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t num_planes, + size_t src_total_line_length, + size_t src_total_plane_area, + size_t dst_total_line_length, + size_t dst_total_plane_area, + event_t event) + +event_t async_work_group_copy_3D3D( + __global void *dst, + size_t dst_offset, + const __local void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t num_planes, + size_t src_total_line_length, + size_t src_total_plane_area, + size_t dst_total_line_length, + size_t dst_total_plane_area, + event_t event) +---- + | Perform an async copy of ((_num_elements_per_line_ * _num_lines_) * + _num_planes_) elements of size _num_bytes_per_element_ from (_src_ + + (_src_offset_ * _num_bytes_per_element_)) to (_dst_ + (_dst_offset_ * + _num_bytes_per_element_)), arranged in _num_planes_ planes. + All pointer arithmetic is performed with implicit casting to `char*` + by the implementation. + Each plane contains _num_lines_ lines. + Each line contains _num_elements_per_line_ elements. + After each line of transfer, the _src_ address is incremented by + _src_total_line_length_ elements (i.e. _src_total_line_length_ * + _num_bytes_per_element_ bytes), and the _dst_ address is incremented + by _dst_total_line_length_ elements (i.e. _dst_total_line_length_ * + _num_bytes_per_element_ bytes), for the next line of transfer. + + The behavior of *async_work_group_copy_3D3D* is undefined if the + source or destination addresses exceed the upper bounds of the address + space during the copy. + + The behavior of *async_work_group_copy_3D3D* is also undefined if the + _src_total_line_length_ or _dst_total_line_length_ values are smaller + than _num_elements_per_line_, i.e. overlapping of lines is undefined. 
+ + The behavior of *async_work_group_copy_3D3D* is also undefined if + _src_total_plane_area_ is smaller than (_num_lines_ * + _src_total_line_length_), or _dst_total_plane_area_ is smaller than + (_num_lines_ * _dst_total_line_length_), i.e. overlapping of planes is + undefined. +|==== +-- +endif::cl_khr_extended_async_copies[] + + [[atomic-functions]] === Atomic Functions @@ -6589,7 +7892,7 @@ endif::refpageOnly[] [[the-atomic_var_init-macro]] -==== The `ATOMIC_VAR_INIT` macro +==== The `ATOMIC_VAR_INIT` Macro [open,refpage='ATOMIC_VAR_INIT',desc='ATOMIC_VAR_INIT macro',type='freeform',spec='clang',anchor='the-atomic_var_init-macro',xrefs='atomicFunctions atomic_init'] -- @@ -6623,7 +7926,7 @@ operation, constitutes a data-race. [[the-atomic_init-function]] -==== The atomic_init function +==== The atomic_init Function [open,refpage='atomic_init',desc='The atomic_init function',type='freeform',spec='clang',anchor='the-atomic_init-function',xrefs='atomicFunctions ATOMIC_VAR_INIT'] -- @@ -6673,9 +7976,9 @@ The following table lists the enumeration constants: [[table-memory-orders]] //.Memory Order Enumeration Constants -[cols=",",] +[cols=",",options="header",] |==== -| *Memory Order* | *Additional Notes* +| Memory Order | Additional Notes | `memory_order_relaxed` | <> support for OpenCL C 2.0 or newer. | `memory_order_acquire` @@ -6714,16 +8017,17 @@ The following table lists the enumeration constants: [[table-memory-scopes]] //.Memory Scope Enumeration Constants -[cols=",",] +[cols=",",options="header",] |==== -| *Memory Scope* | *Additional Notes* +| Memory Scope | Additional Notes | `memory_scope_work_item` | `memory_scope_work_item` can only be used with `atomic_work_item_fence` with flags set to `CLK_IMAGE_MEM_FENCE`. <> support for OpenCL C 2.0 or newer. | `memory_scope_sub_group` - | <> support for OpenCL C 3.0 or newer and the - {opencl_c_subgroups} feature. 
+ | <> support for +ifdef::cl_khr_subgroups[the `<>` extension macro; or for] + OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. | `memory_scope_work_group` | <> support for OpenCL C 2.0 or newer. | `memory_scope_device` @@ -6800,7 +8104,7 @@ NOTE: The use of memory order and scope enumerations must respect the [[atomic-integer-and-floating-point-types]] -==== Atomic integer and floating-point types +==== Atomic Integer and Floating-point Types [open,refpage='atomicTypes',desc='Atomic Integer And Floating-Point Types',type='freeform',spec='clang',anchor='atomic-integer-and-floating-point-types',xrefs='atomicFunctions',alias='atomic_int atomic_uint atomic_long atomic_ulong atomic_float atomic_double atomic_intptr_t atomic_uintptr_t atomic_size_t atomic_ptrdiff_t'] -- @@ -6829,7 +8133,7 @@ The atomic_flag type must be implemented as a 32-bit integer. [[operations-on-atomic-types]] -==== Operations on atomic types +==== Operations on Atomic Types There are only a few kinds of operations on atomic types, though there are many instances of those kinds. @@ -7374,7 +8678,7 @@ All of these operations are applicable to an object of any atomic integer type. The key, operator, and computation correspondence is given in table below: -[cols=",,",] +[cols=",,",options="header",] |==== | *key* | *op* | *computation* | `add` | *+* | addition @@ -7638,14 +8942,13 @@ C 2.0, or OpenCL C 3.0 or newer and the {opencl_c_generic_address_space} feature. -- + [[atomic-legacy]] ==== OpenCL C 1.x Legacy Atomics IMPORTANT: The atomic functions described in this sub-section <> support for OpenCL C 1.1 or newer, and are <> OpenCL C 2.0. Also see extensions -`cl_khr_global_int32_base_atomics`, `cl_khr_global_int32_extended_atomics`, -`cl_khr_local_int32_base_atomics`, and `cl_khr_local_int32_extended_atomics`. +deprecated by>> OpenCL C 2.0. 
OpenCL C 1.x had support for relaxed atomic operations via built-in functions that could operate on any memory address in `{global}` or `{local}` spaces. @@ -7664,168 +8967,362 @@ semantics of the minimum requirements. // Copied from table 6.19 in OpenCL 1.2 spec [[table-legacy-atomic-functions]] .Legacy Atomic Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int **atomic_add**(volatile {global} int *_p_, int _val_) + - int **atom_add**(volatile {global} int *_p_, int _val_) + + int **atom_add**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_add**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_add**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_add**(volatile {global} uint *_p_, uint _val_) + + uint **atom_add**(volatile {global} uint *_p_, uint _val_) int **atomic_add**(volatile {local} int *_p_, int _val_) + - int **atom_add**(volatile {local} int *_p_, int _val_) + + int **atom_add**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_add**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_add**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_add**(volatile {local} uint *_p_, uint _val_) + + uint **atom_add**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ + _val_) and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_sub**(volatile {global} int *_p_, int _val_) + - int **atom_sub**(volatile {global} int *_p_, int _val_) + + int **atom_sub**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_sub**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_sub**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_sub**(volatile {global} uint *_p_, uint _val_) + + uint **atom_sub**(volatile {global} uint *_p_, uint _val_) int **atomic_sub**(volatile {local} int *_p_, int _val_) + - int **atom_sub**(volatile {local} int *_p_, int _val_) + + int **atom_sub**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_sub**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_sub**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_sub**(volatile {local} uint *_p_, uint _val_) + + uint **atom_sub**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ - _val_) and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_xchg**(volatile {global} int *_p_, int _val_) + - int **atom_xchg**(volatile {global} int *_p_, int _val_) + + int **atom_xchg**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_xchg**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_xchg**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_xchg**(volatile {global} uint *_p_, uint _val_) + + uint **atom_xchg**(volatile {global} uint *_p_, uint _val_) float **atomic_xchg**(volatile {global} float *_p_, float _val_) + int **atomic_xchg**(volatile {local} int *_p_, int _val_) + - int **atom_xchg**(volatile {local} int *_p_, int _val_) + + int **atom_xchg**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_xchg**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_xchg**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_xchg**(volatile {local} uint *_p_, uint _val_) + + uint **atom_xchg**(volatile {local} uint *_p_, uint _val_) - float **atomic_xchg**(volatile {local} float *_p_, float _val_) + + float **atomic_xchg**(volatile {local} float *_p_, float _val_) | Swaps the _old_ value stored at location _p_ with new value given by _val_. Returns _old_ value. 
| int **atomic_inc**(volatile {global} int *_p_) + - int **atom_inc**(volatile {global} int *_p_) + + int **atom_inc**(volatile {global} int *_p_) - unsigned int **atomic_inc**(volatile {global} unsigned int *_p_) + - unsigned int **atom_inc**(volatile {global} unsigned int *_p_) + + uint **atomic_inc**(volatile {global} uint *_p_) + + uint **atom_inc**(volatile {global} uint *_p_) int **atomic_inc**(volatile {local} int *_p_) + - int **atom_inc**(volatile {local} int *_p_) + + int **atom_inc**(volatile {local} int *_p_) - unsigned int **atomic_inc**(volatile {local} unsigned int *_p_) + - unsigned int **atom_inc**(volatile {local} unsigned int *_p_) + + uint **atomic_inc**(volatile {local} uint *_p_) + + uint **atom_inc**(volatile {local} uint *_p_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ + 1) and store result at location pointed by _p_. The function returns _old_. | int **atomic_dec**(volatile {global} int *_p_) + - int **atom_dec**(volatile {global} int *_p_) + + int **atom_dec**(volatile {global} int *_p_) - unsigned int **atomic_dec**(volatile {global} unsigned int *_p_) + - unsigned int **atom_dec**({global} unsigned int *_p_) + + uint **atomic_dec**(volatile {global} uint *_p_) + + uint **atom_dec**(volatile {global} uint *_p_) int **atomic_dec**(volatile {local} int *_p_) + - int **atom_dec**(volatile {local} int *_p_) + + int **atom_dec**(volatile {local} int *_p_) - unsigned int **atomic_dec**(volatile {local} unsigned int *_p_) + - unsigned int **atom_dec**(volatile {local} unsigned int *_p_) + + uint **atomic_dec**(volatile {local} uint *_p_) + + uint **atom_dec**(volatile {local} uint *_p_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ - 1) and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_cmpxchg**(volatile {global} int *_p_, int _cmp_, int _val_) + - int **atom_cmpxchg**(volatile {global} int *_p_, int _cmp_, int _val_) + + int **atom_cmpxchg**(volatile {global} int *_p_, int _cmp_, int _val_) - unsigned int **atomic_cmpxchg**(volatile {global} unsigned int *_p_, unsigned int _cmp_, unsigned int _val_) + - unsigned int **atom_cmpxchg**(volatile {global} unsigned int *_p_, unsigned int _cmp_, unsigned int _val_) + + uint **atomic_cmpxchg**(volatile {global} uint *_p_, uint _cmp_, uint _val_) + + uint **atom_cmpxchg**(volatile {global} uint *_p_, uint _cmp_, uint _val_) int **atomic_cmpxchg**(volatile {local} int *_p_, int _cmp_, int _val_) + - int **atom_cmpxchg**(volatile {local} int *_p_, int _cmp_, int _val_) + + int **atom_cmpxchg**(volatile {local} int *_p_, int _cmp_, int _val_) - unsigned int **atomic_cmpxchg**(volatile {local} unsigned int *_p_, unsigned int _cmp_, unsigned int _val_) + - unsigned int **atom_cmpxchg**(volatile {local} unsigned int *_p_, unsigned int _cmp_, unsigned int _val_) + + uint **atomic_cmpxchg**(volatile {local} uint *_p_, uint _cmp_, uint _val_) + + uint **atom_cmpxchg**(volatile {local} uint *_p_, uint _cmp_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ == _cmp_) ? _val_ : _old_ and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_min**(volatile {global} int *_p_, int _val_) + - int **atom_min**(volatile {global} int *_p_, int _val_) + + int **atom_min**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_min**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_min**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_min**(volatile {global} uint *_p_, uint _val_) + + uint **atom_min**(volatile {global} uint *_p_, uint _val_) int **atomic_min**(volatile {local} int *_p_, int _val_) + - int **atom_min**(volatile {local} int *_p_, int _val_) + + int **atom_min**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_min**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_min**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_min**(volatile {local} uint *_p_, uint _val_) + + uint **atom_min**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute **min**(_old_, _val_) and store minimum value at location pointed by _p_. The function returns _old_. 
| int **atomic_max**(volatile {global} int *_p_, int _val_) + - int **atom_max**(volatile {global} int *_p_, int _val_) + + int **atom_max**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_max**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_max**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_max**(volatile {global} uint *_p_, uint _val_) + + uint **atom_max**(volatile {global} uint *_p_, uint _val_) int **atomic_max**(volatile {local} int *_p_, int _val_) + - int **atom_max**(volatile {local} int *_p_, int _val_) + + int **atom_max**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_max**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_max**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_max**(volatile {local} uint *_p_, uint _val_) + + uint **atom_max**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute **max**(_old_, _val_) and store maximum value at location pointed by _p_. The function returns _old_. 
| int **atomic_and**(volatile {global} int *_p_, int _val_) + - int **atom_and**(volatile {global} int *_p_, int _val_) + + int **atom_and**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_and**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_and**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_and**(volatile {global} uint *_p_, uint _val_) + + uint **atom_and**(volatile {global} uint *_p_, uint _val_) int **atomic_and**(volatile {local} int *_p_, int _val_) + - int **atom_and**(volatile {local} int *_p_, int _val_) + + int **atom_and**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_and**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_and**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_and**(volatile {local} uint *_p_, uint _val_) + + uint **atom_and**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ & _val_) and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_or**(volatile {global} int *_p_, int _val_) + - int **atom_or**(volatile {global} int *_p_, int _val_) + + int **atom_or**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_or**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_or**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_or**(volatile {global} uint *_p_, uint _val_) + + uint **atom_or**(volatile {global} uint *_p_, uint _val_) int **atomic_or**(volatile {local} int *_p_, int _val_) + - int **atom_or**(volatile {local} int *_p_, int _val_) + + int **atom_or**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_or**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_or**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_or**(volatile {local} uint *_p_, uint _val_) + + uint **atom_or**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ \| _val_) and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_xor**(volatile {global} int *_p_, int _val_) + - int **atom_xor**(volatile {global} int *_p_, int _val_) + + int **atom_xor**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_xor**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_xor**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_xor**(volatile {global} uint *_p_, uint _val_) + + uint **atom_xor**(volatile {global} uint *_p_, uint _val_) int **atomic_xor**(volatile {local} int *_p_, int _val_) + - int **atom_xor**(volatile {local} int *_p_, int _val_) + + int **atom_xor**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_xor**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_xor**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_xor**(volatile {local} uint *_p_, uint _val_) + + uint **atom_xor**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ ^ _val_) and store result at location pointed by _p_. The function returns _old_. |==== +ifdef::cl_khr_global_int32_base_atomics,cl_khr_global_int32_extended_atomics,cl_khr_local_int32_base_atomics,cl_khr_local_int32_extended_atomics[] +A subset of the atomic functions described above are also supported in +OpenCL 1.0 when appropriate OpenCL extension macros are supported, as +described in the <<table-atomic-function-extensions>> table below. 
+ +[[table-atomic-function-extensions]] +.Atomic Function Extensions +[cols=",",options="header",] +|==== +| Extension Macro | Supported Functions +ifdef::cl_khr_global_int32_base_atomics[] +| `<>` + | **atom_add** + + **atom_sub** + + **atom_xchg** + + **atom_inc** + + **atom_dec** + + **atom_cmpxchg** + + (with {global} parameters) +endif::cl_khr_global_int32_base_atomics[] +ifdef::cl_khr_global_int32_extended_atomics[] +| `<>` + | **atom_min** + + **atom_max** + + **atom_and** + + **atom_or** + + **atom_xor** + + (with {global} parameters) +endif::cl_khr_global_int32_extended_atomics[] +ifdef::cl_khr_local_int32_base_atomics[] +| `<>` + | **atom_add** + + **atom_sub** + + **atom_xchg** + + **atom_inc** + + **atom_dec** + + **atom_cmpxchg** + + (with {local} parameters) +endif::cl_khr_local_int32_base_atomics[] +ifdef::cl_khr_local_int32_extended_atomics[] +| `<>` + | **atom_min** + + **atom_max** + + **atom_and** + + **atom_or** + + **atom_xor** + + (with {local} parameters) +endif::cl_khr_local_int32_extended_atomics[] +|==== +endif::cl_khr_global_int32_base_atomics,cl_khr_global_int32_extended_atomics,cl_khr_local_int32_base_atomics,cl_khr_local_int32_extended_atomics[] + + +ifdef::cl_khr_int64_base_atomics,cl_khr_int64_extended_atomics[] +[[atomic-legacy-int64]] +==== Legacy 64-Bit Atomic Extensions + +Similar to the <>, atomic +functions operating on 64-bit integers are provided by extensions. + +ifdef::cl_khr_int64_base_atomics[] +If the `<>` extension macro is supported, it +provides the functions described in the <> table below. 
+ +[[table-atomic-int64-base]] +.Built-in 64-Bit Base Atomic Functions +[cols="9,5",options="header",] +|==== +|*Function* |*Description* +| long **atom_add** (volatile {global} long *_p_, long _val_) + + long **atom_add** (volatile {local} long *_p_, long _val_) + + ulong **atom_add** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_add** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ + _val_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_sub** (volatile {global} long *_p_, long _val_) + + long **atom_sub** (volatile {local} long *_p_, long _val_) + + ulong **atom_sub** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_sub** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ - _val_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_xchg** (volatile {global} long *_p_, long _val_) + + long **atom_xchg** (volatile {local} long *_p_, long _val_) + + ulong **atom_xchg** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_xchg** (volatile {local} ulong *_p_, ulong _val_) + | Swaps the _old_ value stored at location _p_ with new value given by + _val_. + Returns _old_ value. +| long **atom_inc** (volatile {global} long *_p_) + + long **atom_inc** (volatile {local} long *_p_) + + ulong **atom_inc** (volatile {global} ulong *_p_) + + ulong **atom_inc** (volatile {local} ulong *_p_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ + _1_) and store result at location pointed by _p_. + The function returns _old_. 
+| long **atom_dec** (volatile {global} long *_p_) + + long **atom_dec** (volatile {local} long *_p_) + + ulong **atom_dec** (volatile {global} ulong *_p_) + + ulong **atom_dec** (volatile {local} ulong *_p_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ - _1_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_cmpxchg** (volatile {global} long *_p_, long _cmp_, long _val_) + + long **atom_cmpxchg** (volatile {local} long *_p_, long _cmp_, long _val_) + + ulong **atom_cmpxchg** (volatile {global} ulong *_p_, ulong _cmp_, ulong _val_) + + ulong **atom_cmpxchg** (volatile {local} ulong *_p_, ulong _cmp_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ == _cmp_) ? _val_ : _old_ and store result at location + pointed by _p_. + The function returns _old_. +|==== + +endif::cl_khr_int64_base_atomics[] + +ifdef::cl_khr_int64_extended_atomics[] +If the `<<cl_khr_int64_extended_atomics>>` extension macro is supported, it +provides the functions described in the <<table-atomic-int64-extended>> table below. + +[[table-atomic-int64-extended]] +.Built-in 64-Bit Extended Atomic Functions +[cols=",",options="header",] +|==== +|*Function* |*Description* +| long **atom_min** (volatile {global} long *_p_, long _val_) + + long **atom_min** (volatile {local} long *_p_, long _val_) + + ulong **atom_min** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_min** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute *min*(_old_, _val_) and store minimum value at location + pointed by _p_. + The function returns _old_. 
+| long **atom_max** (volatile {global} long *_p_, long _val_) + + long **atom_max** (volatile {local} long *_p_, long _val_) + + ulong **atom_max** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_max** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute *max*(_old_, _val_) and store maximum value at location + pointed by _p_. + The function returns _old_. +| long **atom_and** (volatile {global} long *_p_, long _val_) + + long **atom_and** (volatile {local} long *_p_, long _val_) + + ulong **atom_and** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_and** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ & _val_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_or** (volatile {global} long *_p_, long _val_) + + long **atom_or** (volatile {local} long *_p_, long _val_) + + ulong **atom_or** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_or** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ \| _val_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_xor** (volatile {global} long *_p_, long _val_) + + long **atom_xor** (volatile {local} long *_p_, long _val_) + + ulong **atom_xor** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_xor** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ ^ _val_) and store result at location pointed by _p_. + The function returns _old_. +|==== +endif::cl_khr_int64_extended_atomics[] + +NOTE: Atomic operations on 64-bit integers and 32-bit integers (and floats) +are also atomic with respect to each other. 
+ +endif::cl_khr_int64_base_atomics,cl_khr_int64_extended_atomics[] + + [[atomic-restrictions]] ==== Restrictions @@ -7864,8 +9361,9 @@ semantics of the minimum requirements. <> support for OpenCL C 2.0, or OpenCL C 3.0 or newer and the {opencl_c_atomic_order_seq_cst} feature. * Using `memory_scope_sub_group` with any built-in atomic function - <> support for OpenCL C 3.0 or newer and the - {opencl_c_subgroups} feature. + <> support for +ifdef::cl_khr_subgroups[the `<>` extension macro; or for] + OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. * Using `memory_scope_device` <> support for OpenCL C 2.0, or OpenCL C 3.0 or newer and the {opencl_c_atomic_scope_device} feature. @@ -7898,9 +9396,9 @@ _n_ is 2, 4, 8, or 16. [[table-misc-vector]] .Built-in Miscellaneous Vector Functions -[cols="1,2",] +[cols="1,2",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *vec_step*(gentype__n__ _a_) + int *vec_step*(char3 _a_) + int *vec_step*(uchar3 _a_) + @@ -8001,9 +9499,9 @@ The OpenCL C programming language implements the *printf* function. [[table-printf]] .Built-in printf Function -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int **printf**(constant char *restrict _format_, ...) | The *printf* built-in function writes output to an implementation-defined stream such as stdout under control of the @@ -8022,7 +9520,7 @@ The OpenCL C programming language implements the *printf* function. 
[[printf-output-synchronization]] -==== printf output synchronization +==== printf Output Synchronization When the event that is associated with a particular kernel invocation is completed, the output of all printf() calls executed by this kernel @@ -8038,7 +9536,7 @@ For example, it is valid for the output of a work-item with a global id [[printf-format-string]] -==== printf format string +==== printf Format String The format shall be a character sequence, beginning and ending in its initial shift state. @@ -8204,7 +9702,7 @@ characters. *o,u,* -*x,X* The `unsigned int`, `uchar__n__`, `ushort__n__`, `uint__n__` or +*x,X* The `uint`, `uchar__n__`, `ushort__n__`, `uint__n__` or `ulong__n__` argument is converted to unsigned octal (*o*), unsigned decimal (*u*), or unsigned hexadecimal notation (*x* or *X*) in the style _dddd_; the letters *abcdef* are used for *x* conversion and the letters *ABCDEF* @@ -8294,11 +9792,10 @@ specifier. [NOTE] ==== The conversion specifiers *e,E,g,G,a,A* convert a `float` or `half` argument -that is a scalar type to a `double` only if the `double` data type is -supported, e.g. for OpenCL C 3.0 or newer the {opencl_c_fp64} feature -macro is present. -If the `double` data type is not supported, the argument will be a `float` -instead of a `double` and the `half` type will be converted to a `float`. +that is a scalar type to a `double` only if <>. +Otherwise, the argument will be a `float` instead of a `double` and the +`half` type will be converted to a `float`. ==== *c* The `int` argument is converted to an `unsigned char`, and the resulting @@ -8395,7 +9892,7 @@ kernel void my_kernel(global char *s, ... ) [[differences-between-opencl-c-and-c99-printf]] -==== Differences between OpenCL C and C99 printf +==== Differences Between OpenCL C and C99 printf * The *l* modifier followed by a *c* conversion specifier or *s* conversion specifier is not supported by OpenCL C. 
@@ -8515,9 +10012,9 @@ The sampler fields are described in the following table. [[table-sampler-descriptor]] .Sampler Descriptor -[cols=",",] +[cols=",",options="header",] |==== -| *Sampler State* | *Description* +| Sampler State | Description | `` | Specifies whether the _x_, _y_ and _z_ coordinates are passed in as normalized or unnormalized values. @@ -8590,7 +10087,7 @@ queried using the `CL_DEVICE_MAX_SAMPLERS` token in *clGetDeviceInfo*. [[determining-the-border-color-or-value]] -===== *Determining the border color or value* +===== *Determining the Border Color or Value* If `` in sampler is `CLK_ADDRESS_CLAMP`, then out-of-range image coordinates return the border color. @@ -8624,15 +10121,24 @@ The alpha component is returned as is. [open,refpage='imageReadFunctions',desc='Built-in Image Read Functions',type='freeform',spec='clang',anchor='built-in-image-read-functions',xrefs='imageQueryFunctions imageSamplerlessReadFunctions imageWriteFunctions',alias='read_imagef read_imagei read_imageui'] -- - The following built-in function calls to read images with a sampler are supported footnote:[{fn-read-image-with-sampler}]. +ifdef::cl_khr_mipmap_image[] +If the `<>` extension macro is supported, read +functions which do not either + + * explicitly specify a level of detail _lod_, or + * compute a level of detail from _gradient_ parameters + +read from mip level 0 if _image_ is a mipmapped image. +endif::cl_khr_mipmap_image[] + [[table-image-read]] .Built-in Image Read Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | float4 *read_imagef*(read_only image2d_t _image_, sampler_t _sampler_, int2 _coord_) + float4 *read_imagef*(read_only image2d_t _image_, sampler_t _sampler_, @@ -8661,6 +10167,40 @@ supported footnote:[{fn-read-image-with-sampler}]. 
Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description above are undefined. 
+ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image2d_t _image_, sampler_t _sampler_, + int2 _coord_) + + half4 *read_imageh*(read_only image2d_t _image_, sampler_t _sampler_, + float2 _coord_) + | Use the coordinate _(coord.x, coord.y)_ to do an element lookup in the + 2D image object specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed + formats or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(read_only image2d_t _image_, sampler_t _sampler_, int2 _coord_) + @@ -8736,6 +10276,41 @@ supported footnote:[{fn-read-image-with-sampler}]. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description are undefined. 
+ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image3d_t _image_, sampler_t _sampler_, + int4 _coord_) + + half4 *read_imageh*(read_only image3d_t _image_, sampler_t _sampler_, + float4 _coord_) + | Use the coordinate _(coord.x_, _coord.y_, _coord.z)_ to do an + element lookup in the 3D image object specified by _image_. + _coord.w_ is ignored. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description are + undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(read_only image3d_t _image_, sampler_t _sampler_, int4 _coord_) + @@ -8791,16 +10366,16 @@ supported footnote:[{fn-read-image-with-sampler}]. _coord.z_ in the 2D image array specified by _image_. *read_imagef* returns floating-point values in the range [0.0, 1.0] - for image objects created with image_channel_data_type set to one of + for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. 
*read_imagef* returns floating-point values in the range [-1.0, 1.0] - for image objects created with image_channel_data_type set to + for image objects created with _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. *read_imagef* returns floating-point values for image objects created - with image_channel_data_type set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -8809,8 +10384,43 @@ supported footnote:[{fn-read-image-with-sampler}]. `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. Values returned by *read_imagef* for image objects with - image_channel_data_type values not specified in the description above + _image_channel_data_type_ values not specified in the description above + are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image2d_array_t _image_, sampler_t + _sampler_, int4 _coord_) + + half4 *read_imageh*(read_only image2d_array_t _image_, sampler_t + _sampler_, float4 _coord_) + | Use _coord.xy_ to do an element lookup in the 2D image identified by + _coord.z_ in the 2D image array specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. 
+ + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] +| | | int4 *read_imagei*(read_only image2d_array_t _image_, sampler_t _sampler_, int4 _coord_) + int4 *read_imagei*(read_only image2d_array_t _image_, sampler_t _sampler_, @@ -8886,6 +10496,40 @@ supported footnote:[{fn-read-image-with-sampler}]. above are undefined. <> support for OpenCL C 1.2 or newer. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image1d_t _image_, sampler_t _sampler_, + int _coord_) + + half4 *read_imageh*(read_only image1d_t _image_, sampler_t _sampler_, + float _coord_) + | Use _coord_ to do an element lookup in the 1D image object specified + by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. 
+ + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(read_only image1d_t _image_, sampler_t _sampler_, int _coord_) + @@ -8942,16 +10586,16 @@ supported footnote:[{fn-read-image-with-sampler}]. _coord.y_ in the 1D image array specified by _image_. *read_imagef* returns floating-point values in the range [0.0, 1.0] - for image objects created with image_channel_data_type set to one of + for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. *read_imagef* returns floating-point values in the range [-1.0, 1.0] - for image objects created with image_channel_data_type set to + for image objects created with _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. *read_imagef* returns floating-point values for image objects created - with image_channel_data_type set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -8960,10 +10604,45 @@ supported footnote:[{fn-read-image-with-sampler}]. `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. Values returned by *read_imagef* for image objects with - image_channel_data_type values not specified in the description above + _image_channel_data_type_ values not specified in the description above are undefined. 
<> support for OpenCL C 1.2 or newer. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image1d_array_t _image_, + sampler_t _sampler_, int2 _coord_) + + half4 *read_imageh*(read_only image1d_array_t _image_, + sampler_t _sampler_, float2 _coord_) + | Use _coord.x_ to do an element lookup in the 1D image identified by + _coord.y_ in the 1D image array specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description above + are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] +| | | int4 *read_imagei*(read_only image1d_array_t _image_, sampler_t _sampler_, int2 _coord_) + int4 *read_imagei*(read_only image1d_array_t _image_, sampler_t _sampler_, @@ -9035,8 +10714,8 @@ supported footnote:[{fn-read-image-with-sampler}]. _image_channel_data_type_ values not specified in the description above are undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. 
+ <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | | float *read_imagef*(read_only image2d_array_depth_t _image_, sampler_t _sampler_, int4 _coord_) + @@ -9062,19 +10741,341 @@ supported footnote:[{fn-read-image-with-sampler}]. _image_channel_data_type_ values not specified in the description above are undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | + +ifdef::cl_khr_mipmap_image[] +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float lod) + +int4 read_imagei( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float lod) + +uint4 read_imageui( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float lod) + +float read_imagef( + read_only image2d_depth_t image, + sampler_t sampler, + float2 coord, + float lod) +---- + | Use the coordinate _coord.xy_ to do an element lookup in the mip level + specified by _lod_ in the 2D image object specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) + +int4 read_imagei( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) + +uint4 read_imageui( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) + +float read_imagef( + read_only image2d_depth_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord.xy_ to do + an element lookup in the mip level specified by the computed lod in + the 2D image object specified by _image_. + + <> support for the `<>` + extension macro. 
+a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image1d_t image, + sampler_t sampler, + float coord, + float lod) + +int4 read_imagei( + read_only image1d_t image, + sampler_t sampler, + float coord, + float lod) + +uint4 read_imageui( + read_only image1d_t image, + sampler_t sampler, + float coord, + float lod) +---- + | Use the coordinate _coord_ to do an element lookup in the mip level + specified by _lod_ in the 1D image object specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image1d_t image, + sampler_t sampler, + float coord, + float gradient_x, + float gradient_y) + +int4 read_imagei( + read_only image1d_t image, + sampler_t sampler, + float coord, + float gradient_x, + float gradient_y) + +uint4 read_imageui( + read_only image1d_t image, + sampler_t sampler, + float coord, + float gradient_x, + float gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord_ to do an + element lookup in the mip level specified by the computed lod in the + 1D image object specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float lod) + +int4 read_imagei( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float lod) + +uint4 read_imageui( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float lod) +---- + | Use the coordinate _coord.xyz_ to do an element lookup in the mip + level specified by _lod_ in the 3D image object specified by _image_. + + <> support for the `<>` + extension macro. 
+a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float4 gradient_x, + float4 gradient_y) + +int4 read_imagei( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float4 gradient_x, + float4 gradient_y) + +uint4 read_imageui( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float4 gradient_x, + float4 gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord.xyz_ to do + an element lookup in the mip level specified by the computed lod in + the 3D image object specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float lod) + +int4 read_imagei( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float lod) + +uint4 read_imageui( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float lod) +---- + | Use the coordinate _coord.x_ to do an element lookup in the 1D image + identified by _coord.y_ and mip level specified by _lod_ in the 1D + image array specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float gradient_x, + float gradient_y) + +int4 read_imagei( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float gradient_x, + float gradient_y) + +uint4 read_imageui( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float gradient_x, + float gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord.x_ to do an + element lookup in the 1D image identified by _coord.y_ and mip level + specified by the computed lod in the 1D image array specified by _image_. + + <> support for the `<>` + extension macro.
+a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float lod) + +int4 read_imagei( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float lod) + +uint4 read_imageui( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float lod) + +float read_imagef( + read_only image2d_array_depth_t image, + sampler_t sampler, + float4 coord, + float lod) +---- + | Use the coordinate _coord.xy_ to do an element lookup in the 2D image + identified by _coord.z_ and mip level specified by _lod_ in the 2D + image array specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) + +int4 read_imagei( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) + +uint4 read_imageui( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) + +float read_imagef( + read_only image2d_array_depth_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord.xy_ to do + an element lookup in the 2D image identified by _coord.z_ and mip + level specified by the computed lod in the 2D image array specified by + _image_. + + <> support for the `<>` + extension macro. +endif::cl_khr_mipmap_image[] + |==== -- +ifdef::cl_khr_mipmap_image[] +NOTE: If the `<>` extension macro is supported, +`CL_SAMPLER_NORMALIZED_COORDS` must be `CL_TRUE` for built-in functions +described in the table above that read from a mipmapped image; otherwise +behavior is undefined.
+The value specified in the _lod_ argument is clamped to the minimum of +(actual number of mip levels - 1) in the image or the value specified for +`CL_SAMPLER_LOD_MAX`. +endif::cl_khr_mipmap_image[] + [[built-in-image-sampler-less-read-functions]] ==== Built-in Image Sampler-less Read Functions [open,refpage='imageSamplerlessReadFunctions',desc='Built-in Image Sampler-less Read Functions',type='freeform',spec='clang',anchor='built-in-image-sampler-less-read-functions',xrefs='imageQueryFunctions imageReadFunctions imageWriteFunctions'] -- - NOTE: Sampler-less image read functions <> support for OpenCL C 1.2 or newer, with some functions requiring support for newer versions of OpenCL C as noted in the <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] +| | +| int4 *read_imagei*(_aQual_ image2d_t _image_, int2 _coord_) + + uint4 *read_imageui*(_aQual_ image2d_t _image_, int2 _coord_) + | Use the coordinate (_coord.x_, _coord.y_) to do an element lookup in + the 2D image object specified by _image_. + + *read_imagei* and *read_imageui* return unnormalized signed integer + and unsigned integer values respectively. Each channel will be stored + in a 32-bit integer. *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: @@ -9169,6 +11195,32 @@ For samplerless read functions this may be `read_only` or `read_write`. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(_aQual_ image3d_t _image_, int4 _coord_ ) + | Use the coordinate _(coord.x_, _coord.y_, _coord.z)_ to do an element + lookup in the 3D image object specified by _image_. _coord.w_ is + ignored. 
+ + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description are + undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(_aQual_ image3d_t _image_, int4 _coord_) + uint4 *read_imageui*(_aQual_ image3d_t _image_, int4 _coord_) @@ -9219,6 +11271,31 @@ For samplerless read functions this may be `read_only` or `read_write`. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description above are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(_aQual_ image2d_array_t _image_, int4 _coord_) + | Use _coord.xy_ to do an element lookup in the 2D image identified by + _coord.z_ in the 2D image array specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. 
+ + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(_aQual_ image2d_array_t _image_, int4 _coord_) + uint4 *read_imageui*(_aQual_ image2d_array_t _image_, int4 _coord_) @@ -9269,6 +11346,32 @@ For samplerless read functions this may be `read_only` or `read_write`. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description above are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(_aQual_ image1d_t _image_, int _coord_) + + half4 *read_imageh*(_aQual_ image1d_buffer_t _image_, int _coord_) + | Use _coord_ to do an element lookup in the 1D image or 1D image buffer + object specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(_aQual_ image1d_t _image_, int _coord_) + uint4 *read_imageui*(_aQual_ image1d_t _image_, int _coord_) + @@ -9320,6 +11423,31 @@ For samplerless read functions this may be `read_only` or `read_write`. 
Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description above are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(_aQual_ image1d_array_t _image_, int2 _coord_) + | Use _coord.x_ to do an element lookup in the 1D image identified by + _coord.y_ in the 1D image array specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(_aQual_ image1d_array_t _image_, int2 _coord_) + uint4 *read_imageui*(_aQual_ image1d_array_t _image_, int2 _coord_) @@ -9365,8 +11493,8 @@ For samplerless read functions this may be `read_only` or `read_write`. _image_channel_data_type_ values not specified in the description above are undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | | float *read_imagef*(_aQual_ image2d_array_depth_t _image_, int4 _coord_) | Use _coord.xy_ to do an element lookup in the 2D image identified by @@ -9383,9 +11511,211 @@ For samplerless read functions this may be `read_only` or `read_write`. _image_channel_data_type_ values not specified in the description above are undefined.
- <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | + +ifdef::cl_khr_gl_msaa_sharing[] +a| +[source,opencl_c] +---- +float4 read_imagef( + image2d_msaa_t image, + int2 coord, + int sample) +---- + | Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element + lookup in the 2D image object specified by _image_. + + *read_imagef* returns floating-point values in the range [0.0, 1.0] + for image objects created with _image_channel_data_type_ set to one of + the pre-defined packed formats or `CL_UNORM_INT8`, or + `CL_UNORM_INT16`. + + *read_imagef* returns floating-point values in the range [-1.0, 1.0] + for image objects created with _image_channel_data_type_ set to + `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imagef* returns floating-point values for image objects created + with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + + Values returned by *read_imagef* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +int4 read_imagei(image2d_msaa_t image, + int2 coord, + int sample) + +uint4 read_imageui(image2d_msaa_t image, + int2 coord, + int sample) +---- + | Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element + lookup in the 2D image object specified by _image_. + + *read_imagei* and *read_imageui* return unnormalized signed integer + and unsigned integer values respectively. + Each channel will be stored in a 32-bit integer. + + *read_imagei* can only be used with image objects created with + _image_channel_data_type_ set to one of the following values: + + * `CL_SIGNED_INT8`, + * `CL_SIGNED_INT16`, and + * `CL_SIGNED_INT32`. + + If the _image_channel_data_type_ is not one of the above values, the + values returned by *read_imagei* are undefined. 
+ + *read_imageui* can only be used with image objects created with + _image_channel_data_type_ set to one of the following values: + + * `CL_UNSIGNED_INT8`, + * `CL_UNSIGNED_INT16`, and + * `CL_UNSIGNED_INT32`. + + If the _image_channel_data_type_ is not one of the above values, the + values returned by *read_imageui* are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef(image2d_array_msaa_t image, + int4 coord, + int sample) +---- + | Use _coord.xy_ and _sample_ to do an element lookup in the 2D image + identified by _coord.z_ in the 2D image array specified by _image_. + + *read_imagef* returns floating-point values in the range [0.0, 1.0] + for image objects created with _image_channel_data_type_ set to one of + the pre-defined packed formats or `CL_UNORM_INT8`, or + `CL_UNORM_INT16`. + + *read_imagef* returns floating-point values in the range [-1.0, 1.0] + for image objects created with _image_channel_data_type_ set to + `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imagef* returns floating-point values for image objects created + with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + + Values returned by *read_imagef* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +int4 read_imagei(image2d_array_msaa_t image, + int4 coord, + int sample) + +uint4 read_imageui(image2d_array_msaa_t image, + int4 coord, + int sample) +---- + | Use _coord.xy_ and _sample_ to do an element lookup in the 2D image + identified by _coord.z_ in the 2D image array specified by _image_. + + *read_imagei* and *read_imageui* return unnormalized signed integer + and unsigned integer values respectively. + Each channel will be stored in a 32-bit integer. 
+ + *read_imagei* can only be used with image objects created with + _image_channel_data_type_ set to one of the following values: + + * `CL_SIGNED_INT8`, + * `CL_SIGNED_INT16`, and + * `CL_SIGNED_INT32`. + + If the _image_channel_data_type_ is not one of the above values, the + values returned by *read_imagei* are undefined. + + *read_imageui* can only be used with image objects created with + _image_channel_data_type_ set to one of the following values: + + * `CL_UNSIGNED_INT8`, + * `CL_UNSIGNED_INT16`, and + * `CL_UNSIGNED_INT32`. + + If the _image_channel_data_type_ is not one of the above values, the + values returned by *read_imageui* are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +float read_imagef(image2d_msaa_depth_t image, + int2 coord, + int sample) +---- + | Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element + lookup in the 2D depth image object specified by _image_. + + *read_imagef* returns a floating-point value in the range [0.0, 1.0] + for depth image objects created with _image_channel_data_type_ set to + `CL_UNORM_INT16` or `CL_UNORM_INT24`. + + *read_imagef* returns a floating-point value for depth image objects + created with _image_channel_data_type_ set to `CL_FLOAT`. + + Values returned by *read_imagef* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +float read_imagef(image2d_array_msaa_depth_t image, + int4 coord, + int sample) +---- + | Use _coord.xy_ and _sample_ to do an element lookup in the 2D image + identified by _coord.z_ in the 2D depth image array specified by + _image_. + + *read_imagef* returns a floating-point value in the range [0.0, 1.0] + for depth image objects created with _image_channel_data_type_ set to + `CL_UNORM_INT16` or `CL_UNORM_INT24`.
+ + *read_imagef* returns a floating-point value for depth image objects + created with _image_channel_data_type_ set to `CL_FLOAT`. + + Values returned by *read_imagef* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + Note: When a multisample image is accessed in a kernel, the access + takes one vector of integers describing which pixel to fetch and an + integer corresponding to the sample numbers describing which sample + within the pixel to fetch. + _sample_ identifies the sample position in the multi-sample image. + + *For best performance, we recommend that _sample_ be a literal value + so it is known at compile time and the OpenCL compiler can perform + appropriate optimizations for multi-sample reads on the device*. + + No standard sampling instructions are allowed on the multisample + image. Accessing a coordinate outside the image and/or a sample that + is outside the number of samples associated with each pixel in the + image is undefined. + + <> support for the + `<>` extension macro. +endif::cl_khr_gl_msaa_sharing[] |==== -- @@ -9395,18 +11725,39 @@ For samplerless read functions this may be `read_only` or `read_write`. [open,refpage='imageWriteFunctions',desc='Built-in Image Write Functions',type='freeform',spec='clang',anchor='built-in-image-write-functions',xrefs='imageQueryFunctions imageReadFunctions imageSamplerlessReadFunctions',alias='write_imagef write_imagei write_imageui'] -- - The following built-in function calls to write images are supported. _aQual_ in the following table refers to one of the access qualifiers. For write functions this may be `write_only` or `read_write`. +ifdef::cl_khr_mipmap_image_writes[] +If the `<>` extension macro is supported, write +functions which do not explicitly specify a level of detail _lod_ write to +mip level 0 if _image_ is a mipmapped image.
+_mipwidth_, _mipheight_, and _mipdepth_ in the table refer to the width, +height, and depth of the _image_ mip level specified by _lod_ respectively; +_miplayers_ refers to the number of layers in _image_; and _miplevels_ +refers to the number of mip levels in _image_. +endif::cl_khr_mipmap_image_writes[] + +ifdef::cl_khr_srgb_image_writes[] +If the `<>` extension macro is supported, the +*write_imagef* functions described below may write to sRGB images. +Linear to sRGB conversion is performed by the function. +Only the R, G, and B components are converted from linear to sRGB; the A +component is written as-is. +endif::cl_khr_srgb_image_writes[] + + [[table-image-write]] .Built-in Image Write Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | void *write_imagef*(_aQual_ image2d_t _image_, int2 _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image2d_t _image_, int2 _coord_, half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image2d_t _image_, int2 _coord_, int4 _color_) + void *write_imageui*(_aQual_ image2d_t _image_, int2 _coord_, uint4 _color_) | Write _color_ value to location specified by _coord.xy_ in the 2D @@ -9417,7 +11768,9 @@ For write functions this may be `write_only` or `read_write`. and must be in the range [0, image width-1] and [0, image height-1] respectively. - *write_imagef* can only be used with image objects created with + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. @@ -9439,14 +11792,25 @@ For write functions this may be `write_only` or `read_write`. `CL_UNSIGNED_INT16` and + `CL_UNSIGNED_INT32`. 
- The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above or with _x_ and _y_ coordinate values that are not in the range [0, image width-1] and [0, image height-1], respectively, is undefined. + +ifdef::cl_khr_fp16[] + *write_imageh* <> support for the + `<>` extension macro. +endif::cl_khr_fp16[] | | | void *write_imagef*(_aQual_ image2d_array_t _image_, int4 _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image2d_array_t _image_, int4 _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image2d_array_t _image_, int4 _coord_, int4 _color_) + void *write_imageui*(_aQual_ image2d_array_t _image_, int4 _coord_, @@ -9460,7 +11824,9 @@ For write functions this may be `write_only` or `read_write`. coordinates, and must be in the range [0, image width-1] and [0, image height-1], and [0, image number of layers-1], respectively. - *write_imagef* can only be used with image objects created with + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. @@ -9482,21 +11848,36 @@ For write functions this may be `write_only` or `read_write`. `CL_UNSIGNED_INT16` and + `CL_UNSIGNED_INT32`. 
- The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above or with (_x_, _y_, _z_) coordinate values that are not in the range [0, image width-1], [0, image height-1], and [0, image number of layers-1], respectively, is undefined. + +ifdef::cl_khr_fp16[] + *write_imageh* <> support for the + `<>` extension macro. +endif::cl_khr_fp16[] | | | void *write_imagef*(_aQual_ image1d_t _image_, int _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image1d_t _image_, int _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image1d_t _image_, int _coord_, int4 _color_) + void *write_imageui*(_aQual_ image1d_t _image_, int _coord_, uint4 _color_) + void *write_imagef*(_aQual_ image1d_buffer_t _image_, int _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image1d_buffer_t _image_, int _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image1d_buffer_t _image_, int _coord_, int4 _color_) + void *write_imageui*(_aQual_ image1d_buffer_t _image_, int _coord_, @@ -9508,7 +11889,9 @@ For write functions this may be `write_only` or `read_write`. _coord_ is considered to be an unnormalized coordinate, and must be in the range [0, image width-1]. - *write_imagef* can only be used with image objects created with + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. @@ -9530,15 +11913,26 @@ For write functions this may be `write_only` or `read_write`. `CL_UNSIGNED_INT16` and + `CL_UNSIGNED_INT32`. 
- The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above, or with a coordinate value that is not in the range [0, image width-1], is undefined. <> support for OpenCL C 1.2 or newer. + +ifdef::cl_khr_fp16[] + *write_imageh* <> support for the + `<>` extension macro. +endif::cl_khr_fp16[] | | | void *write_imagef*(_aQual_ image1d_array_t _image_, int2 _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image1d_array_t _image_, int2 _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image1d_array_t _image_, int2 _coord_, int4 _color_) + void *write_imageui*(_aQual_ image1d_array_t _image_, int2 _coord_, @@ -9551,7 +11945,9 @@ For write functions this may be `write_only` or `read_write`. and must be in the range [0, image width-1] and [0, image number of layers-1], respectively. - *write_imagef* can only be used with image objects created with + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. @@ -9573,7 +11969,9 @@ For write functions this may be `write_only` or `read_write`. `CL_UNSIGNED_INT16` and + `CL_UNSIGNED_INT32`. 
- The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above or with (_x_, _y_) coordinate values that are not in the range [0, image width-1] and [0, image @@ -9604,8 +12002,8 @@ For write functions this may be `write_only` or `read_write`. values that are not in the range [0, image width-1] and [0, image height-1], respectively, is undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | | void *write_imagef*(_aQual_ image2d_array_depth_t _image_, int4 _coord_, float _depth_) @@ -9631,25 +12029,31 @@ For write functions this may be `write_only` or `read_write`. values that are not in the range [0, image width-1], [0, image height-1], [0, image number of layers-1], respectively, is undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | | void *write_imagef*(_aQual_ image3d_t _image_, int4 _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image3d_t _image_, int4 _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image3d_t _image_, int4 _coord_, int4 _color_) + void *write_imageui*(_aQual_ image3d_t _image_, int4 _coord_, uint4 _color_) - | Write color value to location specified by _coord.xyz_ in the 3D image - object specified by _image_. + | Write _color_ value to the location specified by _coord.xyz_ in the 3D + image object specified by _image_. Appropriate data format conversion to the specified image format is done before writing the color value. 
_coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized coordinates, and must be in the range [0, image width-1], [0, image height-1], and [0, image depth-1], respectively. - *write_imagef* can only be used with image objects created with - image_channel_data_type set to one of the pre-defined packed formats + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. Appropriate data format conversion will be done to convert channel @@ -9657,28 +12061,225 @@ For write functions this may be `write_only` or `read_write`. channels are stored. *write_imagei* can only be used with image objects created with - image_channel_data_type set to one of the following values: + _image_channel_data_type_ set to one of the following values: `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + + `CL_SIGNED_INT16`, or + `CL_SIGNED_INT32`. *write_imageui* can only be used with image objects created with - image_channel_data_type set to one of the following values: + _image_channel_data_type_ set to one of the following values: `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + + `CL_UNSIGNED_INT16`, or + `CL_UNSIGNED_INT32`. - The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects with _image_channel_data_type_ values not specified in the description above or with (_x_, _y_, _z_) coordinate values that are not in the range [0, image width-1], [0, image height-1], and [0, image depth-1], respectively, is undefined. <> support for OpenCL C 2.0, or OpenCL C 3.0 or - newer and the {opencl_c_3d_image_writes} feature, or the - `cl_khr_3d_image_writes` extension. 
+ newer and the {opencl_c_3d_image_writes} feature, or the + `<>` extension. + +ifdef::cl_khr_fp16[] + *write_imageh* <> support for the + `<>` extension macro. +endif::cl_khr_fp16[] + +ifdef::cl_khr_mipmap_image_writes[] +a| +[source,opencl_c] +---- +void write_imagef( + write_only image2d_t image, + int2 coord, + int lod, + float4 color) + +void write_imagei( + write_only image2d_t image, + int2 coord, + int lod, + int4 color) + +void write_imageui( + write_only image2d_t image, + int2 coord, + int lod, + uint4 color) + +void write_imagef( + write_only image2d_depth_t image, + int2 coord, + int lod, + float depth) +---- + | Write _color_ value to location specified by _coord.xy_ in the mip + level specified by _lod_ in the 2D image object specified by _image_. + Appropriate data format conversion to the specified image format is + done before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord.x_ and _coord.y_ are considered to be unnormalized coordinates + and must be in the range [0, _mipwidth_-1] and [0, _mipheight_-1] + respectively. + Behavior is undefined if _lod_, _coord.x_, or _coord.y_ is not in + range. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +void write_imagef( + write_only image1d_t image, + int coord, + int lod, + float4 color) + +void write_imagei( + write_only image1d_t image, + int coord, + int lod, + int4 color) + +void write_imageui( + write_only image1d_t image, + int coord, + int lod, + uint4 color) +---- + | Write _color_ value to location specified by _coord_ in the mip level + specified by _lod_ in the 1D image object specified by _image_. + Appropriate data format conversion to the specified image format is + done before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord_ is considered to be an unnormalized coordinate and must be in + the range [0, _mipwidth_-1]. + Behavior is undefined if _lod_ or _coord_ is not in range.
+ + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +void write_imagef( + write_only image1d_array_t image, + int2 coord, + int lod, + float4 color) + +void write_imagei( + write_only image1d_array_t image, + int2 coord, + int lod, + int4 color) + +void write_imageui( + write_only image1d_array_t image, + int2 coord, + int lod, + uint4 color) +---- + | Write _color_ value to location specified by _coord.x_ in the 1D image + identified by _coord.y_ and mip level _lod_ in the 1D image array + specified by _image_. + Appropriate data format conversion to the specified image format is done + before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord.x_ and _coord.y_ are considered to be unnormalized coordinates + and must be in the range [0, _mipwidth_-1] and [0, _miplayers_ -1] + respectively. + Behavior is undefined if _lod_, _coord.x_, or _coord.y_ is not in range. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +void write_imagef( + write_only image2d_array_t image, + int4 coord, + int lod, + float4 color) + +void write_imagei( + write_only image2d_array_t image, + int4 coord, + int lod, + int4 color) + +void write_imageui( + write_only image2d_array_t image, + int4 coord, + int lod, + uint4 color) + +void write_imagef( + write_only image2d_array_depth_t image, + int4 coord, + int lod, + float depth) +---- + | Write _color_ value to location specified by _coord.xy_ in the 2D image + identified by _coord.z_ and mip level _lod_ in the 2D image array + specified by _image_. + Appropriate data format conversion to the specified image format is done + before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized + coordinates and must be in the range [0, _mipwidth_-1], [0, + _mipheight_-1], and [0, _miplayers_-1] respectively. 
+ Behavior is undefined if + _lod_, _coord.x_, _coord.y_, or _coord.z_ is not in range. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +void write_imagef( + write_only image3d_t image, + int4 coord, + int lod, + float4 color) + +void write_imagei( + write_only image3d_t image, + int4 coord, + int lod, + int4 color) + +void write_imageui( + write_only image3d_t image, + int4 coord, + int lod, + uint4 color) +---- + | Write _color_ value to location specified by _coord.xyz_ and mip level + _lod_ in the 3D image object specified by _image_. + Appropriate data format conversion to the specified image format is done + before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized + coordinates and must be in the range [0, _mipwidth_-1], [0, + _mipheight_-1] and [0, _mipdepth_-1] respectively. + Behavior is undefined if _lod_, _coord.x_, _coord.y_, or _coord.z_ is + not in range. + + <> support for the + `<>` extension macro. +endif::cl_khr_mipmap_image_writes[] + |==== -- @@ -9697,55 +12298,87 @@ For query functions this may be `read_only`, `write_only` or `read_write`. 
[[table-image-query]] .Built-in Image Query Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *get_image_width*(_aQual_ image2d_t _image_) + - int *get_image_width*(_aQual_ image3d_t _image_) + + int *get_image_width*(_aQual_ image3d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: int *get_image_width*(_aQual_ image1d_t _image_) + int *get_image_width*(_aQual_ image1d_buffer_t _image_) + int *get_image_width*(_aQual_ image1d_array_t _image_) + - int *get_image_width*(_aQual_ image2d_array_t _image_) + + int *get_image_width*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int *get_image_width*(_aQual_ image2d_depth_t _image_) + int *get_image_width*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int *get_image_width*(_aQual_ image2d_msaa_t image) + + int *get_image_width*(_aQual_ image2d_array_msaa_t image) + + int *get_image_width*(_aQual_ image2d_msaa_depth_t image) + + int *get_image_width*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] | Return the image width in pixels. 
+ | int *get_image_height*(_aQual_ image2d_t _image_) + - int *get_image_height*(_aQual_ image3d_t _image_) + + int *get_image_height*(_aQual_ image3d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: - int *get_image_height*(_aQual_ image2d_array_t _image_) + + int *get_image_height*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int *get_image_height*(_aQual_ image2d_depth_t _image_) + int *get_image_height*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int *get_image_height*(_aQual_ image2d_msaa_t image) + + int *get_image_height*(_aQual_ image2d_array_msaa_t image) + + int *get_image_height*(_aQual_ image2d_msaa_depth_t image) + + int *get_image_height*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] | Return the image height in pixels. + | int *get_image_depth*(image3d_t _image_) | Return the image depth in pixels. 
| | | int *get_image_channel_data_type*(_aQual_ image2d_t _image_) + - int *get_image_channel_data_type*(_aQual_ image3d_t _image_) + + int *get_image_channel_data_type*(_aQual_ image3d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: int *get_image_channel_data_type*(_aQual_ image1d_t _image_) + int *get_image_channel_data_type*(_aQual_ image1d_buffer_t _image_) + int *get_image_channel_data_type*(_aQual_ image2d_t _image_) + int *get_image_channel_data_type*(_aQual_ image3d_t _image_) + int *get_image_channel_data_type*(_aQual_ image1d_array_t _image_) + - int *get_image_channel_data_type*(_aQual_ image2d_array_t _image_) + + int *get_image_channel_data_type*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int *get_image_channel_data_type*(_aQual_ image2d_depth_t _image_) + int *get_image_channel_data_type*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int *get_image_channel_data_type*(_aQual_ image2d_msaa_t image) + + int *get_image_channel_data_type*(_aQual_ image2d_array_msaa_t image) + + int *get_image_channel_data_type*(_aQual_ image2d_msaa_depth_t image) + + int *get_image_channel_data_type*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] | Return the channel data type. Valid values are: `CLK_SNORM_INT8` + @@ -9764,23 +12397,34 @@ For query functions this may be `read_only`, `write_only` or `read_write`. 
`CLK_HALF_FLOAT` + `CLK_FLOAT` + - Additionally, for OpenCL C 3.0 or newer: + + Additionally, for OpenCL C 3.0 or newer: `CLK_UNORM_INT_101010_2` footnote:[{fn-CLK_UNORM_INT_101010_2}] + | int *get_image_channel_order*(_aQual_ image2d_t _image_) + - int *get_image_channel_order*(_aQual_ image3d_t _image_) + + int *get_image_channel_order*(_aQual_ image3d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: int *get_image_channel_order*(_aQual_ image1d_t _image_) + int *get_image_channel_order*(_aQual_ image1d_buffer_t _image_) + int *get_image_channel_order*(_aQual_ image1d_array_t _image_) + - int *get_image_channel_order*(_aQual_ image2d_array_t _image_) + + int *get_image_channel_order*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int *get_image_channel_order*(_aQual_ image2d_depth_t _image_) + int *get_image_channel_order*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int *get_image_channel_order*(_aQual_ image2d_msaa_t image) + + int *get_image_channel_order*(_aQual_ image2d_array_msaa_t image) + + int *get_image_channel_order*(_aQual_ image2d_msaa_depth_t image) + + int *get_image_channel_order*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] | Return the image channel order. Valid values are: `CLK_A` + @@ -9792,15 +12436,15 @@ For query functions this may be `read_only`, `write_only` or `read_write`. 
`CLK_ARGB` + `CLK_BGRA` + `CLK_INTENSITY` + - `CLK_LUMINANCE` + + `CLK_LUMINANCE` - Additionally, for OpenCL C 1.1 or newer: + + Additionally, for OpenCL C 1.1 or newer: `CLK_Rx` + `CLK_RGx` + - `CLK_RGBx` + + `CLK_RGBx` - Additionally, for OpenCL C 2.0 or newer: + + Additionally, for OpenCL C 2.0 or newer: `CLK_ABGR` + `CLK_DEPTH` + @@ -9808,37 +12452,82 @@ For query functions this may be `read_only`, `write_only` or `read_write`. `CLK_sRGBx` + `CLK_sRGBA` + `CLK_sBGRA` + | | -| int2 *get_image_dim*(_aQual_ image2d_t _image_) + +| int2 *get_image_dim*(_aQual_ image2d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: - int2 *get_image_dim*(_aQual_ image2d_array_t _image_) + + int2 *get_image_dim*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int2 *get_image_dim*(_aQual_ image2d_depth_t _image_) + int2 *get_image_dim*(_aQual_ image2d_array_depth_t _image_) - | Return the 2D image width and height as an int2 type. + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int2 *get_image_dim*(_aQual_ image2d_msaa_t image) + + int2 *get_image_dim*(_aQual_ image2d_array_msaa_t image) + + int2 *get_image_dim*(_aQual_ image2d_msaa_depth_t image) + + int2 *get_image_dim*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] + | Return the 2D image width and height as an `int2` type. The width is returned in the _x_ component, and the height in the _y_ component. + | int4 *get_image_dim*(_aQual_ image3d_t _image_) | Return the 3D image width, height, and depth as an `int4` type. The width is returned in the _x_ component, height in the _y_ component, depth in the _z_ component and the _w_ component is 0. 
| | -| For OpenCL C 1.2 or newer: + +| For OpenCL C 1.2 or newer: - size_t *get_image_array_size*(_aQual_ image2d_array_t _image_) + + size_t *get_image_array_size*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: size_t *get_image_array_size*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + size_t *get_image_array_size*(_aQual_ image2d_array_msaa_depth_t _image_) +endif::cl_khr_gl_msaa_sharing[] | Return the number of images in the 2D image array. -| For OpenCL C 1.2 or newer: + + +| For OpenCL C 1.2 or newer: size_t *get_image_array_size*(_aQual_ image1d_array_t _image_) | Return the number of images in the 1D image array. + +ifdef::cl_khr_gl_msaa_sharing[] +| If the `<>` extension macro is supported: + + int *get_image_num_samples*(_aQual_ image2d_msaa_t _image_) + + int *get_image_num_samples*(_aQual_ image2d_array_msaa_t _image_) + + int *get_image_num_samples*(_aQual_ image2d_msaa_depth_t _image_) + + int *get_image_num_samples*(_aQual_ image2d_array_msaa_depth_t _image_) + | Return the number of samples in the 2D MSAA image. +endif::cl_khr_gl_msaa_sharing[] + +ifdef::cl_khr_mipmap_image[] +| If the `<>` extension macro is supported: + + int *get_image_num_mip_levels*(_aQual_ image1d_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image2d_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image3d_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image1d_array_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image2d_array_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image2d_depth_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image2d_array_depth_t _image_) + + | Return the number of mip levels in _image_.
+endif::cl_khr_mipmap_image[] + |==== The values returned by *get_image_channel_data_type* and @@ -9853,7 +12542,7 @@ channel data type that is an unnormalized unsigned 8-bit integer. [[reading-and-writing-to-the-same-image-in-a-kernel]] -==== Reading and writing to the same image in a kernel +==== Reading and Writing to the Same Image in a Kernel The *atomic_work_item_fence*(`CLK_IMAGE_MEM_FENCE`) built-in function can be used to make sure that sampler-less writes are visible to later reads by the @@ -9894,7 +12583,7 @@ foo(read_write image2d_t img, ... ) [[mapping-image-channels-to-color-values-returned-by-read_image-and-color-values-passed-to-write_image-to-image-channels]] -==== Mapping image channels to color values returned by read_image and color values passed to write_image to image channels +==== Mapping Image Channels to Color Values Returned by read_image and Color Values Passed to write_image to Image Channels The following table describes the mapping of the number of channels of an image element to the appropriate components in the `float4`, `int4` or @@ -9903,9 +12592,9 @@ image element to the appropriate components in the `float4`, `int4` or The unmapped components will be set to 0.0 for red, green and blue channels and will be set to 1.0 for the alpha channel. -[cols=",",] +[cols=",",options="header",] |==== -| *Channel Order* | `float4`, `int4` or `uint4` *components of channel data* +| Channel Order | `float4`, `int4` or `uint4` components of channel data | `CL_R`, `CL_Rx` | (r, 0.0, 0.0, 1.0) | `CL_A` | (0.0, 0.0, 0.0, a) | `CL_RG`, `CL_RGx` | (r, g, 0.0, 1.0) @@ -9920,8 +12609,8 @@ and will be set to 1.0 for the alpha channel. For `CL_DEPTH` images, a scalar value is returned by *read_imagef* or supplied to *write_imagef*. -<> support for OpenCL C 2.0 or newer, also see -`cl_khr_depth_images` extension. +<> support for OpenCL C 2.0 or newer, or for +the `<>` extension macro. 
[NOTE] ==== @@ -9945,7 +12634,6 @@ support will result in a `CL_OUT_OF_RESOURCES` error being returned. [open,refpage='workGroupFunctions',desc='Work-group Collective Functions',type='freeform',spec='clang',anchor='work-group-functions',xrefs='',alias='work_group_all work_group_any work_group_broadcast work_group_reduce work_group_scan_exclusive work_group_scan_inclusive'] -- - NOTE: The functionality described in this section <> support for OpenCL C 2.0, or OpenCL C 3.0 or newer and the {opencl_c_work_group_collective_functions} feature. @@ -9961,9 +12649,9 @@ footnote:[{fn-double-supported}] as the type for the arguments. [[table-builtin-work-group]] .Built-in Work-group Collective Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *work_group_all*(int _predicate_) | Evaluates _predicate_ for all work-items in the work-group and returns a non-zero value if _predicate_ evaluates to non-zero for all @@ -10056,6 +12744,161 @@ given work-group. -- +ifdef::cl_khr_work_group_uniform_arithmetic[] +[[work-group-collective-uniform-arithmetic-functions]] +=== Work-group Collective Uniform Arithmetic Functions + +[open,refpage='workGroupUniformArithmeticFunctions',desc='Work-group Collective Uniform Arithmetic Functions',type='freeform',spec='clang',anchor='work-group-collective-uniform-arithmetic-functions',xrefs='workGroupFunctions',alias='work_group_all work_group_any work_group_broadcast work_group_reduce work_group_scan_exclusive work_group_scan_inclusive'] +-- +NOTE: The functionality described in this section <> +support for OpenCL C 2.0 and the `<>` +extension macro. + +The <> table describes the OpenCL C +programming language built-in functions that perform logical arithmetic +operations across work items in a work-group. +These functions must be encountered by all work items in a work-group +executing the kernel, otherwise the behavior is undefined. 
+For these functions, a non-zero _predicate_ argument or return value is +logically `true` and a zero _predicate_ argument or return value is +logically `false`. + +[[table-builtin-work-group-logical]] +.Built-in Work-group Logical Arithmetic Functions +[cols="2a,1",options="header"] +|==== +| Function | Description +|[source,opencl_c] +---- +int work_group_reduce_logical_and(int predicate); +int work_group_reduce_logical_or(int predicate); +int work_group_reduce_logical_xor(int predicate); +---- + | Returns the logical *and*, *or*, or *xor* of _predicate_ for all work + items in the work-group. +|[source,opencl_c] +---- +int work_group_scan_inclusive_logical_and(int predicate); +int work_group_scan_inclusive_logical_or(int predicate); +int work_group_scan_inclusive_logical_xor(int predicate); +---- + | Returns the result of an inclusive scan operation, which is the logical + *and*, *or*, or *xor* of _predicate_ for all work items in the + work-group with a work-group linear local ID less than or equal to this + work item's work-group linear local ID. +|[source,opencl_c] +---- +int work_group_scan_exclusive_logical_and(int predicate); +int work_group_scan_exclusive_logical_or(int predicate); +int work_group_scan_exclusive_logical_xor(int predicate); +---- + | Returns the result of an exclusive scan operation, which is the logical + *and*, *or*, or *xor* of _predicate_ for all work items in the + work-group with a work-group linear local ID less than this work item's + work-group linear local ID. + + If there is no work item in the work-group with a work-group linear + local ID less than this work item's work-group linear local ID then an + identity value `I` is returned. + For *and*, the identity value is `true` (non-zero). + For *or* and *xor*, the identity value is `false` (zero). +|==== + +The <> table describes the OpenCL +C programming language built-in functions that perform bitwise integer +operations across work items in a work-group. 
+These functions must be encountered by all work items in a work-group +executing the kernel, otherwise the behavior is undefined. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `int`, `uint`, `long`, and `ulong`. + +[[table-builtin-work-group-bitwise-integer]] +.Built-in Work-group Bitwise Integer Functions +[cols="2a,1",options="header"] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype work_group_reduce_and(gentype value); +gentype work_group_reduce_or(gentype value); +gentype work_group_reduce_xor(gentype value); +---- + | Returns the bitwise *and*, *or*, or *xor* of _value_ for all work items + in the work-group. +|[source,opencl_c] +---- +gentype work_group_scan_inclusive_and(gentype value); +gentype work_group_scan_inclusive_or(gentype value); +gentype work_group_scan_inclusive_xor(gentype value); +---- + | Returns the result of an inclusive scan operation, which is the bitwise + *and*, *or*, or *xor* of _value_ for all work items in the work-group + with a work-group linear local ID less than or equal to this work item's + work-group linear local ID. +|[source,opencl_c] +---- +gentype work_group_scan_exclusive_and(gentype value); +gentype work_group_scan_exclusive_or(gentype value); +gentype work_group_scan_exclusive_xor(gentype value); +---- + | Returns the result of an exclusive scan operation, which is the bitwise + *and*, *or*, or *xor* of _value_ for all work items in the work-group + with a work-group linear local ID less than this work item's work-group + linear local ID. + + If there is no work item in the work-group with a work-group linear + local ID less than this work item's work-group linear local ID then an + identity value `I` is returned. + For *and*, the identity value is `~0` (all bits set). + For *or* and *xor*, the identity value is `0`. 
+|==== + +The <> table describes the OpenCL C +programming language built-in functions that perform multiplicative +operations across work items in a work-group. +These functions must be encountered by all work items in a work-group +executing the kernel, otherwise the behavior is undefined. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `int`, `uint`, `long`, `ulong`, +`float`, `double` (if double precision is supported), or `half` (if half +precision is supported). + +[[table-builtin-work-group-multiplicative]] +.Built-in Work-group Multiplicative Functions +[cols="2a,1",options="header"] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype work_group_reduce_mul(gentype value); +---- + | Returns the multiplication of _value_ for all work items in the + work-group. +|[source,opencl_c] +---- +gentype work_group_scan_inclusive_mul(gentype value); +---- + | Returns the result of an inclusive scan operation which is the + multiplication of _value_ for all work items in the work-group with a + work-group linear local ID less than or equal to this work item's + work-group linear local ID. +|[source,opencl_c] +---- +gentype work_group_scan_exclusive_mul(gentype value); +---- + | Returns the result of an exclusive scan operation which is the + multiplication of _value_ for all work items in the work-group with a + work-group linear local ID less than this work item's work-group linear + local ID. + + If there is no work item in the work-group with a work-group linear + local ID less than this work item's work-group linear local ID then the + identity value `1` is returned. +|==== +-- +endif::cl_khr_work_group_uniform_arithmetic[] + + [[pipe-functions]] === Pipe Functions @@ -10135,9 +12978,9 @@ pipe functions listed in the following table. 
[[table-builtin-pipe]] .Built-in Pipe Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *read_pipe*(read_only pipe gentype _p_, gentype *_ptr_) | Read packet from pipe _p_ into _ptr_. Returns 0 if *read_pipe* is successful and a negative value if the @@ -10205,9 +13048,9 @@ pipe functions listed in the following table. [[table-builtin-pipe-work-group]] .Built-in Pipe Work-group Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | reserve_id_t *work_group_reserve_read_pipe*(read_only pipe gentype _p_, uint _num_packets_) + reserve_id_t *work_group_reserve_write_pipe*(write_only pipe gentype _p_, @@ -10279,9 +13122,9 @@ For pipe query functions this may be `read_only` or `write_only`. [[table-builtin-pipe-query]] .Built-in Pipe Query Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | uint *get_pipe_num_packets*(_aQual_ pipe gentype _p_) | Returns the number of available entries in the pipe. The number of available entries in a pipe is a dynamic value. @@ -10343,33 +13186,54 @@ single semantic step. The following table describes the list of built-in functions that can be used to enqueue a kernel(s). +ifdef::cl_khr_device_enqueue_local_arg_types[] +When the `<>` extension macro is +supported, the <> and <> described in this section can use any of the built-in OpenCL C +scalar or vector integer or floating-point data types, or any user defined +type built from these scalar and vector data types, as the pointee type of +their arguments. +This is indicated by the generic type name `gentype` in those function +signatures. + +When the `<>` extension macro is +not supported, the pointee type of these functions must be `void`. 
+ +:localArgType: gentype +endif::cl_khr_device_enqueue_local_arg_types[] + +ifndef::cl_khr_device_enqueue_local_arg_types[] +:localArgType: void +endif::cl_khr_device_enqueue_local_arg_types[] + The macro `CLK_NULL_EVENT` refers to an invalid device event. The macro `CLK_NULL_QUEUE` refers to an invalid device queue. -- [[built-in-functions-enqueuing-a-kernel]] -==== Built-in Functions - Enqueuing a kernel +==== Built-in Functions - Enqueuing a Kernel [[table-builtin-kernel-enqueue]] .Built-in Kernel Enqueue Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | int **enqueue_kernel**(queue_t _queue_, kernel_enqueue_flags_t _flags_, - const ndrange_t _ndrange_, void (^__block__)(void)) + + const ndrange_t _ndrange_, void (^__block__)(void)) + int **enqueue_kernel**(queue_t _queue_, kernel_enqueue_flags_t _flags_, - const ndrange_t _ndrange_, uint _num_events_in_wait_list_, - const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_, - void (^__block__)(void)) + + const ndrange_t _ndrange_, uint _num_events_in_wait_list_, + const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_, + void (^__block__)(void)) + int **enqueue_kernel**(queue_t _queue_, kernel_enqueue_flags_t _flags_, - const ndrange_t _ndrange_, void (^__block__)(local void *, ...), - uint size0, ...) + + const ndrange_t _ndrange_, void (^__block__)(local {localArgType} *, ...), + uint size0, ...) + int **enqueue_kernel**(queue_t _queue_, kernel_enqueue_flags_t _flags_, - const ndrange_t _ndrange_, uint _num_events_in_wait_list_, - const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_, - void (^__block__)(local void *, ...), uint size0, ...) + const ndrange_t _ndrange_, uint _num_events_in_wait_list_, + const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_, + void (^__block__)(local {localArgType} *, ...), uint size0, ...) | Enqueue the block for execution to _queue_. 
If an event is returned, *enqueue_kernel* performs an implicit retain @@ -10510,7 +13374,7 @@ foo(global int *a, local int *lptr, ...) [[arguments-that-are-a-pointer-type-to-local-address-space]] -==== Arguments that are a pointer type to local address space +==== Arguments That are a Pointer Type to Local Address Space A block passed to enqueue_kernel can have arguments declared to be a pointer to `local` memory. @@ -10645,18 +13509,19 @@ evaluate_dp_work_A(queue_t q,...) [[determining-when-a-child-kernel-begins-execution]] -==== Determining when a child kernel begins execution +==== Determining when a Child Kernel Begins Execution The `kernel_enqueue_flags_t` footnote:[{fn-dse-kernel_enqueue_flags_t}] argument to the `enqueue_kernel` built-in functions can be used to specify when the child kernel begins execution. -Supported values are described in the table below: +Supported values are described in the <>: [[table-kernel-enqueue-flags]] .Kernel Enqueue Flags -[cols=",",] +[cols=",",options="header",] |==== -| `kernel_enqueue_flags_t` *enum* | *Description* +| `kernel_enqueue_flags_t` enum | Description | `CLK_ENQUEUE_FLAGS_NO_WAIT` | Indicates that the enqueued kernels do not need to wait for the parent kernel to finish execution before they begin execution. @@ -10681,7 +13546,7 @@ child kernels can begin execution. [[determining-when-a-parent-kernel-has-finished-execution]] -==== Determining when a parent kernel has finished execution +==== Determining When a Parent Kernel has Finished Execution A parent kernel's execution status is considered to be complete when it and all its child kernels have finished execution. @@ -10708,24 +13573,27 @@ execution. [[built-in-functions-kernel-query-functions]] ==== Built-in Functions - Kernel Query Functions +// Note: the Unicode "zero width space" (​) is used in some places to +// cause long function names to break much more sensibly. +// Probably the asciidoc built-in {zwsp} should be used instead. 
+ [open,refpage='kernelQueryFunctions',desc='Built-in Functions - Kernel Query Functions',type='freeform',spec='clang',anchor='built-in-functions-kernel-query-functions',xrefs='enqueue_kernel',alias='get_kernel_preferred get_kernel_work_group_size'] -- - [[table-builtin-kernel-query]] .Built-in Kernel Query Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | uint *get_kernel_work_group_size*(void (^block)(void)) + - uint *get_kernel_work_group_size*(void (^block)(local void *, ...)) + uint *get_kernel_work_group_size*(void (^block)(local {localArgType} *, ...)) | This provides a mechanism to query the maximum work-group size that can be used to execute a block on a specific device given by _device_. _block_ specifies the block to be enqueued. -| uint *get_kernel_preferred_* *work_group_size_multiple*( +| uint *get_kernel_preferred_​work_group_size_multiple*( void (^block)(void)) + - uint *get_kernel_preferred_* *work_group_size_multiple*( - void (^block)(local void *, ...)) + uint *get_kernel_preferred_​work_group_size_multiple*( + void (^block)(local {localArgType} *, ...)) | Returns the preferred multiple of work-group size for launch. This is a performance hint. Specifying a work-group size that is not a multiple of the value @@ -10737,7 +13605,7 @@ execution. [[built-in-functions-queuing-other-commands]] -==== Built-in Functions - Queuing other commands +==== Built-in Functions - Queuing Other Commands [open,refpage='enqueue_marker',desc='Built-in Functions - Queuing Other Commands',type='freeform',spec='clang',anchor='built-in-functions-queuing-other-commands',xrefs='enqueue_kernel'] -- @@ -10747,9 +13615,9 @@ used to enqueue commands such as a marker. 
[[table-builtin-other-enqueue]] .Built-in Other Enqueue Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | int *enqueue_marker*(queue_t _queue_, uint _num_events_in_wait_list_, const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_) | Enqueue a marker command to _queue_. @@ -10794,9 +13662,9 @@ events. [[table-builtin-event]] .Built-in Event Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | void *retain_event*(clk_event_t _event_) | Increments the event reference count. @@ -10969,9 +13837,9 @@ foo(queue_t q, ...) [[table-builtin-helper]] .Built-in Helper Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | queue_t *get_default_queue*(void) | Returns the default device queue. If a default device queue has not been created, `CLK_NULL_QUEUE` is @@ -10998,27 +13866,43 @@ foo(queue_t q, ...) 
|==== -- + [[sub-group-functions]] -=== Sub-group Functions +=== Sub-Group Functions -[open,refpage='subGroupFunctions',desc='Sub-group Functions',type='freeform',spec='clang',anchor='sub-group-functions',xrefs='',alias='sub_group_all sub_group_any sub_group_broadcast sub_group_reduce sub_group_scan_exclusive sub_group_scan_inclusive sub_group_reserve_read_pipe sub_gorup_reserve_write_pipe sub_group_commit_read_pipe sub_group_commit_write_pipe get_kernel_sub_group_count_for_ndrange get_kernel_max_sub_group_size_for_ndrange'] +[open,refpage='subGroupFunctions',desc='Sub-Group Functions',type='freeform',spec='clang',anchor='sub-group-functions',xrefs='',alias='sub_group_all sub_group_any sub_group_broadcast sub_group_reduce sub_group_scan_exclusive sub_group_scan_inclusive sub_group_reserve_read_pipe sub_group_reserve_write_pipe sub_group_commit_read_pipe sub_group_commit_write_pipe get_kernel_sub_group_count_for_ndrange get_kernel_max_sub_group_size_for_ndrange'] -- - NOTE: The functionality described in this section <<unified-spec, requires>> -support for OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. - -The table below describes OpenCL C programming language built-in functions that operate on a sub-group level. -These built-in functions must be encountered by all work-items in the sub-group executing the kernel. +support for +ifdef::cl_khr_subgroups[the `<<cl_khr_subgroups>>` extension macro; or for] +OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. + +The <<table-collective-functions, table below>> describes OpenCL C +programming language built-in functions that operate on a sub-group level. +These built-in functions must be encountered by all work-items in the +sub-group executing the kernel. For the functions below, the generic type name `gentype` may be the one of the supported built-in scalar data types `int`, `uint`, `long` -footnote:[{fn-int64-supported}], `ulong`, `half` footnote:[{fn-half-supported}], -`float`, and `double` footnote:[{fn-double-supported}]. 
- -.Built-in Sub-group Collective Functions +footnote:[{fn-int64-supported}], `ulong`, `half` +footnote:[{fn-half-supported}], `float`, and `double` +footnote:[{fn-double-supported}]. + +ifdef::cl_khr_subgroup_extended_types[] +NOTE: If the `<<cl_khr_subgroup_extended_types>>` extension is supported, +the supported `gentype`s also include `char`, `uchar`, `short`, and +`ushort`. +For the `sub_group_broadcast` function, `gentype` may additionally be one of +the supported built-in vector data types `char__n__`, `uchar__n__`, +`short__n__`, `ushort__n__`, `int__n__`, `uint__n__`, `long__n__`, +`ulong__n__`, `float__n__`, `half__n__` footnote:[{fn-half-supported}], or +`double__n__` footnote:[{fn-double-supported}]. +endif::cl_khr_subgroup_extended_types[] + +[[table-collective-functions]] +.Built-in Sub-Group Collective Functions [cols=",",options="header",] |==== -| *Function* -| *Description* +| Function | Description | int *sub_group_all* (int _predicate_) | Evaluates _predicate_ for all work-items in the sub-group and returns a @@ -11085,11 +13969,13 @@ The order of these floating-point operations is also non-deterministic for a giv ==== NOTE: The functionality described in the following table <<unified-spec, -requires>> support for OpenCL C 3.0 or newer and the {opencl_c_subgroups} -and {opencl_c_pipes} features. +requires>> support for +ifdef::cl_khr_subgroups[the `<<cl_khr_subgroups>>` extension macro; or for] +OpenCL C 3.0 or newer and the {opencl_c_subgroups} and {opencl_c_pipes} +features. -The following table describes built-in pipe functions that operate at a -sub-group level. +The <<table-pipe-functions, following table>> describes built-in pipe +functions that operate at a sub-group level. These built-in functions must be encountered by all work-items in a sub-group executing the kernel with the same argument values, otherwise the behavior is undefined. 
@@ -11098,11 +13984,11 @@ scalar or vector integer or floating-point data types or any user defined type built from these scalar and vector data types can be used as the type for the arguments to the pipe functions listed in _table 6.29_. -.Built-in Sub-group Pipe Functions +[[table-pipe-functions]] +.Built-in Sub-Group Pipe Functions [cols=",",options="header",] |==== -| *Function* -| *Description* +| Function | Description | reserve_id_t *sub_group_reserve_read_pipe* ( + read_only pipe gentype _pipe_, + @@ -11138,17 +14024,19 @@ The order of sub-group based reservations that belong to different work groups is implementation-defined. NOTE: The functionality described in the following table <<unified-spec, -requires>> support for OpenCL C 3.0 or newer and the {opencl_c_subgroups} -and {opencl_c_device_enqueue} features. +requires>> support for +ifdef::cl_khr_subgroups[the `<<cl_khr_subgroups>>` extension macro; or for] +OpenCL C 3.0 or newer and the {opencl_c_subgroups} and +{opencl_c_device_enqueue} features. -The following table describes built-in functions to query sub-group -information for a block to be enqueued. +The <<table-kernel-query-functions, following table>> describes built-in +functions to query sub-group information for a block to be enqueued. -.Built-in Sub-group Kernel Query Functions +[[table-kernel-query-functions]] +.Built-in Sub-Group Kernel Query Functions [cols="5,4",options="header",] |==== -| *Built-in Function* -| *Description* +| Built-in Function | Description | uint *get_kernel_sub_group_count_for_ndrange* ( + const ndrange_t _ndrange_, + @@ -11175,6 +14063,1164 @@ information for a block to be enqueued. |==== -- + +ifdef::cl_khr_subgroup_ballot[] +[[sub-group-ballot-functions]] +==== Built-in Sub-Group Ballot Functions + +NOTE: The functionality described in this section <<unified-spec, requires>> +support for the `<<cl_khr_subgroup_ballot>>` extension. + +The <<table-ballot-functions, table below>> describes OpenCL C +programming language built-in functions to allow work items in a sub-group +to collect and operate on ballots from work items in the sub-group. 
+These functions need not be encountered by all work items in a sub-group +executing the kernel. + +For the `sub_group_non_uniform_broadcast` and `sub_group_broadcast_first` +functions, the generic type name `gentype` may be one of the supported +built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, +`uint`, `long`, `ulong`, `float`, `half` footnote:[{fn-half-supported}], and +`double` footnote:[{fn-double-supported}]. + +For the `sub_group_non_uniform_broadcast` function, the generic type name +`gentype` may additionally be one of the supported built-in vector data +types `char__n__`, `uchar__n__`, `short__n__`, `ushort__n__`, `int__n__`, +`uint__n__`, `long__n__`, `ulong__n__`, `float__n__`, `half__n__` +footnote:[{fn-half-supported}], or `double__n__` +footnote:[{fn-double-supported}]. + +[[table-ballot-functions]] +.Built-in Sub-Group Ballot Functions +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_non_uniform_broadcast( + gentype value, + uint index ) +---- + | Returns _value_ for the work item with sub-group local ID equal to + _index_. + + Behavior is undefined when the value of _index_ is not equivalent for + all active work items in the sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to _index_ is inactive or if _index_ is greater than or equal to + the size of the sub-group. +|[source,opencl_c] +---- +gentype sub_group_broadcast_first( + gentype value ) +---- + | Returns _value_ for the work item with the smallest sub-group local ID + among active work items in the sub-group. +|[source,opencl_c] +---- +uint4 sub_group_ballot( + int predicate ) +---- + | Returns a bitfield combining the _predicate_ values from all work items + in the sub-group. 
+ Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. + The representative bit in the bitfield is set if the work item is active + and the _predicate_ is non-zero, and is unset otherwise. +|[source,opencl_c] +---- +int sub_group_inverse_ballot( + uint4 value ) +---- + | Returns the predicate value for this work item in the sub-group from the + bitfield _value_ representing predicate values from all work items in + the sub-group. + The predicate return value will be non-zero if the bit in the bitfield + _value_ for this work item is set, and zero otherwise. + + Behavior is undefined when _value_ is not equivalent for all active work + items in the sub-group. + + This is a specialized function that may perform better than the + equivalent `sub_group_ballot_bit_extract` on some implementations. +|[source,opencl_c] +---- +int sub_group_ballot_bit_extract( + uint4 value, + uint index ) +---- + | Returns the predicate value for the work item with sub-group local ID + equal to _index_ from the bitfield _value_ representing predicate values + from all work items in the sub-group. + The predicate return value will be non-zero if the bit in the bitfield + _value_ for the work item with sub-group local ID equal to _index_ is + set, and zero otherwise. + + The predicate return value is undefined if _index_ (the requested + sub-group local ID) is greater than or equal to the size of the + sub-group. +|[source,opencl_c] +---- +uint sub_group_ballot_bit_count( + uint4 value ) +---- + | Returns the number of bits that are set in the bitfield _value_, only + considering the bits in _value_ that represent predicate values + corresponding to sub-group local IDs less than the maximum sub-group + size within the dispatch (as returned by `get_max_sub_group_size`). 
+|[source,opencl_c] +---- +uint sub_group_ballot_inclusive_scan( + uint4 value ) +---- + | Returns the number of bits that are set in the bitfield _value_, only + considering the bits in _value_ representing work items with a sub-group + local ID less than or equal to this work item's sub-group local ID. +|[source,opencl_c] +---- +uint sub_group_ballot_exclusive_scan( + uint4 value ) +---- + | Returns the number of bits that are set in the bitfield _value_, only + considering the bits in _value_ representing work items with a sub-group + local ID less than this work item's sub-group local ID. +|[source,opencl_c] +---- +uint sub_group_ballot_find_lsb( + uint4 value ) +---- + | Returns the smallest sub-group local ID with a bit set in the bitfield + _value_, only considering the bits in _value_ that represent predicate + values corresponding to sub-group local IDs less than the maximum + sub-group size within the dispatch (as returned by + `get_max_sub_group_size`). + If no bits representing predicate values from all work items in the + sub-group are set in the bitfield _value_ then the return value is + undefined. +|[source,opencl_c] +---- +uint sub_group_ballot_find_msb( + uint4 value ) +---- + | Returns the largest sub-group local ID with a bit set in the bitfield + _value_, only considering the bits in _value_ that represent predicate + values corresponding to sub-group local IDs less than the maximum + sub-group size within the dispatch (as returned by + `get_max_sub_group_size`). + If no bits representing predicate values from all work items in the + sub-group are set in the bitfield _value_ then the return value is + undefined. +|[source,opencl_c] +---- +uint4 get_sub_group_eq_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + equals the sub-group local ID and unset otherwise. 
+ Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. +|[source,opencl_c] +---- +uint4 get_sub_group_ge_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + is greater than or equal to the sub-group local ID and less than the + maximum sub-group size, and unset otherwise. + Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. +|[source,opencl_c] +---- +uint4 get_sub_group_gt_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + is greater than the sub-group local ID and less than the maximum + sub-group size, and unset otherwise. + Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. +|[source,opencl_c] +---- +uint4 get_sub_group_le_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + is less than or equal to the sub-group local ID and unset otherwise. + Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. +|[source,opencl_c] +---- +uint4 get_sub_group_lt_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + is less than the sub-group local ID and unset otherwise. + Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. 
+|==== + +endif::cl_khr_subgroup_ballot[] + + +ifdef::cl_khr_subgroup_clustered_reduce[] +[[sub-group-clustered-reduction-functions]] +==== Built-in Clustered Reduction Functions for Sub-Groups + +NOTE: The functionality described in this section <> +support for the `<>` extension. + +This section describes arithmetic operations that are performed on a subset +of work items in a sub-group, referred to as a cluster. +A cluster is described by a specified cluster size. +Work items in a sub-group are assigned to clusters such that for cluster +size _n_, the _n_ work items in the sub-group with the smallest sub-group +local IDs are assigned to the first cluster, then the _n_ remaining work +items with the smallest sub-group local IDs are assigned to the next +cluster, and so on. +Behavior is undefined if the specified cluster size is not an integer +constant expression, is not a power-of-two, or is greater than the maximum +size of a sub-group within the dispatch. + + +===== Arithmetic Operations + +The table below describes the OpenCL C programming language built-in +functions that perform simple arithmetic operations on a cluster of work +items in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}]. 
+ +[[table-clustered-reduce-math-functions]] +.Built-in Arithmetic Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_clustered_reduce_add( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_mul( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_min( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_max( + gentype value, uint clustersize ) +---- + | Returns the summation, multiplication, minimum, or maximum of _value_ + for all active work items in the sub-group within a cluster of the + specified _clustersize_. +|==== + +Note: The order of floating-point operations is not guaranteed for the +sub-group clustered reduction built-in functions that operate on +floating-point types, and the order of operations may additionally be +non-deterministic for a given sub-group. + + +===== Bitwise Operations + +The table below describes the OpenCL C programming language built-in +functions to perform simple bitwise integer operations across a cluster of +work items in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be the one of +the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, or `ulong`. + +[[table-clustered-reduce-bitwise-functions]] +.Built-in Bitwise Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_clustered_reduce_and( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_or( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_xor( + gentype value, uint clustersize ) +---- + | Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work + items in the sub-group within a cluster of the specified _clustersize_. 
+|==== + + +===== Logical Operations + +The table below describes the OpenCL C programming language built-in +functions to perform simple logical operations across a cluster of work +items in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For these functions, a non-zero _predicate_ argument or return value is +logically `true` and a zero _predicate_ argument or return value is +logically `false`. + +[[table-clustered-reduce-logical-functions]] +.Built-in Logical Functions for Sub-Groups +[cols="3a,2",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +int sub_group_clustered_reduce_logical_and( + int predicate, uint clustersize ) +int sub_group_clustered_reduce_logical_or( + int predicate, uint clustersize ) +int sub_group_clustered_reduce_logical_xor( + int predicate, uint clustersize ) +---- + | Returns the logical *and*, *or*, or *xor* of _predicate_ for all active + work items in the sub-group within a cluster of the specified + _clustersize_. +|==== + +endif::cl_khr_subgroup_clustered_reduce[] + + +ifdef::cl_khr_subgroup_non_uniform_arithmetic[] +==== Built-in Non-Uniform Scan and Reduction Functions for Sub-Groups + +NOTE: The functionality described in this section <> +support for the `<>` extension. + +===== Arithmetic Operations + +The <> describes the +OpenCL C programming language built-in functions that perform simple +arithmetic operations across work items in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}]. 
+ +[[table-non-uniform-math-functions]] +.Built-in Non-Uniform Arithmetic Functions for Sub-Groups +[cols="3a,2",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_non_uniform_reduce_add( + gentype value ) +gentype sub_group_non_uniform_reduce_min( + gentype value ) +gentype sub_group_non_uniform_reduce_max( + gentype value ) +gentype sub_group_non_uniform_reduce_mul( + gentype value ) +---- + | Returns the summation, multiplication, minimum, or maximum of _value_ + for all active work items in the sub-group. + + Note: This behavior is the same as the *add*, *min*, and *max* reduction + built-in functions from `<>` and OpenCL 2.1, except + these functions support additional types and need not be encountered by + all work items in the sub-group executing the kernel. +|[source,opencl_c] +---- +gentype sub_group_non_uniform_scan_inclusive_add( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_min( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_max( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_mul( + gentype value ) +---- + | Returns the result of an inclusive scan operation, which is the + summation, multiplication, minimum, or maximum of _value_ for all active + work items in the sub-group with a sub-group local ID less than or equal + to this work item's sub-group local ID. + + Note: This behavior is the same as the *add*, *min*, and *max* inclusive + scan built-in functions from `<>` and OpenCL 2.1, + except these functions support additional types and need not be + encountered by all work items in the sub-group executing the kernel. 
+|[source,opencl_c] +---- +gentype sub_group_non_uniform_scan_exclusive_add( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_min( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_max( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_mul( + gentype value ) +---- + | Returns the result of an exclusive scan operation, which is the + summation, multiplication, minimum, or maximum of _value_ for all active + work items in the sub-group with a sub-group local ID less than this + work item's sub-group local ID. + + If there is no active work item in the sub-group with a sub-group local + ID less than this work item's sub-group local ID then an identity value + `I` is returned. + For *add*, the identity value is `0`. + For *min*, the identity value is the largest representable value for + integer types, or `+INF` for floating-point types. + For *max*, the identity value is the minimum representable value for + integer types, or `-INF` for floating-point types. + For *mul*, the identity value is `1`. + + Note: This behavior is the same as the *add*, *min*, and *max* exclusive + scan built-in functions from `<>` and OpenCL 2.1, + except these functions support additional types and need not be + encountered by all work items in the sub-group executing the kernel. +|==== + +Note: The order of floating-point operations is not guaranteed for the +sub-group scan and reduction built-in functions that operate on +floating-point types, and the order of operations may additionally be +non-deterministic for a given sub-group. + + +===== Bitwise Operations + +The table below describes the OpenCL C programming language built-in +functions that perform simple bitwise integer operations across work items +in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. 
+For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, and `ulong`. + +[[table-non-uniform-bitwise-functions]] +.Built-in Non-Uniform Bitwise Functions for Sub-Groups +[cols="3a,2",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_non_uniform_reduce_and( + gentype value ) +gentype sub_group_non_uniform_reduce_or( + gentype value ) +gentype sub_group_non_uniform_reduce_xor( + gentype value ) +---- + | Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work + items in the sub-group. +|[source,opencl_c] +---- +gentype sub_group_non_uniform_scan_inclusive_and( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_or( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_xor( + gentype value ) +---- + | Returns the result of an inclusive scan operation, which is the bitwise + *and*, *or*, or *xor* of _value_ for all active work items in the + sub-group with a sub-group local ID less than or equal to this work + item's sub-group local ID. +|[source,opencl_c] +---- +gentype sub_group_non_uniform_scan_exclusive_and( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_or( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_xor( + gentype value ) +---- + | Returns the result of an exclusive scan operation, which is the bitwise + *and*, *or*, or *xor* of _value_ for all active work items in the + sub-group with a sub-group local ID less than this work item's sub-group + local ID. + + If there is no active work item in the sub-group with a sub-group local + ID less than this work item's sub-group local ID then an identity value + `I` is returned. + For *and*, the identity value is `~0` (all bits set). + For *or* and *xor*, the identity value is `0`. 
+|==== + + +===== Logical Operations + +The table below describes the OpenCL C programming language built-in +functions that perform simple logical operations across work items in a +sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For these functions, a non-zero _predicate_ argument or return value is +logically `true` and a zero _predicate_ argument or return value is +logically `false`. + +[[table-non-uniform-logical-functions]] +.Built-in Non-Uniform Logical Functions for Sub-Groups +[cols="2a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +int sub_group_non_uniform_reduce_logical_and( + int predicate ) +int sub_group_non_uniform_reduce_logical_or( + int predicate ) +int sub_group_non_uniform_reduce_logical_xor( + int predicate ) +---- + | Returns the logical *and*, *or*, or *xor* of _predicate_ for all active + work items in the sub-group. +|[source,opencl_c] +---- +int sub_group_non_uniform_scan_inclusive_logical_and( + int predicate ) +int sub_group_non_uniform_scan_inclusive_logical_or( + int predicate ) +int sub_group_non_uniform_scan_inclusive_logical_xor( + int predicate ) +---- + | Returns the result of an inclusive scan operation, which is the logical + *and*, *or*, or *xor* of _predicate_ for all active work items in the + sub-group with a sub-group local ID less than or equal to this work + item's sub-group local ID. +|[source,opencl_c] +---- +int sub_group_non_uniform_scan_exclusive_logical_and( + int predicate ) +int sub_group_non_uniform_scan_exclusive_logical_or( + int predicate ) +int sub_group_non_uniform_scan_exclusive_logical_xor( + int predicate ) +---- + | Returns the result of an exclusive scan operation, which is the logical + *and*, *or*, or *xor* of _predicate_ for all active work items in the + sub-group with a sub-group local ID less than this work item's sub-group + local ID. 
+ + If there is no active work item in the sub-group with a sub-group local + ID less than this work item's sub-group local ID then an identity value + `I` is returned. + For *and*, the identity value is `true` (non-zero). + For *or* and *xor*, the identity value is `false` (zero). +|==== + +endif::cl_khr_subgroup_non_uniform_arithmetic[] + + +ifdef::cl_khr_subgroup_non_uniform_vote[] +==== Built-in Non-Uniform Vote Functions for Sub-Groups + +NOTE: The functionality described in this section <> +support for the `<>` extension. + +The <> describes the +OpenCL C programming language built-in functions to elect a single work item +in a sub-group to perform a task and to collectively vote to determine a +boolean condition for the sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be the one of +the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}]. + +[[table-non-uniform-vote-functions]] +.Built-in Non-Uniform Vote Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +int sub_group_elect() +---- + | Elects a single work item in the sub-group to perform a task. + + This function will return true (nonzero) for the active work item in the + sub-group with the smallest sub-group local ID, and false (zero) for all + other active work items in the sub-group. +|[source,opencl_c] +---- +int sub_group_non_uniform_all( + int predicate ) +---- + | Examines _predicate_ for all active work items in the sub-group and + returns a non-zero value if _predicate_ is non-zero for all active work + items in the sub-group and zero otherwise. 
+ + Note: This behavior is the same as `sub_group_all` from + `<<cl_khr_subgroups>>` and OpenCL 2.1, except this function need not be + encountered by all work items in the sub-group executing the kernel. +|[source,opencl_c] +---- +int sub_group_non_uniform_any( + int predicate ) +---- + | Examines _predicate_ for all active work items in the sub-group and + returns a non-zero value if _predicate_ is non-zero for any active work + item in the sub-group and zero otherwise. + + Note: This behavior is the same as `sub_group_any` from + `<<cl_khr_subgroups>>` and OpenCL 2.1, except this function need not be + encountered by all work items in the sub-group executing the kernel. +|[source,opencl_c] +---- +int sub_group_non_uniform_all_equal( + gentype value ) +---- + | Examines _value_ for all active work items in the sub-group and returns + a non-zero value if _value_ is equivalent for all active work items in + the sub-group and zero otherwise. + + Integer types use a bitwise test for equality. Floating-point types use + an ordered floating-point test for equality. +|==== + +endif::cl_khr_subgroup_non_uniform_vote[] + + +ifdef::cl_khr_subgroup_rotate[] +[[sub-group-rotate-functions]] +==== Built-in Sub-Group Rotation Functions + +NOTE: The functionality described in this section <<unified-spec, requires>> +support for the `<<cl_khr_subgroup_rotate>>` extension. + +The <<table-rotate-functions, table below>> describes specialized +OpenCL C programming language built-in functions that allow work items in a +sub-group to exchange data. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}]. 
+ +[[table-rotate-functions]] +.Built-in Rotation Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description + +|[source,opencl_c] +---- +gentype sub_group_rotate( + gentype value, int delta) +---- + | Returns _value_ for the work item with sub-group local ID equal to the + remainder of the division of the sum of this work item's sub-group local + ID and _delta_ by the maximum sub-group size. + + The value of _delta_ is required to be dynamically-uniform for all work + items in the sub-group, otherwise the behavior is undefined. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive. +|[source,opencl_c] +---- +gentype sub_group_clustered_rotate( + gentype value, int delta, + uint clustersize) +---- + | Returns _value_ for the work item with sub-group local ID equal to the + sum of, the remainder of the division of the sum of this work item's ID + within the cluster and _delta_ by _clustersize_, and the sub-group local + ID of the first work-item of the cluster to which the work-item + executing the function belongs. + + The value of _delta_ is required to be dynamically-uniform for all work + items in the sub-group, otherwise the behavior is undefined. + + _clustersize_ must be an integer constant expression and a power of two, + smaller than or equal to the maximum sub-group size, otherwise the + behavior is undefined. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive. +|==== + +endif::cl_khr_subgroup_rotate[] + + +ifdef::cl_khr_subgroup_shuffle[] +==== Built-in Shuffle Functions for Sub-Groups + +NOTE: The functionality described in this section <> +support for the `<>` extension. + +The <> describes the OpenCL C +programming language built-in functions that allow work items in a sub-group +to exchange data. 
+These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}]. + +[[table-shuffle-functions]] +.Built-in Shuffle Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_shuffle( + gentype value, uint index ) +---- + | Returns _value_ for the work item with sub-group local ID equal to + _index_. + The shuffle _index_ need not be the same for all work items in the + sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to _index_ is inactive or if _index_ is greater than or equal to + the size of the sub-group. +|[source,opencl_c] +---- +gentype sub_group_shuffle_xor( + gentype value, uint mask ) +---- + | Returns _value_ for the work item with sub-group local ID equal to + this work item's sub-group local ID xor'd with _mask_. + The shuffle _mask_ need not be the same for all work items in the + sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive or if the calculated index is + greater than or equal to the size of the sub-group. + + This is a specialized function that may perform better than the + equivalent `sub_group_shuffle` on some implementations. +|==== + +endif::cl_khr_subgroup_shuffle[] + + +ifdef::cl_khr_subgroup_shuffle_relative[] +==== Built-in Relative Shuffle Functions for Sub-Groups + +The table below describes specialized OpenCL C programming language built-in +functions that allow work items in a sub-group to exchange data. +These functions need not be encountered by all work items in a sub-group +executing the kernel. 
+For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}]. + +[[table-shuffle-relative-functions]] +.Built-in Relative Shuffle Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_shuffle_up( + gentype value, uint delta ) +---- + | Returns _value_ for the work item with sub-group local ID equal to this + work item's sub-group local ID minus _delta_. + The shuffle _delta_ need not be the same for all work items in the + sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive, or _delta_ is greater than + this work item's sub-group local ID. + + This is a specialized function that may perform better than the + equivalent `sub_group_shuffle` on some implementations. +|[source,opencl_c] +---- +gentype sub_group_shuffle_down( + gentype value, uint delta ) +---- + | Returns _value_ for the work item with sub-group local ID equal to this + work item's sub-group local ID plus _delta_. + The shuffle _delta_ need not be the same for all work items in the + sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive, or this work item's sub-group + local ID plus _delta_ is greater than or equal to the size of the + sub-group. + + This is a specialized function that may perform better than the + equivalent `sub_group_shuffle` on some implementations. +|==== +endif::cl_khr_subgroup_shuffle_relative[] + + +[[extended-sub-groups-mapping]] +=== Sub-Groups Function Mapping and Capabilities + +This section describes a possible mapping between OpenCL built-in sub-group functions +and SPIR-V instructions and required SPIR-V capabilities. 
+ +This section is informational and non-normative. + +[cols="1,1,1",options="header"] +|==== +| OpenCL C Function | SPIR-V BuiltIn or Instruction | Enabling SPIR-V Capability + +3+| For OpenCL 2.1 or `<<cl_khr_subgroups>>`: + +| `get_​sub_​group_​size` + | *SubgroupSize* + | *Kernel* +| `get_​max_​sub_​group_​size` + | *SubgroupMaxSize* + | *Kernel* +| `get_​num_​sub_​groups` + | *NumSubgroups* + | *Kernel* +| `get_​enqueued_​num_​sub_​groups` + | *NumEnqueuedSubgroups* + | *Kernel* +| `get_​sub_​group_​id` + | *SubgroupId* + | *Kernel* +| `get_​sub_​group_​local_​id` + | *SubgroupLocalInvocationId* + | *Kernel* + +| `sub_​group_​barrier` + | *OpControlBarrier* + | None Needed + +| `sub_​group_​all` + | *OpGroupAll* + | *Groups* +| `sub_​group_​any` + | *OpGroupAny* + | *Groups* + +| `sub_​group_​broadcast` + | *OpGroupBroadcast* + | *Groups* + +| `sub_​group_​reduce_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​reduce_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​reduce_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​scan_​exclusive_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​scan_​exclusive_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​scan_​exclusive_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​scan_​inclusive_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​scan_​inclusive_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​scan_​inclusive_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​reserve_​read_​pipe` + | *OpGroupReserveReadPipePackets* + | *Pipes* +| `sub_​group_​reserve_​write_​pipe` + | *OpGroupReserveWritePipePackets* + | *Pipes* +| `sub_​group_​commit_​read_​pipe` + | *OpGroupCommitReadPipe* + | *Pipes* +| `sub_​group_​commit_​write_​pipe` + | *OpGroupCommitWritePipe* + | *Pipes* + +|
`get_​kernel_​sub_​group_​count_​for_​ndrange` + | *OpGetKernelNDrangeSubGroupCount* + | *DeviceEnqueue* +| `get_​kernel_​max_​sub_​group_​size_​for_​ndrange` + | *OpGetKernelNDrangeMaxSubGroupSize* + | *DeviceEnqueue* + +ifdef::cl_khr_subgroup_ballot[] +3+| For `<>`: + +| `sub_​group_​non_​uniform_​broadcast` + | *OpGroupNonUniformBroadcast* + | *GroupNonUniformBallot* +| `sub_​group_​broadcast_​first` + | *OpGroupNonUniformBroadcastFirst* + | *GroupNonUniformBallot* + +| `sub_​group_​ballot` + | *OpGroupNonUniformBallot* + | *GroupNonUniformBallot* +| `sub_​group_​inverse_​ballot` + | *OpGroupNonUniformInverseBallot* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​bit_​extract` + | *OpGroupNonUniformBallotBitExtract* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​bit_​count` + | *OpGroupNonUniformBallotBitCount* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​inclusive_​scan` + | *OpGroupNonUniformBallotBitCount* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​exclusive_​scan` + | *OpGroupNonUniformBallotBitCount* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​find_​lsb` + | *OpGroupNonUniformBallotFindLSB* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​find_​msb` + | *OpGroupNonUniformBallotFindMSB* + | *GroupNonUniformBallot* + +| `get_​sub_​group_​eq_​mask` + | *SubgroupEqMask* + | *GroupNonUniformBallot* +| `get_​sub_​group_​ge_​mask` + | *SubgroupGeMask* + | *GroupNonUniformBallot* +| `get_​sub_​group_​gt_​mask` + | *SubgroupGtMask* + | *GroupNonUniformBallot* +| `get_​sub_​group_​le_​mask` + | *SubgroupLeMask* + | *GroupNonUniformBallot* +| `get_​sub_​group_​lt_​mask` + | *SubgroupLtMask* + | *GroupNonUniformBallot* +endif::cl_khr_subgroup_ballot[] + +ifdef::cl_khr_subgroup_clustered_reduce[] +3+| For `<>`: + +| `sub_​group_​clustered_​reduce_​add` + | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​mul` + | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* + | 
*GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​min` + | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​max` + | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​and` + | *OpGroupNonUniformBitwiseAnd* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​or` + | *OpGroupNonUniformBitwiseOr* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​xor` + | *OpGroupNonUniformBitwiseXor* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​logical_​and` + | *OpGroupNonUniformLogicalAnd* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​logical_​or` + | *OpGroupNonUniformLogicalOr* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​logical_​xor` + | *OpGroupNonUniformLogicalXor* + | *GroupNonUniformClustered* +endif::cl_khr_subgroup_clustered_reduce[] + +ifdef::cl_khr_subgroup_extended_types[] +3+| For `<>`: + + Note: This extension adds new types to uniform sub-group operations. 
+ +| `sub_​group_​broadcast` + | *OpGroupBroadcast* + | *Groups* + +| `sub_​group_​reduce_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​reduce_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​reduce_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​scan_​exclusive_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​scan_​exclusive_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​scan_​exclusive_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​scan_​inclusive_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​scan_​inclusive_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​scan_​inclusive_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* +endif::cl_khr_subgroup_extended_types[] + +ifdef::cl_khr_subgroup_non_uniform_arithmetic[] +3+| For `<>`: + +| `sub_​group_​non_​uniform_​reduce_​add` + | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​mul` + | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​min` + | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​max` + | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​and` + | *OpGroupNonUniformBitwiseAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​or` + | *OpGroupNonUniformBitwiseOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​xor` + | *OpGroupNonUniformBitwiseXor* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​logical_​and` + | *OpGroupNonUniformLogicalAnd* + | 
*GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​logical_​or` + | *OpGroupNonUniformLogicalOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​logical_​xor` + | *OpGroupNonUniformLogicalXor* + | *GroupNonUniformArithmetic* + +| `sub_​group_​non_​uniform_​scan_​inclusive_​add` + | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​mul` + | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​min` + | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​max` + | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​and` + | *OpGroupNonUniformBitwiseAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​or` + | *OpGroupNonUniformBitwiseOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​xor` + | *OpGroupNonUniformBitwiseXor* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​and` + | *OpGroupNonUniformLogicalAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​or` + | *OpGroupNonUniformLogicalOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​xor` + | *OpGroupNonUniformLogicalXor* + | *GroupNonUniformArithmetic* + +| `sub_​group_​non_​uniform_​scan_​exclusive_​add` + | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​exclusive_​mul` + | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​exclusive_​min` + | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* 
+ | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​max` + | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​and` + | *OpGroupNonUniformBitwiseAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​or` + | *OpGroupNonUniformBitwiseOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​xor` + | *OpGroupNonUniformBitwiseXor* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​and` + | *OpGroupNonUniformLogicalAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​or` + | *OpGroupNonUniformLogicalOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​xor` + | *OpGroupNonUniformLogicalXor* + | *GroupNonUniformArithmetic* +endif::cl_khr_subgroup_non_uniform_arithmetic[] + +ifdef::cl_khr_subgroup_non_uniform_vote[] +3+| For `<>`: + +| `sub_​group_​elect` + | *OpGroupNonUniformElect* + | *GroupNonUniform* +| `sub_​group_​non_​uniform_​all` + | *OpGroupNonUniformAll* + | *GroupNonUniformVote* +| `sub_​group_​non_​uniform_​any` + | *OpGroupNonUniformAny* + | *GroupNonUniformVote* +| `sub_​group_​non_​uniform_​all_​equal` + | *OpGroupNonUniformAllEqual* + | *GroupNonUniformVote* +endif::cl_khr_subgroup_non_uniform_vote[] + +ifdef::cl_khr_subgroup_shuffle[] +3+| For `<>`: + +| `sub_​group_​shuffle` + | *OpGroupNonUniformShuffle* + | *GroupNonUniformShuffle* +| `sub_​group_​shuffle_​xor` + | *OpGroupNonUniformShuffleXor* + | *GroupNonUniformShuffle* +endif::cl_khr_subgroup_shuffle[] + +ifdef::cl_khr_subgroup_shuffle_relative[] +3+| For `<>`: + +| `sub_​group_​shuffle_​up` + | *OpGroupNonUniformShuffleUp* + | *GroupNonUniformShuffleRelative* +| `sub_​group_​shuffle_​down` + | *OpGroupNonUniformShuffleDown* + | *GroupNonUniformShuffleRelative* 
+endif::cl_khr_subgroup_shuffle_relative[] + +|==== + + [[opencl-numerical-compliance]] = OpenCL Numerical Compliance @@ -11184,7 +15230,7 @@ standards that must be supported by all OpenCL compliant devices. This section describes the functionality that must be supported by all OpenCL devices for single precision floating-point numbers. Currently, only single precision floating-point is a requirement. -Double precision floating-point is an optional feature. +Double-precision floating-point is an optional feature. [[rounding-modes-1]] @@ -11200,13 +15246,103 @@ IEEE 754 defines four possible rounding modes: * Round toward zero _Round to nearest_ _even_ is currently the only rounding mode required by the -OpenCL specification for single precision and double precision operations and is +OpenCL specification for single precision and double-precision operations and is therefore the default rounding mode footnote:[{fn-float-required-rounding-mode}]. In addition, only static selection of rounding mode is supported. Dynamically reconfiguring the rounding modes as specified by the IEEE 754 spec is unsupported. +ifdef::cl_khr_fp16[] +If the `<>` extension macro is supported, then +if `CL_FP_ROUND_TO_NEAREST` is supported, the default rounding mode for +half-precision floating-point operations will be round to nearest even; +otherwise the default rounding mode will be round to zero. + +Conversions to half floating-point format must be correctly rounded using +the indicated `convert` operator rounding mode or the default rounding mode +for half-precision floating-point operations if no rounding mode is +specified by the operator, or a C-style cast is used. + +Conversions from half to integer format shall correctly round using the +indicated `convert` operator rounding mode, or towards zero if no rounding +mode is specified by the operator or a C-style cast is used. +All conversions from half to floating-point formats are exact. 
+endif::cl_khr_fp16[] + +ifdef::cl_khr_select_fprounding_mode[] +[open,refpage='SELECT_ROUNDING_MODE',desc='Select rounding mode for a group of instructions',type='freeform',spec='clang',anchor='select-rounding-mode-macro',xrefs='fpMacros'] +-- +[[select-rounding-mode]] + +If the `<<cl_khr_select_fprounding_mode>>` extension macro is supported, the +floating-point rounding mode may be specified using the following *#pragma* +in the OpenCL program source: + +[source,opencl_c] +---- +#pragma OPENCL SELECT_ROUNDING_MODE <rounding-mode> +---- + +The _<rounding-mode>_ may be one of the following values: + + * *rte* - round to nearest even + * *rtz* - round to zero + * *rtp* - round to positive infinity + * *rtn* - round to negative infinity + +If this extension is supported then the OpenCL implementation must support +all four rounding modes for single precision floating-point. + +The *#pragma* sets the rounding mode for all instructions that operate on +floating-point types (scalar or vector types) or produce floating-point +values that follow this pragma in the program source until the next +*#pragma*. +Note that the rounding mode specified for a block of code is known at +compile time. +When inside a compound statement, the pragma takes effect from its +occurrence until another *#pragma* is encountered (including within a nested +compound statement), or until the end of the compound statement; at the end +of a compound statement the state for the pragma is restored to its +condition just before the compound statement. +Except where otherwise documented, the callee functions do not inherit the +rounding mode of the caller function.
+ +If the `<<cl_khr_select_fprounding_mode>>` extension is enabled, the +`\\__ROUNDING_MODE__` preprocessor symbol shall be defined to be one of the +following according to the current rounding mode: + +[source,opencl_c] +---- +#define __ROUNDING_MODE__ rte +#define __ROUNDING_MODE__ rtz +#define __ROUNDING_MODE__ rtp +#define __ROUNDING_MODE__ rtn +---- + +This is intended to enable remapping `foo()` to `foo_rte()` by the +preprocessor by using: + +[source,opencl_c] +---- +#define foo foo ## __ROUNDING_MODE__ +---- + +The default rounding mode is round to nearest even. +The <>, <>, and <> are +implemented with the round to nearest even rounding mode. +Various built-in conversions and the *vstore_half* and *vstorea_half* +built-in functions that do not specify a rounding mode inherit the current +rounding mode. +Conversions from floating-point to integer type always use `rtz` mode, +except where the user specifically asks for another rounding mode. + +NOTE: The `<<cl_khr_select_fprounding_mode>>` extension was deprecated in +OpenCL 1.1, and its use is not recommended. +-- +endif::cl_khr_select_fprounding_mode[] + [[inf-nan-and-denormalized-numbers]] == INF, NaN and Denormalized Numbers @@ -11270,6 +15406,13 @@ compliant and are therefore correctly rounded. Conversion between floating-point formats and <> must be correctly rounded. +ifdef::cl_khr_fp16[] +If the `<<cl_khr_fp16>>` extension macro is supported, +addition, subtraction, multiplication, fused multiply-add operations on half +types are required to be correctly rounded using the default rounding mode +for half-precision floating-point operations. +endif::cl_khr_fp16[] + The ULP is defined as follows: ==== @@ -11301,10 +15444,10 @@ floating-point number that would be representable after the finite maximum, if there was sufficient range, meets ULP error tolerance.
[[table-ulp-float-math]] -.ULP values for single precision built-in math functions -[cols=",",] +.ULP Values for Single-Precision Built-in Math Functions +[cols=",",options="header",] |==== -| *Function* | *Min Accuracy - ULP values* +| Function | Min Accuracy - ULP values | _x_ + _y_ | Correctly rounded | _x_ - _y_ | Correctly rounded | _x_ * _y_ | Correctly rounded @@ -11487,10 +15630,10 @@ is the infinitely precise result. 0 ulp is used for math functions that do not require rounding. [[table-ulp-embedded]] -.ULP values for the embedded profile -[cols=",",] +.ULP Values for the Embedded Profile +[cols=",",options="header",] |==== -| *Function* | *Min Accuracy - ULP values* +| Function | Min Accuracy - ULP values | _x_ + _y_ | Correctly rounded | _x_ - _y_ | Correctly rounded | _x_ * _y_ | Correctly rounded @@ -11633,11 +15776,10 @@ operations and builtins with `-cl-unsafe-math-optimizations` <> support for OpenCL C 2.0 or newer. [[table-float-ulp-relaxed]] -.ULP values for single precision built-in math functions with unsafe math optimizations in the full and embedded profiles -[cols="3,7",] +.ULP Values for Single-Precision Built-in Math Functions With Unsafe Math Optimizations in the Full and Embedded Profiles +[cols="3,7",options="header",] |==== -| *Function* -| *Minimum Accuracy* +| Function | Minimum Accuracy | 1.0 / _x_ | {leq} 2.5 ulp for _x_ in the domain of 2^-126^ to 2^126^ for the full @@ -11823,17 +15965,17 @@ requires>> support for OpenCL C 2.0 or newer. an add both of which are correctly rounded. |==== -The following table describes the minimum accuracy of double precision +The following table describes the minimum accuracy of double-precision floating-point arithmetic operations given as ULP values. The reference value used to compute the ULP value of an arithmetic operation is the infinitely precise result. 0 ulp is used for math functions that do not require rounding. 
[[table-ulp-double]] -.ULP values for double precision built-in math functions -[cols=",",] +.ULP Values for Double-Precision Built-in Math Functions +[cols=",",options="header",] |==== -| *Function* | *Min Accuracy - ULP values* +| Function | Min Accuracy - ULP values | _x_ + _y_ | Correctly rounded | _x_ - _y_ | Correctly rounded | _x_ * _y_ | Correctly rounded @@ -11962,6 +16104,166 @@ is the infinitely precise result. |==== +ifdef::cl_khr_fp16[] +If the `<>` extension macro is supported, +the following table describes the minimum accuracy of half-precision +floating-point arithmetic operations given as ULP values. +The reference value used to compute the ULP value of an arithmetic operation +is the infinitely precise result. +0 ulp is used for math functions that do not require rounding. + +[[table-ulp-half-math]] +.ULP Values for Half-Precision Floating-Point Arithmetic Operations +[cols=",,",options="header",] +|==== +| Function | Min Accuracy - Full Profile | Min Accuracy - Embedded Profile +| *_x_ + _y_* | Correctly rounded | Correctly rounded +| *_x_ - _y_* | Correctly rounded | Correctly rounded +| *_x_ * _y_* | Correctly rounded | Correctly rounded +| *1.0 / _x_* | Correctly rounded | \<= 1 ulp +| *_x_ / _y_* | Correctly rounded | \<= 1 ulp +| | | +| *acos* | \<= 2 ulp | \<= 3 ulp +| *acosh* | \<= 2 ulp | \<= 3 ulp +| *acospi* | \<= 2 ulp | \<= 3 ulp +| *asin* | \<= 2 ulp | \<= 3 ulp +| *asinh* | \<= 2 ulp | \<= 3 ulp +| *asinpi* | \<= 2 ulp | \<= 3 ulp +| *atan* | \<= 2 ulp | \<= 3 ulp +| *atanh* | \<= 2 ulp | \<= 3 ulp +| *atanpi* | \<= 2 ulp | \<= 3 ulp +| *atan2* | \<= 2 ulp | \<= 3 ulp +| *atan2pi* | \<= 2 ulp | \<= 3 ulp +| *cbrt* | \<= 2 ulp | \<= 2 ulp +| *ceil* | Correctly rounded | Correctly rounded +| *clamp* | 0 ulp | 0 ulp +| *copysign* | 0 ulp | 0 ulp +| *cos* | \<= 2 ulp | \<= 2 ulp +| *cosh* | \<= 2 ulp | \<= 3 ulp +| *cospi* | \<= 2 ulp | \<= 2 ulp + +// 3 operations from the 2 multiplications and 1 subtraction per component +| *cross* + 
| absolute error tolerance of 'max * max * (3 * HALF_EPSILON)' per vector + component, where _max_ is the maximum input operand magnitude + | Implementation-defined +| *degrees* | \<= 2 ulp | \<= 2 ulp + +// 0.5 ULP error in sqrt +// 1.5 * n cumulative error for multiplications +// 0.5 * (n-1) cumulative error for additions +// +// = 0.5 + (1.5 * n) + (0.5 * (n - 1)) +// = 0.5 + 1.5n + (0.5n - 0.5) +// = 2n +| *distance* + | \<= 2n ulp, for gentype with vector width _n_ + | Implementation-defined + +// n + n-1 Number of operations from n multiples and (n-1) additions +// 2n - 1 +| *dot* + | absolute error tolerance of 'max * max * (2n - 1) * HALF_EPSILON', for + vector width _n_ and maximum input operand magnitude _max_ across all + vector components + | Implementation-defined + +| *erfc* | \<= 4 ulp | \<= 4 ulp +| *erf* | \<= 4 ulp | \<= 4 ulp +| *exp* | \<= 2 ulp | \<= 3 ulp +| *exp2* | \<= 2 ulp | \<= 3 ulp +| *exp10* | \<= 2 ulp | \<= 3 ulp +| *expm1* | \<= 2 ulp | \<= 3 ulp +| *fabs* | 0 ulp | 0 ulp +| *fdim* | Correctly rounded | Correctly rounded +| *floor* | Correctly rounded | Correctly rounded +| *fma* | Correctly rounded | Correctly rounded +| *fmax* | 0 ulp | 0 ulp +| *fmin* | 0 ulp | 0 ulp +| *fmod* | 0 ulp | 0 ulp +| *fract* | Correctly rounded | Correctly rounded +| *frexp* | 0 ulp | 0 ulp +| *hypot* | \<= 2 ulp | \<= 3 ulp +| *ilogb* | 0 ulp | 0 ulp +| *ldexp* | Correctly rounded | Correctly rounded + +// 0.5 ULP error in sqrt +// 0.5 effect on e of taking sqrt(x + e) +// 0.5 * n cumulative error for multiplications +// 0.5 * (n-1) cumulative error for additions +// +// = (0.5 + 0.5 * ((0.5 * n) + (0.5 * (n - 1)))) +// = 0.5 + 0.5 * (n - 0.5) +// = 0.25 + 0.5n +| *length* + | \<= 0.25 + 0.5n ulp, for gentype with vector width _n_ + | Implementation-defined +| *log* | \<= 2 ulp | \<= 3 ulp +| *log2* | \<= 2 ulp | \<= 3 ulp +| *log10* | \<= 2 ulp | \<= 3 ulp +| *log1p* | \<= 2 ulp | \<= 3 ulp +| *logb* | 0 ulp | 0 ulp +| *mad* | Implementation-defined 
| Implementation-defined +| *max* | 0 ulp | 0 ulp +| *maxmag* | 0 ulp | 0 ulp +| *min* | 0 ulp | 0 ulp +| *minmag* | 0 ulp | 0 ulp +| *mix* | Implementation-defined | Implementation-defined +| *modf* | 0 ulp | 0 ulp +| *nan* | 0 ulp | 0 ulp +| *nextafter* | 0 ulp | 0 ulp + +// 1.5 error in rsqrt + error in multiply +// 0.5 * n cumulative error for multiplications +// 0.5 * (n-1) cumulative error for additions +// +// = 1.5 + (0.5 * n) + (0.5 * (n - 1)) +// = 1.5 + 0.5n + (0.5n - 0.5) +// = 1.0 + n +| *normalize* + | \<= 1 + n ulp, for gentype with vector width _n_ + | Implementation-defined +| *pow(x, y)* | \<= 4 ulp | \<= 5 ulp +| *pown(x, y)* | \<= 4 ulp | \<= 5 ulp +| *powr(x, y)* | \<= 4 ulp | \<= 5 ulp +| *radians* | \<= 2 ulp | \<= 2 ulp +| *remainder* | 0 ulp | 0 ulp +| *remquo* + | 0 ulp for the remainder, at least the lower 7 bits of the integral + quotient + | 0 ulp for the remainder, at least the lower 7 bits of the integral + quotient +| *rint* | Correctly rounded | Correctly rounded +| *rootn* | \<= 4 ulp | \<= 5 ulp +| *round* | Correctly rounded | Correctly rounded +| *rsqrt* | \<=1 ulp | \<=1 ulp +| *sign* | 0 ulp | 0 ulp +| *sin* | \<= 2 ulp | \<= 2 ulp +| *sincos* + | \<= 2 ulp for sine and cosine values + | \<= 2 ulp for sine and cosine values +| *sinh* | \<= 2 ulp | \<= 3 ulp +| *sinpi* | \<= 2 ulp | \<= 2 ulp +| *smoothstep* | Implementation-defined | Implementation-defined +| *sqrt* | Correctly rounded | \<= 1 ulp +| *step* | 0 ulp | 0 ulp +| *tan* | \<= 2 ulp | \<= 3 ulp +| *tanh* | \<= 2 ulp | \<= 3 ulp +| *tanpi* | \<= 2 ulp | \<= 3 ulp +| *tgamma* | \<= 4 ulp | \<= 4 ulp +| *trunc* | Correctly rounded | Correctly rounded +|==== + +NOTE: _Implementations may perform floating-point operations on_ `half` +_scalar or vector data types by converting the_ `half` _values to single +precision floating-point values and performing the operation in single +precision floating-point. 
+In this case, the implementation will use the_ `half` _scalar or vector data +type as a storage only format_. + +endif::cl_khr_fp16[] + + [[edge-case-behavior]] == Edge Case Behavior @@ -12010,7 +16312,7 @@ For example, *sin*({plusmn}0) = {plusmn}0 shall be interpreted to mean * *atan2pi*(_y_, {plusmn}0) returns 0.5 for _y_ > 0. * *atan2pi*({plusmn}_y_, -{inf}) returns {plusmn}1 for finite _y_ > 0. * *atan2pi*({plusmn}_y_, +{inf}) returns {plusmn}0 for finite _y_ > 0. -* *atan2pi*({plusmn}{inf}, _x_) returns {plusmn}0.5 for finite _x._ +* *atan2pi*({plusmn}{inf}, _x_) returns {plusmn}0.5 for finite _x_. * *atan2pi*({plusmn}{inf}, -{inf}) returns {plusmn}0.75. * *atan2pi*({plusmn}{inf}, +{inf}) returns {plusmn}0.25. * *ceil*(-1 < _x_ < 0) returns -0. @@ -12064,7 +16366,7 @@ for (i = 0; i < sizeof(v) / sizeof(v[0]); i++) * *powr*({plusmn}0, _y_) is +{inf} for finite _y_ < 0. * *powr*({plusmn}0, -{inf}) is +{inf}. * *powr*({plusmn}0, _y_) is +0 for _y_ > 0. -* *powr*(+1, _y_) is 1 for finite _y._ +* *powr*(+1, _y_) is 1 for finite _y_. * *powr*(_x_, _y_) returns NaN for _x_ < 0. * *powr*({plusmn}0, {plusmn}0) returns NaN. * *powr*(+{inf}, {plusmn}0) returns NaN. @@ -12117,7 +16419,7 @@ if the caller is in some other rounding mode. [[edge-case-behavior-in-flush-to-zero-mode]] -=== Edge Case Behavior in Flush To Zero Mode +=== Edge Case Behavior in Flush to Zero Mode If denormals are flushed to zero, then a function may return one of four results: @@ -12218,9 +16520,9 @@ The following table describes the address_mode function. [[table-address-modes-texel-location]] .Addressing modes to generate texel location -[cols=",",] +[cols=",",options="header",] |==== -| *Addressing Mode* | *Result of address_mode(coord)* +| Addressing Mode | Result of address_mode(coord) | `CLK_ADDRESS_CLAMP_TO_EDGE` | clamp (coord, 0, size - 1) | `CLK_ADDRESS_CLAMP` | clamp (coord, -1, size) | `CLK_ADDRESS_NONE` | coord @@ -12573,14 +16875,14 @@ and writing images in a kernel. 
[[conversion-rules-for-normalized-integer-channel-data-types]] -=== Conversion rules for normalized integer channel data types +=== Conversion Rules for Normalized Integer Channel Data Types In this section we discuss converting normalized integer channel data types to floating-point values and vice-versa. [[converting-normalized-integer-channel-data-types-to-floating-point-values]] -==== Converting normalized integer channel data types to floating-point values +==== Converting Normalized Integer Channel Data Types to Floating-point Values For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, *read_imagef* will convert the channel values from an @@ -12620,7 +16922,7 @@ These conversions are performed as follows: * normalized `float` value = *max*(`-1.0f`, `(float)c / 32767.0f`) The precision of the above conversions is \<= 1.5 ulp except for the -following cases. +following cases: For `CL_UNORM_INT8` @@ -12655,8 +16957,87 @@ For `CL_SNORM_INT16` * 32767 must convert to `1.0f` +ifdef::cl_khr_fp16[] +[[converting-normalized-integer-channel-data-types-to-half-precision-floating-point-values]] +==== Converting Normalized Integer Channel Data Types to Half-Precision Floating-Point Values + +If the `<>` extension is supported, then +for images created with image channel data type of `CL_UNORM_INT8` and +`CL_UNORM_INT16`, *read_imageh* will convert the channel values from an +8-bit or 16-bit unsigned integer to normalized half-precision floating-point +values in the range [`0.0h`, `1.0h`]. + +For images created with image channel data type of `CL_SNORM_INT8` and +`CL_SNORM_INT16`, *read_imageh* will convert the channel values from an +8-bit or 16-bit signed integer to normalized half-precision floating-point +values in the range [`-1.0h`, `1.0h`]. 
+ +These conversions are performed as follows: + +`CL_UNORM_INT8` (8-bit unsigned integer) {rightarrow} `half` + +[none] +* normalized `half` value = `round_to_half(c / 255)` + +`CL_UNORM_INT_101010` (10-bit unsigned integer) {rightarrow} `half` + +[none] +* normalized `half` value = `round_to_half(c / 1023)` + +`CL_UNORM_INT16` (16-bit unsigned integer) {rightarrow} `half` + +[none] +* normalized `half` value = `round_to_half(c / 65535)` + +`CL_SNORM_INT8` (8-bit signed integer) {rightarrow} `half` + +[none] +* normalized `half` value = *max*(`-1.0h`, `round_to_half(c / 127)`) + +`CL_SNORM_INT16` (16-bit signed integer) {rightarrow} `half` + +[none] +* normalized `half` value = *max*(`-1.0h`, `round_to_half(c / 32767)`) + +The precision of the above conversions is \<= 1.5 ulp except for the +following cases: + +For `CL_UNORM_INT8` + +[none] +* 0 must convert to `0.0h` and +* 255 must convert to `1.0h` + +For `CL_UNORM_INT_101010` + +[none] +* 0 must convert to `0.0h` and +* 1023 must convert to `1.0h` + +For `CL_UNORM_INT16` + +[none] +* 0 must convert to `0.0h` and +* 65535 must convert to `1.0h` + +For `CL_SNORM_INT8` + +[none] +* -128 and -127 must convert to `-1.0h`, +* 0 must convert to `0.0h` and +* 127 must convert to `1.0h` + +For `CL_SNORM_INT16` + +[none] +* -32768 and -32767 must convert to `-1.0h`, +* 0 must convert to `0.0h` and +* 32767 must convert to `1.0h` +endif::cl_khr_fp16[] + + [[converting-floating-point-values-to-normalized-integer-channel-data-types]] -==== Converting floating-point values to normalized integer channel data types +==== Converting Floating-Point Values to Normalized Integer Channel Data Types For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, *write_imagef* will convert the floating-point color value @@ -12743,8 +17124,71 @@ the result produced by the round to nearest even rounding mode must be {leq} * *fabs*(f~preferred~ - f~approx~) must be \<= 0.6 +ifdef::cl_khr_fp16[] 
+[[converting-half-precision-floating-point-values-to-normalized-integer-channel-data-types]] +==== Converting Half-Precision Floating-Point Values to Normalized Integer Channel Data Types + +If the `<<cl_khr_fp16>>` extension is supported, then +for images created with image channel data type of `CL_UNORM_INT8` and +`CL_UNORM_INT16`, *write_imageh* will convert the floating-point color value +to an 8-bit or 16-bit unsigned integer. + +For images created with image channel data type of `CL_SNORM_INT8` and +`CL_SNORM_INT16`, *write_imageh* will convert the floating-point color value +to an 8-bit or 16-bit signed integer. + +The preferred conversion uses the round to nearest even (`_rte`) rounding +mode, but OpenCL implementations may choose to approximate the rounding mode +used in the conversions described below. +When approximate rounding is used instead of the preferred rounding, the +result of the conversion must satisfy the bound given below. + +`half` {rightarrow} `CL_UNORM_INT8` (8-bit unsigned integer) + +[none] + * Let f~exact~ = *max*(`0`, *min*(`f * 255`, `255`)) + * Let f~preferred~ = *convert_uchar_sat_rte*(`f * 255.0f`) + * Let f~approx~ = *convert_uchar_sat_<impl-rounding-mode>*(`f * 255.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 + +`half` {rightarrow} `CL_UNORM_INT_101010` (10-bit unsigned integer) + +[none] + * Let f~exact~ = *max*(`0`, *min*(`f * 1023`, `1023`)) + * Let f~preferred~ = *min*(*convert_ushort_sat_rte*(`f * 1023.0f`), + `1023`) + * Let f~approx~ = *convert_ushort_sat_<impl-rounding-mode>*(`f * 1023.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 + +`half` {rightarrow} `CL_UNORM_INT16` (16-bit unsigned integer) + +[none] + * Let f~exact~ = *max*(`0`, *min*(`f * 65535`, `65535`)) + * Let f~preferred~ = *convert_ushort_sat_rte*(`f * 65535.0f`) + * Let f~approx~ = *convert_ushort_sat_<impl-rounding-mode>*(`f * + 65535.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 + +`half` {rightarrow} `CL_SNORM_INT8` (8-bit signed integer) + +[none] + * Let f~exact~ = *max*(`-128`, *min*(`f * 127`, 
`127`)) + * Let f~preferred~ = *convert_char_sat_rte*(`f * 127.0f`) + * Let f~approx~ = *convert_char_sat_<impl-rounding-mode>*(`f * 127.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 + +`half` {rightarrow} `CL_SNORM_INT16` (16-bit signed integer) + +[none] + * Let f~exact~ = *max*(`-32768`, *min*(`f * 32767`, `32767`)) + * Let f~preferred~ = *convert_short_sat_rte*(`f * 32767.0f`) + * Let f~approx~ = *convert_short_sat_<impl-rounding-mode>*(`f * 32767.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 +endif::cl_khr_fp16[] + + [[conversion-rules-for-half-precision-floating-point-channel-data-type]] -=== Conversion rules for half precision floating-point channel data type +=== Conversion Rules for Half-Precision Floating-Point Channel Data Type For images created with a channel data type of `CL_HALF_FLOAT`, the conversions from `half` to `float` are lossless (as described in @@ -12759,7 +17203,7 @@ type. [[conversion-rules-for-floating-point-channel-data-type]] -=== Conversion rules for floating-point channel data type +=== Conversion Rules for Floating-Point Channel Data Type The following rules apply for reading and writing images created with channel data type of `CL_FLOAT`. @@ -12770,7 +17214,7 @@ channel data type of `CL_FLOAT`. [[conversion-rules-for-signed-and-unsigned-8-bit-16-bit-and-32-bit-integer-channel-data-types]] -=== Conversion rules for signed and unsigned 8-bit, 16-bit and 32-bit integer channel data types +=== Conversion Rules for Signed and Unsigned 8-Bit, 16-Bit and 32-Bit Integer Channel Data Types Calls to *read_imagei* with channel data type values of `CL_SIGNED_INT8`, `CL_SIGNED_INT16` and `CL_SIGNED_INT32` return the unmodified integer values @@ -12818,7 +17262,7 @@ The conversions described in this section must be correctly saturated. 
[[conversion-rules-for-srgba-and-sbgra-images]] -=== Conversion rules for sRGBA and sBGRA images +=== Conversion Rules for sRGBA and sBGRA Images Standard RGB data, which roughly displays colors in a linear ramp of luminosity levels such that an average observer, under average viewing @@ -12895,7 +17339,7 @@ implementation produces and being checked for conformance. [[selecting-an-image-from-an-image-array]] -== Selecting an Image from an Image Array +== Selecting an Image From an Image Array Let (_u_,_v_,_w_) represent the unnormalized image coordinate values for reading from and/or writing to a 2D image in a 2D image array. @@ -12978,7 +17422,7 @@ one of the integers 0, 1, ... h~t~ - 1. include::c/appendix_a.asciidoc[] -// This is generatig asciidoctor errors: +// This is generating asciidoctor errors: // OpenCL_C.txt: Failed to load AsciiDoc document - undefined method `+' for nil:NilClass // Disabling acknowledgements for now. We have them in the API spec already. //<<< diff --git a/OpenCL_Ext.txt b/OpenCL_Ext.txt index a2e983ea3..68e4295b7 100644 --- a/OpenCL_Ext.txt +++ b/OpenCL_Ext.txt @@ -47,75 +47,9 @@ include::copyrights.txt[] include::ext/introduction.asciidoc[] -include::ext/cl_khr_icd.asciidoc[] -include::ext/cl_khr_byte_addressable_store.asciidoc[] -include::ext/cl_khr_3d_image_writes.asciidoc[] -include::ext/cl_khr_fp16.asciidoc[] -include::ext/cl_khr_fp64.asciidoc[] -include::ext/cl_khr_int32_atomics.asciidoc[] -include::ext/cl_khr_int64_atomics.asciidoc[] -include::ext/cl_khr_select_fprounding_mode.asciidoc[] -include::ext/cl_khr_gl_sharing__context.asciidoc[] -include::ext/cl_khr_gl_sharing__memobjs.asciidoc[] - -include::ext/cl_khr_gl_event.asciidoc[] -include::ext/cl_khr_d3d10_sharing.asciidoc[] - -include::ext/cl_khr_d3d11_sharing.asciidoc[] -include::ext/cl_khr_dx9_media_sharing.asciidoc[] -include::ext/cl_khr_depth_images.asciidoc[] -include::ext/cl_khr_gl_depth_images.asciidoc[] -include::ext/cl_khr_gl_msaa_sharing.asciidoc[] 
-include::ext/cl_khr_egl_event.asciidoc[] -include::ext/cl_khr_egl_image.asciidoc[] -include::ext/cl_khr_image2d_from_buffer.asciidoc[] -include::ext/cl_khr_initialize_memory.asciidoc[] -include::ext/cl_khr_terminate_context.asciidoc[] -include::ext/cl_khr_spir.asciidoc[] -include::ext/cl_khr_il_program.asciidoc[] -include::ext/cl_khr_create_command_queue.asciidoc[] - -include::ext/cl_khr_device_enqueue_local_arg_types.asciidoc[] -include::ext/cl_khr_subgroups.asciidoc[] -include::ext/cl_khr_mipmap_image.asciidoc[] -include::ext/cl_khr_srgb_image_writes.asciidoc[] - -include::ext/cl_khr_priority_hints.asciidoc[] -include::ext/cl_khr_throttle_hints.asciidoc[] - -include::ext/cl_khr_subgroup_named_barrier.asciidoc[] - -include::ext/cl_khr_extended_async_copies.asciidoc[] -include::ext/cl_khr_async_work_group_copy_fence.asciidoc[] - -include::ext/cl_khr_device_uuid.asciidoc[] -include::ext/cl_khr_extended_versioning.asciidoc[] - -include::ext/cl_khr_subgroup_extensions.asciidoc[] - -include::ext/cl_khr_pci_bus_info.asciidoc[] - -include::ext/cl_khr_extended_bit_ops.asciidoc[] -include::ext/cl_khr_suggested_local_work_size.asciidoc[] - -include::ext/cl_khr_integer_dot_product.asciidoc[] - -include::ext/cl_khr_semaphore.asciidoc[] - -include::ext/cl_khr_external_semaphore.asciidoc[] -include::ext/cl_khr_external_memory.asciidoc[] - -include::ext/cl_khr_command_buffer.asciidoc[] -include::ext/cl_khr_expect_assume.asciidoc[] - -include::ext/cl_khr_subgroup_rotate.asciidoc[] -include::ext/cl_khr_work_group_uniform_arithmetic.asciidoc[] - -include::ext/cl_khr_command_buffer_mutable_dispatch.asciidoc[] -include::ext/cl_khr_command_buffer_multi_device.asciidoc[] - -// NOTE: To keep meaningful section numbers, new -// extension documents should be added above here! +// khr extension specifications must now be written as conditionally +// included markup in the OpenCL API and C Language Specifications, rather +// than being included here as separate documents. 
// These are SPIR-V Extensions: diff --git a/README.adoc b/README.adoc index 410abb0bc..0ef04fb09 100644 --- a/README.adoc +++ b/README.adoc @@ -142,6 +142,48 @@ build is very fast, even for the whole Specification, but PDF builds may take several minutes. +[[building-extensions]] +== Building With Extensions Included + +Invoking 'make' with no extra arguments will build the OpenCL API and OpenCL +C Language specifications with only the core APIs and functionality +included. +To build versions of these specifications with extension language included, +you should use the `makeSpec` script. +`makeSpec` is a Python script accepting the following arguments: + + * -spec _variant_ - _variant_ is `core`, `khr`, or `all`, building + specifications with only core, core + all khr extensions and core + all + extensions, respectively. + At present, `all` is equivalent to `khr` as only `khr` extensions are + included in the specification source. + * -ext _name_ - add the specified extension _name_ and its dependencies to + the build. + * -clean - clean generated files before building. + * -registry _path_ - use specified API XML instead of the default + `xml/cl.xml`. + * -v - verbose, print actions before executing them. + * -n - dry-run, print actions instead of executing them. + * Unrecognized options are passed on to `make`, so must be valid Makefile + targets or `make` options such as `-j`. + +Any other options reported by `makeSpec --help` are not yet functional, and +should not be used. + +For example, to build with all `khr` extensions, you could use + +[source,sh] +---- +$ makeSpec -clean -spec khr -j html refpages +---- + +`makeSpec` is a wrapper which constructs options and invokes `make` +when building with extensions included, which only affects building the API +(including reference pages) and C language specifications. +`makeSpec` determines extension dependencies from the metadata in the +specified registry XML path. 
+ + [[refpage-install]] == Reference Page Installation @@ -688,6 +730,7 @@ by Khronos. [[history]] == Revision History + * 2024-04-07 - Add `makeSpec` instructions. * 2023-11-05 - Add hexapdf, remove ghostscript * 2020-03-13 - Updated package versions to match Travis build. * 2019-06-20 - Add directions for publishing OpenCL 2.2 reference pages, diff --git a/api/acknowledgements.asciidoc b/api/acknowledgements.asciidoc index 6df49ab06..72fec4365 100644 --- a/api/acknowledgements.asciidoc +++ b/api/acknowledgements.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2016-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2016-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 = Acknowledgements diff --git a/api/appendix_a.asciidoc b/api/appendix_a.asciidoc index c2fc908df..0078df977 100644 --- a/api/appendix_a.asciidoc +++ b/api/appendix_a.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] = Host environment and thread safety @@ -73,7 +72,7 @@ The OpenCL implementation should be able to create multiple command-queues for a given OpenCL context and multiple OpenCL contexts in an application running on the host processor. -== Global constructors and destructors +== Global Constructors and Destructors The execution order of global constructors and destructors is left undefined by the C and C++ standards. It is therefore not possible to know the relative diff --git a/api/appendix_b.asciidoc b/api/appendix_b.asciidoc index 2759903f1..c5698515e 100644 --- a/api/appendix_b.asciidoc +++ b/api/appendix_b.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2016-2024 The Khronos Group. 
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2016-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] = Portability @@ -242,12 +241,12 @@ vector machines OpenCL could have made it illegal to do a conversion free cast that changes the number of elements in the name of portability. However, while OpenCL provides a common set of operators drawing from the -set that are typically found on vector machines, it can not provide access +set that are typically found on vector machines, it cannot provide access to everything every ISA may offer in a consistent uniform portable manner. Many vector ISAs provide special purpose instructions that greatly accelerate specific operations such as DCT, SAD, or 3D geometry. It is not intended for OpenCL to be so heavy handed that time-critical -performance sensitive algorithms can not be written by knowledgeable +performance sensitive algorithms cannot be written by knowledgeable developers to perform at near peak performance. Developers willing to throw away portability should be able to use the platform-specific instructions in their code. diff --git a/api/appendix_c.asciidoc b/api/appendix_c.asciidoc index a8b236c21..2531dbb2d 100644 --- a/api/appendix_c.asciidoc +++ b/api/appendix_c.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2016-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2016-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[data-types]] @@ -140,7 +139,7 @@ so their use should be accompanied by a check of the corresponding preprocessor symbol. 
-=== Named vector components notation +=== Named Vector Components Notation Vector data type components may be accessed using the `.sN`, `.sn` or `.xyzw` field naming convention, similar to how they are used within the @@ -198,7 +197,7 @@ foo.s7 // illegal - no component s7 ---- -=== High/Low vector component notation +=== High/Low Vector Component Notation Vector data type components may be accessed using the `.hi` and `.lo` notation similar to that supported within the language types. @@ -217,7 +216,7 @@ For example: ---- -=== Native vector type notation +=== Native Vector Type Notation Certain native vector types are defined for providing a mapping of vector types to architecturally built-in vector types. @@ -270,14 +269,14 @@ Explicit casting of native vector types (`+__cl_typen+`) is defined by the external compiler. -== Other operators and functions +== Other Operators and Functions The behavior of standard operators and function on both application vector types (`cl_typen`) and native vector types (`+__cl_typen+`) is defined by the external compiler. -== Application constant definitions +== Application Constant Definitions In addition to the above application type definitions, the following literal definitions are also available. @@ -390,54 +389,54 @@ include::{generated}/api/version-notes/CL_FLT_EPSILON.asciidoc[] | {CL_DBL_DIG_anchor} include::{generated}/api/version-notes/CL_DBL_DIG.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Number of decimal digits of precision for the type {cl_double_TYPE} | {CL_DBL_MANT_DIG_anchor} include::{generated}/api/version-notes/CL_DBL_MANT_DIG.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Number of digits in the mantissa of type {cl_double_TYPE} | {CL_DBL_MAX_10_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MAX_10_EXP.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. 
| Maximum positive integer such that 10 raised to this power minus one can be represented as a normalized floating-point number of type {cl_double_TYPE} | {CL_DBL_MAX_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MAX_EXP.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Maximum exponent value of type {cl_double_TYPE} | {CL_DBL_MIN_10_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MIN_10_EXP.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Minimum negative integer such that 10 raised to this power minus one can be represented as a normalized floating-point number of type {cl_double_TYPE} | {CL_DBL_MIN_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MIN_EXP.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Minimum exponent value of type {cl_double_TYPE} | {CL_DBL_RADIX_anchor} include::{generated}/api/version-notes/CL_DBL_RADIX.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Base value of type {cl_double_TYPE} | {CL_DBL_MAX_anchor} include::{generated}/api/version-notes/CL_DBL_MAX.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Maximum value of type {cl_double_TYPE} | {CL_DBL_MIN_anchor} include::{generated}/api/version-notes/CL_DBL_MIN.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Minimum value of type {cl_double_TYPE} | {CL_DBL_EPSILON_anchor} include::{generated}/api/version-notes/CL_DBL_EPSILON.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Minimum positive floating-point number of type {cl_double_TYPE} such that `1.0 {plus} {CL_DBL_EPSILON} != 1` is true. | {CL_NAN_anchor} diff --git a/api/appendix_d.asciidoc b/api/appendix_d.asciidoc index aad002ee8..7ae631b06 100644 --- a/api/appendix_d.asciidoc +++ b/api/appendix_d.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[check-copy-overlap]] diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index 8cc593172..2bec7aeb0 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[changes_to_opencl]] @@ -12,7 +11,7 @@ versions are summarized below. // (Jon) Are these section and table numbers for the current spec, in which // case they should turn into asciidoctor xrefs, or to older specs? -== Summary of changes from OpenCL 1.0 to OpenCL 1.1 +== Summary of Changes from OpenCL 1.0 to OpenCL 1.1 The following features are added to the OpenCL 1.1 platform layer and runtime (_sections 4 and 5_): @@ -75,11 +74,11 @@ The following features are added to the OpenCL C programming language _section 6.12.3_. ** *async_work_group_strided_copy* defined in section _6.15.11_. ** *vec_step*, *shuffle* and *shuffle2* defined in section _6.15.13_. - * *cl_khr_byte_addressable_store* extension is a core feature. - * *cl_khr_global_int32_base_atomics*, - *cl_khr_global_int32_extended_atomics*, - *cl_khr_local_int32_base_atomics* and - *cl_khr_local_int32_extended_atomics* extensions are core features. + * `<>` extension is a core feature. + * `<>`, + `<>`, + `<>` and + `<>` extensions are core features. The built-in atomic function names are changed to use the *atomic_* prefix instead of *atom_*. * Macros `CL_VERSION_1_0` and `CL_VERSION_1_1`. 
@@ -98,20 +97,20 @@ The following features in OpenCL 1.0 are deprecated (see glossary) in OpenCL * The `-cl-strict-aliasing` build option has been deprecated. It is no longer required after defining type-based aliasing rules. // Bugzilla 5593 and 6068 - * The *cl_khr_select_fprounding_mode* extension is deprecated and its + * The `<<cl_khr_select_fprounding_mode>>` extension is deprecated and its use is no longer recommended. The following new extensions are added to _section 9_ in OpenCL 1.1: - * *cl_khr_gl_event* for creating a CL event object from a GL sync object. - * *cl_khr_d3d10_sharing* for sharing memory objects with Direct3D 10. + * `<<cl_khr_gl_event>>` for creating a CL event object from a GL sync object. + * `<<cl_khr_d3d10_sharing>>` for sharing memory objects with Direct3D 10. The following modifications are made to the OpenCL ES Profile described in _section 10_ in OpenCL 1.1: * 64-bit integer support is optional. -== Summary of changes from OpenCL 1.1 to OpenCL 1.2 +== Summary of Changes from OpenCL 1.1 to OpenCL 1.2 The following features are added to the OpenCL 1.2 platform layer and runtime (_sections 4 and 5_): @@ -147,7 +146,7 @@ runtime (_sections 4 and 5_): a kernel. * {clEnqueueMarkerWithWaitList} and {clEnqueueBarrierWithWaitList} APIs. * {clUnloadPlatformCompiler} to request that a single platform's compiler is - unloaded. This is compatible with the *cl_khr_icd* extension if that is + unloaded. This is compatible with the `<<cl_khr_icd>>` extension if that is supported, unlike {clUnloadCompiler}. The following features are added to the OpenCL C programming language @@ -184,7 +183,7 @@ The following APIs in OpenCL 1.1 are deprecated (see glossary) in OpenCL // Bugzilla 5391 - cl_khr_icd specification * {clUnloadCompiler} and {clGetExtensionFunctionAddress} APIs are deprecated. The {clUnloadPlatformCompiler} and {clGetExtensionFunctionAddressForPlatform} - APIs provide equivalent functionality are compatible with the *cl_khr_icd* + APIs provide equivalent functionality and are compatible with the `<<cl_khr_icd>>` extension. 
The following queries are deprecated (see glossary) in OpenCL 1.2: @@ -194,7 +193,7 @@ The following queries are deprecated (see glossary) in OpenCL 1.2: The minimum data type alignment can be derived from {CL_DEVICE_MEM_BASE_ADDR_ALIGN}. -== Summary of changes from OpenCL 1.2 to OpenCL 2.0 +== Summary of Changes from OpenCL 1.2 to OpenCL 2.0 The following features are added to the OpenCL 2.0 platform layer and runtime (_sections 4 and 5_): @@ -289,7 +288,7 @@ The following queries are deprecated (see glossary) in OpenCL 2.0: The deprecated functions are still described in section 6.15.12.8 of this specification. -== Summary of changes from OpenCL 2.0 to OpenCL 2.1 +== Summary of Changes from OpenCL 2.0 to OpenCL 2.1 The following features are added to the OpenCL 2.1 platform layer and runtime (_sections 4 and 5_): @@ -328,7 +327,7 @@ runtimes. The SPIR-V and OpenCL SPIR-V Environment specifications have been added. -== Summary of changes from OpenCL 2.1 to OpenCL 2.2 +== Summary of Changes from OpenCL 2.1 to OpenCL 2.2 The following changes have been made to the OpenCL 2.2 execution model (section 3) @@ -354,7 +353,7 @@ runtime (section 4 and 5): Added definition of Deprecation and Specialization constants to the glossary. -== Summary of changes from OpenCL 2.2 to OpenCL 3.0 +== Summary of Changes from OpenCL 2.2 to OpenCL 3.0 OpenCL 3.0 is a major revision that breaks backwards compatibility with previous versions of OpenCL, see @@ -458,7 +457,7 @@ conformance process: * {CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED} -== Summary of changes from OpenCL 3.0 +== Summary of Changes from OpenCL 3.0 The first non-provisional version of the OpenCL 3.0 specifications was *v3.0.5*. @@ -466,14 +465,14 @@ Changes from *v3.0.5*: * Fixed the calculation in "mapping work-items onto an ND-range". 
* Added new extensions: - ** `cl_khr_extended_versioning` - ** `cl_khr_subgroup_extended_types` - ** `cl_khr_subgroup_non_uniform_vote` - ** `cl_khr_subgroup_ballot` - ** `cl_khr_subgroup_non_uniform_arithmetic` - ** `cl_khr_subgroup_shuffle` - ** `cl_khr_subgroup_shuffle_relative` - ** `cl_khr_subgroup_clustered_reduce` + ** `<>` + ** `<>` + ** `<>` + ** `<>` + ** `<>` + ** `<>` + ** `<>` + ** `<>` Changes from *v3.0.6*: @@ -482,11 +481,11 @@ Changes from *v3.0.6*: * Clarified the table structure in the backwards compatibility appendix. * Clarified that `-cl-unsafe-math-optimizations` also implies `-cl-denorms-are-zero`. * Added new extensions: - ** `cl_khr_extended_bit_ops` - ** `cl_khr_pci_bus_info` - ** `cl_khr_spirv_extended_debug_info` - ** `cl_khr_spirv_linkonce_odr` - ** `cl_khr_suggested_local_work_size` + ** `<>` + ** `<>` + ** `<>` + ** `<>` + ** `<>` Changes from *v3.0.7*: @@ -494,7 +493,7 @@ Changes from *v3.0.7*: * Removed unnecessary phrase from sub-group mask function descriptions. * Added _input_slice_pitch_ error condition for read and write image APIs. * Added new extension: - ** `cl_khr_integer_dot_product` + ** `<>` Changes from *v3.0.8*: @@ -502,18 +501,18 @@ Changes from *v3.0.8*: * Clarified requirements for {CL_DEVICE_DOUBLE_FP_CONFIG} prior to OpenCL 2.0. * Clarified the behavior of ballot operations for remainder sub-groups. 
* Added new extensions: - ** `cl_khr_integer_dot_product` (version 2) - ** `cl_khr_semaphore` (provisional) - ** `cl_khr_external_semaphore` (provisional) - ** `cl_khr_external_semaphore_dx_fence` (provisional) - ** `cl_khr_external_semaphore_opaque_fd` (provisional) - ** `cl_khr_external_semaphore_sync_fd` (provisional) - ** `cl_khr_external_semaphore_win32` (provisional) - ** `cl_khr_external_memory` (provisional) - ** `cl_khr_external_memory_dma_buf` (provisional) - ** `cl_khr_external_memory_dx` (provisional) - ** `cl_khr_external_memory_opaque_fd` (provisional) - ** `cl_khr_external_memory_win32` (provisional) + ** `<>` (version 2) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) Changes from *v3.0.9*: @@ -522,10 +521,10 @@ Changes from *v3.0.9*: * Clarified that {clCompileProgram} is valid for programs created from SPIR. * Documented the possible state of a kernel object after a failed call to {clSetKernelArg}. * Added new extensions: - ** `cl_khr_async_copy_fence` (final) - ** `cl_khr_extended_async_copies` (final) - ** `cl_khr_expect_assume` - ** `cl_khr_command_buffer` (provisional) + ** `<>` (final) + ** `<>` (final) + ** `<>` + ** `<>` (provisional) Changes from *v3.0.10*: @@ -538,8 +537,8 @@ Changes from *v3.0.10*: * Clarified that the extended versioning extension is a core OpenCL 3.0 feature. * Clarified sub-group clustered reduction behavior when the cluster size is not an integer constant or a power of two. * Added new extensions: - ** `cl_khr_subgroup_rotate` - ** `cl_khr_work_group_uniform_arithmetic` + ** `<>` + ** `<>` Changes from *v3.0.11*: @@ -547,29 +546,29 @@ Changes from *v3.0.11*: * Added a maximum limit for the number of arguments supported by a kernel. 
* Clarified requirements for comparability and uniqueness of object handles. * Clarified behavior for invalid device-side enqueue `clk_event_t` handles. - * Clarified `cl_khr_command_buffer` interactions with other extensions. + * Clarified `<>` interactions with other extensions. * Specified error behavior when a command buffer is finalized multiple times. * Added new extension: - ** `cl_khr_command_buffer_mutable_dispatch` (provisional) + ** `<>` (provisional) Changes from *v3.0.12*: * Fixed the accuracy requirements description for half-precision math functions (those prefixed by `half_`). * Clarified that the semaphore type must always be provided when creating a semaphore. * Removed an unnecessary and contradictory error condition when creating a semaphore. - * Added an issue regarding non-linear image import to the `cl_khr_external_memory` extension. - * Added missing calls to {clBuildProgram} to the `cl_khr_command_buffer` and `cl_khr_command_buffer_mutable_dispatch` sample code. + * Added an issue regarding non-linear image import to the `<>` extension. + * Added missing calls to {clBuildProgram} to the `<>` and `<>` sample code. * Fixed a copy-paste error in the extensions quick reference appendix. * Fixed typos and improved formatting consistency in the extensions spec. Changes from *v3.0.13*: - * Corrected the precision for `cross` and `dot` to be based on `HALF_EPSILON` in `cl_khr_fp16`, see {khronos-opencl-pr}/893[#893]. - * Added a context query for command-buffers to `cl_khr_command_buffer`, see {khronos-opencl-pr}/899[#899]. - * Updated the semaphore wait and signal rules for binary semaphores in `cl_khr_semaphore`, see {khronos-opencl-pr}/882[#882]. - * Removed redundant error conditions from `cl_khr_external_semaphore` and `cl_khr_external_memory`, see {khronos-opencl-pr}/903[#903] and {khronos-opencl-pr}/904[#904]. + * Corrected the precision for `cross` and `dot` to be based on `HALF_EPSILON` in `<>`, see {khronos-opencl-pr}/893[#893]. 
+ * Added a context query for command-buffers to `<>`, see {khronos-opencl-pr}/899[#899]. + * Updated the semaphore wait and signal rules for binary semaphores in `<>`, see {khronos-opencl-pr}/882[#882]. + * Removed redundant error conditions from `<>` and `<>`, see {khronos-opencl-pr}/903[#903] and {khronos-opencl-pr}/904[#904]. * Added new extension: - ** `cl_khr_command_buffer_multi_device` (provisional) + ** `<>` (provisional) Changes from *v3.0.14*: @@ -579,24 +578,23 @@ Changes from *v3.0.14*: * Clarified that {clSetCommandQueueProperty} is only required for OpenCL 1.0 devices and may return an error otherwise, see {khronos-opencl-pr}/980[#980]. * Clarified that the application must ensure the free function passed to {clEnqueueSVMFree} is thread safe, see {khronos-opencl-pr}/1016[#1016]. * Clarified that the application must ensure the user function passed to {clEnqueueNativeKernel} is thread safe, see {khronos-opencl-pr}/1026[#1026]. - * `cl_khr_command_buffers` (provisional): + * `<>` (provisional): ** Removed the "invalid" command buffer state, see {khronos-opencl-pr}/885[#885]. ** Added support for recording SVM memory copies and memory fills in a command buffer, see {khronos-opencl-pr}/915[#915]. - * `cl_khr_command_buffer_multi_device` (provisional): + * `<>` (provisional): ** Clarified that the sync devices query should only return root devices, see {khronos-opencl-pr}/925[#925]. - * `cl_khr_external_memory` (provisional): + * `<>` (provisional): ** Disallowed specifying a device handle list without also specifying an external memory handle, see {khronos-opencl-pr}/922[#922]. ** Added a query to determine the handle types an implementation will assume have a linear memory layout, see {khronos-opencl-pr}/940[#940]. ** Added an external memory-specific device handle list enum, see {khronos-opencl-pr}/956[#956]. 
** Clarified that implementations may acquire information about an image from an external memory handle when the image is created, see {khronos-opencl-pr}/970[#970]. - * `cl_khr_external_semaphore` (provisional): + * `<>` (provisional): ** Added the ability to re-import "sync fd" handles into an existing semaphore, see {khronos-opencl-pr}/939[#939]. - ** Clarified that a semaphore may only export one handle type, and that a semaphore created from an external handle cannot also export a handle, see {khronos-opencl-pr}/975[#975]. - ** Clarified that `cl_khr_external_semaphore` requires support for `cl_khr_semaphore`, see {khronos-opencl-pr}/976[#976]. + ** Clarified that a semaphore may only export one handle type, and that a semaphore created from an external handle cannot also export a handle, see {khronos-opencl-pr}/975[#975]. + ** Clarified that `<>` requires support for `<>`, see {khronos-opencl-pr}/976[#976]. ** Added a query to determine if a semaphore may export an external handle, see {khronos-opencl-pr}/997[#997]. - * `cl_khr_semaphore` (provisional): + * `<>` (provisional): ** Added an semaphore-specific device handle list enum, see {khronos-opencl-pr}/956[#956]. ** Restricted semaphores to a single associated device, see {khronos-opencl-pr}/996[#996]. - * `cl_khr_subgroup_rotate`: + * `<>`: ** Clarified that only rotating within a subgroup is supported, see {khronos-opencl-pr}/967[#967]. - diff --git a/api/appendix_extensions.asciidoc b/api/appendix_extensions.asciidoc new file mode 100644 index 000000000..2e5ceccbb --- /dev/null +++ b/api/appendix_extensions.asciidoc @@ -0,0 +1,37 @@ +// Copyright 2023-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +[appendix] + +[[extensions]] += OpenCL Extensions (Informative) + +Extensions to the OpenCL API can be defined by authors, groups of authors, +and the Khronos OpenCL Working Group. 
+The online Registry of extensions is available at URL + +https://registry.khronos.org/OpenCL + +It is possible to generate versions of the API Specification incorporating +different extensions. +At present only a subset of defined extensions can be incorporated in this +fashion. + +The remainder of this appendix documents a set of extensions chosen when +this document was built. + +Extensions are grouped as Khronos `khr`, multivendor `ext`, and then +alphabetically by author ID. +Within each group, extensions are listed in alphabetical order by their +names. + +== Extension Dependencies + +Extensions which have dependencies on specific core versions or on other +extensions will list such dependencies. + +All extensions implicitly require support for OpenCL 1.0. + +include::{generated}/meta/current_extensions_appendix.txt[] +include::{generated}/meta/provisional_extensions_appendix.txt[] +include::{generated}/meta/deprecated_extensions_appendix.txt[] diff --git a/api/appendix_f.asciidoc b/api/appendix_f.asciidoc index 57b6c2435..f2bb39c62 100644 --- a/api/appendix_f.asciidoc +++ b/api/appendix_f.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[error_codes]] @@ -333,4 +332,11 @@ include::{generated}/api/version-notes/CL_MAX_SIZE_RESTRICTION_EXCEEDED.asciidoc include::{generated}/api/version-notes/CL_PROFILING_INFO_NOT_AVAILABLE.asciidoc[] | Returned by {clGetEventProfilingInfo} when the command associated with the specified event was not enqueued into a command-queue with {CL_QUEUE_PROFILING_ENABLE}. +ifdef::cl_khr_icd[] +| {CL_PLATFORM_NOT_FOUND_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_NOT_FOUND_KHR.asciidoc[] + | Returned by {clGetPlatformIDs} when no platforms are available. 
+endif::cl_khr_icd[] + |==== diff --git a/api/appendix_g.asciidoc b/api/appendix_g.asciidoc index fe0850ce4..1c4703727 100644 --- a/api/appendix_g.asciidoc +++ b/api/appendix_g.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2019-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[error_other_misc_enums]] diff --git a/api/appendix_h.asciidoc b/api/appendix_h.asciidoc index eda297c9a..9e9ec6ea8 100644 --- a/api/appendix_h.asciidoc +++ b/api/appendix_h.asciidoc @@ -1,6 +1,4 @@ -// Copyright 2020-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2020-2024 The Khronos Group Inc. [appendix] [[opencl-3.0-backwards-compatibility]] @@ -298,7 +296,7 @@ When read-write images are not supported: OpenCL C compilers supporting read-write images will define the feature macro `+__opencl_c_read_write_images+`. -== Creating 2D Images from Buffers +== Creating 2D Images From Buffers Creating a 2D image from a buffer is optional for devices supporting OpenCL 3.0. When creating a 2D image from a buffer is not supported: @@ -315,7 +313,7 @@ When creating a 2D image from a buffer is not supported: | {clGetDeviceInfo}, passing + {CL_DEVICE_EXTENSIONS} -| Will not describe support for the `cl_khr_image2d_from_buffer` extension if _device_ does not support creating a 2D image from a buffer. +| Will not describe support for the `<<cl_khr_image2d_from_buffer>>` extension if _device_ does not support creating a 2D image from a buffer.
| {clCreateImage} or + {clCreateImageWithProperties}, passing + @@ -426,7 +424,7 @@ When sub-groups are not supported: | {clGetDeviceInfo}, passing + {CL_DEVICE_EXTENSIONS} -| Will not describe support for the `cl_khr_subgroups` extension if _device_ does not support sub-groups. +| Will not describe support for the `<<cl_khr_subgroups>>` extension if _device_ does not support sub-groups. | {clGetKernelSubGroupInfo} | Returns {CL_INVALID_OPERATION} if _device_ does not support sub-groups. @@ -470,7 +468,7 @@ When writing to 3D image objects is not supported: | {clGetDeviceInfo}, passing + {CL_DEVICE_EXTENSIONS} -| Will not describe support for the `cl_khr_3d_image_writes` extension if _device_ does not support writing to 3D image objects. +| Will not describe support for the `<<cl_khr_3d_image_writes>>` extension if _device_ does not support writing to 3D image objects. | {clGetSupportedImageFormats}, passing + {CL_MEM_OBJECT_IMAGE3D} and one of + @@ -546,7 +544,7 @@ OpenCL C compilers supporting the generic address space will define the feature // *** `get_local_linear_id` // ** `work_group_barrier` (as a synonym for `barrier`) -== Language Features that Were Already Optional +== Language Features That Were Already Optional Some OpenCL C language features were already optional before OpenCL 3.0, the API mechanisms for querying these have not changed. diff --git a/api/cl_khr_3d_image_writes.asciidoc b/api/cl_khr_3d_image_writes.asciidoc new file mode 100644 index 000000000..7c980404c --- /dev/null +++ b/api/cl_khr_3d_image_writes.asciidoc @@ -0,0 +1,26 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_3d_image_writes.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_3d_image_writes` adds built-in OpenCL C functions that allow a +kernel to write to 3D image objects in addition to 2D image objects.
+ +See the link:{OpenCLCSpecURL}#cl_khr_3d_image_writes[3D Image Writes] +section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_async_work_group_copy_fence.asciidoc b/api/cl_khr_async_work_group_copy_fence.asciidoc new file mode 100644 index 000000000..3ec9923c7 --- /dev/null +++ b/api/cl_khr_async_work_group_copy_fence.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_async_work_group_copy_fence.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-11-10 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_async_work_group_copy_fence` adds a new built-in OpenCL C function +to establish a memory synchronization ordering of asynchronous copies. + +See the link:{OpenCLCSpecURL}#cl_khr_async_work_group_copy_fence[Async +Work-group Copy Fence] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 0.9.0, 2020-04-21 + ** First assigned version (provisional). + * Revision 1.0.0, 2021-11-10 + ** First non-provisional version. diff --git a/api/cl_khr_byte_addressable_store.asciidoc b/api/cl_khr_byte_addressable_store.asciidoc new file mode 100644 index 000000000..56a32e6e2 --- /dev/null +++ b/api/cl_khr_byte_addressable_store.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_byte_addressable_store.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*Interactions and External Dependencies*:: + - Promoted to OpenCL 1.1 core +*IP Status*:: + No known IP claims. 
+*Contributors*:: + TBD + +=== Description + +`cl_khr_byte_addressable_store` relaxes restrictions on pointers to `char`, +`uchar`, `char2`, `uchar2`, `short`, `ushort` and `half` that were present +in _Section 6.8m: Restrictions_ of the OpenCL 1.0 specification. +With this extension, applications are able to read from and write to +pointers to these types. + +This extension became a core feature in OpenCL 1.1. + +See the link:{OpenCLCSpecURL}#cl_khr_byte_addressable_store[Byte-Addressable +Storage] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc new file mode 100644 index 000000000..6099d7729 --- /dev/null +++ b/api/cl_khr_command_buffer.asciidoc @@ -0,0 +1,436 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_command_buffer.txt[] + +// *Revision*:: +// 0.9.4 +// *Extension and Version Dependencies*:: +// This extension requires OpenCL 1.2 or later. +// Buffering of SVM commands requires OpenCL 2.0 or later. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-03-31 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ewan Crawford, Codeplay Software Ltd. + - Gordon Brown, Codeplay Software Ltd. + - Kenneth Benzie, Codeplay Software Ltd. + - Alastair Murray, Codeplay Software Ltd. + - Jack Frankland, Codeplay Software Ltd. + - Balaji Calidas, Qualcomm Technologies Inc. + - Joshua Kelly, Qualcomm Technologies, Inc. + - Kevin Petit, Arm Ltd. + - Aharon Abramson, Intel. + - Ben Ashbaugh, Intel. + - Boaz Ouriel, Intel. + - Chris Gearing, Intel. 
+ - Pekka Jääskeläinen, Tampere University and Intel + - Jan Solanti, Tampere University + - Nikhil Joshi, NVIDIA + - James Price, Google + - Brice Videau, Argonne National Laboratory + +=== Description + +`cl_khr_command_buffer` adds the ability to record and replay buffers of +OpenCL commands. + +Command-buffers enable a reduction in overhead when enqueuing the same +workload multiple times. By separating the command-queue setup from dispatch, +the ability to replay a set of previously created commands is introduced. + +Device-side _cl_sync_point_khr_ synchronization-points can be used within +command-buffers to define command dependencies. This allows the commands of a +command-buffer to execute out-of-order on a single <> +command-queue. The command-buffer itself has no inherent in-order/out-of-order +property, this ordering is inferred from the command-queue used on command +recording. Out-of-order enqueues without event dependencies of both regular +commands, such as {clEnqueueFillBuffer}, and command-buffers are allowed to +execute concurrently, and it is up to the user to express any dependencies using +events. + +The command-queues a command-buffer will be executed on can be set on replay via +parameters to {clEnqueueCommandBufferKHR}, provided they are +<> with the command-queues used on command-buffer +recording. + +==== Background + +On embedded devices where building a command stream accounts for a significant +expenditure of resources and where workloads are often required to be pipelined, +a solution that minimizes driver overhead can significantly improve the +utilization of accelerators by removing a bottleneck in repeated command stream +generation. + +An additional motivator is lowering task execution latency, as devices can be +kept occupied with work by repeated submissions, without having to wait on +the host to construct commands again for a similar workload. 
+ +==== Rationale + +The command-buffer abstraction over the generation of command streams is a +proven approach which facilitates a significant reduction in driver overhead in +existing real-world applications with repetitive pipelined workloads which are +built on top of Vulkan, DirectX 12, and Metal. + +A primary goal is for a command-buffer to avoid any interaction with +application code after being enqueued until all recorded commands have +completed. As such, any command which maps or migrates memory objects; reads +or writes memory objects; or enqueues a native kernel, is not available for +command-buffer recording. Finally commands recorded into a command buffer do +not wait for or return event objects, these are instead replaced with +device-side synchronization-point identifiers which enable out-of-order +execution when enqueued on <> command-queues. + +Adding new entry-points for individual commands, rather than recording existing +command-queue APIs with begin/end markers was a design decision made for the +following reasons: + +* Individually specified entry points makes it clearer to the user what's + supported, as opposed to adding a large number of error conditions + throughout the specification with all the restrictions. + +* Prevents code forking in existing entry points for the implementer, as + otherwise separate paths in each entry point need to be maintained for both + the recording and normal cases. + +* Allows the definition of a new device-side synchronization primitive rather + than overloading {cl_event_TYPE}. As use of {cl_event_TYPE} in individual commands + allows host interaction from callback and user-events, as well as introducing + complexities when a command-buffer is enqueued multiple times regarding + profiling and execution status. + +* New entry points facilitate returning handles to individual commands, allowing + those commands to be modified between enqueues of the command buffer. 
Not all + command handles are used in this extension, but providing them facilitates + other extensions layered on top to take advantage of them to provide additional + mutable functionality. + +==== Simultaneous Use + +The optional simultaneous use capability was added to the extension so that +vendors can support pipelined workflows, where command-buffers are repeatedly +enqueued without blocking in user code. However, simultaneous use may result in +command-buffers being more expensive to enqueue than in a sequential model, so +the capability is optional to enable optimizations on command-buffer recording. + +=== Interactions With Other Extensions + +The introduction of the command-buffer abstraction enables functionality +beyond what the `cl_khr_command_buffer` extension currently provides, i.e. +the recording of immutable commands to a single queue which can then be +executed without commands synchronizing outside the command-buffer. It is +intended that extra functionality expanding on this will be provided as layered +extensions on top of `cl_khr_command_buffer`. + +Having `cl_khr_command_buffer` as a minimal base specification means that the +API defines mechanisms for functionality that is not enabled by this extension, +these are described in the following sub-sections. `cl_khr_command_buffer` will +retain its provisional extension status until other layered extensions are +released, as these may reveal modifications needed to the base specification to +support their intended use cases. + +==== ND-range Kernel Command Properties + +The {clCommandNDRangeKernelKHR} entry-point defines a `properties` parameter of +new type {cl_ndrange_kernel_command_properties_khr_TYPE}. No properties are defined +in `cl_khr_command_buffer`, but the parameter is intended to enable future +functionality that would change the characteristics of the kernel command. 
+ +==== Command Handles + +All command recording entry-points define a {cl_mutable_command_khr_TYPE} output +parameter which provides a handle to the specific command being recorded. Use of +these output handles is not enabled by the `cl_khr_command_buffer` extension, +but the handles will allow individual commands in a command-buffer to be +referenced by the user. In particular, the capability for an application to use +these handles to modify commands between enqueues of a command-buffer is +envisaged. + +==== List of Queues + +Only a single command-queue can be associated with a command-buffer in the +`cl_khr_command_buffer` extension, but the API is designed with the intention +that a future extension will allow commands to be recorded across multiple +queues in the same command-buffer, providing replay of heterogeneous task +graphs. + +Using multiple queue functionality will result in an error without any layered +extensions to relax usage of the following API features: + +* When a command-buffer is created the API enables passing a list of queues + that the command-buffer will record commands to. Only a single queue is + permitted in `cl_khr_command_buffer`. + +* Individual command recording entry-points define a {cl_command_queue_TYPE} + parameter for which of the queues set on command-buffer creation that command + should be recorded to. This must be passed as NULL in `cl_khr_command_buffer`. + +* {clEnqueueCommandBufferKHR} takes a list of queues for command-buffer execution, + corresponding to those set on creation. Only a single queue is permitted in + `cl_khr_command_buffer`. + +// The 'New ...'
section can be auto-generated + +=== New Commands + + * {clCreateCommandBufferKHR} + * {clRetainCommandBufferKHR} + * {clReleaseCommandBufferKHR} + * {clFinalizeCommandBufferKHR} + * {clEnqueueCommandBufferKHR} + * {clCommandBarrierWithWaitListKHR} + * {clCommandCopyBufferKHR} + * {clCommandCopyBufferRectKHR} + * {clCommandCopyBufferToImageKHR} + * {clCommandCopyImageKHR} + * {clCommandCopyImageToBufferKHR} + * {clCommandFillBufferKHR} + * {clCommandFillImageKHR} + * {clCommandNDRangeKernelKHR} + * {clGetCommandBufferInfoKHR} + * The following SVM entry points are supported only with at least OpenCL 2.0, + and starting from version 0.9.4 of this extension + ** {clCommandSVMMemcpyKHR} + ** {clCommandSVMMemFillKHR} + +=== New Structures + + * {cl_command_buffer_khr} + * {cl_mutable_command_khr} + +=== New Types + + * {cl_device_command_buffer_capabilities_khr_TYPE} + * {cl_command_buffer_khr_TYPE} + * {cl_sync_point_khr_TYPE} + * {cl_command_buffer_info_khr_TYPE} + * {cl_command_buffer_state_khr_TYPE} + * {cl_command_buffer_properties_khr_TYPE} + * {cl_command_buffer_flags_khr_TYPE} + * {cl_ndrange_kernel_command_properties_khr_TYPE} + * {cl_mutable_command_khr_TYPE} + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} + ** {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR} + * {cl_device_command_buffer_capabilities_khr - bitfield_TYPE} + ** {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} + ** {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} + ** {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR} + ** {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} + * {cl_command_buffer_properties_khr_TYPE} + ** {CL_COMMAND_BUFFER_FLAGS_KHR} + * {cl_command_buffer_flags_khr - bitfield_TYPE} + ** {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} + * {Error codes_TYPE} + ** {CL_INVALID_COMMAND_BUFFER_KHR} + ** {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} + ** {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} + * {cl_command_buffer_info_khr_TYPE} + ** 
{CL_COMMAND_BUFFER_QUEUES_KHR} + ** {CL_COMMAND_BUFFER_NUM_QUEUES_KHR} + ** {CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR} + ** {CL_COMMAND_BUFFER_STATE_KHR} + ** {CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR} + ** {CL_COMMAND_BUFFER_CONTEXT_KHR} + * {cl_command_buffer_state_khr_TYPE} + ** {CL_COMMAND_BUFFER_STATE_RECORDING_KHR} + ** {CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR} + ** {CL_COMMAND_BUFFER_STATE_PENDING_KHR} + * {cl_command_type_TYPE} + ** {CL_COMMAND_COMMAND_BUFFER_KHR} + +=== Sample Code + +[source] +---- + #define CL_CHECK(ERROR) \ + if (ERROR) { \ + std::cerr << "OpenCL error: " << ERROR << "\n"; \ + return ERROR; \ + } + + int main() { + cl_platform_id platform; + CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); + cl_device_id device; + CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr)); + + cl_int error; + cl_context context = + clCreateContext(nullptr, 1, &device, nullptr, nullptr, &error); + CL_CHECK(error); + + const char* code = R"OpenCLC( + kernel void vector_addition(global int* tile1, global int* tile2, + global int* res) { + size_t index = get_global_id(0); + res[index] = tile1[index] + tile2[index]; + } + )OpenCLC"; + const size_t length = std::strlen(code); + + cl_program program = + clCreateProgramWithSource(context, 1, &code, &length, &error); + CL_CHECK(error); + + CL_CHECK(clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr)); + + cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); + CL_CHECK(error); + + constexpr size_t frame_count = 60; + constexpr size_t frame_elements = 1024; + constexpr size_t frame_size = frame_elements * sizeof(cl_int); + + constexpr size_t tile_count = 16; + constexpr size_t tile_elements = frame_elements / tile_count; + constexpr size_t tile_size = tile_elements * sizeof(cl_int); + + cl_mem buffer_tile1 = + clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, nullptr, &error); + CL_CHECK(error); + cl_mem buffer_tile2 = + clCreateBuffer(context, CL_MEM_READ_ONLY, 
tile_size, nullptr, &error); + CL_CHECK(error); + cl_mem buffer_res = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, tile_size, nullptr, &error); + CL_CHECK(error); + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(buffer_tile1), &buffer_tile1)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(buffer_tile2), &buffer_tile2)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(buffer_res), &buffer_res)); + + cl_command_queue command_queue = + clCreateCommandQueue(context, device, + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); + CL_CHECK(error); + + cl_command_buffer_khr command_buffer = + clCreateCommandBufferKHR(1, &command_queue, nullptr, &error); + CL_CHECK(error); + + cl_mem buffer_src1 = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + cl_mem buffer_src2 = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + cl_mem buffer_dst = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + + cl_sync_point_khr tile_sync_point = 0; + for (size_t tile_index = 0; tile_index < tile_count; tile_index++) { + std::array<cl_sync_point_khr, 2> copy_sync_points; + CL_CHECK(clCommandCopyBufferKHR(command_buffer, + command_queue, buffer_src1, buffer_tile1, tile_index * tile_size, 0, + tile_size, tile_sync_point ? 1 : 0, + tile_sync_point ? &tile_sync_point : nullptr, &copy_sync_points[0], + nullptr)); + CL_CHECK(clCommandCopyBufferKHR(command_buffer, + command_queue, buffer_src2, buffer_tile2, tile_index * tile_size, 0, + tile_size, tile_sync_point ? 1 : 0, + tile_sync_point ?
&tile_sync_point : nullptr, &copy_sync_points[1], + nullptr)); + + cl_sync_point_khr nd_sync_point; + CL_CHECK(clCommandNDRangeKernelKHR(command_buffer, + command_queue, nullptr, kernel, 1, nullptr, &tile_elements, nullptr, + copy_sync_points.size(), copy_sync_points.data(), &nd_sync_point, + nullptr)); + + CL_CHECK(clCommandCopyBufferKHR(command_buffer, + command_queue, buffer_res, buffer_dst, 0, tile_index * tile_size, + tile_size, 1, &nd_sync_point, &tile_sync_point, nullptr)); + } + + CL_CHECK(clFinalizeCommandBufferKHR(command_buffer)); + + std::random_device random_device; + std::mt19937 random_engine{random_device()}; + std::uniform_int_distribution<cl_int> random_distribution{ + 0, std::numeric_limits<cl_int>::max() / 2}; + auto random_generator = [&]() { return random_distribution(random_engine); }; + + for (size_t frame_index = 0; frame_index < frame_count; frame_index++) { + std::array<cl_event, 2> write_src_events; + std::vector<cl_int> src1(frame_elements); + std::generate(src1.begin(), src1.end(), random_generator); + CL_CHECK(clEnqueueWriteBuffer(command_queue, buffer_src1, CL_FALSE, 0, + frame_size, src1.data(), 0, nullptr, + &write_src_events[0])); + std::vector<cl_int> src2(frame_elements); + std::generate(src2.begin(), src2.end(), random_generator); + CL_CHECK(clEnqueueWriteBuffer(command_queue, buffer_src2, CL_FALSE, 0, + frame_size, src2.data(), 0, nullptr, + &write_src_events[1])); + + CL_CHECK(clEnqueueCommandBufferKHR(0, NULL, command_buffer, 2, + write_src_events.data(), nullptr)); + + CL_CHECK(clFinish(command_queue)); + + CL_CHECK(clReleaseEvent(write_src_events[0])); + CL_CHECK(clReleaseEvent(write_src_events[1])); + } + + CL_CHECK(clReleaseCommandBufferKHR(command_buffer)); + CL_CHECK(clReleaseCommandQueue(command_queue)); + + CL_CHECK(clReleaseMemObject(buffer_src1)); + CL_CHECK(clReleaseMemObject(buffer_src2)); + CL_CHECK(clReleaseMemObject(buffer_dst)); + + CL_CHECK(clReleaseMemObject(buffer_tile1)); + CL_CHECK(clReleaseMemObject(buffer_tile2)); +
CL_CHECK(clReleaseMemObject(buffer_res)); + + CL_CHECK(clReleaseKernel(kernel)); + CL_CHECK(clReleaseProgram(program)); + CL_CHECK(clReleaseContext(context)); + + return 0; + } +---- + +=== Issues + +. Introduce a `clCloneCommandBufferKHR` entry-point for cloning a + command-buffer. ++ +-- +*UNRESOLVED* +-- +. Enable detached command-buffer execution, where command-buffers are executed + on their own internal queue to prevent locking user created queues for the + duration of their execution. ++ +-- +*UNRESOLVED* +-- + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-11-10 + ** First assigned version (provisional). + * 0.9.1, 2022-08-24 + ** Specify an error if a command-buffer is finalized multiple times + (provisional). + * 0.9.2, 2023-03-31 + ** Introduce context query {CL_COMMAND_BUFFER_CONTEXT_KHR} (provisional). + * 0.9.3, 2023-04-04 + ** Remove Invalid command-buffer state (provisional). + * 0.9.4, 2023-05-11 + ** Add clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR command entries + (provisional). + diff --git a/api/cl_khr_command_buffer_multi_device.asciidoc b/api/cl_khr_command_buffer_multi_device.asciidoc new file mode 100644 index 000000000..9d3d87c34 --- /dev/null +++ b/api/cl_khr_command_buffer_multi_device.asciidoc @@ -0,0 +1,316 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_command_buffer_multi_device.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-04-30 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ewan Crawford, Codeplay Software Ltd. + - Gordon Brown, Codeplay Software Ltd. + - Kenneth Benzie, Codeplay Software Ltd. + - Alastair Murray, Codeplay Software Ltd. + - Jack Frankland, Codeplay Software Ltd. + - Balaji Calidas, Qualcomm Technologies Inc. + - Joshua Kelly, Qualcomm Technologies, Inc. + - Kevin Petit, Arm Ltd. + - Aharon Abramson, Intel. + - Ben Ashbaugh, Intel. 
+ - Boaz Ouriel, Intel. + - Pekka Jääskeläinen, Tampere University and Intel. + - Jan Solanti, Tampere University + - Nikhil Joshi, NVIDIA + - James Price, Google + +=== Description + +The `cl_khr_command_buffer` extension separates command construction from +enqueue by providing a mechanism to record a set of commands which can then +be repeatedly enqueued. +However, the commands in a command-buffer can only be recorded to a single +command-queue specified on command-buffer creation. + +`cl_khr_command_buffer_multi_device` extends the scope of a command-buffer +to allow commands to be recorded across multiple queues in the same +command-buffer, providing execution of heterogeneous task graphs from +command-queues associated with different devices. + +The ability for a user to deep copy an existing command-buffer so that the +commands target a different device is also made possible by +`cl_khr_command_buffer_multi_device`. +Depending on platform support the mapping of commands to the new target +device can be done either explicitly by the user, or automatically by the +OpenCL runtime. 
+ +=== New Types + +Bitfield for querying command-buffer capabilities of an OpenCL Platform with +{clGetPlatformInfo}, see the <>: + + * {cl_platform_command_buffer_capabilities_khr_TYPE} + +=== New Commands + + * {clRemapCommandBufferKHR} + +=== New Enums + +Enums for querying device command-buffer capabilities with +{clGetDeviceInfo}, see the <>: + + * {cl_device_info_TYPE} + ** {CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR} + ** {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} + * {cl_device_command_buffer_capabilities_khr_TYPE} + ** {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR} + * {cl_command_buffer_flags_khr_TYPE} + ** {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR} + * {cl_platform_command_buffer_capabilities_khr_TYPE} + ** {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR} + ** {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} + ** {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} + +=== Sample Code + +[source,opencl] +---- +#define CL_CHECK(ERROR) \ + if (ERROR) { \ + std::cerr << "OpenCL error: " << ERROR << "\n"; \ + return ERROR; \ + } + +int main() { + cl_platform_id platform; + CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); + cl_platform_command_buffer_capabilities_khr platform_caps; + CL_CHECK(clGetPlatformInfo(platform, + CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR, + sizeof(platform_caps), &platform_caps, NULL)); + if (!(platform_caps & CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR)) { + std::cerr << "Command-buffer remapping not supported but used in example, " + "skipping\n"; + return 0; + } + + cl_uint num_devices = 0; + CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices)); + std::vector<cl_device_id> devices(num_devices); + CL_CHECK( + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices.data(), nullptr)); + + // Checks omitted for brevity that either a) the platform supports + // CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR or b) each device is listed + // in
the others CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR + + cl_int error; + cl_context context = + clCreateContext(NULL, num_devices, devices.data(), NULL, NULL, &error); + CL_CHECK(error); + + std::vector<cl_command_queue> queues(num_devices); + for (cl_uint i = 0; i < num_devices; i++) { + queues[i] = clCreateCommandQueue(context, devices[i], 0, &error); + CL_CHECK(error); + } + + const char *code = R"OpenCLC( + kernel void vector_addition(global int* tile1, global int* tile2, + global int* res) { + size_t index = get_global_id(0); + res[index] = tile1[index] + tile2[index]; + } + )OpenCLC"; + const size_t length = std::strlen(code); + + cl_program program = + clCreateProgramWithSource(context, 1, &code, &length, &error); + CL_CHECK(error); + + CL_CHECK( + clBuildProgram(program, num_devices, devices.data(), NULL, NULL, NULL)); + + cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); + CL_CHECK(error); + + constexpr size_t frame_count = 60; + constexpr size_t frame_elements = 1024; + constexpr size_t frame_size = frame_elements * sizeof(cl_int); + + constexpr size_t tile_count = 16; + constexpr size_t tile_elements = frame_elements / tile_count; + constexpr size_t tile_size = tile_elements * sizeof(cl_int); + + cl_mem buffer_tile1 = + clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, NULL, &error); + CL_CHECK(error); + + cl_mem buffer_tile2 = + clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, NULL, &error); + CL_CHECK(error); + + cl_mem buffer_res = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, tile_size, NULL, &error); + CL_CHECK(error); + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(buffer_tile1), &buffer_tile1)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(buffer_tile2), &buffer_tile2)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(buffer_res), &buffer_res)); + + cl_command_buffer_khr original_cmdbuf = + clCreateCommandBufferKHR(num_devices, queues.data(), nullptr, &error); + CL_CHECK(error); + + cl_mem buffer_src1 = + clCreateBuffer(context,
CL_MEM_READ_ONLY, frame_size, NULL, &error); + CL_CHECK(error); + + cl_mem buffer_src2 = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, NULL, &error); + CL_CHECK(error); + + cl_mem buffer_dst = + clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size, NULL, &error); + CL_CHECK(error); + + cl_sync_point_khr tile_sync_point = 0; + for (size_t tile_index = 0; tile_index < tile_count; tile_index++) { + cl_sync_point_khr copy_sync_points[2]; + CL_CHECK(clCommandCopyBufferKHR( + original_cmdbuf, queues[tile_index % num_devices], buffer_src1, + buffer_tile1, tile_index * tile_size, 0, tile_size, + tile_sync_point ? 1 : 0, tile_sync_point ? &tile_sync_point : NULL, + &copy_sync_points[0], NULL)); + + CL_CHECK(clCommandCopyBufferKHR( + original_cmdbuf, queues[tile_index % num_devices], buffer_src2, + buffer_tile2, tile_index * tile_size, 0, tile_size, + tile_sync_point ? 1 : 0, + tile_sync_point ? &tile_sync_point : nullptr, + &copy_sync_points[1], NULL)); + + cl_sync_point_khr nd_sync_point; + CL_CHECK(clCommandNDRangeKernelKHR( + original_cmdbuf, queues[tile_index % num_devices], NULL, kernel, 1, + NULL, &tile_elements, NULL, 2, copy_sync_points, &nd_sync_point, NULL)); + + CL_CHECK(clCommandCopyBufferKHR( + original_cmdbuf, queues[tile_index % num_devices], buffer_res, + buffer_dst, 0, tile_index * tile_size, tile_size, 1, &nd_sync_point, + &tile_sync_point, NULL)); + } + + CL_CHECK(clFinalizeCommandBufferKHR(original_cmdbuf)); + + std::random_device random_device; + std::mt19937 random_engine{random_device()}; + std::uniform_int_distribution<cl_int> random_distribution{ + 0, std::numeric_limits<cl_int>::max() / 2}; + auto random_generator = [&]() { return random_distribution(random_engine); }; + + auto enqueue_frame = [&](cl_command_buffer_khr command_buffer) { + for (size_t frame_index = 0; frame_index < frame_count; frame_index++) { + std::array<cl_event, 3> enqueue_events; + std::vector<cl_int> src1(frame_elements); + std::generate(src1.begin(), src1.end(), random_generator); +
CL_CHECK(clEnqueueWriteBuffer(queues[0], buffer_src1, CL_FALSE, 0, + frame_size, src1.data(), 0, nullptr, + &enqueue_events[0])); + std::vector<cl_int> src2(frame_elements); + std::generate(src2.begin(), src2.end(), random_generator); + CL_CHECK(clEnqueueWriteBuffer(queues[0], buffer_src2, CL_FALSE, 0, + frame_size, src2.data(), 0, nullptr, + &enqueue_events[1])); + + CL_CHECK(clEnqueueCommandBufferKHR(0, NULL, command_buffer, 2, + enqueue_events.data(), + &enqueue_events[2])); + + CL_CHECK(clWaitForEvents(1, &enqueue_events[2])); + + for (auto e : enqueue_events) { + CL_CHECK(clReleaseEvent(e)); + } + } + return 0; + }; + + error = enqueue_frame(original_cmdbuf); + CL_CHECK(error); + + // Remap from N queues to 1 queue and run again + cl_command_buffer_khr remapped_cmdbuf = clRemapCommandBufferKHR( + original_cmdbuf, CL_TRUE, 1, queues.data(), 0, NULL, NULL, &error); + CL_CHECK(error); + + error = enqueue_frame(remapped_cmdbuf); + CL_CHECK(error); + + for (unsigned i = 0; i < num_devices; ++i) { + CL_CHECK(clReleaseCommandQueue(queues[i])); + } + CL_CHECK(clReleaseMemObject(buffer_src1)); + CL_CHECK(clReleaseMemObject(buffer_src2)); + CL_CHECK(clReleaseMemObject(buffer_dst)); + + CL_CHECK(clReleaseMemObject(buffer_tile1)); + CL_CHECK(clReleaseMemObject(buffer_tile2)); + CL_CHECK(clReleaseMemObject(buffer_res)); + + CL_CHECK(clReleaseCommandBufferKHR(original_cmdbuf)); + CL_CHECK(clReleaseCommandBufferKHR(remapped_cmdbuf)); + + CL_CHECK(clReleaseKernel(kernel)); + CL_CHECK(clReleaseProgram(program)); + CL_CHECK(clReleaseContext(context)); + + return 0; +} +---- + +=== Issues + +. In cl_event profiling info for a command-buffer running across the queues for + several devices, how do we know what the first & last commands executed + are if there is concurrent execution across devices?
++ +-- +*RESOLVED*: Allowed an implementation to fallback to +{CL_PROFILING_COMMAND_SUBMIT} and {CL_PROFILING_COMMAND_COMPLETE} when +reporting {CL_PROFILING_COMMAND_START} & {CL_PROFILING_COMMAND_END}. +-- +. Is an atomic constraint required? This would forbid regular clEnqueue* commands, + from interleaving execution on a queue which a command-buffer is being + executed on. ++ +-- +*RESOLVED*: This behavior can block parallelism, and constraint is +expressible by the user through existing synchronization mechanisms if they +require it. +-- +. It is currently an error if a set of command-queues passed to + {clEnqueueCommandBufferKHR} aren't compatible with those set on recording. + Should we relax this as an optional capability that allows an + implementation to do a more expensive command-buffer enqueue for this + case? ++ +-- +*RESOLVED*: Added as an optional feature. +-- + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2023-04-14 + ** First assigned version (provisional). + * Revision 0.9.1, 2023-04-30 + ** Added clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR as affected + functions (provisional). diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc new file mode 100644 index 000000000..beda74ae0 --- /dev/null +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -0,0 +1,378 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_command_buffer_mutable_dispatch.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-08-31 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ewan Crawford, Codeplay Software Ltd. + - Gordon Brown, Codeplay Software Ltd. + - Kenneth Benzie, Codeplay Software Ltd. + - Alastair Murray, Codeplay Software Ltd. + - Jack Frankland, Codeplay Software Ltd. + - Balaji Calidas, Qualcomm Technologies Inc. 
+ - Joshua Kelly, Qualcomm Technologies, Inc. + - Kevin Petit, Arm Ltd. + - Aharon Abramson, Intel. + - Ben Ashbaugh, Intel. + - Boaz Ouriel, Intel. + - Pekka Jääskeläinen, Tampere University + - Jan Solanti, Tampere University + - Nikhil Joshi, NVIDIA + - James Price, Google + +=== Description + +The `<<cl_khr_command_buffer>>` extension separates command construction +from enqueue by providing a mechanism to record a set of commands which can +then be repeatedly enqueued. +However, the commands recorded to the command-buffer are immutable between +enqueues. + +`cl_khr_command_buffer_mutable_dispatch` removes this restriction. +In particular, this extension allows the configuration of a kernel execution +command in a command-buffer, called a _mutable-dispatch_, to be modified. +This allows inputs and outputs to the kernel, as well as work-item sizes and +offsets, to change without having to re-record the entire command sequence +in a new command-buffer. + +=== Interactions With Other Extensions + +The {cl_command_buffer_structure_type_khr_TYPE} type has been added to this +extension for the purpose of allowing expansion of mutable functionality in +future extensions layered on top of +`cl_khr_command_buffer_mutable_dispatch`. +Any parameter that is a structure containing a `void* next` member *must* +have a value of `next` that is either `NULL`, or is a pointer to a valid +structure defined by `cl_khr_command_buffer_mutable_dispatch` or an +extension layered on top. +To be a valid structure in the pointer chain the first member of the +structure *must* be a {cl_command_buffer_structure_type_khr_TYPE} identifier +for the structure being iterated through, and the second member a `void* +next` pointer to the next structure in the chain. + +[NOTE] +==== +This approach is based on structure pointer chains in Vulkan, for more +details see the "`Valid Usage for Structure Pointer Chains`" section of the +Vulkan specification.
+==== + +This is designed so that another extension layered on +`cl_khr_command_buffer_mutable_dispatch` could allow modification of +commands recorded to a command-buffer other than kernel execution commands. +As all command recording entry-points return a {cl_mutable_command_khr_TYPE} +handle, aspects like which {cl_mem_TYPE} object a command uses could +also be updated between enqueues of the command-buffer. + +=== New Types + + * {cl_mutable_dispatch_fields_khr_TYPE} + * {cl_mutable_command_info_khr_TYPE} + * {cl_command_buffer_structure_type_khr_TYPE} + * {cl_mutable_base_config_khr_TYPE} + * {cl_mutable_dispatch_asserts_khr_TYPE} + * {cl_mutable_dispatch_config_khr_TYPE} + * {cl_mutable_dispatch_exec_info_khr_TYPE} + * {cl_mutable_dispatch_arg_khr_TYPE} + +=== New Commands + + * {clUpdateMutableCommandsKHR} + * {clGetMutableCommandInfoKHR} + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} + * {cl_ndrange_kernel_command_properties_khr_TYPE} + ** {CL_MUTABLE_DISPATCH_ASSERTS_KHR} + ** {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} + * {cl_mutable_dispatch_asserts_khr_TYPE} + ** {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} + * {cl_mutable_dispatch_fields_khr_TYPE} + ** {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} + ** {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} + ** {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} + ** {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} + ** {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} + * {cl_mutable_command_info_khr_TYPE} + ** {CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR} + ** {CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR} + ** {CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR} + ** {CL_MUTABLE_DISPATCH_KERNEL_KHR} + ** {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR} + ** {CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR} + ** {CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR} + ** {CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR} + ** {CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR} + * {cl_command_buffer_flags_khr_TYPE} + ** {CL_COMMAND_BUFFER_MUTABLE_KHR} + *
{cl_command_buffer_properties_khr_TYPE} + ** {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} + * {cl_command_buffer_structure_type_khr_TYPE} + ** {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR} + ** {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR} + * New <> + ** {CL_INVALID_MUTABLE_COMMAND_KHR} + +=== Sample Code + +==== Sample Application Updating the Arguments to a Mutable-dispatch Between Command-buffer Submissions + +[source,opencl] +---- +#define CL_CHECK(ERROR) \ + if (ERROR) { \ + std::cerr << "OpenCL error: " << ERROR << "\n"; \ + return ERROR; \ + } + +int main() { + cl_platform_id platform; + CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); + cl_device_id device; + CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr)); + + cl_mutable_dispatch_fields_khr mutable_capabilities; + CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, + nullptr)); + if (!(mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR)) { + std::cerr + << "Device does not support update arguments to a mutable-dispatch, " + "skipping example.\n"; + return 0; + } + + cl_int error; + cl_context context = + clCreateContext(nullptr, 1, &device, nullptr, nullptr, &error); + CL_CHECK(error); + + const char* code = R"OpenCLC( +kernel void vector_addition(global int* tile1, global int* tile2, + global int* res) { + size_t index = get_global_id(0); + res[index] = tile1[index] + tile2[index]; +} +)OpenCLC"; + const size_t length = std::strlen(code); + + cl_program program = + clCreateProgramWithSource(context, 1, &code, &length, &error); + CL_CHECK(error); + + CL_CHECK(clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr)); + + cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); + CL_CHECK(error); + + // Set the parameters of the frames + constexpr size_t iterations = 60; + constexpr size_t elem_size = sizeof(cl_int); + constexpr size_t frame_width = 32; + constexpr 
size_t frame_count = frame_width * frame_width; + constexpr size_t frame_size = frame_count * elem_size; + + cl_mem input_A_buffers[2] = {nullptr, nullptr}; + cl_mem input_B_buffers[2] = {nullptr, nullptr}; + cl_mem output_buffers[2] = {nullptr, nullptr}; + + // Create the buffer to swap between even and odd kernel iterations + for (size_t i = 0; i < 2; i++) { + input_A_buffers[i] = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + + input_B_buffers[i] = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + + output_buffers[i] = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + } + + cl_command_queue command_queue = + clCreateCommandQueue(context, device, 0, &error); + CL_CHECK(error); + + // Create command-buffer with mutable flag so we can update it + cl_command_buffer_properties_khr properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0}; + cl_command_buffer_khr command_buffer = + clCreateCommandBufferKHR(1, &command_queue, properties, &error); + CL_CHECK(error); + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_A_buffers[0])); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &input_B_buffers[0])); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_buffers[0])); + + // Instruct the nd-range command to allow for mutable kernel arguments + cl_ndrange_kernel_command_properties_khr mutable_properties[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0}; + + // Create command handle for mutating nd-range command + cl_mutable_command_khr command_handle = nullptr; + + // Add the nd-range kernel command + error = clCommandNDRangeKernelKHR( + command_buffer, command_queue, mutable_properties, kernel, 1, nullptr, + &frame_count, nullptr, 0, nullptr, nullptr, &command_handle); + CL_CHECK(error); + + CL_CHECK(clFinalizeCommandBufferKHR(command_buffer)); + 
+ // Prepare for random input generation + std::random_device random_device; + std::mt19937 random_engine{random_device()}; + std::uniform_int_distribution<cl_int> random_distribution{ + std::numeric_limits<cl_int>::min() / 2, + std::numeric_limits<cl_int>::max() / 2}; + + // Iterate over each frame + for (size_t i = 0; i < iterations; i++) { + // Set the buffers for the current frame + cl_mem input_A_buffer = input_A_buffers[i % 2]; + cl_mem input_B_buffer = input_B_buffers[i % 2]; + cl_mem output_buffer = output_buffers[i % 2]; + + // Generate input A data + std::vector<cl_int> input_a(frame_count); + std::generate(std::begin(input_a), std::end(input_a), + [&]() { return random_distribution(random_engine); }); + + // Write the generated data to the input A buffer + error = + clEnqueueWriteBuffer(command_queue, input_A_buffer, CL_FALSE, 0, + frame_size, input_a.data(), 0, nullptr, nullptr); + CL_CHECK(error); + + // Generate input B data + std::vector<cl_int> input_b(frame_count); + std::generate(std::begin(input_b), std::end(input_b), + [&]() { return random_distribution(random_engine); }); + + // Write the generated data to the input B buffer + error = + clEnqueueWriteBuffer(command_queue, input_B_buffer, CL_FALSE, 0, + frame_size, input_b.data(), 0, nullptr, nullptr); + CL_CHECK(error); + + // If not executing the first frame + if (i != 0) { + // Configure the mutable configuration to update the kernel arguments + cl_mutable_dispatch_arg_khr arg_0{0, sizeof(cl_mem), &input_A_buffer}; + cl_mutable_dispatch_arg_khr arg_1{1, sizeof(cl_mem), &input_B_buffer}; + cl_mutable_dispatch_arg_khr arg_2{2, sizeof(cl_mem), &output_buffer}; + cl_mutable_dispatch_arg_khr args[] = {arg_0, arg_1, arg_2}; + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command_handle, + 3 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no
change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */}; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config}; + + // Update the command buffer with the mutable configuration + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + CL_CHECK(error); + } + + // Enqueue the command buffer + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr, + nullptr); + CL_CHECK(error); + + // Allocate memory for the output data + std::vector<cl_int> output(frame_count); + + // Read the output data from the output buffer + error = clEnqueueReadBuffer(command_queue, output_buffer, CL_TRUE, 0, + frame_size, output.data(), 0, nullptr, nullptr); + CL_CHECK(error); + + // Flush and execute the read buffer + error = clFinish(command_queue); + CL_CHECK(error); + + // Verify the results of the frame + for (size_t i = 0; i < frame_count; ++i) { + const cl_int result = input_a[i] + input_b[i]; + if (output[i] != result) { + std::cerr << "Error: Incorrect result at index " << i << " - Expected " + << result << " was " << output[i] << std::endl; + std::exit(1); + } + } + } + + std::cout << "Result verified\n"; + + CL_CHECK(clReleaseCommandBufferKHR(command_buffer)); + for (size_t i = 0; i < 2; i++) { + CL_CHECK(clReleaseMemObject(input_A_buffers[i])); + CL_CHECK(clReleaseMemObject(input_B_buffers[i])); + CL_CHECK(clReleaseMemObject(output_buffers[i])); + } + CL_CHECK(clReleaseCommandQueue(command_queue)); + CL_CHECK(clReleaseKernel(kernel)); + CL_CHECK(clReleaseProgram(program)); + CL_CHECK(clReleaseContext(context)); + CL_CHECK(clReleaseDevice(device)); + return 0; +} +---- + +=== Issues + +. Include simpler, more user friendly, entry-points for updating kernel + arguments?
++ +-- +*RESOLVED*: Can be implemented in the ecosystem as a layer on top; if that +layer proves popular then it can be introduced, possibly as another extension +on top. +-- + +. Add a command-buffer clone entry-point for deep copying a command-buffer? + Arguments could then be updated and both command-buffers used. + Useful for techniques like double buffering. ++ +-- +*RESOLVED*: In the use-case we're targeting a user would only have a handle +to the original command-buffer, but not the clone, which may limit the +usefulness of this capability. +Additionally, an implementation could be complicated by non-trivial deep +copying of the underlying objects contained in the command-buffer. +As a result of this new entry-point being an additive change to the +specification it is omitted, and if its functionality has demand later, it +may be introduced as a standalone extension. +-- + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2022-08-31 + ** First assigned version (provisional). + * Revision 0.9.1, 2023-11-07 + ** Add type {cl_mutable_dispatch_asserts_khr_TYPE} and its possible values + (provisional). diff --git a/api/cl_khr_create_command_queue.asciidoc b/api/cl_khr_create_command_queue.asciidoc new file mode 100644 index 000000000..fe6bf0c08 --- /dev/null +++ b/api/cl_khr_create_command_queue.asciidoc @@ -0,0 +1,58 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_create_command_queue.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_create_command_queue` allows OpenCL 1.x devices to support an +equivalent of the {clCreateCommandQueueWithProperties} API that was added in +OpenCL 2.0.
+This allows OpenCL 1.x devices to support other optional extensions or +features that use the {clCreateCommandQueueWithProperties} API to specify +additional command-queue properties that cannot be specified using the +OpenCL 1.x {clCreateCommandQueue} API. + +No new command-queue properties are required by this extension. +Applications may use the existing {CL_DEVICE_QUEUE_PROPERTIES} query to +determine command-queue properties that are supported by the device. + +OpenCL 2.x devices may support this extension for compatibility. +In this scenario, the function added by this extension will have the same +capabilities as the core {clCreateCommandQueueWithProperties} API. +Applications that only target OpenCL 2.x devices should use the core OpenCL +2.x {clCreateCommandQueueWithProperties} API instead of this extension API. + +NOTE: The type of the property value passed as {CL_QUEUE_PROPERTIES} to +{clCreateCommandQueueWithPropertiesKHR} is specified as +{cl_bitfield_TYPE} while the type passed to +{clCreateCommandQueueWithProperties} is +{cl_command_queue_properties_TYPE}. +While this is not a promotion in terms of the suffixing, both types +are aliased to {cl_ulong_TYPE}, so no ABI or compiler issues should +result if the extension and core APIs are supported. + + +=== New Commands + + * {clCreateCommandQueueWithPropertiesKHR} + +=== New Types + + * {cl_queue_properties_khr_TYPE} + +//@ TODO Missing bitfield values allowed for CL_QUEUE_PROPERTIES? + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_d3d10_sharing.asciidoc b/api/cl_khr_d3d10_sharing.asciidoc new file mode 100644 index 000000000..ace32dbe9 --- /dev/null +++ b/api/cl_khr_d3d10_sharing.asciidoc @@ -0,0 +1,136 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_d3d10_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_d3d10_sharing` provides interoperability between OpenCL and Direct3D 10. + +=== New Types + + * {cl_d3d10_device_source_khr_TYPE} + * {cl_d3d10_device_set_khr_TYPE} + +=== New Commands + + * {clGetDeviceIDsFromD3D10KHR} + * {clCreateFromD3D10BufferKHR} + * {clCreateFromD3D10Texture2DKHR} + * {clCreateFromD3D10Texture3DKHR} + * {clEnqueueAcquireD3D10ObjectsKHR} + * {clEnqueueReleaseD3D10ObjectsKHR} + +=== New Tokens + + * {cl_d3d10_device_source_khr_TYPE} + ** {CL_D3D10_DEVICE_KHR} + ** {CL_D3D10_DXGI_ADAPTER_KHR} + * {cl_d3d10_device_set_khr_TYPE} + ** {CL_PREFERRED_DEVICES_FOR_D3D10_KHR} + ** {CL_ALL_DEVICES_FOR_D3D10_KHR} + * {cl_context_properties_TYPE} + ** {CL_CONTEXT_D3D10_DEVICE_KHR} + * {cl_context_info_TYPE} + ** {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR} + * {cl_mem_info_TYPE} + ** {CL_MEM_D3D10_RESOURCE_KHR} + * {cl_image_info_TYPE} + ** {CL_IMAGE_D3D10_SUBRESOURCE_KHR} + * {cl_event_info_TYPE} + ** {CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR} + ** {CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR} + * New Error Codes + ** {CL_INVALID_D3D10_DEVICE_KHR} + ** {CL_INVALID_D3D10_RESOURCE_KHR} + ** {CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR} + ** {CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR} + + +=== Issues + + . Should this extension be KHR or EXT? ++ +-- +*PROPOSED*: KHR. +If this extension is to be approved by Khronos then it should be KHR, +otherwise EXT. +Not all platforms can support this extension, but that is also true of +OpenGL interop. + +*RESOLVED*: KHR. +-- + + . Requiring SharedHandle on ID3D10Resource ++ +-- +Requiring this can largely simplify things at the DDI level and make some +implementations faster. 
+However, the DirectX spec only defines the shared handle for a subset of the +resources we would like to support: + + * `D3D10_RESOURCE_MISC_SHARED` - Enables the sharing of resource data + between two or more Direct3D devices. + The only resources that can be shared are 2D non-mipmapped textures. + +*PROPOSED*: A: Add wording to the spec about some implementations needing +the resource setup as shared: + +Some implementations may require the resource to be shared on the D3D10 side +of the API. + +If we do that, do we need another enum to describe this failure case? + +*PROPOSED*: B: Require that all implementations support both shared and +non-shared resources. +The restrictions prohibiting multisample textures and the flag +D3D10_USAGE_IMMUTABLE guarantee software access to all shareable resources. + +*RESOLVED*: Require that implementations support both +D3D10_RESOURCE_MISC_SHARED being set and not set. +Add the query for {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR} to +determine on a per-context basis which method will be faster. +-- + + . Texture1D support ++ +-- +There is not a matching CL type, so do we want to support this and map to +buffer or Texture2D? + +*RESOLVED*: We will not add support for ID3D10Texture1D objects unless a +corresponding OpenCL 1D Image type is created. +-- + + . CL/D3D10 queries ++ +-- +The GL interop has {clGetGLObjectInfo} and {clGetGLTextureInfo}. +It is unclear if these are needed on the D3D10 interop side since the D3D10 +spec makes these queries trivial on the D3D10 object itself. +Also, not all of the semantics of the GL call map across. + +*PROPOSED*: Add the {clGetMemObjectInfo} and {clGetImageInfo} parameter +names {CL_MEM_D3D10_RESOURCE_KHR} and {CL_IMAGE_D3D10_SUBRESOURCE_KHR} to +query the D3D10 resource from which a {cl_mem_TYPE} was created. +From this data, any D3D10 side information may be queried using the D3D10 +API. 
+ +*RESOLVED*: We will use {clGetMemObjectInfo} and {clGetImageInfo} to access +this information. +-- + + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_d3d11_sharing.asciidoc b/api/cl_khr_d3d11_sharing.asciidoc new file mode 100644 index 000000000..884044eda --- /dev/null +++ b/api/cl_khr_d3d11_sharing.asciidoc @@ -0,0 +1,56 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_d3d11_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_d3d11_sharing` provides interoperability between OpenCL and Direct3D 11. + +=== New Commands + + * {clGetDeviceIDsFromD3D11KHR} + * {clCreateFromD3D11BufferKHR} + * {clCreateFromD3D11Texture2DKHR} + * {clCreateFromD3D11Texture3DKHR} + * {clEnqueueAcquireD3D11ObjectsKHR} + * {clEnqueueReleaseD3D11ObjectsKHR} + +=== New Tokens + + * {cl_d3d11_device_source_khr_TYPE} + ** {CL_D3D11_DEVICE_KHR} + ** {CL_D3D11_DXGI_ADAPTER_KHR} + * {cl_d3d11_device_set_khr_TYPE} + ** {CL_PREFERRED_DEVICES_FOR_D3D11_KHR} + ** {CL_ALL_DEVICES_FOR_D3D11_KHR} + * {cl_context_properties_TYPE} + ** {CL_CONTEXT_D3D11_DEVICE_KHR} + * {cl_context_info_TYPE} + ** {CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR} + * {cl_mem_info_TYPE} + ** {CL_MEM_D3D11_RESOURCE_KHR} + * {cl_image_info_TYPE} + ** {CL_IMAGE_D3D11_SUBRESOURCE_KHR} + * {cl_event_info_TYPE} + ** {CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR} + ** {CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR} + * New Error Codes + ** {CL_INVALID_D3D11_DEVICE_KHR} + ** {CL_INVALID_D3D11_RESOURCE_KHR} + ** {CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR} + ** {CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_depth_images.asciidoc b/api/cl_khr_depth_images.asciidoc new file mode 100644 index 000000000..73469eecc --- /dev/null +++ b/api/cl_khr_depth_images.asciidoc @@ -0,0 +1,25 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_depth_images.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_depth_images` adds OpenCL C support for depth images. + +See the link:{OpenCLCSpecURL}#cl_khr_depth_images[Depth Images] section of +the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_device_enqueue_local_arg_types.asciidoc b/api/cl_khr_device_enqueue_local_arg_types.asciidoc new file mode 100644 index 000000000..ee3acb41b --- /dev/null +++ b/api/cl_khr_device_enqueue_local_arg_types.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_device_enqueue_local_arg_types.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_device_enqueue_local_arg_types` allows arguments to blocks that are +passed to the *enqueue_kernel* built-in OpenCL C function to be pointers to +any type (built-in or user-defined) in local memory, instead of requiring +arguments to blocks to be pointers to void in local memory. + +See the link:{OpenCLCSpecURL}#cl_khr_device_enqueue_local_arg_types[Device +Enqueue Local Argument Types] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_device_uuid.asciidoc b/api/cl_khr_device_uuid.asciidoc new file mode 100644 index 000000000..023b34dfd --- /dev/null +++ b/api/cl_khr_device_uuid.asciidoc @@ -0,0 +1,40 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_device_uuid.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + DateTBD +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_device_uuid` adds the ability to query a universally unique +identifier (UUID) for an OpenCL driver and OpenCL device. +The UUIDs returned by the query may be used to identify drivers and devices +across processes or APIs. + +=== New Enums + +Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: + + * {cl_device_info} + ** {CL_DEVICE_UUID_KHR} + ** {CL_DRIVER_UUID_KHR} + ** {CL_DEVICE_LUID_VALID_KHR} + ** {CL_DEVICE_LUID_KHR} + ** {CL_DEVICE_NODE_MASK_KHR} + * Constants describing the size of the driver and device UUIDs, and the + device LUID: + ** {CL_UUID_SIZE_KHR} + ** {CL_LUID_SIZE_KHR} + +=== Version History + + * Revision 1.0.0, 2020-08-27 + ** First assigned version. diff --git a/api/cl_khr_dx9_media_sharing.asciidoc b/api/cl_khr_dx9_media_sharing.asciidoc new file mode 100644 index 000000000..92e8ed517 --- /dev/null +++ b/api/cl_khr_dx9_media_sharing.asciidoc @@ -0,0 +1,65 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_dx9_media_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_dx9_media_sharing` allows applications to use media surfaces as +OpenCL memory objects. +This allows efficient sharing of data between OpenCL and selected adapter +APIs (only DX9 for now). 
+If this extension is supported, an OpenCL image object can be created from a +media surface and the OpenCL API can be used to execute kernels that read +and/or write memory objects that are media surfaces. +Note that OpenCL memory objects may be created from the adapter media +surface if and only if the OpenCL context has been created from that +adapter. + +=== New Commands + + * {clGetDeviceIDsFromDX9MediaAdapterKHR} + * {clCreateFromDX9MediaSurfaceKHR} + * {clEnqueueAcquireDX9MediaSurfacesKHR} + * {clEnqueueReleaseDX9MediaSurfacesKHR} + +=== New Tokens + + * {cl_dx9_media_adapter_type_khr_TYPE} + ** {CL_ADAPTER_D3D9_KHR} + ** {CL_ADAPTER_D3D9EX_KHR} + ** {CL_ADAPTER_DXVA_KHR} + * {cl_dx9_media_adapter_set_khr_TYPE} + ** {CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR} + ** {CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR} + * {cl_context_info_TYPE} + ** {CL_CONTEXT_ADAPTER_D3D9_KHR} + ** {CL_CONTEXT_ADAPTER_D3D9EX_KHR} + ** {CL_CONTEXT_ADAPTER_DXVA_KHR} + * {cl_mem_info_TYPE} + ** {CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR} + ** {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} + * {cl_image_info_TYPE} + ** {CL_IMAGE_DX9_MEDIA_PLANE_KHR} + * {cl_event_info_TYPE} + ** {CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR} + ** {CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR} + * New Error Codes + ** {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} + ** {CL_INVALID_DX9_MEDIA_SURFACE_KHR} + ** {CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR} + ** {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_egl_event.asciidoc b/api/cl_khr_egl_event.asciidoc new file mode 100644 index 000000000..86b5fbb32 --- /dev/null +++ b/api/cl_khr_egl_event.asciidoc @@ -0,0 +1,72 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_egl_event.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + TBD + +=== Description + +`cl_khr_egl_event` allows creating OpenCL event objects linked to EGL fence +sync objects, potentially improving efficiency of sharing images and buffers +between the two APIs. +The companion `EGL_KHR_cl_event` extension provides the complementary +functionality of creating an EGL sync object from an OpenCL event object. + +=== New Commands + + * {clCreateEventFromEGLSyncKHR} + +=== New Tokens + + * New Error Codes + ** {CL_INVALID_EGL_OBJECT_KHR} + ** {CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR} + +=== Issues + +Most issues are shared with `<<cl_khr_gl_event>>` and are resolved as +described in that extension. + + . Should we support implicit synchronization? ++ +-- +*RESOLVED*: No, as this may be very difficult since the synchronization +would not be with EGL, it would be with currently bound EGL client APIs. +It would be necessary to know which client APIs might be bound, to validate +that they're associated with the `EGLDisplay` associated with the OpenCL +context, and to reach into each such context. +-- + + . Do we need to have typedefs to use EGL handles in OpenCL? ++ +-- +*RESOLVED* Using typedefs for EGL handles. +-- + + . Should we restrict which CL APIs can be used with this cl_event? ++ +-- +*RESOLVED* Use is limited to {clEnqueueAcquire}*** calls only. +-- + + . What is the desired behaviour for this extension when EGLSyncKHR is of a + type other than `EGL_SYNC_FENCE_KHR`? ++ +-- +*RESOLVED* This extension only requires support for `EGL_SYNC_FENCE_KHR`. +Support of other types is an implementation choice, and will result in +CL_INVALID_EGL_OBJECT_KHR if unsupported. +-- + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_egl_image.asciidoc b/api/cl_khr_egl_image.asciidoc new file mode 100644 index 000000000..d324637ad --- /dev/null +++ b/api/cl_khr_egl_image.asciidoc @@ -0,0 +1,103 @@ +// Copyright 2017-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_egl_image.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_egl_image` provides a mechanism for creating OpenCL memory objects +from EGLImages. + +=== New Commands + + * {clCreateFromEGLImageKHR} + * {clEnqueueAcquireEGLObjectsKHR} + * {clEnqueueReleaseEGLObjectsKHR} + * {cl_event_info_TYPE} + ** {CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR} + ** {CL_COMMAND_RELEASE_EGL_OBJECTS_KHR} + * New Error Codes + ** {CL_EGL_RESOURCE_NOT_ACQUIRED_KHR} + ** {CL_INVALID_EGL_OBJECT_KHR} + +=== Issues + + . This extension does not support reference counting of the images, so the + onus is on the application to behave sensibly and not release the + underlying {cl_mem_TYPE} object while the `EGLImage` is still being + used. + . In order to ensure data integrity, the application is responsible for + synchronizing access to shared CL/EGL image objects by their respective + APIs. + Failure to provide such synchronization may result in race conditions + and other undefined behavior. + This may be accomplished by calling {clWaitForEvents} with the event + objects returned by any OpenCL commands which use the shared image + object or by calling {clFinish}. + . Currently {CL_MEM_READ_ONLY} is the only supported flag for _flags_. ++ +-- +*RESOLVED*: Implementation will now return an error if writing to a shared +object that is not supported rather than disallowing it entirely. +-- + . Currently restricted to 2D image objects. + . What should happen for YUV color-space conversion, multi plane images, + and chroma-siting, and channel mapping? ++ +-- +*RESOLVED*: YUV is no longer explicitly described in this extension. +Before this removal the behavior was dependent on the platform. +This extension explicitly leaves the YUV layout to the platform and `EGLImage` +source extension (i.e.
is implementation specific). +Colorspace conversion must be applied by the application using a color +conversion matrix. + +The expected extension path if YUV color-space conversion is to be supported +is to introduce a YUV image type and provide overloaded versions of the +read_image built-in functions. + +Getting image information for a YUV image should return the original image +size (non quantized size) when all of Y U and V are present in the image. +If the planes have been separated then the actual dimensionality of the +separated plane should be reported. +For example with YUV 4:2:0 (NV12) with a YUV image of 256x256, the Y only +image would return 256x256 whereas the UV only image would return 128x128. +-- + . Should an attribute list be used instead? ++ +-- +*RESOLVED*: function has been changed to use an attribute list. +-- + . What should happen for `EGLImage` extensions which introduce formats + without a mapping to an OpenCL image channel data type or channel order? ++ +-- +*RESOLVED*: This extension does not define those formats. +It is expected that as additional EGL extensions are added to create EGL +images from other sources, an extension to CL will be introduced where +needed to represent those image types. +-- + . What are the guarantees to synchronization behavior provided by the + implementation? ++ +-- +The basic portable form of synchronization is to use a {clFinish}, as is the +case for GL interop. +In addition implementations which support the synchronization extensions +`<>` and `EGL_KHR_cl_event` can interoperate more +efficiently as described in those extensions. +-- + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_expect_assume.asciidoc b/api/cl_khr_expect_assume.asciidoc new file mode 100644 index 000000000..c5559a134 --- /dev/null +++ b/api/cl_khr_expect_assume.asciidoc @@ -0,0 +1,75 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_expect_assume.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-11-10 +*Interactions and External Dependencies*:: + The initial version of this extension extends the OpenCL SPIR-V + environment to support new instructions. + Please refer to the OpenCL SPIR-V Environment Specification that + describes how this extension modifies the OpenCL SPIR-V environment. +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_expect_assume` adds mechanisms to provide information to the +compiler that may improve the performance of some kernels. +Specifically, this extension adds the ability to: + + * Tell the compiler the _expected_ value of a variable. + * Allow the compiler to _assume_ a condition is true. + +These functions are not required for functional correctness. + +The initial version of this extension extends the OpenCL SPIR-V environment +to support new instructions for offline compilation tool chains. +Similar functionality may be provided by some OpenCL C online compilation +tool chains, but formal support in OpenCL C is not required by the initial +version of the extension. + +=== Sample Code + +Although this extension does not formally extend OpenCL C, the ability to +provide _expect_ and _assume_ information is supported by many OpenCL C +compiler tool chains. +The sample code below describes how to test for and provide _expect_ and +_assume_ information to compilers based on Clang: + +[source,opencl_c] +---- +// __has_builtin is an optional compiler feature that is supported by Clang. +// If this feature is not supported, we will assume the builtin is not present. +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +kernel void test(global int* dst, global int* src) +{ + int value = src[get_global_id(0)]; + + // Tell the compiler that the most likely source value is zero. 
+#if __has_builtin(__builtin_expect) + value = __builtin_expect(value, 0); +#endif + + // Tell the compiler that the source value is non-negative. + // Behavior is undefined if the source value is actually negative. +#if __has_builtin(__builtin_assume) + __builtin_assume(value >= 0); +#endif + + dst[get_global_id(0)] = value % 4; +} +---- + +=== Version History + + * Revision 1.0.0, 2021-11-10 + ** First assigned version. diff --git a/api/cl_khr_extended_async_copies.asciidoc b/api/cl_khr_extended_async_copies.asciidoc new file mode 100644 index 000000000..0fac1890f --- /dev/null +++ b/api/cl_khr_extended_async_copies.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_extended_async_copies.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-11-10 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_extended_async_copies` augments built-in OpenCL C asynchronous copy +functions to support more patterns: + + . For async copy between 2D source and 2D destination. + . For async copy between 3D source and 3D destination. + +See the link:{OpenCLCSpecURL}#cl_khr_extended_async_copies[Extended Async +Copy Functions] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 0.9.0, 2020-04-21 + ** First assigned version (provisional). + * Revision 0.9.1, 2021-09-06 + ** Elements-based proposal update. + * Revision 1.0.0, 2021-11-10 + ** First non-provisional version. diff --git a/api/cl_khr_extended_bit_ops.asciidoc b/api/cl_khr_extended_bit_ops.asciidoc new file mode 100644 index 000000000..b516f1a23 --- /dev/null +++ b/api/cl_khr_extended_bit_ops.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2018-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_extended_bit_ops.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-04-22 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_extended_bit_ops` adds built-in OpenCL C functions for performing +extended bit operations. +Specifically, the following functions are added: + + * bitfield insert: insert bits from one source operand into another source + operand. + * bitfield extract: extract bits from a source operand, with sign- or + zero-extension. + * bit reverse: reverse the bits of a source operand. + +See the link:{OpenCLCSpecURL}#cl_khr_extended_bit_ops[Extended Bit +Operations] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2021-04-22 + ** Initial version. diff --git a/api/cl_khr_extended_versioning.asciidoc b/api/cl_khr_extended_versioning.asciidoc new file mode 100644 index 000000000..4cf053be1 --- /dev/null +++ b/api/cl_khr_extended_versioning.asciidoc @@ -0,0 +1,158 @@ +// Copyright 2019-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_extended_versioning.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-02-12 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kévin Petit, Arm Ltd. + - Ben Ashbaugh, Intel + - Alastair Murray, Codeplay Software Ltd. + - Einar Hov, Arm Ltd. + +=== Description + +The `cl_khr_extended_versioning` extension introduces new platform and +device queries that return detailed version information to applications. +It makes it possible to return the exact revision of the specification or +intermediate languages supported by an implementation. 
+It also enables implementations to communicate a version number for each of +the extensions they support and remove the requirement for applications to +process strings to test for the presence of an extension or intermediate +language or built-in kernel. + +Extended versioning was promoted to a core feature in OpenCL 3.0. +However, the query for {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} was replaced +by the query for {CL_DEVICE_OPENCL_C_ALL_VERSIONS}. +With the exception of this query, all types, structures, enums, and macro +names defined by this extension are equivalent to the corresponding core +name (with the `_KHR` or `_khr` suffix removed). + +The version number encoding scheme is described in the <> section. + +=== New Types + + * {cl_version_khr_TYPE} + +=== New Structures + + * {cl_name_version_khr_TYPE} + * {CL_NAME_VERSION_MAX_NAME_SIZE_KHR_anchor} + +=== New Macro Names + + * {CL_VERSION_MAJOR_BITS_KHR_anchor} + * {CL_VERSION_MINOR_BITS_KHR_anchor} + * {CL_VERSION_PATCH_BITS_KHR_anchor} + * `CL_VERSION_MAJOR_MASK_KHR` + * `CL_VERSION_MINOR_MASK_KHR` + * `CL_VERSION_PATCH_MASK_KHR` + * `CL_VERSION_MAJOR_KHR` + * `CL_VERSION_MINOR_KHR` + * `CL_VERSION_PATCH_KHR` + * `CL_MAKE_VERSION_KHR` + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_NUMERIC_VERSION_KHR} + ** {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} + ** {CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR} + ** {CL_DEVICE_ILS_WITH_VERSION_KHR} + ** {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR} + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_NUMERIC_VERSION_KHR} + ** {CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR} + +=== Conformance Tests + +. Each of the new queries described in this extension must be attempted and + succeed. +. It must be verified that the information returned by all queries that + extend existing queries is consistent with the information returned by + existing queries. +. Some of the queries introduced by this extension impose uniqueness constraints + on the list of returned values. 
+ It must be verified that these constraints are satisfied. + +=== Issues + +. What compatibility policy should we define? e.g. a _revision_ has to be + backwards-compatible with previous ones ++ +-- +*RESOLVED*: No general rules as that wouldn't be testable. +Here's a recommended policy: + + - Patch version bump: only clarifications and small/obvious bugfixes. + - Minor version bump: backwards-compatible changes only. + - Major version bump: backwards compatibility may break. +-- + +. Do we want versioning for built-in kernels as returned by {CL_DEVICE_BUILT_IN_KERNELS}? ++ +-- +*RESOLVED*: No immediate use-case for versioning but being able to get a + list of individual kernels without parsing a string is desirable. + Adding {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR}. +-- + +. What is the behaviour of the queries that return an array of structures when +there are no elements to return? ++ +-- +*RESOLVED*: The query succeeds and the size returned is zero. +-- + +. What value should be returned when version information is not available? ++ +-- +*RESOLVED*: If a patch version is not available, it should be reported as 0. + If no version information is available, 0.0.0 should be + reported. + These values have been chosen as they are guaranteed to be lower + than or equal to any other version. +-- + +. Should we add a query to report SPIR-V extended instruction sets? ++ +-- +*RESOLVED*: It is unlikely that we will introduce many SPIR-V extended + instruction sets without an accompanying API extension. + Decided not to do this. +-- + +. Should the queries for which the old-style query doesn't exist in a given + OpenCL version be present (e.g. + {CL_DEVICE_ILS_WITH_VERSION_KHR} prior to OpenCL 2.1 or + without support for `<<cl_khr_il_program>>` or + {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} on OpenCL 1.0)? ++ +-- +*RESOLVED*: All the queries are always present.
+ {CL_DEVICE_ILS_WITH_VERSION_KHR} returns an empty + set when Intermediate Languages are not supported. + {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} always returns 1.0 on + an OpenCL 1.0 platform. +-- + +. Is reporting multiple Intermediate Languages with the same name and major/minor + versions but differing patch versions allowed? ++ +-- +*RESOLVED*: No. + This isn't aligned with the intended use for patch versions and + makes it harder for implementations to guarantee consistency + with the existing IL queries. +-- + +=== Version History + + * Revision 1.0.0, 2020-02-12 + ** Initial version. diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc new file mode 100644 index 000000000..0685d72c3 --- /dev/null +++ b/api/cl_khr_external_memory.asciidoc @@ -0,0 +1,316 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory` defines a generic mechanism to share buffer and +image objects between OpenCL and many other APIs, including: + + * Optional properties to import external memory exported by other APIs + into OpenCL for a set of devices.
+ * Routines to explicitly hand off memory ownership between OpenCL and + other APIs. + +Other related extensions define specific external memory types that may be +imported into OpenCL. + + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' section can be auto-generated + +=== New Commands + + * {clEnqueueAcquireExternalMemObjectsKHR} + * {clEnqueueReleaseExternalMemObjectsKHR} + +=== New Structures + + * None + +=== New Types + + * {cl_external_memory_handle_type_khr_TYPE} + +=== New Enums + + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} + * {cl_device_info_TYPE} + ** {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} + ** {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + * {cl_mem_properties_TYPE} + ** {CL_MEM_DEVICE_HANDLE_LIST_KHR} + ** {CL_MEM_DEVICE_HANDLE_LIST_END_KHR} + * Return values from {clGetEventInfo} when _param_name_ is + {CL_EVENT_COMMAND_TYPE}: + ** {CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR} + ** {CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR} + +[[cl_khr_external_memory-Sample-Code]] +=== Sample Code + +==== Example for Creating a CL Buffer From an Exported External Buffer in a Single Device Context + +This example also requires use of the `<<cl_khr_external_memory_opaque_fd>>` +extension. + +[source] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with just first device +clCreateContext(..., 1, devices, ...); + +// Obtain fd/win32 or similar handle for external memory to be imported +// from other API. +int fd = getFdForExternalMemory(); + +// Create extMemBuffer of type cl_mem from fd.
+cl_mem_properties_khr extMemProperties[] = +{ + (cl_mem_properties_khr)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, + (cl_mem_properties_khr)fd, + 0 +}; + +cl_mem extMemBuffer = clCreateBufferWithProperties(/*context*/ clContext, + /*properties*/ extMemProperties, + /*flags*/ 0, + /*size*/ size, + /*host_ptr*/ NULL, + /*errcode_ret*/ &errcode_ret); +---- + + +==== Example for Creating a CL Image From an Exported External Image for Single Device Usage in a Multi-Device Context + +This example also requires use of the `<<cl_khr_external_memory_opaque_fd>>` +extension. + +[source] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with first two devices +clCreateContext(..., 2, devices, ...); + +// Create img of type cl_mem usable only on devices[0] + +// Create img of type cl_mem. +// Obtain fd/win32 or similar handle for external memory to be imported +// from other API. +int fd = getFdForExternalMemory(); + +// Set cl_image_format based on external image info +cl_image_format clImgFormat = { }; +clImgFormat.image_channel_order = CL_RGBA; +clImgFormat.image_channel_data_type = CL_UNORM_INT8; + +// Set cl_image_desc based on external image info +size_t clImageFormatSize; +cl_image_desc image_desc = { }; +image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; +image_desc.image_width = width; +image_desc.image_height = height; +image_desc.image_depth = depth; +image_desc.image_array_size = num_slices; +image_desc.image_row_pitch = width * 8 * 4; // May need alignment +image_desc.image_slice_pitch = image_desc.image_row_pitch * height; +image_desc.num_mip_levels = 1; +image_desc.num_samples = 0; +image_desc.buffer = NULL; + +cl_mem_properties_khr extMemProperties[] = { + (cl_mem_properties_khr)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, + (cl_mem_properties_khr)fd, + (cl_mem_properties_khr)CL_MEM_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties_khr)devices[0], + CL_MEM_DEVICE_HANDLE_LIST_END_KHR, + 0 +}; + +cl_mem img =
clCreateImageWithProperties(/*context*/ clContext, + /*properties*/ extMemProperties, + /*flags*/ 0, + /*image_format*/ &clImgFormat, + /*image_desc*/ &image_desc, + /*errcode_ret*/ &errcode_ret); + +// Use clGetImageInfo to get cl_image_format details. +size_t clImageFormatSize; +clGetImageInfo(img, + CL_IMAGE_FORMAT, + sizeof(cl_image_format), + &clImageFormat, + &clImageFormatSize); +---- + + +==== Example for Synchronization Using Wait and Signal + +[source] +---- +// Start the main rendering loop + +// Create extSem of type cl_semaphore_khr using clCreateSemaphoreWithPropertiesKHR + +// Create extMem of type cl_mem using clCreateBufferWithProperties or clCreateImageWithProperties + +while (true) { + // (not shown) Signal the semaphore from the other API + + // Wait for the semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on 'extSem' + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &extSem, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel that accesses extMem + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &extSem, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch work in other API that waits on 'extSem' +} +---- + +==== Example With Memory Sharing Using Acquire/Release + +[source] +---- +// Create extSem of type cl_semaphore_khr using +// clCreateSemaphoreWithPropertiesKHR with CL_SEMAPHORE_HANDLE_*_KHR. + +// Create extMem1 and extMem2 of type cl_mem using clCreateBufferWithProperties +// or clCreateImageWithProperties + +while (true) { + // (not shown) Signal the semaphore from the other API. 
Wait for the + // semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on extSem + clEnqueueWaitSemaphoresKHR(/*command_queue*/ cq1, + /*num_sema_objects*/ 1, + /*sema_objects*/ &extSem, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Get explicit ownership of extMem1 + clEnqueueAcquireExternalMemObjectsKHR(/*command_queue*/ cq1, + /*num_mem_objects*/ 1, + /*mem_objects*/ &extMem1, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel that accesses extMem1 on cq1 on cl_device1 + clEnqueueNDRangeKernel(cq1, ..., &event1); + + // Launch kernel that accesses both extMem1 and extMem2 on cq2 on cl_device2 + // Migration of extMem1 and extMem2 handles through regular CL memory + // migration. + clEnqueueNDRangeKernel(cq2, ..., &event1, &event2); + + // Give up ownership of extMem1 before you signal the semaphore. Handle + // memory migration here. + clEnqueueReleaseExternalMemObjectsKHR(/*command_queue*/ cq2, + /*num_mem_objects*/ 1, + /*mem_objects*/ &extMem1, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Signal the semaphore from OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ cq2, + /*num_sema_objects*/ 1, + /*sema_objects*/ &extSem, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch work in other API that waits on 'extSem' + // Other API accesses extMem1, but not extMem2 on device-1 +} +---- + +=== Issues + +. How should the import of images that are created in external APIs with + non-linear tiling be robustly handled? ++ +-- +*UNRESOLVED* +-- + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional).
+ * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). diff --git a/api/cl_khr_external_memory_dma_buf.asciidoc b/api/cl_khr_external_memory_dma_buf.asciidoc new file mode 100644 index 000000000..19d54ffa4 --- /dev/null +++ b/api/cl_khr_external_memory_dma_buf.asciidoc @@ -0,0 +1,92 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory_dma_buf.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. +//@ This extension requires the `<>` extension. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory_dma_buf` extends +{cl_external_memory_handle_type_khr_TYPE} to support Linux `dma_buf` as an +external memory handle type that may be specified when creating a buffer or +image memory object. + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' 
section can be auto-generated + +=== New Commands + + None + +=== New Structures + + * None + +=== New Types + + * None + +=== New Enums + + * {cl_external_memory_handle_type_khr_TYPE} + ** {CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional). + * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). diff --git a/api/cl_khr_external_memory_dx.asciidoc b/api/cl_khr_external_memory_dx.asciidoc new file mode 100644 index 000000000..c2fbf6184 --- /dev/null +++ b/api/cl_khr_external_memory_dx.asciidoc @@ -0,0 +1,95 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory_dx.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. +//@ This extension requires the `<>` extension. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory_dx` extends +{cl_external_memory_handle_type_khr_TYPE} to support Windows handles +referring to Direct 3D resources as external memory handle types that may be +specified when creating a buffer or image memory object. + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' section can be auto-generated + +=== New Commands + + None + +=== New Structures + + * None + +=== New Types + + * None + +=== New Enums + + * {cl_external_memory_handle_type_khr_TYPE} + ** {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR} + ** {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR} + ** {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR} + ** {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional). + * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). 
diff --git a/api/cl_khr_external_memory_opaque_fd.asciidoc b/api/cl_khr_external_memory_opaque_fd.asciidoc new file mode 100644 index 000000000..990582b40 --- /dev/null +++ b/api/cl_khr_external_memory_opaque_fd.asciidoc @@ -0,0 +1,92 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory_opaque_fd.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. +//@ This extension requires the `<>` extension. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory_opaque_fd` extends +{cl_external_memory_handle_type_khr_TYPE} to support a POSIX file descriptor +handle as an external memory handle type that may be specified when creating +a buffer or image memory object. + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' section can be auto-generated + +=== New Commands + + None + +=== New Structures + + * None + +=== New Types + + * None + +=== New Enums + + * {cl_external_memory_handle_type_khr_TYPE} + ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). 
+ * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional). + * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). diff --git a/api/cl_khr_external_memory_win32.asciidoc b/api/cl_khr_external_memory_win32.asciidoc new file mode 100644 index 000000000..fdbb7e75d --- /dev/null +++ b/api/cl_khr_external_memory_win32.asciidoc @@ -0,0 +1,93 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory_win32.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. +//@ This extension requires the `<>` extension. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory_win32` extends +{cl_external_memory_handle_type_khr_TYPE} to support Windows handles as +external memory handle types that may be specified when creating a buffer or +image memory object. + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' 
section can be auto-generated + +=== New Commands + + None + +=== New Structures + + * None + +=== New Types + + * None + +=== New Enums + + * {cl_external_memory_handle_type_khr_TYPE} + ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR} + ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional). + * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc new file mode 100644 index 000000000..211b42ff2 --- /dev/null +++ b/api/cl_khr_external_semaphore.asciidoc @@ -0,0 +1,289 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*Interactions and External Dependencies*:: + * This extension requires OpenCL 1.2. + * The `<<cl_khr_semaphore>>` extension is required as it defines semaphore + objects as well as the wait and signal operations on semaphores. + * For OpenCL to be able to import external semaphores from other APIs + using this extension, the other API is required to provide the + mechanisms below: + ** Ability to export semaphore handles + ** Ability to query a semaphore handle in the form of one of the handle types + supported by OpenCL.
+ * The other APIs that want to use semaphores exported by OpenCL using this + extension are required to provide the mechanism below: + ** Ability to import semaphore handles using handle types exported by + OpenCL. +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_semaphore` introduced semaphores as a new type along with a set of +APIs for create, release, retain, wait and signal operations on it. +This extension defines APIs and mechanisms to share semaphores created in an +external API by importing into and exporting from OpenCL. + +This extension defines: + + * New attributes that can be passed as part of + {cl_semaphore_properties_khr_TYPE} for specifying properties of external + semaphores to be imported or exported. + * New attributes that can be passed as part of + {cl_semaphore_info_khr_TYPE} for specifying properties of external + semaphores to be exported. + * An extension to {clCreateSemaphoreWithPropertiesKHR} to accept external + semaphore properties allowing to import or export an external semaphore + into or from OpenCL. + * Semaphore handle types required for importing and exporting semaphores. + * Modifications to Wait and Signal API behavior when dealing with external + semaphores created from different handle types. + * An API to query exportable semaphore handles using a specified handle type. + +The layered extensions `<>`, +`<>`, +`<>`, and +`<>` define specific external semaphores +that may be imported into or exported from OpenCL.
+ +=== New Types + + * {cl_external_semaphore_handle_type_khr_TYPE} + +=== New Enums + + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} + ** {CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} + * {cl_device_info_TYPE} + ** {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} + ** {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} + * {cl_semaphore_properties_khr_TYPE} and {cl_semaphore_info_khr_TYPE}: + ** {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} + ** {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR} + * {cl_semaphore_info_khr_TYPE} + ** {CL_SEMAPHORE_EXPORTABLE_KHR} + +=== Sample Code + +The following examples use the `<>` +extension to obtain an external semaphore. +Similar code can be written using the other layered extensions. + +==== Example for Importing a Semaphore Created by Another API in OpenCL in a Single-Device Context + +[source,c] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with just first device +clCreateContext(..., 1, devices, ...); + +// Obtain fd/win32 or similar handle for external semaphore to be imported +// from the other API. +int fd = getFdForExternalSemaphore(); + +// Create clSema of type cl_semaphore_khr usable on the only available device +// assuming the semaphore was imported from the same device. + +cl_semaphore_properties_khr sema_props[] = + {(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, + (cl_semaphore_properties_khr)fd, + 0}; + +int errcode_ret = 0; +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); +---- + +==== Example for Importing a Semaphore Created by Another API in OpenCL in a Multi-device Context for Single Device Usage + +[source,c] +---- +// Get cl_devices of the platform. 
+clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with first two devices +clCreateContext(..., 2, devices, ...); + +// Obtain fd/win32 or similar handle for external semaphore to be imported +// from the other API. +int fd = getFdForExternalSemaphore(); + +// Create clSema of type cl_semaphore_khr usable only on device 1 +// assuming the semaphore was imported from the same device. +cl_semaphore_properties_khr sema_props[] = { + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, + (cl_semaphore_properties_khr)fd, + (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, + (cl_semaphore_properties_khr)devices[1], + CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, + 0 +}; + +int errcode_ret = 0; +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +---- + +==== Example for Synchronization Using a Semaphore Created by Another API and Imported in OpenCL + +[source,c] +---- +// Create clSema using one of the above examples of external semaphore creation. 
+ +int errcode_ret = 0; +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +// Start the main loop + +while (true) { + // (not shown) Signal the semaphore from the other API + + // Wait for the semaphore in OpenCL + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch work in the other API that waits on 'clSema' + +} +---- + +==== Example for Synchronization Using a Semaphore Exported by OpenCL + +[source,c] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with first two devices +clCreateContext(..., 2, devices, ...); + +// Create clSema of type cl_semaphore_khr usable only on device 1 +cl_semaphore_properties_khr sema_props[] = { + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, + (cl_semaphore_properties_khr)devices[1], + CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, + 0 +}; + +int errcode_ret = 0; +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +// Application queries handle-type and the exportable handle associated with the semaphore. 
+clGetSemaphoreInfoKHR(clSema, + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, + sizeof(cl_external_semaphore_handle_type_khr), + &handle_type, + &handle_type_size); + +// The other API or process can use the exported semaphore handle +// to import +int fd = -1; +if (handle_type == CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR) { + clGetSemaphoreHandleForTypeKHR(clSema, + device, + CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, + sizeof(int), + &fd, + NULL); +} + +// Start the main rendering loop + +while (true) { + // (not shown) Signal the semaphore from the other API + + // Wait for the semaphore in OpenCL + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch work in the other API that waits on 'clSema' +} +---- + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-11-16 + ** Added {CL_SEMAPHORE_EXPORTABLE_KHR_anchor}. + * Revision 0.9.2, 2023-11-21 + ** Added re-import function call to `<>` + diff --git a/api/cl_khr_external_semaphore_dx_fence.asciidoc b/api/cl_khr_external_semaphore_dx_fence.asciidoc new file mode 100644 index 000000000..6f9c2ee71 --- /dev/null +++ b/api/cl_khr_external_semaphore_dx_fence.asciidoc @@ -0,0 +1,49 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore_dx_fence.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_semaphore_dx_fence` supports importing and exporting a +D3D12 fence as an external semaphore using the APIs introduced by +`<>`. + +=== New Enums + + * {cl_external_semaphore_handle_type_khr_TYPE} + ** {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). diff --git a/api/cl_khr_external_semaphore_opaque_fd.asciidoc b/api/cl_khr_external_semaphore_opaque_fd.asciidoc new file mode 100644 index 000000000..d1119242f --- /dev/null +++ b/api/cl_khr_external_semaphore_opaque_fd.asciidoc @@ -0,0 +1,49 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore_opaque_fd.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_semaphore_opaque_fd` supports importing and exporting a +restricted POSIX file descriptor as an external semaphore using the APIs +introduced by `<>`. + +=== New Enums + + * {cl_external_semaphore_handle_type_khr_TYPE} + ** {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc new file mode 100644 index 000000000..a8175fe28 --- /dev/null +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -0,0 +1,62 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore_sync_fd.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_semaphore_sync_fd` supports importing and exporting a POSIX +file descriptor handle to a Linux Sync File or Android Fence object as an +external semaphore using the APIs introduced by +`<>`. + +=== New Commands + + * {clGetSemaphoreHandleForTypeKHR} + +=== New Types + + * {cl_semaphore_reimport_properties_khr_TYPE} + +=== New Enums + + * {cl_external_semaphore_handle_type_khr_TYPE} + ** {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-11-16 + ** Added {CL_SEMAPHORE_EXPORTABLE_KHR_anchor}. + * Revision 0.9.2, 2023-11-21 + ** Added re-import function call to `<>` diff --git a/api/cl_khr_external_semaphore_win32.asciidoc b/api/cl_khr_external_semaphore_win32.asciidoc new file mode 100644 index 000000000..224302f2d --- /dev/null +++ b/api/cl_khr_external_semaphore_win32.asciidoc @@ -0,0 +1,50 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore_win32.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_semaphore_win32` supports importing and exporting an NT +handle or global share handle as an external semaphore using the APIs +introduced by `<>`. + +=== New Enums + + * {cl_external_semaphore_handle_type_khr_TYPE} + ** {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} + ** {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). diff --git a/api/cl_khr_fp16.asciidoc b/api/cl_khr_fp16.asciidoc new file mode 100644 index 000000000..7732cc29e --- /dev/null +++ b/api/cl_khr_fp16.asciidoc @@ -0,0 +1,34 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +//@ TODO There are API elements (defines) to this, as well as OpenCL C +//@ TODO Why does this even exist? All API elements appear to be in OpenCL 1.0 + +include::{generated}/meta/{refprefix}cl_khr_fp16.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_fp16` adds support to OpenCL C for half scalar and vector types as +built-in types that can be used for arithmetic operations, conversions, etc. + +See the link:{OpenCLCSpecURL}#cl_khr_fp16[Half-Precision Floating-Point] +section of the OpenCL C specification for more information. 
+ +=== New Tokens + + * {cl_device_info_TYPE} + ** {CL_DEVICE_HALF_FP_CONFIG} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_fp64.asciidoc b/api/cl_khr_fp64.asciidoc new file mode 100644 index 000000000..ad1e8f763 --- /dev/null +++ b/api/cl_khr_fp64.asciidoc @@ -0,0 +1,35 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +//@ TODO There are API elements (defines) to this, like DBL_RADIX, as well as OpenCL C +//@ TODO Most API elements appear to be in OpenCL 1.0 / OpenCL 1.2 + +include::{generated}/meta/{refprefix}cl_khr_fp64.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_fp64` adds support to OpenCL C for double-precision scalar and +vector types as built-in types that can be used for arithmetic operations, +conversions, etc. + +See the link:{OpenCLCSpecURL}#cl_khr_fp64[Double-Precision Floating-Point] +section of the OpenCL C specification for more information. + +=== New Tokens + + * {cl_device_info_TYPE} + ** {CL_DEVICE_DOUBLE_FP_CONFIG} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_gl_depth_images.asciidoc b/api/cl_khr_gl_depth_images.asciidoc new file mode 100644 index 000000000..470af9859 --- /dev/null +++ b/api/cl_khr_gl_depth_images.asciidoc @@ -0,0 +1,34 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_gl_depth_images.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_gl_depth_images` extends OpenCL / OpenGL sharing defined by the +`<>` extension to allow an OpenCL image to be created +from an OpenGL depth or depth-stencil texture. 
+ +Depth images with an image channel order of {CL_DEPTH_STENCIL} can only be +created using the {clCreateFromGLTexture} API. + +=== New Enums + + * {cl_channel_order_TYPE} + ** {CL_DEPTH_STENCIL} + * {cl_channel_type_TYPE} + ** {CL_UNORM_INT24} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_gl_event.asciidoc b/api/cl_khr_gl_event.asciidoc new file mode 100644 index 000000000..b7d10c5c4 --- /dev/null +++ b/api/cl_khr_gl_event.asciidoc @@ -0,0 +1,109 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_gl_event.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_gl_event` allows creating OpenCL event objects linked to OpenGL +fence sync objects, potentially improving efficiency of sharing images and +buffers between the two APIs. +The companion `GL_ARB_cl_event` extension provides the complementary +functionality of creating an OpenGL sync object from an OpenCL event object. + +In addition, this extension modifies the behavior of +{clEnqueueAcquireGLObjects} and {clEnqueueReleaseGLObjects} to +<> with an OpenGL context bound in the same thread +as the OpenCL context. + +=== New Commands + + * {clCreateEventFromGLsyncKHR} + +=== New Tokens + + * {cl_command_type_TYPE} + ** {CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR} + +=== Issues + + . How are references between CL events and GL syncs handled? ++ +-- +*PROPOSED*: The linked CL event places a single reference on the GL sync +object. +That reference is removed when the CL event is deleted. +A more expensive alternative would be to reflect changes in the CL event +reference count through to the GL sync. +-- + + . How are linkages to synchronization primitives in other APIs handled? ++ +-- +*UNRESOLVED*. 
+We will at least want to have a way to link events to EGL sync objects. +There is probably no analogous DX concept. +There would be an entry point for each type of synchronization primitive to +be linked to, such as {clCreateEventFromEGLSyncKHR}. + +An alternative is a generic clCreateEventFromExternalEvent taking an +attribute list. +The attribute list would include information defining the type of the +external primitive and additional information (GL sync object handle, EGL +display and sync object handle, etc.) specific to that type. +This allows a single entry point to be reused. + +These will probably be separate extensions following the API proposed here. +-- + + . Should the {CL_EVENT_COMMAND_TYPE} correspond to the type of command + (fence) or the type of the linked sync object? ++ +-- +*PROPOSED*: To the type of the linked sync object. +-- + + . Should we support both explicit and implicit synchronization? ++ +-- +*PROPOSED*: Yes. +Implicit synchronization is suitable when GL and CL are executing in the +same application thread. +Explicit synchronization is suitable when they are executing in different +threads but the expense of glFinish is too high. +-- + + . Should this be a platform or device extension? ++ +-- +*PROPOSED*: Platform extension. +This may result in considerable under-the-hood work to implement the +sync->event semantics using only the public GL API, however, when multiple +drivers and devices with different GL support levels coexist in the same +runtime. +-- + + . Where can events generated from GL syncs be usable? ++ +-- +*PROPOSED*: Only with clEnqueueAcquireGLObjects, and attempting to use such +an event elsewhere will generate an error. +There is no apparent use case for using such events elsewhere, and possibly +some cost to supporting it, balanced by the cost of checking the source of +events in all other commands accepting them as parameters. +-- + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_gl_msaa_sharing.asciidoc b/api/cl_khr_gl_msaa_sharing.asciidoc new file mode 100644 index 000000000..eb0fed1ee --- /dev/null +++ b/api/cl_khr_gl_msaa_sharing.asciidoc @@ -0,0 +1,38 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_gl_msaa_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_gl_msaa_sharing` extends the `<>` extension to +allow a shared OpenCL/OpenGL image object to be created from an OpenGL +multi-sampled ("`MSAA`") color or depth texture. + +This extension adds multi-sample support to {clCreateFromGLTexture} and +{clGetGLTextureInfo}, and allows <>. + +This extension requires `<>`. + +See the link:{OpenCLCSpecURL}#cl_khr_gl_msaa_sharing[cl_khr_gl_msaa_sharing] +section of the OpenCL C specification for more information. + +=== New Enums + + * {cl_gl_texture_info_TYPE} + ** {CL_GL_NUM_SAMPLES} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_gl_sharing.asciidoc b/api/cl_khr_gl_sharing.asciidoc new file mode 100644 index 000000000..b6c55df65 --- /dev/null +++ b/api/cl_khr_gl_sharing.asciidoc @@ -0,0 +1,242 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_gl_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_gl_sharing` extension allows use of OpenGL buffer, texture, and +renderbuffer objects as OpenCL memory objects, referred to as "`Shared +OpenCL/OpenGL Memory Objects`". + +An OpenCL context may be associated with an OpenGL context or share group +object, using additional attributes described for {clCreateContext}. 
+ +An OpenCL image object may be created from an OpenGL texture or renderbuffer +object as described for {clCreateFromGLTexture} and +{clCreateFromGLRenderbuffer}, respectively. + +An OpenCL buffer object may be created from an OpenGL buffer object using +{clCreateFromGLBuffer}. + +Any supported OpenGL object defined within the associated OpenGL context or +share group object may be shared, with the exception of the default OpenGL +objects (i.e. objects named zero), which may not be shared. + +Additional information on the use of shared OpenCL/OpenGL memory objects is +found in the <>, +<> and +<> sections. + +An OpenGL implementation supporting buffer objects and sharing of texture +and buffer object images with OpenCL is required by this extension. + +=== New Commands + + * {clGetGLContextInfoKHR} + * {clCreateFromGLBuffer} + * {clCreateFromGLTexture} + * {clCreateFromGLRenderbuffer} + * {clGetGLObjectInfo} + * {clGetGLTextureInfo} + * {clEnqueueAcquireGLObjects} + * {clEnqueueReleaseGLObjects} + +=== New Types + + * {cl_gl_context_info_TYPE} + * {cl_gl_object_type_TYPE} + * {cl_gl_texture_info_TYPE} + * {cl_gl_platform_info} + +=== New Tokens + + * New Error Codes + ** {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} + * {cl_gl_context_info_TYPE} + ** {CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR} + ** {CL_DEVICES_FOR_GL_CONTEXT_KHR} + * {cl_context_properties_TYPE} + ** {CL_GL_CONTEXT_KHR} + ** {CL_EGL_DISPLAY_KHR} + ** {CL_GLX_DISPLAY_KHR} + ** {CL_WGL_HDC_KHR} + ** {CL_CGL_SHAREGROUP_KHR} + * {cl_gl_object_type_TYPE} + ** {CL_GL_OBJECT_BUFFER} + ** {CL_GL_OBJECT_TEXTURE2D} + ** {CL_GL_OBJECT_TEXTURE3D} + ** {CL_GL_OBJECT_RENDERBUFFER} + ** {CL_GL_OBJECT_TEXTURE2D_ARRAY} + ** {CL_GL_OBJECT_TEXTURE1D} + ** {CL_GL_OBJECT_TEXTURE1D_ARRAY} + ** {CL_GL_OBJECT_TEXTURE_BUFFER} + * {cl_gl_texture_info_TYPE} + ** {CL_GL_TEXTURE_TARGET} + ** {CL_GL_MIPMAP_LEVEL} + + +=== Issues + + . How should the OpenGL context be identified when creating an associated + OpenCL context? 
+-- +*RESOLVED*: by using a (display,context handle) attribute pair to identify +an arbitrary OpenGL or OpenGL ES context with respect to one of the +window-system binding layers EGL, GLX, or WGL, or a share group handle to +identify a CGL share group. +If a context is specified, it need not be current to the thread calling +clCreateContext*. + +A previously suggested approach would use a single boolean attribute +CL_USE_GL_CONTEXT_KHR to allow creating a context associated with the +currently bound OpenGL context. +This may still be implemented as a separate extension, and might allow more +efficient acquire/release behavior in the special case where they are being +executed in the same thread as the bound GL context used to create the CL +context. +-- + + . What should the format of an attribute list be? ++ +-- +After considerable discussion, we think we can live with a list of + pairs terminated by zero. +The list is passed as 'cl_context_properties *_properties'_, where +cl_context_properties is typedefed to be 'intptr_t' in cl.h. + +This effectively allows encoding all scalar integer, pointer, and handle +values in the host API into the argument list and is analogous to the +structure and type of EGL attribute lists. +`NULL` attribute lists are also allowed. +Again as for EGL, any attributes not explicitly passed in the list will take +on a defined default value that does something reasonable. + +Experience with EGL, GLX, and WGL has shown attribute lists to be a +sufficiently flexible and general mechanism to serve the needs of management +calls such as context creation. +It is not completely general (encoding floating-point and non-scalar +attribute values is not straightforward), and other approaches were +suggested such as opaque attribute lists with getter/setter methods, or +arrays of variadic structures. +-- + + . 
What's the behavior of an associated OpenGL or OpenCL context when using + resources defined by the other associated context, and that context is + destroyed? ++ +-- +*RESOLVED*: OpenCL objects place a reference on the data store underlying +the corresponding GL object when they're created. +The GL name corresponding to that data store may be deleted, but the data +store itself remains so long as any CL object has a reference to it. +However, destroying all GL contexts in the share group corresponding to a CL +context results in implementation-dependent behavior when using a +corresponding CL object, up to and including program termination. +-- + + . How about sharing with D3D? ++ +-- +Sharing between D3D and OpenCL should use the same attribute list mechanism, +though obviously with different parameters, and be exposed as a similar +parallel OpenCL extension. +There may be an interaction between that extension and this one since it's +not yet clear if it will be possible to create a CL context simultaneously +sharing GL and D3D objects. +-- + + . Under what conditions will context creation fail due to sharing? ++ +-- +*RESOLVED*: Several cross-platform failure conditions are described (GL +context or CGL share group doesn't exist, GL context doesn't support types +of GL objects, GL context implementation doesn't allow sharing), but +additional failures may result due to implementation-dependent reasons and +should be added to this extension as such failures are discovered. +Sharing between OpenCL and OpenGL requires integration at the driver +internals level. +-- + + . What command-queues can *clEnqueueAcquire/ReleaseGLObjects* be placed + on? ++ +-- +*RESOLVED*: All command-queues. +This restriction is enforced at context creation time. +If any device passed to context creation cannot support shared OpenCL/OpenGL +memory objects, context creation will fail with a {CL_INVALID_OPERATION} +error. +-- + + . 
How can applications determine which command-queue to place an + Acquire/Release on? ++ +-- +*RESOLVED*: The {clGetGLContextInfoKHR} returns either the CL device +currently corresponding to a specified GL context (typically the display +it's running on), or a list of all the CL devices the specified context +might run on (potentially useful in multiheaded / "`virtual screen`" +environments). +This command is not placed together with commands to create shared +OpenCL/OpenGL memory objects because it relies on the same property-list +method of specifying a GL context introduced by this extension. + +If no devices are returned, it means that the GL context exists on an older +GPU not capable of running OpenCL, but still capable of sharing objects +between GL running on that GPU and CL running elsewhere. +-- + + . What is the meaning of the {CL_DEVICES_FOR_GL_CONTEXT_KHR} query? ++ +-- +*RESOLVED*: The list of all CL devices that may ever be associated with a +specific GL context. +On platforms such as MacOS X, the "`virtual screen`" concept allows multiple +GPUs to back a single virtual display. +Similar functionality might be implemented on other windowing systems, such +as a transparent heterogeneous multiheaded X server. +Therefore the exact meaning of this query is interpreted relative to the +binding layer API in use. +-- + + . What happened to the "`extension`"s `+cl_khr_gl_sharing__context+` and + `+cl_khr_gl_sharing__memobjs+` that were previously published? ++ +-- +*RESOLVED*: These were not actual extensions, but the result of splitting +the `cl_khr_gl_sharing` extension language into two separate sections for +publication. +All extension language has now been integrated into the unified +Specification and this distinction is not useful. +-- + + . Where are the `clCreateFromGLTexture2D` and `clCreateFromGLTexture3D` + functions described? 
+-- +*PROPOSED*: These functions are present in cl.xml, listed as OpenCL 1.0 APIs +that were deprecated in OpenCL 1.2, but the current extension language does +not describe them. +Since OpenCL 1.2 itself is so old, it is not worth the effort to look back +and determine the exact details of these APIs. +-- + + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_global_int32_base_atomics.asciidoc b/api/cl_khr_global_int32_base_atomics.asciidoc new file mode 100644 index 000000000..36b331670 --- /dev/null +++ b/api/cl_khr_global_int32_base_atomics.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_global_int32_base_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_global_int32_base_atomics` allows OpenCL C atomic operations to be +performed on 32-bit signed and unsigned integers in global memory. + +This extension became a core feature in OpenCL 1.1, with the built-in atomic +function names changed to use the **atomic_** prefix instead of **atom_**. + +See the link:{OpenCLCSpecURL}#cl_khr_global_int32_base_atomics[Global 32-Bit +Base Atomics] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_global_int32_extended_atomics.asciidoc b/api/cl_khr_global_int32_extended_atomics.asciidoc new file mode 100644 index 000000000..e4fd74210 --- /dev/null +++ b/api/cl_khr_global_int32_extended_atomics.asciidoc @@ -0,0 +1,31 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_global_int32_extended_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_global_int32_extended_atomics` allows OpenCL C extended atomic +operations to be performed on 32-bit signed and unsigned integers in global +memory. + +This extension became a core feature in OpenCL 1.1, with the built-in atomic +function names changed to use the **atomic_** prefix instead of **atom_**. + +See the link:{OpenCLCSpecURL}#cl_khr_global_int32_extended_atomics[Global +32-Bit Extended Atomics] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/ext/cl_khr_icd.asciidoc b/api/cl_khr_icd.asciidoc similarity index 58% rename from ext/cl_khr_icd.asciidoc rename to api/cl_khr_icd.asciidoc index 2298e6cb3..a28baa504 100644 --- a/ext/cl_khr_icd.asciidoc +++ b/api/cl_khr_icd.asciidoc @@ -1,43 +1,51 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 -[[cl_khr_icd-opencl]] -== Installable Client Drivers +//@ TODO This should probably be in an appendix? It is a "platform +//@ TODO extension" but so are others -[[cl_khr_icd-overview]] -=== Overview +include::{generated}/meta/{refprefix}cl_khr_icd.txt[] -This section describes a platform extension which defines a simple mechanism +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + TBD + +=== Description + +`cl_khr_icd` describes a platform extension which defines a simple mechanism through which the Khronos OpenCL installable client driver loader (ICD Loader) may expose multiple separate vendor installable client drivers (Vendor ICDs) for OpenCL. An application written against the ICD Loader will be able to access all -{cl_platform_id_TYPE}s exposed by all vendor implementations with the ICD Loader -acting as a demultiplexor. +{cl_platform_id_TYPE}s exposed by all vendor implementations with the ICD +Loader acting as a demultiplexor. This is a platform extension, so if this extension is supported by an -implementation, the string *cl_khr_icd* will be present in the +implementation, the string `"cl_khr_icd"` will be present in the {CL_PLATFORM_EXTENSIONS} string. -=== General Information +=== Source Code -==== Version History +The official source for the ICD Loader is available on github, at: -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== +https://github.com/KhronosGroup/OpenCL-ICD-Loader -[[cl_khr_icd-inferring-vendors-from-function-call-arguments]] -=== Inferring Vendors from Function Call Arguments +The complete `_cl_icd_dispatch` structure is defined in the header +`cl_icd.h`, which is available as a part of the OpenCL headers. + + +=== Inferring Vendors From Function Call Arguments At every OpenCL function call, the ICD Loader infers the vendor ICD function to call from the arguments to the function. An object is said to be ICD compatible if it is of the following structure: -[source,opencl] +[source,c] ---- struct _cl_ { @@ -54,13 +62,14 @@ is used to direct calls to a particular vendor implementation. All objects created from ICD compatible objects must be ICD compatible. The definition for `_cl_icd_dispatch` is provided along with the OpenCL -headers. 
Existing members can never be removed from that structure but new -members can be appended. +headers. +Existing members can never be removed from that structure but new members +can be appended. Functions which do not have an argument from which the vendor implementation may be inferred have been deprecated and may be ignored. -[[cl_khr_icd-icd-data]] + === ICD Data A Vendor ICD is defined by two pieces of data: @@ -73,14 +82,14 @@ A Vendor ICD is defined by two pieces of data: default suffix for extensions implemented only by that vendor. The vendor suffix string is optional. -[[cl_khr_icd-icd-loader-vendor-enumeration-on-windows]] + === ICD Loader Vendor Enumeration on Windows -To enumerate Vendor ICDs on Windows, the ICD Loader will first -scan for REG_SZ string values in the "Display Adapter" and -"Software Components" HKR registry keys. The exact registry -keys to scan should be obtained via PnP Configuration Manager -APIs, but will look like: +To enumerate Vendor ICDs on Windows, the ICD Loader will first scan for +REG_SZ string values in the "Display Adapter" and "Software Components" HKR +registry keys. +The exact registry keys to scan should be obtained via PnP Configuration +Manager APIs, but will look like: For 64-bit ICDs: @@ -120,17 +129,16 @@ Then the ICD Loader will open the Vendor ICD library: c:\vendor a\vndra_ocl.dll ---- -The ICD Loader will also scan for REG_DWORD values in the registry -key: +The ICD Loader will also scan for REG_DWORD values in the registry key: [literal] ---- HKLM\SOFTWARE\Khronos\OpenCL\Vendors ---- -For each registry value in this key which has data set to 0, the -ICD Loader will open the Vendor ICD library specified by the name -of the registry value. +For each registry value in this key which has data set to 0, the ICD Loader +will open the Vendor ICD library specified by the name of the registry +value. 
For example, if the registry contains the value: @@ -147,7 +155,7 @@ Then the ICD Loader will open the Vendor ICD library: c:\vendor a\vndra_ocl.dll ---- -[[cl_khr_icd-icd-loader-vendor-enumeration-on-linux]] + === ICD Loader Vendor Enumeration on Linux To enumerate vendor ICDs on Linux, the ICD Loader scans the files in the @@ -175,7 +183,7 @@ libVendorAOpenCL.so then the ICD Loader will load the library `libVendorAOpenCL.so`. -[[cl_khr_icd-icd-loader-vendor-enumeration-on-android]] + === ICD Loader Vendor Enumeration on Android To enumerate vendor ICDs on Android, the ICD Loader scans the files in the @@ -203,7 +211,7 @@ libVendorAOpenCL.so then the ICD Loader will load the library `libVendorAOpenCL.so`. -[[cl_khr_icd-adding-a-vendor-library]] + === Adding a Vendor Library Upon successfully loading a Vendor ICD's library, the ICD Loader queries the @@ -217,115 +225,36 @@ ignore the library. Next the ICD Loader queries available ICD-enabled platforms in the library using {clIcdGetPlatformIDsKHR}. For each of these platforms, the ICD Loader queries the platform's extension -string to verify that *cl_khr_icd* is supported, then queries the platform's -Vendor ICD extension suffix using {clGetPlatformInfo} with the value -{CL_PLATFORM_ICD_SUFFIX_KHR}. +string to verify that `<<cl_khr_icd>>` is supported, then queries the +platform's Vendor ICD extension suffix using {clGetPlatformInfo} with the +value {CL_PLATFORM_ICD_SUFFIX_KHR}. If any of these steps fail, the ICD Loader will ignore the Vendor ICD and continue on to the next.
-[[cl_khr_icd-new-procedures-and-functions]] -=== New Procedures and Functions +=== New Commands -[source,opencl] ----- -cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, - cl_platform_id *platforms, - cl_uint *num_platforms); ----- + * {clIcdGetPlatformIDsKHR} -[[cl_khr_icd-new-tokens]] === New Tokens Accepted as _param_name_ to the function {clGetPlatformInfo}: ----- -CL_PLATFORM_ICD_SUFFIX_KHR ----- + * {CL_PLATFORM_ICD_SUFFIX_KHR} Returned by {clGetPlatformIDs} when no platforms are found: ----- -CL_PLATFORM_NOT_FOUND_KHR ----- - -[[cl_khr_icd-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.1_, replace the description of the return values of -{clGetPlatformIDs} with: + * {CL_PLATFORM_NOT_FOUND_KHR} -"{clGetPlatformIDs} returns {CL_SUCCESS} if the function is executed -successfully and there are a non zero number of platforms available. -It returns {CL_PLATFORM_NOT_FOUND_KHR} if zero platforms are available. -It returns {CL_INVALID_VALUE} if _num_entries_ is equal to zero and -_platforms_ is not `NULL` or if both _num_platforms_ and _platforms_ are -`NULL`." - -In _section 4.1_, add the following after the description of -{clGetPlatformIDs}: - -"The list of platforms accessible through the Khronos ICD Loader can be -obtained using the following function: - -include::{generated}/api/protos/clIcdGetPlatformIDsKHR.txt[] - -_num_entries_ is the number of {cl_platform_id_TYPE} entries that can be added to -_platforms_. -If _platforms_ is not `NULL`, then _num_entries_ must be greater than zero. - -_platforms_ returns a list of OpenCL platforms available for access through -the Khronos ICD Loader. -The {cl_platform_id_TYPE} values returned in _platforms_ are ICD compatible and can -be used to identify a specific OpenCL platform. -If the _platforms_ argument is `NULL`, then this argument is ignored. 
-The number of OpenCL platforms returned is the minimum of the value -specified by _num_entries_ or the number of OpenCL platforms available. - -_num_platforms_ returns the number of OpenCL platforms available. -If _num_platforms_ is `NULL`, then this argument is ignored. - -{clIcdGetPlatformIDsKHR} returns {CL_SUCCESS} if the function is executed -successfully and there are a non zero number of platforms available. -It returns {CL_PLATFORM_NOT_FOUND_KHR} if zero platforms are available. -It returns {CL_INVALID_VALUE} if _num_entries_ is equal to zero and -_platforms_ is not `NULL` or if both _num_platforms_ and _platforms_ are -`NULL`." - -Add the following to _table 4.1_: - -[cols="2,1,2",options="header"] -|==== -| Platform Info -| Return Type -| Description - -| {CL_PLATFORM_ICD_SUFFIX_KHR} -| {char_TYPE}[] -| The function name suffix used to identify extension functions to be - directed to this platform by the ICD Loader. - -|==== - -[[cl_khr_icd-source-code]] -=== Source Code - -The official source for the ICD Loader is available on github, at: - -https://github.com/KhronosGroup/OpenCL-ICD-Loader - -The complete `_cl_icd_dispatch` structure is defined in the header -*cl_icd.h*, which is available as a part of the OpenCL headers. - -[[cl_khr_icd-issues]] === Issues . Some OpenCL functions do not take an object argument from which their - vendor library may be identified (e.g, {clUnloadCompiler}), how will they - be handled? + vendor library may be identified (e.g, {clUnloadCompiler}), how will + they be handled? + -- -RESOLVED: Such functions will be a noop for all calls through the ICD Loader. +*RESOLVED*: Such functions will be a noop for all calls through the ICD +Loader. -- . How are OpenCL extension to be handled? @@ -336,14 +265,14 @@ RESOLVED: Such functions will be a noop for all calls through the ICD Loader. //are implemented by any vendor. //The suffix mechanism provides access for vendor extensions which are not yet //added to the ICD Loader. 
-RESOLVED: Extension APIs must be queried using +*RESOLVED*: Extension APIs must be queried using {clGetExtensionFunctionAddressForPlatform}. -- . How will the ICD Loader handle a `NULL` {cl_platform_id_TYPE}? + -- -RESOLVED: The ICD will by default choose the first enumerated platform as +*RESOLVED*: The ICD will by default choose the first enumerated platform as the `NULL` platform. // TODO: This seems out-of-date and incorrect. //The user can override this default by setting an environment variable @@ -355,7 +284,7 @@ the `NULL` platform. . There exists no mechanism to unload the ICD Loader, should there be one? + -- -RESOLVED: As there is no standard mechanism for unloading a vendor +*RESOLVED*: As there is no standard mechanism for unloading a vendor implementation, do not add one for the ICD Loader. -- @@ -363,9 +292,15 @@ implementation, do not add one for the ICD Loader. functions? + -- -RESOLVED: The ICD Loader will check for `NULL` objects passed to the OpenCL -functions without trying to dereference the `NULL` objects for obtaining the -ICD dispatch table. -On detecting a `NULL` object it will return one of the an invalid object error -values (e.g. {CL_INVALID_DEVICE} corresponding to the object in question. +*RESOLVED*: The ICD Loader will check for `NULL` objects passed to the +OpenCL functions without trying to dereference the `NULL` objects for +obtaining the ICD dispatch table. +On detecting a `NULL` object it will return one of the invalid object +error values (e.g. {CL_INVALID_DEVICE}) corresponding to the object in +question. -- + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_il_program.asciidoc b/api/cl_khr_il_program.asciidoc new file mode 100644 index 000000000..fc4a3d7e9 --- /dev/null +++ b/api/cl_khr_il_program.asciidoc @@ -0,0 +1,41 @@ +// Copyright 2017-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_il_program.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_il_program` adds the ability to create programs with intermediate +language (IL), usually SPIR-V. +Further information about the format and contents of SPIR-V may be found in +the SPIR-V Specification. +Information about how SPIR-V modules behave in the OpenCL environment may be +found in the OpenCL SPIR-V Environment Specification. + +The functionality described by this extension is a core feature in OpenCL +2.1. + +=== New Commands + + * {clCreateProgramWithILKHR} + +=== New Tokens + + * {cl_device_info_TYPE} + ** {CL_DEVICE_IL_VERSION_KHR} + * {cl_program_info_TYPE} + ** {CL_PROGRAM_IL_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_image2d_from_buffer.asciidoc b/api/cl_khr_image2d_from_buffer.asciidoc new file mode 100644 index 000000000..17432c1ea --- /dev/null +++ b/api/cl_khr_image2d_from_buffer.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_image2d_from_buffer.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_image2d_from_buffer` allows a 2D image to be created from an +existing OpenCL buffer memory object. + +This extension became a core feature in OpenCL 2.0. + +Refer to the discussion of 2D images created from buffers in the +<<image-descriptor,Image Descriptor>> section for additional details. + +=== New Tokens + + * {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR} + * {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version.
diff --git a/api/cl_khr_initialize_memory.asciidoc b/api/cl_khr_initialize_memory.asciidoc new file mode 100644 index 000000000..8eddeedff --- /dev/null +++ b/api/cl_khr_initialize_memory.asciidoc @@ -0,0 +1,46 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_initialize_memory.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_initialize_memory` adds OpenCL C support for initializing local and +private memory before a kernel begins execution. +This is accomplished by specifying a flag at context creation time affecting +all such memory. + +Memory is allocated in various forms in OpenCL both explicitly (global +memory) or implicitly (local, private memory). +This allocation so far does not provide a straightforward mechanism to +initialize the memory on allocation. +In other words what is lacking is the equivalent of `calloc` for the +currently supported `malloc` like capability. +This functionality is useful for a variety of reasons including ease of +debugging, application controlled limiting of visibility to previous +contents of memory and in some cases, optimization. + +See the link:{OpenCLCSpecURL}#cl_khr_initialize_memory[Initializing Memory] +section of the OpenCL C specification for more information. + +=== New Enums + + * {cl_context_properties_TYPE} + ** {CL_CONTEXT_MEMORY_INITIALIZE_KHR} + * {cl_context_memory_initialize_khr_TYPE} + ** {CL_CONTEXT_MEMORY_INITIALIZE_LOCAL_KHR} + ** {CL_CONTEXT_MEMORY_INITIALIZE_PRIVATE_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_int64_base_atomics.asciidoc b/api/cl_khr_int64_base_atomics.asciidoc new file mode 100644 index 000000000..8723cab2c --- /dev/null +++ b/api/cl_khr_int64_base_atomics.asciidoc @@ -0,0 +1,27 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_int64_base_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_int64_base_atomics` adds built-in OpenCL functions supporting atomic +operations to be performed on 64-bit signed and unsigned integers in global +and local memory. + +See the link:{OpenCLCSpecURL}#cl_khr_int64_base_atomics[64-Bit Base Atomics] +section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_int64_extended_atomics.asciidoc b/api/cl_khr_int64_extended_atomics.asciidoc new file mode 100644 index 000000000..f4fab1075 --- /dev/null +++ b/api/cl_khr_int64_extended_atomics.asciidoc @@ -0,0 +1,27 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_int64_extended_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_int64_extended_atomics` adds built-in OpenCL functions supporting +extended atomic operations to be performed on 64-bit signed and unsigned +integers in global and local memory. + +See the link:{OpenCLCSpecURL}#cl_khr_int64_extended_atomics[64-Bit Extended +Atomics] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_integer_dot_product.asciidoc b/api/cl_khr_integer_dot_product.asciidoc new file mode 100644 index 000000000..ef47c2a6c --- /dev/null +++ b/api/cl_khr_integer_dot_product.asciidoc @@ -0,0 +1,64 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_integer_dot_product.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-06-23 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kévin Petit, Arm Ltd. + - Jeremy Kemp, Imagination Technologies + - Ben Ashbaugh, Intel + - Ruihao Zhang, Qualcomm + - Stuart Brady, Arm Ltd + - Balaji Calidas, Qualcomm + - Ayal Zaks, Intel + +=== Description + +`cl_khr_integer_dot_product` adds support for SPIR-V instructions and OpenCL +C built-in functions to compute the dot product of vectors of integers. + +OpenCL C compilers supporting this extension will define the extension macro +`cl_khr_integer_dot_product`, and may define corresponding feature macros +{opencl_c_integer_dot_product_input_4x8bit} and +{opencl_c_integer_dot_product_input_4x8bit_packed} depending on the reported +capabilities. + +See the link:{OpenCLCSpecURL}#cl_khr_integer_dot_product[Integer Dot +Product] section of the OpenCL C specification for more information. + +// The 'New ...' 
section can be auto-generated + +=== New Structures + + * {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} + +=== New Types + + * {cl_device_integer_dot_product_capabilities_khr_TYPE} + +=== New Enums + + * {cl_device_integer_dot_product_capabilities_khr_TYPE} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} + * {cl_device_info_TYPE} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR} + +=== New SPIR-V Capabilities + + * TBD + +=== Version History + + * Revision 1.0.0, 2021-06-17 + ** Initial version + * Revision 2.0.0, 2021-06-23 + ** 8-bit support is mandatory, added 8-bit acceleration properties. diff --git a/api/cl_khr_local_int32_base_atomics.asciidoc b/api/cl_khr_local_int32_base_atomics.asciidoc new file mode 100644 index 000000000..4fba21aa5 --- /dev/null +++ b/api/cl_khr_local_int32_base_atomics.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_local_int32_base_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_local_int32_base_atomics` allows OpenCL C atomic operations to be +performed on 32-bit signed and unsigned integers in local memory. + +This extension became a core feature in OpenCL 1.1, with the built-in atomic +function names changed to use the **atomic_** prefix instead of **atom_**. + +See the link:{OpenCLCSpecURL}#cl_khr_local_int32_base_atomics[Local 32-Bit +Base Atomics] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_local_int32_extended_atomics.asciidoc b/api/cl_khr_local_int32_extended_atomics.asciidoc new file mode 100644 index 000000000..05b5d0cab --- /dev/null +++ b/api/cl_khr_local_int32_extended_atomics.asciidoc @@ -0,0 +1,31 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_local_int32_extended_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_local_int32_extended_atomics` allows OpenCL C extended atomic +operations to be performed on 32-bit signed and unsigned integers in local +memory. + +This extension became a core feature in OpenCL 1.1, with the built-in atomic +function names changed to use the **atomic_** prefix instead of **atom_**. + +See the link:{OpenCLCSpecURL}#cl_khr_local_int32_extended_atomics[Local +32-Bit Extended Atomics] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_mipmap_image.asciidoc b/api/cl_khr_mipmap_image.asciidoc new file mode 100644 index 000000000..ab2a7fe0f --- /dev/null +++ b/api/cl_khr_mipmap_image.asciidoc @@ -0,0 +1,39 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_mipmap_image.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_mipmap_image` extension adds the ability to create and access +mipmapped images: + + * {clCreateImage} is extended to create mipmapped images. + * {clCreateFromGLTexture} is extended to create a mipmapped image from a + mipmapped GL texture. 
+ * {clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueCopyImage}, + {clEnqueueFillImage}, {clEnqueueCopyImageToBuffer}, + {clEnqueueCopyBufferToImage}, and {clEnqueueMapImage} are + extended to operate on regions of mipmapped images. + ** The <> section describes how mipmap levels are encoded in + existing parameters to these commands. + * OpenCL C built-in functions are added to read from and query a mipmapped + image. + +See the link:{OpenCLCSpecURL}#cl_khr_mipmap_image[Mipmapped Image Reads and +Queries] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_mipmap_image_writes.asciidoc b/api/cl_khr_mipmap_image_writes.asciidoc new file mode 100644 index 000000000..84278ce60 --- /dev/null +++ b/api/cl_khr_mipmap_image_writes.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_mipmap_image_writes.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_mipmap_image_writes` extension adds OpenCL C built-in functions +to write to a mipmapped image. + +If `cl_khr_mipmap_image_writes` is supported by the OpenCL device, the +`<<cl_khr_mipmap_image>>` extension must also be supported. + +See the link:{OpenCLCSpecURL}#cl_khr_mipmap_image_writes[Mipmapped Image +Writes] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_pci_bus_info.asciidoc b/api/cl_khr_pci_bus_info.asciidoc new file mode 100644 index 000000000..0f5a87ed4 --- /dev/null +++ b/api/cl_khr_pci_bus_info.asciidoc @@ -0,0 +1,43 @@ +// Copyright 2018-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_pci_bus_info.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-04-19 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_pci_bus_info` extension adds a new query to obtain PCI bus +information about an OpenCL device. + +Not all OpenCL devices have PCI bus information, either due to the device +not being connected to the system through a PCI interface or due to platform +specific restrictions and policies. +Thus this extension is only expected to be supported by OpenCL devices which +can provide the information. + +As a consequence, applications should always check for the presence of the +extension string for each individual OpenCL device for which they intend to +issue the new query and should not make any assumptions about the +availability of the extension on any given platform. + +=== New Types + + * {cl_device_pci_bus_info_khr_TYPE} + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_PCI_BUS_INFO_KHR} + +=== Version History + + * Revision 1.0.0, 2021-04-19 + ** Initial version. diff --git a/api/cl_khr_priority_hints.asciidoc b/api/cl_khr_priority_hints.asciidoc new file mode 100644 index 000000000..d36c20831 --- /dev/null +++ b/api/cl_khr_priority_hints.asciidoc @@ -0,0 +1,46 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_priority_hints.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_priority_hints` extension adds priority hints for OpenCL, but +does not specify the scheduling behavior or minimum guarantees. +It is expected that the user guides associated with each implementation +which supports this extension will describe the scheduling behavior +guarantees.
+ +Note that the priority hint is orthogonal to functionality defined in the +`<<cl_khr_throttle_hints>>` extension. +For example, a task may have high priority ({CL_QUEUE_PRIORITY_HIGH_KHR}) +but should at the same time be executed at an optimized throttle setting +({CL_QUEUE_THROTTLE_LOW_KHR}). + +=== New Types + + * {cl_queue_priority_khr_TYPE} + +=== New Enums + + * {cl_queue_properties_TYPE} + ** {CL_QUEUE_PRIORITY_KHR} + * {cl_queue_priority_khr_TYPE} + ** {CL_QUEUE_PRIORITY_HIGH_KHR} + ** {CL_QUEUE_PRIORITY_MED_KHR} + ** {CL_QUEUE_PRIORITY_LOW_KHR} + + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_select_fprounding_mode.asciidoc b/api/cl_khr_select_fprounding_mode.asciidoc new file mode 100644 index 000000000..c97814158 --- /dev/null +++ b/api/cl_khr_select_fprounding_mode.asciidoc @@ -0,0 +1,31 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_select_fprounding_mode.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_select_fprounding_mode` allows an application to specify the +rounding mode for an instruction or group of instructions in the OpenCL C +program source. + +NOTE: This extension was deprecated in OpenCL 1.1, and its use is not +recommended. + +See the link:{OpenCLCSpecURL}#cl_khr_select_fprounding_mode[Select +Floating-Point Rounding Mode] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc new file mode 100644 index 000000000..43c2b1ae9 --- /dev/null +++ b/api/cl_khr_semaphore.asciidoc @@ -0,0 +1,262 @@ +// Copyright 2021-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_semaphore.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-01 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - Gorazd Sumkovski, ARM + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +OpenCL provides {cl_event_TYPE} as a primary mechanism of synchronization +between host and device as well as across devices. +While events can be waited on or can be passed as dependencies across +work-submissions, they suffer from the following limitations: + + * They are immutable. + * They are not reusable. + +`cl_khr_semaphore` introduces a new type of synchronization object to +represent _semaphores_ that can be reused, waited on, and signaled multiple +times by OpenCL work-submissions. + +In particular, this extension defines: + + * A new type called {cl_semaphore_khr_TYPE} to represent the semaphore + objects. + * A new type called {cl_semaphore_properties_khr_TYPE} to specify metadata + associated with semaphores. + * Functions to create, retain, and release semaphores. + * Functions to wait on and signal semaphore objects. + * Functions to query the properties of semaphore objects. + +// The 'New ...'
section can be auto-generated + +=== New Commands + + * {clCreateSemaphoreWithPropertiesKHR} + * {clEnqueueWaitSemaphoresKHR} + * {clEnqueueSignalSemaphoresKHR} + * {clGetSemaphoreInfoKHR} + * {clReleaseSemaphoreKHR} + * {clRetainSemaphoreKHR} + +=== New Types + + * {cl_semaphore_khr_TYPE} + * {cl_semaphore_properties_khr_TYPE} + * {cl_semaphore_info_khr_TYPE} + * {cl_semaphore_type_khr_TYPE} + * {cl_semaphore_payload_khr_TYPE} + +=== New Enums + + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_SEMAPHORE_TYPES_KHR} + * {cl_device_info_TYPE} + ** {CL_DEVICE_SEMAPHORE_TYPES_KHR} + * {cl_semaphore_type_khr_TYPE} + ** {CL_SEMAPHORE_TYPE_BINARY_KHR} 1 + * {cl_semaphore_info_khr_TYPE} + ** {CL_SEMAPHORE_CONTEXT_KHR} + ** {CL_SEMAPHORE_REFERENCE_COUNT_KHR} + ** {CL_SEMAPHORE_PROPERTIES_KHR} + ** {CL_SEMAPHORE_PAYLOAD_KHR} + * {cl_semaphore_info_khr_TYPE} or {cl_semaphore_properties_khr_TYPE} + ** {CL_SEMAPHORE_TYPE_KHR} + ** {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} + ** {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR} +// TODO these are not described anywhere in the extension spec document + * New return values from {clGetEventInfo} + ** {CL_COMMAND_SEMAPHORE_WAIT_KHR} + ** {CL_COMMAND_SEMAPHORE_SIGNAL_KHR} + * New error codes + ** {CL_INVALID_SEMAPHORE_KHR} + + +[[cl_khr_semaphore-Sample-Code]] +=== Sample Code + +==== Example for Semaphore Creation in a Single Device Context + +[source] +---- +// Get cl_devices of the platform. 
+clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with just first device +context = clCreateContext(..., 1, devices, ...); + +// Create clSema of type cl_semaphore_khr usable on single device in the context + +cl_semaphore_properties_khr sema_props[] = + {(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + 0}; + +int errcode_ret = 0; + +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); +---- + +==== Example for Semaphore Creation for a Single Device in a Multi-Device Context + +[source] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with first two devices +clCreateContext(..., 2, devices, ...); + +// Create clSema of type cl_semaphore_khr usable only on device 0 +cl_semaphore_properties_khr sema_props[] = { + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, + (cl_semaphore_properties_khr)devices[0], + CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, + 0 +}; + +int errcode_ret = 0; + +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); +---- + +==== Example for Synchronization Using Wait and Signal + +[source] +---- +// clSema is created using clCreateSemaphoreWithPropertiesKHR +// using one of the examples for semaphore creation. 
+ +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +// Start the main loop + +while (true) { + // (not shown) Signal the semaphore from other work + + // Wait for the semaphore in OpenCL + // by calling clEnqueueWaitSemaphoresKHR on 'clSema' + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel that accesses extMem + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch other work that waits on 'clSema' +} +---- + +==== Example for {clGetSemaphoreInfoKHR} + +[source] +---- +// clSema is created using clCreateSemaphoreWithPropertiesKHR +// using one of the examples for semaphore creation. 
+ +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +// Start the main rendering loop + +while (true) { + // (not shown) Signal the semaphore from other work + + // Wait for the semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on 'clSema' + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel in OpenCL + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Query type of clSema + clGetSemaphoreInfoKHR(/*sema_object*/ clSema, + /*param_name*/ CL_SEMAPHORE_TYPE_KHR, + /*param_value_size*/ sizeof(cl_semaphore_type_khr), + /*param_value*/ &clSemaType, + /*param_value_ret_size*/ &clSemaTypeSize); + + if (clSemaType == CL_SEMAPHORE_TYPE_BINARY_KHR) { + // Do something + } + else { + // Do something else + } + // (not shown) Launch other work that waits on 'clSema' +} +---- + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-08-01 + ** Changed device handle list enum to the semaphore-specific + {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} (provisional). + + diff --git a/api/cl_khr_spir.asciidoc b/api/cl_khr_spir.asciidoc new file mode 100644 index 000000000..33ec23e43 --- /dev/null +++ b/api/cl_khr_spir.asciidoc @@ -0,0 +1,39 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_spir.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_spir` adds the ability to create an OpenCL program object from a +Standard Portable Intermediate Representation (SPIR) instance. +A SPIR instance is a vendor-neutral non-source representation for OpenCL C +programs. + +See the <> for +information on compiling SPIR binaries. + +`cl_khr_spir` has been superseded by the SPIR-V intermediate representation, +which is supported by the `<<cl_khr_il_program>>` extension, and is a core +feature in OpenCL 2.1. + +=== New Tokens + + * {cl_device_info_TYPE} + ** {CL_DEVICE_SPIR_VERSIONS} + * {cl_program_binary_type_TYPE} + ** {CL_PROGRAM_BINARY_TYPE_INTERMEDIATE} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_srgb_image_writes.asciidoc b/api/cl_khr_srgb_image_writes.asciidoc new file mode 100644 index 000000000..e12ccb362 --- /dev/null +++ b/api/cl_khr_srgb_image_writes.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_srgb_image_writes.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_srgb_image_writes` enables OpenCL C kernels to write to sRGB images +using the *write_imagef* built-in function. +The sRGB image formats that may be written to will be returned by +{clGetSupportedImageFormats}. + +When the image is an sRGB image, the *write_imagef* built-in function will +perform the linear to sRGB conversion. +Only the R, G, and B components are converted from linear to sRGB; the A +component is written as-is. 
+ +See the link:{OpenCLCSpecURL}#cl_khr_srgb_image_writes[sRGB Image Write +Functions] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_subgroup_ballot.asciidoc b/api/cl_khr_subgroup_ballot.asciidoc new file mode 100644 index 000000000..0978f2d66 --- /dev/null +++ b/api/cl_khr_subgroup_ballot.asciidoc @@ -0,0 +1,52 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_ballot.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_ballot` adds built-in OpenCL C functions with the ability +to collect and operate on ballots from work items in a sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_ballot[Sub-Group Ballots] +section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_ballot: + +gentype sub_group_non_uniform_broadcast( gentype value, uint index ) +gentype sub_group_broadcast_first( gentype value ) + +uint4 sub_group_ballot( int predicate ) +int sub_group_inverse_ballot( uint4 value ) +int sub_group_ballot_bit_extract( uint4 value, uint index ) +uint sub_group_ballot_bit_count( uint4 value ) +uint sub_group_ballot_inclusive_scan( uint4 value ) +uint sub_group_ballot_exclusive_scan( uint4 value ) +uint sub_group_ballot_find_lsb( uint4 value ) +uint sub_group_ballot_find_msb( uint4 value ) + +uint4 get_sub_group_eq_mask() +uint4 get_sub_group_ge_mask() +uint4 get_sub_group_gt_mask() +uint4 get_sub_group_le_mask() +uint4 get_sub_group_lt_mask() +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. 
diff --git a/api/cl_khr_subgroup_clustered_reduce.asciidoc b/api/cl_khr_subgroup_clustered_reduce.asciidoc new file mode 100644 index 000000000..a2d60ca89 --- /dev/null +++ b/api/cl_khr_subgroup_clustered_reduce.asciidoc @@ -0,0 +1,46 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_clustered_reduce.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_clustered_reduce` adds built-in OpenCL functions for +clustered reductions that operate on a subset of work items in the +sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_clustered_reduce[Clustered +Reductions] section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_clustered_reduce: + +gentype sub_group_clustered_reduce_add( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_mul( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_min( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_max( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_and( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_or( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_xor( gentype value, uint clustersize ) +int sub_group_clustered_reduce_logical_and( int predicate, uint clustersize ) +int sub_group_clustered_reduce_logical_or( int predicate, uint clustersize ) +int sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. 
diff --git a/api/cl_khr_subgroup_extended_types.asciidoc b/api/cl_khr_subgroup_extended_types.asciidoc new file mode 100644 index 000000000..b5cfc7939 --- /dev/null +++ b/api/cl_khr_subgroup_extended_types.asciidoc @@ -0,0 +1,50 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_extended_types.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_extended_types` adds additional supported OpenCL C data +types to the existing sub-group broadcast, scan, and reduction functions. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_extended_types[Sub-Group +Extended Types] section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_extended_types: + +// Note: Existing functions supporting additional data types. + +gentype sub_group_broadcast( gentype value, uint index ) + +gentype sub_group_reduce_add( gentype value ) +gentype sub_group_reduce_min( gentype value ) +gentype sub_group_reduce_max( gentype value ) + +gentype sub_group_scan_inclusive_add( gentype value ) +gentype sub_group_scan_inclusive_min( gentype value ) +gentype sub_group_scan_inclusive_max( gentype value ) + +gentype sub_group_scan_exclusive_add( gentype value ) +gentype sub_group_scan_exclusive_min( gentype value ) +gentype sub_group_scan_exclusive_max( gentype value ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. diff --git a/api/cl_khr_subgroup_named_barrier.asciidoc b/api/cl_khr_subgroup_named_barrier.asciidoc new file mode 100644 index 000000000..07df0c863 --- /dev/null +++ b/api/cl_khr_subgroup_named_barrier.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_named_barrier.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_named_barrier` adds barrier operations that cover subsets +of an OpenCL work-group. +Only the OpenCL API changes are described in this section. +Please refer to the SPIR-V specification for information about using +sub-group named barriers in the SPIR-V intermediate representation, and to +the OpenCL {cpp} specification for descriptions of the sub-group named +barrier built-in functions in the OpenCL {cpp} kernel language. + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc new file mode 100644 index 000000000..3389abe08 --- /dev/null +++ b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc @@ -0,0 +1,71 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_arithmetic.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_non_uniform_arithmetic` adds built-in OpenCL C functions +providing the ability to use some sub-group functions within non-uniform +flow control, including additional scan and reduction operators. + +See the +link:{OpenCLCSpecURL}#cl_khr_subgroup_non_uniform_arithmetic[Built-in +Non-Uniform Arithmetic Functions for Sub-Groups] section of the OpenCL C +specification for more information. 
+ +[[extended-sub-groups-summariy]] +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_non_uniform_arithmetic: + +gentype sub_group_non_uniform_reduce_add( gentype value ) +gentype sub_group_non_uniform_reduce_mul( gentype value ) +gentype sub_group_non_uniform_reduce_min( gentype value ) +gentype sub_group_non_uniform_reduce_max( gentype value ) +gentype sub_group_non_uniform_reduce_and( gentype value ) +gentype sub_group_non_uniform_reduce_or( gentype value ) +gentype sub_group_non_uniform_reduce_xor( gentype value ) +int sub_group_non_uniform_reduce_logical_and( int predicate ) +int sub_group_non_uniform_reduce_logical_or( int predicate ) +int sub_group_non_uniform_reduce_logical_xor( int predicate ) + +gentype sub_group_non_uniform_scan_inclusive_add( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_mul( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_min( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_max( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_and( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_or( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_xor( gentype value ) +int sub_group_non_uniform_scan_inclusive_logical_and( int predicate ) +int sub_group_non_uniform_scan_inclusive_logical_or( int predicate ) +int sub_group_non_uniform_scan_inclusive_logical_xor( int predicate ) + +gentype sub_group_non_uniform_scan_exclusive_add( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_mul( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_min( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_max( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_and( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_or( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_xor( gentype value ) +int 
sub_group_non_uniform_scan_exclusive_logical_and( int predicate ) +int sub_group_non_uniform_scan_exclusive_logical_or( int predicate ) +int sub_group_non_uniform_scan_exclusive_logical_xor( int predicate ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. diff --git a/api/cl_khr_subgroup_non_uniform_vote.asciidoc b/api/cl_khr_subgroup_non_uniform_vote.asciidoc new file mode 100644 index 000000000..f884c58bc --- /dev/null +++ b/api/cl_khr_subgroup_non_uniform_vote.asciidoc @@ -0,0 +1,45 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_vote.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +[[extended-sub-groups]] +=== Description + +`cl_khr_subgroup_non_uniform_vote` adds built-in OpenCL C functions with the +ability to elect a single work item from a sub-group to perform a task and +to hold votes among work items in a sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_non_uniform_vote[Built-in +Non-Uniform Vote and Election Functions for Sub-Groups] section of the +OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_non_uniform_vote: + +int sub_group_elect() + +int sub_group_non_uniform_all( int predicate ) +int sub_group_non_uniform_any( int predicate ) +int sub_group_non_uniform_all_equal( gentype value ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. diff --git a/api/cl_khr_subgroup_rotate.asciidoc b/api/cl_khr_subgroup_rotate.asciidoc new file mode 100644 index 000000000..3f9d04dff --- /dev/null +++ b/api/cl_khr_subgroup_rotate.asciidoc @@ -0,0 +1,32 @@ +// Copyright 2022-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_rotate.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-04-22 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kévin Petit, Arm Ltd. + - Ben Ashbaugh, Intel + - Ruihao Zhang, Qualcomm + - Sven van Haastregt, Arm Ltd. + - Anastasia Stulova, Arm Ltd. + - Stuart Brady, Arm Ltd. + +=== Description + +`cl_khr_subgroup_rotate` adds built-in OpenCL C functions with support for a +new sub-group data exchange operation that makes it possible to rotate +values through the work items in a sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_rotate[Sub-Group Rotation] +section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2022-04-22 + ** Initial version. diff --git a/api/cl_khr_subgroup_shuffle.asciidoc b/api/cl_khr_subgroup_shuffle.asciidoc new file mode 100644 index 000000000..f90bd70bc --- /dev/null +++ b/api/cl_khr_subgroup_shuffle.asciidoc @@ -0,0 +1,37 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_shuffle` adds built-in OpenCL C functions providing +additional ways to exchange data among work items in a sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_shuffle[General Purpose +Shuffles] section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_shuffle: + +gentype sub_group_shuffle( gentype value, uint index ) +gentype sub_group_shuffle_xor( gentype value, uint mask ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. 
diff --git a/api/cl_khr_subgroup_shuffle_relative.asciidoc b/api/cl_khr_subgroup_shuffle_relative.asciidoc new file mode 100644 index 000000000..1419828e2 --- /dev/null +++ b/api/cl_khr_subgroup_shuffle_relative.asciidoc @@ -0,0 +1,38 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle_relative.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_shuffle_relative` adds built-in OpenCL C functions +providing specialized ways to exchange data among work items in a sub-group +that may perform better on some implementations. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_shuffle_relative[Relative +Shuffles] section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_shuffle_relative: + +gentype sub_group_shuffle_up( gentype value, uint delta ) +gentype sub_group_shuffle_down( gentype value, uint delta ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. diff --git a/api/cl_khr_subgroups.asciidoc b/api/cl_khr_subgroups.asciidoc new file mode 100644 index 000000000..4acff29aa --- /dev/null +++ b/api/cl_khr_subgroups.asciidoc @@ -0,0 +1,53 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroups.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroups` adds support for implementation-controlled groups of work +items, known as sub-groups. +Sub-groups behave similarly to work-groups and have their own sets of +built-ins and synchronization primitives. 
+Sub-groups within a work-group are independent, may make forward progress +with respect to each other, and may map to optimized hardware structures +where that makes sense. + +Sub-groups were promoted to a core feature in OpenCL 2.1. +However, note that: + + * The sub-group OpenCL C built-in functions described by this extension + must still be accessed as an OpenCL C extension in OpenCL 2.1. + * Sub-group independent forward progress is an optional device property in + OpenCL 2.1, see {CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS}. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroups[Sub-Groups] section of the +OpenCL C specification for more information. + +=== New Types + + * {cl_kernel_sub_group_info} + +=== New Commands + + * {clGetKernelSubGroupInfoKHR} + +=== New Enums + + * {cl_kernel_sub_group_info_TYPE} + ** {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR} + ** {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_suggested_local_work_size.asciidoc b/api/cl_khr_suggested_local_work_size.asciidoc new file mode 100644 index 000000000..558979623 --- /dev/null +++ b/api/cl_khr_suggested_local_work_size.asciidoc @@ -0,0 +1,37 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_suggested_local_work_size.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-04-22 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_suggested_local_work_size` adds the ability to query a suggested +local work-group size for a kernel running on a device for a specified +global work size and global work offset. +The suggested local work-group size will match the work-group size that +would be chosen if the kernel were enqueued with the specified global work +size and global work offset and a `NULL` local work size. 
+ +By using the suggested local work-group size query an application has +greater insight into the local work-group size chosen by the OpenCL +implementation, and the OpenCL implementation need not re-compute the local +work-group size if the same kernel is enqueued multiple times with the same +parameters. + +=== New Commands + + * {clGetKernelSuggestedLocalWorkSizeKHR} + +=== Version History + + * Revision 1.0.0, 2021-04-22 + ** Initial version. diff --git a/api/cl_khr_terminate_context.asciidoc b/api/cl_khr_terminate_context.asciidoc new file mode 100644 index 000000000..fac86afd8 --- /dev/null +++ b/api/cl_khr_terminate_context.asciidoc @@ -0,0 +1,60 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_terminate_context.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_terminate_context` extension provides a new query to check +whether a device can terminate an OpenCL context, and adds an API to +terminate a context. + +Today, OpenCL provides an API to release a context. +This operation is done only after all queues, memory object, programs and +kernels are released, which in turn might wait for all ongoing operations to +complete. +However, there are cases in which a fast release is required, or release +operation cannot be done, as commands are stuck in mid execution. +An example of the first case can be program termination due to exception, or +quick shutdown due to low power. +Examples of the second case are when a kernel is running too long, or gets +stuck, or it may result from user action which makes the results of the +computation unnecessary. 
+ +In many cases, the driver or the device is capable of speeding up the +closure of ongoing operations when the results are no longer required in a +much more expedient manner than waiting for all previously enqueued +operations to finish. + +=== New Types + + * {cl_device_terminate_capability_khr_TYPE} + +=== New Commands + + * {clTerminateContextKHR} + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_TERMINATE_CAPABILITY_KHR} + * {cl_context_properties_TYPE} + ** {CL_CONTEXT_TERMINATE_KHR} + * {cl_device_terminate_capability_khr_TYPE} + ** {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR} + * New Error codes + ** {CL_CONTEXT_TERMINATED_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_throttle_hints.asciidoc b/api/cl_khr_throttle_hints.asciidoc new file mode 100644 index 000000000..af11a1507 --- /dev/null +++ b/api/cl_khr_throttle_hints.asciidoc @@ -0,0 +1,45 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_throttle_hints.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_throttle_hints` extension adds throttle hints for OpenCL, but +does not specify the throttling behavior or minimum guarantees. +It is expected that the user guide associated with each implementation which +supports this extension will describe the throttling behavior guarantees. + +Note that the throttle hint is orthogonal to functionality defined in +`<>` extension. +For example, a task may have high priority ({CL_QUEUE_PRIORITY_HIGH_KHR}) +but should at the same time be executed at an optimized throttle setting +({CL_QUEUE_THROTTLE_LOW_KHR}). 
+ +=== New Types + + * {cl_queue_throttle_khr_TYPE} + +=== New Enums + + * {cl_queue_properties_TYPE} + ** {CL_QUEUE_THROTTLE_KHR} + * {cl_queue_throttle_khr_TYPE} + ** {CL_QUEUE_THROTTLE_HIGH_KHR} + ** {CL_QUEUE_THROTTLE_MED_KHR} + ** {CL_QUEUE_THROTTLE_LOW_KHR} + + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_work_group_uniform_arithmetic.asciidoc b/api/cl_khr_work_group_uniform_arithmetic.asciidoc new file mode 100644 index 000000000..5dfb1281b --- /dev/null +++ b/api/cl_khr_work_group_uniform_arithmetic.asciidoc @@ -0,0 +1,49 @@ +// Copyright 2022-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_work_group_uniform_arithmetic.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-04-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kevin Petit, Arm Ltd. + - Ben Ashbaugh, Intel + +=== Description + +`cl_khr_work_group_uniform_arithmetic` adds additional built-in work-group +collective functions to OpenCL C. +Specifically, this extension adds support for work-group scans and +reductions for the following operators: + + * Logical operations (`and`, `or`, and `xor`). + * Bitwise operations (`and`, `or`, and `xor`). + * Integer multiplication (`mul`). + * Floating-point multiplication (`mul`). + +See the +link:{OpenCLCSpecURL}#cl_khr_work_group_uniform_arithmetic[Work-group +Collective Uniform Arithmetic Functions] section of the OpenCL C +specification for more information. + +=== Issues + +. For these built-in functions, do we only want to support the types supported by the existing work-group collective functions, or do we want to support the types supported by the sub-group collective functions? ++ +-- +`RESOLVED`: The extension will require the same types as the existing +work-group collective functions. + +The differences are the 8-bit and 16-bit types: `char`, `uchar`, `short`, and +`ushort`. 
+Note that `half` is already supported, if half-precision is supported. +-- + +=== Version History + + * Revision 1.0.0, 2022-04-29 + ** Initial version. diff --git a/api/dictionary.asciidoc b/api/dictionary.asciidoc index 2f85dfdd5..a75c7a6df 100644 --- a/api/dictionary.asciidoc +++ b/api/dictionary.asciidoc @@ -1,5 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/api/api-dictionary.asciidoc[] diff --git a/api/embedded_profile.asciidoc b/api/embedded_profile.asciidoc index cc53176c3..8f841a1e2 100644 --- a/api/embedded_profile.asciidoc +++ b/api/embedded_profile.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [[opencl-embedded-profile]] = OpenCL Embedded Profile diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index 66da48e80..e21cb8719 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 // Please keep footnotes in alphabetical order! @@ -11,10 +10,6 @@ Note that this flag does not provide meaning for atomic memory operations, but only for atomic fence operations in certain circumstances, refer to the Memory Scope section of the OpenCL C specification. 
\ ] -:fn-compatible-image-channel-orders: pass:n[ \ -This allows creation of a sRGB view of the image from a linear RGB view or vice-versa, i.e. the pixels stored in the image can be accessed as linear RGB or sRGB values. \ -] - :fn-create-context-all-or-subset: pass:n[ \ {clCreateContextfromType} may may create a context for all or a subset of the actual physical devices present in the platform that match _device_type_. \ ] @@ -125,7 +120,7 @@ Rather than attempt to share {cl_kernel_TYPE} objects among multiple host thread ] :fn-readimageh: pass:n[ \ -And *read_imageh*, if the *cl_khr_fp16* extension is supported. \ +And *read_imageh*, if the `<>` extension is supported. \ ] :fn-reference-count-usage: pass:n[ \ diff --git a/api/glossary.asciidoc b/api/glossary.asciidoc index dbe50cd01..4794c760a 100644 --- a/api/glossary.asciidoc +++ b/api/glossary.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 // [glossary] // MK:Don't enable [glossary] - prevents chapter numbering. = Glossary diff --git a/api/introduction.asciidoc b/api/introduction.asciidoc index 74fea0ce9..84ba8e1ff 100644 --- a/api/introduction.asciidoc +++ b/api/introduction.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 = Introduction diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index a3b42ee28..3ad82c904 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. 
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 = The OpenCL Architecture @@ -300,7 +299,7 @@ within a work-group. The details of this mapping are described in the following section. -=== Mapping work-items onto an ND-range +=== Mapping Work-items Onto an ND-range The index space supported by OpenCL is called an ND-range. An ND-range is an N-dimensional index space, where N is one, two or three. @@ -411,7 +410,7 @@ In this situation all sub-group scope functions are equivalent to their work-group level equivalents. -=== Execution of kernel-instances +=== Execution of Kernel-instances The work carried out by an OpenCL program occurs through the execution of kernel-instances on compute devices. @@ -515,7 +514,7 @@ work-group synchronization functions. [[device-side-enqueue]] -=== Device-side enqueue +=== Device-Side Enqueue NOTE: Device-side enqueue is <> version 2.0. @@ -990,6 +989,79 @@ explicitly manage association of memory objects with devices in order to improve performance. +ifdef::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +[[lifetime-of-shared-opencl-direct3d-memory-objects]] +=== Lifetime of Shared Direct3D Memory Objects + +This section refers to similar Direct3D 10 and Direct3D 11 objects and +concepts such as _resources_, _reference counts_, and _devices_. + +Sharing is accomplished by creating an OpenCL context via the context create +parameters {CL_CONTEXT_D3D10_DEVICE_KHR} (for Direct3D 10, if the +`<<cl_khr_d3d10_sharing>>` extension is supported) or +{CL_CONTEXT_D3D11_DEVICE_KHR} (for Direct3D 11, if the +`<<cl_khr_d3d11_sharing>>` extension is supported). + +An OpenCL memory object created from a Direct3D resource remains valid as +long as the corresponding Direct3D resource has not been deleted. 
+If the Direct3D resource is deleted through the Direct3D API, subsequent use +of the OpenCL memory object will result in undefined behavior, including but +not limited to possible OpenCL errors, data corruption, and program +termination. + +The successful creation of a {cl_context_TYPE} against a Direct3D device +will increment the internal Direct3D reference count on the specified +device. +The internal Direct3D reference count on that Direct3D device will be +decremented when the OpenCL reference count on the returned OpenCL context +drops to zero. + +The OpenCL context and corresponding command-queues are dependent on the +existence of the Direct3D device from which the OpenCL context was created. +If the Direct3D device is deleted through the Direct3D API, subsequent use +of the OpenCL context will result in undefined behavior, including but not +limited to possible OpenCL errors, data corruption, and program termination. +endif::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] + + +ifdef::cl_khr_egl_image[] +==== Lifetime of Shared EGLImage Objects + +An OpenCL memory object created from an EGL `EGLImage` object remains valid +according to the lifetime behavior as described in the `EGL_KHR_image_base` +extension. + +Any `EGLImage` siblings exist in any client API context + +For OpenCL this means that while the application retains a reference on the +{cl_mem_TYPE} (the EGL sibling), the image remains valid. +endif::cl_khr_egl_image[] + + +ifdef::cl_khr_gl_sharing[] +[[lifetime-of-shared-opencl-opengl-memory-objects]] +=== Lifetime of Shared OpenCL/OpenGL Memory Objects + +An OpenCL memory object created from an OpenGL object (hereinafter referred +to as a "`shared OpenCL/OpenGL object`") remains valid as long as the +corresponding OpenGL object has not been deleted. +If the OpenGL object is deleted through the OpenGL API (e.g. 
+`glDeleteBuffers`, `glDeleteTextures`, or `glDeleteRenderbuffers`), +subsequent use of the OpenCL buffer or image object will result in undefined +behavior, including but not limited to possible OpenCL errors and data +corruption, but may not result in program termination. + +The OpenCL context and corresponding command-queues are dependent on the +existence of the OpenGL share group object, or the share group associated +with the OpenGL context from which the OpenCL context is created. +If the OpenGL share group object or all OpenGL contexts in the share group +are destroyed, any use of the OpenCL context or command-queue(s) will result +in undefined behavior, which may include program termination. +Applications should destroy the OpenCL command-queue(s) and OpenCL context +before destroying the corresponding OpenGL share group or contexts. +endif::cl_khr_gl_sharing[] + + [[shared-virtual-memory]] === Shared Virtual Memory @@ -1232,7 +1304,7 @@ If these guidelines are followed in your OpenCL programs, you can skip the detailed rules behind the relaxed memory models and go directly to <>. -=== Overview of atomic and fence operations +=== Overview of Atomic and Fence Operations OpenCL 2.x has a number of _synchronization operations_ that are used to define memory order constraints in a program. @@ -1741,7 +1813,7 @@ conditions required for *X* to local-synchronize-with *Y* are met, or both sets of conditions are met. -==== Work-group Functions +==== Work-Group Functions The OpenCL kernel execution model includes collective operations across the work-items within a single work-group. @@ -1794,10 +1866,10 @@ must execute the same work-group function call site, or dynamic work-group function instance. -==== Sub-group Functions +==== Sub-Group Functions NOTE: Sub-group functions are <> version 2.1. -Also see extension *cl_khr_subgroups*. +Also see `<>`. The OpenCL kernel execution model includes collective operations across the work-items within a single sub-group.
@@ -1849,7 +1921,7 @@ must execute the same sub-group function call site, or dynamic sub-group function instance. -==== Host-side and Device-side Commands +==== Host-Side and Device-Side Commands This section describes how the OpenCL API functions associated with command-queues contribute to happens-before relations. @@ -2111,6 +2183,8 @@ It is therefore strongly recommended that applications <> supported by the OpenCL device they are running on in order to remain robust to future changes. + +[[versioning]] === Versioning The OpenCL specification is regularly updated with bug fixes and clarifications. @@ -2120,35 +2194,46 @@ and to provide a way to identify each set of changes, the OpenCL API, C language intermediate languages and extensions maintain a version number. Built-in kernels are also versioned. -==== Versions + +[[version-numbers]] +==== Version Numbers A version number comprises three logical fields: -* The _major_ version indicates a significant change. Backwards compatibility may - break across major versions. -* The _minor_ version indicates the addition of new functionality with backwards - compatibility for any existing profiles. -* The _patch_ version indicates bug fixes, clarifications and general improvements. + * The _major_ version indicates a significant change. Backwards + compatibility may break across major versions. + * The _minor_ version indicates the addition of new functionality with + backwards compatibility for any existing profiles. + * The _patch_ version indicates bug fixes, clarifications and general + improvements. -Version numbers are represented using the {cl_version_TYPE} type that is an alias for -a 32-bit integer. The fields are packed as follows: +Version numbers are represented using the {cl_version_TYPE} type that is an +alias for a 32-bit integer. +The fields are packed as follows: -* The _major_ version is a 10-bit integer packed into bits 31-22. -* The _minor_ version is a 10-bit integer packed into bits 21-12. 
-* The _patch_ version is a 12-bit integer packed into bits 11-0. + * The _major_ version is a 10-bit integer packed into bits 31-22. + * The _minor_ version is a 10-bit integer packed into bits 21-12. + * The _patch_ version is a 12-bit integer packed into bits 11-0. This enables versions to be ordered using standard C/C++ operators. A number of convenience macros are provided by the OpenCL Headers to make working with version numbers easier. -`CL_VERSION_MAJOR` extracts the _major_ version from a packed {cl_version_TYPE}. + -`CL_VERSION_MINOR` extracts the _minor_ version from a packed {cl_version_TYPE}. + -`CL_VERSION_PATCH` extracts the _patch_ version from a packed {cl_version_TYPE}. + -`CL_MAKE_VERSION` returns a packed {cl_version_TYPE} from a _major_, _minor_ and -_patch_ version. - -These are defined as follows: + * `CL_VERSION_MAJOR` extracts the _major_ version from a packed + {cl_version_TYPE}. + * `CL_VERSION_MINOR` extracts the _minor_ version from a packed + {cl_version_TYPE}. + * `CL_VERSION_PATCH` extracts the _patch_ version from a packed + {cl_version_TYPE}. + * `CL_MAKE_VERSION` returns a packed {cl_version_TYPE} from a + _major_, _minor_ and _patch_ version. + * {CL_VERSION_MAJOR_BITS_anchor}, {CL_VERSION_MINOR_BITS_anchor}, and + {CL_VERSION_PATCH_BITS_anchor} are the number of bits in the + corresponding field. + * `CL_VERSION_MAJOR_MASK`, `CL_VERSION_MINOR_MASK`, and + `CL_VERSION_PATCH_MASK` are bitmasks used to extract the + corresponding packed fields from the version number. [source,opencl] ---- @@ -2178,14 +2263,18 @@ typedef cl_uint cl_version; ((patch) & CL_VERSION_PATCH_MASK)) ---- -==== Version name pairing +[[version-name-pairing]] +==== Version-Name Pairing -It is sometimes necessary to associate a version to an entity it applies to -(e.g. extension or built-in kernel).
This is done using a dedicated -{cl_name_version_TYPE} structure, defined as follows: +[open,refpage='cl_name_version',desc='Structure describing a version number and corresponding entity name',type='structs'] +-- +The {cl_name_version_TYPE} structure describes a version number and a +corresponding entity (e.g. extension or built-in kernel) name: include::{generated}/api/structs/cl_name_version.txt[] -The `name` field is an array of `CL_NAME_VERSION_MAX_NAME_SIZE` bytes used as -storage for a NUL-terminated string whose maximum length is therefore -`CL_NAME_VERSION_MAX_NAME_SIZE - 1`. + * _version_ is a <>. + * _name_ is an array of {CL_NAME_VERSION_MAX_NAME_SIZE_anchor} `char` + containing a null-terminated string whose maximum length is therefore + {CL_NAME_VERSION_MAX_NAME_SIZE} - 1. +-- diff --git a/api/opencl_assoc_spec.asciidoc b/api/opencl_assoc_spec.asciidoc index aa19b1add..08627472c 100644 --- a/api/opencl_assoc_spec.asciidoc +++ b/api/opencl_assoc_spec.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 = Associated OpenCL specification diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 3aeba7375..b27079192 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 [[opencl-platform-layer]] = The OpenCL Platform Layer @@ -22,10 +21,10 @@ include::{generated}/api/version-notes/clGetPlatformIDs.asciidoc[] * _num_entries_ is the number of {cl_platform_id_TYPE} entries that can be added to _platforms_. - If _platforms_ is not `NULL`, the _num_entries_ must be greater than zero. + If _platforms_ is not `NULL`, _num_entries_ must be greater than zero. * _platforms_ returns a list of OpenCL platforms found. - The {cl_platform_id_TYPE} values returned in _platforms_ can be used to identify a - specific OpenCL platform. + The {cl_platform_id_TYPE} values returned in _platforms_ can be used to + identify a specific OpenCL platform. If _platforms_ is `NULL`, this argument is ignored. The number of OpenCL platforms returned is the minimum of the value specified by _num_entries_ or the number of OpenCL platforms available. @@ -35,15 +34,58 @@ include::{generated}/api/version-notes/clGetPlatformIDs.asciidoc[] // refError {clGetPlatformIDs} returns {CL_SUCCESS} if the function is executed -successfully. +ifndef::cl_khr_icd[successfully.] +ifdef::cl_khr_icd[] +and, if the `<>` extension is supported, there are a non-zero +number of platforms available. +endif::cl_khr_icd[] Otherwise, it returns one of the following errors: +ifdef::cl_khr_icd[] + * {CL_PLATFORM_NOT_FOUND_KHR} if the `<>` extension is + supported and zero platforms are available. +endif::cl_khr_icd[] * {CL_INVALID_VALUE} if _num_entries_ is equal to zero and _platforms_ is not `NULL` or if both _num_platforms_ and _platforms_ are `NULL`. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- +ifdef::cl_khr_icd[] +[open,refpage='clIcdGetPlatformIDsKHR',desc='Query platforms accessible through the Khronos ICD Loader',type='protos'] +-- +To obtain the list of platforms accessible through the Khronos ICD Loader, +call the function: + +include::{generated}/api/protos/clIcdGetPlatformIDsKHR.txt[] +include::{generated}/api/version-notes/clIcdGetPlatformIDsKHR.asciidoc[] + + * _num_entries_ is the number of {cl_platform_id_TYPE} entries that can be + added to _platforms_. + If _platforms_ is not `NULL`, then _num_entries_ must be greater than + zero. + * _platforms_ returns a list of OpenCL platforms available for access + through the Khronos ICD Loader. + The {cl_platform_id_TYPE} values returned in _platforms_ are ICD + compatible and can be used to identify a specific OpenCL platform. + If the _platforms_ argument is `NULL`, then this argument is ignored. + The number of OpenCL platforms returned is the minimum of the value + specified by _num_entries_ or the number of OpenCL platforms available. + * _num_platforms_ returns the number of OpenCL platforms available. + If _num_platforms_ is `NULL`, then this argument is ignored. + +// refError + +{clIcdGetPlatformIDsKHR} returns {CL_SUCCESS} if the function is executed +successfully and there are a non-zero number of platforms available. +Otherwise, it returns one of the following errors: + + * {CL_PLATFORM_NOT_FOUND_KHR} if zero platforms are available. + * {CL_INVALID_VALUE} if _num_entries_ is equal to zero and _platforms_ is + not `NULL` or if both _num_platforms_ and _platforms_ are `NULL`.
+-- +endif::cl_khr_icd[] + [open,refpage='clGetPlatformInfo',desc='Query information about an OpenCL platform',type='protos'] -- Specific information about an OpenCL platform can be obtained with @@ -114,7 +156,17 @@ include::{generated}/api/version-notes/CL_PLATFORM_VERSION.asciidoc[] | {CL_PLATFORM_NUMERIC_VERSION_anchor} include::{generated}/api/version-notes/CL_PLATFORM_NUMERIC_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_PLATFORM_NUMERIC_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_NUMERIC_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_version_TYPE} + +ifdef::cl_khr_extended_versioning[or {cl_version_khr_TYPE}] | Returns the detailed (major, minor, patch) version supported by the platform. The major and minor version numbers returned must match those returned via {CL_PLATFORM_VERSION}. @@ -141,11 +193,22 @@ include::{generated}/api/version-notes/CL_PLATFORM_EXTENSIONS.asciidoc[] | {CL_PLATFORM_EXTENSIONS_WITH_VERSION_anchor} include::{generated}/api/version-notes/CL_PLATFORM_EXTENSIONS_WITH_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_name_version_TYPE}[] + +ifdef::cl_khr_extended_versioning[or {cl_name_version_khr_TYPE}[]] | Returns an array of description (name and version) structures that lists all the extensions supported by the platform. The same extension name must not be reported more than once. The list of extensions reported must match the list reported via {CL_PLATFORM_EXTENSIONS}. + | {CL_PLATFORM_HOST_TIMER_RESOLUTION_anchor} include::{generated}/api/version-notes/CL_PLATFORM_HOST_TIMER_RESOLUTION.asciidoc[] @@ -157,6 +220,87 @@ include::{generated}/api/version-notes/CL_PLATFORM_HOST_TIMER_RESOLUTION.asciido platforms supporting OpenCL 2.1 or 2.2. 
This value must be 0 for devices that do not support device and host timer synchronization. + +ifdef::cl_khr_command_buffer_multi_device[] +| {CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR.asciidoc[] + | {cl_platform_command_buffer_capabilities_khr_TYPE} + | Describes platform command-buffer capabilities, encoded as bits in a + bitfield. + Supported capabilities are: + + {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR_anchor} - Platform + supports the ability to synchronize all commands in a command-buffer + using sync-points, irrespective of the queue the individual commands + are recorded to. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR_anchor} - Platform + supports the ability to create a deep copy of an existing + command-buffer with the commands explicitly remapped to different, + potentially <>, queues. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR_anchor} - Platform + supports the ability to create a remapped command-buffer where the + mapping of commands to queues is done by the OpenCL runtime in a way + it determines as optimal. + If {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} is reported, + {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} must also be reported. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR.asciidoc[] +endif::cl_khr_command_buffer_multi_device[] + +ifdef::cl_khr_external_memory[] +| {CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_memory_handle_type_khr_TYPE}[] + | Returns the list of importable external memory handle types + supported by all devices in _platform_. 
+endif::cl_khr_external_memory[] + +ifdef::cl_khr_semaphore[] +| {CL_PLATFORM_SEMAPHORE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_SEMAPHORE_TYPES_KHR.asciidoc[] + | {cl_semaphore_type_khr_TYPE}[] + | Returns the list of the semaphore types supported by all devices in + _platform_. +endif::cl_khr_semaphore[] + +ifdef::cl_khr_external_semaphore[] +| {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of importable external semaphore handle types + supported by all devices in _platform_. + + The size of this query may be 0 if no importable external semaphore + handle types are supported by all devices in _platform_. +| {CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of exportable external semaphore handle types + supported by all devices in _platform_. + + The size of this query may be 0 if no exportable external semaphore + handle types are supported by all devices in _platform_. +endif::cl_khr_external_semaphore[] + +ifdef::cl_khr_icd[] +| {CL_PLATFORM_ICD_SUFFIX_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_ICD_SUFFIX_KHR.asciidoc[] + | {char_TYPE}[] + | The function name suffix used to identify extension functions to be + directed to this platform by the ICD Loader. +endif::cl_khr_icd[] |==== // refError @@ -417,7 +561,7 @@ include::{generated}/api/version-notes/CL_DEVICE_MAX_WORK_GROUP_SIZE.asciidoc[] If double precision is not supported, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE} must return 0. - If the *cl_khr_fp16* extension is not supported, + If the `<>` extension is not supported, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF} must return 0.
| {CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR_anchor} + {CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT_anchor} + @@ -438,7 +582,7 @@ include::{generated}/api/version-notes/CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR.asciid If double precision is not supported, {CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE} must return 0. - If the *cl_khr_fp16* extension is not supported, + If the `<>` extension is not supported, {CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF} must return 0. | {CL_DEVICE_MAX_CLOCK_FREQUENCY_anchor} @@ -506,29 +650,51 @@ include::{generated}/api/version-notes/CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS.ascii | {CL_DEVICE_IL_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_IL_VERSION.asciidoc[] -Also see extension *cl_khr_il_program*. + +ifdef::cl_khr_il_program[] +or + +{CL_DEVICE_IL_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_IL_VERSION_KHR.asciidoc[] +endif::cl_khr_il_program[] | {char_TYPE}[] | The intermediate languages that can be supported by {clCreateProgramWithIL} for this device. Returns a space-separated list of IL version strings of the form - _.. + + _. For an OpenCL 2.1 or 2.2 device, SPIR-V is a required IL prefix. If the device does not support intermediate language programs, the value must be `""` (an empty string). +ifdef::cl_khr_il_program[] + A device that supports the `<>` extension must + support the `"SPIR-V"` IL prefix. +endif::cl_khr_il_program[] | {CL_DEVICE_ILS_WITH_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_ILS_WITH_VERSION.asciidoc[] -Also see extension *cl_khr_il_program*. + +ifdef::cl_khr_extended_versioning[] +or + +{CL_DEVICE_ILS_WITH_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_ILS_WITH_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_name_version_TYPE}[] + +ifdef::cl_khr_extended_versioning[or {cl_name_version_khr_TYPE}[]] | Returns an array of descriptions (name and version) for all supported - intermediate languages. 
Intermediate languages with the same name may be - reported more than once but each name and major/minor version - combination may only be reported once. The list of intermediate - languages reported must match the list reported via - {CL_DEVICE_IL_VERSION}. + intermediate languages. + Intermediate languages with the same name may be reported more than + once but each name and major/minor version combination may only be + reported once. + The list of intermediate languages reported must match the list + reported via {CL_DEVICE_IL_VERSION}. For an OpenCL 2.1 or 2.2 device, at least one version of SPIR-V must be reported. @@ -601,6 +767,11 @@ include::{generated}/api/version-notes/CL_DEVICE_MAX_SAMPLERS.asciidoc[] | {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_anchor} include::{generated}/api/version-notes/CL_DEVICE_IMAGE_PITCH_ALIGNMENT.asciidoc[] + +ifdef::cl_khr_image2d_from_buffer[] +The equivalent {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR_anchor} may be used if +the `<>` extension is supported. +endif::cl_khr_image2d_from_buffer[] | {cl_uint_TYPE} | The row pitch alignment size in pixels for 2D images created from a buffer. @@ -613,6 +784,11 @@ include::{generated}/api/version-notes/CL_DEVICE_IMAGE_PITCH_ALIGNMENT.asciidoc[ | {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_anchor} include::{generated}/api/version-notes/CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT.asciidoc[] + +ifdef::cl_khr_image2d_from_buffer[] +The equivalent {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR_anchor} may be used +if the `<>` extension is supported.
+endif::cl_khr_image2d_from_buffer[] | {cl_uint_TYPE} | This query specifies the minimum alignment in pixels of the host_ptr specified to {clCreateBuffer} or {clCreateBufferWithProperties} when a 2D image @@ -711,7 +887,8 @@ include::{generated}/api/version-notes/CL_DEVICE_SINGLE_FP_CONFIG.asciidoc[] | {CL_DEVICE_DOUBLE_FP_CONFIG_anchor} footnote:native-rounding-modes[] include::{generated}/api/version-notes/CL_DEVICE_DOUBLE_FP_CONFIG.asciidoc[] -Also see extension *cl_khr_fp64*. + +Also see `<>`. | {cl_device_fp_config_TYPE} | Describes double precision floating-point capability of the OpenCL device. @@ -987,10 +1164,21 @@ include::{generated}/api/version-notes/CL_DEVICE_BUILT_IN_KERNELS.asciidoc[] | {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_name_version_TYPE}[] + +ifdef::cl_khr_extended_versioning[or {cl_name_version_khr_TYPE}[]] | Returns an array of descriptions for the built-in kernels supported by - the device. Each built-in kernel may only be reported once. The list of - reported kernels must match the list returned via + the device. + Each built-in kernel may only be reported once. + The list of reported kernels must match the list returned via {CL_DEVICE_BUILT_IN_KERNELS}. 
| {CL_DEVICE_PLATFORM_anchor} @@ -1044,16 +1232,27 @@ include::{generated}/api/version-notes/CL_DEVICE_VERSION.asciidoc[] | {CL_DEVICE_NUMERIC_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_NUMERIC_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_DEVICE_NUMERIC_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_NUMERIC_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_version_TYPE} + +ifdef::cl_khr_extended_versioning[or {cl_version_khr_TYPE}] | Returns the detailed (major, minor, patch) version supported by the - device. The major and minor version numbers returned must match - those returned via {CL_DEVICE_VERSION}. + device. + The major and minor version numbers returned must match those + returned via {CL_DEVICE_VERSION}. | {CL_DEVICE_OPENCL_C_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_OPENCL_C_VERSION.asciidoc[] | {char_TYPE}[] - | Returns the highest fully backwards compatible OpenCL C version + | Returns the highest fully backwards compatible OpenCL C version supported by the compiler for the device. For devices supporting compilation from OpenCL C source, this will return a version string with the following format: @@ -1121,6 +1320,20 @@ include::{generated}/api/version-notes/CL_DEVICE_OPENCL_C_ALL_VERSIONS.asciidoc[ For devices that do not support compilation from OpenCL C source, this query may return an empty array. +ifdef::cl_khr_extended_versioning[] +| {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR.asciidoc[] + | {cl_version_khr_TYPE} + | Returns detailed (major, minor, patch) numeric version information. + The major and minor version numbers returned must match those returned + via {CL_DEVICE_OPENCL_C_VERSION}. + + This query was not promoted to core in OpenCL version 3.0, but the + core query {CL_DEVICE_OPENCL_C_ALL_VERSIONS} can be used to obtain + equivalent information. 
+endif::cl_khr_extended_versioning[] + | {CL_DEVICE_OPENCL_C_FEATURES_anchor} include::{generated}/api/version-notes/CL_DEVICE_OPENCL_C_FEATURES.asciidoc[] @@ -1147,26 +1360,26 @@ include::{generated}/api/version-notes/CL_DEVICE_EXTENSIONS.asciidoc[] The following Khronos extension names must be returned by all devices that support OpenCL 1.1: - *cl_khr_byte_addressable_store* + - *cl_khr_global_int32_base_atomics* + - *cl_khr_global_int32_extended_atomics* + - *cl_khr_local_int32_base_atomics* + - *cl_khr_local_int32_extended_atomics* + `<>` + + `<>` + + `<>` + + `<>` + + `<>` Additionally, the following Khronos extension names must be returned by all devices that support OpenCL 2.0, OpenCL 2.1, or OpenCL 2.2. For devices that support OpenCL 3.0, these extension names must be returned when and only when the optional feature is supported: - *cl_khr_3d_image_writes* + - *cl_khr_depth_images* + - *cl_khr_image2d_from_buffer* + `<>` + + `<>` + + `<>` Please refer to the OpenCL Extension Specification or vendor provided documentation for a detailed description of these extensions. @@ -1174,7 +1387,17 @@ include::{generated}/api/version-notes/CL_DEVICE_EXTENSIONS.asciidoc[] | {CL_DEVICE_EXTENSIONS_WITH_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_EXTENSIONS_WITH_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_name_version_TYPE}[] + +ifdef::cl_khr_extended_versioning[or {cl_name_version_khr_TYPE}[]] | Returns an array of description (name and version) structures. The same extension name must not be reported more than once. The list of extensions reported must match the list reported via @@ -1354,7 +1577,7 @@ include::{generated}/api/version-notes/CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_P sub-groups, {CL_FALSE} otherwise. 
This query must return {CL_TRUE} for devices that support the - *cl_khr_subgroups* extension, and must return {CL_FALSE} for + `<>` extension, and must return {CL_FALSE} for devices that do not support sub-groups. | {CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES_anchor} @@ -1465,8 +1688,418 @@ include::{generated}/api/version-notes/CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASS | Returns the latest version of the conformance test suite that this device has fully passed in accordance with the official conformance process. +ifdef::cl_khr_command_buffer[] +| {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR.asciidoc[] + + | {cl_device_command_buffer_capabilities_khr_TYPE} + | Describes device command-buffer capabilities, encoded as bits in a bitfield. + Supported capabilities are: + + {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR_anchor} Device + supports the ability to record commands that execute kernels which + contain printf calls. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR_anchor} Device + supports the ability to record commands that execute kernels which + contain device-side kernel-enqueue calls. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR_anchor} Device + supports the command-buffers having a <> that exceeds 1. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR_anchor} Device + supports the ability to record command-buffers to out-of-order + command-queues. 
+ +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR.asciidoc[] + +ifdef::cl_khr_command_buffer_multi_device[] + {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR_anchor} Device + supports the ability to record commands to more than one + command-queue associated with _device_ in a single command-buffer. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR.asciidoc[] +endif::cl_khr_command_buffer_multi_device[] + +| {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR.asciidoc[] + + | {cl_command_queue_properties_TYPE} + | Bitmask of the minimum properties with which a command-queue must be created + to allow a command-buffer to be executed on it. + It is valid for a command-queue to be created with extra properties + in addition to this base requirement and still be compatible with + command-buffer execution. +endif::cl_khr_command_buffer[] + +ifdef::cl_khr_command_buffer_multi_device[] +| {CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR.asciidoc[] + | {cl_uint_TYPE} + | Return the number of root devices listed in + {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} that _device_ can use + device-side synchronization with. + +| {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR.asciidoc[] + | {cl_device_id_TYPE}[] + | Return the list of root devices _device_ can use device-side + synchronization with. + A device should list itself only if it has native support for + synchronizing commands. + Sub-devices are not listed to avoid non-deterministic results as + sub-devices are created. + Instead if a root device is listed, then any of its partitioned + sub-devices can also be natively synchronized with. 
+endif::cl_khr_command_buffer_multi_device[] + +ifdef::cl_khr_command_buffer_mutable_dispatch[] +| {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR.asciidoc[] + | {cl_mutable_dispatch_fields_khr_TYPE} + | Describes device mutable-dispatch capabilities, encoded as bits in a + bitfield. + Supported capabilities are: + + {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR_anchor} - Device supports the + ability to modify the _global_work_offset_ of kernel execution after + command recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR_anchor} - Device supports the + ability to modify the _global_work_size_ of kernel execution after + command recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR_anchor} - Device supports the + ability to modify the _local_work_size_ of kernel execution after + command recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR_anchor} - Device supports the + ability to modify arguments set on a kernel after command recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ARGUMENTS_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR_anchor} - Device supports the + ability to modify execution information set on a kernel after command + recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_EXEC_INFO_KHR.asciidoc[] +endif::cl_khr_command_buffer_mutable_dispatch[] + +ifdef::cl_khr_device_uuid[] +| {CL_DEVICE_UUID_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_UUID_KHR.asciidoc[] + + | {cl_uchar_TYPE}[{CL_UUID_SIZE_KHR}] + | Returns a universally unique identifier (UUID) for the device. 
+ + Device UUIDs must be immutable for a given device across processes, + driver APIs, driver versions, and system reboots. +| {CL_DRIVER_UUID_KHR_anchor} + +include::{generated}/api/version-notes/CL_DRIVER_UUID_KHR.asciidoc[] + + | {cl_uchar_TYPE}[{CL_UUID_SIZE_KHR}] + | Returns a universally unique identifier (UUID) for the software driver + for the device. +| {CL_DEVICE_LUID_VALID_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_LUID_VALID_KHR.asciidoc[] + + | {cl_bool_TYPE} + | Returns {CL_TRUE} if the device has a valid LUID and {CL_FALSE} + otherwise. +| {CL_DEVICE_LUID_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_LUID_KHR.asciidoc[] + + | {cl_uchar_TYPE}[{CL_LUID_SIZE_KHR}] + | Returns a locally unique identifier (LUID) for the device. + + It is not an error to query {CL_DEVICE_LUID_KHR} when + {CL_DEVICE_LUID_VALID_KHR} returns {CL_FALSE}, but in this case the + returned LUID value is undefined. + + When {CL_DEVICE_LUID_VALID_KHR} returns {CL_TRUE}, and the OpenCL + device is running on the Windows operating system, the returned LUID + value can be cast to an `LUID` object and must be equal to the locally + unique identifier of an `IDXGIAdapter1` object that corresponds to the + OpenCL device. +| {CL_DEVICE_NODE_MASK_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_NODE_MASK_KHR.asciidoc[] + + | {cl_uint_TYPE} + | Returns a node mask for the device. + + It is not an error to query {CL_DEVICE_NODE_MASK_KHR} when + {CL_DEVICE_LUID_VALID_KHR} returns {CL_FALSE}, but in this case the + returned node mask is undefined. + + When {CL_DEVICE_LUID_VALID_KHR} returns {CL_TRUE}, the returned node + mask must contain exactly one bit. + If the OpenCL device is running on an operating system that supports + the Direct3D 12 API and the OpenCL device corresponds to an individual + device in a linked device adapter, the returned node mask identifies + the Direct3D 12 node corresponding to the OpenCL device. 
+ Otherwise, the returned node mask must be `1`. +endif::cl_khr_device_uuid[] + +ifdef::cl_khr_external_memory[] +| {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR.asciidoc[] + + | {cl_external_memory_handle_type_khr_TYPE}[] + | Returns the list of importable external memory handle types + supported by _device_. + + Must return a non-empty list of external memory handle types for at + least one of the devices in the platform. + +| {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR.asciidoc[] + + | {cl_external_memory_handle_type_khr_TYPE}[] + | Returns the list of importable external memory handle types + supported by _device_, that are assumed to apply linear layout to + imported images when no other tiling information is provided. + + This list contains a subset of + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR}. + The returned list may be empty. + + External memory handle types not in + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + may have any memory layout. + The layout interpretation of images imported with these handle types + is implementation defined. +endif::cl_khr_external_memory[] + +ifdef::cl_khr_fp16[] +| {CL_DEVICE_HALF_FP_CONFIG_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_HALF_FP_CONFIG.asciidoc[] + | {cl_device_fp_config_TYPE} + | Describes half-precision floating-point capability of the OpenCL + device. 
+ This is a bit-field that describes one or more of the following + values: + + {CL_FP_DENORM} - denorms are supported + + {CL_FP_INF_NAN} - INF and NaNs are supported + + {CL_FP_ROUND_TO_NEAREST} - round to nearest even rounding mode + supported + + {CL_FP_ROUND_TO_ZERO} - round to zero rounding mode supported + + {CL_FP_ROUND_TO_INF} - round to positive and negative infinity + rounding modes supported + + {CL_FP_FMA} - IEEE754-2008 fused multiply-add is supported + + {CL_FP_SOFT_FLOAT} - Basic floating-point operations (such as + addition, subtraction, multiplication) are implemented in software + + If half-precision is supported by the device, then the minimum + half-precision floating-point capability for OpenCL 2.0 or newer + devices is: + + {CL_FP_ROUND_TO_ZERO} + + or + + {CL_FP_ROUND_TO_NEAREST} \| + + {CL_FP_INF_NAN}. +endif::cl_khr_fp16[] + +ifdef::cl_khr_integer_dot_product[] +| {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR.asciidoc[] + | {cl_device_integer_dot_product_capabilities_khr_TYPE} + | Returns the integer dot product capabilities supported by the + device. + + {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} is always + set, indicating that all implementations that support + `<<cl_khr_integer_dot_product>>` must support dot product built-in + functions and, when SPIR-V is supported, SPIR-V instructions that + take four-component vectors of 8-bit integers packed into 32-bit + integers as input. + + {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} is set when dot + product built-in functions and, when SPIR-V is supported, SPIR-V + instructions that take four-component vectors of 8-bit elements as input are + supported. + NOTE: {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} + must be set in version 2.x of the extension.
+ +| {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR.asciidoc[] + + | {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} + | Returns a structure describing the exact 8-bit dot product + combinations that are <<integer-dot-product-acceleration-properties,accelerated>> on the device. + Each member is {CL_TRUE} if the combination it corresponds to is + accelerated, {CL_FALSE} otherwise. + NOTE: + {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR} is + missing before version 2.0 of the extension. + +| {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR.asciidoc[] + + | {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} + | Returns a structure describing the exact 4x8-bit packed dot product + combinations that are <<integer-dot-product-acceleration-properties,accelerated>> on the device. + Each member is {CL_TRUE} if the combination it corresponds to is + accelerated, {CL_FALSE} otherwise. + NOTE: + {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR} + is missing before version 2.0 of the extension. +endif::cl_khr_integer_dot_product[] + +ifdef::cl_khr_pci_bus_info[] +| {CL_DEVICE_PCI_BUS_INFO_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_PCI_BUS_INFO_KHR.asciidoc[] + | {cl_device_pci_bus_info_khr_TYPE} + | Returns PCI bus information for the device. + + The PCI bus information is returned as a single structure that + includes the PCI bus domain, the PCI bus identifier, the PCI device + identifier, and the PCI device function identifier. +endif::cl_khr_pci_bus_info[] + +ifdef::cl_khr_semaphore[] +| {CL_DEVICE_SEMAPHORE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_SEMAPHORE_TYPES_KHR.asciidoc[] + | {cl_semaphore_type_khr_TYPE}[] + | Returns the list of the semaphore types supported by _device_.
+ + Must return a non-empty list for at least one of the devices in the + platform, meeting the minimum requirements described for + {cl_semaphore_type_khr_TYPE}. +endif::cl_khr_semaphore[] + +ifdef::cl_khr_external_semaphore[] +| {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of importable external semaphore handle types + supported by _device_. + + The size of this query may be 0, indicating that the device does not + support importing semaphores. +| {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of exportable external semaphore handle types + supported by _device_. + + The size of this query may be 0, indicating that the device does not + support exporting semaphores. +endif::cl_khr_external_semaphore[] + +ifdef::cl_khr_spir[] +| {CL_DEVICE_SPIR_VERSIONS_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_SPIR_VERSIONS.asciidoc[] + | {char_TYPE}[] + | A space separated list of SPIR versions supported by the device. + + For example, returning `"1.2"` in this query implies that SPIR version + 1.2 is supported by the implementation. +endif::cl_khr_spir[] + +ifdef::cl_khr_subgroup_named_barrier[] +| {CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR.asciidoc[] + | {cl_uint_TYPE} + | Maximum number of named barriers in a work-group for any given + kernel-instance running on the device. + The minimum value is 8.
+endif::cl_khr_subgroup_named_barrier[] + +ifdef::cl_khr_terminate_context[] +| {CL_DEVICE_TERMINATE_CAPABILITY_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_TERMINATE_CAPABILITY_KHR.asciidoc[] + | {cl_device_terminate_capability_khr_TYPE} + | Describes the termination capability of the OpenCL device. + This is a bit-field, where the following values are currently + supported: + + {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR_anchor} - Indicates that + context termination is supported. +endif::cl_khr_terminate_context[] |==== +ifdef::cl_khr_integer_dot_product[] +OpenCL 3 devices must report the following feature macros via +{CL_DEVICE_OPENCL_C_FEATURES} when the corresponding bit is set in the bitfield +returned for {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR}: + +[cols="1,1",options="header"] +|==== +| Feature Bit | Feature Macro +| {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} + | `__opencl_c_integer_dot_product_input_4x8bit_packed` +| {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} + | `__opencl_c_integer_dot_product_input_4x8bit` +|==== +endif::cl_khr_integer_dot_product[] + +ifdef::cl_khr_external_semaphore[] +One of the two queries {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} and +{CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} must return a non-empty list +indicating support for at least one of the valid semaphore handle types +either for import, for export, or both. +endif::cl_khr_external_semaphore[] + +ifdef::cl_khr_device_uuid[] +[NOTE] +.Note +==== +While {CL_DEVICE_UUID_KHR} is specified to remain consistent across driver +versions and system reboots, it is not intended to be usable as a +serializable persistent identifier for a device. +It may change when a device is physically added to, removed from, or moved +to a different connector in a system while that system is powered down. 
+Further, there is no reasonable way to verify with conformance testing that +a given device retains the same UUID in a given system across all driver +versions supported in that system. +While implementations should make every effort to report consistent device +UUIDs across driver versions, applications should avoid relying on the +persistence of this value for uses other than identifying compatible devices +for external object sharing purposes. +==== +endif::cl_khr_device_uuid[] + // refError {clGetDeviceInfo} returns {CL_SUCCESS} if the function is executed @@ -1486,6 +2119,55 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host. -- +ifdef::cl_khr_integer_dot_product[] +[open,refpage='cl_device_integer_dot_product_acceleration_properties_khr',desc='Structure describing supported dot product operations',type='structs'] +-- +[[integer-dot-product-acceleration-properties]] +The {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} structure +describes the exact dot product operations that are accelerated on the device: + +include::{generated}/api/structs/cl_device_integer_dot_product_acceleration_properties_khr.txt[] + + * _signed_accelerated_ is {CL_TRUE} when signed dot product operations are + accelerated, {CL_FALSE} otherwise. + * _unsigned_accelerated_ is {CL_TRUE} when unsigned dot product operations + are accelerated, {CL_FALSE} otherwise. + * _mixed_signedness_accelerated_ is {CL_TRUE} when mixed signedness dot + product operations are accelerated, {CL_FALSE} otherwise. + * _accumulating_saturating_signed_accelerated_ is {CL_TRUE} when accumulating + saturating signed dot product operations are accelerated, {CL_FALSE} + otherwise. + * _accumulating_saturating_unsigned_accelerated_ is {CL_TRUE} when accumulating + saturating unsigned dot product operations are accelerated, {CL_FALSE} + otherwise. 
+ * _accumulating_saturating_mixed_signedness_accelerated_ is {CL_TRUE} when + accumulating saturating mixed signedness dot product operations are + accelerated, {CL_FALSE} otherwise. + +A dot product operation is deemed accelerated if its implementation provides +a performance advantage over application-provided code composed from elementary +instructions and/or other dot product instructions, either because the +implementation uses optimized machine code sequences whose generation from +application-provided code cannot be guaranteed or because it uses hardware +features that cannot otherwise be targeted from application-provided code. +-- +endif::cl_khr_integer_dot_product[] + +ifdef::cl_khr_pci_bus_info[] +[open,refpage='cl_device_pci_bus_info_khr',desc='Structure describing PCI bus information',type='structs'] +-- +The {cl_device_pci_bus_info_khr_TYPE} structure describes PCI bus +information for a device: + +include::{generated}/api/structs/cl_device_pci_bus_info_khr.txt[] + + * _pci_domain_ is the PCI bus domain of the device. + * _pci_bus_ is the PCI bus identifier of the device. + * _pci_device_ is the PCI device identifier of the device. + * _pci_function_ is the PCI device function identifier of the device. +-- +endif::cl_khr_pci_bus_info[] + [open,refpage='clGetDeviceAndHostTimer',desc='Query synchronized host and device timestamps',type='protos'] -- To query device and host timestamps, call the function: @@ -1575,6 +2257,336 @@ Otherwise, it returns one of the following errors: -- +ifdef::cl_khr_dx9_media_sharing[] +=== Sharing DirectX9 Media Surfaces With OpenCL Images + +This section discusses OpenCL functions that allow applications to use media +surfaces as OpenCL memory objects. +This allows efficient sharing of data between OpenCL and media surface APIs. +The OpenCL API may be used to execute kernels that read and/or write memory +objects that are also media surfaces. +An OpenCL image object may be created from a media surface.
+OpenCL memory objects may be created from media surfaces if and only if the +OpenCL context has been created from a media adapter. + + +==== Querying OpenCL Devices Corresponding to Media Adapters + +Media adapters are an abstraction associated with devices that provide media +capabilities. +Adapters with associated OpenCL devices can enable media surface sharing +between the two. + + +[open,refpage='clGetDeviceIDsFromDX9MediaAdapterKHR',desc='Query media adapter for any associated OpenCL devices',type='protos'] +-- +To query a media adapter for any associated OpenCL devices, call the +function + +include::{generated}/api/protos/clGetDeviceIDsFromDX9MediaAdapterKHR.txt[] +include::{generated}/api/version-notes/clGetDeviceIDsFromDX9MediaAdapterKHR.asciidoc[] + + * _platform_ refers to the platform ID returned by {clGetPlatformIDs}. + * _num_media_adapters_ specifies the number of media adapters. + * _media_adapters_type_ is an array of _num_media_adapters_ entries. + Each entry specifies the type of media adapter and must be one of the + values described in the <<dx9-media-adapter-types,media adapter types table>> below. + * _media_adapters_ is an array of _num_media_adapters_ entries. + Each entry specifies the actual adapter whose type is specified by + _media_adapters_type_. + The _media_adapters_ must be one of the types described in the + <<dx9-media-adapter-types,media adapter types>> + table. + * _media_adapter_set_ specifies the set of adapters to return and must be + one of the values described in the <<dx9-media-adapter-sets,media adapter sets>> table. + * _num_entries_ is the number of {cl_device_id_TYPE} entries that can be + added to _devices_. + If _devices_ is not `NULL`, then _num_entries_ must be greater than zero. + * _devices_ returns a list of OpenCL devices found that support the list + of media adapters specified. + The {cl_device_id_TYPE} values returned in _devices_ can be used to + identify a specific OpenCL device. + If _devices_ is `NULL`, this argument is ignored.
+ The number of OpenCL devices returned is the minimum of the value + specified by _num_entries_ and the number of OpenCL devices found + for the specified media adapters. + * _num_devices_ returns the number of OpenCL devices. + If _num_devices_ is `NULL`, this argument is ignored. + +[[dx9-media-adapter-types]] +.DirectX 9 object types that may be used by {clGetDeviceIDsFromDX9MediaAdapterKHR} +[cols=",",options="header",] +|==== +| {cl_dx9_media_adapter_type_khr_TYPE} | Type of Media Adapter +| {CL_ADAPTER_D3D9_KHR_anchor} + +include::{generated}/api/version-notes/CL_ADAPTER_D3D9_KHR.asciidoc[] + | `IDirect3DDevice9 *` +| {CL_ADAPTER_D3D9EX_KHR_anchor} + +include::{generated}/api/version-notes/CL_ADAPTER_D3D9EX_KHR.asciidoc[] + | `IDirect3DDevice9Ex *` +| {CL_ADAPTER_DXVA_KHR_anchor} + +include::{generated}/api/version-notes/CL_ADAPTER_DXVA_KHR.asciidoc[] + | `IDXVAHD_Device *` +|==== + +[[dx9-media-adapter-sets]] +.Sets of devices queriable using {clGetDeviceIDsFromDX9MediaAdapterKHR} +[cols=",",options="header",] +|==== +| {cl_dx9_media_adapter_set_khr_TYPE} | Description +| {CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR_anchor} + +include::{generated}/api/version-notes/CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR.asciidoc[] + | The preferred OpenCL devices associated with the media adapter. +| {CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR_anchor} + +include::{generated}/api/version-notes/CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR.asciidoc[] + | All OpenCL devices that may interoperate with the media adapter. +|==== + +// refError + +{clGetDeviceIDsFromDX9MediaAdapterKHR} returns {CL_SUCCESS} if the function +is executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. + * {CL_INVALID_VALUE} if _num_media_adapters_ is zero or if + _media_adapters_type_ is `NULL` or if _media_adapters_ is `NULL`.
+ * {CL_INVALID_VALUE} if any of the entries in _media_adapters_type_ or + _media_adapters_ is not a valid value. + * {CL_INVALID_VALUE} if _media_adapter_set_ is not a valid value. + * {CL_INVALID_VALUE} if _num_entries_ is equal to zero and _devices_ is + not `NULL` or if both _num_devices_ and _devices_ are `NULL`. + * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to adapters + specified in _media_adapters_ and _media_adapters_type_ were found. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_dx9_media_sharing[] + + +ifdef::cl_khr_d3d10_sharing[] +=== Sharing Direct3D 10 Resources With OpenCL Memory Objects + +This section discusses OpenCL functions that allow applications to use +Direct3D 10 resources as OpenCL memory objects. +This allows efficient sharing of data between OpenCL and Direct3D 10. +The OpenCL API may be used to execute kernels that read and/or write memory +objects that are also Direct3D 10 resources. +An OpenCL image object may be created from a Direct3D 10 texture resource. +An OpenCL buffer object may be created from a Direct3D 10 buffer resource. +OpenCL memory objects may be created from Direct3D 10 objects if and only if +the OpenCL context has been created from a Direct3D 10 device. + +==== Querying OpenCL Devices Corresponding to Direct3D 10 Devices + +The OpenCL devices corresponding to a Direct3D 10 device may be queried. +The OpenCL devices corresponding to a DXGI adapter may also be queried. +The OpenCL devices corresponding to a Direct3D 10 device will be a subset of +the OpenCL devices corresponding to the DXGI adapter against which the +Direct3D 10 device was created. 
+ +[open,refpage='clGetDeviceIDsFromD3D10KHR',desc='Query OpenCL devices corresponding to a Direct3D 10 or DXGI device',type='protos'] +-- +To query OpenCL devices corresponding to a Direct3D 10 device or a DXGI +device, call the function + +include::{generated}/api/protos/clGetDeviceIDsFromD3D10KHR.txt[] +include::{generated}/api/version-notes/clGetDeviceIDsFromD3D10KHR.asciidoc[] + + * _platform_ refers to the platform ID returned by {clGetPlatformIDs}. + * _d3d_device_source_ specifies the type of _d3d_object_, and must be one + of the values shown in the <<d3d10-device-object-types-table,Direct3D 10 object types>> + table. + * _d3d_object_ specifies the object whose corresponding OpenCL devices are + being queried. + The type of _d3d_object_ must be as specified in the + <<d3d10-device-object-types-table,Direct3D 10 object types>> table. + * _d3d_device_set_ specifies the set of devices to return, and must be one + of the values shown in the <<d3d10-device-sets-table,Direct3D 10 device sets>> table. + * _num_entries_ is the number of {cl_device_id_TYPE} entries that can be + added to _devices_. + If _devices_ is not `NULL` then _num_entries_ must be greater than zero. + * _devices_ returns a list of OpenCL devices found. + The {cl_device_id_TYPE} values returned in _devices_ can be used to + identify a specific OpenCL device. + If _devices_ is `NULL`, this argument is ignored. + The number of OpenCL devices returned is the minimum of the value + specified by _num_entries_ and the number of OpenCL devices + corresponding to _d3d_object_. + * _num_devices_ returns the number of OpenCL devices available that + correspond to _d3d_object_. + If _num_devices_ is `NULL`, this argument is ignored.
+ +[[d3d10-device-object-types-table]] +.Direct3D 10 object types that may be used by {clGetDeviceIDsFromD3D10KHR} +[cols=",",options="header",] +|==== +| {cl_d3d10_device_source_khr_TYPE} | Type of _d3d_object_ +| {CL_D3D10_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D10_DEVICE_KHR.asciidoc[] + | `ID3D10Device *` +| {CL_D3D10_DXGI_ADAPTER_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D10_DXGI_ADAPTER_KHR.asciidoc[] + | `IDXGIAdapter *` +|==== + +[[d3d10-device-sets-table]] +.Sets of devices queriable using {clGetDeviceIDsFromD3D10KHR} +[cols=",",options="header",] +|==== +| {cl_d3d10_device_set_khr_TYPE} | Devices returned in _devices_ +| {CL_PREFERRED_DEVICES_FOR_D3D10_KHR_anchor} + +include::{generated}/api/version-notes/CL_PREFERRED_DEVICES_FOR_D3D10_KHR.asciidoc[] + | The preferred OpenCL devices associated with the specified Direct3D + object. +| {CL_ALL_DEVICES_FOR_D3D10_KHR_anchor} + +include::{generated}/api/version-notes/CL_ALL_DEVICES_FOR_D3D10_KHR.asciidoc[] + | All OpenCL devices which may interoperate with the specified Direct3D + object. + Performance of sharing data on these devices may be considerably less + than on the preferred devices. +|==== + +// refError + +{clGetDeviceIDsFromD3D10KHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise it may return + + * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. + * {CL_INVALID_VALUE} if _d3d_device_source_ is not a valid value, + _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero + and _devices_ is not `NULL`, or if both _num_devices_ and _devices_ are + `NULL`. + * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to + _d3d_object_ were found. +-- +endif::cl_khr_d3d10_sharing[] + + +ifdef::cl_khr_d3d11_sharing[] +=== Sharing Direct3D 11 Resources With OpenCL Memory Objects + +This section discusses OpenCL functions that allow applications to use +Direct3D 11 resources as OpenCL memory objects. 
+This allows efficient sharing of data between OpenCL and Direct3D 11. +The OpenCL API may be used to execute kernels that read and/or write memory +objects that are also Direct3D 11 resources. +An OpenCL image object may be created from a Direct3D 11 texture resource. +An OpenCL buffer object may be created from a Direct3D 11 buffer resource. +OpenCL memory objects may be created from Direct3D 11 objects if and only if +the OpenCL context has been created from a Direct3D 11 device. + +==== Querying OpenCL Devices Corresponding to Direct3D 11 Devices + +The OpenCL devices corresponding to a Direct3D 11 device may be queried. +The OpenCL devices corresponding to a DXGI adapter may also be queried. +The OpenCL devices corresponding to a Direct3D 11 device will be a subset of +the OpenCL devices corresponding to the DXGI adapter against which the +Direct3D 11 device was created. + +[open,refpage='clGetDeviceIDsFromD3D11KHR',desc='Query OpenCL devices corresponding to a Direct3D 11 or DXGI device',type='protos'] +-- +To query OpenCL devices corresponding to a Direct3D 11 device or a DXGI +device, call the function + +include::{generated}/api/protos/clGetDeviceIDsFromD3D11KHR.txt[] +include::{generated}/api/version-notes/clGetDeviceIDsFromD3D11KHR.asciidoc[] + + * _platform_ refers to the platform ID returned by {clGetPlatformIDs}. + * _d3d_device_source_ specifies the type of _d3d_object_, and must be one + of the values shown in the <<d3d11-device-object-types-table,Direct3D 11 object types>> + table. + * _d3d_object_ specifies the object whose corresponding OpenCL devices are + being queried. + The type of _d3d_object_ must be as specified in the + <<d3d11-device-object-types-table,Direct3D 11 object types>> table. + * _d3d_device_set_ specifies the set of devices to return, and must be one + of the values shown in the <<d3d11-device-sets-table,Direct3D 11 device sets>> table. + * _num_entries_ is the number of {cl_device_id_TYPE} entries that can be + added to _devices_. + If _devices_ is not `NULL` then _num_entries_ must be greater than zero. + * _devices_ returns a list of OpenCL devices found.
+ The {cl_device_id_TYPE} values returned in _devices_ can be used to + identify a specific OpenCL device. + If _devices_ is `NULL`, this argument is ignored. + The number of OpenCL devices returned is the minimum of the value + specified by _num_entries_ and the number of OpenCL devices + corresponding to _d3d_object_. + * _num_devices_ returns the number of OpenCL devices available that + correspond to _d3d_object_. + If _num_devices_ is `NULL`, this argument is ignored. + +[[d3d11-device-object-types-table]] +.Direct3D 11 object types that may be used by {clGetDeviceIDsFromD3D11KHR} +[cols=",",options="header",] +|==== +| {cl_d3d11_device_source_khr_TYPE} | Type of _d3d_object_ +| {CL_D3D11_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D11_DEVICE_KHR.asciidoc[] + | `ID3D11Device *` +| {CL_D3D11_DXGI_ADAPTER_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D11_DXGI_ADAPTER_KHR.asciidoc[] + | `IDXGIAdapter *` +|==== + +[[d3d11-device-sets-table]] +.Sets of devices queriable using {clGetDeviceIDsFromD3D11KHR} +[cols=",",options="header",] +|==== +| {cl_d3d11_device_set_khr_TYPE} | Devices returned in _devices_ +| {CL_PREFERRED_DEVICES_FOR_D3D11_KHR_anchor} + +include::{generated}/api/version-notes/CL_PREFERRED_DEVICES_FOR_D3D11_KHR.asciidoc[] + | The preferred OpenCL devices associated with the specified Direct3D + object. +| {CL_ALL_DEVICES_FOR_D3D11_KHR_anchor} + +include::{generated}/api/version-notes/CL_ALL_DEVICES_FOR_D3D11_KHR.asciidoc[] + | All OpenCL devices which may interoperate with the specified Direct3D + object. + Performance of sharing data on these devices may be considerably less + than on the preferred devices. +|==== + +// refError + +{clGetDeviceIDsFromD3D11KHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise it may return + + * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. 
+ * {CL_INVALID_VALUE} if _d3d_device_source_ is not a valid value, + _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero + and _devices_ is not `NULL`, or if both _num_devices_ and _devices_ are + `NULL`. + * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to + _d3d_object_ were found. +-- +endif::cl_khr_d3d11_sharing[] + + == Partitioning a Device NOTE: Partitioning devices is <> version 1.2. @@ -1650,7 +2662,7 @@ include::{generated}/api/version-notes/CL_DEVICE_PARTITION_BY_COUNTS.asciidoc[] include::{generated}/api/version-notes/CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN.asciidoc[] | {cl_device_affinity_domain_TYPE} - | Split the device into smaller aggregate devices containing one or + | Split the device into smaller aggregate devices containing one or more compute units that all share part of a cache hierarchy. The value accompanying this property may be drawn from the following list: @@ -1822,10 +2834,11 @@ include::{generated}/api/version-notes/clCreateContext.asciidoc[] Each property name is immediately followed by the corresponding desired value. The list is terminated with 0. - The list of supported properties is described in the - <> table. - _properties_ can be `NULL` in which case the platform that is selected is - implementation-defined. + The list of supported properties, and their default values if not + present in _properties_, is described in the <> table. + _properties_ can be `NULL`, in which case all properties take on their + default values. * _num_devices_ is the number of devices specified in the _devices_ argument. * _devices_ is a pointer to a list of unique devices returned by {clGetDeviceIDs} or sub-devices created by {clCreateSubDevices} for a @@ -1868,6 +2881,8 @@ on one or more devices specified in the context. include::{generated}/api/version-notes/CL_CONTEXT_PLATFORM.asciidoc[] | {cl_platform_id_TYPE} | Specifies the platform to use. + + Defaults to an implementation-defined platform if not specified. 
| {CL_CONTEXT_INTEROP_USER_SYNC_anchor} include::{generated}/api/version-notes/CL_CONTEXT_INTEROP_USER_SYNC.asciidoc[] @@ -1878,10 +2893,154 @@ include::{generated}/api/version-notes/CL_CONTEXT_INTEROP_USER_SYNC.asciidoc[] Specification that describe sharing with other APIs for restrictions on using this flag. - If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified, a default of - {CL_FALSE} is assumed. + Defaults to {CL_FALSE} if not specified. + +ifdef::cl_khr_dx9_media_sharing[] +| {CL_CONTEXT_ADAPTER_D3D9_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_ADAPTER_D3D9_KHR.asciidoc[] + | `IDirect3DDevice9 *` + | Specifies an `IDirect3DDevice9` to use for D3D9 interop. +| {CL_CONTEXT_ADAPTER_D3D9EX_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_ADAPTER_D3D9EX_KHR.asciidoc[] + | `IDirect3DDevice9Ex *` + | Specifies an `IDirect3DDevice9Ex` to use for D3D9 interop. +| {CL_CONTEXT_ADAPTER_DXVA_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_ADAPTER_DXVA_KHR.asciidoc[] + | `IDXVAHD_Device *` + | Specifies an `IDXVAHD_Device` to use for DXVA interop. +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +| {CL_CONTEXT_D3D10_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_D3D10_DEVICE_KHR.asciidoc[] + | `ID3D10Device *` + | Specifies the `ID3D10Device *` to use for Direct3D 10 interoperability. + + The default value is `NULL`. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {CL_CONTEXT_D3D11_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_D3D11_DEVICE_KHR.asciidoc[] + | `ID3D11Device *` + | Specifies the `ID3D11Device *` to use for Direct3D 11 interoperability. + + The default value is `NULL`.
+endif::cl_khr_d3d11_sharing[] + +ifdef::cl_khr_gl_sharing[] +| {CL_GL_CONTEXT_KHR_anchor} + +include::{generated}/api/version-notes/CL_GL_CONTEXT_KHR.asciidoc[] + | OpenGL context handle + | OpenGL context to associate the OpenCL context with + + Defaults to 0 if not specified. +| {CL_CGL_SHAREGROUP_KHR_anchor} + +include::{generated}/api/version-notes/CL_CGL_SHAREGROUP_KHR.asciidoc[] + | CGL share group handle + | CGL share group to associate the OpenCL context with + + Defaults to 0 if not specified. +| {CL_EGL_DISPLAY_KHR_anchor} + +include::{generated}/api/version-notes/CL_EGL_DISPLAY_KHR.asciidoc[] + | EGL `EGLDisplay` handle + | `EGLDisplay` an OpenGL context was created with respect to + + Defaults to `EGL_NO_DISPLAY` if not specified. +| {CL_GLX_DISPLAY_KHR_anchor} + +include::{generated}/api/version-notes/CL_GLX_DISPLAY_KHR.asciidoc[] + | X handle + | X Display an OpenGL context was created with respect to + + Defaults to `None` if not specified. +| {CL_WGL_HDC_KHR_anchor} + +include::{generated}/api/version-notes/CL_WGL_HDC_KHR.asciidoc[] + | Windows HDC handle + | HDC an OpenGL context was created with respect to + + Defaults to 0 if not specified. +endif::cl_khr_gl_sharing[] + +ifdef::cl_khr_initialize_memory[] +| {CL_CONTEXT_MEMORY_INITIALIZE_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_MEMORY_INITIALIZE_KHR.asciidoc[] + | {cl_context_memory_initialize_khr_TYPE} + | Describes which memory types for the context must be initialized. + This is a bit-field, where the following values are currently + supported: + + {CL_CONTEXT_MEMORY_INITIALIZE_LOCAL_KHR_anchor} -- Initialize local + memory to zeros. + + {CL_CONTEXT_MEMORY_INITIALIZE_PRIVATE_KHR_anchor} -- Initialize + private memory to zeros. 
+endif::cl_khr_initialize_memory[] + +ifdef::cl_khr_terminate_context[] +| {CL_CONTEXT_TERMINATE_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_TERMINATE_KHR.asciidoc[] + | {cl_bool_TYPE} + | Specifies whether the context can be terminated. + The default value is {CL_FALSE}. +endif::cl_khr_terminate_context[] |==== +ifdef::cl_khr_gl_sharing[] +Some of the properties specified in the <> table control sharing of OpenCL memory objects with OpenGL +buffer, texture, and renderbuffer objects. + +[[specifying-gl-context]] +Depending on the platform-specific API used to bind OpenGL contexts to the +window system, the following properties may be set to identify an OpenGL +context: + + * When the CGL binding API is supported, the property + {CL_CGL_SHAREGROUP_KHR} should be set to a CGLShareGroup handle to a CGL + share group object. + * When the EGL binding API is supported, the property {CL_GL_CONTEXT_KHR} + should be set to an EGLContext handle to an OpenGL ES or OpenGL context, + and the property {CL_EGL_DISPLAY_KHR} should be set to the `EGLDisplay` + handle of the display used to create the OpenGL ES or OpenGL context. + * When the GLX binding API is supported, the property {CL_GL_CONTEXT_KHR} + should be set to a GLXContext handle to an OpenGL context, and the + property {CL_GLX_DISPLAY_KHR} should be set to the `Display` handle of + the X Window System display used to create the OpenGL context. + * When the WGL binding API is supported, the property {CL_GL_CONTEXT_KHR} + should be set to an HGLRC handle to an OpenGL context, and the property + {CL_WGL_HDC_KHR} should be set to the HDC handle of the display used to + create the OpenGL context. 
+ +Memory objects created in the context so specified may be shared with the +specified OpenGL or OpenGL ES context (as well as with any other OpenGL +contexts on the share list of that context, according to the description of +sharing in the GLX 1.4 and EGL 1.5 specifications, and the WGL documentation +for OpenGL implementations on Microsoft Windows), or with the explicitly +identified OpenGL share group for CGL. +If no OpenGL or OpenGL ES context or share group is specified in the +property list, then memory objects may not be shared, and attempts to create +such objects will result in a {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} +error. + +OpenCL / OpenGL sharing does not support the {CL_CONTEXT_INTEROP_USER_SYNC} +property defined in the <> +table. +Specifying this property when creating a context with OpenCL / OpenGL +sharing will return an appropriate error. +endif::cl_khr_gl_sharing[] + NOTE: There are a number of cases where error notifications need to be delivered due to an error that occurs outside a context. Such notifications may not be delivered through the _pfn_notify_ callback. @@ -1897,7 +3056,7 @@ returned in _errcode_ret_: * {CL_INVALID_PLATFORM} if no platform is specified in _properties_ and no platform could be selected, or if the platform specified in _properties_ is not a valid platform. - * {CL_INVALID_PROPERTY} if context property name in _properties_ is not a + * {CL_INVALID_PROPERTY} if a context property name in _properties_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than once. @@ -1914,6 +3073,92 @@ returned in _errcode_ret_: * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
+ifdef::cl_khr_dx9_media_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} if any of the values of the + properties {CL_CONTEXT_ADAPTER_D3D9_KHR}, + {CL_CONTEXT_ADAPTER_D3D9EX_KHR} or {CL_CONTEXT_ADAPTER_DXVA_KHR} is + non-`NULL` and does not specify a valid media adapter with which the + _cl_device_ids_ against which this context is to be created may + interoperate. +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D10_DEVICE_KHR} if the value of the property + {CL_CONTEXT_D3D10_DEVICE_KHR} is non-`NULL` and does not specify a valid + Direct3D 10 device with which the _cl_device_ids_ against which this + context is to be created may interoperate. + * {CL_INVALID_OPERATION} if Direct3D 10 interoperability is specified by + setting {CL_CONTEXT_D3D10_DEVICE_KHR} to a non-`NULL` value, and + interoperability with another graphics API is also specified. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D11_DEVICE_KHR} if the value of the property + {CL_CONTEXT_D3D11_DEVICE_KHR} is non-`NULL` and does not specify a valid + Direct3D 11 device with which the _cl_device_ids_ against which this + context is to be created may interoperate. + * {CL_INVALID_OPERATION} if Direct3D 11 interoperability is specified by + setting {CL_CONTEXT_D3D11_DEVICE_KHR} to a non-`NULL` value, and + interoperability with another graphics API is also specified. 
+endif::cl_khr_d3d11_sharing[] + +ifdef::cl_khr_gl_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a context was specified for + an OpenGL or OpenGL ES implementation using the EGL, GLX, or WGL binding + APIs, as <>; and any of the + following conditions hold: + ** The specified display and context properties do not identify a valid + OpenGL or OpenGL ES context. + ** The specified context does not support buffer and renderbuffer objects. + ** The specified context is not compatible with the OpenCL context being + created (for example, it exists in a physically distinct address space, + such as another hardware device; or it does not support sharing data + with OpenCL due to implementation restrictions). + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a share group was specified + for a CGL-based OpenGL implementation by setting the property + {CL_CGL_SHAREGROUP_KHR}, and the specified share group does not identify + a valid CGL share group object. + * {CL_INVALID_OPERATION} if a context was specified as described above and + any of the following conditions hold: + ** A context or share group object was specified for one of CGL, EGL, GLX, + or WGL and the OpenGL implementation does not support that + window-system binding API. + ** More than one of the properties {CL_CGL_SHAREGROUP_KHR}, + {CL_EGL_DISPLAY_KHR}, {CL_GLX_DISPLAY_KHR}, and {CL_WGL_HDC_KHR} is set + to a non-default value. + ** Both of the properties {CL_CGL_SHAREGROUP_KHR} and {CL_GL_CONTEXT_KHR} + are set to non-default values. + ** Any of the devices specified in the _devices_ argument cannot support + OpenCL objects which share the data store of an OpenGL object. + * {CL_INVALID_PROPERTY} if both {CL_CONTEXT_INTEROP_USER_SYNC}, and any of + the properties defined by the `<>` extension are + defined in _properties_. 
+endif::cl_khr_gl_sharing[] + +ifdef::cl_khr_terminate_context[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_PROPERTY} if the `<>` extension is + supported and {CL_CONTEXT_TERMINATE_KHR} is set to {CL_TRUE} in + _properties_, but not all of the devices associated with the context + support the ability to support context termination (i.e. + {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR} is set for + {CL_DEVICE_TERMINATE_CAPABILITY_KHR}). +endif::cl_khr_terminate_context[] + [NOTE] ==== It is possible that a device(s) becomes unavailable after a context and @@ -1940,10 +3185,11 @@ include::{generated}/api/version-notes/clCreateContextFromType.asciidoc[] corresponding values. Each property name is immediately followed by the corresponding desired value. - The list of supported properties is described in the - <> table. - _properties_ can also be `NULL` in which case the platform that is selected - is implementation-defined. + The list of supported properties, and their default values if not + present in _properties_, is described in the <> table. + _properties_ can be `NULL`, in which case all properties take on their + default values. * _device_type_ is a bit-field that identifies the type of device and is described in the <> table. * _pfn_notify_ and _user_data_ are described in {clCreateContext}. @@ -1965,7 +3211,7 @@ returned in _errcode_ret_: * {CL_INVALID_PLATFORM} if no platform is specified in _properties_ and no platform could be selected, or if the platform specified in _properties_ is not a valid platform. - * {CL_INVALID_PROPERTY} if context property name in _properties_ is not a + * {CL_INVALID_PROPERTY} if a context property name in _properties_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than once. @@ -1981,6 +3227,81 @@ returned in _errcode_ret_: by the OpenCL implementation on the device. 
 * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. + +ifdef::cl_khr_dx9_media_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} if any of the values of the + properties {CL_CONTEXT_ADAPTER_D3D9_KHR}, + {CL_CONTEXT_ADAPTER_D3D9EX_KHR} or {CL_CONTEXT_ADAPTER_DXVA_KHR} is + non-`NULL` and does not specify a valid media adapter with which the + _cl_device_ids_ against which this context is to be created may + interoperate. +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D10_DEVICE_KHR} if the value of the property + {CL_CONTEXT_D3D10_DEVICE_KHR} is non-`NULL` and does not specify a valid + Direct3D 10 device with which the _cl_device_ids_ against which this + context is to be created may interoperate. + * {CL_INVALID_OPERATION} if Direct3D 10 interoperability is specified by + setting {CL_CONTEXT_D3D10_DEVICE_KHR} to a non-`NULL` value, and + interoperability with another graphics API is also specified. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D11_DEVICE_KHR} if the value of the property + {CL_CONTEXT_D3D11_DEVICE_KHR} is non-`NULL` and does not specify a valid + Direct3D 11 device with which the _cl_device_ids_ against which this + context is to be created may interoperate. + * {CL_INVALID_OPERATION} if Direct3D 11 interoperability is specified by + setting {CL_CONTEXT_D3D11_DEVICE_KHR} to a non-`NULL` value, and + interoperability with another graphics API is also specified. 
+endif::cl_khr_d3d11_sharing[] + +ifdef::cl_khr_gl_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a context was specified for + an OpenGL or OpenGL ES implementation using the EGL, GLX, or WGL binding + APIs, as <>; and + any of the following conditions hold: + ** The specified display and context properties do not identify a valid + OpenGL or OpenGL ES context. + ** The specified context does not support buffer and renderbuffer objects. + ** The specified context is not compatible with the OpenCL context being + created (for example, it exists in a physically distinct address space, + such as another hardware device; or it does not support sharing data + with OpenCL due to implementation restrictions). + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a share group was specified + for a CGL-based OpenGL implementation by setting the property + {CL_CGL_SHAREGROUP_KHR}, and the specified share group does not identify + a valid CGL share group object. + * {CL_INVALID_OPERATION} if a context was specified as described above and + any of the following conditions hold: + ** A context or share group object was specified for one of CGL, EGL, GLX, + or WGL and the OpenGL implementation does not support that + window-system binding API. + ** More than one of the properties {CL_CGL_SHAREGROUP_KHR}, + {CL_EGL_DISPLAY_KHR}, {CL_GLX_DISPLAY_KHR}, and {CL_WGL_HDC_KHR} is set + to a non-default value. + ** Both of the properties {CL_CGL_SHAREGROUP_KHR} and {CL_GL_CONTEXT_KHR} + are set to non-default values. + ** Any of the devices specified in the _devices_ argument cannot support + OpenCL objects which share the data store of an OpenGL object. + * {CL_INVALID_PROPERTY} if both {CL_CONTEXT_INTEROP_USER_SYNC}, and any of + the properties defined by the `<>` extension are + defined in _properties_. 
+endif::cl_khr_gl_sharing[] + -- [open,refpage='clRetainContext',desc='Retain an OpenCL context',type='protos',xrefs='clCreateContext clReleaseContext'] @@ -2044,6 +3365,75 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host. -- +ifdef::cl_khr_terminate_context[] +[open,refpage='clTerminateContextKHR',desc='Terminate pending work associated with a context and render data owned by the context invalid',type='protos'] +-- +To terminate all pending work associated with a context and render all data +owned by the context invalid, call the function + +include::{generated}/api/protos/clTerminateContextKHR.txt[] +include::{generated}/api/version-notes/clTerminateContextKHR.asciidoc[] + + * _context_ must be a valid OpenCL context. + +It is the responsibility of the application to release all objects +associated with the context being terminated. + +When a context is terminated: + + * The execution status of enqueued commands will be + {CL_CONTEXT_TERMINATED_KHR_anchor}. + Event objects can be queried using {clGetEventInfo}. + Event callbacks can be registered and registered event callbacks will be + called with _event_command_status_ set to {CL_CONTEXT_TERMINATED_KHR}. + {clWaitForEvents} will return immediately for commands associated + with event objects specified in event_list. + The status of user events can be set. + Event objects can be retained and released. + {clGetEventProfilingInfo} returns {CL_PROFILING_INFO_NOT_AVAILABLE}. + * The context is considered to be terminated. + A callback function registered when the context was created will be + called. + Only queries, retain and release operations can be performed on the + context. + All other APIs that use a context as an argument will return + {CL_CONTEXT_TERMINATED_KHR}. + * The contents of the memory regions of the memory objects are undefined. 
+ Queries, registering a destructor callback, retain and release + operations can be performed on the memory objects. + * Once a context has been terminated, all OpenCL API calls that create + objects or enqueue commands will return {CL_CONTEXT_TERMINATED_KHR}. + APIs that release OpenCL objects will continue to operate as though + {clTerminateContextKHR} was not called. + * The behavior of callbacks will remain unchanged, and will report + appropriate error, if executing after termination of context. + This behavior is similar to enqueued commands, after the command-queue + has become invalid. + +// refError + +{clTerminateContextKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid OpenCL context. + * {CL_CONTEXT_TERMINATED_KHR} if _context_ has already been terminated. + * {CL_INVALID_OPERATION} if _context_ was not created with + {CL_CONTEXT_TERMINATE_KHR} set to {CL_TRUE}. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +An implementation that supports this extension must be able to terminate +commands currently executing on devices or queued across all command-queues +associated with the context that is being terminated. +The implementation cannot implement this extension by waiting for currently +executing (or queued) commands to finish execution on devices associated +with this context (i.e. doing a {clFinish}). 
+-- +endif::cl_khr_terminate_context[] + [open,refpage='clGetContextInfo',desc='Query information about an OpenCL context',type='protos'] -- To query information about a context, call the function: @@ -2106,6 +3496,31 @@ include::{generated}/api/version-notes/CL_CONTEXT_PROPERTIES.asciidoc[] {clCreateContextFromType} used to create _context_ was `NULL`, the implementation must return _param_value_size_ret_ equal to 0, indicating that there are no properties to be returned. + +ifdef::cl_khr_d3d10_sharing[] +| {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR.asciidoc[] + | {cl_bool_TYPE} + | Returns {CL_TRUE} if Direct3D 10 resources created as shared by + setting _MiscFlags_ to include `D3D10_RESOURCE_MISC_SHARED` will + perform faster when shared with OpenCL, compared with resources which + have not set this flag. + Otherwise returns {CL_FALSE}. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR.asciidoc[] + | {cl_bool_TYPE} + | Returns {CL_TRUE} if Direct3D 11 resources created as shared by + setting _MiscFlags_ to include `D3D11_RESOURCE_MISC_SHARED` will + perform faster when shared with OpenCL, compared with resources which + have not set this flag. + Otherwise returns {CL_FALSE}. +endif::cl_khr_d3d11_sharing[] + |==== // refError diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 18902d1b0..298620641 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 [[opencl-runtime]] = The OpenCL Runtime @@ -25,20 +24,26 @@ Sharing of objects across multiple command-queues will require the application to perform appropriate synchronization. This is described in <> -[open,refpage='clCreateCommandQueueWithProperties',desc='Create a host or device command-queue on a specific device.',type='protos'] +[open,refpage='clCreateCommandQueueWithProperties',desc='Create a host or device command-queue on a specific device.',type='protos',alias='clCreateCommandQueueWithPropertiesKHR'] -- To create a host or device command-queue on a specific device, call the function include::{generated}/api/protos/clCreateCommandQueueWithProperties.txt[] include::{generated}/api/version-notes/clCreateCommandQueueWithProperties.asciidoc[] -Also see extension *cl_khr_create_command_queue*. + +ifdef::cl_khr_create_command_queue[] +or the equivalent + +include::{generated}/api/protos/clCreateCommandQueueWithPropertiesKHR.txt[] +include::{generated}/api/version-notes/clCreateCommandQueueWithPropertiesKHR.asciidoc[] +endif::cl_khr_create_command_queue[] * _context_ must be a valid OpenCL context. * _device_ must be a device or sub-device associated with _context_. It can either be in the list of devices and sub-devices specified when - _context_ is created using {clCreateContext} or be a root device with the - same device type as specified when _context_ is created using + _context_ is created using {clCreateContext} or be a root device with + the same device type as specified when _context_ is created using {clCreateContextFromType}. * _properties_ specifies a list of properties for the command-queue and their corresponding values. @@ -63,6 +68,10 @@ Also see extension *cl_khr_create_command_queue*. 
include::{generated}/api/version-notes/CL_QUEUE_PROPERTIES.asciidoc[] | {cl_command_queue_properties_TYPE} + +ifdef::cl_khr_create_command_queue[] +or {cl_bitfield_TYPE} if the `<>` extension is supported +endif::cl_khr_create_command_queue[] | This is a bitfield and can be set to a combination of the following values: @@ -71,12 +80,14 @@ include::{generated}/api/version-notes/CL_QUEUE_PROPERTIES.asciidoc[] out-of-order. If set, the commands in the command-queue are executed out-of-order. Otherwise, commands are executed in-order. + include::{generated}/api/version-notes/CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE.asciidoc[] {CL_QUEUE_PROFILING_ENABLE_anchor} - Enable or disable profiling of commands in the command-queue. If set, the profiling of commands is enabled. Otherwise profiling of commands is disabled. + include::{generated}/api/version-notes/CL_QUEUE_PROFILING_ENABLE.asciidoc[] {CL_QUEUE_ON_DEVICE_anchor} - Indicates that this is a device queue. @@ -84,12 +95,14 @@ include::{generated}/api/version-notes/CL_QUEUE_PROFILING_ENABLE.asciidoc[] {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} footnote:[{fn-out-of-order-device-queue}] must also be set. + include::{generated}/api/version-notes/CL_QUEUE_ON_DEVICE.asciidoc[] {CL_QUEUE_ON_DEVICE_DEFAULT_anchor} footnote:[{fn-default-device-queue}] - indicates that this is the default device queue. This can only be used with {CL_QUEUE_ON_DEVICE}. + include::{generated}/api/version-notes/CL_QUEUE_ON_DEVICE_DEFAULT.asciidoc[] If {CL_QUEUE_PROPERTIES} is not specified an in-order host command-queue @@ -109,6 +122,55 @@ include::{generated}/api/version-notes/CL_QUEUE_SIZE.asciidoc[] If {CL_QUEUE_SIZE} is not specified, the device queue is created with {CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE} as the size of the queue. 
+ +ifdef::cl_khr_priority_hints[] +| {CL_QUEUE_PRIORITY_KHR_anchor} + +include::{generated}/api/version-notes/CL_QUEUE_PRIORITY_KHR.asciidoc[] + | {cl_queue_priority_khr_TYPE} + | Specifies a priority hint for command queues belonging to the same + OpenCL context. + + NOTE: Refer to the user guide associated with each implementation + supporting this extension for its priority behavior guarantees, if + any. + + {CL_QUEUE_PRIORITY_HIGH_KHR_anchor} - Indicates command queues + should have high priority. + + {CL_QUEUE_PRIORITY_MED_KHR_anchor} - Indicates command queues should + have medium priority. + + {CL_QUEUE_PRIORITY_LOW_KHR_anchor} - Indicates command queues should + have low priority. + + If {CL_QUEUE_PRIORITY_KHR} is not specified, the default priority + {CL_QUEUE_PRIORITY_MED_KHR} is used. +endif::cl_khr_priority_hints[] + +ifdef::cl_khr_throttle_hints[] +| {CL_QUEUE_THROTTLE_KHR_anchor} + +include::{generated}/api/version-notes/CL_QUEUE_THROTTLE_KHR.asciidoc[] + | {cl_queue_throttle_khr_TYPE} + | Specifies a throttle hint for a command queue. + + NOTE: Refer to the user guide associated with each implementation + supporting this extension for its throttling behavior guarantees, if + any. + + {CL_QUEUE_THROTTLE_HIGH_KHR_anchor} - Indicates the queue should + execute at full throttle, which may consume more energy. + + {CL_QUEUE_THROTTLE_MED_KHR_anchor} - Indicates normal throttling + behavior. + + {CL_QUEUE_THROTTLE_LOW_KHR_anchor} - Indicates the queue should + execute at low throttle, optimized for lowest energy consumption. + + If {CL_QUEUE_THROTTLE_KHR} is not specified, the default throttle hint + {CL_QUEUE_THROTTLE_MED_KHR} is used. +endif::cl_khr_throttle_hints[] |==== // refError @@ -120,13 +182,23 @@ Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_DEVICE} if _device_ is not a valid device or is not associated - with _context_. 
+ * {CL_INVALID_DEVICE} if _device_ is not a valid device or is not + associated with _context_. * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_QUEUE_PROPERTIES} if values specified in _properties_ are valid but are not supported by the device. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. +ifdef::cl_khr_priority_hints[] + * {CL_INVALID_QUEUE_PROPERTIES} if the `<>` + extension is supported, the {CL_QUEUE_PRIORITY_KHR} property is + specified, and the queue is a {CL_QUEUE_ON_DEVICE}. +endif::cl_khr_priority_hints[] +ifdef::cl_khr_throttle_hints[] + * {CL_INVALID_QUEUE_PROPERTIES} if the `<>` + extension is supported, the {CL_QUEUE_THROTTLE_KHR} property is + specified, and the queue is a {CL_QUEUE_ON_DEVICE}. +endif::cl_khr_throttle_hints[] + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- @@ -169,7 +241,6 @@ include::{generated}/api/version-notes/clCreateCommandQueue.asciidoc[] If set, the profiling of commands is enabled. Otherwise profiling of commands is disabled. |==== - * _errcode_ret_ will return an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. @@ -425,8 +496,8 @@ include::{generated}/api/version-notes/clSetCommandQueueProperty.asciidoc[] enabled (if _enable_ is {CL_TRUE}) or disabled (if _enable_ is {CL_FALSE}) for the command-queue. * _old_properties_ returns the command-queue properties before they were - changed by {clSetCommandQueueProperty}. If _old_properties_ is `NULL`, it - is ignored. + changed by {clSetCommandQueueProperty}. If _old_properties_ is `NULL`, + it is ignored. 
// refError @@ -478,7 +549,9 @@ include::{generated}/api/version-notes/clCreateBufferWithProperties.asciidoc[] * _properties_ is an optional list of properties for the buffer object and their corresponding values. The list is terminated with the special property `0`. If no properties are required, _properties_ may be `NULL`. - OpenCL 3.0 does not define any optional properties for buffers. + OpenCL 3.0 does not define any optional properties for buffers, + but extensions may define properties as described in the + <>. * _flags_ is a bit-field that is used to specify allocation and usage information about the image memory object being created and is described in the <> table. @@ -510,6 +583,38 @@ Locations in the buffers underlying shared memory can be operated on using atomic operations to the devices level of support as defined in the memory model. +[[external-buffer-memory-properties-table]] +.List of supported buffer creation properties +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Property | Property Value | Description + +ifdef::cl_khr_external_memory[] +| {CL_MEM_DEVICE_HANDLE_LIST_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_DEVICE_HANDLE_LIST_KHR.asciidoc[] + | {cl_device_id_TYPE}[] + | Specifies the list of OpenCL devices (terminated with + {CL_MEM_DEVICE_HANDLE_LIST_END_KHR}) to associate with the external + memory handle. +endif::cl_khr_external_memory[] +|==== + +ifdef::cl_khr_external_memory[] +If {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_, +the memory object created by {clCreateBufferWithProperties} or +{clCreateImageWithProperties} is by default accessible to all devices in the +_context_. + +The properties used to create a buffer from an external memory handle are +<>. 
+When a buffer is created from an external memory handle, the +_flags_ used to specify usage information for the buffer must not +include {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or +{CL_MEM_COPY_HOST_PTR}, and the _host_ptr_ argument must be `NULL`. +endif::cl_khr_external_memory[] + // refError {clCreateBuffer} and {clCreateBufferWithProperties} returns a valid non-zero @@ -538,6 +643,26 @@ returned in _errcode_ret_: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_external_memory[] + * {CL_INVALID_DEVICE} + ** if a device identified by the property {CL_MEM_DEVICE_HANDLE_LIST_KHR} + is not a valid device or is not associated with _context_, or + ** if a device identified by property {CL_MEM_DEVICE_HANDLE_LIST_KHR} + cannot import the requested external memory object type, or + ** if {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of + _properties_ and one or more devices in _context_ cannot import the + requested external memory object type. + * {CL_INVALID_VALUE} + ** if _properties_ includes a supported external memory handle and _flags_ + includes {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or + {CL_MEM_COPY_HOST_PTR}. + * {CL_INVALID_HOST_PTR} + ** if _properties_ includes a supported external memory handle and + _host_ptr_ is not `NULL`. + * {CL_INVALID_PROPERTY} + ** if _properties_ does not include a supported external memory handle and + {CL_MEM_DEVICE_HANDLE_LIST_KHR} is specified as part of _properties_. 
+endif::cl_khr_external_memory[] [[memory-flags-table]] .List of supported memory flag values @@ -1583,6 +1708,188 @@ include::{generated}/api/version-notes/CL_MAP_WRITE_INVALIDATE_REGION.asciidoc[] -- +ifdef::cl_khr_d3d10_sharing[] +=== Creating OpenCL Buffer Objects From Direct3D 10 Buffer Resources + +[open,refpage='clCreateFromD3D10BufferKHR',desc='Create OpenCL buffer object from a Direct3D 10 buffer',type='protos'] +-- +To create an OpenCL buffer object from a Direct3D 10 buffer, call the +function + +include::{generated}/api/protos/clCreateFromD3D10BufferKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D10BufferKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 10 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 10 buffer to share. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The size of the returned OpenCL buffer object is the same as the size of +_resource_. +This call will increment the internal Direct3D 10 reference count on +_resource_. +The internal Direct3D 10 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromD3D10BufferKHR} returns a valid non-zero OpenCL buffer object +and _errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. 
+ * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 + buffer resource, if _resource_ was created with the D3D10_USAGE flag + D3D10_USAGE_IMMUTABLE, if a {cl_mem_TYPE} from _resource_ has already + been created using {clCreateFromD3D10BufferKHR}, or if _context_ was not + created against the same Direct3D 10 device from which _resource_ was + created. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_d3d10_sharing[] + + +ifdef::cl_khr_d3d11_sharing[] +=== Creating OpenCL Buffer Objects From Direct3D 11 Buffer Resources + +[open,refpage='clCreateFromD3D11BufferKHR',desc='Create OpenCL buffer object from a Direct3D 11 buffer',type='protos'] +-- +To create an OpenCL buffer object from a Direct3D 11 buffer, call the +function + +include::{generated}/api/protos/clCreateFromD3D11BufferKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D11BufferKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 11 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 11 buffer to share. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The size of the returned OpenCL buffer object is the same as the size of +_resource_. +This call will increment the internal Direct3D 11 reference count on +_resource_. +The internal Direct3D 11 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. 
+ +// refError + +{clCreateFromD3D11BufferKHR} returns a valid non-zero OpenCL buffer object +and _errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. + * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 + buffer resource, if _resource_ was created with the D3D11_USAGE flag + D3D11_USAGE_IMMUTABLE, if a {cl_mem_TYPE} from _resource_ has already + been created using {clCreateFromD3D11BufferKHR}, or if _context_ was not + created against the same Direct3D 11 device from which _resource_ was + created. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_d3d11_sharing[] + + +ifdef::cl_khr_gl_sharing[] +=== Creating OpenCL Buffer Objects From OpenGL Buffer Objects + +[open,refpage='clCreateFromGLBuffer',desc='Create OpenCL buffer object from an OpenGL buffer object',type='protos'] +-- +To create an OpenCL buffer object from an OpenGL buffer object, call the +function + +include::{generated}/api/protos/clCreateFromGLBuffer.txt[] +include::{generated}/api/version-notes/clCreateFromGLBuffer.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a + description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _bufobj_ is the name of an OpenGL buffer object. + The data store of the OpenGL buffer object must have been + previously created by calling `glBufferData`, although its contents need + not be initialized.
+ The size of the data store will be used to determine the size of the + OpenCL buffer object. + * _errcode_ret_ will return an appropriate error code as described below. + If _errcode_ret_ is `NULL`, no error code is returned. + +The size of the OpenGL buffer object data store at the time +{clCreateFromGLBuffer} is called will be used as the size of the buffer object +returned by {clCreateFromGLBuffer}. +If the state of an OpenGL buffer object is modified through the OpenGL API +(e.g. `glBufferData`) while there exists a corresponding OpenCL buffer +object, subsequent use of the OpenCL buffer object will result in undefined +behavior. + +The {clRetainMemObject} and {clReleaseMemObject} functions can be used to +retain and release the buffer object. + +The OpenCL buffer object created using {clCreateFromGLBuffer} can also be +used to create an OpenCL 1D image buffer object. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromGLBuffer} returns a valid non-zero OpenCL buffer object and +_errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context or was not + created from an OpenGL context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. + * {CL_INVALID_GL_OBJECT} if _bufobj_ is not an OpenGL buffer object or is an + OpenGL buffer object but does not have an existing data store or the + size of the buffer is 0. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host.
+-- +endif::cl_khr_gl_sharing[] + + == Image Objects An _image_ object is used to store a one-, two- or three-dimensional @@ -1610,7 +1917,9 @@ include::{generated}/api/version-notes/clCreateImageWithProperties.asciidoc[] * _properties_ is an optional list of properties for the image object and their corresponding values. The list is terminated with the special property `0`. If no properties are required, _properties_ may be `NULL`. - OpenCL 3.0 does not define any optional properties for images. + OpenCL 3.0 does not define any optional properties for images, + but extensions may define properties as described in the + <>. * _flags_ is a bit-field that is used to specify allocation and usage information about the image memory object being created and is described in the <> table. @@ -1655,6 +1964,32 @@ If the {CL_MEM_HOST_WRITE_ONLY}, {CL_MEM_HOST_READ_ONLY} or inherited from the corresponding memory access qualifiers associated with __mem_object__. +ifdef::cl_khr_mipmap_image[] +*Mipmap Images* + +A mipmapped 1D image, 1D image array, 2D image, 2D image array or 3D image +is created by specifying _num_mip_levels_ to be a value greater than one in +_image_desc_. +The dimensions of a mipmapped image can be a power of two or a non-power of +two. +Each successively smaller mipmap level is half the size of the previous +level, rounded down to the nearest integer. + +The following restrictions apply when mipmapped images are created with +{clCreateImage}: + +// TODO The actual errors returned from clCreateImage are not specified by +// the cl_khr_mipmap_image extension + + * {CL_MEM_USE_HOST_PTR} or {CL_MEM_COPY_HOST_PTR} cannot be specified if a + mipmapped image is created. + * The _host_ptr_ argument to {clCreateImage} must be a `NULL` value. + * Mip-mapped images cannot be created for {CL_MEM_OBJECT_IMAGE1D_BUFFER} + images, depth images or multi-sampled (i.e. msaa) images. 
+endif::cl_khr_mipmap_image[] + +*Image Data in Host Memory* + For a 3D image or 2D image array, the image data specified by _host_ptr_ is stored as a linear sequence of adjacent 2D image slices or 2D images respectively. @@ -1676,6 +2011,42 @@ stored as a single scanline which is a linear sequence of adjacent elements. Image elements are stored according to their image format as described in the <> section. +[[external-image-memory-properties-table]] +.List of supported image creation properties +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Property | Property Value | Description +ifdef::cl_khr_external_memory[] +| {CL_MEM_DEVICE_HANDLE_LIST_KHR} + +include::{generated}/api/version-notes/CL_MEM_DEVICE_HANDLE_LIST_KHR.asciidoc[] + | {cl_device_id_TYPE}[] + | Specifies the list of OpenCL devices (terminated with + {CL_MEM_DEVICE_HANDLE_LIST_END_KHR}) to associate with the external + memory handle. +endif::cl_khr_external_memory[] +|==== + +ifdef::cl_khr_external_memory[] +If {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_, +the memory object created by {clCreateBufferWithProperties} or +{clCreateImageWithProperties} is by default accessible to all devices in the +_context_. + +The properties used to create an image from an external memory handle are +<>. +When an image is created from an external memory handle, the +_flags_ used to specify usage information for the image must not +include {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or +{CL_MEM_COPY_HOST_PTR}, and the _host_ptr_ argument must be `NULL`. +When images are created from an external memory handle, implementations may +acquire information about image attributes such as format and layout at the +time of creation. +When such information is acquired at image creation time, it is used for the +lifetime of the image object. 
+endif::cl_khr_external_memory[] + // refError {clCreateImage} and {clCreateImageWithProperties} returns a valid non-zero @@ -1731,6 +2102,26 @@ returned in _errcode_ret_: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_external_memory[] + * {CL_INVALID_DEVICE} + ** if a device identified by the property {CL_MEM_DEVICE_HANDLE_LIST_KHR} + is not a valid device or is not associated with _context_, or + ** if a device identified by property {CL_MEM_DEVICE_HANDLE_LIST_KHR} + cannot import the requested external memory object type, or + ** if {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of + _properties_ and one or more devices in _context_ cannot import the + requested external memory object type. + * {CL_INVALID_VALUE} + ** if _properties_ includes a supported external memory handle and _flags_ + includes {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or + {CL_MEM_COPY_HOST_PTR}. + * {CL_INVALID_HOST_PTR} + ** if _properties_ includes a supported external memory handle and + _host_ptr_ is not `NULL`. + * {CL_INVALID_PROPERTY} + ** if _properties_ does not include a supported external memory handle and + {CL_MEM_DEVICE_HANDLE_LIST_KHR} is specified as part of _properties_. +endif::cl_khr_external_memory[] [[host-ptr-buffer-size-table]] .Required _host_ptr_ buffer sizes for images @@ -1935,11 +2326,11 @@ format, and is defined as: include::{generated}/api/structs/cl_image_format.txt[] - * `image_channel_order` specifies the number of channels and the channel + * _image_channel_order_ specifies the number of channels and the channel layout i.e. the memory layout in which channels are stored in the image. Valid values are described in the <> table. - * `image_channel_data_type` describes the size of the channel data type. + * _image_channel_data_type_ describes the size of the channel data type. 
The list of supported values is described in the <> table. The number of bits per element determined by the `image_channel_data_type` @@ -1958,6 +2349,10 @@ include::{generated}/api/version-notes/CL_R.asciidoc[] | {CL_DEPTH_anchor} include::{generated}/api/version-notes/CL_DEPTH.asciidoc[] + +ifdef::cl_khr_depth_images[] +Also supported if the `<>` extension is supported. +endif::cl_khr_depth_images[] | A single channel image format where the single channel represents a `DEPTH` component. | {CL_LUMINANCE_anchor} @@ -1980,7 +2375,23 @@ include::{generated}/api/version-notes/CL_RG.asciidoc[] | {CL_Rx_anchor} include::{generated}/api/version-notes/CL_Rx.asciidoc[] - | A two channel image format, where the first channel represents a `RED` component and the second channel is ignored. + | A two channel image format, where the first channel represents a `RED` + component and the second channel is ignored. + +ifdef::cl_khr_gl_depth_images[] +| {CL_DEPTH_STENCIL_anchor} + +include::{generated}/api/version-notes/CL_DEPTH_STENCIL.asciidoc[] + | A two channel image format, where the first channel represents + a `DEPTH` component and the second channel represents + a stencil component. + This format can only be used if the image channel data type is + {CL_UNORM_INT24} or {CL_FLOAT}. + + See <>. +endif::cl_khr_gl_depth_images[] + | {CL_RGB_anchor} include::{generated}/api/version-notes/CL_RGB.asciidoc[] @@ -2037,6 +2448,10 @@ include::{generated}/api/version-notes/CL_UNORM_INT8.asciidoc[] | {CL_UNORM_INT16_anchor} include::{generated}/api/version-notes/CL_UNORM_INT16.asciidoc[] + +ifdef::cl_khr_depth_images[] +Also supported if the `<>` extension is supported. 
+endif::cl_khr_depth_images[] | Each channel component is a normalized unsigned 16-bit integer value | {CL_UNORM_SHORT_565_anchor} @@ -2078,6 +2493,14 @@ include::{generated}/api/version-notes/CL_UNSIGNED_INT8.asciidoc[] include::{generated}/api/version-notes/CL_UNSIGNED_INT16.asciidoc[] | Each channel component is an unnormalized unsigned 16-bit integer value + +ifdef::cl_khr_gl_depth_images[] +| {CL_UNORM_INT24_anchor} + +include::{generated}/api/version-notes/CL_UNORM_INT24.asciidoc[] + | Each channel component is a normalized unsigned 24-bit integer value +endif::cl_khr_gl_depth_images[] + | {CL_UNSIGNED_INT32_anchor} include::{generated}/api/version-notes/CL_UNSIGNED_INT32.asciidoc[] @@ -2089,6 +2512,10 @@ include::{generated}/api/version-notes/CL_HALF_FLOAT.asciidoc[] | {CL_FLOAT_anchor} include::{generated}/api/version-notes/CL_FLOAT.asciidoc[] + +ifdef::cl_khr_depth_images[] +Also supported if the `<>` extension is supported. +endif::cl_khr_depth_images[] | Each channel component is a single precision floating-point value |==== @@ -2162,11 +2589,11 @@ and dimensions of an image or image array when creating an image using include::{generated}/api/structs/cl_image_desc.txt[] - * `image_type` describes the image type and must be either + * _image_type_ describes the image type and must be either {CL_MEM_OBJECT_IMAGE1D}, {CL_MEM_OBJECT_IMAGE1D_BUFFER}, {CL_MEM_OBJECT_IMAGE1D_ARRAY}, {CL_MEM_OBJECT_IMAGE2D}, {CL_MEM_OBJECT_IMAGE2D_ARRAY}, or {CL_MEM_OBJECT_IMAGE3D}. - * `image_width` is the width of the image in pixels. + * _image_width_ is the width of the image in pixels. For a 2D image and image array, the image width must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE2D_MAX_WIDTH}. For a 3D image, the image width must be a value {geq} 1 and {leq} @@ -2175,44 +2602,66 @@ include::{generated}/api/structs/cl_image_desc.txt[] {CL_DEVICE_IMAGE_MAX_BUFFER_SIZE}. 
For a 1D image and 1D image array, the image width must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE2D_MAX_WIDTH}. - * `image_height` is the height of the image in pixels. + * _image_height_ is the height of the image in pixels. This is only used if the image is a 2D or 3D image, or a 2D image array. For a 2D image or image array, the image height must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE2D_MAX_HEIGHT}. For a 3D image, the image height must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE3D_MAX_HEIGHT}. - * `image_depth` is the depth of the image in pixels. + * _image_depth_ is the depth of the image in pixels. This is only used if the image is a 3D image and must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE3D_MAX_DEPTH}. - * `image_array_size` footnote:[{fn-image-array-performance}] is the number of + * _image_array_size_ footnote:[{fn-image-array-performance}] is the number of images in the image array. This is only used if the image is a 1D or 2D image array. The values for `image_array_size`, if specified, must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE_MAX_ARRAY_SIZE}. - * `image_row_pitch` is the scan-line pitch in bytes. - This must be 0 if _host_ptr_ is `NULL` and can be either 0 or {geq} - `image_width` {times} size of element in bytes if _host_ptr_ is not `NULL`. - If _host_ptr_ is not `NULL` and `image_row_pitch` = 0, `image_row_pitch` is - calculated as `image_width` {times} size of element in bytes. - If `image_row_pitch` is not 0, it must be a multiple of the image element - size in bytes. - For a 2D image created from a buffer, the pitch specified (or computed if - pitch specified is 0) must be a multiple of the maximum of the - {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value for all devices in the context - associated with the buffer specified by `mem_object` that support images. - * `image_slice_pitch` is the size in bytes of each 2D slice in the 3D image or - the size in bytes of each image in a 1D or 2D image array. 
- This must be 0 if _host_ptr_ is `NULL`. - If _host_ptr_ is not `NULL`, `image_slice_pitch` can be either 0 or {geq} - `image_row_pitch` {times} `image_height` for a 2D image array or 3D image - and can be either 0 or {geq} `image_row_pitch` for a 1D image array. - If _host_ptr_ is not `NULL` and `image_slice_pitch` = 0, `image_slice_pitch` - is calculated as `image_row_pitch` {times} `image_height` for a 2D image - array or 3D image and `image_row_pitch` for a 1D image array. - If `image_slice_pitch` is not 0, it must be a multiple of the - `image_row_pitch`. - * `num_mip_levels` and `num_samples` must be 0. - * `mem_object` may refer to a valid buffer or image memory object. + * _image_row_pitch_ is the scan-line pitch in bytes. + The _image_row_pitch_ must be zero if _host_ptr_ is `NULL`, +ifdef::cl_khr_external_memory[] + the image is not an image created from an external memory handle, +endif::cl_khr_external_memory[] + and the image is not a 2D image created from a buffer. + If _image_row_pitch_ is zero and _host_ptr_ is not `NULL`, then the + image row pitch is calculated as _image_width_ {times} the size of an + image element in bytes. +ifdef::cl_khr_external_memory[] + If _image_row_pitch_ is zero and the image is created from an external + memory handle, then the image row pitch is implementation-defined. +endif::cl_khr_external_memory[] + The image row pitch must be {geq} _image_width_ {times} the size of an + image element in bytes, and must be a multiple of the size of an image + element in bytes. + For a 2D image created from a buffer the image row pitch must also be a + multiple of the maximum of the {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value + for all devices in the context that support images. + * _image_slice_pitch_ is the size in bytes of each 2D slice in a 3D image, + or the size in bytes of each image in a 1D or 2D image array.
+ The _image_slice_pitch_ must be zero if _host_ptr_ is `NULL` +ifdef::cl_khr_external_memory[] + and the image is not an image created from an external memory handle. +endif::cl_khr_external_memory[] + If _image_slice_pitch_ is zero and _host_ptr_ is not `NULL` then the + image slice pitch is calculated as the image row pitch {times} + _image_height_ for a 2D image array or a 3D image, and as the image row + pitch for a 1D image array. +ifdef::cl_khr_external_memory[] + If _image_slice_pitch_ is zero and the image is created from an external + memory handle, then the image slice pitch is implementation-defined. +endif::cl_khr_external_memory[] + The image slice pitch must be {geq} the image row pitch {times} + _image_height_ for a 2D image array or a 3D image, must be {geq} the + image row pitch for a 1D image array, and must be a multiple of the + image row pitch. + * _num_mip_levels_ must be +ifndef::cl_khr_mipmap_image[0.] +ifdef::cl_khr_mipmap_image[] + 0 unless the `<>` extension is supported, in which + case it must be a value greater than 1 specifying the number of mipmap + levels in the image. +endif::cl_khr_mipmap_image[] + * _num_samples_ must be 0. + * _mem_object_ may refer to a valid buffer or image memory object. `mem_object` can be a buffer memory object if `image_type` is {CL_MEM_OBJECT_IMAGE1D_BUFFER} or {CL_MEM_OBJECT_IMAGE2D} footnote:[{fn-image-from-buffer}]. @@ -2258,16 +2707,19 @@ Restrictions are: * All of the values specified in _image_desc_ must match the image descriptor information associated with `mem_object`, except for `mem_object`. - * The image channel data type specified in _image_format_ must match the image channel data type associated with `mem_object`. - * The image channel order specified in _image_format_ must be compatible - with the image channel order associated with `mem_object`.
- Compatible image channel orders - footnote:[{fn-compatible-image-channel-orders}] are: -+ --- + with the image channel order associated with `mem_object`, as described + in the <> table. + +NOTE: The image channel order compatibility constraint allows creation of an +sRGB view of the image from a linear RGB view or vice-versa, i.e. the pixels +stored in the image can be accessed as linear RGB or sRGB values. + +[[compatibile-image-channel-orders-table]] +.Compatible Image Channel Orders [width="100%",cols="<50%,<50%",options="header"] |==== | Image Channel Order in _image_format_: @@ -2291,7 +2743,6 @@ Restrictions are: | {CL_DEPTH} | {CL_R} |==== --- [NOTE] ==== @@ -2418,8 +2869,21 @@ is: {CL_FLOAT} | 1 | {CL_DEPTH} footnote:[{fn-depth-image-requirements}] + +ifdef::cl_khr_depth_images[] +Also supported if the `<>` extension is supported. +endif::cl_khr_depth_images[] | {CL_UNORM_INT16} + {CL_FLOAT} +ifdef::cl_khr_gl_depth_images[] +| 1 + | {CL_DEPTH_STENCIL} + | {CL_UNORM_INT24} + + {CL_FLOAT} + + See <>. +endif::cl_khr_gl_depth_images[] | 2 | {CL_RG} | {CL_UNORM_INT8} + @@ -2526,7 +2990,7 @@ the same kernel instance is: -- [[image-format-mapping]] -==== Image format mapping to OpenCL kernel language image access qualifiers +==== Image Format Mapping to OpenCL Kernel Language Image Access Qualifiers Image arguments to kernels may have the `read_only`, `write_only` or `read_write` qualifier. @@ -2558,6 +3022,234 @@ parameter and any other image parameter. |==== + +=== Mapping to External Image Formats + +OpenCL image objects can be created which share storage with image objects +in external APIs such as DirectX and OpenGL when the corresponding OpenCL +extensions are supported. +When creating such OpenCL images, there are restrictions on the allowed +formats. +The tables in this section list, for each such external API, the supported +image formats in that API and the corresponding OpenCL image format.
+ + +ifdef::cl_khr_dx9_media_sharing[] +==== Image Formats for DirectX 9 Media Surface Sharing + +When the `<>` extension is supported, image +objects sharing storage with Direct3D 9 surfaces can be created. +This section describes the Direct3D 9 surface formats that are supported +when the adapter type is one of the Direct3D lineage. +Using a Direct3D 9 surface format not listed here is an error. +To extend the use of this extension to support media adapters beyond DirectX +9, tables similar to the ones in this section will need to be defined for the +surface formats supported by the new media adapter. +All implementations that support this extension are required to support the +NV12 surface format. +The other surface formats supported are the same surface formats that the +adapter you are sharing with supports as long as they are listed in the +<> or <> tables. + +[[fourcc-image-formats-table]] +.YUV FourCC Codes and Corresponding OpenCL Image Formats +[cols=",",options="header",] +|==== +| FourCC Code | CL Image Format (Channel Order, Channel Data Type) +| FOURCC('N','V','1','2'), Plane 0 | {CL_R}, {CL_UNORM_INT8} +| FOURCC('N','V','1','2'), Plane 1 | {CL_RG}, {CL_UNORM_INT8} +| FOURCC('Y','V','1','2'), Plane 0 | {CL_R}, {CL_UNORM_INT8} +| FOURCC('Y','V','1','2'), Plane 1 | {CL_R}, {CL_UNORM_INT8} +| FOURCC('Y','V','1','2'), Plane 2 | {CL_R}, {CL_UNORM_INT8} +|==== + +In the <> table, NV12 Plane 0 corresponds to the luminance (Y) +channel and Plane 1 corresponds to the UV channels. The YV12 Plane 0 +corresponds to the Y channel, Plane 1 corresponds to the V channel and Plane +2 corresponds to the U channel. +Note that the YUV formats map to {CL_R} and {CL_RG} but do not perform any +YUV to RGB conversion, and vice-versa.
+ +[[d3d9-image-formats-table]] +.Direct3D 9 Formats and Corresponding OpenCL Image Formats +[cols=",",options="header",] +|==== +| Direct3D 9 Format | CL Image Format (Channel Order, Channel Data Type) +| `D3DFMT_R32F` | {CL_R}, {CL_FLOAT} +| `D3DFMT_R16F` | {CL_R}, {CL_HALF_FLOAT} +| `D3DFMT_L16` | {CL_R}, {CL_UNORM_INT16} +| `D3DFMT_A8` | {CL_A}, {CL_UNORM_INT8} +| `D3DFMT_L8` | {CL_R}, {CL_UNORM_INT8} +| | +| `D3DFMT_G32R32F` | {CL_RG}, {CL_FLOAT} +| `D3DFMT_G16R16F` | {CL_RG}, {CL_HALF_FLOAT} +| `D3DFMT_G16R16` | {CL_RG}, {CL_UNORM_INT16} +| `D3DFMT_A8L8` | {CL_RG}, {CL_UNORM_INT8} +| | +| `D3DFMT_A32B32G32R32F` | {CL_RGBA}, {CL_FLOAT} +| `D3DFMT_A16B16G16R16F` | {CL_RGBA}, {CL_HALF_FLOAT} +| `D3DFMT_A16B16G16R16` | {CL_RGBA}, {CL_UNORM_INT16} +| `D3DFMT_A8B8G8R8` | {CL_RGBA}, {CL_UNORM_INT8} +| `D3DFMT_X8B8G8R8` | {CL_RGBA}, {CL_UNORM_INT8} +| `D3DFMT_A8R8G8B8` | {CL_BGRA}, {CL_UNORM_INT8} +| `D3DFMT_X8R8G8B8` | {CL_BGRA}, {CL_UNORM_INT8} +|==== + +NOTE: The Direct3D 9 format names in the table above seem to imply that the order +of the color channels is switched relative to OpenCL, but this is not the +case. +For example, the layout of channels for each pixel for +`D3DFMT_A32B32G32R32F` is the same as {CL_RGBA}, {CL_FLOAT}. +endif::cl_khr_dx9_media_sharing[] + + +ifdef::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +==== Image Formats for Direct3D Texture Sharing + +When the `<>` or `<>` extensions +are supported, image objects sharing storage with Direct3D 10 and Direct3D +11 textures, respectively, can be created. +The <> table describes the supported DirectX Graphics Infrastructure +(DXGI) texture formats.
+ +[[dxgi-image-formats-table]] +.DXGI Formats and Corresponding OpenCL Image Formats +[cols=",",options="header",] +|==== +| DXGI Format | CL Image Format (Channel Order, Channel Data Type) + +| `DXGI_FORMAT_R32G32B32A32_FLOAT` | {CL_RGBA}, {CL_FLOAT} +| `DXGI_FORMAT_R32G32B32A32_UINT` | {CL_RGBA}, {CL_UNSIGNED_INT32} +| `DXGI_FORMAT_R32G32B32A32_SINT` | {CL_RGBA}, {CL_SIGNED_INT32} +| | +| `DXGI_FORMAT_R16G16B16A16_FLOAT` | {CL_RGBA}, {CL_HALF_FLOAT} +| `DXGI_FORMAT_R16G16B16A16_UNORM` | {CL_RGBA}, {CL_UNORM_INT16} +| `DXGI_FORMAT_R16G16B16A16_UINT` | {CL_RGBA}, {CL_UNSIGNED_INT16} +| `DXGI_FORMAT_R16G16B16A16_SNORM` | {CL_RGBA}, {CL_SNORM_INT16} +| `DXGI_FORMAT_R16G16B16A16_SINT` | {CL_RGBA}, {CL_SIGNED_INT16} +| | +| `DXGI_FORMAT_B8G8R8A8_UNORM` | {CL_BGRA}, {CL_UNORM_INT8} +| `DXGI_FORMAT_R8G8B8A8_UNORM` | {CL_RGBA}, {CL_UNORM_INT8} +| `DXGI_FORMAT_R8G8B8A8_UINT` | {CL_RGBA}, {CL_UNSIGNED_INT8} +| `DXGI_FORMAT_R8G8B8A8_SNORM` | {CL_RGBA}, {CL_SNORM_INT8} +| `DXGI_FORMAT_R8G8B8A8_SINT` | {CL_RGBA}, {CL_SIGNED_INT8} +| | +| `DXGI_FORMAT_R32G32_FLOAT` | {CL_RG}, {CL_FLOAT} +| `DXGI_FORMAT_R32G32_UINT` | {CL_RG}, {CL_UNSIGNED_INT32} +| `DXGI_FORMAT_R32G32_SINT` | {CL_RG}, {CL_SIGNED_INT32} +| | +| `DXGI_FORMAT_R16G16_FLOAT` | {CL_RG}, {CL_HALF_FLOAT} +| `DXGI_FORMAT_R16G16_UNORM` | {CL_RG}, {CL_UNORM_INT16} +| `DXGI_FORMAT_R16G16_UINT` | {CL_RG}, {CL_UNSIGNED_INT16} +| `DXGI_FORMAT_R16G16_SNORM` | {CL_RG}, {CL_SNORM_INT16} +| `DXGI_FORMAT_R16G16_SINT` | {CL_RG}, {CL_SIGNED_INT16} +| | +| `DXGI_FORMAT_R8G8_UNORM` | {CL_RG}, {CL_UNORM_INT8} +| `DXGI_FORMAT_R8G8_UINT` | {CL_RG}, {CL_UNSIGNED_INT8} +| `DXGI_FORMAT_R8G8_SNORM` | {CL_RG}, {CL_SNORM_INT8} +| `DXGI_FORMAT_R8G8_SINT` | {CL_RG}, {CL_SIGNED_INT8} +| | +| `DXGI_FORMAT_R32_FLOAT` | {CL_R}, {CL_FLOAT} +| `DXGI_FORMAT_R32_UINT` | {CL_R}, {CL_UNSIGNED_INT32} +| `DXGI_FORMAT_R32_SINT` | {CL_R}, {CL_SIGNED_INT32} +| | +| `DXGI_FORMAT_R16_FLOAT` | {CL_R}, {CL_HALF_FLOAT} +| `DXGI_FORMAT_R16_UNORM` | {CL_R}, 
{CL_UNORM_INT16} +| `DXGI_FORMAT_R16_UINT` | {CL_R}, {CL_UNSIGNED_INT16} +| `DXGI_FORMAT_R16_SNORM` | {CL_R}, {CL_SNORM_INT16} +| `DXGI_FORMAT_R16_SINT` | {CL_R}, {CL_SIGNED_INT16} +| | +| `DXGI_FORMAT_R8_UNORM` | {CL_R}, {CL_UNORM_INT8} +| `DXGI_FORMAT_R8_UINT` | {CL_R}, {CL_UNSIGNED_INT8} +| `DXGI_FORMAT_R8_SNORM` | {CL_R}, {CL_SNORM_INT8} +| `DXGI_FORMAT_R8_SINT` | {CL_R}, {CL_SIGNED_INT8} +|==== +endif::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] + + +ifdef::cl_khr_gl_sharing[] +==== Image Formats for OpenGL Texture and Renderbuffer Sharing + +When the `<>` extension is supported, image objects +sharing storage with OpenGL texture and renderbuffer objects can be created. +The <> table describes the supported OpenGL image +formats. +If an OpenGL texture or renderbuffer object with an internal format from the +table is successfully created by OpenGL, then there is guaranteed to be a +mapping to one of the corresponding OpenCL image format(s) in the table. +Texture and renderbuffer objects created with other OpenGL internal formats +may (but are not guaranteed to) have a mapping to an OpenCL image format. +If such mappings exist, they are guaranteed to preserve all color +components, data types, and at least the number of bits/component actually +allocated by OpenGL for that format. 
+ +[[opengl-image-formats-table]] +.OpenGL Internal Formats and Corresponding OpenCL Internal Formats +[cols=",",options="header",] +|==== +| OpenGL internal format | OpenCL Image Format (Channel Order, Channel Data Type) +| `GL_RGBA8` | {CL_RGBA}, {CL_UNORM_INT8} or + + {CL_BGRA}, {CL_UNORM_INT8} +| `GL_SRGB8_ALPHA8` | {CL_sRGBA}, {CL_UNORM_INT8} +| `GL_RGBA`, `GL_UNSIGNED_INT_8_8_8_8_REV` | {CL_RGBA}, {CL_UNORM_INT8} +| `GL_BGRA`, `GL_UNSIGNED_INT_8_8_8_8_REV` | {CL_BGRA}, {CL_UNORM_INT8} +| | +| `GL_RGBA8I`, `GL_RGBA8I_EXT` | {CL_RGBA}, {CL_SIGNED_INT8} +| `GL_RGBA16I`, `GL_RGBA16I_EXT` | {CL_RGBA}, {CL_SIGNED_INT16} +| `GL_RGBA32I`, `GL_RGBA32I_EXT` | {CL_RGBA}, {CL_SIGNED_INT32} +| | +| `GL_RGBA8UI`, `GL_RGBA8UI_EXT` | {CL_RGBA}, {CL_UNSIGNED_INT8} +| `GL_RGBA16UI`, `GL_RGBA16UI_EXT` | {CL_RGBA}, {CL_UNSIGNED_INT16} +| `GL_RGBA32UI`, `GL_RGBA32UI_EXT` | {CL_RGBA}, {CL_UNSIGNED_INT32} +| | +| `GL_RGBA8_SNORM` | {CL_RGBA}, {CL_SNORM_INT8} +| `GL_RGBA16` | {CL_RGBA}, {CL_UNORM_INT16} +| `GL_RGBA16_SNORM` | {CL_RGBA}, {CL_SNORM_INT16} +| `GL_RGBA16F`, `GL_RGBA16F_ARB` | {CL_RGBA}, {CL_HALF_FLOAT} +| `GL_RGBA32F`, `GL_RGBA32F_ARB` | {CL_RGBA}, {CL_FLOAT} +| | +| `GL_R8` | {CL_R}, {CL_UNORM_INT8} +| `GL_R8_SNORM` | {CL_R}, {CL_SNORM_INT8} +| `GL_R16` | {CL_R}, {CL_UNORM_INT16} +| `GL_R16_SNORM` | {CL_R}, {CL_SNORM_INT16} +| `GL_R16F` | {CL_R}, {CL_HALF_FLOAT} +| `GL_R32F` | {CL_R}, {CL_FLOAT} +| | +| `GL_R8I` | {CL_R}, {CL_SIGNED_INT8} +| `GL_R16I` | {CL_R}, {CL_SIGNED_INT16} +| `GL_R32I` | {CL_R}, {CL_SIGNED_INT32} +| `GL_R8UI` | {CL_R}, {CL_UNSIGNED_INT8} +| `GL_R16UI` | {CL_R}, {CL_UNSIGNED_INT16} +| `GL_R32UI` | {CL_R}, {CL_UNSIGNED_INT32} +| | +| `GL_RG8` | {CL_RG}, {CL_UNORM_INT8} +| `GL_RG8_SNORM` | {CL_RG}, {CL_SNORM_INT8} +| `GL_RG16` | {CL_RG}, {CL_UNORM_INT16} +| `GL_RG16_SNORM` | {CL_RG}, {CL_SNORM_INT16} +| `GL_RG16F` | {CL_RG}, {CL_HALF_FLOAT} +| `GL_RG32F` | {CL_RG}, {CL_FLOAT} +| | +| `GL_RG8I` | {CL_RG}, {CL_SIGNED_INT8} +| `GL_RG16I` | {CL_RG}, 
{CL_SIGNED_INT16} +| `GL_RG32I` | {CL_RG}, {CL_SIGNED_INT32} +| `GL_RG8UI` | {CL_RG}, {CL_UNSIGNED_INT8} +| `GL_RG16UI` | {CL_RG}, {CL_UNSIGNED_INT16} +| `GL_RG32UI` | {CL_RG}, {CL_UNSIGNED_INT32} +ifdef::cl_khr_gl_depth_images[] +| `GL_DEPTH_COMPONENT32F` | {CL_DEPTH}, {CL_FLOAT} +| `GL_DEPTH_COMPONENT16` | {CL_DEPTH}, {CL_UNORM_INT16} +| `GL_DEPTH24_STENCIL8` | {CL_DEPTH_STENCIL}, {CL_UNORM_INT24} +| `GL_DEPTH32F_STENCIL8` | {CL_DEPTH_STENCIL}, {CL_FLOAT} +endif::cl_khr_gl_depth_images[] +|==== +endif::cl_khr_gl_sharing[] + + === Reading, Writing and Copying Image Objects [open,refpage='clEnqueueReadImage',desc='Enqueue commands to read from an image or image array object to host memory.',type='protos',alias='clEnqueueWriteImage'] @@ -2598,6 +3290,11 @@ include::{generated}/api/version-notes/clEnqueueWriteImage.asciidoc[] _region_[2] must be 1. If _image_ is a 1D image array object, _region_[2] must be 1. The values in _region_ cannot be 0. +ifdef::cl_khr_mipmap_image[] + If _image_ is a mipmapped image, the mip level to read or write is + determined from _origin_ as described in <>. +endif::cl_khr_mipmap_image[] * _row_pitch_ in {clEnqueueReadImage} and _input_row_pitch_ in {clEnqueueWriteImage} is the length of each row in bytes. This value must be greater than or equal to the element size in bytes @@ -2714,6 +3411,11 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + supported, and the mip level specified in _origin_ is not a valid level + for _image_. +endif::cl_khr_mipmap_image[] [NOTE] ==== @@ -2779,6 +3481,11 @@ include::{generated}/api/version-notes/clEnqueueCopyImage.asciidoc[] image index in the 1D image array.
If _src_image_ is a 2D image array object, _src_origin_[2] describes the image index in the 2D image array. +ifdef::cl_khr_mipmap_image[] + If _src_image_ is a mipmapped image, the mip level to read is determined + from _src_origin_ as described in <>. +endif::cl_khr_mipmap_image[] * _dst_origin_ defines the (_x_, _y_, _z_) offset in pixels in the 1D, 2D or 3D image, the (_x_, _y_) offset and the image index in the 2D image array or the (_x_) offset and the image index in the 1D image array. @@ -2790,6 +3497,11 @@ include::{generated}/api/version-notes/clEnqueueCopyImage.asciidoc[] image index in the 1D image array. If _dst_image_ is a 2D image array object, _dst_origin_[2] describes the image index in the 2D image array. +ifdef::cl_khr_mipmap_image[] + If _dst_image_ is a mipmapped image, the mip level to write is + determined from _dst_origin_ as described in <>. +endif::cl_khr_mipmap_image[] * _region_ defines the (_width_, _height_, _depth_) in pixels of the 1D, 2D or 3D rectangle, the (_width_, _height_) in pixels of the 2D rectangle and the number of images of a 2D image array or the (_width_) in pixels of the 1D @@ -2872,6 +3584,11 @@ Otherwise, it returns one of the following errors: <> table is {CL_FALSE}). * {CL_MEM_COPY_OVERLAP} if _src_image_ and _dst_image_ are the same image object and the source and destination regions overlap. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + supported, and the mip level specified in _src_origin_ or _dst_origin_ + is not a valid level for the corresponding _src_image_ or _dst_image_. +endif::cl_khr_mipmap_image[] -- @@ -2901,8 +3618,8 @@ include::{generated}/api/version-notes/clEnqueueFillImage.asciidoc[] _image_ channel data type is an unnormalized signed integer type and is a four component unsigned integer value if the _image_ channel data type is an unnormalized unsigned integer type.
- The fill color will be converted to the appropriate image channel format and - order associated with _image_. + The fill color will be converted to the appropriate image channel format + and order associated with _image_. * _origin_ defines the (_x_, _y_, _z_) offset in pixels in the 1D, 2D or 3D image, the (_x_, _y_) offset and the image index in the 2D image array or the (_x_) offset and the image index in the 1D image array. @@ -2914,6 +3631,11 @@ include::{generated}/api/version-notes/clEnqueueFillImage.asciidoc[] in the 1D image array. If _image_ is a 2D image array object, _origin_[2] describes the image index in the 2D image array. +ifdef::cl_khr_mipmap_image[] + If _image_ is a mipmapped image, the mip level to fill is determined + from _origin_ as described in <> +endif::cl_khr_mipmap_image[] * _region_ defines the (_width_, _height_, _depth_) in pixels of the 1D, 2D or 3D rectangle, the (_width_, _height_) in pixels of the 2D rectangle and the number of images of a 2D image array or the (_width_) in pixels of the 1D @@ -2983,10 +3705,15 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + supported, and the mip level specified in _origin_ is not a valid level + for _image_, +endif::cl_khr_mipmap_image[] -- -=== Copying between Image and Buffer Objects +=== Copying Between Image and Buffer Objects [open,refpage='clEnqueueCopyImageToBuffer',desc='Enqueues a command to copy an image object to a buffer object.',type='protos'] -- @@ -3012,6 +3739,11 @@ include::{generated}/api/version-notes/clEnqueueCopyImageToBuffer.asciidoc[] image index in the 1D image array. If _src_image_ is a 2D image array object, _src_origin_[2] describes the image index in the 2D image array. 
+ifdef::cl_khr_mipmap_image[] + If _src_image_ is a mipmapped image, the mip level to read is determined + from _src_origin_ as described in <> +endif::cl_khr_mipmap_image[] * _region_ defines the (_width_, _height_, _depth_) in pixels of the 1D, 2D or 3D rectangle, the (_width_, _height_) in pixels of the 2D rectangle and the number of images of a 2D image array or the (_width_) in pixels of the 1D @@ -3100,6 +3832,11 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + supported, and the mip level specified in _src_origin_ is not a valid + level for _src_image_, +endif::cl_khr_mipmap_image[] -- @@ -3129,6 +3866,11 @@ include::{generated}/api/version-notes/clEnqueueCopyBufferToImage.asciidoc[] image index in the 1D image array. If _dst_image_ is a 2D image array object, _dst_origin_[2] describes the image index in the 2D image array. +ifdef::cl_khr_mipmap_image[] + If _dst_image_ is a mipmapped image, the mip level to write is + determined from _dst_origin_ as described in <> +endif::cl_khr_mipmap_image[] * _region_ defines the (_width_, _height_, _depth_) in pixels of the 1D, 2D or 3D rectangle, the (_width_, _height_) in pixels of the 2D rectangle and the number of images of a 2D image array or the (_width_) in pixels of the 1D @@ -3217,6 +3959,11 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + supported, and the mip level specified in _dst_origin_ is not a valid + level for _dst_image_, +endif::cl_khr_mipmap_image[] -- @@ -3385,6 +4132,33 @@ Objects>>. 
-- +ifdef::cl_khr_mipmap_image[] +[[image-mipmap-access]] +=== Specifying Mipmap Levels to Image Operations + +When the `<>` extension is supported, the +{clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueMapImage}, +{clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer}, +{clEnqueueCopyBufferToImage}, and {clEnqueueFillImage} functions described +above can operate on mipmapped images. + +The mipmap image level(s) to access for each command are determined from the +_origin_ parameter when accessing a single _image_ (non-copy functions), or +from the _src_origin_ and _dst_origin_ parameters when accessing two +_src_image_ and _dst_image_ images (copy functions). The logic below applies +to each of these parameters, with _image_ and _origin_ replaced by +_src_image_ and _src_origin_, or _dst_image_ and _dst_origin_ as +appropriate: + + * If _image_ is a 1D image, _origin_[1] specifies the mip level to use. + * If _image_ is a 1D image array, _origin_[2] specifies the mip level to + use. + * If _image_ is a 2D image, _origin_[2] specifies the mip level to use. + * If _image_ is a 2D image array or a 3D image, _origin_[3] specifies the + mip level to use. +endif::cl_khr_mipmap_image[] + + [[image-object-queries]] === Image Object Queries @@ -3499,6 +4273,36 @@ include::{generated}/api/version-notes/CL_IMAGE_NUM_MIP_LEVELS.asciidoc[] include::{generated}/api/version-notes/CL_IMAGE_NUM_SAMPLES.asciidoc[] | {cl_uint_TYPE} | Return `num_samples` associated with _image_. + +ifdef::cl_khr_dx9_media_sharing[] +| {CL_IMAGE_DX9_MEDIA_PLANE_KHR_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_DX9_MEDIA_PLANE_KHR.asciidoc[] + | {cl_uint_TYPE} + | If _image_ was created using {clCreateFromDX9MediaSurfaceKHR}, + returns the _plane_ argument specified when _image_ was created. 
+endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +| {CL_IMAGE_D3D10_SUBRESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_D3D10_SUBRESOURCE_KHR.asciidoc[] + | {cl_uint_TYPE} + | If _image_ was created using {clCreateFromD3D10Texture2DKHR}, or + {clCreateFromD3D10Texture3DKHR}, returns the _subresource_ argument + specified when _image_ was created. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {CL_IMAGE_D3D11_SUBRESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_D3D11_SUBRESOURCE_KHR.asciidoc[] + | {cl_uint_TYPE} + | If _image_ was created using {clCreateFromD3D11Texture2DKHR}, or + {clCreateFromD3D11Texture3DKHR}, returns the _subresource_ argument + specified when _image_ was created. +endif::cl_khr_d3d11_sharing[] + |==== // refError @@ -3516,80 +4320,828 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. --- +ifdef::cl_khr_dx9_media_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is + {CL_IMAGE_DX9_MEDIA_PLANE_KHR} and _image_ was not created by calling + {clCreateFromDX9MediaSurfaceKHR}. +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is + {CL_IMAGE_D3D10_SUBRESOURCE_KHR} and _image_ was not created by the + function {clCreateFromD3D10Texture2DKHR}, or + {clCreateFromD3D10Texture3DKHR}. 
+endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is + {CL_IMAGE_D3D11_SUBRESOURCE_KHR} and _image_ was not created by the + function {clCreateFromD3D11Texture2DKHR}, or + {clCreateFromD3D11Texture3DKHR}. +endif::cl_khr_d3d11_sharing[] + +-- + + +ifdef::cl_khr_dx9_media_sharing[] +=== Creating OpenCL Image Objects From DirectX 9 Media Resources + +[open,refpage='clCreateFromDX9MediaSurfaceKHR',desc='Create OpenCL image object from a media surface',type='protos'] +-- +To create an OpenCL image object from a media surface, call the function + +include::{generated}/api/protos/clCreateFromDX9MediaSurfaceKHR.txt[] +include::{generated}/api/version-notes/clCreateFromDX9MediaSurfaceKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a media adapter. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _adapter_type_ is a value from the enumeration of supported adapters + described in the <> table. + The type of _surface_info_ is determined by the adapter type. + The implementation does not need to support all adapter types. + This approach provides flexibility to support additional adapter types + in the future. + Supported adapter types are {CL_ADAPTER_D3D9_KHR}, + {CL_ADAPTER_D3D9EX_KHR} and {CL_ADAPTER_DXVA_KHR}. + * _surface_info_ is a pointer to one of the structures defined in the + _adapter_type_ description above, passed in as a `void *`. + If _adapter_type_ is {CL_ADAPTER_D3D9_KHR}, {CL_ADAPTER_D3D9EX_KHR} or + {CL_ADAPTER_DXVA_KHR}, _surface_info_ points to a + <> structure describing the surface. + * _plane_ is the plane of resource to share for planar surface formats.
+ For planar formats, we use the plane parameter to obtain a handle to + this specific plane (Y, U or V for example). + For non-planar formats used by media, _plane_ must be 0. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -== Pipes +The width and height of the returned OpenCL 2D image object are determined +by the width and height of the _plane_ of the resource +(_surface_info_->_resource_). +The channel type and order of the returned image object is determined by the +format and plane of the resource, and are described in the +<> and <> tables. -NOTE: Pipes are <> version 2.0. +This call will increment the internal media surface count on the resource. +The internal media surface reference count on the resource will be +decremented when the OpenCL reference count on the returned OpenCL memory +object drops to zero. -A _pipe_ is a memory object that stores data organized as a FIFO. -Pipe objects can only be accessed using built-in functions that read from -and write to a pipe. -Pipe objects are not accessible from the host. -A pipe object encapsulates the following information: +// refError - * Packet size in bytes - * Maximum capacity in packets - * Information about the number of packets currently in the pipe - * Data packets +{clCreateFromDX9MediaSurfaceKHR} returns a valid non-zero 2D image object +and _errcode_ret_ is set to {CL_SUCCESS} if the 2D image object is created +successfully. +Otherwise it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _plane_ is not a valid plane of _resource_ specified in _surface_info_.
+ * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _resource_ specified in + _surface_info_ is not a valid resource or is not associated with + _adapter_type_ (e.g., _adapter_type_ is set to {CL_ADAPTER_D3D9_KHR} and + _resource_ is not a Direct3D 9 surface created in D3DPOOL_DEFAULT). + * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _shared_handle_ specified in + _surface_info_ is not `NULL` or a valid handle value. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the texture format of _resource_ + is not listed in the <> or <> tables. + * {CL_INVALID_OPERATION} if there are no devices in _context_ that support + _adapter_type_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +[open,refpage='cl_dx9_surface_info_khr',desc='Structure describing a DX surface',type='structs'] +-- +The {cl_dx9_surface_info_khr_TYPE} structure is passed to +{clCreateFromDX9MediaSurfaceKHR} to describe a DX9 surface, and is defined +as: -=== Creating Pipe Objects +include::{generated}/api/structs/cl_dx9_surface_info_khr.txt[] -[open,refpage='clCreatePipe',desc='Creates a pipe object.',type='protos'] + * _resource_ is a pointer to a `IDirect3DSurface9` surface interface. + * _shared_handle_ is a `HANDLE` to the resource. + +For DX9 surfaces, we need both the handle to the resource and the resource +itself to have a sufficient amount of information to eliminate a copy of the +surface for sharing in cases where this is possible. +Elimination of the copy is driver dependent. +_shared_handle_ may be `NULL` and this may result in sub-optimal +performance. 
-- -To create a *pipe object*, call the function +endif::cl_khr_dx9_media_sharing[] -include::{generated}/api/protos/clCreatePipe.txt[] -include::{generated}/api/version-notes/clCreatePipe.asciidoc[] - * _context_ is a valid OpenCL context used to create the pipe object. - * _flags_ is a bit-field that is used to specify allocation and usage - information such as the memory arena that should be used to allocate the - pipe object and how it will be used. - The <> table describes the possible values for - _flags_. - Only {CL_MEM_READ_WRITE} and {CL_MEM_HOST_NO_ACCESS} can be specified when - creating a pipe object. - If the value specified for _flags_ is 0, the default is used which is - {CL_MEM_READ_WRITE} | {CL_MEM_HOST_NO_ACCESS}. - * _pipe_packet_size_ is the size in bytes of a pipe packet. - * _pipe_max_packets_ specifies the pipe capacity by specifying the maximum - number of packets the pipe can hold. - * _properties_ specifies a list of properties for the pipe and their - corresponding values. - Each property name is immediately followed by the corresponding desired - value. - The list is terminated with 0. - Currently, in all OpenCL versions, _properties_ must be `NULL`. +ifdef::cl_khr_d3d10_sharing[] +=== Creating OpenCL Image Objects From Direct3D 10 Textures and Resources + +[open,refpage='clCreateFromD3D10Texture2DKHR',desc='Create OpenCL 2D image object from a Direct3D 10 2D texture',type='protos'] +-- +To create an OpenCL 2D image object from a subresource of a Direct3D 10 2D +texture, call the function + +include::{generated}/api/protos/clCreateFromD3D10Texture2DKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D10Texture2DKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 10 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. 
+ Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 10 2D texture to share. + * _subresource_ is the subresource of _resource_ to share. * _errcode_ret_ will return an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. +The width and height of the returned OpenCL 2D image object are determined +by the width and height of subresource _subresource_ of _resource_. +The channel type and order of the returned OpenCL 2D image object is +determined by the format of _resource_ and the <> table. + +This call will increment the internal Direct3D 10 reference count on +_resource_. +The internal Direct3D 10 reference count on _resource_ will be decremented +when the OpenCL reference count on the returned OpenCL memory object drops +to zero. + +NOTE: Refer to the <> and +<> sections for +more information. + // refError -{clCreatePipe} returns a valid non-zero pipe object and _errcode_ret_ is set -to {CL_SUCCESS} if the pipe object is created successfully. +{clCreateFromD3D10Texture2DKHR} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_OPERATION} if no devices in _context_ support pipes. - * {CL_INVALID_VALUE} if values specified in _flags_ are not as defined - above. - * {CL_INVALID_VALUE} if _properties_ is not `NULL`. - * {CL_INVALID_PIPE_SIZE} if _pipe_packet_size_ is 0 or the - _pipe_packet_size_ exceeds {CL_DEVICE_PIPE_MAX_PACKET_SIZE} value - specified in the <> table for all - devices in _context_ or if _pipe_max_packets_ is 0. - * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate - memory for the pipe object. 
- * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _subresource_ is not a valid subresource index for _resource_. + * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 + texture resource, if _resource_ was created with the D3D10_USAGE flag + D3D10_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a + {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already + been created using {clCreateFromD3D10Texture2DKHR}, or if _context_ was + not created against the same Direct3D 10 device from which _resource_ + was created. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 10 texture format + of _resource_ is not listed in the <> table or if the + Direct3D 10 texture format of _resource_ does not map to a supported + OpenCL image format. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +-- -Pipes follow the same memory consistency model as defined for buffer and +[open,refpage='clCreateFromD3D10Texture3DKHR',desc='Create OpenCL 3D image object from a Direct3D 10 3D texture',type='protos'] +-- +To create an OpenCL 3D image object from a subresource of a Direct3D 10 3D +texture, call the function + +include::{generated}/api/protos/clCreateFromD3D10Texture3DKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D10Texture3DKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 10 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 10 3D texture to share. + * _subresource_ is the subresource of _resource_ to share. 
+ * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The width, height and depth of the returned OpenCL 3D image object are +determined by the width, height and depth of subresource _subresource_ of +_resource_. +The channel type and order of the returned OpenCL 3D image object is +determined by the format of _resource_ and the <> table. + +This call will increment the internal Direct3D 10 reference count on +_resource_. +The internal Direct3D 10 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromD3D10Texture3DKHR} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _subresource_ is not a valid subresource index for _resource_. + * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 + texture resource, if _resource_ was created with the D3D10_USAGE flag + D3D10_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a + {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already + been created using {clCreateFromD3D10Texture3DKHR}, or if _context_ was + not created against the same Direct3D 10 device from which _resource_ + was created. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 10 texture format + of _resource_ is not listed in the <> table or if the + Direct3D 10 texture format of _resource_ does not map to a supported + OpenCL image format. 
+ * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_d3d10_sharing[] + + +ifdef::cl_khr_d3d11_sharing[] +=== Creating OpenCL Image Objects From Direct3D 11 Textures and Resources + +[open,refpage='clCreateFromD3D11Texture2DKHR',desc='Create OpenCL 2D image object from a Direct3D 11 2D texture',type='protos'] +-- +To create an OpenCL 2D image object from a subresource of a Direct3D 11 2D +texture, call the function + +include::{generated}/api/protos/clCreateFromD3D11Texture2DKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D11Texture2DKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 11 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 11 2D texture to share. + * _subresource_ is the subresource of _resource_ to share. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The width and height of the returned OpenCL 2D image object are determined +by the width and height of subresource _subresource_ of _resource_. +The channel type and order of the returned OpenCL 2D image object is +determined by the format of _resource_ and the <> table. + +This call will increment the internal Direct3D 11 reference count on +_resource_. +The internal Direct3D 11 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromD3D11Texture2DKHR} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. 
+Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _subresource_ is not a valid subresource index for _resource_. + * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 + texture resource, if _resource_ was created with the D3D11_USAGE flag + D3D11_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a + {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already + been created using {clCreateFromD3D11Texture2DKHR}, or if _context_ was + not created against the same Direct3D 11 device from which _resource_ + was created. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 11 texture format + of _resource_ is not listed in the <> table or if the + Direct3D 11 texture format of _resource_ does not map to a supported + OpenCL image format. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clCreateFromD3D11Texture3DKHR',desc='Create OpenCL 3D image object from a Direct3D 11 3D texture',type='protos'] +-- +To create an OpenCL 3D image object from a subresource of a Direct3D 11 3D +texture, call the function + +include::{generated}/api/protos/clCreateFromD3D11Texture3DKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D11Texture3DKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 11 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 11 3D texture to share. + * _subresource_ is the subresource of _resource_ to share. 
+ * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The width, height and depth of the returned OpenCL 3D image object are +determined by the width, height and depth of subresource _subresource_ of +_resource_. +The channel type and order of the returned OpenCL 3D image object is +determined by the format of _resource_ and the <> table. + +This call will increment the internal Direct3D 11 reference count on +_resource_. +The internal Direct3D 11 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromD3D11Texture3DKHR} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _subresource_ is not a valid subresource index for _resource_. + * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 + texture resource, if _resource_ was created with the D3D11_USAGE flag + D3D11_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a + {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already + been created using {clCreateFromD3D11Texture3DKHR}, or if _context_ was + not created against the same Direct3D 11 device from which _resource_ + was created. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 11 texture format + of _resource_ is not listed in the <> table or if the + Direct3D 11 texture format of _resource_ does not map to a supported + OpenCL image format. 
+ * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_d3d11_sharing[] + + +ifdef::cl_khr_egl_image[] +=== Creating OpenCL Image Objects From EGL Images + +[open,refpage='clCreateFromEGLImageKHR',desc='Create cl_mem target from EGLImage source',type='protos'] +-- +To create an `EGLImage` target of type {cl_mem} from the `EGLImage` source +provided as _image_, call the function + +include::{generated}/api/protos/clCreateFromEGLImageKHR.txt[] +include::{generated}/api/version-notes/clCreateFromEGLImageKHR.asciidoc[] + + * _display_ should be of type `EGLDisplay`, cast into the type + {CLeglDisplayKHR}. + * _image_ should be of type `EGLImageKHR`, cast into the type + {CLeglImageKHR_TYPE}. + Assuming no errors are generated in this function, the resulting image + object will be an `EGLImage` target of the specified `EGLImage` _image_. + The resulting {cl_mem} is an image object which may be used normally by + all OpenCL operations. + This maps to an `image2d_t` type in OpenCL kernel code. + * _flags_ is a bit-field that is used to specify usage information about + the memory object being created. + Refer to the <> table for a + description of _flags_. + Accepted values in _flags_ are described below. + * _properties_ specifies a list of property names and their corresponding + values. + Each property name is immediately followed by the corresponding desired + value. + The list is terminated with 0. + No properties are currently supported with this version of the + extension. + _properties_ can be `NULL`. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +Accepted for _flags_ are {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and +{CL_MEM_READ_WRITE}. +If OpenCL 1.2 is supported, _flags_ also accepts {CL_MEM_HOST_WRITE_ONLY}, +{CL_MEM_HOST_READ_ONLY}, and {CL_MEM_HOST_NO_ACCESS}.
+ +`<<cl_khr_egl_image>>` only requires support for {CL_MEM_READ_ONLY}, and for +{CL_MEM_HOST_NO_ACCESS} if OpenCL 1.2 or later is supported. +For OpenCL 1.1, a {CL_INVALID_OPERATION} will be returned for images which +do not support host mapping. + +If the value passed in _flags_ is not supported by the OpenCL +implementation, it will return {CL_INVALID_VALUE}. +The accepted _flags_ may be dependent upon the texture format used. + +// refError + +{clCreateFromEGLImageKHR} returns a valid non-zero OpenCL image object and +_errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid OpenCL context. + * {CL_INVALID_VALUE} if _properties_ contains invalid values, if _display_ + is not a valid display object or if _flags_ are not in the set defined + above. + * {CL_INVALID_EGL_OBJECT_KHR} if _image_ is not a valid `EGLImage` object. + * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if the OpenCL implementation is not able + to create a {cl_mem} compatible with the provided {CLeglImageKHR_TYPE} + for an implementation-dependent reason (this could be caused by, but not + limited to, reasons such as unsupported texture formats, etc). + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_INVALID_OPERATION} if there are no devices in _context_ that support + images (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in table 4.3 is + {CL_FALSE}) or if the flags passed are not supported for that image + type.
+-- +endif::cl_khr_egl_image[] + + +ifdef::cl_khr_gl_sharing[] +=== Creating OpenCL Image Objects From OpenGL Textures and Renderbuffers + +[open,refpage='clCreateFromGLTexture',desc='Create OpenCL image object from an OpenGL texture object',type='protos'] +-- +To create an OpenCL image object from an OpenGL texture object, call the +function + +include::{generated}/api/protos/clCreateFromGLTexture.txt[] +include::{generated}/api/version-notes/clCreateFromGLTexture.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a + description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _texture_target_ must be one of `GL_TEXTURE_1D`, `GL_TEXTURE_1D_ARRAY`, + `GL_TEXTURE_BUFFER`, `GL_TEXTURE_2D`, `GL_TEXTURE_2D_ARRAY`, + `GL_TEXTURE_3D`, `GL_TEXTURE_CUBE_MAP_POSITIVE_X`, + `GL_TEXTURE_CUBE_MAP_POSITIVE_Y`, `GL_TEXTURE_CUBE_MAP_POSITIVE_Z`, + `GL_TEXTURE_CUBE_MAP_NEGATIVE_X`, `GL_TEXTURE_CUBE_MAP_NEGATIVE_Y`, + `GL_TEXTURE_CUBE_MAP_NEGATIVE_Z`. + `GL_TEXTURE_RECTANGLE` or the equivalent `GL_TEXTURE_RECTANGLE_ARB` may + be specified if an OpenGL implementation supporting rectangular textures + is supported. +ifdef::cl_khr_gl_msaa_sharing[] + `GL_TEXTURE_2D_MULTISAMPLE` and `GL_TEXTURE_2D_MULTISAMPLE_ARRAY` may be + specified if an OpenGL implementation supporting multi-sample + two-dimensional textures is supported, and the + `<>` extension is supported. + Refer to the <> section for more information on multi-sample images. +endif::cl_khr_gl_msaa_sharing[] + _texture_target_ is used only to define the image type of _texture_. + No reference to a bound OpenGL texture object is made or implied by this + parameter. + * _miplevel_ is the mipmap level to be used. + If _texture_target_ is `GL_TEXTURE_BUFFER`, _miplevel_ must be 0. 
+ Note: Implementations may return {CL_INVALID_OPERATION} for miplevel + values > 0. + * _texture_ is the name of an OpenGL 1D, 2D, 3D, 1D array, 2D array, + cubemap, rectangle or buffer texture object. + The texture object must be a complete texture as per OpenGL rules on + texture completeness. + The _texture_ format and dimensions defined by OpenGL for the specified + _miplevel_ of the texture will be used to create the OpenCL image memory + object. + Only OpenGL texture objects with an internal format that maps to an + appropriate <> and + <> may be used + to create the OpenCL image memory object. + * _errcode_ret_ will return an appropriate error code as described below. + If _errcode_ret_ is `NULL`, no error code is returned. + +{clCreateFromGLTexture} may create any of the following: + + * an OpenCL 2D image object from an OpenGL 2D texture object or a single + face of an OpenGL cubemap texture object, + * an OpenCL 2D image array object from an OpenGL 2D texture array object, + * an OpenCL 2D multi-sample image object from an OpenGL 2D multi-sample + texture, + * an OpenCL 2D multi-sample array image object from an OpenGL 2D + multi-sample texture, + * an OpenCL 1D image object from an OpenGL 1D texture object, + * an OpenCL 1D image buffer object from an OpenGL texture buffer object, + * an OpenCL 1D image array object from an OpenGL 1D texture array object, + * an OpenCL 3D image object from an OpenGL 3D texture object. + +ifdef::cl_khr_mipmap_image[] +If both the `<>` and `<>` extensions +are supported by the OpenCL device, {clCreateFromGLTexture} may also be used +to create a mipmapped OpenCL image from a mipmapped OpenGL texture by +specifying a negative value for _miplevel_. +In this case, an OpenCL mipmapped image object is created from a +mipmapped OpenGL texture object, instead of an OpenCL image object for a +specific miplevel of the OpenGL texture.
+ +NOTE: For a detailed description of how the level of detail is computed, +please refer to the "`Scale Factor and Level-of-Detail`" section of the +OpenGL 4.6 Specification. +endif::cl_khr_mipmap_image[] + +If the state of an OpenGL texture object is modified through the OpenGL API +(e.g. `glTexImage2D`, `glTexImage3D` or the values of the texture parameters +`GL_TEXTURE_BASE_LEVEL` or `GL_TEXTURE_MAX_LEVEL` are modified) while there +exists a corresponding OpenCL image object, subsequent use of the OpenCL +image object will result in undefined behavior. + +The {clRetainMemObject} and {clReleaseMemObject} functions can be used to +retain and release the image objects. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromGLTexture} returns a valid non-zero OpenCL image object and +_errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context or was not + created from an OpenGL context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + value specified in _texture_target_ is not one of the values specified + in the description of _texture_target_. + * {CL_INVALID_MIP_LEVEL} if _miplevel_ is less than the value of + _level~base~_ (for OpenGL implementations) or zero (for OpenGL ES + implementations); or greater than the value of _q_ (for both OpenGL and + OpenGL ES). + _level~base~_ and _q_ are defined for the texture in _section 3.8.10_ + (Texture Completeness) of the OpenGL 2.1 Specification and _section + 3.7.10_ of the OpenGL ES 2.0 Specification. + * {CL_INVALID_MIP_LEVEL} if _miplevel_ is greater than zero and the + OpenGL implementation does not support creating from non-zero mipmap + levels.
+ * {CL_INVALID_GL_OBJECT} if _texture_ is not an OpenGL texture object + whose type matches _texture_target_, if the specified _miplevel_ of + _texture_ is not defined, or if the width or height of the specified + _miplevel_ is zero or if the OpenGL texture object is incomplete. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the internal format of _texture_ + is not listed in the <> table. + * {CL_INVALID_OPERATION} if _texture_ is an OpenGL texture object created + with a border width value greater than zero. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +ifdef::cl_khr_gl_depth_images[] +[[restrictions-on-depth-stencil-images]] +==== Restrictions on Depth/Stencil Images + +Depth images with an image channel order of {CL_DEPTH_STENCIL} can only be +created using the {clCreateFromGLTexture} API, and only when the +`<>` extension is supported. + +For the image format given by channel order of {CL_DEPTH_STENCIL} and +channel data type of {CL_UNORM_INT24}, the depth is stored as an unsigned +normalized 24-bit value. + +For the image format given by channel order of {CL_DEPTH_STENCIL} and +channel data type of {CL_FLOAT}, each pixel is two 32-bit values. +The depth is stored as a single precision floating-point value followed by +the stencil which is stored as an 8-bit integer value. + +Such images appear in the <>, but only require read support, +not write support. + +The stencil value cannot be read or written using the *read_imagef* and +*write_imagef* built-in functions in an OpenCL kernel.
+ +Depth image objects with an image channel order of {CL_DEPTH_STENCIL} cannot +be used as arguments to {clEnqueueReadImage}, {clEnqueueWriteImage}, +{clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer}, +{clEnqueueCopyBufferToImage}, {clEnqueueMapImage}, and {clEnqueueFillImage}. +Such use will return a {CL_INVALID_OPERATION} error. +endif::cl_khr_gl_depth_images[] + + +ifdef::cl_khr_gl_msaa_sharing[] +[[restrictions-on-msaa-images]] +==== Restrictions on Multi-Sample Images + +The formats described in the <> and <> tables of the OpenCL 3.0 specification and the +additional formats described in the <> table also +support OpenCL images created from an OpenGL multi-sampled color or depth +texture. + +Multi-sample OpenCL image objects can only be read from a kernel. +Multi-sample OpenCL image objects cannot be used as arguments to +{clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueCopyImage}, +{clEnqueueCopyImageToBuffer}, {clEnqueueCopyBufferToImage}, +{clEnqueueMapImage}, and {clEnqueueFillImage}. +Such use will return a {CL_INVALID_OPERATION} error. +endif::cl_khr_gl_msaa_sharing[] + + +[open,refpage='clCreateFromGLRenderbuffer',desc='Create OpenCL 2D image object from an OpenGL renderbuffer',type='protos'] +-- +To create an OpenCL 2D image object from an OpenGL renderbuffer object, call +the function + +include::{generated}/api/protos/clCreateFromGLRenderbuffer.txt[] +include::{generated}/api/version-notes/clCreateFromGLRenderbuffer.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a + description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _renderbuffer_ is the name of an OpenGL renderbuffer object. + The renderbuffer storage must be specified before the image object can + be created.
+ The _renderbuffer_ format and dimensions defined by OpenGL will be used + to create the 2D image object. + Only OpenGL renderbuffers with an internal format that maps to an + appropriate <> and + <> may be used + to create the 2D image object. + * _errcode_ret_ will return an appropriate error code as described below. + If _errcode_ret_ is `NULL`, no error code is returned. + +If the state of an OpenGL renderbuffer object is modified through the OpenGL +API (i.e. changes to the dimensions or format used to represent pixels of +the OpenGL renderbuffer using appropriate OpenGL API calls such as +`glRenderbufferStorage`) while there exists a corresponding OpenCL image +object, subsequent use of the OpenCL image object will result in undefined +behavior. + +The {clRetainMemObject} and {clReleaseMemObject} functions can be used to +retain and release the image objects. + +The <> table describes the list of OpenGL +renderbuffer internal formats and the Corresponding OpenCL Image Formats. +If an OpenGL renderbuffer object with an internal format from the table is +successfully created by OpenGL, then there is guaranteed to be a mapping to +one of the corresponding OpenCL image format(s) in that table. +Renderbuffer objects created with other OpenGL internal formats may (but are +not guaranteed to) have a mapping to an OpenCL image format; if such +mappings exist, they are guaranteed to preserve all color components, data +types, and at least the number of bits/component actually allocated by +OpenGL for that format. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromGLRenderbuffer} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context or was not + created from an OpenGL context. 
+ * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. + * {CL_INVALID_GL_OBJECT} if _renderbuffer_ is not an OpenGL renderbuffer + object, or if the width or height of _renderbuffer_ is zero. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the internal format of + _renderbuffer_ is not listed in the <> table. + * {CL_INVALID_OPERATION} if _renderbuffer_ is a multi-sample OpenGL + renderbuffer object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_gl_sharing[] + + +== Pipes + +NOTE: Pipes are <> version 2.0. + +A _pipe_ is a memory object that stores data organized as a FIFO. +Pipe objects can only be accessed using built-in functions that read from +and write to a pipe. +Pipe objects are not accessible from the host. +A pipe object encapsulates the following information: + + * Packet size in bytes + * Maximum capacity in packets + * Information about the number of packets currently in the pipe + * Data packets + + +=== Creating Pipe Objects + +[open,refpage='clCreatePipe',desc='Creates a pipe object.',type='protos'] +-- +To create a *pipe object*, call the function + +include::{generated}/api/protos/clCreatePipe.txt[] +include::{generated}/api/version-notes/clCreatePipe.asciidoc[] + + * _context_ is a valid OpenCL context used to create the pipe object. + * _flags_ is a bit-field that is used to specify allocation and usage + information such as the memory arena that should be used to allocate the + pipe object and how it will be used. + The <> table describes the possible values for + _flags_. + Only {CL_MEM_READ_WRITE} and {CL_MEM_HOST_NO_ACCESS} can be specified when + creating a pipe object. + If the value specified for _flags_ is 0, the default is used which is + {CL_MEM_READ_WRITE} | {CL_MEM_HOST_NO_ACCESS}. 
+ * _pipe_packet_size_ is the size in bytes of a pipe packet. + * _pipe_max_packets_ specifies the pipe capacity by specifying the maximum + number of packets the pipe can hold. + * _properties_ specifies a list of properties for the pipe and their + corresponding values. + Each property name is immediately followed by the corresponding desired + value. + The list is terminated with 0. + Currently, in all OpenCL versions, _properties_ must be `NULL`. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +// refError + +{clCreatePipe} returns a valid non-zero pipe object and _errcode_ret_ is set +to {CL_SUCCESS} if the pipe object is created successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_OPERATION} if no devices in _context_ support pipes. + * {CL_INVALID_VALUE} if values specified in _flags_ are not as defined + above. + * {CL_INVALID_VALUE} if _properties_ is not `NULL`. + * {CL_INVALID_PIPE_SIZE} if _pipe_packet_size_ is 0 or the + _pipe_packet_size_ exceeds {CL_DEVICE_PIPE_MAX_PACKET_SIZE} value + specified in the <> table for all + devices in _context_ or if _pipe_max_packets_ is 0. + * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate + memory for the pipe object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +Pipes follow the same memory consistency model as defined for buffer and image objects. The pipe state i.e. contents of the pipe across kernel-instances (on the same or different devices) is enforced at a synchronization point. @@ -3834,67 +5386,347 @@ of OpenCL APIs is considered to be undefined. 
-- -[[unmapping-mapped-memory]] -=== Unmapping Mapped Memory Objects +ifdef::cl_khr_external_memory[] -[open,refpage='clEnqueueUnmapMemObject',desc='Enqueues a command to unmap a previously mapped region of a memory object.',type='protos'] +[[acquiring-external-memory]] +==== Acquiring and Releasing External Memory Objects + +[open,refpage='clEnqueueAcquireExternalMemObjectsKHR',desc='Enqueue a command to acquire OpenCL memory objects created from external memory handles',type='protos'] -- -To enqueue a command to unmap a previously mapped region of a memory object, -call the function +To enqueue a command to acquire OpenCL memory objects created from external +memory handles, call the function -include::{generated}/api/protos/clEnqueueUnmapMemObject.txt[] -include::{generated}/api/version-notes/clEnqueueUnmapMemObject.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireExternalMemObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireExternalMemObjectsKHR.asciidoc[] - * _command_queue_ must be a valid host command-queue. - * _memobj_ is a valid memory (buffer or image) object. - The OpenCL context associated with _command_queue_ and _memobj_ must be the - same. - * _mapped_ptr_ is the host address returned by a previous call to - {clEnqueueMapBuffer}, or {clEnqueueMapImage} for _memobj_. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before {clEnqueueUnmapMemObject} can be executed. - If _event_wait_list_ is `NULL`, then {clEnqueueUnmapMemObject} does not wait - on any event to complete. + * _command_queue_ specifies a valid command-queue. + * _num_mem_objects_ specifies the number of memory objects to acquire. + * _mem_objects_ points to a list of valid memory objects. + * _num_events_in_wait_list_ specifies the number of events in + _event_wait_list_. + * _event_wait_list_ points to the list of events that need to complete + before {clEnqueueAcquireExternalMemObjectsKHR} can be executed. 
+ If _event_wait_list_ is `NULL`, then + {clEnqueueAcquireExternalMemObjectsKHR} does not explicitly wait on any + event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. - -Reads or writes from the host using the pointer returned by -{clEnqueueMapBuffer} or {clEnqueueMapImage} are considered to be complete. - -{clEnqueueMapBuffer} and {clEnqueueMapImage} increment the mapped count of -the memory object. -The initial mapped count value of the memory object is zero. -Multiple calls to {clEnqueueMapBuffer}, or {clEnqueueMapImage} on the same -memory object will increment this mapped count by appropriate number of -calls. -{clEnqueueUnmapMemObject} decrements the mapped count of the memory object. - -{clEnqueueMapBuffer}, and {clEnqueueMapImage} act as synchronization points -for a region of the buffer object being mapped. + The context associated with events in _event_wait_list_ and that of + _command_queue_ must be the same. 
+ * _event_ returns an event object that identifies this particular command + and can be used to query or queue a wait for this particular command to + complete. + _event_ can be `NULL` in which case it will not be possible for the + application to query the status of this command or queue a wait for this + command to complete. + +Applications must acquire the memory objects that are created using external +handles before they can be used by any OpenCL commands queued to a +command-queue. +Behavior is undefined if a memory object created from an external memory +handle is used by an OpenCL command queued to a command-queue without being +acquired. +This is to guarantee that the state of the memory objects is up-to-date and +they are accessible to OpenCL. +See <> for more details on how to use this API. + +If _num_mem_objects_ is 0 and _mem_objects_ is `NULL`, the command will +trivially succeed after its event dependencies are satisfied and will update +its completion event. // refError -{clEnqueueUnmapMemObject} returns {CL_SUCCESS} if the function is executed -successfully. +{clEnqueueAcquireExternalMemObjectsKHR} returns {CL_SUCCESS} if the function +is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_MEM_OBJECT} if _memobj_ is not a valid memory object or is a + * {CL_INVALID_VALUE} if _num_mem_objects_ is zero and _mem_objects_ is not + a `NULL` value, or if _num_mem_objects_ is greater than 0 and + _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if any of the memory objects in _mem_objects_ is + not a valid OpenCL memory object created using an external memory + handle. 
+ * {CL_INVALID_COMMAND_QUEUE} + ** if _command_queue_ is not a valid command-queue, or + ** if device associated with _command_queue_ is not one of the devices + specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating + one or more of _mem_objects_, or + ** if one or more of _mem_objects_ belong to a context that does not + contain a device associated with _command_queue_. + * {CL_INVALID_EVENT_WAIT_LIST} + ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, + or + ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, + or + ** if event objects in _event_wait_list_ are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status + of any of the events in _event_wait_list_ is a negative integer value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueReleaseExternalMemObjectsKHR',desc='Enqueue a command to release OpenCL memory objects created from external memory handles',type='protos'] +-- +To enqueue a command to release OpenCL memory objects created from external +memory handles, call the function + +include::{generated}/api/protos/clEnqueueReleaseExternalMemObjectsKHR.txt[] + + * _command_queue_ specifies a valid command-queue. + * _num_mem_objects_ specifies the number of memory objects to release. + * _mem_objects_ points to a list of valid memory objects. + * _num_events_in_wait_list_ specifies the number of events in + _event_wait_list_. + * _event_wait_list_ points to the list of events that need to complete + before {clEnqueueReleaseExternalMemObjectsKHR} can be executed. + If _event_wait_list_ is `NULL`, then + {clEnqueueReleaseExternalMemObjectsKHR} does not wait on any event to + complete.
+ If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and that of + _command_queue_ must be the same. + * _event_ returns an event object that identifies this particular command + and can be used to query or queue a wait for this particular command to + complete. + _event_ can be `NULL` in which case it will not be possible for the + application to query the status of this command or queue a wait for this + command to complete. + +Applications must release acquired memory objects using +{clEnqueueReleaseExternalMemObjectsKHR} before using them through any +commands in the other API. +This is to guarantee that the state of memory objects is up-to-date and they +are accessible to the other API. +See "`Example with Acquire / Release`" provided in +<> for more details on how to use this +API. + +If _num_mem_objects_ is 0 and _mem_objects_ is `NULL`, the command will +trivially succeed after its event dependencies are satisfied and will update +its completion event. + +// refError + +{clEnqueueReleaseExternalMemObjectsKHR} returns {CL_SUCCESS} if the function +is executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_mem_objects_ is zero and _mem_objects_ is not + a `NULL` value, or if _num_mem_objects_ is greater than 0 and + _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if any of the memory objects in _mem_objects_ is + not a valid OpenCL memory object created using an external memory + handle.
+ * {CL_INVALID_COMMAND_QUEUE} + ** if _command_queue_ is not a valid command-queue, or + ** if device associated with _command_queue_ is not one of the devices + specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating + one or more of _mem_objects_, or + ** if one or more of _mem_objects_ belong to a context that does not + contain a device associated with _command_queue_. + * {CL_INVALID_EVENT_WAIT_LIST} + ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, + or + ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, + or + ** if event objects in _event_wait_list_ are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status + of any of the events in _event_wait_list_ is a negative integer value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +[[external-memory-handle-types]] +=== Descriptions of External Memory Handle Types + +This section describes external memory handle types that are added by +extensions. + +Applications can import the same payload into multiple OpenCL contexts and +multiple times into a given OpenCL context. In all cases, each import +operation must create a distinct memory object. + + +==== File Descriptor Handle Types + +ifdef::cl_khr_external_memory_opaque_fd[] +The `<>` extension extends +{cl_external_memory_handle_type_khr_TYPE} to support the following new types +of handles, and adds as a property that may be specified when creating a +buffer or an image memory object from an external handle: + + * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR} specifies a POSIX file + descriptor handle that has only limited valid usage outside of OpenCL + and other compatible APIs. 
+ It must be compatible with the POSIX system calls `dup`, `dup2`, + `close`, and the non-standard system call `dup3`. + Additionally, it must be transportable over a socket using a + `SCM_RIGHTS` control message. + It owns a reference to the underlying memory resource represented by its + memory object. +endif::cl_khr_external_memory_opaque_fd[] + +ifdef::cl_khr_external_memory_dma_buf[] +The `<>` extension extends +{cl_external_memory_handle_type_khr_TYPE} to support the following types of +handles, and adds as a property that may be specified when creating a buffer +or an image memory object from an external handle: + + * {CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR} is a file descriptor for a Linux + dma_buf. + It owns a reference to the underlying memory resource represented by its + memory object. +endif::cl_khr_external_memory_dma_buf[] + +For these extensions, importing memory from a file descriptor transfers +ownership of the file descriptor from the application to the OpenCL +implementation. +The application must not perform any operations on the file descriptor after +a successful import. +The imported memory object holds a reference to its payload. + + +==== NT Handle Types + +ifdef::cl_khr_external_memory_dx[] +The `<>` extension extends +{cl_external_memory_handle_type_khr_TYPE} to support the following new types +of handles, and adds as a property that may be specified when creating a +buffer or an image memory object from an external handle: + + * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR} specifies an NT handle + returned by `IDXGIResource1::CreateSharedHandle` referring to a Direct3D + 10 or 11 texture resource. + It owns a reference to the memory used by the Direct3D resource. + * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR} specifies a global + share handle returned by `IDXGIResource::GetSharedHandle` referring to a + Direct3D 10 or 11 texture resource. 
+ It does not own a reference to the underlying Direct3D resource, and + will therefore become invalid when all memory objects and Direct3D + resources associated with it are destroyed. + * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR} specifies an NT handle + returned by `ID3D12Device::CreateSharedHandle` referring to a Direct3D + 12 heap resource. + It owns a reference to the resources used by the Direct3D heap. + * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR} specifies an NT handle + returned by `ID3D12Device::CreateSharedHandle` referring to a Direct3D + 12 committed resource. + It owns a reference to the memory used by the Direct3D resource. +endif::cl_khr_external_memory_dx[] + +ifdef::cl_khr_external_memory_win32[] +The `<>` extension extends +{cl_external_memory_handle_type_khr_TYPE} to support the following new types +of handles, and adds as a property that may be specified when creating a +buffer or an image memory object from an external handle: + + * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR} specifies an NT handle that + has only limited valid usage outside of OpenCL and other compatible + APIs. + It must be compatible with the functions `DuplicateHandle`, + `CloseHandle`, `CompareObjectHandles`, `GetHandleInformation`, and + `SetHandleInformation`. + It owns a reference to the underlying memory resource represented by its + memory object. + * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR} specifies a global + share handle that has only limited valid usage outside of OpenCL and + other compatible APIs. + It is not compatible with any native APIs. + It does not own a reference to the underlying memory resource + represented by its memory object, and will therefore become invalid when + all memory objects associated with it are destroyed. +endif::cl_khr_external_memory_win32[] + +For these extensions, importing memory object payloads from Windows handles +does not transfer ownership of the handle to the OpenCL implementation. 
+For handle types defined as NT handles, the application must release handle +ownership using the `CloseHandle` system call when the handle is no longer +needed. +For handle types defined as NT handles, the imported memory object holds a +reference to its payload. + +Note: Non-NT handle import operations do not add a reference to their +associated payload. +If the original object owning the payload is destroyed, all resources and +handles sharing that payload will become invalid. + +endif::cl_khr_external_memory[] + + +[[unmapping-mapped-memory]] +=== Unmapping Mapped Memory Objects + +[open,refpage='clEnqueueUnmapMemObject',desc='Enqueues a command to unmap a previously mapped region of a memory object.',type='protos'] +-- +To enqueue a command to unmap a previously mapped region of a memory object, +call the function + +include::{generated}/api/protos/clEnqueueUnmapMemObject.txt[] +include::{generated}/api/version-notes/clEnqueueUnmapMemObject.asciidoc[] + + * _command_queue_ must be a valid host command-queue. + * _memobj_ is a valid memory (buffer or image) object. + The OpenCL context associated with _command_queue_ and _memobj_ must be the + same. + * _mapped_ptr_ is the host address returned by a previous call to + {clEnqueueMapBuffer}, or {clEnqueueMapImage} for _memobj_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before {clEnqueueUnmapMemObject} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueUnmapMemObject} does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same.
+ The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +Reads or writes from the host using the pointer returned by +{clEnqueueMapBuffer} or {clEnqueueMapImage} are considered to be complete. + +{clEnqueueMapBuffer} and {clEnqueueMapImage} increment the mapped count of +the memory object. +The initial mapped count value of the memory object is zero. +Multiple calls to {clEnqueueMapBuffer}, or {clEnqueueMapImage} on the same +memory object will increment this mapped count by appropriate number of +calls. +{clEnqueueUnmapMemObject} decrements the mapped count of the memory object. + +{clEnqueueMapBuffer}, and {clEnqueueMapImage} act as synchronization points +for a region of the buffer object being mapped. + +// refError + +{clEnqueueUnmapMemObject} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_MEM_OBJECT} if _memobj_ is not a valid memory object or is a pipe object. * {CL_INVALID_VALUE} if _mapped_ptr_ is not a valid pointer returned by {clEnqueueMapBuffer} or {clEnqueueMapImage} for _memobj_. @@ -3913,7 +5745,7 @@ Otherwise, it returns one of the following errors: [[accessing-mapped-regions]] -=== Accessing mapped regions of a memory object +=== Accessing Mapped Regions of a Memory Object This section describes the behavior of OpenCL commands that access mapped regions of a memory object. 
@@ -3961,8 +5793,8 @@ pointer that has been unmapped is undefined. The mapped pointer returned by {clEnqueueMapBuffer} or {clEnqueueMapImage} can be used as the _ptr_ argument value to {clEnqueueReadBuffer}, -{clEnqueueWriteBuffer}, {clEnqueueReadBufferRect}, -{clEnqueueWriteBufferRect}, {clEnqueueReadImage}, or +{clEnqueueWriteBuffer}, {clEnqueueReadBufferRect}, +{clEnqueueWriteBufferRect}, {clEnqueueReadImage}, or {clEnqueueWriteImage} provided the rules described above are adhered to. @@ -4246,13 +6078,51 @@ include::{generated}/api/version-notes/CL_MEM_PROPERTIES.asciidoc[] return the values specified in the properties argument in the same order and without including additional properties. - If _memobj_ was created using {clCreateBuffer}, + If _memobj_ was created using {clCreateBuffer}, {clCreateSubBuffer}, {clCreateImage}, {clCreateImage2D}, or {clCreateImage3D}, or if the _properties_ argument specified in {clCreateBufferWithProperties} or {clCreateImageWithProperties} was `NULL`, the implementation must return _param_value_size_ret_ equal to 0, indicating that there are no properties to be returned. + +ifdef::cl_khr_dx9_media_sharing[] +| {CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR.asciidoc[] + | {cl_dx9_media_adapter_type_khr_TYPE} + | If _memobj_ was created using {clCreateFromDX9MediaSurfaceKHR}, + returns the _adapter_type_ argument specified when _memobj_ was + created. +| {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR.asciidoc[] + | {cl_dx9_surface_info_khr_TYPE} + | If _memobj_ was created using {clCreateFromDX9MediaSurfaceKHR}, + returns the _surface_info_ argument specified when _memobj_ was + created. 
+endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +| {CL_MEM_D3D10_RESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_D3D10_RESOURCE_KHR.asciidoc[] + | `ID3D10Resource *` + | If _memobj_ was created using {clCreateFromD3D10BufferKHR}, + {clCreateFromD3D10Texture2DKHR}, or {clCreateFromD3D10Texture3DKHR}, + returns the _resource_ argument specified when _memobj_ was created. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {CL_MEM_D3D11_RESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_D3D11_RESOURCE_KHR.asciidoc[] + | `ID3D11Resource *` + | If _memobj_ was created using {clCreateFromD3D11BufferKHR}, + {clCreateFromD3D11Texture2DKHR}, or {clCreateFromD3D11Texture3DKHR}, + returns the _resource_ argument specified when _memobj_ was created. +endif::cl_khr_d3d11_sharing[] + |==== // refError @@ -4270,1501 +6140,1844 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. --- +ifdef::cl_khr_dx9_media_sharing[] +The following errors may be returned if the `<<cl_khr_dx9_media_sharing>>` +extension is supported: -== Shared Virtual Memory + * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is + {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} and _memobj_ was not created by + calling {clCreateFromDX9MediaSurfaceKHR} from a Direct3D9 surface. +endif::cl_khr_dx9_media_sharing[] -NOTE: Shared virtual memory is <> version 2.0. +ifdef::cl_khr_d3d10_sharing[] +The following errors may be returned if the `<<cl_khr_d3d10_sharing>>` +extension is supported: -Shared virtual memory (a.k.a. SVM) allows the host and kernels executing on -devices to directly share complex, pointer-containing data structures such as -trees and linked lists. -It also eliminates the need to marshal data between the host and devices. -As a result, SVM substantially simplifies OpenCL programming and may improve -performance.
+ * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is + {CL_MEM_D3D10_RESOURCE_KHR} and _memobj_ was not created by calling + {clCreateFromD3D10BufferKHR}, {clCreateFromD3D10Texture2DKHR}, or + {clCreateFromD3D10Texture3DKHR}. +endif::cl_khr_d3d10_sharing[] +ifdef::cl_khr_d3d11_sharing[] +The following errors may be returned if the `<<cl_khr_d3d11_sharing>>` +extension is supported: -=== SVM sharing granularity: coarse- and fine- grained sharing + * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is + {CL_MEM_D3D11_RESOURCE_KHR} and _memobj_ was not created by calling + {clCreateFromD3D11BufferKHR}, {clCreateFromD3D11Texture2DKHR}, or + {clCreateFromD3D11Texture3DKHR}. +endif::cl_khr_d3d11_sharing[] -OpenCL maintains memory consistency in a coarse-grained fashion in regions -of buffers. -We call this coarse-grained sharing. -Many platforms such as those with integrated CPU-GPU processors and ones -using the SVM-related PCI-SIG IOMMU services can do better, and can support -sharing at a granularity smaller than a buffer. -We call this fine-grained sharing. +-- - * Coarse-grained sharing: Coarse-grain sharing may be used for memory and - virtual pointer sharing between multiple devices as well as between the - host and one or more devices. - The shared memory region is a memory buffer allocated using - {clSVMAlloc}. - Memory consistency is guaranteed at synchronization points and the host - can use calls to {clEnqueueSVMMap} and {clEnqueueSVMUnmap} or create a - {cl_mem_TYPE} buffer object using the SVM pointer and use OpenCL's existing host - API functions {clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} to - update regions of the buffer. - What coarse-grain buffer SVM adds to OpenCL's earlier buffer support are - the ability to share virtual memory pointers and a guarantee that - concurrent access to the same memory allocation from multiple kernels on - a single device is valid.
- The coarse-grain buffer SVM provides a memory consistency model similar - to the global memory consistency model described in _sections 3.3.1_ and - _3.4.3_ of the OpenCL 1.2 specification. - This memory consistency applies to the regions of buffers being shared - in a coarse-grained fashion. - It is enforced at the synchronization points between commands enqueued - to command-queues in a single context with the additional consideration - that multiple kernels concurrently running on the same device may safely - share the data. - * Fine-grained sharing: Shared virtual memory where memory consistency is - maintained at a granularity smaller than a buffer. - How fine-grained SVM is used depends on whether the device supports SVM - atomic operations. - ** If SVM atomic operations are supported, they provide memory consistency - for loads and stores by the host and kernels executing on devices - supporting SVM. - This means that the host and devices can concurrently read and update - the same memory. - The consistency provided by SVM atomics is in addition to the - consistency provided at synchronization points. - There is no need for explicit calls to {clEnqueueSVMMap} and - {clEnqueueSVMUnmap} or {clEnqueueMapBuffer} and - {clEnqueueUnmapMemObject} on a {cl_mem_TYPE} buffer object created using the - SVM pointer. - ** If SVM atomic operations are not supported, the host and devices can - concurrently read the same memory locations and can concurrently update - non-overlapping memory regions, but attempts to update the same memory - locations are undefined. - Memory consistency is guaranteed at synchronization points without the - need for explicit calls to {clEnqueueSVMMap} and {clEnqueueSVMUnmap} - or {clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} on a {cl_mem_TYPE} - buffer object created using the SVM pointer. - * There are two kinds of fine-grain sharing support. - Devices may support either fine-grain buffer sharing or fine-grain - system sharing. 
- ** Fine-grain buffer sharing provides fine-grain SVM only within buffers - and is an extension of coarse-grain sharing. - To support fine-grain buffer sharing in an OpenCL context, all devices - in the context must support {CL_DEVICE_SVM_FINE_GRAIN_BUFFER}. - ** Fine-grain system sharing enables fine-grain sharing of the host's - entire virtual memory, including memory regions allocated by the system - *malloc* API. - OpenCL buffer objects are unnecessary and programmers can pass pointers - allocated using *malloc* to OpenCL kernels. -As an illustration of fine-grain SVM using SVM atomic operations to maintain -memory consistency, consider the following example. -The host and a set of devices can simultaneously access and update a shared -work-queue data structure holding work-items to be done. -The host can use atomic operations to insert new work-items into the queue -at the same time as the devices using similar atomic operations to remove -work-items for processing. +ifdef::cl_khr_dx9_media_sharing[] +=== Querying Media Surface Properties of Memory Objects Created From DirectX 9 Media Surfaces -It is the programmer's responsibility to ensure that no host code or -executing kernels attempt to access a shared memory region after that memory -is freed. -We require the SVM implementation to work with either 32- or 64- bit host -applications subject to the following requirement: the address space size -must be the same for the host and all OpenCL devices in the context. +Properties of media surface objects may be queried using {clGetMemObjectInfo} +and {clGetImageInfo} with _param_name_ {CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR}, +{CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} and {CL_IMAGE_DX9_MEDIA_PLANE_KHR}. 
+endif::cl_khr_dx9_media_sharing[] -[open,refpage='clSVMAlloc',desc='Allocates a shared virtual memory (SVM) buffer that can be shared by the host and all devices in an OpenCL context that support shared virtual memory.',type='protos'] --- -To allocate a shared virtual memory buffer (referred to as a SVM buffer) -that can be shared by the host and all devices in an OpenCL context that -support shared virtual memory, call the function -include::{generated}/api/protos/clSVMAlloc.txt[] -include::{generated}/api/version-notes/clSVMAlloc.asciidoc[] +ifdef::cl_khr_d3d10_sharing[] +=== Querying Direct3D Properties of Memory Objects Created From Direct3D 10 Resources - * _context_ is a valid OpenCL context used to create the SVM buffer. - * _flags_ is a bit-field that is used to specify allocation and usage - information. - The <> table describes the possible values - for _flags_. - * _size_ is the size in bytes of the SVM buffer to be allocated. - * _alignment_ is the minimum alignment in bytes that is required for the newly - created buffers memory region. - It must be a power of two up to the largest data type supported by the - OpenCL device. - For the full profile, the largest data type is long16. - For the embedded profile, it is long16 if the device supports 64-bit - integers; otherwise it is int16. - If alignment is 0, a default alignment will be used that is equal to the - size of largest data type supported by the OpenCL implementation. +Properties of Direct3D 10 objects may be queried using {clGetMemObjectInfo} +and {clGetImageInfo} with _param_name_ {CL_MEM_D3D10_RESOURCE_KHR} and +{CL_IMAGE_D3D10_SUBRESOURCE_KHR} respectively. +endif::cl_khr_d3d10_sharing[] -[[svm-flags-table]] -.List of supported SVM memory flag values -[width="100%",cols="<50%,<50%",options="header"] -|==== -| SVM Memory Flags | Description -| {CL_MEM_READ_WRITE} - | This flag specifies that the SVM buffer will be read and written by a - kernel. - This is the default. 
-| {CL_MEM_WRITE_ONLY} - | This flag specifies that the SVM buffer will be written but not read by - a kernel. - Reading from a SVM buffer created with {CL_MEM_WRITE_ONLY} inside a kernel - is undefined. +ifdef::cl_khr_d3d11_sharing[] +=== Querying Direct3D Properties of Memory Objects Created From Direct3D 11 Resources - {CL_MEM_READ_WRITE} and {CL_MEM_WRITE_ONLY} are mutually exclusive. -| {CL_MEM_READ_ONLY} - | This flag specifies that the SVM buffer object is a read-only memory - object when used inside a kernel. +Properties of Direct3D 11 objects may be queried using {clGetMemObjectInfo} +and {clGetImageInfo} with _param_name_ {CL_MEM_D3D11_RESOURCE_KHR} and +{CL_IMAGE_D3D11_SUBRESOURCE_KHR} respectively. +endif::cl_khr_d3d11_sharing[] - Writing to a SVM buffer created with {CL_MEM_READ_ONLY} inside a kernel is - undefined. - {CL_MEM_READ_WRITE} or {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_ONLY} are mutually - exclusive. -| {CL_MEM_SVM_FINE_GRAIN_BUFFER_anchor} +ifdef::cl_khr_gl_sharing[] +=== Querying OpenGL Object Information From an OpenCL Memory Object -include::{generated}/api/version-notes/CL_MEM_SVM_FINE_GRAIN_BUFFER.asciidoc[] - | This specifies that the application wants the OpenCL implementation to - do a fine-grained allocation. -| {CL_MEM_SVM_ATOMICS_anchor} +[open,refpage='clGetGLObjectInfo',desc='Query OpenGL object and object type used to create an OpenCL memory object',type='protos'] +-- +To query the OpenGL object and object type used to create an OpenCL memory +object, call the function -include::{generated}/api/version-notes/CL_MEM_SVM_ATOMICS.asciidoc[] - | This flag is valid only if {CL_MEM_SVM_FINE_GRAIN_BUFFER} is specified in - flags. - It is used to indicate that SVM atomic operations can control visibility - of memory accesses in this SVM buffer. 
-|==== +include::{generated}/api/protos/clGetGLObjectInfo.txt[] +include::{generated}/api/version-notes/clGetGLObjectInfo.asciidoc[] -If {CL_MEM_SVM_FINE_GRAIN_BUFFER} is not specified, the buffer can be created -as a coarse grained SVM allocation. -Similarly, if {CL_MEM_SVM_ATOMICS} is not specified, the buffer can be created -without support for SVM atomic operations (refer to an OpenCL kernel -language specifications). + * _memobj_ is the memory object to query. + * _gl_object_type_ returns the type of OpenGL object attached to _memobj_ + and can be {CL_GL_OBJECT_BUFFER}, {CL_GL_OBJECT_TEXTURE2D}, + {CL_GL_OBJECT_TEXTURE3D}, {CL_GL_OBJECT_TEXTURE2D_ARRAY}, + {CL_GL_OBJECT_TEXTURE1D}, {CL_GL_OBJECT_TEXTURE1D_ARRAY}, + {CL_GL_OBJECT_TEXTURE_BUFFER}, or {CL_GL_OBJECT_RENDERBUFFER}. + If _gl_object_type_ is `NULL`, it is ignored. + * _gl_object_name_ returns the OpenGL object name used to create _memobj_. + If _gl_object_name_ is `NULL`, it is ignored. -Calling {clSVMAlloc} does not itself provide consistency for the shared -memory region. -When the host cannot use the SVM atomic operations, it must rely on OpenCL's -guaranteed memory consistency at synchronization points. +// refError -For SVM to be used efficiently, the host and any devices sharing a buffer -containing virtual memory pointers should have the same endianness. -If the context passed to {clSVMAlloc} has devices with mixed endianness and -the OpenCL implementation is unable to implement SVM because of that mixed -endianness, {clSVMAlloc} will fail and return `NULL`. - -Although SVM is generally not supported for image objects, {clCreateImage} -and {clCreateImageWithProperties} -may create an image from a buffer (a 1D image from a buffer or a 2D image -from buffer) if the buffer specified in its image description parameter is a -SVM buffer. -Such images have a linear memory representation so their memory can be -shared using SVM.
-However, fine grained sharing and atomics are not supported for image reads -and writes in a kernel. - -// refError - -{clSVMAlloc} returns a valid non-`NULL` shared virtual memory address if the -SVM buffer is successfully allocated. -Otherwise, like *malloc*, it returns a `NULL` pointer value. -{clSVMAlloc} will fail if +{clGetGLObjectInfo} returns {CL_SUCCESS} if the call was executed +successfully. +Otherwise, it returns one of the following errors: - * _context_ is not a valid context, or no devices in _context_ support SVM. - * _flags_ does not contain {CL_MEM_SVM_FINE_GRAIN_BUFFER} but does contain - {CL_MEM_SVM_ATOMICS}. - * Values specified in _flags_ do not follow rules described for supported - values in the <> table. - * {CL_MEM_SVM_FINE_GRAIN_BUFFER} or {CL_MEM_SVM_ATOMICS} is specified in - _flags_ and these are not supported by at least one device in _context_. - * The values specified in _flags_ are not valid, i.e. don't match those - defined in the <> table. - * _size_ is 0 or > {CL_DEVICE_MAX_MEM_ALLOC_SIZE} value for any device in - _context_. - * _alignment_ is not a power of two or the OpenCL implementation cannot - support the specified alignment for at least one device in _context_. - * There was a failure to allocate resources. + * {CL_INVALID_MEM_OBJECT} if _memobj_ is not a valid OpenCL memory object. + * {CL_INVALID_GL_OBJECT} if there is no OpenGL object associated with + _memobj_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
-- -[open,refpage='clSVMFree',desc='Frees a shared virtual memory buffer allocated using clSVMAlloc.',type='protos'] +[open,refpage='clGetGLTextureInfo',desc='Query additional information about the OpenGL texture object associated with an OpenCL memory object',type='protos'] -- -To free a shared virtual memory buffer allocated using {clSVMAlloc}, call -the function +To query additional information about the OpenGL texture object associated +with an OpenCL memory object, call the function -include::{generated}/api/protos/clSVMFree.txt[] -include::{generated}/api/version-notes/clSVMFree.asciidoc[] +include::{generated}/api/protos/clGetGLTextureInfo.txt[] +include::{generated}/api/version-notes/clGetGLTextureInfo.asciidoc[] - * _context_ is a valid OpenCL context used to create the SVM buffer. - If no devices in _context_ support SVM, no action occurs. - * _svm_pointer_ must be the value returned by a call to {clSVMAlloc}. - If a `NULL` pointer is passed in _svm_pointer_, no action occurs. + * _memobj_ is the memory object to query. + * _param_name_ specifies what additional information about the OpenGL + texture object associated with _memobj_ to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetGLTextureInfo} is described in the table below. + * _param_value_ is a pointer to memory where the result being queried is + returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory + pointed to by _param_value_. + This size must be >= size of return type as described in the table + below. + * _param_value_size_ret_ returns the actual size in bytes of data copied + to _param_value_. + If _param_value_size_ret_ is `NULL`, it is ignored. -Note that {clSVMFree} does not wait for previously enqueued commands that -may be using _svm_pointer_ to finish before freeing _svm_pointer_. 
-It is the responsibility of the application to make sure that enqueued -commands that use _svm_pointer_ have finished before freeing _svm_pointer_. -This can be done by enqueuing a blocking operation such as {clFinish}, -{clWaitForEvents}, {clEnqueueReadBuffer} or by registering a callback with -the events associated with enqueued commands and when the last enqueued -command has finished freeing _svm_pointer_. +[[gl-texture-info-queries-table]] +.OpenGL texture info that may be queried with {clGetGLTextureInfo} +[cols=",,",options="header",] +|==== +| {cl_gl_texture_info} | Return Type | Info. Returned in _param_value_ +| {CL_GL_TEXTURE_TARGET_anchor} + +include::{generated}/api/version-notes/CL_GL_TEXTURE_TARGET.asciidoc[] + | `GLenum` + | The _texture_target_ argument specified in {clCreateFromGLTexture}. +| {CL_GL_MIPMAP_LEVEL_anchor} + +include::{generated}/api/version-notes/CL_GL_MIPMAP_LEVEL.asciidoc[] + | `GLint` + | The _miplevel_ argument specified in {clCreateFromGLTexture}. +ifdef::cl_khr_gl_msaa_sharing[] +| {CL_GL_NUM_SAMPLES_anchor} + +include::{generated}/api/version-notes/CL_GL_NUM_SAMPLES.asciidoc[] + | `GLsizei` + | The _samples_ argument passed to `glTexImage2DMultisample` or + `glTexImage3DMultisample`. + + If _image_ is not a MSAA texture, 1 is returned. +endif::cl_khr_gl_msaa_sharing[] +|==== -The behavior of using _svm_pointer_ after it has been freed is undefined. -In addition, if a buffer object is created using {clCreateBuffer} or -{clCreateBufferWithProperties} with _svm_pointer_, the buffer object must -first be released before the _svm_pointer_ is freed. +// refError -The {clEnqueueSVMFree} API can also be used to enqueue a callback to free -the shared virtual memory buffer allocated using {clSVMAlloc} or a shared -system memory pointer. +{clGetGLTextureInfo} returns {CL_SUCCESS} if the function is executed +successfully. 
+Otherwise, it returns one of the following errors: + + * {CL_INVALID_MEM_OBJECT} if _memobj_ is not a valid OpenCL memory object. + * {CL_INVALID_GL_OBJECT} if there is no OpenGL texture object associated + with _memobj_. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is less than the size of the return type + as described in the table above and _param_value_ is not `NULL`, or if + _param_value_ and _param_value_size_ret_ are `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- +endif::cl_khr_gl_sharing[] -[open,refpage='clEnqueueSVMFree',desc='Enqueues a command to free shared virtual memory allocated using clSVMAlloc or a shared system memory pointer.',type='protos'] + +ifdef::cl_khr_dx9_media_sharing[] +=== Sharing Memory Objects Created From Media Surfaces Between a Media Adapter and OpenCL + +[open,refpage='clEnqueueAcquireDX9MediaSurfacesKHR',desc='Acquire OpenCL memory objects created from a media surface',type='protos'] -- -To enqueue a command to free the shared virtual memory allocated using -{clSVMAlloc} or a shared system memory pointer, call the function +To acquire OpenCL memory objects that have been created from a media +surface, call the function -include::{generated}/api/protos/clEnqueueSVMFree.txt[] -include::{generated}/api/version-notes/clEnqueueSVMFree.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireDX9MediaSurfacesKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireDX9MediaSurfacesKHR.asciidoc[] - * _command_queue_ is a valid host command-queue. - * _svm_pointers_ and _num_svm_pointers_ specify shared virtual memory pointers - to be freed. 
- Each pointer in _svm_pointers_ that was allocated using {clSVMAlloc} must - have been allocated from the same context from which _command_queue_ was - created. - The memory associated with _svm_pointers_ can be reused or freed after the - function returns. - * _pfn_free_func_ specifies the callback function to be called to free the SVM - pointers. - This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - _pfn_free_func_ takes four arguments: _queue_ which is the command-queue in - which {clEnqueueSVMFree} was enqueued, the count and list of SVM pointers to - free and _user_data_ which is a pointer to user specified data. - If _pfn_free_func_ is `NULL`, all pointers specified in _svm_pointers_ must - be allocated using {clSVMAlloc} and the OpenCL implementation will free - these SVM pointers. - _pfn_free_func_ must be a valid callback function if any SVM pointer to be - freed is a shared system memory pointer i.e. not allocated using - {clSVMAlloc}. - If _pfn_free_func_ is a valid callback function, the OpenCL implementation - will call _pfn_free_func_ to free all the SVM pointers specified in - _svm_pointers_. - * _user_data_ will be passed as the _user_data_ argument when _pfn_free_func_ - is called. - _user_data_ can be `NULL`. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before {clEnqueueSVMFree} can be executed. - If _event_wait_list_ is `NULL`, then {clEnqueueSVMFree} does not wait on any - event to complete. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from media surfaces. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. 
+ If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -// refError +The media surfaces are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. + +OpenCL memory objects created from media surfaces must be acquired before +they can be used by any OpenCL commands queued to a command-queue. +If an OpenCL memory object created from a media surface is used while it is +not currently acquired by OpenCL, the call attempting to use that OpenCL +memory object will return {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR}. 
+ +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueAcquireDX9MediaSurfacesKHR} provides the +synchronization guarantee that any media adapter API calls involving the +interop device(s) used in the OpenCL context made before +{clEnqueueAcquireDX9MediaSurfacesKHR} is called will complete executing +before _event_ reports completion and before the execution of any subsequent +OpenCL work issued in _command_queue_ begins. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any media adapter API calls involving the interop +device(s) used in the OpenCL context made before +{clEnqueueAcquireDX9MediaSurfacesKHR} is called have completed before +calling {clEnqueueAcquireDX9MediaSurfacesKHR}. -{clEnqueueSVMFree} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +// refError - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueAcquireDX9MediaSurfacesKHR} returns {CL_SUCCESS} if the function +is executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from media surfaces. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_VALUE} if _num_svm_pointers_ is 0 and _svm_pointers_ is - non-`NULL`, _or_ if _svm_pointers_ is `NULL` and _num_svm_pointers_ is - not 0.
+ * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a device that can share the media surface referenced by + _mem_objects_. + * {CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR} if memory objects in + _mem_objects_ have previously been acquired using + {clEnqueueAcquireDX9MediaSurfacesKHR} but have not been released using + {clEnqueueReleaseDX9MediaSurfacesKHR}. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clEnqueueSVMMemcpy',desc='Enqueues a command to do a memcpy operation.',type='protos'] +[open,refpage='clEnqueueReleaseDX9MediaSurfacesKHR',desc='Release OpenCL memory objects created from a media surface',type='protos'] -- -To enqueue a command to do a memcpy operation, call the function +To release OpenCL memory objects that have been created from media surfaces, +call the function -include::{generated}/api/protos/clEnqueueSVMMemcpy.txt[] -include::{generated}/api/version-notes/clEnqueueSVMMemcpy.asciidoc[] +include::{generated}/api/protos/clEnqueueReleaseDX9MediaSurfacesKHR.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseDX9MediaSurfacesKHR.asciidoc[] - * _command_queue_ refers to the host command-queue in which the read / write - command will be queued. - If either _dst_ptr_ or _src_ptr_ is allocated using {clSVMAlloc} then the - OpenCL context allocated against must match that of _command_queue_. - * _blocking_copy_ indicates if the copy operation is _blocking_ or - _non-blocking_. - * If _blocking_copy_ is {CL_TRUE} i.e. 
the copy command is blocking, - {clEnqueueSVMMemcpy} does not return until the buffer data has been copied - into memory pointed to by _dst_ptr_. - * _size_ is the size in bytes of data being copied. - * _dst_ptr_ is the pointer to a host or SVM memory allocation where data is - copied to. - * _src_ptr_ is the pointer to a host or SVM memory allocation where data is - copied from. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. + * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from media surfaces. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this read / write command - and can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. 
If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -If _blocking_copy_ is {CL_FALSE} i.e. the copy command is non-blocking, -{clEnqueueSVMMemcpy} queues a non-blocking copy command and returns. -The contents of the buffer that _dst_ptr_ points to cannot be used until the -copy command has completed. -The _event_ argument returns an event object which can be used to query the -execution status of the read command. -When the copy command has completed, the contents of the buffer that -_dst_ptr_ points to can be used by the application. - -If the memory allocation(s) containing _dst_ptr_ and/or _src_ptr_ are -allocated using {clSVMAlloc} and either is not allocated from the same -context from which _command_queue_ was created the behavior is undefined. +The media surfaces are released by the OpenCL context associated with +_command_queue_. + +OpenCL memory objects created from media surfaces which have been acquired +by OpenCL must be released by OpenCL before they may be accessed by the +media adapter API. +Accessing a media surface while its corresponding OpenCL memory object is +acquired is in error and will result in undefined behavior, including but +not limited to possible OpenCL errors, data corruption, and program +termination. 
+ +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueReleaseDX9MediaSurfacesKHR} provides the +synchronization guarantee that any calls to media adapter APIs involving the +interop device(s) used in the OpenCL context made after the call to +{clEnqueueReleaseDX9MediaSurfacesKHR} will not start executing until after +all events in _event_wait_list_ are complete and all work already submitted +to _command_queue_ completes execution. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any media adapter API calls involving the interop +device(s) used in the OpenCL context made after +{clEnqueueReleaseDX9MediaSurfacesKHR} will not start executing until after +the event returned by {clEnqueueReleaseDX9MediaSurfacesKHR} reports completion. // refError -{clEnqueueSVMMemcpy} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueReleaseDX9MediaSurfacesKHR} returns {CL_SUCCESS} if the function +is executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from valid media surfaces. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. 
- * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - events in _event_wait_list_ are not the same. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a media object. + * {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR} if memory objects in + _mem_objects_ have not previously been acquired using + {clEnqueueAcquireDX9MediaSurfacesKHR}, or have been released using + {clEnqueueReleaseDX9MediaSurfacesKHR} since the last time that they were + acquired. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the copy operation is - blocking and the execution status of any of the events in - _event_wait_list_ is a negative integer value. - * {CL_INVALID_VALUE} if _dst_ptr_ or _src_ptr_ is `NULL`. - * {CL_MEM_COPY_OVERLAP} if the values specified for _dst_ptr_, _src_ptr_ and - _size_ result in an overlapping copy. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + _num_events_in_wait_list_ is 0, or if event objects in + _event_wait_list_ are not valid events. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- +endif::cl_khr_dx9_media_sharing[] -[open,refpage='clEnqueueSVMMemFill',desc='Enqueues a command to fill a region in memory with a pattern of a given pattern size.',type='protos'] + +ifdef::cl_khr_d3d10_sharing[] +=== Sharing Memory Objects Created From Direct3D 10 Resources Between Direct3D 10 and OpenCL Contexts + +[open,refpage='clEnqueueAcquireD3D10ObjectsKHR',desc='Acquire OpenCL memory objects created from Direct3D 10 resources',type='protos'] -- -To enqueue a command to fill a region in memory with a pattern of a given -pattern size, call the function +To acquire OpenCL memory objects that have been created from Direct3D 10 +resources, call the function -include::{generated}/api/protos/clEnqueueSVMMemFill.txt[] -include::{generated}/api/version-notes/clEnqueueSVMMemFill.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireD3D10ObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireD3D10ObjectsKHR.asciidoc[] - * _command_queue_ refers to the host command-queue in which the fill command - will be queued. - The OpenCL context associated with _command_queue_ and SVM pointer referred - to by _svm_ptr_ must be the same. - * _svm_ptr_ is a pointer to a memory region that will be filled with - _pattern_. - It must be aligned to _pattern_size_ bytes. - If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from - the same context from which _command_queue_ was created. - Otherwise the behavior is undefined. - * _pattern_ is a pointer to the data pattern of size _pattern_size_ in bytes. - _pattern_ will be used to fill a region in _buffer_ starting at _svm_ptr_ - and is _size_ bytes in size. - The data pattern must be a scalar or vector integer or floating-point data - type supported by OpenCL as described in <> and <>. - For example, if region pointed to by _svm_ptr_ is to be filled with a - pattern of float4 values, then _pattern_ will be a pointer to a cl_float4 - value and _pattern_size_ will be `sizeof(cl_float4)`. 
- The maximum value of _pattern_size_ is the size of the largest integer or - floating-point vector data type supported by the OpenCL device. - The memory associated with _pattern_ can be reused or freed after the - function returns. - * _size_ is the size in bytes of region being filled starting with _svm_ptr_ - and must be a multiple of _pattern_size_. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from Direct3D 10 resources. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. 
If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -// refError +The Direct3D 10 objects are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. + +OpenCL memory objects created from Direct3D 10 resources must be acquired +before they can be used by any OpenCL commands queued to a command-queue. +If an OpenCL memory object created from a Direct3D 10 resource is used while +it is not currently acquired by OpenCL, the behavior is undefined. +Implementations may fail the execution of commands attempting to use that +OpenCL memory object and set their associated event's execution status to +{CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR}. + +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueAcquireD3D10ObjectsKHR} provides the +synchronization guarantee that any Direct3D 10 calls involving the interop +device(s) used in the OpenCL context made before +{clEnqueueAcquireD3D10ObjectsKHR} is called will complete executing before +_event_ reports completion and before the execution of any subsequent OpenCL +work issued in _command_queue_ begins. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any Direct3D 10 calls involving the interop device(s) used +in the OpenCL context made before {clEnqueueAcquireD3D10ObjectsKHR} is +called have completed before calling {clEnqueueAcquireD3D10ObjectsKHR}. 
-{clEnqueueSVMMemFill} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +// refError - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueAcquireD3D10ObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from Direct3D 10 resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - events in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. - * {CL_INVALID_VALUE} if _svm_ptr_ is not aligned to _pattern_size_ bytes. - * {CL_INVALID_VALUE} if _pattern_ is `NULL` or if _pattern_size_ is 0 or if - _pattern_size_ is not one of {1, 2, 4, 8, 16, 32, 64, 128}. - * {CL_INVALID_VALUE} if _size_ is not a multiple of _pattern_size_. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a Direct3D 10 context. + * {CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR} if memory objects in + _mem_objects_ have previously been acquired using + {clEnqueueAcquireD3D10ObjectsKHR} but have not been released using + {clEnqueueReleaseD3D10ObjectsKHR}. 
* {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clEnqueueSVMMap',desc='Enqueues a command that will allow the host to update a region of a SVM buffer',type='protos'] +[open,refpage='clEnqueueReleaseD3D10ObjectsKHR',desc='Release OpenCL memory objects created from Direct3D 10 resources',type='protos'] -- -To enqueue a command that will allow the host to update a region of a SVM -buffer, call the function +To release OpenCL memory objects that have been created from Direct3D 10 +resources, call the function -include::{generated}/api/protos/clEnqueueSVMMap.txt[] -include::{generated}/api/version-notes/clEnqueueSVMMap.asciidoc[] +include::{generated}/api/protos/clEnqueueReleaseD3D10ObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseD3D10ObjectsKHR.asciidoc[] - * _command_queue_ must be a valid host command-queue. - * _blocking_map_ indicates if the map operation is _blocking_ or - _non-blocking_. - * _map_flags_ is a bit-field and is described in the - <> table. - * _svm_ptr_ and _size_ are a pointer to a memory region and size in bytes that - will be updated by the host. - If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from - the same context from which _command_queue_ was created. - Otherwise the behavior is undefined. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. 
+ * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from Direct3D 10 resources. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -If _blocking_map_ is {CL_TRUE}, {clEnqueueSVMMap} does not return until the -application can access the contents of the SVM region specified by _svm_ptr_ -and _size_ on the host. - -If _blocking_map_ is {CL_FALSE} i.e. map operation is non-blocking, the region -specified by _svm_ptr_ and _size_ cannot be used until the map command has -completed. 
-The _event_ argument returns an event object which can be used to query the -execution status of the map command. -When the map command is completed, the application can access the contents -of the region specified by _svm_ptr_ and _size_. - -Note that since we are enqueuing a command with a SVM buffer, the region is -already mapped in the host address space. +The Direct3D 10 objects are released by the OpenCL context associated with +_command_queue_. + +OpenCL memory objects created from Direct3D 10 resources which have been +acquired by OpenCL must be released by OpenCL before they may be accessed by +Direct3D 10. +Accessing a Direct3D 10 resource while its corresponding OpenCL memory +object is acquired is in error and will result in undefined behavior, +including but not limited to possible OpenCL errors, data corruption, and +program termination. + +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueReleaseD3D10ObjectsKHR} provides the +synchronization guarantee that any Direct3D 10 calls involving the +interop device(s) used in the OpenCL context made after the call to +{clEnqueueReleaseD3D10ObjectsKHR} will not start executing until after all +events in _event_wait_list_ are complete and all work already submitted to +_command_queue_ completes execution. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any Direct3D 10 calls involving the interop device(s) used +in the OpenCL context made after {clEnqueueReleaseD3D10ObjectsKHR} will not +start executing until after the event returned by +{clEnqueueReleaseD3D10ObjectsKHR} reports completion. // refError -{clEnqueueSVMMap} returns {CL_SUCCESS} if the function is executed -successfully. 
-Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueReleaseD3D10ObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing +and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from Direct3D 10 resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. - * {CL_INVALID_VALUE} if _size_ is 0 or if values specified in _map_flags_ - are not valid. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a Direct3D 10 device. + * {CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ + have not previously been acquired using + {clEnqueueAcquireD3D10ObjectsKHR}, or have been released using + {clEnqueueReleaseD3D10ObjectsKHR} since the last time that they were + acquired. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the map operation is - blocking and the execution status of any of the events in - _event_wait_list_ is a negative integer value. 
- * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + _num_events_in_wait_list_ is 0, or if event objects in + _event_wait_list_ are not valid events. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- +endif::cl_khr_d3d10_sharing[] -[open,refpage='clEnqueueSVMUnmap',desc='Enqueues a command to indicate that the host has completed updating the region given by an SVM pointer and which was specified in a previous call to clEnqueueSVMMap.',type='protos'] + +ifdef::cl_khr_d3d11_sharing[] +=== Sharing Memory Objects Created From Direct3D 11 Resources Between Direct3D 11 and OpenCL Contexts + +[open,refpage='clEnqueueAcquireD3D11ObjectsKHR',desc='Acquire OpenCL memory objects created from Direct3D 11 resources',type='protos'] -- -To enqueue a command to indicate that the host has completed updating the -region given by _svm_ptr_ and which was specified in a previous call to -{clEnqueueSVMMap}, call the function +To acquire OpenCL memory objects that have been created from Direct3D 11 +resources, call the function -include::{generated}/api/protos/clEnqueueSVMUnmap.txt[] -include::{generated}/api/version-notes/clEnqueueSVMUnmap.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireD3D11ObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireD3D11ObjectsKHR.asciidoc[] - * _command_queue_ must be a valid host command-queue. - * _svm_ptr_ is a pointer that was specified in a previous call to - {clEnqueueSVMMap}. - If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from - the same context from which _command_queue_ was created. - Otherwise the behavior is undefined. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before {clEnqueueSVMUnmap} can be executed. 
- If _event_wait_list_ is `NULL`, then {clEnqueueSVMUnmap} does not wait on any - event to complete. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from Direct3D 11 resources. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -{clEnqueueSVMMap} and {clEnqueueSVMUnmap} act as synchronization points for -the region of the SVM buffer specified in these calls. 
+The Direct3D 11 objects are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. + +OpenCL memory objects created from Direct3D 11 resources must be acquired +before they can be used by any OpenCL commands queued to a command-queue. +If an OpenCL memory object created from a Direct3D 11 resource is used while +it is not currently acquired by OpenCL, the behavior is undefined. +Implementations may fail the execution of commands attempting to use that +OpenCL memory object and set their associated event's execution status to +{CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR}. + +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueAcquireD3D11ObjectsKHR} provides the +synchronization guarantee that any Direct3D 11 calls involving the interop +device(s) used in the OpenCL context made before +{clEnqueueAcquireD3D11ObjectsKHR} is called will complete executing before +_event_ reports completion and before the execution of any subsequent OpenCL +work issued in _command_queue_ begins. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any Direct3D 11 calls involving the interop device(s) used +in the OpenCL context made before {clEnqueueAcquireD3D11ObjectsKHR} is +called have completed before calling {clEnqueueAcquireD3D11ObjectsKHR}. // refError -{clEnqueueSVMUnmap} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueAcquireD3D11ObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. 
+Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from Direct3D 11 resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a Direct3D 11 context. + * {CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR} if memory objects in + _mem_objects_ have previously been acquired using + {clEnqueueAcquireD3D11ObjectsKHR} but have not been released using + {clEnqueueReleaseD3D11ObjectsKHR}. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or if _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +-- -[NOTE] -==== -If a coarse-grained SVM buffer is currently mapped for writing, the -application must ensure that the SVM buffer is unmapped before any enqueued -kernels or commands that read from or write to this SVM buffer or any of its -associated {cl_mem_TYPE} buffer objects begin execution; otherwise the behavior is -undefined. 
+[open,refpage='clEnqueueReleaseD3D11ObjectsKHR',desc='Release OpenCL memory objects created from Direct3D 11 resources',type='protos'] +-- +To release OpenCL memory objects that have been created from Direct3D 11 +resources, call the function -If a coarse-grained SVM buffer is currently mapped for reading, the -application must ensure that the SVM buffer is unmapped before any enqueued -kernels or commands that write to this memory object or any of its -associated {cl_mem_TYPE} buffer objects begin execution; otherwise the behavior is -undefined. +include::{generated}/api/protos/clEnqueueReleaseD3D11ObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseD3D11ObjectsKHR.asciidoc[] -A SVM buffer is considered as mapped if there are one or more active -mappings for the SVM buffer irrespective of whether the mapped regions span -the entire SVM buffer. + * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from Direct3D 11 resources. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. 
+ If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -The above note does not apply to fine-grained SVM buffers (fine-grained -buffers allocated using {clSVMAlloc} or fine-grained system allocations). -==== +The Direct3D 11 objects are released by the OpenCL context associated with +_command_queue_. + +OpenCL memory objects created from Direct3D 11 resources which have been +acquired by OpenCL must be released by OpenCL before they may be accessed by +Direct3D 11. +Accessing a Direct3D 11 resource while its corresponding OpenCL memory +object is acquired is in error and will result in undefined behavior, +including but not limited to possible OpenCL errors, data corruption, and +program termination. + +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueReleaseD3D11ObjectsKHR} provides the +synchronization guarantee that any Direct3D 11 calls involving the +interop device(s) used in the OpenCL context made after the call to +{clEnqueueReleaseD3D11ObjectsKHR} will not start executing until after all +events in _event_wait_list_ are complete and all work already submitted to +_command_queue_ completes execution. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any Direct3D 11 calls involving the interop device(s) used +in the OpenCL context made after {clEnqueueReleaseD3D11ObjectsKHR} will not +start executing until after the event returned by +{clEnqueueReleaseD3D11ObjectsKHR} reports completion. + +// refError + +{clEnqueueReleaseD3D11ObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing +and returns {CL_SUCCESS}. 
+Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from Direct3D 11 resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a Direct3D 11 device. + * {CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ + have not previously been acquired using + {clEnqueueAcquireD3D11ObjectsKHR}, or have been released using + {clEnqueueReleaseD3D11ObjectsKHR} since the last time that they were + acquired. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in + _event_wait_list_ are not valid events. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
-- +endif::cl_khr_d3d11_sharing[] -[open,refpage='clEnqueueSVMMigrateMem',desc='Enqueues a command to indicate which device a set of ranges of SVM allocations should be associated with.',type='protos'] + +ifdef::cl_khr_egl_image[] +=== Sharing Memory Objects Created From EGL Resources Between EGL and OpenCL Contexts + +[open,refpage='clEnqueueAcquireEGLObjectsKHR',desc='Acquire OpenCL memory objects created from EGL resources',type='protos'] -- -To enqueue a command to indicate which device a set of ranges of SVM -allocations should be associated with, call the function +To acquire OpenCL memory objects that have been created from EGL resources, +call the function -include::{generated}/api/protos/clEnqueueSVMMigrateMem.txt[] -include::{generated}/api/version-notes/clEnqueueSVMMigrateMem.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireEGLObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireEGLObjectsKHR.asciidoc[] - * _command_queue_ is a valid host command-queue. - The specified set of allocation ranges will be migrated to the OpenCL device - associated with _command_queue_. - * _num_svm_pointers_ is the number of pointers in the specified _svm_pointers_ - array, and the number of sizes in the _sizes_ array, if _sizes_ is not - `NULL`. - * _svm_pointers_ is a pointer to an array of pointers. - Each pointer in this array must be within an allocation produced by a call - to {clSVMAlloc}. - * _sizes_ is an array of sizes. - The pair _svm_pointers_[i] and _sizes_[i] together define the starting - address and number of bytes in a range to be migrated. - _sizes_ may be `NULL` indicating that every allocation containing any - _svm_pointer_[i] is to be migrated. - Also, if _sizes_[i] is zero, then the entire allocation containing - _svm_pointer_[i] is migrated. - * _flags_ is a bit-field that is used to specify migration options. - The <> describes the possible - values for _flags_. 
- * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from EGL resources, within the context associated with + _command_queue_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array.
-Once the event returned by {clEnqueueSVMMigrateMem} has become {CL_COMPLETE}, -the ranges specified by svm pointers and sizes have been successfully -migrated to the device associated with command-queue. +The EGL objects are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. -The user is responsible for managing the event dependencies associated with -this command in order to avoid overlapping access to SVM allocations. -Improperly specified event dependencies passed to {clEnqueueSVMMigrateMem} -could result in undefined results. +OpenCL memory objects created from EGL resources must be acquired before +they can be used by any OpenCL commands queued to a command-queue. +If an OpenCL memory object created from an EGL resource is used while it is +not currently acquired by OpenCL, the behavior is undefined. +Implementations may fail the execution of commands attempting to use that +OpenCL memory object and set their associated event's execution status to +{CL_EGL_RESOURCE_NOT_ACQUIRED_KHR}. // refError -{clEnqueueSVMMigrateMem} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueAcquireEGLObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects in the context associated with _command_queue_. + * {CL_INVALID_EGL_OBJECT_KHR} if memory objects in _mem_objects_ have not + been created from EGL resources.
+ * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _num_svm_pointers_ is zero or _svm_pointers_ is - `NULL`. - * {CL_INVALID_VALUE} if _sizes_[i] is non-zero range [_svm_pointers_[i], - _svm_pointers_[i]+_sizes_[i]) is not contained within an existing - {clSVMAlloc} allocation. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or if _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- +[open,refpage='clEnqueueReleaseEGLObjectsKHR',desc='Release OpenCL memory objects created from EGL resources',type='protos'] +-- +To release OpenCL memory objects that have been created from EGL resources, +call the function -=== Memory consistency for SVM allocations +include::{generated}/api/protos/clEnqueueReleaseEGLObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseEGLObjectsKHR.asciidoc[] -To ensure memory consistency in SVM allocations, the program can rely on the -guaranteed memory consistency at synchronization points. 
-This consistency support already exists in OpenCL 1.x and can be used for -coarse-grained SVM allocations or for fine-grained buffer SVM allocations; -what SVM adds is the ability to share pointers between the host and all SVM -devices. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from EGL resources, within the context associated with + _command_queue_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -In addition, sub-buffers can also be used to ensure that each device gets a -consistent view of a SVM buffers memory when it is shared by multiple -devices. -For example, assume that two devices share a SVM pointer.
-The host can create a {cl_mem_TYPE} buffer object using {clCreateBuffer} or -{clCreateBufferWithProperties} with {CL_MEM_USE_HOST_PTR} and _host_ptr_ set -to the SVM pointer and then create two disjoint sub-buffers with starting -virtual addresses _sb1_ptr_ and _sb2_ptr_. -These pointers (_sb1_ptr_ and _sb2_ptr_) can be passed to kernels executing -on the two devices. -{clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} and the existing -<> ensure -consistency for buffer regions (_sb1_ptr_ and _sb2_ptr_) read and written by -these kernels. +The EGL objects are released by the OpenCL context associated with +_command_queue_. -When the host and devices are able to use SVM atomic operations (i.e. -{CL_DEVICE_SVM_ATOMICS} is set in {CL_DEVICE_SVM_CAPABILITIES}), these atomic -operations can be used to provide memory consistency at a fine grain in a -shared memory region. -The effect of these operations is visible to the host and all devices with -which that memory is shared. +OpenCL memory objects created from EGL resources which have been acquired by +OpenCL must be released by OpenCL before they may be accessed by EGL or by +EGL client APIs. +Accessing an EGL resource while its corresponding OpenCL memory object is +acquired is in error and will result in undefined behavior, including but +not limited to possible OpenCL errors, data corruption, and program +termination. +// refError -== Sampler Objects - -A sampler object describes how to sample an image when the image is read in -the kernel. -The built-in functions to read from an image in a kernel take a sampler as -an argument. -The sampler arguments to the image read function can be sampler objects -created using OpenCL functions and passed as argument values to the kernel -or can be samplers declared inside a kernel. -In this section we discuss how sampler objects are created using OpenCL -functions. +{clEnqueueReleaseEGLObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully.
+If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects in the context associated with _command_queue_. + * {CL_INVALID_EGL_OBJECT_KHR} if memory objects in _mem_objects_ have not + been created from EGL resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid + command-queue. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host.
+-- +endif::cl_khr_egl_image[] -=== Creating Sampler Objects +ifdef::cl_khr_gl_sharing[] +[[acquiring-shared-opencl-opengl-memory-objects]] +=== Acquiring, Releasing, and Synchronizing Access to Shared OpenCL/OpenGL Memory Objects -[open,refpage='clCreateSamplerWithProperties',desc='Creates a sampler object.',type='protos'] +[open,refpage='clEnqueueAcquireGLObjects',desc='Acquire OpenCL memory objects created from OpenGL objects',type='protos'] -- -To create a sampler object, call the function +To acquire OpenCL memory objects that have been created from OpenGL objects, +call the function -include::{generated}/api/protos/clCreateSamplerWithProperties.txt[] -include::{generated}/api/version-notes/clCreateSamplerWithProperties.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireGLObjects.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireGLObjects.asciidoc[] - * _context_ must be a valid OpenCL context. - * _sampler_properties_ specifies a list of sampler property names and their - corresponding values. - Each sampler property name is immediately followed by the corresponding - desired value. - The list is terminated with 0. - The list of supported properties is described in the - <> table. - If a supported property and its value is not specified in - _sampler_properties_, its default value will be used. - _sampler_properties_ can be `NULL` in which case the default values for - supported sampler properties will be used. + * _command_queue_ is a valid command-queue. + All devices used to create the OpenCL context associated with + _command_queue_ must support acquiring shared OpenCL/OpenGL objects. + This constraint is enforced at context creation time. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that + correspond to OpenGL objects. 
+ * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -[[sampler-properties-table]] -.List of supported sampler creation properties by {clCreateSamplerWithProperties} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Sampler Property | Property Value | Description -| {CL_SAMPLER_NORMALIZED_COORDS_anchor} +ifdef::cl_khr_gl_event[] +If an OpenGL context is bound to the current thread, then any OpenGL +commands which -include::{generated}/api/version-notes/CL_SAMPLER_NORMALIZED_COORDS.asciidoc[] - | {cl_bool_TYPE} - | A boolean value that specifies whether the image coordinates - specified are normalized or not. + . affect or access the contents of a memory object listed in the + _mem_objects_ list, and + . were issued on that OpenGL context prior to the call to + {clEnqueueAcquireGLObjects} - The default value (i.e. the value used if this property is not - specified in sampler_properties) is {CL_TRUE}.
-| {CL_SAMPLER_ADDRESSING_MODE_anchor} +will complete before execution of any OpenCL commands following the +{clEnqueueAcquireGLObjects} which affect or access any of those memory +objects. +If a non-`NULL` _event_ object is returned, it will report completion only +after completion of such OpenGL commands. +endif::cl_khr_gl_event[] -include::{generated}/api/version-notes/CL_SAMPLER_ADDRESSING_MODE.asciidoc[] - | {cl_addressing_mode_TYPE} - | Specifies how out-of-range image coordinates are handled when - reading from an image. - Valid values are: +These objects need to be acquired before they can be used by any OpenCL +commands queued to a command-queue or the behaviour is undefined. +The OpenGL objects are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. - {CL_ADDRESS_NONE_anchor} - Behavior is undefined for out-of-range - image coordinates. +// refError - {CL_ADDRESS_CLAMP_TO_EDGE_anchor} - Out-of-range image coordinates - are clamped to the edge of the image. +{clEnqueueAcquireGLObjects} returns {CL_SUCCESS} if the function is executed +successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing +and returns {CL_SUCCESS}. +Otherwise, it returns one of the following errors: - {CL_ADDRESS_CLAMP_anchor} - Out-of-range image coordinates are - assigned a border color value. + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from an OpenGL context + * {CL_INVALID_GL_OBJECT} if memory objects in _mem_objects_ have not been + created from an OpenGL object(s). 
+ * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- - {CL_ADDRESS_REPEAT_anchor} - Out-of-range image coordinates read - from the image as if the image data were replicated in all dimensions. +[open,refpage='clEnqueueReleaseGLObjects',desc='Release OpenCL memory objects created from OpenGL objects',type='protos'] +-- +To release OpenCL memory objects that have been created from OpenGL objects, +call the function - {CL_ADDRESS_MIRRORED_REPEAT_anchor} - Out-of-range image coordinates - read from the image as if the image data were replicated in all - dimensions, mirroring the image contents at the edge of each - replication. +include::{generated}/api/protos/clEnqueueReleaseGLObjects.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseGLObjects.asciidoc[] - The default is {CL_ADDRESS_CLAMP}. -| {CL_SAMPLER_FILTER_MODE_anchor} + * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that + correspond to OpenGL objects. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. 
+ The events specified in _event_wait_list_ act as synchronization points. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -include::{generated}/api/version-notes/CL_SAMPLER_FILTER_MODE.asciidoc[] - | {cl_filter_mode_TYPE} - | Specifies the type of filter that is applied when reading an - image. - Valid values are: +ifdef::cl_khr_gl_event[] +If an OpenGL context is bound to the current thread, then any OpenGL +commands which - {CL_FILTER_NEAREST_anchor} - Returns the image element nearest - to the image coordinate. + . affect or access the contents of the memory objects listed in the + _mem_objects_ list, and + . are issued on that context after the call to {clEnqueueReleaseGLObjects} - {CL_FILTER_LINEAR_anchor} - Returns a weighted average of the - four image elements nearest to the image coordinate. +will not execute until after execution of any OpenCL commands preceding the - The default value is {CL_FILTER_NEAREST}. -|==== - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +{clEnqueueReleaseGLObjects} which affect or access any of those memory +objects. +If a non-`NULL` _event_ object is returned, it will report completion before +execution of such OpenGL commands. +endif::cl_khr_gl_event[] + +These objects need to be released before they can be used by OpenGL. +The OpenGL objects are released by the OpenCL context associated with +_command_queue_.
// refError -{clCreateSamplerWithProperties} returns a valid non-zero sampler object and -_errcode_ret_ is set to {CL_SUCCESS} if the sampler object is created +{clEnqueueReleaseGLObjects} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing +and returns {CL_SUCCESS}. +Otherwise, it returns one of the following errors: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if the property name in _sampler_properties_ is not a - supported property name, if the value specified for a supported property - name is not valid, or if the same property name is specified more than - once. - * {CL_INVALID_OPERATION} if images are not supported by any device - associated with _context_ (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in the - <> table is {CL_FALSE}). - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from an OpenGL context + * {CL_INVALID_GL_OBJECT} if memory objects in _mem_objects_ have not been + created from an OpenGL object(s). + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. 
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clCreateSampler',desc='Creates a sampler object.',type='protos'] --- -To create a sampler object, call the function -include::{generated}/api/protos/clCreateSampler.txt[] -include::{generated}/api/version-notes/clCreateSampler.asciidoc[] +// The following section is quite ugly and duplicative, and potentially +// could be simplified. +// The problem is that there are a large number of scenarios being +// described: +// * Using either EGL or another OpenGL binding API via the egl_image or +// gl_sharing extensions +// * Using either OpenGL or OpenGL ES, or potentially another EGL client API +// * Attempting to synchronize via either EGL or OpenGL/OpenGL ES fence +// sync objects, via the egl_event or gl_event extensions - * _context_ must be a valid OpenCL context. - * _normalized_coords_ has the same interpretation as - {CL_SAMPLER_NORMALIZED_COORDS} in the <>. - * _addressing_mode_ has the same interpretation as - {CL_SAMPLER_ADDRESSING_MODE} in the <>. - * _filter_mode_ has the same interpretation as - {CL_SAMPLER_FILTER_MODE} in the <>. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +ifdef::cl_khr_egl_image,cl_khr_gl_sharing[] +==== Synchronizing Access to Memory Objects Shared With EGL or OpenGL + +When sharing objects such as EGL images (if the `<>` +extension is supported) or OpenGL buffers, textures, and renderbuffers (if +the `<>` extension is supported), in order to ensure data +integrity, the application is responsible for synchronizing access to shared +memory objects through the other API with which such objects are shared. 
-// refError +Failure to provide such synchronization may result in race conditions and +other undefined behavior including non-portability between implementations. -{clCreateSampler} returns a valid non-zero sampler object and _errcode_ret_ is -set to {CL_SUCCESS} if the sampler object is created successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +Prior to acquiring objects shared with the other API via an appropriate +{clEnqueueAcquire}*** call, the application must ensure that any pending +operations in that API which access the objects specified in _mem_objects_ +have completed. - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _addressing_mode_, _filter_mode_, _normalized_coords_ - or a combination of these arguements are not valid. - * {CL_INVALID_OPERATION} if images are not supported by any device - associated with _context_ (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in the - <> table is {CL_FALSE}). - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +Depending on the application and the implementation, there are two +extensions which may be used to synchronize with other APIs: -[open,refpage='clRetainSampler',desc='Increments the sampler reference count.',type='protos'] --- -To retain a sampler object, call the function -include::{generated}/api/protos/clRetainSampler.txt[] -include::{generated}/api/version-notes/clRetainSampler.asciidoc[] +ifdef::cl_khr_egl_image[] +===== Synchronization With EGL and EGL Client APIs - * _sampler_ specifies the sampler to be released.
+When sharing with an EGL context via the `<>` extension, +if the `<>` extension is supported, and the EGL context in +question supports fence sync objects, _explicit synchronization_ with EGL or +EGL client APIs can be achieved as described in the +<> section. -The _sampler_ reference count is incremented. -{clCreateSamplerWithProperties} and {clCreateSampler} perform an implicit -retain. +If the `<>` extension is not supported, completion of EGL +client API commands may be determined by issuing and waiting for completion +of commands such as `glFinish` or `vgFinish` on all client API contexts with +pending references to these objects. +endif::cl_khr_egl_image[] -// refError -{clRetainSampler} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +ifdef::cl_khr_gl_sharing[] +===== Synchronization With OpenGL - * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +When sharing with an OpenGL context via the `<>` +extension, the OpenCL implementation will ensure that any such pending +OpenGL operations are complete for an OpenGL context bound to the same +thread as the OpenCL context. This is referred to as _implicit +synchronization_. -[open,refpage='clReleaseSampler',desc='Decrements the sampler reference count.',type='protos'] --- -To release a sampler object, call the function +If the `<>` extension is supported, and the OpenGL context +in question supports fence sync objects, _explicit synchronization_ with +OpenGL can be achieved as described in the <> section. 
-include::{generated}/api/protos/clReleaseSampler.txt[] -include::{generated}/api/version-notes/clReleaseSampler.asciidoc[] +If the `<>` extension is not supported, completion of +OpenGL commands may be determined by issuing and waiting for completion of a +`glFinish` command on all OpenGL contexts with pending references to these +objects. +endif::cl_khr_gl_sharing[] - * _sampler_ specifies the sampler to be released. -The _sampler_ reference count is decremented. -The sampler object is deleted after the reference count becomes zero and -commands queued for execution on a command-queue(s) that use _sampler_ have -finished. +===== General Considerations for Synchronization With Other APIs -// refError +Some implementations may offer other efficient synchronization methods. If +such methods exist they will be described in platform-specific +documentation. -{clReleaseSampler} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +Note that no synchronization method other than `glFinish` is portable +between all OpenGL implementations and all OpenCL implementations. +While this is the only way to ensure completion that is portable to all +platforms, `glFinish` is an expensive operation and its use should be +avoided if the `<>` or `<>` extensions +are supported on a platform. - * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. -Using this function to release a reference that was not obtained by creating -the object or by calling {clRetainSampler} causes undefined behavior. 
--- +===== Synchronizing OpenCL Operations With Other APIs +After releasing a shared memory object via an appropriate +{clEnqueueRelease}*** call, the application is responsible for ensuring that +any pending OpenCL operations which access the objects specified in +_mem_objects_ have completed prior to executing subsequent commands in the +other API which reference these objects. -=== Sampler Object Queries +This may be accomplished portably by calling {clWaitForEvents} with the +event object returned by {clEnqueueReleaseGLObjects}, or by calling +{clFinish}. +As above, some implementations may offer more efficient methods. -[open,refpage='clGetSamplerInfo',desc='Returns information about the sampler object.',type='protos'] --- -To return information about a sampler object, call the function +The application is responsible for maintaining the proper order of +operations if the OpenCL context and the other API context are in separate +threads. + +If an OpenGL context is bound to a thread other than the one in which +{clEnqueueReleaseGLObjects} is called, changes to any of the objects in +_mem_objects_ may not be visible to that context without additional steps +being taken by the application. +For an OpenGL 3.1 (or later) context, the requirements are described in +Appendix D ("`Shared Objects and Multiple Contexts`") of the OpenGL 3.1 +Specification. +For prior versions of OpenGL, the requirements are implementation-dependent. -include::{generated}/api/protos/clGetSamplerInfo.txt[] -include::{generated}/api/version-notes/clGetSamplerInfo.asciidoc[] +Attempting to access the data store of an OpenGL object after it has been +acquired by OpenCL and before it has been released will result in undefined +behavior. +Similarly, attempting to access a shared OpenCL/OpenGL object from OpenCL +before it has been acquired by the OpenCL command-queue, or after it has +been released, will result in undefined behavior. - * _sampler_ specifies the sampler being queried.
- * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetSamplerInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. +endif::cl_khr_egl_image,cl_khr_gl_sharing[] +endif::cl_khr_gl_sharing[] -[[sampler-info-table]] -.List of supported param_names by {clGetSamplerInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Sampler Info | Return Type | Description -| {CL_SAMPLER_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] -include::{generated}/api/version-notes/CL_SAMPLER_REFERENCE_COUNT.asciidoc[] - | {cl_uint_TYPE} - | Return the _sampler_ reference count. -| {CL_SAMPLER_CONTEXT_anchor} +== Shared Virtual Memory -include::{generated}/api/version-notes/CL_SAMPLER_CONTEXT.asciidoc[] - | {cl_context_TYPE} - | Return the context specified when the sampler is created. -// Note: This enum is used for two purposes: as a property and for a query. -// We use the property as the anchor. -| {CL_SAMPLER_NORMALIZED_COORDS} +NOTE: Shared virtual memory is <> version 2.0. -include::{generated}/api/version-notes/CL_SAMPLER_NORMALIZED_COORDS.asciidoc[] - | {cl_bool_TYPE} - | Return the normalized coords value associated with _sampler_. -// Note: This enum is used for two purposes: as a property and for a query. -// We use the property as the anchor. 
-| {CL_SAMPLER_ADDRESSING_MODE} - -include::{generated}/api/version-notes/CL_SAMPLER_ADDRESSING_MODE.asciidoc[] - | {cl_addressing_mode_TYPE} - | Return the addressing mode value associated with _sampler_. -// Note: This enum is used for two purposes: as a property and for a query. -// We use the property as the anchor. -| {CL_SAMPLER_FILTER_MODE} - -include::{generated}/api/version-notes/CL_SAMPLER_FILTER_MODE.asciidoc[] - | {cl_filter_mode_TYPE} - | Return the filter mode value associated with _sampler_. +Shared virtual memory (a.k.a. SVM) allows the host and kernels executing on +devices to directly share complex, pointer-containing data structures such as +trees and linked lists. +It also eliminates the need to marshal data between the host and devices. +As a result, SVM substantially simplifies OpenCL programming and may improve +performance. -| {CL_SAMPLER_PROPERTIES_anchor} -include::{generated}/api/version-notes/CL_SAMPLER_PROPERTIES.asciidoc[] - | {cl_sampler_properties_TYPE}[] - | Return the properties argument specified in - {clCreateSamplerWithProperties}. +=== SVM Sharing Granularity: Coarse- and Fine- Grained Sharing - If the _properties_ argument specified in {clCreateSamplerWithProperties} - used to create _sampler_ was not `NULL`, the implementation must return - the values specified in the properties argument in the same order and - without including additional properties. +OpenCL maintains memory consistency in a coarse-grained fashion in regions +of buffers. +We call this coarse-grained sharing. +Many platforms such as those with integrated CPU-GPU processors and ones +using the SVM-related PCI-SIG IOMMU services can do better, and can support +sharing at a granularity smaller than a buffer. +We call this fine-grained sharing. 
- If _sampler_ was created using {clCreateSampler}, or if the _properties_ - argument specified in {clCreateSamplerWithProperties} was `NULL`, the - implementation must return _param_value_size_ret_ equal to 0, - indicating that there are no properties to be returned. -|==== + * Coarse-grained sharing: Coarse-grain sharing may be used for memory and + virtual pointer sharing between multiple devices as well as between the + host and one or more devices. + The shared memory region is a memory buffer allocated using + {clSVMAlloc}. + Memory consistency is guaranteed at synchronization points and the host + can use calls to {clEnqueueSVMMap} and {clEnqueueSVMUnmap} or create a + {cl_mem_TYPE} buffer object using the SVM pointer and use OpenCL's existing host + API functions {clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} to + update regions of the buffer. + What coarse-grain buffer SVM adds to OpenCL's earlier buffer support are + the ability to share virtual memory pointers and a guarantee that + concurrent access to the same memory allocation from multiple kernels on + a single device is valid. + The coarse-grain buffer SVM provides a memory consistency model similar + to the global memory consistency model described in _sections 3.3.1_ and + _3.4.3_ of the OpenCL 1.2 specification. + This memory consistency applies to the regions of buffers being shared + in a coarse-grained fashion. + It is enforced at the synchronization points between commands enqueued + to command-queues in a single context with the additional consideration + that multiple kernels concurrently running on the same device may safely + share the data. + * Fine-grained sharing: Shared virtual memory where memory consistency is + maintained at a granularity smaller than a buffer. + How fine-grained SVM is used depends on whether the device supports SVM + atomic operations. 
+ ** If SVM atomic operations are supported, they provide memory consistency + for loads and stores by the host and kernels executing on devices + supporting SVM. + This means that the host and devices can concurrently read and update + the same memory. + The consistency provided by SVM atomics is in addition to the + consistency provided at synchronization points. + There is no need for explicit calls to {clEnqueueSVMMap} and + {clEnqueueSVMUnmap} or {clEnqueueMapBuffer} and + {clEnqueueUnmapMemObject} on a {cl_mem_TYPE} buffer object created using the + SVM pointer. + ** If SVM atomic operations are not supported, the host and devices can + concurrently read the same memory locations and can concurrently update + non-overlapping memory regions, but attempts to update the same memory + locations are undefined. + Memory consistency is guaranteed at synchronization points without the + need for explicit calls to {clEnqueueSVMMap} and {clEnqueueSVMUnmap} + or {clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} on a {cl_mem_TYPE} + buffer object created using the SVM pointer. + * There are two kinds of fine-grain sharing support. + Devices may support either fine-grain buffer sharing or fine-grain + system sharing. + ** Fine-grain buffer sharing provides fine-grain SVM only within buffers + and is an extension of coarse-grain sharing. + To support fine-grain buffer sharing in an OpenCL context, all devices + in the context must support {CL_DEVICE_SVM_FINE_GRAIN_BUFFER}. + ** Fine-grain system sharing enables fine-grain sharing of the host's + entire virtual memory, including memory regions allocated by the system + *malloc* API. + OpenCL buffer objects are unnecessary and programmers can pass pointers + allocated using *malloc* to OpenCL kernels. -// refError +As an illustration of fine-grain SVM using SVM atomic operations to maintain +memory consistency, consider the following example. 
+The host and a set of devices can simultaneously access and update a shared +work-queue data structure holding work-items to be done. +The host can use atomic operations to insert new work-items into the queue +at the same time as the devices use similar atomic operations to remove +work-items for processing. -{clGetSamplerInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +It is the programmer's responsibility to ensure that no host code or +executing kernels attempt to access a shared memory region after that memory +is freed. +We require the SVM implementation to work with either 32- or 64-bit host +applications subject to the following requirement: the address space size +must be the same for the host and all OpenCL devices in the context. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_SAMPLER} if _sampler_ is a not a valid sampler object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +[open,refpage='clSVMAlloc',desc='Allocates a shared virtual memory (SVM) buffer that can be shared by the host and all devices in an OpenCL context that support shared virtual memory.',type='protos'] -- +To allocate a shared virtual memory buffer (referred to as a SVM buffer) +that can be shared by the host and all devices in an OpenCL context that +support shared virtual memory, call the function + +include::{generated}/api/protos/clSVMAlloc.txt[] +include::{generated}/api/version-notes/clSVMAlloc.asciidoc[] + * _context_ is a valid OpenCL context used to create the SVM buffer.
+ * _flags_ is a bit-field that is used to specify allocation and usage + information. + The <<svm-flags-table,SVM Memory Flags>> table describes the possible values + for _flags_. + * _size_ is the size in bytes of the SVM buffer to be allocated. + * _alignment_ is the minimum alignment in bytes that is required for the newly + created buffer's memory region. + It must be a power of two up to the largest data type supported by the + OpenCL device. + For the full profile, the largest data type is long16. + For the embedded profile, it is long16 if the device supports 64-bit + integers; otherwise it is int16. + If _alignment_ is 0, a default alignment will be used that is equal to the + size of the largest data type supported by the OpenCL implementation. -== Program Objects +[[svm-flags-table]] +.List of supported SVM memory flag values +[width="100%",cols="<50%,<50%",options="header"] +|==== +| SVM Memory Flags | Description +| {CL_MEM_READ_WRITE} + | This flag specifies that the SVM buffer will be read and written by a + kernel. + This is the default. +| {CL_MEM_WRITE_ONLY} + | This flag specifies that the SVM buffer will be written but not read by + a kernel. -An OpenCL program consists of a set of kernels that are identified as -functions declared with the `+__kernel+` qualifier in the program source. -OpenCL programs may also contain auxiliary functions and constant data that -can be used by kernel functions. -The program executable can be generated _online_ or _offline_ by the OpenCL -compiler for the appropriate target device(s). + Reading from a SVM buffer created with {CL_MEM_WRITE_ONLY} inside a kernel + is undefined. -A program object encapsulates the following information: + {CL_MEM_READ_WRITE} and {CL_MEM_WRITE_ONLY} are mutually exclusive. +| {CL_MEM_READ_ONLY} + | This flag specifies that the SVM buffer object is a read-only memory + object when used inside a kernel. - * An associated context. - * A program source or binary.
- * The latest successfully built program executable, library or compiled - binary, the list of devices for which the program executable, library or - compiled binary is built, the build options used and a build log. - * The number of kernel objects currently attached. + Writing to a SVM buffer created with {CL_MEM_READ_ONLY} inside a kernel is + undefined. + {CL_MEM_READ_WRITE} or {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_ONLY} are mutually + exclusive. +| {CL_MEM_SVM_FINE_GRAIN_BUFFER_anchor} -=== Creating Program Objects +include::{generated}/api/version-notes/CL_MEM_SVM_FINE_GRAIN_BUFFER.asciidoc[] + | This specifies that the application wants the OpenCL implementation to + do a fine-grained allocation. +| {CL_MEM_SVM_ATOMICS_anchor} -[open,refpage='clCreateProgramWithSource',desc='Creates a program object for a context, and loads source code specified by text strings into the program object.',type='protos'] --- -To creates a program object for a context and load source code into that -object, call the function +include::{generated}/api/version-notes/CL_MEM_SVM_ATOMICS.asciidoc[] + | This flag is valid only if {CL_MEM_SVM_FINE_GRAIN_BUFFER} is specified in + flags. + It is used to indicate that SVM atomic operations can control visibility + of memory accesses in this SVM buffer. +|==== -include::{generated}/api/protos/clCreateProgramWithSource.txt[] -include::{generated}/api/version-notes/clCreateProgramWithSource.asciidoc[] +If {CL_MEM_SVM_FINE_GRAIN_BUFFER} is not specified, the buffer can be created +as a coarse grained SVM allocation. +Similarly, if {CL_MEM_SVM_ATOMICS} is not specified, the buffer can be created +without support for SVM atomic operations (refer to an OpenCL kernel +language specifications). - * _context_ must be a valid OpenCL context. - * _strings_ is an array of _count_ pointers to optionally null-terminated - character strings that make up the source code. 
- * _lengths_ argument is an array with the number of chars in each string - (the string length). - If an element in _lengths_ is zero, its accompanying string is - null-terminated. - If _lengths_ is `NULL`, all strings in the _strings_ argument are considered - null-terminated. - Any length value passed in that is greater than zero excludes the null - terminator in its count. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +Calling {clSVMAlloc} does not itself provide consistency for the shared +memory region. +When the host cannot use the SVM atomic operations, it must rely on OpenCL's +guaranteed memory consistency at synchronization points. -The source code specified by _strings_ will be loaded into the program -object. +For SVM to be used efficiently, the host and any devices sharing a buffer +containing virtual memory pointers should have the same endianness. +If the context passed to {clSVMAlloc} has devices with mixed endianness and +the OpenCL implementation is unable to implement SVM because of that mixed +endianness, {clSVMAlloc} will fail and return `NULL`. -The devices associated with the program object are the devices associated -with _context_. -The source code specified by _strings_ is either an OpenCL C program source, -header or implementation-defined source for custom devices that support an -online compiler. -OpenCL {cpp} is not supported as an online-compiled kernel language through -this interface. +Although SVM is generally not supported for image objects, {clCreateImage} +and {clCreateImageWithProperties} +may create an image from a buffer (a 1D image from a buffer or a 2D image +from a buffer) if the buffer specified in its image description parameter is a +SVM buffer. +Such images have a linear memory representation so their memory can be +shared using SVM. +However, fine-grained sharing and atomics are not supported for image reads +and writes in a kernel.
// refError -{clCreateProgramWithSource} returns a valid non-zero program object and -_errcode_ret_ is set to {CL_SUCCESS} if the program object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +{clSVMAlloc} returns a valid non-`NULL` shared virtual memory address if the +SVM buffer is successfully allocated. +Otherwise, like *malloc*, it returns a `NULL` pointer value. +{clSVMAlloc} will fail if - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _count_ is zero or if _strings_ or any entry in - _strings_ is `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. + * _context_ is not a valid context, or no devices in _context_ support SVM. + * _flags_ does not contain {CL_MEM_SVM_FINE_GRAIN_BUFFER} but does contain + {CL_MEM_SVM_ATOMICS}. + * Values specified in _flags_ do not follow rules described for supported + values in the <> table. + * {CL_MEM_SVM_FINE_GRAIN_BUFFER} or {CL_MEM_SVM_ATOMICS} is specified in + _flags_ and these are not supported by at least one device in _context_. + * The values specified in _flags_ are not valid, i.e. do not match those + defined in the <> table. + * _size_ is 0 or > {CL_DEVICE_MAX_MEM_ALLOC_SIZE} value for any device in + _context_. + * _alignment_ is not a power of two or the OpenCL implementation cannot + support the specified alignment for at least one device in _context_. + * There was a failure to allocate resources. 
-- -[open,refpage='clCreateProgramWithIL',desc='Creates a program object for a context, and loads the IL into the program object.',type='protos'] +[open,refpage='clSVMFree',desc='Frees a shared virtual memory buffer allocated using clSVMAlloc.',type='protos'] -- -To create a program object for a context and load code in an intermediate -language into that object, call the function - -include::{generated}/api/protos/clCreateProgramWithIL.txt[] -include::{generated}/api/version-notes/clCreateProgramWithIL.asciidoc[] -Also see extension *cl_khr_il_program*. +To free a shared virtual memory buffer allocated using {clSVMAlloc}, call +the function - * _context_ must be a valid OpenCL context. - * _il_ is a pointer to a _length_-byte block of memory containing SPIR-V or an - implementation-defined intermediate language. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +include::{generated}/api/protos/clSVMFree.txt[] +include::{generated}/api/version-notes/clSVMFree.asciidoc[] -The intermediate language pointed to by _il_ and with length in bytes _length_ -will be loaded into the program object. -The devices associated with the program object are the devices associated -with _context_. + * _context_ is a valid OpenCL context used to create the SVM buffer. + If no devices in _context_ support SVM, no action occurs. + * _svm_pointer_ must be the value returned by a call to {clSVMAlloc}. + If a `NULL` pointer is passed in _svm_pointer_, no action occurs. -// refError +Note that {clSVMFree} does not wait for previously enqueued commands that +may be using _svm_pointer_ to finish before freeing _svm_pointer_. +It is the responsibility of the application to make sure that enqueued +commands that use _svm_pointer_ have finished before freeing _svm_pointer_. 
+This can be done by enqueuing a blocking operation such as {clFinish}, +{clWaitForEvents}, {clEnqueueReadBuffer} or by registering a callback with +the events associated with enqueued commands and when the last enqueued +command has finished freeing _svm_pointer_. -{clCreateProgramWithIL} returns a valid non-zero program object and -_errcode_ret_ is set to {CL_SUCCESS} if the program object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +The behavior of using _svm_pointer_ after it has been freed is undefined. +In addition, if a buffer object is created using {clCreateBuffer} or +{clCreateBufferWithProperties} with _svm_pointer_, the buffer object must +first be released before the _svm_pointer_ is freed. - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_OPERATION} if no devices in _context_ support intermediate - language programs. - * {CL_INVALID_VALUE} if _il_ is `NULL` or if _length_ is zero. - * {CL_INVALID_VALUE} if the _length_-byte memory pointed to by _il_ does not - contain well-formed intermediate language input that can be consumed by - the OpenCL runtime. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +The {clEnqueueSVMFree} API can also be used to enqueue a callback to free +the shared virtual memory buffer allocated using {clSVMAlloc} or a shared +system memory pointer. 
-- -[open,refpage='clCreateProgramWithBinary',desc='Creates a program object for a context, and loads binary bits into the program object.',type='protos'] +[open,refpage='clEnqueueSVMFree',desc='Enqueues a command to free shared virtual memory allocated using clSVMAlloc or a shared system memory pointer.',type='protos'] -- -To create a program object for a context and load binary bits into that -object, call the function - -include::{generated}/api/protos/clCreateProgramWithBinary.txt[] -include::{generated}/api/version-notes/clCreateProgramWithBinary.asciidoc[] - - * _context_ must be a valid OpenCL context. - * _device_list_ is a pointer to a list of devices that are in _context_. - _device_list_ must be a non-`NULL` value. - The binaries are loaded for devices specified in this list. - * _num_devices_ is the number of devices listed in _device_list_. - * _lengths_ is an array of the size in bytes of the program binaries to be - loaded for devices specified by _device_list_. - * _binaries_ is an array of pointers to program binaries to be loaded for - devices specified by _device_list_. - For each device given by _device_list_[i], the pointer to the program binary - for that device is given by _binaries_[i] and the length of this - corresponding binary is given by _lengths_[i]. - _lengths_[i] cannot be zero and _binaries_[i] cannot be a `NULL` pointer. - -The devices associated with the program object will be the list of devices -specified by _device_list_. -The list of devices specified by _device_list_ must be devices associated -with _context_. - -The program binaries specified by _binaries_ will be loaded into the program -object. -They contain bits that describe one of the following: - - * a program executable to be run on the device(s) associated with - _context_, - * a compiled program for device(s) associated with _context_, or - * a library of compiled programs for device(s) associated with _context_. 
- -The program binary can consist of either or both: - - * Device-specific code and/or, - * Implementation-specific intermediate representation (IR) which will be - converted to the device-specific code. +To enqueue a command to free the shared virtual memory allocated using +{clSVMAlloc} or a shared system memory pointer, call the function - * _binary_status_ returns whether the program binary for each device specified - in _device_list_ was loaded successfully or not. - It is an array of _num_devices_ entries and returns {CL_SUCCESS} in - _binary_status_[i] if binary was successfully loaded for device specified by - _device_list_[i]; otherwise returns {CL_INVALID_VALUE} if _lengths_[i] is zero - or if _binaries_[i] is a `NULL` value or {CL_INVALID_BINARY} in - _binary_status_[i] if program binary is not a valid binary for the specified - device. - If _binary_status_ is `NULL`, it is ignored. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +include::{generated}/api/protos/clEnqueueSVMFree.txt[] +include::{generated}/api/version-notes/clEnqueueSVMFree.asciidoc[] -OpenCL allows applications to create a program object using the program -source or binary and build appropriate program executables. -This can be very useful as it allows applications to load program source and -then compile and link to generate a program executable online on its first -instance for appropriate OpenCL devices in the system. -These executables can now be queried and cached by the application. -The cached executables can be read and loaded by the application, which can -help significantly reduce the application initialization time. + * _command_queue_ is a valid host command-queue. + * _svm_pointers_ and _num_svm_pointers_ specify shared virtual memory pointers + to be freed. 
+ Each pointer in _svm_pointers_ that was allocated using {clSVMAlloc} must + have been allocated from the same context from which _command_queue_ was + created. + The memory associated with _svm_pointers_ can be reused or freed after the + function returns. + * _pfn_free_func_ specifies the callback function to be called to free the SVM + pointers. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + _pfn_free_func_ takes four arguments: _queue_ which is the command-queue in + which {clEnqueueSVMFree} was enqueued, the count and list of SVM pointers to + free and _user_data_ which is a pointer to user specified data. + If _pfn_free_func_ is `NULL`, all pointers specified in _svm_pointers_ must + be allocated using {clSVMAlloc} and the OpenCL implementation will free + these SVM pointers. + _pfn_free_func_ must be a valid callback function if any SVM pointer to be + freed is a shared system memory pointer i.e. not allocated using + {clSVMAlloc}. + If _pfn_free_func_ is a valid callback function, the OpenCL implementation + will call _pfn_free_func_ to free all the SVM pointers specified in + _svm_pointers_. + * _user_data_ will be passed as the _user_data_ argument when _pfn_free_func_ + is called. + _user_data_ can be `NULL`. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before {clEnqueueSVMFree} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueSVMFree} does not wait on any + event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. 
+ The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. // refError -{clCreateProgramWithBinary} returns a valid non-zero program object and -_errcode_ret_ is set to {CL_SUCCESS} if the program object is created +{clEnqueueSVMFree} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +Otherwise, it returns one of the following errors: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` or _num_devices_ is zero. - * {CL_INVALID_DEVICE} if any device in _device_list_ is not in - the list of devices associated with _context_. - * {CL_INVALID_VALUE} if _lengths_ or _binaries_ is `NULL` or if any entry - in _lengths_[i] is zero or _binaries_[i] is `NULL`. - * {CL_INVALID_BINARY} if an invalid program binary was encountered for any - device. - _binary_status_ will return specific status for each device. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_VALUE} if _num_svm_pointers_ is 0 and _svm_pointers_ is + non-`NULL`, _or_ if _svm_pointers_ is `NULL` and _num_svm_pointers_ is + not 0. 
+ * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clCreateProgramWithBuiltInKernels',desc='Creates a program object for a context, and loads the information related to the built-in kernels into a program object.',type='protos'] +[open,refpage='clEnqueueSVMMemcpy',desc='Enqueues a command to do a memcpy operation.',type='protos'] -- -To create a program object for a context and loads the information related -to the built-in kernels into that object, call the function +To enqueue a command to do a memcpy operation, call the function -include::{generated}/api/protos/clCreateProgramWithBuiltInKernels.txt[] -include::{generated}/api/version-notes/clCreateProgramWithBuiltInKernels.asciidoc[] +include::{generated}/api/protos/clEnqueueSVMMemcpy.txt[] +include::{generated}/api/version-notes/clEnqueueSVMMemcpy.asciidoc[] - * _context_ must be a valid OpenCL context. - * _num_devices_ is the number of devices listed in _device_list_. - * _device_list_ is a pointer to a list of devices that are in _context_. - _device_list_ must be a non-`NULL` value. - The built-in kernels are loaded for devices specified in this list. - * _kernel_names_ is a semi-colon separated list of built-in kernel names. + * _command_queue_ refers to the host command-queue in which the read / write + command will be queued. + If either _dst_ptr_ or _src_ptr_ is allocated using {clSVMAlloc} then the + OpenCL context allocated against must match that of _command_queue_. + * _blocking_copy_ indicates if the copy operation is _blocking_ or + _non-blocking_. 
+ * If _blocking_copy_ is {CL_TRUE} i.e. the copy command is blocking, + {clEnqueueSVMMemcpy} does not return until the buffer data has been copied + into memory pointed to by _dst_ptr_. + * _size_ is the size in bytes of data being copied. + * _dst_ptr_ is the pointer to a host or SVM memory allocation where data is + copied to. + * _src_ptr_ is the pointer to a host or SVM memory allocation where data is + copied from. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this read / write command + and can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -The devices associated with the program object will be the list of devices -specified by _device_list_. -The list of devices specified by _device_list_ must be devices associated -with _context_. +If _blocking_copy_ is {CL_FALSE} i.e. the copy command is non-blocking, +{clEnqueueSVMMemcpy} queues a non-blocking copy command and returns. 
+The contents of the buffer that _dst_ptr_ points to cannot be used until the +copy command has completed. +The _event_ argument returns an event object which can be used to query the +execution status of the copy command. +When the copy command has completed, the contents of the buffer that +_dst_ptr_ points to can be used by the application. + +If the memory allocation(s) containing _dst_ptr_ and/or _src_ptr_ are +allocated using {clSVMAlloc} and either is not allocated from the same +context from which _command_queue_ was created, the behavior is undefined. // refError -{clCreateProgramWithBuiltInKernels} returns a valid non-zero program object -and _errcode_ret_ is set to {CL_SUCCESS} if the program object is created +{clEnqueueSVMMemcpy} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +Otherwise, it returns one of the following errors: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` or _num_devices_ is zero. - * {CL_INVALID_VALUE} if _kernel_names_ is `NULL` or _kernel_names_ contains - a kernel name that is not supported by any of the devices in - _device_list_. - * {CL_INVALID_DEVICE} if any device in _device_list_ is not in the list - of devices associated with _context_. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + events in _event_wait_list_ are not the same. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events.
+ * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the copy operation is + blocking and the execution status of any of the events in + _event_wait_list_ is a negative integer value. + * {CL_INVALID_VALUE} if _dst_ptr_ or _src_ptr_ is `NULL`. + * {CL_MEM_COPY_OVERLAP} if the values specified for _dst_ptr_, _src_ptr_ and + _size_ result in an overlapping copy. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- - -=== Retaining and Releasing Program Objects - -[open,refpage='clRetainProgram',desc='Increments the program reference count.',type='protos'] +[open,refpage='clEnqueueSVMMemFill',desc='Enqueues a command to fill a region in memory with a pattern of a given pattern size.',type='protos'] -- -To retain a program object, call the function - -include::{generated}/api/protos/clRetainProgram.txt[] -include::{generated}/api/version-notes/clRetainProgram.asciidoc[] +To enqueue a command to fill a region in memory with a pattern of a given +pattern size, call the function - * _program_ is the program object to be retained. +include::{generated}/api/protos/clEnqueueSVMMemFill.txt[] +include::{generated}/api/version-notes/clEnqueueSVMMemFill.asciidoc[] -The _program_ reference count is incremented. -All APIs that create a program do an implicit retain. + * _command_queue_ refers to the host command-queue in which the fill command + will be queued. + The OpenCL context associated with _command_queue_ and SVM pointer referred + to by _svm_ptr_ must be the same. + * _svm_ptr_ is a pointer to a memory region that will be filled with + _pattern_. + It must be aligned to _pattern_size_ bytes. + If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from + the same context from which _command_queue_ was created. + Otherwise the behavior is undefined. 
+ * _pattern_ is a pointer to the data pattern of size _pattern_size_ in bytes. + _pattern_ will be used to fill the region starting at _svm_ptr_ + that is _size_ bytes in size. + The data pattern must be a scalar or vector integer or floating-point data + type supported by OpenCL as described in <<scalar-data-types, Shared Application Scalar Data Types>> and <<vector-data-types, Supported Application Vector Data Types>>. + For example, if the region pointed to by _svm_ptr_ is to be filled with a + pattern of float4 values, then _pattern_ will be a pointer to a cl_float4 + value and _pattern_size_ will be `sizeof(cl_float4)`. + The maximum value of _pattern_size_ is the size of the largest integer or + floating-point vector data type supported by the OpenCL device. + The memory associated with _pattern_ can be reused or freed after the + function returns. + * _size_ is the size in bytes of region being filled starting with _svm_ptr_ + and must be a multiple of _pattern_size_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete.
+ If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. // refError -{clRetainProgram} returns {CL_SUCCESS} if the function is executed +{clEnqueueSVMMemFill} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + events in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. + * {CL_INVALID_VALUE} if _svm_ptr_ is not aligned to _pattern_size_ bytes. + * {CL_INVALID_VALUE} if _pattern_ is `NULL` or if _pattern_size_ is 0 or if + _pattern_size_ is not one of {1, 2, 4, 8, 16, 32, 64, 128}. + * {CL_INVALID_VALUE} if _size_ is not a multiple of _pattern_size_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- -[open,refpage='clReleaseProgram',desc='Decrements the program reference count.',type='protos'] +[open,refpage='clEnqueueSVMMap',desc='Enqueues a command that will allow the host to update a region of a SVM buffer',type='protos'] -- -To release a program object, call the function +To enqueue a command that will allow the host to update a region of a SVM +buffer, call the function -include::{generated}/api/protos/clReleaseProgram.txt[] -include::{generated}/api/version-notes/clReleaseProgram.asciidoc[] +include::{generated}/api/protos/clEnqueueSVMMap.txt[] +include::{generated}/api/version-notes/clEnqueueSVMMap.asciidoc[] - * _program_ is the program object to be released. + * _command_queue_ must be a valid host command-queue. + * _blocking_map_ indicates if the map operation is _blocking_ or + _non-blocking_. + * _map_flags_ is a bit-field and is described in the + <<memory-map-flags-table,Memory Map Flags>> table. + * _svm_ptr_ and _size_ are a pointer to a memory region and size in bytes that + will be updated by the host. + If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from + the same context from which _command_queue_ was created. + Otherwise the behavior is undefined. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns.
+ * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -The _program_ reference count is decremented. -The program object is deleted after all kernel objects associated with -_program_ have been deleted and the _program_ reference count becomes zero. +If _blocking_map_ is {CL_TRUE}, {clEnqueueSVMMap} does not return until the +application can access the contents of the SVM region specified by _svm_ptr_ +and _size_ on the host. + +If _blocking_map_ is {CL_FALSE} i.e. map operation is non-blocking, the region +specified by _svm_ptr_ and _size_ cannot be used until the map command has +completed. +The _event_ argument returns an event object which can be used to query the +execution status of the map command. +When the map command is completed, the application can access the contents +of the region specified by _svm_ptr_ and _size_. + +Note that since we are enqueuing a command with a SVM buffer, the region is +already mapped in the host address space. // refError -{clReleaseProgram} returns {CL_SUCCESS} if the function is executed +{clEnqueueSVMMap} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. 
+ * {CL_INVALID_VALUE} if _size_ is 0 or if values specified in _map_flags_ + are not valid. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the map operation is + blocking and the execution status of any of the events in + _event_wait_list_ is a negative integer value. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -Using this function to release a reference that was not obtained by creating -the object or by calling {clRetainProgram} causes undefined behavior. -- -[open,refpage='clSetProgramReleaseCallback',desc='Registers a destructor callback function with a program object.',type='protos'] +[open,refpage='clEnqueueSVMUnmap',desc='Enqueues a command to indicate that the host has completed updating the region given by an SVM pointer and which was specified in a previous call to clEnqueueSVMMap.',type='protos'] -- -To register a callback function with a program object that is called when -the program object is destroyed, call the function +To enqueue a command to indicate that the host has completed updating the +region given by _svm_ptr_ and which was specified in a previous call to +{clEnqueueSVMMap}, call the function -include::{generated}/api/protos/clSetProgramReleaseCallback.txt[] -include::{generated}/api/version-notes/clSetProgramReleaseCallback.asciidoc[] +include::{generated}/api/protos/clEnqueueSVMUnmap.txt[] +include::{generated}/api/version-notes/clEnqueueSVMUnmap.asciidoc[] - * _program_ specifies the memory object to register the callback to. - * _pfn_notify_ is the callback function to register. 
- This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - The parameters to this callback function are: - ** _program_ is the program being deleted. - When the callback function is called by the implementation, this program - object is not longer valid. - _program_ is only provided for reference purposes. - ** _user_data_ is a pointer to user supplied data. - * _user_data_ will be passed as the _user_data_ argument when _pfn_notify_ is - called. - _user_data_ can be `NULL`. + * _command_queue_ must be a valid host command-queue. + * _svm_ptr_ is a pointer that was specified in a previous call to + {clEnqueueSVMMap}. + If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from + the same context from which _command_queue_ was created. + Otherwise the behavior is undefined. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before {clEnqueueSVMUnmap} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueSVMUnmap} does not wait on any + event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. 
+ If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -Each call to {clSetProgramReleaseCallback} registers the specified -callback function on a callback stack associated with _program_. -The registered callback functions are called in the reverse order in -which they were registered. -The registered callback functions are called after destructors (if any) for -program scope global variables (if any) are called and before the program -object is deleted. -This provides a mechanism for an application to be notified when destructors -for program scope global variables are complete. +{clEnqueueSVMMap} and {clEnqueueSVMUnmap} act as synchronization points for +the region of the SVM buffer specified in these calls. // refError -{clSetProgramReleaseCallback} may unconditionally return an error if no -devices in the context associated with _program_ support destructors for -program scope global variables. -Support for constructors and destructors for program scope global variables -is required only for OpenCL 2.2 devices. - -{clSetProgramReleaseCallback} returns {CL_SUCCESS} if the function is executed +{clEnqueueSVMUnmap} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_OPERATION} if no devices in the context associated with - _program_ support destructors for program scope global variables. - * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. 
+ * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or if _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. --- +[NOTE] +==== +If a coarse-grained SVM buffer is currently mapped for writing, the +application must ensure that the SVM buffer is unmapped before any enqueued +kernels or commands that read from or write to this SVM buffer or any of its +associated {cl_mem_TYPE} buffer objects begin execution; otherwise the behavior is +undefined. -=== Setting SPIR-V specialization constants +If a coarse-grained SVM buffer is currently mapped for reading, the +application must ensure that the SVM buffer is unmapped before any enqueued +kernels or commands that write to this memory object or any of its +associated {cl_mem_TYPE} buffer objects begin execution; otherwise the behavior is +undefined. -NOTE: Specialization constants are <> version -2.2. +A SVM buffer is considered as mapped if there are one or more active +mappings for the SVM buffer irrespective of whether the mapped regions span +the entire SVM buffer. -[open,refpage='clSetProgramSpecializationConstant',desc='',type='protos'] +The above note does not apply to fine-grained SVM buffers (fine-grained +buffers allocated using {clSVMAlloc} or fine-grained system allocations). 
+==== -- -To set the value of a specialization constant, call the function -include::{generated}/api/protos/clSetProgramSpecializationConstant.txt[] -include::{generated}/api/version-notes/clSetProgramSpecializationConstant.asciidoc[] +[open,refpage='clEnqueueSVMMigrateMem',desc='Enqueues a command to indicate which device a set of ranges of SVM allocations should be associated with.',type='protos'] +-- +To enqueue a command to indicate which device a set of ranges of SVM +allocations should be associated with, call the function - * _program_ must be a valid OpenCL program created from an intermediate - language (e.g. SPIR-V). - * _spec_id_ identifies the specialization constant whose value will be - set. - * _spec_size_ specifies the size in bytes of the data pointed to by - _spec_value_. - This should be 1 for boolean constants. - For all other constant types this should match the size of the - specialization constant in the module. - * _spec_value_ is a pointer to the memory location that contains the value of - the specialization constant. - The data pointed to by _spec_value_ are copied and can be safely reused by - the application after {clSetProgramSpecializationConstant} returns. - This specialization value will be used by subsequent calls to - {clBuildProgram} until another call to {clSetProgramSpecializationConstant} - changes it. - If a specialization constant is a boolean constant, _spec_value_ should be a - pointer to a {cl_uchar_TYPE} value. - A value of zero will set the specialization constant to false; any other - value will set it to true. +include::{generated}/api/protos/clEnqueueSVMMigrateMem.txt[] +include::{generated}/api/version-notes/clEnqueueSVMMigrateMem.asciidoc[] -Calling this function multiple times for the same specialization constant -shall cause the last provided value to override any previously specified -value. -The values are used by a subsequent {clBuildProgram} call for the _program_. 
+ * _command_queue_ is a valid host command-queue. + The specified set of allocation ranges will be migrated to the OpenCL device + associated with _command_queue_. + * _num_svm_pointers_ is the number of pointers in the specified _svm_pointers_ + array, and the number of sizes in the _sizes_ array, if _sizes_ is not + `NULL`. + * _svm_pointers_ is a pointer to an array of pointers. + Each pointer in this array must be within an allocation produced by a call + to {clSVMAlloc}. + * _sizes_ is an array of sizes. + The pair _svm_pointers_[i] and _sizes_[i] together define the starting + address and number of bytes in a range to be migrated. + _sizes_ may be `NULL` indicating that every allocation containing any + _svm_pointers_[i] is to be migrated. + Also, if _sizes_[i] is zero, then the entire allocation containing + _svm_pointers_[i] is migrated. + * _flags_ is a bit-field that is used to specify migration options. + The <<migration-flags-table, Memory Migration Flags>> table describes the possible + values for _flags_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete.
+ If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -Application is not required to provide values for every specialization -constant contained in the module. If the value is not set by this API -call, default values will be used during the build. +Once the event returned by {clEnqueueSVMMigrateMem} has become {CL_COMPLETE}, +the ranges specified by _svm_pointers_ and _sizes_ have been successfully +migrated to the device associated with _command_queue_. -// refError +The user is responsible for managing the event dependencies associated with +this command in order to avoid overlapping access to SVM allocations. +Improperly specified event dependencies passed to {clEnqueueSVMMigrateMem} +could result in undefined results. -{clSetProgramSpecializationConstant} returns {CL_SUCCESS} if the function is -executed successfully. +// refError +{clEnqueueSVMMigrateMem} returns {CL_SUCCESS} if the function is executed +successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object created - from an intermediate language (e.g. SPIR-V), or if the intermediate - language does not support specialization constants. - * {CL_INVALID_OPERATION} if no devices associated with _program_ support - intermediate language programs. - * {CL_COMPILER_NOT_AVAILABLE} if _program_ is created with - {clCreateProgramWithIL} and a compiler is not - available, i.e. {CL_DEVICE_COMPILER_AVAILABLE} specified in the - <> table is set to {CL_FALSE}. - * {CL_INVALID_SPEC_ID} if _spec_id_ is not a valid specialization constant - identifier.
- * {CL_INVALID_VALUE} if _spec_size_ does not match the size of the - specialization constant in the module, or if _spec_value_ is + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _num_svm_pointers_ is zero or _svm_pointers_ is `NULL`. + * {CL_INVALID_VALUE} if _sizes_[i] is non-zero and the range [_svm_pointers_[i], + _svm_pointers_[i]+_sizes_[i]) is not contained within an existing + {clSVMAlloc} allocation. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or if _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -5772,1319 +7985,1243 @@ Otherwise, it returns one of the following errors: -- -=== Building Program Executables +=== Memory Consistency for SVM Allocations -[open,refpage='clBuildProgram',desc='Builds (compiles and links) a program executable from the program source or binary.',type='protos'] --- -To build (compile & link) a program executable, call the function +To ensure memory consistency in SVM allocations, the program can rely on the +guaranteed memory consistency at synchronization points. +This consistency support already exists in OpenCL 1.x and can be used for +coarse-grained SVM allocations or for fine-grained buffer SVM allocations; +what SVM adds is the ability to share pointers between the host and all SVM +devices.
-include::{generated}/api/protos/clBuildProgram.txt[] -include::{generated}/api/version-notes/clBuildProgram.asciidoc[] +In addition, sub-buffers can also be used to ensure that each device gets a +consistent view of a SVM buffer's memory when it is shared by multiple +devices. +For example, assume that two devices share a SVM pointer. +The host can create a {cl_mem_TYPE} buffer object using {clCreateBuffer} or +{clCreateBufferWithProperties} with {CL_MEM_USE_HOST_PTR} and _host_ptr_ set +to the SVM pointer and then create two disjoint sub-buffers with starting +virtual addresses _sb1_ptr_ and _sb2_ptr_. +These pointers (_sb1_ptr_ and _sb2_ptr_) can be passed to kernels executing +on the two devices. +{clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} and the existing +<<accessing-mapped-regions, access rules for memory objects>> ensure +consistency for buffer regions (_sb1_ptr_ and _sb2_ptr_) read and written by +these kernels. - * _program_ is the program object. - * _device_list_ is a pointer to a list of devices associated with _program_. - If _device_list_ is a `NULL` value, the program executable is built for all - devices associated with _program_ for which a source or binary has been - loaded. - If _device_list_ is a non-`NULL` value, the program executable is built for - devices specified in this list for which a source or binary has been loaded. - * _num_devices_ is the number of devices listed in _device_list_. - * _options_ is a pointer to a null-terminated string of characters that - describes the build options to be used for building the program executable. - The list of supported options is described in <>. - If the program was created using {clCreateProgramWithBinary} and _options_ - is a `NULL` pointer, the program will be built as if _options_ were the same - as when the program binary was originally built.
- If the program was created using {clCreateProgramWithBinary} and _options_ - string contains anything other than the same options in the same order - (whitespace ignored) as when the program binary was originally built, then - the behavior is implementation-defined. - Otherwise, if _options_ is a `NULL` pointer then it will have the same - result as the empty string. - * _pfn_notify_ is a function pointer to a notification routine. - The notification routine is a callback function that an application can - register and which will be called when the program executable has been built - (successfully or unsuccessfully). - If _pfn_notify_ is not `NULL`, {clBuildProgram} does not need to wait for - the build to complete and can return immediately once the build operation - can begin. - Any state changes of the program object that result from calling - {clBuildProgram} (e.g. build status or log) will be observable from this - callback function. - The build operation can begin if the context, program whose sources are - being compiled and linked, list of devices and build options specified are - all valid and appropriate host and device resources needed to perform the - build are available. - If _pfn_notify_ is `NULL`, {clBuildProgram} does not return until the build - has completed. - This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - * _user_data_ will be passed as an argument when _pfn_notify_ is called. - _user_data_ can be `NULL`. +When the host and devices are able to use SVM atomic operations (i.e. +{CL_DEVICE_SVM_ATOMICS} is set in {CL_DEVICE_SVM_CAPABILITIES}), these atomic +operations can be used to provide memory consistency at a fine grain in a +shared memory region. +The effect of these operations is visible to the host and all devices with +which that memory is shared. 
-The program executable is built from the program source or binary for all -the devices, or a specific device(s) in the OpenCL context associated with -_program_. -OpenCL allows program executables to be built using the source or the -binary. -{clBuildProgram} must be called for _program_ created using -{clCreateProgramWithSource}, {clCreateProgramWithIL} or -{clCreateProgramWithBinary} to build the program executable for one or more -devices associated with _program_. -If _program_ is created with {clCreateProgramWithBinary}, then the program -binary must be an executable binary (not a compiled binary or library). -The executable binary can be queried using {clGetProgramInfo}(_program_, -{CL_PROGRAM_BINARIES}, ...) and can be specified to -{clCreateProgramWithBinary} to create a new program object. +== Sampler Objects -// refError +A sampler object describes how to sample an image when the image is read in +the kernel. +The built-in functions to read from an image in a kernel take a sampler as +an argument. +The sampler arguments to the image read function can be sampler objects +created using OpenCL functions and passed as argument values to the kernel +or can be samplers declared inside a kernel. +In this section we discuss how sampler objects are created using OpenCL +functions. -{clBuildProgram} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater - than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. - * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not - `NULL`. - * {CL_INVALID_DEVICE} if any device in _device_list_ is not in - the list of devices associated with _program_. 
- * {CL_INVALID_BINARY} if _program_ is created with - {clCreateProgramWithBinary} and devices listed in _device_list_ do not - have a valid program binary loaded. - * {CL_INVALID_BUILD_OPTIONS} if the build options specified by _options_ are - invalid. - * {CL_COMPILER_NOT_AVAILABLE} if _program_ is created with - {clCreateProgramWithSource} or {clCreateProgramWithIL} and a compiler is - not available, i.e. {CL_DEVICE_COMPILER_AVAILABLE} specified in the - <> table is set to {CL_FALSE}. - * {CL_BUILD_PROGRAM_FAILURE} if there is a failure to build the program - executable. - This error will be returned if {clBuildProgram} does not return until - the build has completed. - * {CL_INVALID_OPERATION} if the build of a program executable for any of the - devices listed in _device_list_ by a previous call to {clBuildProgram} - for _program_ has not completed. - * {CL_INVALID_OPERATION} if there are kernel objects attached to _program_. - * {CL_INVALID_OPERATION} if _program_ was not created with - {clCreateProgramWithSource}, {clCreateProgramWithIL} or - {clCreateProgramWithBinary}. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +=== Creating Sampler Objects + +[open,refpage='clCreateSamplerWithProperties',desc='Creates a sampler object.',type='protos'] -- +To create a sampler object, call the function +include::{generated}/api/protos/clCreateSamplerWithProperties.txt[] +include::{generated}/api/version-notes/clCreateSamplerWithProperties.asciidoc[] -=== Separate Compilation and Linking of Programs + * _context_ must be a valid OpenCL context. + * _sampler_properties_ specifies a list of sampler property names and their + corresponding values. + Each sampler property name is immediately followed by the corresponding + desired value. + The list is terminated with 0. 
+ The list of supported properties is described in the + <<sampler-properties-table>> table. + If a supported property and its value is not specified in + _sampler_properties_, its default value will be used. + _sampler_properties_ can be `NULL` in which case the default values for + supported sampler properties will be used. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -NOTE: Separate compilation and linking are <> -version 1.2. +[[sampler-properties-table]] +.List of supported sampler creation properties by {clCreateSamplerWithProperties} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Sampler Property | Property Value | Description +| {CL_SAMPLER_NORMALIZED_COORDS_anchor} -OpenCL programs are compiled and linked to support the following: +include::{generated}/api/version-notes/CL_SAMPLER_NORMALIZED_COORDS.asciidoc[] + | {cl_bool_TYPE} + | A boolean value that specifies whether the image coordinates + specified are normalized or not. - * Separate compilation and link stages. - Program sources can be compiled to generate a compiled binary object and - linked in a separate stage with other compiled program objects to the - program executable. - * Embedded headers. - In OpenCL 1.0 and 1.1, the I build option could be used to specify the - list of directories to be searched for headers files that are included - by a program source(s). - OpenCL 1.2 extends this by allowing the header sources to come from - program objects instead of just header files. - * Libraries. - The linker can be used to link compiled objects and libraries into a - program executable or to create a library of compiled binaries. + The default value (i.e. the value used if this property is not + specified in sampler_properties) is {CL_TRUE}.
+| {CL_SAMPLER_ADDRESSING_MODE_anchor} -[open,refpage='clCompileProgram',desc='Compiles a program\'s source for all the devices or a specific device(s) in the OpenCL context associated with a program.',type='protos'] --- -To compile a program's source for all the devices or a specific device(s) in -the OpenCL context associated with the program, call the function +include::{generated}/api/version-notes/CL_SAMPLER_ADDRESSING_MODE.asciidoc[] + | {cl_addressing_mode_TYPE} + | Specifies how out-of-range image coordinates are handled when + reading from an image. + Valid values are: -include::{generated}/api/protos/clCompileProgram.txt[] -include::{generated}/api/version-notes/clCompileProgram.asciidoc[] + {CL_ADDRESS_NONE_anchor} - Behavior is undefined for out-of-range + image coordinates. - * _program_ is the program object that is the compilation target. - * _device_list_ is a pointer to a list of devices associated with _program_. - If _device_list_ is a `NULL` value, the compile is performed for all devices - associated with _program_. - If _device_list_ is a non-`NULL` value, the compile is performed for devices - specified in this list. - * _num_devices_ is the number of devices listed in _device_list_. - * _options_ is a pointer to a null-terminated string of characters that - describes the compilation options to be used for building the program - executable. - If _options_ is a `NULL` pointer then it will have the same result as the - empty string. - Certain options are ignored when program is created with IL. - The list of supported options is as described in <>. - * _num_input_headers_ specifies the number of programs that describe headers - in the array referenced by _input_headers_. - * _input_headers_ is an array of program embedded headers created with - {clCreateProgramWithSource}. - * _header_include_names_ is an array that has a one to one correspondence with - _input_headers_. 
- Each entry in _header_include_names_ specifies the include name used by - source in _program_ that comes from an embedded header. - The corresponding entry in _input_headers_ identifies the program object - which contains the header source to be used. - The embedded headers are first searched before the headers in the list of - directories specified by the `-I` compile option (as described in - <>). - If multiple entries in _header_include_names_ refer to the same header name, - the first one encountered will be used. - * _pfn_notify_ is a function pointer to a notification routine. - The notification routine is a callback function that an application can - register and which will be called when the program executable has been built - (successfully or unsuccessfully). - If _pfn_notify_ is not `NULL`, {clCompileProgram} does not need to wait for - the compiler to complete and can return immediately once the compilation can - begin. - Any state changes of the program object that result from calling - {clCompileProgram} (e.g. compile status or log) will be observable from this - callback function. - The compilation can begin if the context, program whose sources are being - compiled, list of devices, input headers, programs that describe input - headers and compiler options specified are all valid and appropriate host - and device resources needed to perform the compile are available. - If _pfn_notify_ is `NULL`, {clCompileProgram} does not return until the - compiler has completed. - This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - * _user_data_ will be passed as an argument when _pfn_notify_ is called. - _user_data_ can be `NULL`. + {CL_ADDRESS_CLAMP_TO_EDGE_anchor} - Out-of-range image coordinates + are clamped to the edge of the image. -The pre-processor runs before the program sources are compiled. 
-The compiled binary is built for all devices associated with _program_ or -the list of devices specified. -The compiled binary can be queried using {clGetProgramInfo}(_program_, -{CL_PROGRAM_BINARIES}, ...) and can be passed to {clCreateProgramWithBinary} -to create a new program object. + {CL_ADDRESS_CLAMP_anchor} - Out-of-range image coordinates are + assigned a border color value. -If _program_ was created using {clCreateProgramWithIL}, then -_num_input_headers_, _input_headers_, and _header_include_names_ are -ignored. + {CL_ADDRESS_REPEAT_anchor} - Out-of-range image coordinates read + from the image as if the image data were replicated in all dimensions. -For example, consider the following program source: + {CL_ADDRESS_MIRRORED_REPEAT_anchor} - Out-of-range image coordinates + read from the image as if the image data were replicated in all + dimensions, mirroring the image contents at the edge of each + replication. -[source,opencl_c] ----- -#include -#include -__kernel void -image_filter (int n, int m, - __constant float *filter_weights, - __read_only image2d_t src_image, - __write_only image2d_t dst_image) -{ -... -} ----- + The default is {CL_ADDRESS_CLAMP}. +| {CL_SAMPLER_FILTER_MODE_anchor} -This kernel includes two headers foo.h and mydir/myinc.h. -The following describes how these headers can be passed as embedded headers -in program objects: +include::{generated}/api/version-notes/CL_SAMPLER_FILTER_MODE.asciidoc[] + | {cl_filter_mode_TYPE} + | Specifies the type of filter that is applied when reading an + image. + Valid values are: -[source,opencl] ----- -cl_program foo_pg = clCreateProgramWithSource(context, - 1, &foo_header_src, NULL, &err); -cl_program myinc_pg = clCreateProgramWithSource(context, - 1, &myinc_header_src, NULL, &err); + {CL_FILTER_NEAREST_anchor} - Returns the image element nearest + to the image coordinate. 
-// lets assume the program source described above is given -// by program_A and is loaded via clCreateProgramWithSource -cl_program input_headers[2] = { foo_pg, myinc_pg }; -char * input_header_names[2] = { foo.h, mydir/myinc.h }; -clCompileProgram(program_A, - 0, NULL, // num_devices & device_list - NULL, // compile_options - 2, // num_input_headers - input_headers, - input_header_names, - NULL, NULL); // pfn_notify & user_data ----- + {CL_FILTER_LINEAR_anchor} - Returns a weighted average of the + four image elements nearest to the image coordinate. + + The default value is {CL_FILTER_NEAREST}. +ifdef::cl_khr_mipmap_image[] +| {CL_SAMPLER_MIP_FILTER_MODE_KHR_anchor} + +include::{generated}/api/version-notes/CL_SAMPLER_MIP_FILTER_MODE_KHR.asciidoc[] + | {cl_filter_mode_TYPE} + | Specifies the mipmap filter used when sampling from a mipmapped + image. + The available filters are: + + {CL_FILTER_NEAREST} - Use the nearest mipmap level to the image + coordinate. + + {CL_FILTER_LINEAR} - Use a weighted average of the two mipmap levels + nearest to the image coordinate. + + The default is {CL_FILTER_NEAREST}. +| {CL_SAMPLER_LOD_MIN_KHR_anchor} + +include::{generated}/api/version-notes/CL_SAMPLER_LOD_MIN_KHR.asciidoc[] + | {cl_float_TYPE} + | Specifies the minimum value to which the computed level of detail + _lambda_ is clamped when sampling from a mipmapped image. + + The default is `0.0f`. +| {CL_SAMPLER_LOD_MAX_KHR_anchor} + +include::{generated}/api/version-notes/CL_SAMPLER_LOD_MAX_KHR.asciidoc[] + | {cl_float_TYPE} + | Specifies the maximum value to which the computed level of detail + _lambda_ is clamped when sampling from a mipmapped image. + + The default is `MAXFLOAT`.
+endif::cl_khr_mipmap_image[] +|==== + +ifdef::cl_khr_mipmap_image[] +NOTE: When the `<>` extension is supported, the sampler +properties {CL_SAMPLER_MIP_FILTER_MODE_KHR}, {CL_SAMPLER_LOD_MIN_KHR} and +{CL_SAMPLER_LOD_MAX_KHR} cannot be specified with any samplers initialized +in the OpenCL program source. +Only the default values for these properties will be used. +To create a sampler with specific values for these properties, a sampler +object must be created with {clCreateSamplerWithProperties} and passed as an +argument to a kernel. +endif::cl_khr_mipmap_image[] // refError -{clCompileProgram} returns {CL_SUCCESS} if the function is executed +{clCreateSamplerWithProperties} returns a valid non-zero sampler object and +_errcode_ret_ is set to {CL_SUCCESS} if the sampler object is created successfully. -Otherwise, it returns one of the following errors: +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater - than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. - * {CL_INVALID_VALUE} if _num_input_headers_ is zero and - _header_include_names_ or _input_headers_ are not `NULL` or if - _num_input_headers_ is not zero and _header_include_names_ or - _input_headers_ are `NULL`. - * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not - `NULL`. - * {CL_INVALID_DEVICE} if device in _device_list_ is not in - the list of devices associated with _program_. - * {CL_INVALID_COMPILER_OPTIONS} if the compiler options specified by - _options_ are invalid. - * {CL_INVALID_OPERATION} if the compilation or build of a program executable - for any of the devices listed in _device_list_ by a previous call to - {clCompileProgram} or {clBuildProgram} for _program_ has not completed. - * {CL_COMPILER_NOT_AVAILABLE} if a compiler is not available, i.e. 
- {CL_DEVICE_COMPILER_AVAILABLE} specified in the - <> table is set to {CL_FALSE}. - * {CL_COMPILE_PROGRAM_FAILURE} if there is a failure to compile the program - source. - This error will be returned if {clCompileProgram} does not return until - the compile has completed. - * {CL_INVALID_OPERATION} if there are kernel objects attached to _program_. - * {CL_INVALID_OPERATION} if _program_ has no source or IL available, i.e. it - has not been created with {clCreateProgramWithSource} or - {clCreateProgramWithIL}. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if the property name in _sampler_properties_ is not a + supported property name, if the value specified for a supported property + name is not valid, or if the same property name is specified more than + once. + * {CL_INVALID_OPERATION} if images are not supported by any device + associated with _context_ (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in the + <> table is {CL_FALSE}). * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- -[open,refpage='clLinkProgram',desc='Links a set of compiled program objects and libraries for all the devices or a specific device(s) in the OpenCL context and creates a library or executable.',type='protos'] +[open,refpage='clCreateSampler',desc='Creates a sampler object.',type='protos'] -- -To link a set of compiled program objects and libraries for all the devices -or a specific device(s) in the OpenCL context and create a library or -executable, call the function +To create a sampler object, call the function -include::{generated}/api/protos/clLinkProgram.txt[] -include::{generated}/api/version-notes/clLinkProgram.asciidoc[] +include::{generated}/api/protos/clCreateSampler.txt[] +include::{generated}/api/version-notes/clCreateSampler.asciidoc[] * _context_ must be a valid OpenCL context. - * _device_list_ is a pointer to a list of devices that are in _context_. - If _device_list_ is a `NULL` value, the link is performed for all devices - associated with _context_ for which a compiled object is available. - If _device_list_ is a non-`NULL` value, the link is performed for devices - specified in this list for which a compiled object is available. - * _num_devices_ is the number of devices listed in _device_list_. - * _options_ is a pointer to a null-terminated string of characters that - describes the link options to be used for building the program executable. - The list of supported options is as described in <>. - If the program was created using {clCreateProgramWithBinary} and _options_ - is a `NULL` pointer, the program will be linked as if _options_ were the - same as when the program binary was originally built. - If the program was created using {clCreateProgramWithBinary} and _options_ - string contains anything other than the same options in the same order - (whitespace ignored) as when the program binary was originally built, then - the behavior is implementation-defined. 
- Otherwise, if _options_ is a `NULL` pointer then it will have the same - result as the empty string. - * _num_input_programs_ specifies the number of programs in array referenced by - _input_programs_. - * _input_programs_ is an array of program objects that are compiled binaries - or libraries that are to be linked to create the program executable. - For each device in _device_list_ or if _device_list_ is `NULL` the list of - devices associated with context, the following cases occur: - ** All programs specified by _input_programs_ contain a compiled binary or - library for the device. - In this case, a link is performed to generate a program executable for - this device. - ** None of the programs contain a compiled binary or library for that - device. - In this case, no link is performed and there will be no program - executable generated for this device. - ** All other cases will return a {CL_INVALID_OPERATION} error. - * _pfn_notify_ is a function pointer to a notification routine. - The notification routine is a callback function that an application can - register and which will be called when the program executable has been built - (successfully or unsuccessfully). - * _user_data_ will be passed as an argument when _pfn_notify_ is called. - _user_data_ can be `NULL`. + * _normalized_coords_ has the same interpretation as + {CL_SAMPLER_NORMALIZED_COORDS} in the <>. + * _addressing_mode_ has the same interpretation as + {CL_SAMPLER_ADDRESSING_MODE} in the <>. + * _filter_mode_ has the same interpretation as + {CL_SAMPLER_FILTER_MODE} in the <>. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -If _pfn_notify_ is not `NULL`, {clLinkProgram} does not need to wait for the -linker to complete, and can return immediately once the linking operation can -begin. -Once the linker has completed, the _pfn_notify_ callback function is called -which returns the program object returned by {clLinkProgram}. 
-Any state changes of the program object that result from calling {clLinkProgram} -(e.g. link status or log) will be observable from this callback function. -This callback function may be called asynchronously by the OpenCL -implementation. -It is the application's responsibility to ensure that the callback function -is thread-safe. +// refError -If _pfn_notify_ is `NULL`, {clLinkProgram} does not return until the linker -has completed. +{clCreateSampler} returns a valid non-zero sampler object and _errcode_ret_ is +set to {CL_SUCCESS} if the sampler object is created successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: -{clLinkProgram} creates a new program object which contains the library or -executable. -The library or executable binary can be queried using -{clGetProgramInfo}(_program_, {CL_PROGRAM_BINARIES}, ...) and can be specified -to {clCreateProgramWithBinary} to create a new program object. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if _addressing_mode_, _filter_mode_, _normalized_coords_ + or a combination of these arguments are not valid. + * {CL_INVALID_OPERATION} if images are not supported by any device + associated with _context_ (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in the + <> table is {CL_FALSE}). + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- -The devices associated with the returned program object will be the list of -devices specified by _device_list_ or if _device_list_ is `NULL` it will be -the list of devices associated with _context_.
+[open,refpage='clRetainSampler',desc='Increments the sampler reference count.',type='protos'] +-- +To retain a sampler object, call the function -The linking operation can begin if the context, list of devices, input -programs and linker options specified are all valid and appropriate host and -device resources needed to perform the link are available. -If the linking operation can begin, {clLinkProgram} returns a valid non-zero -program object. +include::{generated}/api/protos/clRetainSampler.txt[] +include::{generated}/api/version-notes/clRetainSampler.asciidoc[] -// refError + * _sampler_ specifies the sampler to be retained. -If _pfn_notify_ is `NULL`, the _errcode_ret_ will be set to {CL_SUCCESS} if -the link operation was successful and {CL_LINK_PROGRAM_FAILURE} if there is a -failure to link the compiled binaries and/or libraries. +The _sampler_ reference count is incremented. +{clCreateSamplerWithProperties} and {clCreateSampler} perform an implicit +retain. -If _pfn_notify_ is not `NULL`, {clLinkProgram} does not have to wait until -the linker to complete and can return {CL_SUCCESS} in _errcode_ret_ if the -linking operation can begin. -The _pfn_notify_ callback function will return a {CL_SUCCESS} or -{CL_LINK_PROGRAM_FAILURE} if the linking operation was successful or not. +// refError -Otherwise {clLinkProgram} returns a `NULL` program object with an -appropriate error in _errcode_ret_. -The application should query the linker status of this program object to -check if the link was successful or not. -The list of errors that can be returned are: +{clRetainSampler} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater - than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero.
- * {CL_INVALID_VALUE} if _num_input_programs_ is zero and _input_programs_ is - `NULL` or if _num_input_programs_ is zero and _input_programs_ is not - `NULL` or if _num_input_programs_ is not zero and _input_programs_ is - `NULL`. - * {CL_INVALID_PROGRAM} if programs specified in _input_programs_ are not - valid program objects. - * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not - `NULL`. - * {CL_INVALID_DEVICE} if any device in _device_list_ is not in - the list of devices associated with _context_. - * {CL_INVALID_LINKER_OPTIONS} if the linker options specified by _options_ - are invalid. - * {CL_INVALID_OPERATION} if the compilation or build of a program executable - for any of the devices listed in _device_list_ by a previous call to - {clCompileProgram} or {clBuildProgram} for _program_ has not completed. - * {CL_INVALID_OPERATION} if the rules for devices containing compiled - binaries or libraries as described in _input_programs_ argument above - are not followed. - * {CL_LINKER_NOT_AVAILABLE} if a linker is not available, i.e. - {CL_DEVICE_LINKER_AVAILABLE} specified in the - <> table is set to {CL_FALSE}. - * {CL_LINK_PROGRAM_FAILURE} if there is a failure to link the compiled - binaries and/or libraries. + * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- +[open,refpage='clReleaseSampler',desc='Decrements the sampler reference count.',type='protos'] +-- +To release a sampler object, call the function -[[compiler-options]] -=== Compiler Options - -The compiler options are categorized as pre-processor options, options for -math intrinsics, options that control optimization and miscellaneous -options. 
-This specification defines a standard set of options that must be supported -by the compiler when building program executables online or offline from -OpenCL C/{cpp} or, where relevant, from an IL. -These may be extended by a set of vendor- or platform-specific options. +include::{generated}/api/protos/clReleaseSampler.txt[] +include::{generated}/api/version-notes/clReleaseSampler.asciidoc[] + * _sampler_ specifies the sampler to be released. -[[preprocessor-options]] -==== Preprocessor options +The _sampler_ reference count is decremented. +The sampler object is deleted after the reference count becomes zero and +commands queued for execution on a command-queue(s) that use _sampler_ have +finished. -These options control the OpenCL C/{cpp} preprocessor which is run on each -program source before actual compilation. -These options are ignored for programs created with IL. +// refError -`-D name` :: - Predefine _name_ as a macro, with definition 1. +{clReleaseSampler} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: -`-D name=definition` :: - The contents of _definition_ are tokenized and processed as if they - appeared during translation phase three in a `#define` directive. - In particular, the definition will be truncated by embedded newline - characters. -+ --- -`-D` options are processed in the order they are given in the _options_ -argument to {clBuildProgram} or {clCompileProgram}. -Note that a space is required between the `-D` option and the symbol it -defines, otherwise behavior is implementation-defined. --- + * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
-`-I dir` :: - Add the directory _dir_ to the list of directories to be searched for - header files. - _dir_ can optionally be enclosed in double quotes. -+ --- -This option is not portable due to its dependency on host file system and -host operating system. -It is supported for backwards compatibility with previous OpenCL versions. -Developers are encouraged to create and use explicit header objects by means -of {clCompileProgram} followed by {clLinkProgram}. +Using this function to release a reference that was not obtained by creating +the object or by calling {clRetainSampler} causes undefined behavior. -- -[[math-intrinsics-options]] -==== Math Intrinsics Options +=== Sampler Object Queries -These options control compiler behavior regarding floating-point arithmetic. -These options trade off between speed and correctness. +[open,refpage='clGetSamplerInfo',desc='Returns information about the sampler object.',type='protos'] +-- +To return information about a sampler object, call the function -`-cl-single-precision-constant` :: - This option forces implicit conversions of double-precision floating-point - literals to single precision. - This option is ignored for programs created with IL. +include::{generated}/api/protos/clGetSamplerInfo.txt[] +include::{generated}/api/version-notes/clGetSamplerInfo.asciidoc[] -`-cl-denorms-are-zero` :: - This option controls how single precision and double precision - denormalized numbers are handled. - If specified as a build option, the single precision denormalized - numbers may be flushed to zero; double precision denormalized numbers - may also be flushed to zero if the optional extension for double - precision is supported. - This is intended to be a performance hint and the OpenCL compiler can - choose not to flush denorms to zero if the device supports single - precision (or double precision) denormalized numbers. 
-+ --- -This option is ignored for single precision numbers if the device does not -support single precision denormalized numbers i.e. {CL_FP_DENORM} bit is not -set in {CL_DEVICE_SINGLE_FP_CONFIG}. + * _sampler_ specifies the sampler being queried. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetSamplerInfo} is described in the + <> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. -This option is ignored for double precision numbers if the device does not -support double precision or if it does support double precision but not -double precision denormalized numbers i.e. {CL_FP_DENORM} bit is not set in -{CL_DEVICE_DOUBLE_FP_CONFIG}. +[[sampler-info-table]] +.List of supported param_names by {clGetSamplerInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Sampler Info | Return Type | Description +| {CL_SAMPLER_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] -This flag only applies for scalar and vector single precision floating-point -variables and computations on these floating-point variables inside a -program. -It does not apply to reading from or writing to image objects. --- +include::{generated}/api/version-notes/CL_SAMPLER_REFERENCE_COUNT.asciidoc[] + | {cl_uint_TYPE} + | Return the _sampler_ reference count. 
+| {CL_SAMPLER_CONTEXT_anchor} -`-cl-fp32-correctly-rounded-divide-sqrt` :: - The `-cl-fp32-correctly-rounded-divide-sqrt` build option to - {clBuildProgram} or {clCompileProgram} allows an application to specify - that single precision floating-point divide (x/y and 1/x) and sqrt used - in the program source are correctly rounded. - If this build option is not specified, the minimum numerical accuracy of - single precision floating-point divide and sqrt are as defined in the - OpenCL C or OpenCL SPIR-V Environment specifications. -+ --- -This build option can only be specified if the -{CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT} is set in {CL_DEVICE_SINGLE_FP_CONFIG} (as -defined in the <> table) for devices -that the program is being build. -{clBuildProgram} or {clCompileProgram} will fail to compile the program for -a device if the `-cl-fp32-correctly-rounded-divide-sqrt` option is specified -and {CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT} is not set for the device. +include::{generated}/api/version-notes/CL_SAMPLER_CONTEXT.asciidoc[] + | {cl_context_TYPE} + | Return the context specified when the sampler is created. +// Note: This enum is used for two purposes: as a property and for a query. +// We use the property as the anchor. +| {CL_SAMPLER_NORMALIZED_COORDS} -Note: This option is <> version 1.2. --- +include::{generated}/api/version-notes/CL_SAMPLER_NORMALIZED_COORDS.asciidoc[] + | {cl_bool_TYPE} + | Return the normalized coords value associated with _sampler_. +// Note: This enum is used for two purposes: as a property and for a query. +// We use the property as the anchor. +| {CL_SAMPLER_ADDRESSING_MODE} +include::{generated}/api/version-notes/CL_SAMPLER_ADDRESSING_MODE.asciidoc[] + | {cl_addressing_mode_TYPE} + | Return the addressing mode value associated with _sampler_. +// Note: This enum is used for two purposes: as a property and for a query. +// We use the property as the anchor. 
+| {CL_SAMPLER_FILTER_MODE} -[[optimization-options]] -==== Optimization Options +include::{generated}/api/version-notes/CL_SAMPLER_FILTER_MODE.asciidoc[] + | {cl_filter_mode_TYPE} + | Return the filter mode value associated with _sampler_. -These options control various sorts of optimizations. -Turning on optimization flags makes the compiler attempt to improve the -performance and/or code size at the expense of compilation time and possibly -the ability to debug the program. +| {CL_SAMPLER_PROPERTIES_anchor} -`-cl-opt-disable` :: - This option disables all optimizations. - The default is optimizations are enabled. +include::{generated}/api/version-notes/CL_SAMPLER_PROPERTIES.asciidoc[] + | {cl_sampler_properties_TYPE}[] + | Return the properties argument specified in + {clCreateSamplerWithProperties}. -`-cl-strict-aliasing` :: - This option allows the compiler to assume the strictest aliasing rules. -+ --- -Note: This option is <> version 1.1. --- + If the _properties_ argument specified in {clCreateSamplerWithProperties} + used to create _sampler_ was not `NULL`, the implementation must return + the values specified in the properties argument in the same order and + without including additional properties. -`-cl-uniform-work-group-size` :: - This requires that the global work-size be a multiple of the work-group - size specified to {clEnqueueNDRangeKernel}. - Allow optimizations that are made possible by this restriction. -+ --- -Note: This option is <> version 2.0. --- + If _sampler_ was created using {clCreateSampler}, or if the _properties_ + argument specified in {clCreateSamplerWithProperties} was `NULL`, the + implementation must return _param_value_size_ret_ equal to 0, + indicating that there are no properties to be returned. +|==== -`-cl-no-subgroup-ifp` :: - This indicates that kernels in this program do not require sub-groups to - make independent forward progress. - Allows optimizations that are made possible by this restriction. 
- This option has no effect for devices that do not support independent - forward progress for sub-groups. -+ --- -Note: This option is <> version 2.1. +// refError + +{clGetSamplerInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <> table and + _param_value_ is not `NULL`. + * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- -The following options control compiler behavior regarding floating-point -arithmetic. -These options trade off between performance and correctness and must be -specifically enabled. -These options are not turned on by default since it can result in incorrect -output for programs which depend on an exact implementation of IEEE 754 -rules/specifications for math functions. -`-cl-mad-enable` :: - Allow `a * b + c` to be replaced by a *mad* instruction. - The *mad* instruction may compute `a * b + c` with reduced accuracy - in the embedded profile. - See the OpenCL C or OpenCL SPIR-V Environment specification for accuracy - details. - On some hardware the *mad* instruction may provide better performance - than the expanded computation. +== Program Objects -`-cl-no-signed-zeros` :: - Allow optimizations for floating-point arithmetic that ignore the - signedness of zero. - IEEE 754 arithmetic specifies the distinct behavior of `+0.0` and `-0.0` - values, which then prohibits simplification of expressions such as `x - {plus} 0.0` or `0.0 * x` (even with `-cl-finite-math-only`). - This option implies that the sign of a zero result isn't significant.
+An OpenCL program consists of a set of kernels that are identified as +functions declared with the `+__kernel+` qualifier in the program source. +OpenCL programs may also contain auxiliary functions and constant data that +can be used by kernel functions. +The program executable can be generated _online_ or _offline_ by the OpenCL +compiler for the appropriate target device(s). -`-cl-unsafe-math-optimizations` :: - Allow optimizations for floating-point arithmetic that (a) assume that - arguments and results are valid, (b) may violate the IEEE 754 standard, - (c) assume relaxed OpenCL numerical compliance requirements as defined - in the unsafe math optimization section of the OpenCL C or OpenCL SPIR-V - Environment specifications, and (d) may violate edge case behavior in the - OpenCL C or OpenCL SPIR-V Environment specifications. - This option includes the `-cl-no-signed-zeros`, `-cl-mad-enable`, and - `-cl-denorms-are-zero` footnote:[{fn-unsafe-denorms-are-zero}] options. +A program object encapsulates the following information: -`-cl-finite-math-only` :: - Allow optimizations for floating-point arithmetic that assume that - arguments and results are not NaNs, +Inf, -Inf. - This option may violate the OpenCL numerical compliance requirements for - single precision and double precision floating-point, as well as edge - case behavior. + * An associated context. + * A program source or binary. + * The latest successfully built program executable, library or compiled + binary, the list of devices for which the program executable, library or + compiled binary is built, the build options used and a build log. + * The number of kernel objects currently attached. -`-cl-fast-relaxed-math` :: - Sets the optimization options `-cl-finite-math-only` and - `-cl-unsafe-math-optimizations`. - This option causes the preprocessor macro `+__FAST_RELAXED_MATH__+` to - be defined in the OpenCL program. 
+=== Creating Program Objects -==== Options to Request or Suppress Warnings +[open,refpage='clCreateProgramWithSource',desc='Creates a program object for a context, and loads source code specified by text strings into the program object.',type='protos'] +-- +To create a program object for a context and load source code into that +object, call the function -Warnings are diagnostic messages that report constructions which are not -inherently erroneous but which are risky or suggest there may have been an -error. -The following language-independent options do not enable specific warnings -but control the kinds of diagnostics produced by the OpenCL compiler. -These options are ignored for programs created with IL. +include::{generated}/api/protos/clCreateProgramWithSource.txt[] +include::{generated}/api/version-notes/clCreateProgramWithSource.asciidoc[] -`-w` :: - Inhibit all warning messages. + * _context_ must be a valid OpenCL context. + * _strings_ is an array of _count_ pointers to optionally null-terminated + character strings that make up the source code. + * _lengths_ argument is an array with the number of chars in each string + (the string length). + If an element in _lengths_ is zero, its accompanying string is + null-terminated. + If _lengths_ is `NULL`, all strings in the _strings_ argument are considered + null-terminated. + Any length value passed in that is greater than zero excludes the null + terminator in its count. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -`-Werror` :: - Make all warnings into errors. +The source code specified by _strings_ will be loaded into the program +object. +The devices associated with the program object are the devices associated +with _context_. +The source code specified by _strings_ is either an OpenCL C program source, +header or implementation-defined source for custom devices that support an +online compiler.
+OpenCL {cpp} is not supported as an online-compiled kernel language through +this interface. -[[opencl-c-version]] -==== Options Controlling the OpenCL C version +// refError -The following option controls the version of OpenCL C that the compiler -accepts. -These options are ignored for programs created with IL. +{clCreateProgramWithSource} returns a valid non-zero program object and +_errcode_ret_ is set to {CL_SUCCESS} if the program object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: -`-cl-std=` :: - Determine the OpenCL C language version to use. - A value for this option must be provided. - Valid values are: -+ --- - * `CL1.1`: Support OpenCL C 1.1 language features defined in _section 6_ of - the OpenCL 1.1 specification or in the unified OpenCL C specification. - * `CL1.2`: Support OpenCL C 1.2 language features defined in _section 6_ of - the OpenCL 1.2 specification or in the unified OpenCL C specification. - * `CL2.0`: Support OpenCL C 2.0 language features defined in the OpenCL C 2.0 - specification or in the unified OpenCL C specification. - * `CL3.0`: Support OpenCL C 3.0 language features defined in the unified - OpenCL C specification. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if _count_ is zero or if _strings_ or any entry in + _strings_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- -Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL1.1` -option *will fail* to compile the program for any devices with -{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.0 and when -{CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 1.1. 
+[open,refpage='clCreateProgramWithIL',desc='Creates a program object for a context, and loads the IL into the program object.',type='protos',alias='clCreateProgramWithILKHR'] +-- +To create a program object for a context and load code in an intermediate +language into that object, call the function -Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL1.2` -option *will fail* to compile the program for any devices with -{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.1 or earlier -and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 1.2. +include::{generated}/api/protos/clCreateProgramWithIL.txt[] +include::{generated}/api/version-notes/clCreateProgramWithIL.asciidoc[] -Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL2.0` -option *will fail* to compile the program for any devices with -{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.2 or earlier -and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 2.0. +ifdef::cl_khr_il_program[] +or the equivalent -Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL3.0` -option *will fail* to compile the program for any devices with -{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 2.0 or earlier -and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 3.0. +include::{generated}/api/protos/clCreateProgramWithILKHR.txt[] +include::{generated}/api/version-notes/clCreateProgramWithILKHR.asciidoc[] +endif::cl_khr_il_program[] -If the `-cl-std` build option is not specified, the highest OpenCL C 1.x -language version supported by each device is used when compiling the program -for each device. -Applications are required to specify the `-cl-std=CL2.0` build option to -compile or build programs with OpenCL C 2.0 and the `-cl-std=CL3.0` -build option to compile or build programs with OpenCL C 3.0. + * _context_ must be a valid OpenCL context. 
+ * _il_ is a pointer to a block of memory containing SPIR-V or an + implementation-defined intermediate language. + * _length_ is the length of the block pointed to by _il_. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. +The intermediate language pointed to by _il_ and with length in bytes _length_ +will be loaded into the program object. +The devices associated with the program object are the devices associated +with _context_. -==== Options for Querying Kernel Argument Information +// refError -IMPORTANT: Querying for kernel argument information is <> version 1.2. +{clCreateProgramWithIL} returns a valid non-zero program object and +_errcode_ret_ is set to {CL_SUCCESS} if the program object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: -`-cl-kernel-arg-info` :: - This option allows the compiler to store information about the arguments - of a kernel(s) in the program executable. - The argument information stored includes the argument name, its type, - the address space and access qualifiers used. - Refer to description of {clGetKernelArgInfo} on how to query this - information. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_OPERATION} if no devices in _context_ support intermediate + language programs. + * {CL_INVALID_VALUE} if _il_ is `NULL` or if _length_ is zero. + * {CL_INVALID_VALUE} if the _length_-byte block of memory pointed to by + _il_ does not contain well-formed intermediate language input that can + be consumed by the OpenCL runtime. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
+-- +[open,refpage='clCreateProgramWithBinary',desc='Creates a program object for a context, and loads binary bits into the program object.',type='protos'] +-- +To create a program object for a context and load binary bits into that +object, call the function -==== Options for debugging your program +include::{generated}/api/protos/clCreateProgramWithBinary.txt[] +include::{generated}/api/version-notes/clCreateProgramWithBinary.asciidoc[] -IMPORTANT: Debugging options are <> version 2.0. + * _context_ must be a valid OpenCL context. + * _device_list_ is a pointer to a list of devices that are in _context_. + _device_list_ must be a non-`NULL` value. + The binaries are loaded for devices specified in this list. + * _num_devices_ is the number of devices listed in _device_list_. + * _lengths_ is an array of the size in bytes of the program binaries to be + loaded for devices specified by _device_list_. + * _binaries_ is an array of pointers to program binaries to be loaded for + devices specified by _device_list_. + For each device given by _device_list_[i], the pointer to the program binary + for that device is given by _binaries_[i] and the length of this + corresponding binary is given by _lengths_[i]. + _lengths_[i] cannot be zero and _binaries_[i] cannot be a `NULL` pointer. + * _binary_status_ returns whether the program binary for each device specified + in _device_list_ was loaded successfully or not. + It is an array of _num_devices_ entries and returns {CL_SUCCESS} in + _binary_status_[i] if binary was successfully loaded for device specified by + _device_list_[i]; otherwise returns {CL_INVALID_VALUE} if _lengths_[i] is zero + or if _binaries_[i] is a `NULL` value or {CL_INVALID_BINARY} in + _binary_status_[i] if program binary is not a valid binary for the specified + device. + If _binary_status_ is `NULL`, it is ignored. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. 
-`-g` :: - This option can currently be used to generate additional errors for the - built-in functions that allow you to enqueue commands on a device (refer - to OpenCL kernel languages specifications). +The devices associated with the program object will be the list of devices +specified by _device_list_. +The list of devices specified by _device_list_ must be devices associated +with _context_. +The program binaries specified by _binaries_ will be loaded into the program +object. +They contain bits that describe one of the following: -[[linker-options]] -=== Linker Options + * a program executable to be run on the device(s) associated with + _context_, + * a compiled program for device(s) associated with _context_, or + * a library of compiled programs for device(s) associated with _context_. -NOTE: Linker options are <> version 1.2. +The program binary can consist of either or both: -This specification defines a standard set of linker options that must be -supported by the OpenCL C compiler when linking compiled programs online or -offline. -These linker options are categorized as library linking options and program -linking options. -These may be extended by a set of vendor- or platform-specific options. + * Device-specific code and/or, + * Implementation-specific intermediate representation (IR) which will be + converted to the device-specific code. +OpenCL allows applications to create a program object using the program +source or binary and build appropriate program executables. +This can be very useful as it allows applications to load program source and +then compile and link to generate a program executable online on its first +instance for appropriate OpenCL devices in the system. +These executables can now be queried and cached by the application. +The cached executables can be read and loaded by the application, which can +help significantly reduce the application initialization time. 
-==== Library Linking Options +ifdef::cl_khr_spir[] +If the `<<cl_khr_spir>>` extension is supported, {clCreateProgramWithBinary} +can be used to load a SPIR binary. +Once a program object has been created from a SPIR binary, {clBuildProgram} +can be called to build a program executable or {clCompileProgram} can be +called to compile the SPIR binary. +endif::cl_khr_spir[] -IMPORTANT: Library linking options are <> version -1.2. +// refError -The following options can be specified when creating a library of compiled -binaries. +{clCreateProgramWithBinary} returns a valid non-zero program object and +_errcode_ret_ is set to {CL_SUCCESS} if the program object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: -`-create-library` :: - Create a library of compiled binaries specified in _input_programs_ - argument to {clLinkProgram}. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if _device_list_ is `NULL` or _num_devices_ is zero. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in + the list of devices associated with _context_. + * {CL_INVALID_VALUE} if _lengths_ or _binaries_ is `NULL` or if any entry + in _lengths_[i] is zero or _binaries_[i] is `NULL`. + * {CL_INVALID_BINARY} if an invalid program binary was encountered for any + device. + _binary_status_ will return specific status for each device. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- -`-enable-link-options` :: - Allows the linker to modify the library behavior based on one or more - link options (described in <>) when this library is linked with a program executable. - This option must be specified with the create-library option.
+[open,refpage='clCreateProgramWithBuiltInKernels',desc='Creates a program object for a context, and loads the information related to the built-in kernels into a program object.',type='protos'] +-- +To create a program object for a context and load the information related +to the built-in kernels into that object, call the function +include::{generated}/api/protos/clCreateProgramWithBuiltInKernels.txt[] +include::{generated}/api/version-notes/clCreateProgramWithBuiltInKernels.asciidoc[] -[[program-linking-options]] -==== Program Linking Options + * _context_ must be a valid OpenCL context. + * _num_devices_ is the number of devices listed in _device_list_. + * _device_list_ is a pointer to a list of devices that are in _context_. + _device_list_ must be a non-`NULL` value. + The built-in kernels are loaded for devices specified in this list. + * _kernel_names_ is a semi-colon separated list of built-in kernel names. -The following options can be specified when linking a program executable. +The devices associated with the program object will be the list of devices +specified by _device_list_. +The list of devices specified by _device_list_ must be devices associated +with _context_. -`-cl-denorms-are-zero` + -`-cl-no-signed-zeros` + -`-cl-unsafe-math-optimizations` + -`-cl-finite-math-only` + -`-cl-fast-relaxed-math` + -`-cl-no-subgroup-ifp` (<> version 2.1) +// refError -The options are described in <> and <>. -The linker may apply these options to all compiled program objects -specified to {clLinkProgram}. -The linker may apply these options only to libraries which were created -with the option `-enable-link-options`. +{clCreateProgramWithBuiltInKernels} returns a valid non-zero program object +and _errcode_ret_ is set to {CL_SUCCESS} if the program object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + * {CL_INVALID_CONTEXT} if _context_ is not a valid context.
+ * {CL_INVALID_VALUE} if _device_list_ is `NULL` or _num_devices_ is zero. + * {CL_INVALID_VALUE} if _kernel_names_ is `NULL` or _kernel_names_ contains + a kernel name that is not supported by any of the devices in + _device_list_. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in the list + of devices associated with _context_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- -=== Unloading the OpenCL Compiler -[open,refpage='clUnloadPlatformCompiler',desc='Allows the implementation to release the resources allocated by the OpenCL compiler for a platform.',type='protos'] +=== Retaining and Releasing Program Objects + +[open,refpage='clRetainProgram',desc='Increments the program reference count.',type='protos'] -- -To unload an OpenCL compiler for a platform, call the function +To retain a program object, call the function -include::{generated}/api/protos/clUnloadPlatformCompiler.txt[] -include::{generated}/api/version-notes/clUnloadPlatformCompiler.asciidoc[] +include::{generated}/api/protos/clRetainProgram.txt[] +include::{generated}/api/version-notes/clRetainProgram.asciidoc[] - * _platform_ is the platform to unload. + * _program_ is the program object to be retained. -This function allows the implementation to release the resources allocated -by the OpenCL compiler for _platform_. -This is a hint from the application and does not guarantee that the compiler -will not be used in the future or that the compiler will actually be -unloaded by the implementation. -Calls to {clBuildProgram}, {clCompileProgram} or {clLinkProgram} after -{clUnloadPlatformCompiler} will reload the compiler, if necessary, to build -the appropriate program executable. +The _program_ reference count is incremented. +All APIs that create a program do an implicit retain. 
// refError -{clUnloadPlatformCompiler} returns {CL_SUCCESS} if the function is executed +{clRetainProgram} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- -[open,refpage='clUnloadCompiler',desc='Allows the implementation to release the resources allocated by the OpenCL compiler.',type='protos'] +[open,refpage='clReleaseProgram',desc='Decrements the program reference count.',type='protos'] -- -Alternatively, if you are not using OpenCL via the ICD loader, you may unload the OpenCL compiler with the function +To release a program object, call the function -include::{generated}/api/protos/clUnloadCompiler.txt[] -include::{generated}/api/version-notes/clUnloadCompiler.asciidoc[] +include::{generated}/api/protos/clReleaseProgram.txt[] +include::{generated}/api/version-notes/clReleaseProgram.asciidoc[] -This function allows the implementation to release the resources allocated -by the OpenCL compiler. -This is a hint from the application and does not guarantee that the compiler -will not be used in the future or that the compiler will actually be -unloaded by the implementation. -Calls to {clBuildProgram}, {clCompileProgram} or {clLinkProgram} after -{clUnloadCompiler} will reload the compiler, if necessary, to build -the appropriate program executable. + * _program_ is the program object to be released. -// refError +The _program_ reference count is decremented. +The program object is deleted after all kernel objects associated with +_program_ have been deleted and the _program_ reference count becomes zero. 
-{clUnloadCompiler} will always return {CL_SUCCESS}. --- +// refError +{clReleaseProgram} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: -=== Program Object Queries + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -[open,refpage='clGetProgramInfo',desc='Returns information about the program object.',type='protos'] +Using this function to release a reference that was not obtained by creating +the object or by calling {clRetainProgram} causes undefined behavior. -- -To return information about a program object, call the function - -include::{generated}/api/protos/clGetProgramInfo.txt[] -include::{generated}/api/version-notes/clGetProgramInfo.asciidoc[] - * _program_ specifies the program object being queried. - * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetProgramInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. 
+[open,refpage='clSetProgramReleaseCallback',desc='Registers a destructor callback function with a program object.',type='protos'] +-- +To register a callback function with a program object that is called when +the program object is destroyed, call the function -[[program-info-table]] -.List of supported param_names by {clGetProgramInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Program Info | Return Type | Description -| {CL_PROGRAM_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] +include::{generated}/api/protos/clSetProgramReleaseCallback.txt[] +include::{generated}/api/version-notes/clSetProgramReleaseCallback.asciidoc[] -include::{generated}/api/version-notes/CL_PROGRAM_REFERENCE_COUNT.asciidoc[] - | {cl_uint_TYPE} - | Return the _program_ reference count. -| {CL_PROGRAM_CONTEXT_anchor} + * _program_ specifies the program object to register the callback to. + * _pfn_notify_ is the callback function to register. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + The parameters to this callback function are: + ** _program_ is the program being deleted. + When the callback function is called by the implementation, this program + object is no longer valid. + _program_ is only provided for reference purposes. + ** _user_data_ is a pointer to user supplied data. + * _user_data_ will be passed as the _user_data_ argument when _pfn_notify_ is + called. + _user_data_ can be `NULL`. -include::{generated}/api/version-notes/CL_PROGRAM_CONTEXT.asciidoc[] - | {cl_context_TYPE} - | Return the context specified when the program object is created -| {CL_PROGRAM_NUM_DEVICES_anchor} +Each call to {clSetProgramReleaseCallback} registers the specified +callback function on a callback stack associated with _program_. +The registered callback functions are called in the reverse order in +which they were registered.
+The registered callback functions are called after destructors (if any) for +program scope global variables (if any) are called and before the program +object is deleted. +This provides a mechanism for an application to be notified when destructors +for program scope global variables are complete. -include::{generated}/api/version-notes/CL_PROGRAM_NUM_DEVICES.asciidoc[] - | {cl_uint_TYPE} - | Return the number of devices associated with _program_. -| {CL_PROGRAM_DEVICES_anchor} +// refError -include::{generated}/api/version-notes/CL_PROGRAM_DEVICES.asciidoc[] - | {cl_device_id_TYPE}[] - | Return the list of devices associated with the program object. - This can be the devices associated with context on which the program - object has been created or can be a subset of devices that are - specified when a program object is created using - {clCreateProgramWithBinary}. -| {CL_PROGRAM_SOURCE_anchor} +{clSetProgramReleaseCallback} may unconditionally return an error if no +devices in the context associated with _program_ support destructors for +program scope global variables. +Support for constructors and destructors for program scope global variables +is required only for OpenCL 2.2 devices. -include::{generated}/api/version-notes/CL_PROGRAM_SOURCE.asciidoc[] - | {char_TYPE}[] - | Return the program source code specified by - {clCreateProgramWithSource}. - The source string returned is a concatenation of all source strings - specified to {clCreateProgramWithSource} with a null terminator. - The concatenation strips any nulls in the original source strings. +{clSetProgramReleaseCallback} returns {CL_SUCCESS} if the function is executed +successfully. 
+Otherwise, it returns one of the following errors: - If _program_ is created using {clCreateProgramWithBinary}, - {clCreateProgramWithIL} or {clCreateProgramWithBuiltInKernels}, a - null string or the appropriate program source code is returned - depending on whether or not the program source code is stored in the - binary. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_OPERATION} if no devices in the context associated with + _program_ support destructors for program scope global variables. + * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- - The actual number of characters that represents the program source - code including the null terminator is returned in - _param_value_size_ret_. -| {CL_PROGRAM_IL_anchor} -include::{generated}/api/version-notes/CL_PROGRAM_IL.asciidoc[] -Also see extension *cl_khr_il_program*. - | {char_TYPE}[] - | Returns the program IL for programs created with - {clCreateProgramWithIL}. +=== Setting SPIR-V Specialization Constants - If _program_ is created with {clCreateProgramWithSource}, - {clCreateProgramWithBinary} or {clCreateProgramWithBuiltInKernels} - the memory pointed to by param_value will be unchanged and - param_value_size_retwill be set to 0. -| {CL_PROGRAM_BINARY_SIZES_anchor} +NOTE: Specialization constants are <<unified-spec, missing before>> version +2.2. -include::{generated}/api/version-notes/CL_PROGRAM_BINARY_SIZES.asciidoc[] - | {size_t_TYPE}[] - | Returns an array that contains the size in bytes of the program - binary (could be an executable binary, compiled binary or library - binary) for each device associated with program. - The size of the array is the number of devices associated with - program.
- If a binary is not available for a device(s), a size of zero is - returned. +[open,refpage='clSetProgramSpecializationConstant',desc='',type='protos'] +-- +To set the value of a specialization constant, call the function - If _program_ is created using {clCreateProgramWithBuiltInKernels}, - the implementation may return zero in any entries of the returned - array. -| {CL_PROGRAM_BINARIES_anchor} +include::{generated}/api/protos/clSetProgramSpecializationConstant.txt[] +include::{generated}/api/version-notes/clSetProgramSpecializationConstant.asciidoc[] -include::{generated}/api/version-notes/CL_PROGRAM_BINARIES.asciidoc[] - | {unsigned_char_TYPE}*[] - | Return the program binaries (could be an executable binary, compiled - binary or library binary) for all devices associated with program. - For each device in program, the binary returned can be the binary - specified for the device when program is created with - {clCreateProgramWithBinary} or it can be the executable binary - generated by {clBuildProgram} or {clLinkProgram}. - If _program_ is created with {clCreateProgramWithSource} or - {clCreateProgramWithIL}, the binary returned is the binary generated - by {clBuildProgram}, {clCompileProgram} or {clLinkProgram}. - The bits returned can be an implementation-specific intermediate - representation (a.k.a. IR) or device specific executable bits or - both. - The decision on which information is returned in the binary is up to - the OpenCL implementation. + * _program_ must be a valid OpenCL program created from an intermediate + language (e.g. SPIR-V). + * _spec_id_ identifies the specialization constant whose value will be + set. + * _spec_size_ specifies the size in bytes of the data pointed to by + _spec_value_. + This should be 1 for boolean constants. + For all other constant types this should match the size of the + specialization constant in the module. 
+ * _spec_value_ is a pointer to the memory location that contains the value of + the specialization constant. + The data pointed to by _spec_value_ are copied and can be safely reused by + the application after {clSetProgramSpecializationConstant} returns. + This specialization value will be used by subsequent calls to + {clBuildProgram} until another call to {clSetProgramSpecializationConstant} + changes it. + If a specialization constant is a boolean constant, _spec_value_ should be a + pointer to a {cl_uchar_TYPE} value. + A value of zero will set the specialization constant to false; any other + value will set it to true. - param_value points to an array of `n` pointers allocated by the - caller, where `n` is the number of devices associated with program. - The buffer sizes needed to allocate the memory that these `n` - pointers refer to can be queried using the {CL_PROGRAM_BINARY_SIZES} - query as described in this table. +Calling this function multiple times for the same specialization constant +shall cause the last provided value to override any previously specified +value. +The values are used by a subsequent {clBuildProgram} call for the _program_. - Each entry in this array is used by the implementation as the - location in memory where to copy the program binary for a specific - device, if there is a binary available. - To find out which device the program binary in the array refers to, - use the {CL_PROGRAM_DEVICES} query to get the list of devices. - There is a one-to-one correspondence between the array of n pointers - returned by {CL_PROGRAM_BINARIES} and array of devices returned by - {CL_PROGRAM_DEVICES}. -| {CL_PROGRAM_NUM_KERNELS_anchor} +The application is not required to provide values for every specialization +constant contained in the module. If a value is not set by this API +call, the default value will be used during the build.
-include::{generated}/api/version-notes/CL_PROGRAM_NUM_KERNELS.asciidoc[] - | {size_t_TYPE} - | Returns the number of kernels declared in _program_ that can be - created with {clCreateKernel}. - This information is only available after a successful program - executable has been built for at least one device in the list of - devices associated with _program_. -| {CL_PROGRAM_KERNEL_NAMES_anchor} +// refError -include::{generated}/api/version-notes/CL_PROGRAM_KERNEL_NAMES.asciidoc[] - | {char_TYPE}[] - | Returns a semi-colon separated list of kernel names in _program_ - that can be created with {clCreateKernel}. - This information is only available after a successful program - executable has been built for at least one device in the list of - devices associated with _program_. -| {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT_anchor} +{clSetProgramSpecializationConstant} returns {CL_SUCCESS} if the function is +executed successfully. -include::{generated}/api/version-notes/CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT.asciidoc[] - | {cl_bool_TYPE} - | This indicates that the _program_ object contains non-trivial - constructor(s) that will be executed by runtime before any kernel - from the program is executed. - This information is only available after a successful program - executable has been built for at least one device in the list of - devices associated with _program_. +Otherwise, it returns one of the following errors: - Querying {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT} may unconditionally - return {CL_FALSE} if no devices associated with _program_ support - constructors for program scope global variables. - Support for constructors and destructors for program scope global - variables is required only for OpenCL 2.2 devices. 
-| {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT.asciidoc[] - | {cl_bool_TYPE} - | This indicates that the program object contains non-trivial - destructor(s) that will be executed by runtime when _program_ is - destroyed. - This information is only available after a successful program - executable has been built for at least one device in the list of - devices associated with _program_. - - Querying {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT} may unconditionally - return {CL_FALSE} if no devices associated with _program_ support - destructors for program scope global variables. - Support for constructors and destructors for program scope global - variables is required only for OpenCL 2.2 devices. -|==== - -// refError - -{clGetProgramInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_PROGRAM} if _program_ is a not a valid program object. - * {CL_INVALID_PROGRAM_EXECUTABLE} if _param_name_ is - {CL_PROGRAM_NUM_KERNELS}, {CL_PROGRAM_KERNEL_NAMES}, - {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT}, or - {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT} and a successful program executable - has not been built for at least one device in the list of devices - associated with _program_. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object created + from an intermediate language (e.g. SPIR-V), or if the intermediate + language does not support specialization constants. + * {CL_INVALID_OPERATION} if no devices associated with _program_ support + intermediate language programs. + * {CL_COMPILER_NOT_AVAILABLE} if _program_ is created with + {clCreateProgramWithIL} and a compiler is not + available, i.e. 
{CL_DEVICE_COMPILER_AVAILABLE} specified in the + <> table is set to {CL_FALSE}. + * {CL_INVALID_SPEC_ID} if _spec_id_ is not a valid specialization constant + identifier. + * {CL_INVALID_VALUE} if _spec_size_ does not match the size of the + specialization constant in the module, or if _spec_value_ is + `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clGetProgramBuildInfo',desc='Returns build information for each device in the program object.',type='protos'] --- -To return build information for each device in the program object, call the -function - -include::{generated}/api/protos/clGetProgramBuildInfo.txt[] -include::{generated}/api/version-notes/clGetProgramBuildInfo.asciidoc[] - - * _program_ specifies the program object being queried. - * _device_ specifies the device for which build information is being queried. - _device_ must be a valid device associated with _program_. - * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetProgramBuildInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. 
- -[[program-build-info-table]] -.List of supported param_names by {clGetProgramBuildInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Program Build Info | Return Type | Description -| {CL_PROGRAM_BUILD_STATUS_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_BUILD_STATUS.asciidoc[] - | {cl_build_status_TYPE} - | Returns the build, compile or link status, whichever was performed - last on the specified _program_ object for _device_. - - This can be one of the following: - - {CL_BUILD_NONE_anchor} - The build status returned if no {clBuildProgram}, - {clCompileProgram} or {clLinkProgram} has been performed on the - specified _program_ object for _device_). - - {CL_BUILD_ERROR_anchor} - The build status returned if {clBuildProgram}, - {clCompileProgram} or {clLinkProgram} - whichever was performed last - on the specified _program_ object for _device_ - generated an error. - - {CL_BUILD_SUCCESS_anchor} - The build status returned if {clBuildProgram}, - {clCompileProgram} or {clLinkProgram} - whichever was performed last - on the specified _program_ object for _device_ - was successful. - - {CL_BUILD_IN_PROGRESS_anchor} - The build status returned if - {clBuildProgram}, {clCompileProgram} or {clLinkProgram} - whichever - was performed last on the specified _program_ object for _device_ - has - not finished. -| {CL_PROGRAM_BUILD_OPTIONS_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_BUILD_OPTIONS.asciidoc[] - | {char_TYPE}[] - | Return the build, compile or link options specified by the options - argument in {clBuildProgram}, {clCompileProgram} or {clLinkProgram}, - whichever was performed last on the specified _program_ object for - _device_. - If build status of the specified _program_ for _device_ is - {CL_BUILD_NONE}, an empty string is returned. 
-| {CL_PROGRAM_BUILD_LOG_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_BUILD_LOG.asciidoc[] - | {char_TYPE}[] - | Return the build, compile or link log for {clBuildProgram}, - {clCompileProgram} or {clLinkProgram}, whichever was performed last - on program for device. - - If build status of the specified _program_ for _device_ is - {CL_BUILD_NONE}, an empty string is returned. -| {CL_PROGRAM_BINARY_TYPE_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_BINARY_TYPE.asciidoc[] - | {cl_program_binary_type_TYPE} - | Return the program binary type for device. - This can be one of the following values: +=== Building Program Executables - {CL_PROGRAM_BINARY_TYPE_NONE_anchor} - There is no binary associated - with the specified _program_ object for _device_. +[open,refpage='clBuildProgram',desc='Builds (compiles and links) a program executable from the program source or binary.',type='protos'] +-- +To build (compile & link) a program executable, call the function - {CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT_anchor} - A compiled binary is - associated with _device_. - This is the case when the specified _program_ object was created using - {clCreateProgramWithSource} and compiled using {clCompileProgram}, or - when a compiled binary was loaded using {clCreateProgramWithBinary}. +include::{generated}/api/protos/clBuildProgram.txt[] +include::{generated}/api/version-notes/clBuildProgram.asciidoc[] - {CL_PROGRAM_BINARY_TYPE_LIBRARY_anchor} - A library binary is - associated with _device_. - This is the case when the specified _program_ object was linked by - {clLinkProgram} using the `-create-library` link option, or when a - compiled library binary was loaded using {clCreateProgramWithBinary}. + * _program_ is the program object. + * _device_list_ is a pointer to a list of devices associated with _program_. 
+ If _device_list_ is a `NULL` value, the program executable is built for all + devices associated with _program_ for which a source or binary has been + loaded. + If _device_list_ is a non-`NULL` value, the program executable is built for + devices specified in this list for which a source or binary has been loaded. + * _num_devices_ is the number of devices listed in _device_list_. + * _options_ is a pointer to a null-terminated string of characters that + describes the build options to be used for building the program executable. + The list of supported options is described in <>. + If the program was created using {clCreateProgramWithBinary} and _options_ + is a `NULL` pointer, the program will be built as if _options_ were the same + as when the program binary was originally built. + If the program was created using {clCreateProgramWithBinary} and _options_ + string contains anything other than the same options in the same order + (whitespace ignored) as when the program binary was originally built, then + the behavior is implementation-defined. + Otherwise, if _options_ is a `NULL` pointer then it will have the same + result as the empty string. + * _pfn_notify_ is a function pointer to a notification routine. + The notification routine is a callback function that an application can + register and which will be called when the program executable has been built + (successfully or unsuccessfully). + If _pfn_notify_ is not `NULL`, {clBuildProgram} does not need to wait for + the build to complete and can return immediately once the build operation + can begin. + Any state changes of the program object that result from calling + {clBuildProgram} (e.g. build status or log) will be observable from this + callback function. 
+ The build operation can begin if the context, program whose sources are + being compiled and linked, list of devices and build options specified are + all valid and appropriate host and device resources needed to perform the + build are available. + If _pfn_notify_ is `NULL`, {clBuildProgram} does not return until the build + has completed. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + * _user_data_ will be passed as an argument when _pfn_notify_ is called. + _user_data_ can be `NULL`. - {CL_PROGRAM_BINARY_TYPE_EXECUTABLE_anchor} - An executable binary is - associated with _device_. - This is the case when the specified _program_ object was linked by - {clLinkProgram} without the `-create-library` link option, or when an - executable binary was built using {clBuildProgram}. -| {CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE_anchor} +The program executable is built from the program source or binary for all +the devices, or a specific device(s) in the OpenCL context associated with +_program_. +OpenCL allows program executables to be built using the source or the +binary. +{clBuildProgram} must be called for _program_ created using +{clCreateProgramWithSource}, {clCreateProgramWithIL} or +{clCreateProgramWithBinary} to build the program executable for one or more +devices associated with _program_. +If _program_ is created with {clCreateProgramWithBinary}, then the program +binary must be an executable binary (not a compiled binary or library). -include::{generated}/api/version-notes/CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE.asciidoc[] - | {size_t_TYPE} - | The total amount of storage, in bytes, used by program variables in - the global address space. -|==== +The executable binary can be queried using {clGetProgramInfo}(_program_, +{CL_PROGRAM_BINARIES}, ...) 
and can be specified to +{clCreateProgramWithBinary} to create a new program object. // refError -{clGetProgramBuildInfo} returns {CL_SUCCESS} if the function is executed +{clBuildProgram} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated - with _program_. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_PROGRAM} if _program_ is a not a valid program object. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater + than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. + * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not + `NULL`. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in + the list of devices associated with _program_. + * {CL_INVALID_BINARY} if _program_ is created with + {clCreateProgramWithBinary} and devices listed in _device_list_ do not + have a valid program binary loaded. + * {CL_INVALID_BUILD_OPTIONS} if the build options specified by _options_ are + invalid. + * {CL_COMPILER_NOT_AVAILABLE} if _program_ is created with +ifdef::cl_khr_il_program[{clCreateProgramWithILKHR},] + {clCreateProgramWithSource} or {clCreateProgramWithIL} and a compiler is + not available, i.e. {CL_DEVICE_COMPILER_AVAILABLE} specified in the + <> table is set to {CL_FALSE}. + * {CL_BUILD_PROGRAM_FAILURE} if there is a failure to build the program + executable. + This error will be returned if {clBuildProgram} does not return until + the build has completed. 
+ * {CL_INVALID_OPERATION} if the build of a program executable for any of the + devices listed in _device_list_ by a previous call to {clBuildProgram} + for _program_ has not completed. + * {CL_INVALID_OPERATION} if there are kernel objects attached to _program_. + * {CL_INVALID_OPERATION} if _program_ was not created with + {clCreateProgramWithSource}, {clCreateProgramWithIL} or + {clCreateProgramWithBinary}. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +-- -[NOTE] -==== -A program binary (compiled binary, library binary or executable binary) -built for a parent device can be used by all its sub-devices. -If a program binary has not been built for a sub-device, the program binary -associated with the parent device will be used. - -A program binary for a device specified with {clCreateProgramWithBinary} or -queried using {clGetProgramInfo} can be used as the binary for the -associated root device, and all sub-devices created from the root-level -device or sub-devices thereof. -==== --- +=== Separate Compilation and Linking of Programs -== Kernel Objects - -A kernel is a function declared in a program. -A kernel is identified by the `+__kernel+` qualifier applied to any function -in a program. -A kernel object encapsulates the specific `+__kernel+` function declared in -a program and the argument values to be used when executing this -`+__kernel+` function. +NOTE: Separate compilation and linking are <> +version 1.2. +OpenCL programs are compiled and linked to support the following: -=== Creating Kernel Objects + * Separate compilation and link stages. + Program sources can be compiled to generate a compiled binary object and + linked in a separate stage with other compiled program objects to the + program executable. + * Embedded headers. 
+ In OpenCL 1.0 and 1.1, the `-I` build option could be used to specify the + list of directories to be searched for header files that are included + by a program source(s). + OpenCL 1.2 extends this by allowing the header sources to come from + program objects instead of just header files. + * Libraries. + The linker can be used to link compiled objects and libraries into a + program executable or to create a library of compiled binaries. -[open,refpage='clCreateKernel',desc='Creates a kernel object.',type='protos'] +[open,refpage='clCompileProgram',desc='Compiles a program\'s source for all the devices or a specific device(s) in the OpenCL context associated with a program.',type='protos'] -- -To create a kernel object, use the function - -include::{generated}/api/protos/clCreateKernel.txt[] -include::{generated}/api/version-notes/clCreateKernel.asciidoc[] +To compile a program's source for all the devices or a specific device(s) in +the OpenCL context associated with the program, call the function - * _program_ is a program object with a successfully built executable. - * _kernel_name_ is a function name in the program declared with the - `+__kernel+` qualifier. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +include::{generated}/api/protos/clCompileProgram.txt[] +include::{generated}/api/version-notes/clCompileProgram.asciidoc[] -// refError + * _program_ is the program object that is the compilation target. + * _device_list_ is a pointer to a list of devices associated with _program_. + If _device_list_ is a `NULL` value, the compile is performed for all devices + associated with _program_. + If _device_list_ is a non-`NULL` value, the compile is performed for devices + specified in this list. + * _num_devices_ is the number of devices listed in _device_list_.
+ * _options_ is a pointer to a null-terminated string of characters that + describes the compilation options to be used for building the program + executable. + If _options_ is a `NULL` pointer then it will have the same result as the + empty string. + Certain options are ignored when _program_ is created with IL. + The list of supported options is as described in <>. + * _num_input_headers_ specifies the number of programs that describe headers + in the array referenced by _input_headers_. + * _input_headers_ is an array of program embedded headers created with + {clCreateProgramWithSource}. + * _header_include_names_ is an array that has a one to one correspondence with + _input_headers_. + Each entry in _header_include_names_ specifies the include name used by + source in _program_ that comes from an embedded header. + The corresponding entry in _input_headers_ identifies the program object + which contains the header source to be used. + The embedded headers are first searched before the headers in the list of + directories specified by the `-I` compile option (as described in + <>). + If multiple entries in _header_include_names_ refer to the same header name, + the first one encountered will be used. + * _pfn_notify_ is a function pointer to a notification routine. + The notification routine is a callback function that an application can + register and which will be called when the program executable has been built + (successfully or unsuccessfully). + If _pfn_notify_ is not `NULL`, {clCompileProgram} does not need to wait for + the compiler to complete and can return immediately once the compilation can + begin. + Any state changes of the program object that result from calling + {clCompileProgram} (e.g. compile status or log) will be observable from this + callback function. 
+ The compilation can begin if the context, program whose sources are being + compiled, list of devices, input headers, programs that describe input + headers and compiler options specified are all valid and appropriate host + and device resources needed to perform the compile are available. + If _pfn_notify_ is `NULL`, {clCompileProgram} does not return until the + compiler has completed. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + * _user_data_ will be passed as an argument when _pfn_notify_ is called. + _user_data_ can be `NULL`. -{clCreateKernel} returns a valid non-zero kernel object and _errcode_ret_ is -set to {CL_SUCCESS} if the kernel object is created successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +The pre-processor runs before the program sources are compiled. +The compiled binary is built for all devices associated with _program_ or +the list of devices specified. +The compiled binary can be queried using {clGetProgramInfo}(_program_, +{CL_PROGRAM_BINARIES}, ...) and can be passed to {clCreateProgramWithBinary} +to create a new program object. - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built - executable for _program_. - * {CL_INVALID_KERNEL_NAME} if _kernel_name_ is not found in _program_. - * {CL_INVALID_KERNEL_DEFINITION} if the function definition for `+__kernel+` - function given by _kernel_name_ such as the number of arguments, the - argument types are not the same for all devices for which the _program_ - executable has been built. - * {CL_INVALID_VALUE} if _kernel_name_ is `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. 
- * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +If _program_ was created using {clCreateProgramWithIL}, then +_num_input_headers_, _input_headers_, and _header_include_names_ are +ignored. -[open,refpage='clCreateKernelsInProgram',desc='Creates kernel objects for all kernel functions in a program object.',type='protos'] --- -To create kernel objects for all kernel functions in a program, -call the function +For example, consider the following program source: -include::{generated}/api/protos/clCreateKernelsInProgram.txt[] -include::{generated}/api/version-notes/clCreateKernelsInProgram.asciidoc[] +[source,opencl_c] +---- +#include <foo.h> +#include <mydir/myinc.h> +__kernel void +image_filter (int n, int m, + __constant float *filter_weights, + __read_only image2d_t src_image, + __write_only image2d_t dst_image) +{ +... +} +---- - * _program_ is a program object with a successfully built executable. - * _num_kernels_ is the size of memory pointed to by _kernels_ specified as the - number of {cl_kernel_TYPE} entries. - * _kernels_ is the buffer where the kernel objects for kernels in _program_ - will be returned. - If _kernels_ is `NULL`, it is ignored. - If _kernels_ is not `NULL`, _num_kernels_ must be greater than or equal to - the number of kernels in _program_. - * _num_kernels_ret_ is the number of kernels in _program_. - If _num_kernels_ret_ is `NULL`, it is ignored. +This kernel includes two headers foo.h and mydir/myinc.h. +The following describes how these headers can be passed as embedded headers +in program objects: -Kernel objects are not created for any `+__kernel+` functions in _program_ -that do not have the same function definition across all devices for which a -program executable has been successfully built.
+[source,opencl] +---- +cl_program foo_pg = clCreateProgramWithSource(context, + 1, &foo_header_src, NULL, &err); +cl_program myinc_pg = clCreateProgramWithSource(context, + 1, &myinc_header_src, NULL, &err); -Kernel objects can only be created once you have a program object with a -valid program source or binary loaded into the program object and the -program executable has been successfully built for one or more devices -associated with program. -No changes to the program executable are allowed while there are kernel -objects associated with a program object. -This means that calls to {clBuildProgram} and {clCompileProgram} return -{CL_INVALID_OPERATION} if there are kernel objects attached to a program -object. -The OpenCL context associated with _program_ will be the context associated -with _kernel_. -The list of devices associated with _program_ are the devices associated -with _kernel_. -Devices associated with a program object for which a valid program -executable has been built can be used to execute kernels declared in the -program object. +// let's assume the program source described above is given +// by program_A and is loaded via clCreateProgramWithSource +cl_program input_headers[2] = { foo_pg, myinc_pg }; +char * input_header_names[2] = { "foo.h", "mydir/myinc.h" }; +clCompileProgram(program_A, + 0, NULL, // num_devices & device_list + NULL, // compile_options + 2, // num_input_headers + input_headers, + input_header_names, + NULL, NULL); // pfn_notify & user_data +---- // refError -{clCreateKernelsInProgram} will return {CL_SUCCESS} if the kernel objects were -successfully allocated. +{clCompileProgram} returns {CL_SUCCESS} if the function is executed +successfully. Otherwise, it returns one of the following errors: * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built - executable for any device in _program_.
- * {CL_INVALID_VALUE} if _kernels_ is not `NULL` and _num_kernels_ is less - than the number of kernels in _program_. + * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater + than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. + * {CL_INVALID_VALUE} if _num_input_headers_ is zero and + _header_include_names_ or _input_headers_ are not `NULL` or if + _num_input_headers_ is not zero and _header_include_names_ or + _input_headers_ are `NULL`. + * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not + `NULL`. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in + the list of devices associated with _program_. + * {CL_INVALID_COMPILER_OPTIONS} if the compiler options specified by + _options_ are invalid. + * {CL_INVALID_OPERATION} if the compilation or build of a program executable + for any of the devices listed in _device_list_ by a previous call to + {clCompileProgram} or {clBuildProgram} for _program_ has not completed. + * {CL_COMPILER_NOT_AVAILABLE} if a compiler is not available, i.e. + {CL_DEVICE_COMPILER_AVAILABLE} specified in the + <> table is set to {CL_FALSE}. + * {CL_COMPILE_PROGRAM_FAILURE} if there is a failure to compile the program + source. + This error will be returned if {clCompileProgram} does not return until + the compile has completed. + * {CL_INVALID_OPERATION} if there are kernel objects attached to _program_. + * {CL_INVALID_OPERATION} if _program_ has no source or IL available, i.e. it + has not been created with one of + ** {clCreateProgramWithIL} +ifdef::cl_khr_il_program[or {clCreateProgramWithILKHR}] +ifdef::cl_khr_spir[] + ** {clCreateProgramWithBinary} where `-x spir` is present in _options_, + if the `<<cl_khr_spir>>` extension is supported. +endif::cl_khr_spir[] + ** {clCreateProgramWithSource} * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device.
* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -- -[open,refpage='clRetainKernel',desc='Increments the kernel object reference count.',type='protos'] +[open,refpage='clLinkProgram',desc='Links a set of compiled program objects and libraries for all the devices or a specific device(s) in the OpenCL context and creates a library or executable.',type='protos'] -- -To retain a kernel object, call the function - -include::{generated}/api/protos/clRetainKernel.txt[] -include::{generated}/api/version-notes/clRetainKernel.asciidoc[] - - * _kernel_ is the kernel object to be retained. - -The _kernel_ reference count is incremented. - -// refError +To link a set of compiled program objects and libraries for all the devices +or a specific device(s) in the OpenCL context and create a library or +executable, call the function -{clRetainKernel} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +include::{generated}/api/protos/clLinkProgram.txt[] +include::{generated}/api/version-notes/clLinkProgram.asciidoc[] - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -{clCreateKernel} or {clCreateKernelsInProgram} do an implicit retain. --- + * _context_ must be a valid OpenCL context. + * _device_list_ is a pointer to a list of devices that are in _context_. + If _device_list_ is a `NULL` value, the link is performed for all devices + associated with _context_ for which a compiled object is available. + If _device_list_ is a non-`NULL` value, the link is performed for devices + specified in this list for which a compiled object is available. 
+ * _num_devices_ is the number of devices listed in _device_list_. + * _options_ is a pointer to a null-terminated string of characters that + describes the link options to be used for building the program executable. + The list of supported options is as described in <>. + If the program was created using {clCreateProgramWithBinary} and _options_ + is a `NULL` pointer, the program will be linked as if _options_ were the + same as when the program binary was originally built. + If the program was created using {clCreateProgramWithBinary} and _options_ + string contains anything other than the same options in the same order + (whitespace ignored) as when the program binary was originally built, then + the behavior is implementation-defined. + Otherwise, if _options_ is a `NULL` pointer then it will have the same + result as the empty string. + * _num_input_programs_ specifies the number of programs in array referenced by + _input_programs_. + * _input_programs_ is an array of program objects that are compiled binaries + or libraries that are to be linked to create the program executable. + For each device in _device_list_ or if _device_list_ is `NULL` the list of + devices associated with context, the following cases occur: + ** All programs specified by _input_programs_ contain a compiled binary or + library for the device. + In this case, a link is performed to generate a program executable for + this device. + ** None of the programs contain a compiled binary or library for that + device. + In this case, no link is performed and there will be no program + executable generated for this device. + ** All other cases will return a {CL_INVALID_OPERATION} error. + * _pfn_notify_ is a function pointer to a notification routine. + The notification routine is a callback function that an application can + register and which will be called when the program executable has been built + (successfully or unsuccessfully). 
+ * _user_data_ will be passed as an argument when _pfn_notify_ is called. + _user_data_ can be `NULL`. -[open,refpage='clReleaseKernel',desc='Decrements the kernel reference count.',type='protos'] --- -To release a kernel object, call the function +If _pfn_notify_ is not `NULL`, {clLinkProgram} does not need to wait for the +linker to complete, and can return immediately once the linking operation can +begin. +Once the linker has completed, the _pfn_notify_ callback function is called +which returns the program object returned by {clLinkProgram}. +Any state changes of the program object that result from calling {clLinkProgram} +(e.g. link status or log) will be observable from this callback function. +This callback function may be called asynchronously by the OpenCL +implementation. +It is the application's responsibility to ensure that the callback function +is thread-safe. -include::{generated}/api/protos/clReleaseKernel.txt[] -include::{generated}/api/version-notes/clReleaseKernel.asciidoc[] +If _pfn_notify_ is `NULL`, {clLinkProgram} does not return until the linker +has completed. - * _kernel_ is the kernel object to be released. +{clLinkProgram} creates a new program object which contains the library or +executable. +The library or executable binary can be queried using +{clGetProgramInfo}(_program_, {CL_PROGRAM_BINARIES}, ...) and can be specified +to {clCreateProgramWithBinary} to create a new program object. -The _kernel_ reference count is decremented. +The devices associated with the returned program object will be the list of +devices specified by _device_list_ or if _device_list_ is `NULL` it will be +the list of devices associated with _context_. -The kernel object is deleted once the number of instances that are retained -to _kernel_ become zero and the kernel object is no longer needed by any -enqueued commands that use _kernel_. 
-Using this function to release a reference that was not obtained by creating -the object or by calling {clRetainKernel} causes undefined behavior. +The linking operation can begin if the context, list of devices, input +programs and linker options specified are all valid and appropriate host and +device resources needed to perform the link are available. +If the linking operation can begin, {clLinkProgram} returns a valid non-zero +program object. // refError -{clReleaseKernel} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +If _pfn_notify_ is `NULL`, _errcode_ret_ will be set to {CL_SUCCESS} if +the link operation was successful and {CL_LINK_PROGRAM_FAILURE} if there is a +failure to link the compiled binaries and/or libraries. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. +If _pfn_notify_ is not `NULL`, {clLinkProgram} does not have to wait for +the linker to complete and can return {CL_SUCCESS} in _errcode_ret_ if the +linking operation can begin. +The _pfn_notify_ callback function will return a {CL_SUCCESS} or +{CL_LINK_PROGRAM_FAILURE} if the linking operation was successful or not. + +Otherwise {clLinkProgram} returns a `NULL` program object with an +appropriate error in _errcode_ret_. +The application should query the linker status of this program object to +check if the link was successful or not. +The list of errors that can be returned are: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater + than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. + * {CL_INVALID_VALUE} if _num_input_programs_ is zero and _input_programs_ is + `NULL` or if _num_input_programs_ is zero and _input_programs_ is not + `NULL` or if _num_input_programs_ is not zero and _input_programs_ is + `NULL`.
+ * {CL_INVALID_PROGRAM} if programs specified in _input_programs_ are not + valid program objects. + * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not + `NULL`. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in + the list of devices associated with _context_. + * {CL_INVALID_LINKER_OPTIONS} if the linker options specified by _options_ + are invalid. + * {CL_INVALID_OPERATION} if the compilation or build of a program executable + for any of the devices listed in _device_list_ by a previous call to + {clCompileProgram} or {clBuildProgram} for _program_ has not completed. + * {CL_INVALID_OPERATION} if the rules for devices containing compiled + binaries or libraries as described in _input_programs_ argument above + are not followed. + * {CL_LINKER_NOT_AVAILABLE} if a linker is not available, i.e. + {CL_DEVICE_LINKER_AVAILABLE} specified in the + <> table is set to {CL_FALSE}. + * {CL_LINK_PROGRAM_FAILURE} if there is a failure to link the compiled + binaries and/or libraries. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -7092,2394 +9229,7039 @@ Otherwise, it returns one of the following errors: -- -=== Setting Kernel Arguments +[[compiler-options]] +=== Compiler Options -To execute a kernel, the kernel arguments must be set. +The compiler options are categorized as pre-processor options, options for +math intrinsics, options that control optimization and miscellaneous +options. +This specification defines a standard set of options that must be supported +by the compiler when building program executables online or offline from +OpenCL C/{cpp} or, where relevant, from an IL. +These may be extended by a set of vendor- or platform-specific options. 
-[open,refpage='clSetKernelArg',desc='Set the argument value for a specific argument of a kernel.',type='protos'] --- -To set the argument value for a specific argument of a kernel, call the -function -include::{generated}/api/protos/clSetKernelArg.txt[] -include::{generated}/api/version-notes/clSetKernelArg.asciidoc[] +[[preprocessor-options]] +==== Preprocessor Options - * _kernel_ is a valid kernel object. - * _arg_index_ is the argument index. - Arguments to the kernel are referred by indices that go from 0 for the - leftmost argument to _n_ - 1, where _n_ is the total number of arguments - declared by a kernel (see below). - * _arg_size_ specifies the size of the argument value. - If the argument is a memory object, the _arg_size_ value must be equal to - `sizeof({cl_mem_TYPE})`. - For arguments declared with the `local` qualifier, the size specified will - be the size in bytes of the buffer that must be allocated for the `local` - argument. - If the argument is of type _sampler_t_, the _arg_size_ value must be equal - to `sizeof({cl_sampler_TYPE})`. - If the argument is of type _queue_t_, the _arg_size_ value must be equal to - `sizeof({cl_command_queue_TYPE})`. - For all other arguments, the size will be the size of argument type. - * _arg_value_ is a pointer to data that should be used as the argument value - for argument specified by _arg_index_. - The argument data pointed to by _arg_value_ is copied and the _arg_value_ - pointer can therefore be reused by the application after {clSetKernelArg} - returns. - The argument value specified is the value used by all API calls that enqueue - _kernel_ ({clEnqueueNDRangeKernel} and {clEnqueueTask}) until the argument - value is changed by a call to {clSetKernelArg} for _kernel_. +These options control the OpenCL C/{cpp} preprocessor which is run on each +program source before actual compilation. +These options are ignored for programs created with IL. 
-For example, consider the following kernel: +`-D name` :: + Predefine _name_ as a macro, with definition 1. -[source,opencl_c] ----- -kernel void image_filter (int n, - int m, - constant float *filter_weights, - read_only image2d_t src_image, - write_only image2d_t dst_image) -{ -... -} ----- +`-D name=definition` :: + The contents of _definition_ are tokenized and processed as if they + appeared during translation phase three in a `#define` directive. + In particular, the definition will be truncated by embedded newline + characters. ++ +-- +`-D` options are processed in the order they are given in the _options_ +argument to {clBuildProgram} or {clCompileProgram}. +Note that a space is required between the `-D` option and the symbol it +defines, otherwise behavior is implementation-defined. +-- -Argument index values for `image_filter` will be 0 for `n`, 1 for `m`, 2 for -`filter_weights`, 3 for `src_image` and 4 for `dst_image`. +`-I dir` :: + Add the directory _dir_ to the list of directories to be searched for + header files. + _dir_ can optionally be enclosed in double quotes. ++ +-- +This option is not portable due to its dependency on host file system and +host operating system. +It is supported for backwards compatibility with previous OpenCL versions. +Developers are encouraged to create and use explicit header objects by means +of {clCompileProgram} followed by {clLinkProgram}. +-- -If the argument is a memory object (buffer, pipe, image or image array), the -_arg_value_ entry will be a pointer to the appropriate buffer, pipe, image -or image array object. -The memory object must be created with the context associated with the -kernel object. -If the argument is a buffer object, the _arg_value_ pointer can be `NULL` or -point to a `NULL` value in which case a `NULL` value will be used as the -value for the argument declared as a pointer to `global` or `constant` -memory in the kernel. 
-If the argument is declared with the `local` qualifier, the _arg_value_ -entry must be `NULL`. -If the argument is of type _sampler_t_, the _arg_value_ entry must be a -pointer to the sampler object. -If the argument is of type _queue_t_, the _arg_value_ entry must be a -pointer to the device queue object. -If the argument is declared to be a pointer of a built-in scalar or vector -type, or a user defined structure type in the global or constant address -space, the memory object specified as argument value must be a buffer object -(or `NULL`). -If the argument is declared with the `constant` qualifier, the size in bytes -of the memory object cannot exceed {CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE} and -the number of arguments declared as pointers to `constant` memory cannot -exceed {CL_DEVICE_MAX_CONSTANT_ARGS}. +[[math-intrinsics-options]] +==== Math Intrinsics Options -The memory object specified as argument value must be a pipe object if the -argument is declared with the _pipe_ qualifier. +These options control compiler behavior regarding floating-point arithmetic. +These options trade off between speed and correctness. -The memory object specified as argument value must be a 2D image object if -the argument is declared to be of type _image2d_t_. -The memory object specified as argument value must be a 2D image object with -image channel order = {CL_DEPTH} if the argument is declared to be of type -_image2d_depth_t_. -The memory object specified as argument value must be a 3D image object if -argument is declared to be of type _image3d_t_. -The memory object specified as argument value must be a 1D image object if -the argument is declared to be of type _image1d_t_. -The memory object specified as argument value must be a 1D image buffer -object if the argument is declared to be of type _image1d_buffer_t_. -The memory object specified as argument value must be a 1D image array -object if argument is declared to be of type _image1d_array_t_. 
-The memory object specified as argument value must be a 2D image array -object if argument is declared to be of type _image2d_array_t_. -The memory object specified as argument value must be a 2D image array -object with image channel order = {CL_DEPTH} if argument is declared to be of -type _image2d_array_depth_t_. - -For all other kernel arguments, the _arg_value_ entry must be a pointer to -the actual data to be used as argument value. - -[NOTE] -==== -A kernel object does not update the reference count for objects such as -memory or sampler objects specified as argument values by {clSetKernelArg}. -Users may not rely on a kernel object to retain objects specified as -argument values to the kernel. - -Implementations shall not allow {cl_kernel_TYPE} objects to hold reference -counts to {cl_kernel_TYPE} arguments, because no mechanism is provided for the -user to tell the kernel to release that ownership right. -If the kernel holds ownership rights on kernel args, that would make it -impossible for users to tell with certainty when they may safely -release user allocated resources associated with OpenCL objects such as -the {cl_mem_TYPE} backing store used with {CL_MEM_USE_HOST_PTR}. -==== - -// refError +`-cl-single-precision-constant` :: + This option forces implicit conversions of double-precision floating-point + literals to single precision. + This option is ignored for programs created with IL. -{clSetKernelArg} returns {CL_SUCCESS} if the function was executed -successfully. -Otherwise, it returns one of the following errors: +`-cl-denorms-are-zero` :: + This option controls how single precision and double precision + denormalized numbers are handled. + If specified as a build option, the single precision denormalized + numbers may be flushed to zero; double precision denormalized numbers + may also be flushed to zero if the optional extension for double + precision is supported. 
+ This is intended to be a performance hint and the OpenCL compiler can + choose not to flush denorms to zero if the device supports single + precision (or double precision) denormalized numbers. ++ +-- +This option is ignored for single precision numbers if the device does not +support single precision denormalized numbers i.e. {CL_FP_DENORM} bit is not +set in {CL_DEVICE_SINGLE_FP_CONFIG}. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. - * {CL_INVALID_ARG_VALUE} if _arg_value_ specified is not a valid value. - * {CL_INVALID_MEM_OBJECT} for an argument declared to be a memory object - when the specified _arg_value_ is not a valid memory object. - * {CL_INVALID_SAMPLER} for an argument declared to be of type _sampler_t_ - when the specified _arg_value_ is not a valid sampler object. - * {CL_INVALID_DEVICE_QUEUE} for an argument declared to be of type _queue_t_ - when the specified _arg_value_ is not a valid device queue object. - This error code is <> version 2.0. - * {CL_INVALID_ARG_SIZE} if _arg_size_ does not match the size of the data - type for an argument that is not a memory object or if the argument is a - memory object and _arg_size_ != `sizeof({cl_mem_TYPE})` or if _arg_size_ is - zero and the argument is declared with the local qualifier or if the - argument is a sampler and _arg_size_ != `sizeof({cl_sampler_TYPE})`. - * {CL_MAX_SIZE_RESTRICTION_EXCEEDED} if the size in bytes of the memory - object (if the argument is a memory object) or _arg_size_ (if the - argument is declared with `local` qualifier) exceeds a language- - specified maximum size restriction for this argument, such as the - *MaxByteOffset* SPIR-V decoration. - This error code is <> version 2.2. 
- * {CL_INVALID_ARG_VALUE} if the argument is an image declared with the - `read_only` qualifier and _arg_value_ refers to an image object created - with _cl_mem_flags_ of {CL_MEM_WRITE_ONLY} or if the image argument is - declared with the `write_only` qualifier and _arg_value_ refers to an - image object created with _cl_mem_flags_ of {CL_MEM_READ_ONLY}. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +This option is ignored for double precision numbers if the device does not +support double precision or if it does support double precision but not +double precision denormalized numbers i.e. {CL_FP_DENORM} bit is not set in +{CL_DEVICE_DOUBLE_FP_CONFIG}. -When {clSetKernelArg} returns an error code different from {CL_SUCCESS}, the -internal state of _kernel_ may only be modified when that error code is -{CL_OUT_OF_RESOURCES} or {CL_OUT_OF_HOST_MEMORY}. When the internal state -of _kernel_ is modified, it is implementation-defined whether: +This flag only applies for scalar and vector single precision floating-point +variables and computations on these floating-point variables inside a +program. +It does not apply to reading from or writing to image objects. +-- - * The argument value that was previously set is kept so that it can be used in - further kernel enqueues. - * The argument value is unset such that a subsequent kernel enqueue fails with - {CL_INVALID_KERNEL_ARGS}. footnote:[{fn-setkernelarg-prefer-unset-on-error}] +`-cl-fp32-correctly-rounded-divide-sqrt` :: + The `-cl-fp32-correctly-rounded-divide-sqrt` build option to + {clBuildProgram} or {clCompileProgram} allows an application to specify + that single precision floating-point divide (x/y and 1/x) and sqrt used + in the program source are correctly rounded. 
+ If this build option is not specified, the minimum numerical accuracy of + single precision floating-point divide and sqrt are as defined in the + OpenCL C or OpenCL SPIR-V Environment specifications. ++ -- +This build option can only be specified if the +{CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT} is set in {CL_DEVICE_SINGLE_FP_CONFIG} (as +defined in the <> table) for devices +that the program is being built for. +{clBuildProgram} or {clCompileProgram} will fail to compile the program for +a device if the `-cl-fp32-correctly-rounded-divide-sqrt` option is specified +and {CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT} is not set for the device. -[open,refpage='clSetKernelArgSVMPointer',desc='Set a SVM pointer as the argument value for a specific argument of a kernel.',type='protos'] +Note: This option is <> version 1.2. -- -To set a SVM pointer as the argument value for a specific argument of a -kernel, call the function -include::{generated}/api/protos/clSetKernelArgSVMPointer.txt[] -include::{generated}/api/version-notes/clSetKernelArgSVMPointer.asciidoc[] - * _kernel_ is a valid kernel object. - * _arg_index_ is the argument index. - Arguments to the kernel are referred by indices that go from 0 for the - leftmost argument to _n_ - 1, where _n_ is the total number of arguments - declared by a kernel. - * _arg_value_ is the SVM pointer that should be used as the argument value for - argument specified by _arg_index_. - The SVM pointer specified is the value used by all API calls that enqueue - _kernel_ ({clEnqueueNDRangeKernel} and {clEnqueueTask}) until the argument - value is changed by a call to {clSetKernelArgSVMPointer} for _kernel_. - The SVM pointer can only be used for arguments that are declared to be a - pointer to `global` or `constant` memory. - The SVM pointer value must be aligned according to the arguments type. - For example, if the argument is declared to be `+global float4 *p+`, the SVM - pointer value passed for `p` must be at a minimum aligned to a `float4`.
- The SVM pointer value specified as the argument value can be the pointer - returned by {clSVMAlloc} or can be a pointer offset into the SVM region. +[[optimization-options]] +==== Optimization Options -// refError +These options control various sorts of optimizations. +Turning on optimization flags makes the compiler attempt to improve the +performance and/or code size at the expense of compilation time and possibly +the ability to debug the program. -{clSetKernelArgSVMPointer} returns {CL_SUCCESS} if the function was executed -successfully. -Otherwise, it returns one of the following errors: +`-cl-opt-disable` :: + This option disables all optimizations. + The default is optimizations are enabled. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. - * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. - * {CL_INVALID_ARG_VALUE} if _arg_value_ specified is not a valid value. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +`-cl-strict-aliasing` :: + This option allows the compiler to assume the strictest aliasing rules. ++ +-- +Note: This option is <> version 1.1. -- -[open,refpage='clSetKernelExecInfo',desc='Pass additional information other than argument values to a kernel.',type='protos'] +`-cl-uniform-work-group-size` :: + This requires that the global work-size be a multiple of the work-group + size specified to {clEnqueueNDRangeKernel}. + Allow optimizations that are made possible by this restriction. ++ +-- +Note: This option is <> version 2.0. 
-- -To pass additional information other than argument values to a kernel, call -the function -include::{generated}/api/protos/clSetKernelExecInfo.txt[] -include::{generated}/api/version-notes/clSetKernelExecInfo.asciidoc[] +`-cl-no-subgroup-ifp` :: + This indicates that kernels in this program do not require sub-groups to + make independent forward progress. + Allows optimizations that are made possible by this restriction. + This option has no effect for devices that do not support independent + forward progress for sub-groups. ++ +-- +Note: This option is <> version 2.1. +-- - * _kernel_ specifies the kernel object being queried. - * _param_name_ specifies the information to be passed to kernel. - The list of supported _param_name_ types and the corresponding values passed - in _param_value_ is described in the <> table. - * _param_value_size_ specifies the size in bytes of the memory pointed to by - _param_value_. - * _param_value_ is a pointer to memory where the appropriate values determined - by _param_name_ are specified. +The following options control compiler behavior regarding floating-point +arithmetic. +These options trade off between performance and correctness and must be +specifically enabled. +These options are not turned on by default since they can result in incorrect +output for programs which depend on an exact implementation of IEEE 754 +rules/specifications for math functions. -[[kernel-exec-info-table]] -.List of supported param_names by {clSetKernelExecInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Kernel Exec Info | Type | Description -| {CL_KERNEL_EXEC_INFO_SVM_PTRS_anchor} +`-cl-mad-enable` :: + Allow `a * b + c` to be replaced by a *mad* instruction. + The *mad* instruction may compute `a * b + c` with reduced accuracy + in the embedded profile. + See the OpenCL C or OpenCL SPIR-V Environment specification for accuracy + details.
+ On some hardware the *mad* instruction may provide better performance + than the expanded computation. -include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_PTRS.asciidoc[] - | {void_TYPE}*[] - | SVM pointers must reference locations contained entirely within - buffers that are passed to kernel as arguments, or that are passed - through the execution information. +`-cl-no-signed-zeros` :: + Allow optimizations for floating-point arithmetic that ignore the + signedness of zero. + IEEE 754 arithmetic specifies the distinct behavior of `+0.0` and `-0.0` + values, which then prohibits simplification of expressions such as `x + {plus} 0.0` or `0.0 * x` (even with `-cl-finite-math-only`). + This option implies that the sign of a zero result is not significant. - Non-argument SVM buffers must be specified by passing pointers to - those buffers via {clSetKernelExecInfo} for coarse-grain and - fine-grain buffer SVM allocations but not for finegrain system SVM - allocations. -| {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_anchor} +`-cl-unsafe-math-optimizations` :: + Allow optimizations for floating-point arithmetic that (a) assume that + arguments and results are valid, (b) may violate the IEEE 754 standard, + (c) assume relaxed OpenCL numerical compliance requirements as defined + in the unsafe math optimization section of the OpenCL C or OpenCL SPIR-V + Environment specifications, and (d) may violate edge case behavior in the + OpenCL C or OpenCL SPIR-V Environment specifications. + This option includes the `-cl-no-signed-zeros`, `-cl-mad-enable`, and + `-cl-denorms-are-zero` footnote:[{fn-unsafe-denorms-are-zero}] options. -include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM.asciidoc[] - | {cl_bool_TYPE} - | This flag indicates whether the kernel uses pointers that are fine - grain system SVM allocations. 
- These fine grain system SVM pointers may be passed as arguments or - defined in SVM buffers that are passed as arguments to _kernel_. -|==== +`-cl-finite-math-only` :: + Allow optimizations for floating-point arithmetic that assume that + arguments and results are not NaNs, +Inf, -Inf. + This option may violate the OpenCL numerical compliance requirements for + single precision and double precision floating-point, as well as edge + case behavior. -// refError +`-cl-fast-relaxed-math` :: + Sets the optimization options `-cl-finite-math-only` and + `-cl-unsafe-math-optimizations`. + This option causes the preprocessor macro `+__FAST_RELAXED_MATH__+` to + be defined in the OpenCL program. -{clSetKernelExecInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. - * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. - * {CL_INVALID_VALUE} if _param_name_ is not valid, if _param_value_ is - `NULL` or if the size specified by _param_value_size_ is not valid. - * {CL_INVALID_OPERATION} if _param_name_ is - {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} and _param_value_ is {CL_TRUE} - but no devices in context associated with _kernel_ support fine-grain - system SVM allocations. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +==== Options to Request or Suppress Warnings -[NOTE] -==== -Coarse-grain or fine-grain buffer SVM pointers used by a kernel which -are not passed as a kernel arguments must be specified using -{clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS}. 
-For example, if SVM buffer A contains a pointer to another SVM buffer B, -and the kernel dereferences that pointer, then a pointer to B must -either be passed as an argument in the call to that kernel or it must be -made available to the kernel using {clSetKernelExecInfo}. -For example, we might pass extra SVM pointers as follows: +Warnings are diagnostic messages that report constructions which are not +inherently erroneous but which are risky or suggest there may have been an +error. +The following language-independent options do not enable specific warnings +but control the kinds of diagnostics produced by the OpenCL compiler. +These options are ignored for programs created with IL. -[source,opencl] ----- -clSetKernelExecInfo(kernel, - CL_KERNEL_EXEC_INFO_SVM_PTRS, - num_ptrs * sizeof(void *), - extra_svm_ptr_list); ----- +`-w` :: + Inhibit all warning messages. -Here `num_ptrs` specifies the number of additional SVM pointers while -`extra_svm_ptr_list` specifies a pointer to memory containing those SVM -pointers. +`-Werror` :: + Make all warnings into errors. -When calling {clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS} to -specify pointers to non-argument SVM buffers as extra arguments to a kernel, -each of these pointers can be the SVM pointer returned by {clSVMAlloc} or -can be a pointer + offset into the SVM region. -It is sufficient to provide one pointer for each SVM buffer used. -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} is used to indicate whether -SVM pointers used by a kernel will refer to system allocations or not. +[[opencl-c-version]] +==== Options Controlling the OpenCL C Version -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_FALSE} indicates that the -OpenCL implementation may assume that system pointers are not passed as -kernel arguments and are not stored inside SVM allocations passed as kernel -arguments. +The following option controls the version of OpenCL C that the compiler +accepts. 
+These options are ignored for programs created with IL. -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_TRUE} indicates that the -OpenCL implementation must assume that system pointers might be passed as -kernel arguments and/or stored inside SVM allocations passed as kernel -arguments. -In this case, if the device to which the kernel is enqueued does not support -system SVM pointers, {clEnqueueNDRangeKernel} and {clEnqueueTask} will return a -{CL_INVALID_OPERATION} error. -If none of the devices in the context associated with kernel support -fine-grain system SVM allocations, {clSetKernelExecInfo} will return a -{CL_INVALID_OPERATION} error. +`-cl-std=` :: + Determine the OpenCL C language version to use. + A value for this option must be provided. + Valid values are: ++ +-- + * `CL1.1`: Support OpenCL C 1.1 language features defined in _section 6_ of + the OpenCL 1.1 specification or in the unified OpenCL C specification. + * `CL1.2`: Support OpenCL C 1.2 language features defined in _section 6_ of + the OpenCL 1.2 specification or in the unified OpenCL C specification. + * `CL2.0`: Support OpenCL C 2.0 language features defined in the OpenCL C 2.0 + specification or in the unified OpenCL C specification. + * `CL3.0`: Support OpenCL C 3.0 language features defined in the unified + OpenCL C specification. +-- -If {clSetKernelExecInfo} has not been called with a value for -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM}, the default value is used for -this kernel attribute. -The default value depends on whether the device on which the kernel is -enqueued supports fine-grain system SVM allocations. -If so, the default value used is {CL_TRUE} (system pointers might be passed); -otherwise, the default is {CL_FALSE}. 
+Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL1.1` +option *will fail* to compile the program for any devices with +{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.0 and when +{CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 1.1. -A call to {clSetKernelExecInfo} for a given value of _param_name_ -replaces any prior value passed for that value of _param_name_. -Only one _param_value_ will be stored for each value of _param_name_. -==== +Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL1.2` +option *will fail* to compile the program for any devices with +{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.1 or earlier +and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 1.2. +Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL2.0` +option *will fail* to compile the program for any devices with +{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.2 or earlier +and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 2.0. -=== Copying Kernel Objects +Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL3.0` +option *will fail* to compile the program for any devices with +{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 2.0 or earlier +and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 3.0. -NOTE: Copying kernel objects is <> version 2.1. +If the `-cl-std` build option is not specified, the highest OpenCL C 1.x +language version supported by each device is used when compiling the program +for each device. +Applications are required to specify the `-cl-std=CL2.0` build option to +compile or build programs with OpenCL C 2.0 and the `-cl-std=CL3.0` +build option to compile or build programs with OpenCL C 3.0. 
-[open,refpage='clCloneKernel',desc='Make a shallow copy of the kernel object.',type='protos'] --- -To clone a kernel object, call the function -include::{generated}/api/protos/clCloneKernel.txt[] -include::{generated}/api/version-notes/clCloneKernel.asciidoc[] +==== Options for Querying Kernel Argument Information - * _source_kernel_ is a valid {cl_kernel_TYPE} object that will be copied. - _source_kernel_ will not be modified in any way by this function. - * _errcode_ret_ will be assigned an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +IMPORTANT: Querying for kernel argument information is <> version 1.2. -Cloning is used to make a shallow copy of the kernel object, its arguments -and any information passed to the kernel object using {clSetKernelExecInfo}. -If the kernel object was ready to be enqueued before copying it, the clone -of the kernel object is ready to enqueue. +`-cl-kernel-arg-info` :: + This option allows the compiler to store information about the arguments + of a kernel(s) in the program executable. + The argument information stored includes the argument name, its type, + the address space and access qualifiers used. + Refer to description of {clGetKernelArgInfo} on how to query this + information. -The returned kernel object is an exact copy of _source_kernel_, with one -caveat: the reference count on the returned kernel object is set as if it -had been returned by {clCreateKernel}. -The reference count of _source_kernel will_ not be changed. -The resulting kernel will be in the same state as if {clCreateKernel} is -called to create the resultant kernel with the same arguments as those used -to create _source_kernel_, the latest call to {clSetKernelArg} or -{clSetKernelArgSVMPointer} for each argument index applied to kernel and the -last call to {clSetKernelExecInfo} for each value of the param name -parameter are applied to the new kernel object. 
+==== Options for Debugging Your Program -All arguments of the new kernel object must be intact and it may be -correctly used in the same situations as kernel except those that assume a -pre-existing reference count. -Setting arguments on the new kernel object will not affect _source_kernel_ -except insofar as the argument points to a shared underlying entity and in -that situation behavior is as if two kernel objects had been created and the -same argument applied to each. -Only the data stored in the kernel object is copied; data referenced by the -kernels arguments are not copied. -For example, if a buffer or pointer argument is set on a kernel object, the -pointer is copied but the underlying memory allocation is not. +IMPORTANT: Debugging options are <> version 2.0. -// refError +`-g` :: + This option can currently be used to generate additional errors for the + built-in functions that allow you to enqueue commands on a device (refer + to OpenCL kernel languages specifications). -{clCloneKernel} returns a valid non-zero kernel object and _errcode_ret_ is -set to {CL_SUCCESS} if the kernel is successfully copied. -Otherwise it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +[[linker-options]] +=== Linker Options +NOTE: Linker options are <> version 1.2. -=== Kernel Object Queries +This specification defines a standard set of linker options that must be +supported by the OpenCL C compiler when linking compiled programs online or +offline. +These linker options are categorized as library linking options and program +linking options. +These may be extended by a set of vendor- or platform-specific options. 
-[open,refpage='clGetKernelInfo',desc='Returns information about the kernel object.',type='protos'] --- -To return information about a kernel object, call the function -include::{generated}/api/protos/clGetKernelInfo.txt[] -include::{generated}/api/version-notes/clGetKernelInfo.asciidoc[] +==== Library Linking Options - * _kernel_ specifies the kernel object being queried. - * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetKernelInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. +IMPORTANT: Library linking options are <> version +1.2. -[[kernel-info-table]] -.List of supported param_names by {clGetKernelInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Kernel Info | Return Type | Description -| {CL_KERNEL_FUNCTION_NAME_anchor} +The following options can be specified when creating a library of compiled +binaries. -include::{generated}/api/version-notes/CL_KERNEL_FUNCTION_NAME.asciidoc[] - | {char_TYPE}[] - | Return the kernel function name. -| {CL_KERNEL_NUM_ARGS_anchor} +`-create-library` :: + Create a library of compiled binaries specified in _input_programs_ + argument to {clLinkProgram}. -include::{generated}/api/version-notes/CL_KERNEL_NUM_ARGS.asciidoc[] - | {cl_uint_TYPE} - | Return the number of arguments to kernel. 
-| {CL_KERNEL_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] +`-enable-link-options` :: + Allows the linker to modify the library behavior based on one or more + link options (described in <<program-linking-options>>) when this library is linked with a program executable. + This option must be specified with the create-library option. -include::{generated}/api/version-notes/CL_KERNEL_REFERENCE_COUNT.asciidoc[] - | {cl_uint_TYPE} - | Return the _kernel_ reference count. -| {CL_KERNEL_CONTEXT_anchor} -include::{generated}/api/version-notes/CL_KERNEL_CONTEXT.asciidoc[] - | {cl_context_TYPE} - | Return the context associated with _kernel_. -| {CL_KERNEL_PROGRAM_anchor} +[[program-linking-options]] +==== Program Linking Options -include::{generated}/api/version-notes/CL_KERNEL_PROGRAM.asciidoc[] - | {cl_program_TYPE} - | Return the program object associated with kernel. -| {CL_KERNEL_ATTRIBUTES_anchor} +The following options can be specified when linking a program executable. -include::{generated}/api/version-notes/CL_KERNEL_ATTRIBUTES.asciidoc[] - | {char_TYPE}[] - | Returns any attributes specified using the `+__attribute__+` - OpenCL C qualifier (or using an OpenCL {cpp} qualifier syntax [[]] ) - with the kernel function declaration in the program source. - These attributes include attributes described in the earlier OpenCL - C kernel language specifications and other attributes supported by - an implementation. - - Attributes are returned as they were declared inside - `+__attribute__((...))+`, with any surrounding whitespace and - embedded newlines removed. - When multiple attributes are present, they are returned as a single, - space delimited string. - - For kernels not created from OpenCL C source and the - {clCreateProgramWithSource} API call the string returned from this - query will be empty.
-|==== - -// refError +`-cl-denorms-are-zero` + +`-cl-no-signed-zeros` + +`-cl-unsafe-math-optimizations` + +`-cl-finite-math-only` + +`-cl-fast-relaxed-math` + +`-cl-no-subgroup-ifp` (<> version 2.1) -{clGetKernelInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +The options are described in <> and <>. +The linker may apply these options to all compiled program objects +specified to {clLinkProgram}. +The linker may apply these options only to libraries which were created +with the option `-enable-link-options`. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and _param_value_ - is not `NULL`. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- -[open,refpage='clGetKernelWorkGroupInfo',desc='Returns information about the kernel object that may be specific to a device.',type='protos'] --- -To return information about the kernel object that may be specific to a -device, call the function +ifdef::cl_khr_spir[] +[[spir-compilation-options]] +==== SPIR Compilation Options -include::{generated}/api/protos/clGetKernelWorkGroupInfo.txt[] -include::{generated}/api/version-notes/clGetKernelWorkGroupInfo.asciidoc[] +If the `<>` extension is supported, the compile option - * _kernel_ specifies the kernel object being queried. - * _device_ identifies a specific device in the list of devices associated with - _kernel_. - The list of devices is the list of devices in the OpenCL context that is - associated with _kernel_. - If the list of devices associated with _kernel_ is a single device, _device_ - can be a `NULL` value. 
- * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetKernelWorkGroupInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. +`-x spir` -[[kernel-workgroup-info-table]] -.List of supported param_names by {clGetKernelWorkGroupInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Kernel Work-group Info | Return Type | Description -| {CL_KERNEL_GLOBAL_WORK_SIZE_anchor} +must be specified to indicate that the binary is in SPIR format, and the +compile option -include::{generated}/api/version-notes/CL_KERNEL_GLOBAL_WORK_SIZE.asciidoc[] - | {size_t_TYPE}[3] - | This provides a mechanism for the application to query the maximum - global size that can be used to execute a kernel (i.e. - _global_work_size_ argument to {clEnqueueNDRangeKernel}) on a custom - device given by device or a built-in kernel on an OpenCL device - given by device. +`-spir-std` - If device is not a custom device and kernel is not a built-in - kernel, {clGetKernelWorkGroupInfo} returns the error - {CL_INVALID_VALUE}. -| {CL_KERNEL_WORK_GROUP_SIZE_anchor} +must be used to specify the version of the SPIR specification that describes +the format and meaning of the binary. -include::{generated}/api/version-notes/CL_KERNEL_WORK_GROUP_SIZE.asciidoc[] - | {size_t_TYPE} - | This provides a mechanism for the application to query the maximum - work-group size that can be used to execute the kernel on a specific - device given by device. 
- The OpenCL implementation uses the resource requirements of the - kernel (register usage etc.) to determine what this work-group size - should be. +For example, if the binary is as described in SPIR version 1.2, then - As a result and unlike {CL_DEVICE_MAX_WORK_GROUP_SIZE} this value may - vary from one kernel to another as well as one device to another. +`-spir-std=1.2` - {CL_KERNEL_WORK_GROUP_SIZE} will be less than or equal to - {CL_DEVICE_MAX_WORK_GROUP_SIZE} for a given kernel object. -| {CL_KERNEL_COMPILE_WORK_GROUP_SIZE_anchor} +must be specified. +Failing to specify these compile options may result in +implementation-defined behavior. +endif::cl_khr_spir[] -include::{generated}/api/version-notes/CL_KERNEL_COMPILE_WORK_GROUP_SIZE.asciidoc[] - | {size_t_TYPE}[3] - | Returns the work-group size specified in the kernel source or IL. - If the work-group size is not specified in the kernel source or IL, - (0, 0, 0) is returned. -| {CL_KERNEL_LOCAL_MEM_SIZE_anchor} +=== Unloading the OpenCL Compiler -include::{generated}/api/version-notes/CL_KERNEL_LOCAL_MEM_SIZE.asciidoc[] - | {cl_ulong_TYPE} - | Returns the amount of local memory in bytes being used by a kernel. - This includes local memory that may be needed by an implementation - to execute the kernel, variables declared inside the kernel with the - `+__local+` address qualifier and local memory to be allocated for - arguments to the kernel declared as pointers with the `+__local+` - address qualifier and whose size is specified with {clSetKernelArg}. +[open,refpage='clUnloadPlatformCompiler',desc='Allows the implementation to release the resources allocated by the OpenCL compiler for a platform.',type='protos'] +-- +To unload an OpenCL compiler for a platform, call the function - If the local memory size, for any pointer argument to the kernel - declared with the `+__local+` address qualifier, is not specified, - its size is assumed to be 0. 
-| {CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE_anchor} +include::{generated}/api/protos/clUnloadPlatformCompiler.txt[] +include::{generated}/api/version-notes/clUnloadPlatformCompiler.asciidoc[] -include::{generated}/api/version-notes/CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.asciidoc[] - | {size_t_TYPE} - | Returns the preferred multiple of work-group size for launch. - This is a performance hint. - Specifying a work-group size that is not a multiple of the value - returned by this query as the value of the local work size argument - to {clEnqueueNDRangeKernel} will not fail to enqueue the kernel for - execution unless the work-group size specified is larger than the - device maximum. -| {CL_KERNEL_PRIVATE_MEM_SIZE_anchor} + * _platform_ is the platform to unload. -include::{generated}/api/version-notes/CL_KERNEL_PRIVATE_MEM_SIZE.asciidoc[] - | {cl_ulong_TYPE} - | Returns the minimum amount of private memory, in bytes, used by each - work-item in the kernel. - This value may include any private memory needed by an - implementation to execute the kernel, including that used by the - language built-ins and variable declared inside the kernel with the - `+__private+` qualifier. -|==== +This function allows the implementation to release the resources allocated +by the OpenCL compiler for _platform_. +This is a hint from the application and does not guarantee that the compiler +will not be used in the future or that the compiler will actually be +unloaded by the implementation. +Calls to {clBuildProgram}, {clCompileProgram} or {clLinkProgram} after +{clUnloadPlatformCompiler} will reload the compiler, if necessary, to build +the appropriate program executable. // refError -{clGetKernelWorkGroupInfo} returns {CL_SUCCESS} if the function is executed +{clUnloadPlatformCompiler} returns {CL_SUCCESS} if the function is executed successfully. 
Otherwise, it returns one of the following errors: - * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated - with _kernel_ or if _device_ is `NULL` but there is more than one device - associated with _kernel_. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table - and _param_value_ is not `NULL`. - * {CL_INVALID_VALUE} if _param_name_ is {CL_KERNEL_GLOBAL_WORK_SIZE} and - _device_ is not a custom device and _kernel_ is not a built-in kernel. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. + * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. -- -[open,refpage='clGetKernelSubGroupInfo',desc='Returns information about the kernel object.',type='protos'] +[open,refpage='clUnloadCompiler',desc='Allows the implementation to release the resources allocated by the OpenCL compiler.',type='protos'] -- -To return information about a kernel object, call the function +Alternatively, if you are not using OpenCL via the ICD loader, you may unload the OpenCL compiler with the function -include::{generated}/api/protos/clGetKernelSubGroupInfo.txt[] -include::{generated}/api/version-notes/clGetKernelSubGroupInfo.asciidoc[] -Also see extension *cl_khr_subgroups*. +include::{generated}/api/protos/clUnloadCompiler.txt[] +include::{generated}/api/version-notes/clUnloadCompiler.asciidoc[] - * _kernel_ specifies the kernel object being queried. - * _device_ identifies a specific device in the list of devices associated with - _kernel_. - The list of devices is the list of devices in the OpenCL context that is - associated with _kernel_. 
- If the list of devices associated with _kernel_ is a single device, _device_ - can be a `NULL` value. +This function allows the implementation to release the resources allocated +by the OpenCL compiler. +This is a hint from the application and does not guarantee that the compiler +will not be used in the future or that the compiler will actually be +unloaded by the implementation. +Calls to {clBuildProgram}, {clCompileProgram} or {clLinkProgram} after +{clUnloadCompiler} will reload the compiler, if necessary, to build +the appropriate program executable. + +// refError + +{clUnloadCompiler} will always return {CL_SUCCESS}. +-- + + +=== Program Object Queries + +[open,refpage='clGetProgramInfo',desc='Returns information about the program object.',type='protos'] +-- +To return information about a program object, call the function + +include::{generated}/api/protos/clGetProgramInfo.txt[] +include::{generated}/api/version-notes/clGetProgramInfo.asciidoc[] + + * _program_ specifies the program object being queried. * _param_name_ specifies the information to query. The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetKernelSubGroupInfo} is described in the - <> table. - * _input_value_size_ is used to specify the size in bytes of memory pointed to - by _input_value_. - This size must be == size of input type as described in the table below. - * _input_value_ is a pointer to memory where the appropriate parameterization - of the query is passed from. - If _input_value_ is `NULL`, it is ignored. + _param_value_ by {clGetProgramInfo} is described in the + <<program-info-table>> table. * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. * _param_value_size_ is used to specify the size in bytes of memory pointed to by _param_value_. This size must be {geq} size of return type as described in the - <> table. + <<program-info-table>> table.
* _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. -[[kernel-sub-group-info-table]] -.List of supported param_names by {clGetKernelSubGroupInfo} -[width="100%",cols="<25%,<25%,<25%,<25%",options="header"] +[[program-info-table]] +.List of supported param_names by {clGetProgramInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] |==== -| Kernel Sub-group Info | Input Type | Return Type | Description -| {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_anchor} +| Program Info | Return Type | Description +| {CL_PROGRAM_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] -include::{generated}/api/version-notes/CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE.asciidoc[] -Also see extension *cl_khr_subgroups*. - | {size_t_TYPE}* - | {size_t_TYPE} - | Returns the maximum sub-group size for this kernel. - All sub-groups must be the same size, while the last sub-group in - any work-group (i.e. the sub-group with the maximum index) could - be the same or smaller size. +include::{generated}/api/version-notes/CL_PROGRAM_REFERENCE_COUNT.asciidoc[] + | {cl_uint_TYPE} + | Return the _program_ reference count. +| {CL_PROGRAM_CONTEXT_anchor} - The _input_value_ must be an array of {size_t_TYPE} values - corresponding to the local work size parameter of the intended - dispatch. - The number of dimensions in the ND-range will be inferred from - the value specified for _input_value_size_. -| {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_CONTEXT.asciidoc[] + | {cl_context_TYPE} + | Return the context specified when the program object is created +| {CL_PROGRAM_NUM_DEVICES_anchor} -include::{generated}/api/version-notes/CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE.asciidoc[] -Also see extension *cl_khr_subgroups*. 
- | {size_t_TYPE}* - | {size_t_TYPE} - | Returns the number of sub-groups that will be present in each - work-group for a given local work size. - All workgroups, apart from the last work-group in each dimension - in the presence of non-uniform work-group sizes, will have the - same number of sub-groups. +include::{generated}/api/version-notes/CL_PROGRAM_NUM_DEVICES.asciidoc[] + | {cl_uint_TYPE} + | Return the number of devices associated with _program_. +| {CL_PROGRAM_DEVICES_anchor} - The _input_value_ must be an array of {size_t_TYPE} values - corresponding to the local work size parameter of the intended - dispatch. - The number of dimensions in the ND-range will be inferred from - the value specified for _input_value_size_. -| {CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_DEVICES.asciidoc[] + | {cl_device_id_TYPE}[] + | Return the list of devices associated with the program object. + This can be the devices associated with context on which the program + object has been created or can be a subset of devices that are + specified when a program object is created using + {clCreateProgramWithBinary}. +| {CL_PROGRAM_SOURCE_anchor} -include::{generated}/api/version-notes/CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT.asciidoc[] -Also see extension *cl_khr_subgroups*. +include::{generated}/api/version-notes/CL_PROGRAM_SOURCE.asciidoc[] + | {char_TYPE}[] + | Return the program source code specified by + {clCreateProgramWithSource}. + The source string returned is a concatenation of all source strings + specified to {clCreateProgramWithSource} with a null terminator. + The concatenation strips any nulls in the original source strings. 
+ + If _program_ is created using {clCreateProgramWithBinary}, + {clCreateProgramWithIL}, +ifdef::cl_khr_il_program[{clCreateProgramWithILKHR},] + or {clCreateProgramWithBuiltInKernels}, a null string or the + appropriate program source code is returned depending on whether or + not the program source code is stored in the binary. + + The actual number of characters that represents the program source + code including the null terminator is returned in + _param_value_size_ret_. +| {CL_PROGRAM_IL_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_IL.asciidoc[] + +ifdef::cl_khr_il_program[] +{CL_PROGRAM_IL_KHR_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_IL_KHR.asciidoc[] +endif::cl_khr_il_program[] + | {char_TYPE}[] + | Returns the program IL for programs created with +ifdef::cl_khr_il_program[{clCreateProgramWithILKHR} or] + {clCreateProgramWithIL}. + + If _program_ is created with {clCreateProgramWithSource}, + {clCreateProgramWithBinary} or {clCreateProgramWithBuiltInKernels} + the memory pointed to by param_value will be unchanged and + _param_value_size_ret_ will be set to 0. +| {CL_PROGRAM_BINARY_SIZES_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_BINARY_SIZES.asciidoc[] + | {size_t_TYPE}[] + | Returns an array that contains the size in bytes of the program + binary (could be an executable binary, compiled binary or library + binary) for each device associated with program. + The size of the array is the number of devices associated with + program. + If a binary is not available for a device(s), a size of zero is + returned. + + If _program_ is created using {clCreateProgramWithBuiltInKernels}, + the implementation may return zero in any entries of the returned + array. 
+| {CL_PROGRAM_BINARIES_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_BINARIES.asciidoc[] + | {unsigned_char_TYPE}*[] + | Return the program binaries (could be an executable binary, compiled + binary or library binary) for all devices associated with program. + For each device in program, the binary returned can be the binary + specified for the device when program is created with + {clCreateProgramWithBinary} or it can be the executable binary + generated by {clBuildProgram} or {clLinkProgram}. + If _program_ is created with {clCreateProgramWithSource} or + {clCreateProgramWithIL}, the binary returned is the binary generated + by {clBuildProgram}, {clCompileProgram} or {clLinkProgram}. + The bits returned can be an implementation-specific intermediate + representation (a.k.a. IR) or device specific executable bits or + both. + The decision on which information is returned in the binary is up to + the OpenCL implementation. + + param_value points to an array of `n` pointers allocated by the + caller, where `n` is the number of devices associated with program. + The buffer sizes needed to allocate the memory that these `n` + pointers refer to can be queried using the {CL_PROGRAM_BINARY_SIZES} + query as described in this table. + + Each entry in this array is used by the implementation as the + location in memory where to copy the program binary for a specific + device, if there is a binary available. + To find out which device the program binary in the array refers to, + use the {CL_PROGRAM_DEVICES} query to get the list of devices. + There is a one-to-one correspondence between the array of n pointers + returned by {CL_PROGRAM_BINARIES} and array of devices returned by + {CL_PROGRAM_DEVICES}. +| {CL_PROGRAM_NUM_KERNELS_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_NUM_KERNELS.asciidoc[] | {size_t_TYPE} - | {size_t_TYPE}[] - | Returns the local size that will generate the requested number - of sub-groups for the kernel. 
- The output array must be an array of {size_t_TYPE} values corresponding - to the local size parameter. - Any returned work-group will have one dimension. - Other dimensions inferred from the value specified for - param_value_size will be filled with the value 1. - The returned value will produce an exact number of sub-groups - and result in no partial groups for an executing kernel except - in the case where the last work-group in a dimension has a size - different from that of the other groups. - If no work-group size can accommodate the requested number of - sub-groups, 0 will be returned in each element of the return - array. -| {CL_KERNEL_MAX_NUM_SUB_GROUPS_anchor} + | Returns the number of kernels declared in _program_ that can be + created with {clCreateKernel}. + This information is only available after a successful program + executable has been built for at least one device in the list of + devices associated with _program_. +| {CL_PROGRAM_KERNEL_NAMES_anchor} -include::{generated}/api/version-notes/CL_KERNEL_MAX_NUM_SUB_GROUPS.asciidoc[] -Also see extension *cl_khr_subgroups*. - | ignored - | {size_t_TYPE} - | This provides a mechanism for the application to query the - maximum number of sub-groups that may make up each work-group to - execute a kernel on a specific device given by device. - The OpenCL implementation uses the resource requirements of the - kernel (register usage etc.) to determine what this work-group - size should be. - The returned value may be used to compute a work-group size to - enqueue the kernel with to give a round number of sub-groups for - an enqueue. -| {CL_KERNEL_COMPILE_NUM_SUB_GROUPS_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_KERNEL_NAMES.asciidoc[] + | {char_TYPE}[] + | Returns a semi-colon separated list of kernel names in _program_ + that can be created with {clCreateKernel}. 
+ This information is only available after a successful program + executable has been built for at least one device in the list of + devices associated with _program_. +| {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT_anchor} -include::{generated}/api/version-notes/CL_KERNEL_COMPILE_NUM_SUB_GROUPS.asciidoc[] -Also see extension *cl_khr_subgroups*. - | ignored - | {size_t_TYPE} - | Returns the number of sub-groups per work-group specified in the kernel - source or IL. If the sub-group count is not specified then 0 is returned. +include::{generated}/api/version-notes/CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT.asciidoc[] + | {cl_bool_TYPE} + | This indicates that the _program_ object contains non-trivial + constructor(s) that will be executed by runtime before any kernel + from the program is executed. + This information is only available after a successful program + executable has been built for at least one device in the list of + devices associated with _program_. + + Querying {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT} may unconditionally + return {CL_FALSE} if no devices associated with _program_ support + constructors for program scope global variables. + Support for constructors and destructors for program scope global + variables is required only for OpenCL 2.2 devices. +| {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT.asciidoc[] + | {cl_bool_TYPE} + | This indicates that the program object contains non-trivial + destructor(s) that will be executed by runtime when _program_ is + destroyed. + This information is only available after a successful program + executable has been built for at least one device in the list of + devices associated with _program_. + + Querying {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT} may unconditionally + return {CL_FALSE} if no devices associated with _program_ support + destructors for program scope global variables.
+ Support for constructors and destructors for program scope global + variables is required only for OpenCL 2.2 devices. |==== // refError -{clGetKernelSubGroupInfo} returns {CL_SUCCESS} if the function is executed +{clGetProgramInfo} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated - with _kernel_ or if _device_ is `NULL` but there is more than one device - associated with _kernel_. - * {CL_INVALID_OPERATION} if _device_ does not support sub-groups. * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes specified by _param_value_size_ is < size of return type as described in - the <> table - and _param_value_ is not `NULL`. - * {CL_INVALID_VALUE} if _param_name_ is - {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE}, - {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE} or - {CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT} and the size in bytes specified - by _input_value_size_ is not valid or if _input_value_ is `NULL`. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. + the <<program-info-table>> table and + _param_value_ is not `NULL`. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_PROGRAM_EXECUTABLE} if _param_name_ is + {CL_PROGRAM_NUM_KERNELS}, {CL_PROGRAM_KERNEL_NAMES}, + {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT}, or + {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT} and a successful program executable + has not been built for at least one device in the list of devices + associated with _program_. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host.
-- -[open,refpage='clGetKernelArgInfo',desc='Returns information about the arguments of a kernel.',type='protos'] +[open,refpage='clGetProgramBuildInfo',desc='Returns build information for each device in the program object.',type='protos'] -- -To return information about the arguments of a kernel, call the function +To return build information for each device in the program object, call the +function -include::{generated}/api/protos/clGetKernelArgInfo.txt[] -include::{generated}/api/version-notes/clGetKernelArgInfo.asciidoc[] +include::{generated}/api/protos/clGetProgramBuildInfo.txt[] +include::{generated}/api/version-notes/clGetProgramBuildInfo.asciidoc[] - * _kernel_ specifies the kernel object being queried. - * _arg_index_ is the argument index. - Arguments to the kernel are referred by indices that go from 0 for the - leftmost argument to _n_ - 1, where _n_ is the total number of arguments - declared by a kernel. - * _param_name_ specifies the argument information to query. + * _program_ specifies the program object being queried. + * _device_ specifies the device for which build information is being queried. + _device_ must be a valid device associated with _program_. + * _param_name_ specifies the information to query. The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetKernelArgInfo} is described in the - <> table. + _param_value_ by {clGetProgramBuildInfo} is described in the + <> table. * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. * _param_value_size_ is used to specify the size in bytes of memory pointed to by _param_value_. - This size must be > size of return type as described in the - <> table. - * _param_value_size ret_ returns the actual size in bytes of data being + This size must be {geq} size of return type as described in the + <> table. 
+ * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. -Kernel argument information is only available if the program object -associated with _kernel_ is created with {clCreateProgramWithSource} and the -program executable was built with the `-cl-kernel-arg-info option` specified -in options argument to {clBuildProgram} or {clCompileProgram}. - -[[kernel-argument-info-table]] -.List of supported param_names by {clGetKernelArgInfo} +[[program-build-info-table]] +.List of supported param_names by {clGetProgramBuildInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== -| Kernel Arg Info | Return Type | Description -| {CL_KERNEL_ARG_ADDRESS_QUALIFIER_anchor} - -include::{generated}/api/version-notes/CL_KERNEL_ARG_ADDRESS_QUALIFIER.asciidoc[] - | {cl_kernel_arg_address_qualifier_TYPE} - | Returns the address qualifier specified for the argument given by - _arg_index_. - This can be one of the following values: +| Program Build Info | Return Type | Description +| {CL_PROGRAM_BUILD_STATUS_anchor} - {CL_KERNEL_ARG_ADDRESS_GLOBAL_anchor} + - {CL_KERNEL_ARG_ADDRESS_LOCAL_anchor} + - {CL_KERNEL_ARG_ADDRESS_CONSTANT_anchor} + - {CL_KERNEL_ARG_ADDRESS_PRIVATE_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_BUILD_STATUS.asciidoc[] + | {cl_build_status_TYPE} + | Returns the build, compile or link status, whichever was performed + last on the specified _program_ object for _device_. - If no address qualifier is specified, the default address qualifier - which is {CL_KERNEL_ARG_ADDRESS_PRIVATE} is returned. -| {CL_KERNEL_ARG_ACCESS_QUALIFIER_anchor} + This can be one of the following: -include::{generated}/api/version-notes/CL_KERNEL_ARG_ACCESS_QUALIFIER.asciidoc[] - | {cl_kernel_arg_access_qualifier_TYPE} - | Returns the access qualifier specified for the argument given by - _arg_index_. 
- This can be one of the following values: + {CL_BUILD_NONE_anchor} - The build status returned if no {clBuildProgram}, + {clCompileProgram} or {clLinkProgram} has been performed on the + specified _program_ object for _device_. - {CL_KERNEL_ARG_ACCESS_READ_ONLY_anchor} + - {CL_KERNEL_ARG_ACCESS_WRITE_ONLY_anchor} + - {CL_KERNEL_ARG_ACCESS_READ_WRITE_anchor} + - {CL_KERNEL_ARG_ACCESS_NONE_anchor} + {CL_BUILD_ERROR_anchor} - The build status returned if {clBuildProgram}, + {clCompileProgram} or {clLinkProgram} - whichever was performed last + on the specified _program_ object for _device_ - generated an error. - If argument is not an image type and is not declared with the pipe - qualifier, {CL_KERNEL_ARG_ACCESS_NONE} is returned. - If argument is an image type, the access qualifier specified or the - default access qualifier is returned. -| {CL_KERNEL_ARG_TYPE_NAME_anchor} + {CL_BUILD_SUCCESS_anchor} - The build status returned if {clBuildProgram}, + {clCompileProgram} or {clLinkProgram} - whichever was performed last + on the specified _program_ object for _device_ - was successful. -include::{generated}/api/version-notes/CL_KERNEL_ARG_TYPE_NAME.asciidoc[] - | {char_TYPE}[] - | Returns the type name specified for the argument given by - _arg_index_. - The type name returned will be the argument type name as it was - declared with any whitespace removed. - If argument type name is an unsigned scalar type (i.e. unsigned - char, unsigned short, unsigned int, unsigned long), uchar, ushort, - uint and ulong will be returned. - The argument type name returned does not include any type - qualifiers. -| {CL_KERNEL_ARG_TYPE_QUALIFIER_anchor} + {CL_BUILD_IN_PROGRESS_anchor} - The build status returned if + {clBuildProgram}, {clCompileProgram} or {clLinkProgram} - whichever + was performed last on the specified _program_ object for _device_ - has + not finished.
+| {CL_PROGRAM_BUILD_OPTIONS_anchor} -include::{generated}/api/version-notes/CL_KERNEL_ARG_TYPE_QUALIFIER.asciidoc[] - | {cl_kernel_arg_type_qualifier_TYPE} - | Returns a bitfield describing one or more type qualifiers specified - for the argument given by _arg_index_. - The returned values can be: - - {CL_KERNEL_ARG_TYPE_CONST_anchor} - footnote:[{fn-kernel-arg-type-qualifier}] - footnote:[{fn-kernel-arg-type-const-addr-space}] + - {CL_KERNEL_ARG_TYPE_RESTRICT_anchor} + - {CL_KERNEL_ARG_TYPE_VOLATILE_anchor} + - {CL_KERNEL_ARG_TYPE_PIPE_anchor}, or + - {CL_KERNEL_ARG_TYPE_NONE_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_BUILD_OPTIONS.asciidoc[] + | {char_TYPE}[] + | Return the build, compile or link options specified by the options + argument in {clBuildProgram}, {clCompileProgram} or {clLinkProgram}, + whichever was performed last on the specified _program_ object for + _device_. - {CL_KERNEL_ARG_TYPE_NONE} is returned for all parameters passed by - value. -| {CL_KERNEL_ARG_NAME_anchor} + If build status of the specified _program_ for _device_ is + {CL_BUILD_NONE}, an empty string is returned. +| {CL_PROGRAM_BUILD_LOG_anchor} -include::{generated}/api/version-notes/CL_KERNEL_ARG_NAME.asciidoc[] +include::{generated}/api/version-notes/CL_PROGRAM_BUILD_LOG.asciidoc[] | {char_TYPE}[] - | Returns the name specified for the argument given by _arg_index_. -|==== + | Return the build, compile or link log for {clBuildProgram}, + {clCompileProgram} or {clLinkProgram}, whichever was performed last + on program for device. -{clGetKernelArgInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: + If build status of the specified _program_ for _device_ is + {CL_BUILD_NONE}, an empty string is returned. +| {CL_PROGRAM_BINARY_TYPE_anchor} - * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. 
- * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_ size is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_KERNEL_ARG_INFO_NOT_AVAILABLE} if the argument information is not - available for kernel. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. --- +include::{generated}/api/version-notes/CL_PROGRAM_BINARY_TYPE.asciidoc[] + | {cl_program_binary_type_TYPE} + | Return the program binary type for device. + This can be one of the following values: + {CL_PROGRAM_BINARY_TYPE_NONE_anchor} - There is no binary associated + with the specified _program_ object for _device_. -== Executing Kernels + {CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT_anchor} - A compiled binary is + associated with _device_. + This is the case when the specified _program_ object was created using + {clCreateProgramWithSource} and compiled using {clCompileProgram}, or + when a compiled binary was loaded using {clCreateProgramWithBinary}. -[open,refpage='clEnqueueNDRangeKernel',desc='Enqueues a command to execute a kernel on a device.',type='protos'] --- -To enqueue a command to execute a kernel on a device, call the function + {CL_PROGRAM_BINARY_TYPE_LIBRARY_anchor} - A library binary is + associated with _device_. + This is the case when the specified _program_ object was linked by + {clLinkProgram} using the `-create-library` link option, or when a + compiled library binary was loaded using {clCreateProgramWithBinary}. -include::{generated}/api/protos/clEnqueueNDRangeKernel.txt[] -include::{generated}/api/version-notes/clEnqueueNDRangeKernel.asciidoc[] + {CL_PROGRAM_BINARY_TYPE_EXECUTABLE_anchor} - An executable binary is + associated with _device_. + This is the case when the specified _program_ object was linked by + {clLinkProgram} without the `-create-library` link option, or when an + executable binary was built using {clBuildProgram}. 
- * _command_queue_ is a valid host command-queue. - The kernel will be queued for execution on the device associated with - _command_queue_. - * _kernel_ is a valid kernel object. - The OpenCL context associated with _kernel_ and _command-queue_ must be the - same. - * _work_dim_ is the number of dimensions used to specify the global work-items - and work-items in the work-group. - _work_dim_ must be greater than zero and less than or equal to - {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}. - If _global_work_size_ is `NULL`, or the value in any passed dimension is 0 - then the kernel command will trivially succeed after its event dependencies - are satisfied and subsequently update its completion event. - The behavior in this situation is similar to that of an enqueued marker, - except that unlike a marker, an enqueued kernel with no events passed to - _event_wait_list_ may run at any time. - * _global_work_offset_ can be used to specify an array of _work_dim_ unsigned - values that describe the offset used to calculate the global ID of a - work-item. - If _global_work_offset_ is `NULL`, the global IDs start at offset (0, 0, 0). - _global_work_offset_ must be `NULL` <> version 1.1. - * _global_work_size_ points to an array of _work_dim_ unsigned values that - describe the number of global work-items in _work_dim_ dimensions that will - execute the kernel function. - The total number of global work-items is computed as _global_work_size_[0] - {times} ... {times} _global_work_size_[_work_dim_ - 1]. - * _local_work_size_ points to an array of _work_dim_ unsigned values that - describe the number of work-items that make up a work-group (also referred - to as the size of the work-group) that will execute the kernel specified by - _kernel_. - The total number of work-items in a work-group is computed as - _local_work_size_[0] {times} ... {times} _local_work_size_[_work_dim_ - 1]. 
- The total number of work-items in the work-group must be less than or equal - to the {CL_KERNEL_WORK_GROUP_SIZE} value specified in the - <> table, and the - number of work-items specified in _local_work_size_[0], ..., - _local_work_size_[_work_dim_ - 1] must be less than or equal to the - corresponding values specified by {CL_DEVICE_MAX_WORK_ITEM_SIZES}[0], ..., - {CL_DEVICE_MAX_WORK_ITEM_SIZES}[_work_dim_ - 1]. - The explicitly specified _local_work_size_ will be used to determine how to - break the global work-items specified by _global_work_size_ into appropriate - work-group instances. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. - If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. - If _event_wait_list_ is not `NULL`, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. +ifdef::cl_khr_spir[] + {CL_PROGRAM_BINARY_TYPE_INTERMEDIATE_anchor} -- An intermediate + (non-source) representation for the program is loaded as a binary. 
+ The program must be further processed with {clCompileProgram} or + {clBuildProgram}. -An ND-range kernel command may require uniform work-groups or may support non-uniform work-groups. -To support non-uniform work-groups: + If processed with {clCompileProgram}, the result will be a binary of + type {CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT} or + {CL_PROGRAM_BINARY_TYPE_LIBRARY}. + If processed with {clBuildProgram}, the result will be a binary of + type {CL_PROGRAM_BINARY_TYPE_EXECUTABLE}. -. The device associated with _command_queue_ must support non-uniform work-groups. -. The program object associated with _kernel_ must support non-uniform work-groups. -Specifically, this means: -.. If the program was created with {clCreateProgramWithSource}, the program must be compiled or built using the `-cl-std=CL2.0` or `-cl-std=CL3.0` build option and without the `-cl-uniform-work-group-size` build option. -.. If the program was created with {clCreateProgramWithIL} or {clCreateProgramWithBinary}, the program must be compiled or built without the `-cl-uniform-work-group-size` build options. -.. If the program was created using {clLinkProgram}, all input programs must support non-uniform work-groups. +include::{generated}/api/version-notes/CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE.asciidoc[] +endif::cl_khr_spir[] -If non-uniform work-groups are supported, any single dimension -for which the global size is not divisible by the local size will be -partitioned into two regions. -One region will have work-groups that have the same number of work-items as -was specified by the local size parameter in that dimension. -The other region will have work-groups with less than the number of work -items specified by the local size parameter in that dimension. -The global IDs and group IDs of the work-items in the first region will be -numerically lower than those in the second, and the second region will be at -most one work-group wide in that dimension. 
-Work-group sizes could be non-uniform in multiple dimensions, potentially -producing work-groups of up to 4 different sizes in a 2D range and 8 -different sizes in a 3D range. +| {CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE_anchor} -If non-uniform work-groups are supported and _local_work_size_ is `NULL`, the OpenCL runtime may choose a uniform or non-uniform work-group size. +include::{generated}/api/version-notes/CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE.asciidoc[] + | {size_t_TYPE} + | The total amount of storage, in bytes, used by program variables in + the global address space. +|==== -Otherwise, when non-uniform work-groups are not supported, the size of each work-group must be uniform. -If _local_work_size_ is specified, the values specified in _global_work_size_[0], ..., _global_work_size_[_work_dim_ - 1] must be evenly divisible by the corresponding values specified in _local_work_size_[0], ..., _local_work_size_[_work_dim_ - 1]. -If _local_work_size_ is `NULL`, the OpenCL runtime must choose a uniform work-group size. +// refError -The work-group size to be used for _kernel_ can also be specified in the -program source or intermediate language. -In this case the size of work-group specified by _local_work_size_ must -match the value specified in the program source. +{clGetProgramBuildInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: -These work-group instances are executed in parallel across multiple compute -units or concurrently on the same compute unit. + * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated + with _program_. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <> table and + _param_value_ is not `NULL`. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object.
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -Each work-item is uniquely identified by a global identifier. -The global ID, which can be read inside the kernel, is computed using the -value given by _global_work_size_ and _global_work_offset_. -In addition, a work-item is also identified within a work-group by a unique -local ID. -The local ID, which can also be read by the kernel, is computed using the -value given by _local_work_size_. -The starting local ID is always (0, 0, ..., 0). +[NOTE] +==== +A program binary (compiled binary, library binary or executable binary) +built for a parent device can be used by all its sub-devices. +If a program binary has not been built for a sub-device, the program binary +associated with the parent device will be used. -// refError +A program binary for a device specified with {clCreateProgramWithBinary} or +queried using {clGetProgramInfo} can be used as the binary for the +associated root device, and all sub-devices created from the root-level +device or sub-devices thereof. +==== +-- -{clEnqueueNDRangeKernel} returns {CL_SUCCESS} if the kernel-instance was -successfully queued. -Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program - executable available for device associated with _command_queue_. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. +== Kernel Objects + +A kernel is a function declared in a program. +A kernel is identified by the `+__kernel+` qualifier applied to any function +in a program. +A kernel object encapsulates the specific `+__kernel+` function declared in +a program and the argument values to be used when executing this +`+__kernel+` function. 
+ + +=== Creating Kernel Objects + +[open,refpage='clCreateKernel',desc='Creates a kernel object.',type='protos'] +-- +To create a kernel object, use the function + +include::{generated}/api/protos/clCreateKernel.txt[] +include::{generated}/api/version-notes/clCreateKernel.asciidoc[] + + * _program_ is a program object with a successfully built executable. + * _kernel_name_ is a function name in the program declared with the + `+__kernel+` qualifier. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +// refError + +{clCreateKernel} returns a valid non-zero kernel object and _errcode_ret_ is +set to {CL_SUCCESS} if the kernel object is created successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built + executable for _program_. + * {CL_INVALID_KERNEL_NAME} if _kernel_name_ is not found in _program_. + * {CL_INVALID_KERNEL_DEFINITION} if the function definition for `+__kernel+` + function given by _kernel_name_ such as the number of arguments, the + argument types are not the same for all devices for which the _program_ + executable has been built. + * {CL_INVALID_VALUE} if _kernel_name_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
+-- + +[open,refpage='clCreateKernelsInProgram',desc='Creates kernel objects for all kernel functions in a program object.',type='protos'] +-- +To create kernel objects for all kernel functions in a program, +call the function + +include::{generated}/api/protos/clCreateKernelsInProgram.txt[] +include::{generated}/api/version-notes/clCreateKernelsInProgram.asciidoc[] + + * _program_ is a program object with a successfully built executable. + * _num_kernels_ is the size of memory pointed to by _kernels_ specified as the + number of {cl_kernel_TYPE} entries. + * _kernels_ is the buffer where the kernel objects for kernels in _program_ + will be returned. + If _kernels_ is `NULL`, it is ignored. + If _kernels_ is not `NULL`, _num_kernels_ must be greater than or equal to + the number of kernels in _program_. + * _num_kernels_ret_ is the number of kernels in _program_. + If _num_kernels_ret_ is `NULL`, it is ignored. + +Kernel objects are not created for any `+__kernel+` functions in _program_ +that do not have the same function definition across all devices for which a +program executable has been successfully built. + +Kernel objects can only be created once you have a program object with a +valid program source or binary loaded into the program object and the +program executable has been successfully built for one or more devices +associated with program. +No changes to the program executable are allowed while there are kernel +objects associated with a program object. +This means that calls to {clBuildProgram} and {clCompileProgram} return +{CL_INVALID_OPERATION} if there are kernel objects attached to a program +object. +The OpenCL context associated with _program_ will be the context associated +with _kernel_. +The list of devices associated with _program_ are the devices associated +with _kernel_. +Devices associated with a program object for which a valid program +executable has been built can be used to execute kernels declared in the +program object. 
+ +// refError + +{clCreateKernelsInProgram} will return {CL_SUCCESS} if the kernel objects were +successfully allocated. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built + executable for any device in _program_. + * {CL_INVALID_VALUE} if _kernels_ is not `NULL` and _num_kernels_ is less + than the number of kernels in _program_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +-- + +[open,refpage='clRetainKernel',desc='Increments the kernel object reference count.',type='protos'] +-- +To retain a kernel object, call the function + +include::{generated}/api/protos/clRetainKernel.txt[] +include::{generated}/api/version-notes/clRetainKernel.asciidoc[] + + * _kernel_ is the kernel object to be retained. + +The _kernel_ reference count is incremented. + +// refError + +{clRetainKernel} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and - _kernel_ are not the same or if the context associated with - _command_queue_ and events in _event_wait_list_ are not the same. - * {CL_INVALID_KERNEL_ARGS} if the kernel argument values have not been - specified. - * {CL_INVALID_WORK_DIMENSION} if _work_dim_ is not a valid value (i.e. a - value between 1 and {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}). - * {CL_INVALID_GLOBAL_WORK_SIZE} if _global_work_size_ is NULL or if any of - the values specified in _global_work_size_[0], ... - _global_work_size_[_work_dim_ - 1] are 0. - Returning this error code under these circumstances is <> version 2.1. 
- * {CL_INVALID_GLOBAL_WORK_SIZE} if any of the values specified in - _global_work_size_[0], ... _global_work_size_[_work_dim_ - 1] exceed the - maximum value representable by {size_t_TYPE} on the device on which the - kernel-instance will be enqueued. - * {CL_INVALID_GLOBAL_OFFSET} if the value specified in _global_work_size_ - {plus} the corresponding values in _global_work_offset_ for any - dimensions is greater than the maximum value representable by size t on - the device on which the kernel-instance will be enqueued, or if - _global_work_offset_ is non-`NULL` <> version 1.1. - * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and does - not match the required work-group size for _kernel_ in the program - source. - * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and is not - consistent with the required number of sub-groups for _kernel_ in the - program source. - * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and the - total number of work-items in the work-group computed as - _local_work_size_[0] {times} ... _local_work_size_[_work_dim_ - 1] is - greater than the value specified by {CL_KERNEL_WORK_GROUP_SIZE} in the - <> table. - * {CL_INVALID_WORK_GROUP_SIZE} if the work-group size must be uniform and - the _local_work_size_ is not `NULL`, is not equal to the required - work-group size specified in the kernel source, or the - _global_work_size_ is not evenly divisible by the _local_work_size_. - * {CL_INVALID_WORK_ITEM_SIZE} if the number of work-items specified in any - of _local_work_size_[0], ... _local_work_size_[_work_dim_ - 1] is - greater than the corresponding values specified by - {CL_DEVICE_MAX_WORK_ITEM_SIZES}[0], ..., - {CL_DEVICE_MAX_WORK_ITEM_SIZES}[_work_dim_ - 1]. 
- * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is specified as - the value for an argument that is a buffer object and the _offset_ - specified when the sub-buffer object is created is not aligned to - {CL_DEVICE_MEM_BASE_ADDR_ALIGN} value for device associated with _queue_. - This error code is <> version 1.1. - * {CL_INVALID_IMAGE_SIZE} if an image object is specified as an argument - value and the image dimensions (image width, height, specified or - compute row and/or slice pitch) are not supported by device associated - with _queue_. - * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is specified as an - argument value and the image format (image channel order and data type) - is not supported by device associated with _queue_. - * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution - instance of _kernel_ on the command-queue because of insufficient - resources needed to execute the kernel. - For example, the explicitly specified _local_work_size_ causes a failure - to execute the kernel because of insufficient resources such as - registers or local memory. - Another example would be the number of read-only image args used in - _kernel_ exceed the {CL_DEVICE_MAX_READ_IMAGE_ARGS} value for device or - the number of write-only and read-write image args used in _kernel_ - exceed the {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS} value for device or the - number of samplers used in _kernel_ exceed {CL_DEVICE_MAX_SAMPLERS} for - device. - * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate - memory for data store associated with image or buffer objects specified - as arguments to _kernel_. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. 
- * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel - and the device does not support SVM or if system pointers are passed as - arguments to a kernel and/or stored inside SVM allocations passed as - kernel arguments and the device does not support fine grain system SVM - allocations. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +{clCreateKernel} or {clCreateKernelsInProgram} do an implicit retain. +-- + +[open,refpage='clReleaseKernel',desc='Decrements the kernel reference count.',type='protos'] +-- +To release a kernel object, call the function + +include::{generated}/api/protos/clReleaseKernel.txt[] +include::{generated}/api/version-notes/clReleaseKernel.asciidoc[] + + * _kernel_ is the kernel object to be released. + +The _kernel_ reference count is decremented. + +The kernel object is deleted once the number of instances that are retained +to _kernel_ becomes zero and the kernel object is no longer needed by any +enqueued commands that use _kernel_. +Using this function to release a reference that was not obtained by creating +the object or by calling {clRetainKernel} causes undefined behavior. + +// refError + +{clReleaseKernel} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +[[setting-kernel-arguments]] +=== Setting Kernel Arguments + +To execute a kernel, the kernel arguments must be set.
+ +[open,refpage='clSetKernelArg',desc='Set the argument value for a specific argument of a kernel.',type='protos'] +-- +To set the argument value for a specific argument of a kernel, call the +function + +include::{generated}/api/protos/clSetKernelArg.txt[] +include::{generated}/api/version-notes/clSetKernelArg.asciidoc[] + + * _kernel_ is a valid kernel object. + * _arg_index_ is the argument index. + Arguments to the kernel are referred by indices that go from 0 for the + leftmost argument to _n_ - 1, where _n_ is the total number of arguments + declared by a kernel (see below). + * _arg_size_ specifies the size of the argument value. + If the argument is a memory object, the _arg_size_ value must be equal to + `sizeof({cl_mem_TYPE})`. + For arguments declared with the `local` qualifier, the size specified will + be the size in bytes of the buffer that must be allocated for the `local` + argument. + If the argument is of type _sampler_t_, the _arg_size_ value must be equal + to `sizeof({cl_sampler_TYPE})`. + If the argument is of type _queue_t_, the _arg_size_ value must be equal to + `sizeof({cl_command_queue_TYPE})`. + For all other arguments, the size will be the size of argument type. + * _arg_value_ is a pointer to data that should be used as the argument value + for argument specified by _arg_index_. + The argument data pointed to by _arg_value_ is copied and the _arg_value_ + pointer can therefore be reused by the application after {clSetKernelArg} + returns. + The argument value specified is the value used by all API calls that enqueue + _kernel_ ({clEnqueueNDRangeKernel} and {clEnqueueTask}) until the argument + value is changed by a call to {clSetKernelArg} for _kernel_. + +For example, consider the following kernel: + +[source,opencl_c] +---- +kernel void image_filter (int n, + int m, + constant float *filter_weights, + read_only image2d_t src_image, + write_only image2d_t dst_image) +{ +... 
+} +---- + +Argument index values for `image_filter` will be 0 for `n`, 1 for `m`, 2 for +`filter_weights`, 3 for `src_image` and 4 for `dst_image`. + +If the argument is a memory object (buffer, pipe, image or image array), the +_arg_value_ entry will be a pointer to the appropriate buffer, pipe, image +or image array object. +The memory object must be created with the context associated with the +kernel object. +If the argument is a buffer object, the _arg_value_ pointer can be `NULL` or +point to a `NULL` value in which case a `NULL` value will be used as the +value for the argument declared as a pointer to `global` or `constant` +memory in the kernel. +If the argument is declared with the `local` qualifier, the _arg_value_ +entry must be `NULL`. +If the argument is of type _sampler_t_, the _arg_value_ entry must be a +pointer to the sampler object. +If the argument is of type _queue_t_, the _arg_value_ entry must be a +pointer to the device queue object. + +ifdef::cl_khr_gl_msaa_sharing[] +If the `<>` extension is supported, then: +If the argument is a multi-sample 2D image, the _arg_value_ entry must be a +pointer to a multi-sample image object. +If the argument is a multi-sample 2D depth image, the _arg_value_ entry must +be a pointer to a multisample depth image object. +If the argument is a multi-sample 2D image array, the _arg_value_ entry must +be a pointer to a multi-sample image array object. +If the argument is a multi-sample 2D depth image array, the _arg_value_ +entry must be a pointer to a multi-sample depth image array object. +endif::cl_khr_gl_msaa_sharing[] + +If the argument is declared to be a pointer of a built-in scalar or vector +type, or a user defined structure type in the global or constant address +space, the memory object specified as argument value must be a buffer object +(or `NULL`). 
+If the argument is declared with the `constant` qualifier, the size in bytes +of the memory object cannot exceed {CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE} and +the number of arguments declared as pointers to `constant` memory cannot +exceed {CL_DEVICE_MAX_CONSTANT_ARGS}. + +The memory object specified as argument value must be a pipe object if the +argument is declared with the _pipe_ qualifier. + +The memory object specified as argument value must be a 2D image object if +the argument is declared to be of type _image2d_t_. +The memory object specified as argument value must be a 2D image object with +image channel order = {CL_DEPTH} if the argument is declared to be of type +_image2d_depth_t_. +The memory object specified as argument value must be a 3D image object if +argument is declared to be of type _image3d_t_. +The memory object specified as argument value must be a 1D image object if +the argument is declared to be of type _image1d_t_. +The memory object specified as argument value must be a 1D image buffer +object if the argument is declared to be of type _image1d_buffer_t_. +The memory object specified as argument value must be a 1D image array +object if argument is declared to be of type _image1d_array_t_. +The memory object specified as argument value must be a 2D image array +object if argument is declared to be of type _image2d_array_t_. +The memory object specified as argument value must be a 2D image array +object with image channel order = {CL_DEPTH} if argument is declared to be of +type _image2d_array_depth_t_. + +For all other kernel arguments, the _arg_value_ entry must be a pointer to +the actual data to be used as argument value. + +[NOTE] +==== +A kernel object does not update the reference count for objects such as +memory or sampler objects specified as argument values by {clSetKernelArg}. +Users may not rely on a kernel object to retain objects specified as +argument values to the kernel. 
+ +Implementations shall not allow {cl_kernel_TYPE} objects to hold reference +counts to {cl_kernel_TYPE} arguments, because no mechanism is provided for the +user to tell the kernel to release that ownership right. +If the kernel holds ownership rights on kernel args, that would make it +impossible for users to tell with certainty when they may safely +release user allocated resources associated with OpenCL objects such as +the {cl_mem_TYPE} backing store used with {CL_MEM_USE_HOST_PTR}. +==== + +// refError + +{clSetKernelArg} returns {CL_SUCCESS} if the function was executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. + * {CL_INVALID_ARG_VALUE} if _arg_value_ specified is not a valid value. + * {CL_INVALID_MEM_OBJECT} for an argument declared to be a memory object + when the specified _arg_value_ is not a valid memory object. +ifdef::cl_khr_depth_images,cl_khr_gl_msaa_sharing[] + * {CL_INVALID_MEM_OBJECT} for an argument declared to be a +ifdef::cl_khr_depth_images[] + depth image, depth image array, +endif::cl_khr_depth_images[] +ifdef::cl_khr_gl_msaa_sharing[] + multi-sample image, multi-sample image array, multi-sample depth image, + or a multi-sample depth image array +endif::cl_khr_gl_msaa_sharing[] + when the specified _arg_value_ does not follow the rules described above + for a depth memory object or memory array object argument. +endif::cl_khr_depth_images,cl_khr_gl_msaa_sharing[] + * {CL_INVALID_SAMPLER} for an argument declared to be of type _sampler_t_ + when the specified _arg_value_ is not a valid sampler object. + * {CL_INVALID_DEVICE_QUEUE} for an argument declared to be of type _queue_t_ + when the specified _arg_value_ is not a valid device queue object. + This error code is <> version 2.0. 
+ * {CL_INVALID_ARG_SIZE} if _arg_size_ does not match the size of the data + type for an argument that is not a memory object or if the argument is a + memory object and _arg_size_ != `sizeof({cl_mem_TYPE})` or if _arg_size_ is + zero and the argument is declared with the local qualifier or if the + argument is a sampler and _arg_size_ != `sizeof({cl_sampler_TYPE})`. + * {CL_MAX_SIZE_RESTRICTION_EXCEEDED} if the size in bytes of the memory + object (if the argument is a memory object) or _arg_size_ (if the + argument is declared with `local` qualifier) exceeds a language- + specified maximum size restriction for this argument, such as the + *MaxByteOffset* SPIR-V decoration. + This error code is <> version 2.2. + * {CL_INVALID_ARG_VALUE} if the argument is an image declared with the + `read_only` qualifier and _arg_value_ refers to an image object created + with _cl_mem_flags_ of {CL_MEM_WRITE_ONLY} or if the image argument is + declared with the `write_only` qualifier and _arg_value_ refers to an + image object created with _cl_mem_flags_ of {CL_MEM_READ_ONLY}. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +When {clSetKernelArg} returns an error code different from {CL_SUCCESS}, the +internal state of _kernel_ may only be modified when that error code is +{CL_OUT_OF_RESOURCES} or {CL_OUT_OF_HOST_MEMORY}. When the internal state +of _kernel_ is modified, it is implementation-defined whether: + + * The argument value that was previously set is kept so that it can be used in + further kernel enqueues. + * The argument value is unset such that a subsequent kernel enqueue fails with + {CL_INVALID_KERNEL_ARGS}. 
footnote:[{fn-setkernelarg-prefer-unset-on-error}] +-- + +[open,refpage='clSetKernelArgSVMPointer',desc='Set a SVM pointer as the argument value for a specific argument of a kernel.',type='protos'] +-- +To set a SVM pointer as the argument value for a specific argument of a +kernel, call the function + +include::{generated}/api/protos/clSetKernelArgSVMPointer.txt[] +include::{generated}/api/version-notes/clSetKernelArgSVMPointer.asciidoc[] + + * _kernel_ is a valid kernel object. + * _arg_index_ is the argument index. + Arguments to the kernel are referred by indices that go from 0 for the + leftmost argument to _n_ - 1, where _n_ is the total number of arguments + declared by a kernel. + * _arg_value_ is the SVM pointer that should be used as the argument value for + argument specified by _arg_index_. + The SVM pointer specified is the value used by all API calls that enqueue + _kernel_ ({clEnqueueNDRangeKernel} and {clEnqueueTask}) until the argument + value is changed by a call to {clSetKernelArgSVMPointer} for _kernel_. + The SVM pointer can only be used for arguments that are declared to be a + pointer to `global` or `constant` memory. + The SVM pointer value must be aligned according to the argument's type. + For example, if the argument is declared to be `+global float4 *p+`, the SVM + pointer value passed for `p` must be at a minimum aligned to a `float4`. + The SVM pointer value specified as the argument value can be the pointer + returned by {clSVMAlloc} or can be a pointer offset into the SVM region. + +// refError + +{clSetKernelArgSVMPointer} returns {CL_SUCCESS} if the function was executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. + * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index.
+ * {CL_INVALID_ARG_VALUE} if _arg_value_ specified is not a valid value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clSetKernelExecInfo',desc='Pass additional information other than argument values to a kernel.',type='protos'] +-- +To pass additional information other than argument values to a kernel, call +the function + +include::{generated}/api/protos/clSetKernelExecInfo.txt[] +include::{generated}/api/version-notes/clSetKernelExecInfo.asciidoc[] + + * _kernel_ specifies the kernel object being queried. + * _param_name_ specifies the information to be passed to kernel. + The list of supported _param_name_ types and the corresponding values passed + in _param_value_ is described in the <> table. + * _param_value_size_ specifies the size in bytes of the memory pointed to by + _param_value_. + * _param_value_ is a pointer to memory where the appropriate values determined + by _param_name_ are specified. + +[[kernel-exec-info-table]] +.List of supported param_names by {clSetKernelExecInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Kernel Exec Info | Type | Description +| {CL_KERNEL_EXEC_INFO_SVM_PTRS_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_PTRS.asciidoc[] + | {void_TYPE}*[] + | SVM pointers must reference locations contained entirely within + buffers that are passed to kernel as arguments, or that are passed + through the execution information. + + Non-argument SVM buffers must be specified by passing pointers to + those buffers via {clSetKernelExecInfo} for coarse-grain and + fine-grain buffer SVM allocations but not for finegrain system SVM + allocations. 
+| {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM.asciidoc[] + | {cl_bool_TYPE} + | This flag indicates whether the kernel uses pointers that are fine + grain system SVM allocations. + These fine grain system SVM pointers may be passed as arguments or + defined in SVM buffers that are passed as arguments to _kernel_. +|==== + +// refError + +{clSetKernelExecInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. + * {CL_INVALID_VALUE} if _param_name_ is not valid, if _param_value_ is + `NULL` or if the size specified by _param_value_size_ is not valid. + * {CL_INVALID_OPERATION} if _param_name_ is + {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} and _param_value_ is {CL_TRUE} + but no devices in context associated with _kernel_ support fine-grain + system SVM allocations. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[NOTE] +==== +Coarse-grain or fine-grain buffer SVM pointers used by a kernel which +are not passed as kernel arguments must be specified using +{clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS}. +For example, if SVM buffer A contains a pointer to another SVM buffer B, +and the kernel dereferences that pointer, then a pointer to B must +either be passed as an argument in the call to that kernel or it must be +made available to the kernel using {clSetKernelExecInfo}.
+For example, we might pass extra SVM pointers as follows: + +[source,opencl] +---- +clSetKernelExecInfo(kernel, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + num_ptrs * sizeof(void *), + extra_svm_ptr_list); +---- + +Here `num_ptrs` specifies the number of additional SVM pointers while +`extra_svm_ptr_list` specifies a pointer to memory containing those SVM +pointers. + +When calling {clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS} to +specify pointers to non-argument SVM buffers as extra arguments to a kernel, +each of these pointers can be the SVM pointer returned by {clSVMAlloc} or +can be a pointer + offset into the SVM region. +It is sufficient to provide one pointer for each SVM buffer used. + +{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} is used to indicate whether +SVM pointers used by a kernel will refer to system allocations or not. + +{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_FALSE} indicates that the +OpenCL implementation may assume that system pointers are not passed as +kernel arguments and are not stored inside SVM allocations passed as kernel +arguments. + +{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_TRUE} indicates that the +OpenCL implementation must assume that system pointers might be passed as +kernel arguments and/or stored inside SVM allocations passed as kernel +arguments. +In this case, if the device to which the kernel is enqueued does not support +system SVM pointers, {clEnqueueNDRangeKernel} and {clEnqueueTask} will return a +{CL_INVALID_OPERATION} error. +If none of the devices in the context associated with kernel support +fine-grain system SVM allocations, {clSetKernelExecInfo} will return a +{CL_INVALID_OPERATION} error. + +If {clSetKernelExecInfo} has not been called with a value for +{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM}, the default value is used for +this kernel attribute. +The default value depends on whether the device on which the kernel is +enqueued supports fine-grain system SVM allocations. 
+If so, the default value used is {CL_TRUE} (system pointers might be passed); +otherwise, the default is {CL_FALSE}. + +A call to {clSetKernelExecInfo} for a given value of _param_name_ +replaces any prior value passed for that value of _param_name_. +Only one _param_value_ will be stored for each value of _param_name_. +==== + + +=== Copying Kernel Objects + +NOTE: Copying kernel objects is <<unified-spec, missing before>> version 2.1. + +[open,refpage='clCloneKernel',desc='Make a shallow copy of the kernel object.',type='protos'] +-- +To clone a kernel object, call the function + +include::{generated}/api/protos/clCloneKernel.txt[] +include::{generated}/api/version-notes/clCloneKernel.asciidoc[] + + * _source_kernel_ is a valid {cl_kernel_TYPE} object that will be copied. + _source_kernel_ will not be modified in any way by this function. + * _errcode_ret_ will be assigned an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +Cloning is used to make a shallow copy of the kernel object, its arguments +and any information passed to the kernel object using {clSetKernelExecInfo}. +If the kernel object was ready to be enqueued before copying it, the clone +of the kernel object is ready to enqueue. + +The returned kernel object is an exact copy of _source_kernel_, with one +caveat: the reference count on the returned kernel object is set as if it +had been returned by {clCreateKernel}. +The reference count of _source_kernel_ will not be changed. + +The resulting kernel will be in the same state as if {clCreateKernel} is +called to create the resultant kernel with the same arguments as those used +to create _source_kernel_, the latest call to {clSetKernelArg} or +{clSetKernelArgSVMPointer} for each argument index applied to kernel and the +last call to {clSetKernelExecInfo} for each value of the param name +parameter are applied to the new kernel object.
+ +All arguments of the new kernel object must be intact and it may be +correctly used in the same situations as kernel except those that assume a +pre-existing reference count. +Setting arguments on the new kernel object will not affect _source_kernel_ +except insofar as the argument points to a shared underlying entity and in +that situation behavior is as if two kernel objects had been created and the +same argument applied to each. +Only the data stored in the kernel object is copied; data referenced by the +kernel's arguments are not copied. +For example, if a buffer or pointer argument is set on a kernel object, the +pointer is copied but the underlying memory allocation is not. + +// refError + +{clCloneKernel} returns a valid non-zero kernel object and _errcode_ret_ is +set to {CL_SUCCESS} if the kernel is successfully copied. +Otherwise it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_KERNEL} if _source_kernel_ is not a valid kernel object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +=== Kernel Object Queries + +[open,refpage='clGetKernelInfo',desc='Returns information about the kernel object.',type='protos'] +-- +To return information about a kernel object, call the function + +include::{generated}/api/protos/clGetKernelInfo.txt[] +include::{generated}/api/version-notes/clGetKernelInfo.asciidoc[] + + * _kernel_ specifies the kernel object being queried. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetKernelInfo} is described in the + <<kernel-info-table,Kernel Object Queries>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored.
+ * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[kernel-info-table]] +.List of supported param_names by {clGetKernelInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Kernel Info | Return Type | Description +| {CL_KERNEL_FUNCTION_NAME_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_FUNCTION_NAME.asciidoc[] + | {char_TYPE}[] + | Return the kernel function name. +| {CL_KERNEL_NUM_ARGS_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_NUM_ARGS.asciidoc[] + | {cl_uint_TYPE} + | Return the number of arguments to kernel. +| {CL_KERNEL_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] + +include::{generated}/api/version-notes/CL_KERNEL_REFERENCE_COUNT.asciidoc[] + | {cl_uint_TYPE} + | Return the _kernel_ reference count. +| {CL_KERNEL_CONTEXT_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_CONTEXT.asciidoc[] + | {cl_context_TYPE} + | Return the context associated with _kernel_. +| {CL_KERNEL_PROGRAM_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_PROGRAM.asciidoc[] + | {cl_program_TYPE} + | Return the program object associated with kernel. +| {CL_KERNEL_ATTRIBUTES_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ATTRIBUTES.asciidoc[] + | {char_TYPE}[] + | Returns any attributes specified using the `+__attribute__+` + OpenCL C qualifier (or using an OpenCL {cpp} qualifier syntax [[]] ) + with the kernel function declaration in the program source. + These attributes include attributes described in the earlier OpenCL + C kernel language specifications and other attributes supported by + an implementation. 
+ + Attributes are returned as they were declared inside + `+__attribute__((...))+`, with any surrounding whitespace and + embedded newlines removed. + When multiple attributes are present, they are returned as a single, + space delimited string. + + For kernels not created from OpenCL C source and the + {clCreateProgramWithSource} API call the string returned from this + query will be empty. +|==== + +// refError + +{clGetKernelInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<kernel-info-table,Kernel Object Queries>> table and _param_value_ + is not `NULL`. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clGetKernelWorkGroupInfo',desc='Returns information about the kernel object that may be specific to a device.',type='protos'] +-- +To return information about the kernel object that may be specific to a +device, call the function + +include::{generated}/api/protos/clGetKernelWorkGroupInfo.txt[] +include::{generated}/api/version-notes/clGetKernelWorkGroupInfo.asciidoc[] + + * _kernel_ specifies the kernel object being queried. + * _device_ identifies a specific device in the list of devices associated with + _kernel_. + The list of devices is the list of devices in the OpenCL context that is + associated with _kernel_. + If the list of devices associated with _kernel_ is a single device, _device_ + can be a `NULL` value. + * _param_name_ specifies the information to query.
+ The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetKernelWorkGroupInfo} is described in the + <> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[kernel-workgroup-info-table]] +.List of supported param_names by {clGetKernelWorkGroupInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Kernel Work-group Info | Return Type | Description +| {CL_KERNEL_GLOBAL_WORK_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_GLOBAL_WORK_SIZE.asciidoc[] + | {size_t_TYPE}[3] + | This provides a mechanism for the application to query the maximum + global size that can be used to execute a kernel (i.e. + _global_work_size_ argument to {clEnqueueNDRangeKernel}) on a custom + device given by device or a built-in kernel on an OpenCL device + given by device. + + If device is not a custom device and kernel is not a built-in + kernel, {clGetKernelWorkGroupInfo} returns the error + {CL_INVALID_VALUE}. +| {CL_KERNEL_WORK_GROUP_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_WORK_GROUP_SIZE.asciidoc[] + | {size_t_TYPE} + | This provides a mechanism for the application to query the maximum + work-group size that can be used to execute the kernel on a specific + device given by device. + The OpenCL implementation uses the resource requirements of the + kernel (register usage etc.) to determine what this work-group size + should be. 
+ + As a result and unlike {CL_DEVICE_MAX_WORK_GROUP_SIZE} this value may + vary from one kernel to another as well as one device to another. + + {CL_KERNEL_WORK_GROUP_SIZE} will be less than or equal to + {CL_DEVICE_MAX_WORK_GROUP_SIZE} for a given kernel object. +| {CL_KERNEL_COMPILE_WORK_GROUP_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_COMPILE_WORK_GROUP_SIZE.asciidoc[] + | {size_t_TYPE}[3] + | Returns the work-group size specified in the kernel source or IL. + + If the work-group size is not specified in the kernel source or IL, + (0, 0, 0) is returned. +| {CL_KERNEL_LOCAL_MEM_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_LOCAL_MEM_SIZE.asciidoc[] + | {cl_ulong_TYPE} + | Returns the amount of local memory in bytes being used by a kernel. + This includes local memory that may be needed by an implementation + to execute the kernel, variables declared inside the kernel with the + `+__local+` address qualifier and local memory to be allocated for + arguments to the kernel declared as pointers with the `+__local+` + address qualifier and whose size is specified with {clSetKernelArg}. + + If the local memory size, for any pointer argument to the kernel + declared with the `+__local+` address qualifier, is not specified, + its size is assumed to be 0. +| {CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.asciidoc[] + | {size_t_TYPE} + | Returns the preferred multiple of work-group size for launch. + This is a performance hint. + Specifying a work-group size that is not a multiple of the value + returned by this query as the value of the local work size argument + to {clEnqueueNDRangeKernel} will not fail to enqueue the kernel for + execution unless the work-group size specified is larger than the + device maximum. 
+| {CL_KERNEL_PRIVATE_MEM_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_PRIVATE_MEM_SIZE.asciidoc[] + | {cl_ulong_TYPE} + | Returns the minimum amount of private memory, in bytes, used by each + work-item in the kernel. + This value may include any private memory needed by an + implementation to execute the kernel, including that used by the + language built-ins and variables declared inside the kernel with the + `+__private+` qualifier. +|==== + +// refError + +{clGetKernelWorkGroupInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated + with _kernel_ or if _device_ is `NULL` but there is more than one device + associated with _kernel_. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<kernel-workgroup-info-table,Kernel Object Device Queries>> table + and _param_value_ is not `NULL`. + * {CL_INVALID_VALUE} if _param_name_ is {CL_KERNEL_GLOBAL_WORK_SIZE} and + _device_ is not a custom device and _kernel_ is not a built-in kernel. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clGetKernelSubGroupInfo',desc='Returns information about the kernel object.',type='protos'] +-- +To return information about a kernel object, call the function + +include::{generated}/api/protos/clGetKernelSubGroupInfo.txt[] +include::{generated}/api/version-notes/clGetKernelSubGroupInfo.asciidoc[] + +Also see `<<cl_khr_subgroups>>`. + + * _kernel_ specifies the kernel object being queried. + * _device_ identifies a specific device in the list of devices associated with + _kernel_.
+ The list of devices is the list of devices in the OpenCL context that is + associated with _kernel_. + If the list of devices associated with _kernel_ is a single device, _device_ + can be a `NULL` value. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetKernelSubGroupInfo} is described in the + <> table. + * _input_value_size_ is used to specify the size in bytes of memory pointed to + by _input_value_. + This size must be == size of input type as described in the table below. + * _input_value_ is a pointer to memory where the appropriate parameterization + of the query is passed from. + If _input_value_ is `NULL`, it is ignored. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[kernel-sub-group-info-table]] +.List of supported param_names by {clGetKernelSubGroupInfo} +[width="100%",cols="<25%,<25%,<25%,<25%",options="header"] +|==== +| Kernel Sub-group Info | Input Type | Return Type | Description +| {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE.asciidoc[] + +Also see `<>`. + | {size_t_TYPE}* + | {size_t_TYPE} + | Returns the maximum sub-group size for this kernel. + All sub-groups must be the same size, while the last sub-group in + any work-group (i.e. the sub-group with the maximum index) could + be the same or smaller size. 
+ + The _input_value_ must be an array of {size_t_TYPE} values + corresponding to the local work size parameter of the intended + dispatch. + The number of dimensions in the ND-range will be inferred from + the value specified for _input_value_size_. +| {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE.asciidoc[] + +Also see `<>`. + | {size_t_TYPE}* + | {size_t_TYPE} + | Returns the number of sub-groups that will be present in each + work-group for a given local work size. + All workgroups, apart from the last work-group in each dimension + in the presence of non-uniform work-group sizes, will have the + same number of sub-groups. + + The _input_value_ must be an array of {size_t_TYPE} values + corresponding to the local work size parameter of the intended + dispatch. + The number of dimensions in the ND-range will be inferred from + the value specified for _input_value_size_. +| {CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT.asciidoc[] + +Also see `<>`. + | {size_t_TYPE} + | {size_t_TYPE}[] + | Returns the local size that will generate the requested number + of sub-groups for the kernel. + The output array must be an array of {size_t_TYPE} values corresponding + to the local size parameter. + Any returned work-group will have one dimension. + Other dimensions inferred from the value specified for + param_value_size will be filled with the value 1. + The returned value will produce an exact number of sub-groups + and result in no partial groups for an executing kernel except + in the case where the last work-group in a dimension has a size + different from that of the other groups. + If no work-group size can accommodate the requested number of + sub-groups, 0 will be returned in each element of the return + array. 
+| {CL_KERNEL_MAX_NUM_SUB_GROUPS_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_MAX_NUM_SUB_GROUPS.asciidoc[] + +Also see `<<cl_khr_subgroups>>`. + | ignored + | {size_t_TYPE} + | This provides a mechanism for the application to query the + maximum number of sub-groups that may make up each work-group to + execute a kernel on a specific device given by device. + The OpenCL implementation uses the resource requirements of the + kernel (register usage etc.) to determine what this work-group + size should be. + The returned value may be used to compute a work-group size to + enqueue the kernel with to give a round number of sub-groups for + an enqueue. +| {CL_KERNEL_COMPILE_NUM_SUB_GROUPS_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_COMPILE_NUM_SUB_GROUPS.asciidoc[] + +Also see `<<cl_khr_subgroups>>`. + | ignored + | {size_t_TYPE} + | Returns the number of sub-groups per work-group specified in the kernel + source or IL. If the sub-group count is not specified then 0 is returned. +|==== + +// refError + +{clGetKernelSubGroupInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated + with _kernel_ or if _device_ is `NULL` but there is more than one device + associated with _kernel_. + * {CL_INVALID_OPERATION} if _device_ does not support sub-groups. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<kernel-sub-group-info-table,Kernel Object Subgroup Queries>> table + and _param_value_ is not `NULL`. + * {CL_INVALID_VALUE} if _param_name_ is + {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE}, + {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE} or + {CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT} and the size in bytes specified + by _input_value_size_ is not valid or if _input_value_ is `NULL`. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object.
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clGetKernelArgInfo',desc='Returns information about the arguments of a kernel.',type='protos'] +-- +To return information about the arguments of a kernel, call the function + +include::{generated}/api/protos/clGetKernelArgInfo.txt[] +include::{generated}/api/version-notes/clGetKernelArgInfo.asciidoc[] + + * _kernel_ specifies the kernel object being queried. + * _arg_index_ is the argument index. + Arguments to the kernel are referred to by indices that go from 0 for the + leftmost argument to _n_ - 1, where _n_ is the total number of arguments + declared by a kernel. + * _param_name_ specifies the argument information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetKernelArgInfo} is described in the + <<kernel-argument-info-table,Kernel Argument Queries>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be >= size of return type as described in the + <<kernel-argument-info-table,Kernel Argument Queries>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored.
+ +Kernel argument information is only available if the program object +associated with _kernel_: + +ifdef::cl_khr_spir[] + * is created with {clCreateProgramWithBinary} and the program executable + is built with the `-cl-kernel-arg-info` and `-x spir` options specified + in the _options_ argument to {clBuildProgram} or {clCompileProgram}, if + the `<<cl_khr_spir>>` extension is supported; or, +endif::cl_khr_spir[] + * is created with {clCreateProgramWithSource} and the program executable + is built with the `-cl-kernel-arg-info` option specified in the + _options_ argument to {clBuildProgram} or {clCompileProgram}. + +[[kernel-argument-info-table]] +.List of supported param_names by {clGetKernelArgInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Kernel Arg Info | Return Type | Description +| {CL_KERNEL_ARG_ADDRESS_QUALIFIER_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_ADDRESS_QUALIFIER.asciidoc[] + | {cl_kernel_arg_address_qualifier_TYPE} + | Returns the address qualifier specified for the argument given by + _arg_index_. + This can be one of the following values: + + {CL_KERNEL_ARG_ADDRESS_GLOBAL_anchor} + + {CL_KERNEL_ARG_ADDRESS_LOCAL_anchor} + + {CL_KERNEL_ARG_ADDRESS_CONSTANT_anchor} + + {CL_KERNEL_ARG_ADDRESS_PRIVATE_anchor} + + If no address qualifier is specified, the default address qualifier + which is {CL_KERNEL_ARG_ADDRESS_PRIVATE} is returned. +| {CL_KERNEL_ARG_ACCESS_QUALIFIER_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_ACCESS_QUALIFIER.asciidoc[] + | {cl_kernel_arg_access_qualifier_TYPE} + | Returns the access qualifier specified for the argument given by + _arg_index_.
+ This can be one of the following values: + + {CL_KERNEL_ARG_ACCESS_READ_ONLY_anchor} + + {CL_KERNEL_ARG_ACCESS_WRITE_ONLY_anchor} + + {CL_KERNEL_ARG_ACCESS_READ_WRITE_anchor} + + {CL_KERNEL_ARG_ACCESS_NONE_anchor} + + If argument is not an image type and is not declared with the pipe + qualifier, {CL_KERNEL_ARG_ACCESS_NONE} is returned. + If argument is an image type, the access qualifier specified or the + default access qualifier is returned. +| {CL_KERNEL_ARG_TYPE_NAME_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_TYPE_NAME.asciidoc[] + | {char_TYPE}[] + | Returns the type name specified for the argument given by + _arg_index_. + The type name returned will be the argument type name as it was + declared with any whitespace removed. + If argument type name is an unsigned scalar type (i.e. unsigned + char, unsigned short, unsigned int, unsigned long), uchar, ushort, + uint and ulong will be returned. + The argument type name returned does not include any type + qualifiers. +| {CL_KERNEL_ARG_TYPE_QUALIFIER_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_TYPE_QUALIFIER.asciidoc[] + | {cl_kernel_arg_type_qualifier_TYPE} + | Returns a bitfield describing one or more type qualifiers specified + for the argument given by _arg_index_. + The returned values can be: + + {CL_KERNEL_ARG_TYPE_CONST_anchor} + footnote:[{fn-kernel-arg-type-qualifier}] + footnote:[{fn-kernel-arg-type-const-addr-space}] + + {CL_KERNEL_ARG_TYPE_RESTRICT_anchor} + + {CL_KERNEL_ARG_TYPE_VOLATILE_anchor} + + {CL_KERNEL_ARG_TYPE_PIPE_anchor}, or + + {CL_KERNEL_ARG_TYPE_NONE_anchor} + + {CL_KERNEL_ARG_TYPE_NONE} is returned for all parameters passed by + value. +| {CL_KERNEL_ARG_NAME_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_NAME.asciidoc[] + | {char_TYPE}[] + | Returns the name specified for the argument given by _arg_index_. +|==== + +{clGetKernelArgInfo} returns {CL_SUCCESS} if the function is executed +successfully. 
+Otherwise, it returns one of the following errors: + + * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<kernel-argument-info-table,Kernel Argument Queries>> table and + _param_value_ is not `NULL`. + * {CL_KERNEL_ARG_INFO_NOT_AVAILABLE} if the argument information is not + available for kernel. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. +-- + +ifdef::cl_khr_suggested_local_work_size[] +[open,refpage='clGetKernelSuggestedLocalWorkSizeKHR',desc='Query suggested local work size for a kernel object',type='protos'] +-- +To query a suggested local work size for a kernel object, call the function + +include::{generated}/api/protos/clGetKernelSuggestedLocalWorkSizeKHR.txt[] +include::{generated}/api/version-notes/clGetKernelSuggestedLocalWorkSizeKHR.asciidoc[] + + * _command_queue_ specifies the command-queue and device for the query. + * _kernel_ specifies the kernel object and kernel arguments for the query. + The OpenCL context associated with _kernel_ and _command_queue_ must be the + same. + * _work_dim_ specifies the number of work dimensions in the input global + work offset and global work size, and the output suggested local work + size. + * _global_work_offset_ can be used to specify an array of at least + _work_dim_ global ID offset values for the query. + This is optional and may be `NULL` to indicate there is no global ID + offset. + * _global_work_size_ is an array of at least _work_dim_ values describing + the global work size for the query. + * _suggested_local_work_size_ is an output array of at least _work_dim_ + values that will contain the result of the query.
+ +The returned suggested local work size is expected to match the local work +size that would be chosen if the specified kernel object, with the same +kernel arguments, were enqueued into the specified command-queue with the +specified global work size, specified global work offset, and with a `NULL` +local work size. + +// refError + +{clGetKernelSuggestedLocalWorkSizeKHR} returns {CL_SUCCESS} if the query +executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_CONTEXT} if the context associated with _kernel_ is not the + same as the context associated with _command_queue_. + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built + program executable available for _kernel_ for the device associated with + _command_queue_. + * {CL_INVALID_KERNEL_ARGS} if all argument values for _kernel_ have not + been set. + * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is set as an + argument to _kernel_ and the offset specified when the sub-buffer object + was created is not aligned to {CL_DEVICE_MEM_BASE_ADDR_ALIGN} for the + device associated with _command_queue_. + * {CL_INVALID_IMAGE_SIZE} if an image object is set as an argument to + _kernel_ and the image dimensions are not supported by device associated + with _command_queue_. + * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is set as an argument + to _kernel_ and the image format is not supported by the device + associated with _command_queue_. + * {CL_INVALID_OPERATION} if an SVM pointer is set as an argument to + _kernel_ and the device associated with _command_queue_ does not support + SVM or the required SVM capabilities for the SVM pointer. + * {CL_INVALID_WORK_DIMENSION} if _work_dim_ is not a valid value (i.e. a + value between 1 and {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}). 
+ * {CL_INVALID_GLOBAL_WORK_SIZE} if _global_work_size_ is `NULL` or if any of + the values specified in _global_work_size_ are 0. + * {CL_INVALID_GLOBAL_WORK_SIZE} if any of the values specified in + _global_work_size_ exceed the maximum value representable by `size_t` on + the device associated with _command_queue_. + * {CL_INVALID_GLOBAL_OFFSET} if the value specified in _global_work_size_ + plus the corresponding value in _global_work_offset_ for any dimension + exceeds the maximum value representable by `size_t` on the device + associated with _command_queue_. + * {CL_INVALID_VALUE} if _suggested_local_work_size_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +NOTE: These error conditions are consistent with error conditions for +{clEnqueueNDRangeKernel}. +-- +endif::cl_khr_suggested_local_work_size[] + + +== Executing Kernels + +[open,refpage='clEnqueueNDRangeKernel',desc='Enqueues a command to execute a kernel on a device.',type='protos'] +-- +To enqueue a command to execute a kernel on a device, call the function + +include::{generated}/api/protos/clEnqueueNDRangeKernel.txt[] +include::{generated}/api/version-notes/clEnqueueNDRangeKernel.asciidoc[] + + * _command_queue_ is a valid host command-queue. + The kernel will be queued for execution on the device associated with + _command_queue_. + * _kernel_ is a valid kernel object. + The OpenCL context associated with _kernel_ and _command_queue_ must be the + same. + * _work_dim_ is the number of dimensions used to specify the global work-items + and work-items in the work-group. + _work_dim_ must be greater than zero and less than or equal to + {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}.
+ If _global_work_size_ is `NULL`, or the value in any passed dimension is 0 + then the kernel command will trivially succeed after its event dependencies + are satisfied and subsequently update its completion event. + The behavior in this situation is similar to that of an enqueued marker, + except that unlike a marker, an enqueued kernel with no events passed to + _event_wait_list_ may run at any time. + * _global_work_offset_ can be used to specify an array of _work_dim_ unsigned + values that describe the offset used to calculate the global ID of a + work-item. + If _global_work_offset_ is `NULL`, the global IDs start at offset (0, 0, 0). + _global_work_offset_ must be `NULL` <> version 1.1. + * _global_work_size_ points to an array of _work_dim_ unsigned values that + describe the number of global work-items in _work_dim_ dimensions that will + execute the kernel function. + The total number of global work-items is computed as _global_work_size_[0] + {times} ... {times} _global_work_size_[_work_dim_ - 1]. + * _local_work_size_ points to an array of _work_dim_ unsigned values that + describe the number of work-items that make up a work-group (also referred + to as the size of the work-group) that will execute the kernel specified by + _kernel_. + The total number of work-items in a work-group is computed as + _local_work_size_[0] {times} ... {times} _local_work_size_[_work_dim_ - 1]. + The total number of work-items in the work-group must be less than or equal + to the {CL_KERNEL_WORK_GROUP_SIZE} value specified in the + <> table, and the + number of work-items specified in _local_work_size_[0], ..., + _local_work_size_[_work_dim_ - 1] must be less than or equal to the + corresponding values specified by {CL_DEVICE_MAX_WORK_ITEM_SIZES}[0], ..., + {CL_DEVICE_MAX_WORK_ITEM_SIZES}[_work_dim_ - 1]. 
+ The explicitly specified _local_work_size_ will be used to determine how to + break the global work-items specified by _global_work_size_ into appropriate + work-group instances. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +An ND-range kernel command may require uniform work-groups or may support non-uniform work-groups. +To support non-uniform work-groups: + +. The device associated with _command_queue_ must support non-uniform work-groups. +. The program object associated with _kernel_ must support non-uniform work-groups. +Specifically, this means: +.. If the program was created with {clCreateProgramWithSource}, the program must be compiled or built using the `-cl-std=CL2.0` or `-cl-std=CL3.0` build option and without the `-cl-uniform-work-group-size` build option. +.. 
If the program was created with {clCreateProgramWithIL} or {clCreateProgramWithBinary}, the program must be compiled or built without the `-cl-uniform-work-group-size` build options. +.. If the program was created using {clLinkProgram}, all input programs must support non-uniform work-groups. + +If non-uniform work-groups are supported, any single dimension +for which the global size is not divisible by the local size will be +partitioned into two regions. +One region will have work-groups that have the same number of work-items as +was specified by the local size parameter in that dimension. +The other region will have work-groups with less than the number of work +items specified by the local size parameter in that dimension. +The global IDs and group IDs of the work-items in the first region will be +numerically lower than those in the second, and the second region will be at +most one work-group wide in that dimension. +Work-group sizes could be non-uniform in multiple dimensions, potentially +producing work-groups of up to 4 different sizes in a 2D range and 8 +different sizes in a 3D range. + +If non-uniform work-groups are supported and _local_work_size_ is `NULL`, the OpenCL runtime may choose a uniform or non-uniform work-group size. + +Otherwise, when non-uniform work-groups are not supported, the size of each work-group must be uniform. +If _local_work_size_ is specified, the values specified in _global_work_size_[0], ..., _global_work_size_[_work_dim_ - 1] must be evenly divisible by the corresponding values specified in _local_work_size_[0], ..., _local_work_size_[_work_dim_ - 1]. +If _local_work_size_ is `NULL`, the OpenCL runtime must choose a uniform work-group size. + +The work-group size to be used for _kernel_ can also be specified in the +program source or intermediate language. +In this case the size of work-group specified by _local_work_size_ must +match the value specified in the program source. 
+ +These work-group instances are executed in parallel across multiple compute +units or concurrently on the same compute unit. + +Each work-item is uniquely identified by a global identifier. +The global ID, which can be read inside the kernel, is computed using the +value given by _global_work_size_ and _global_work_offset_. +In addition, a work-item is also identified within a work-group by a unique +local ID. +The local ID, which can also be read by the kernel, is computed using the +value given by _local_work_size_. +The starting local ID is always (0, 0, ..., 0). + +// refError + +{clEnqueueNDRangeKernel} returns {CL_SUCCESS} if the kernel-instance was +successfully queued. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program + executable available for device associated with _command_queue_. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and + _kernel_ are not the same or if the context associated with + _command_queue_ and events in _event_wait_list_ are not the same. + * {CL_INVALID_KERNEL_ARGS} if the kernel argument values have not been + specified. + * {CL_INVALID_WORK_DIMENSION} if _work_dim_ is not a valid value (i.e. a + value between 1 and {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}). + * {CL_INVALID_GLOBAL_WORK_SIZE} if _global_work_size_ is NULL or if any of + the values specified in _global_work_size_[0], ... + _global_work_size_[_work_dim_ - 1] are 0. + Returning this error code under these circumstances is <> version 2.1. + * {CL_INVALID_GLOBAL_WORK_SIZE} if any of the values specified in + _global_work_size_[0], ... _global_work_size_[_work_dim_ - 1] exceed the + maximum value representable by {size_t_TYPE} on the device on which the + kernel-instance will be enqueued. 
+ * {CL_INVALID_GLOBAL_OFFSET} if the value specified in _global_work_size_ + {plus} the corresponding values in _global_work_offset_ for any + dimension is greater than the maximum value representable by {size_t_TYPE} on + the device on which the kernel-instance will be enqueued, or if + _global_work_offset_ is non-`NULL` <<unified-spec, before>> version 1.1. + * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and does + not match the required work-group size for _kernel_ in the program + source. + * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and is not + consistent with the required number of sub-groups for _kernel_ in the + program source. + * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and the + total number of work-items in the work-group computed as + _local_work_size_[0] {times} ... _local_work_size_[_work_dim_ - 1] is + greater than the value specified by {CL_KERNEL_WORK_GROUP_SIZE} in the + <> table. + * {CL_INVALID_WORK_GROUP_SIZE} if the work-group size must be uniform and + the _local_work_size_ is not `NULL`, is not equal to the required + work-group size specified in the kernel source, or the + _global_work_size_ is not evenly divisible by the _local_work_size_. + * {CL_INVALID_WORK_ITEM_SIZE} if the number of work-items specified in any + of _local_work_size_[0], ... _local_work_size_[_work_dim_ - 1] is + greater than the corresponding values specified by + {CL_DEVICE_MAX_WORK_ITEM_SIZES}[0], ..., + {CL_DEVICE_MAX_WORK_ITEM_SIZES}[_work_dim_ - 1]. + * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is specified as + the value for an argument that is a buffer object and the _offset_ + specified when the sub-buffer object is created is not aligned to + {CL_DEVICE_MEM_BASE_ADDR_ALIGN} value for device associated with _command_queue_. + This error code is <<unified-spec, missing before>> version 1.1.
+ * {CL_INVALID_IMAGE_SIZE} if an image object is specified as an argument + value and the image dimensions (image width, height, specified or + computed row and/or slice pitch) are not supported by device associated + with _command_queue_. + * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is specified as an + argument value and the image format (image channel order and data type) + is not supported by device associated with _command_queue_. + * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution + instance of _kernel_ on the command-queue because of insufficient + resources needed to execute the kernel. + For example, the explicitly specified _local_work_size_ causes a failure + to execute the kernel because of insufficient resources such as + registers or local memory. + Another example would be if the number of read-only image args used in + _kernel_ exceeds the {CL_DEVICE_MAX_READ_IMAGE_ARGS} value for device or + the number of write-only and read-write image args used in _kernel_ + exceeds the {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS} value for device or the + number of samplers used in _kernel_ exceeds {CL_DEVICE_MAX_SAMPLERS} for + device. + * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate + memory for data store associated with image or buffer objects specified + as arguments to _kernel_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel + and the device does not support SVM or if system pointers are passed as + arguments to a kernel and/or stored inside SVM allocations passed as + kernel arguments and the device does not support fine grain system SVM + allocations.
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueTask',desc='Enqueues a command to execute a kernel, using a single work-item, on a device.',type='protos'] +-- +To enqueue a command to execute a kernel on a device, using a single work-item, +call the function + +include::{generated}/api/protos/clEnqueueTask.txt[] +include::{generated}/api/version-notes/clEnqueueTask.asciidoc[] + + * _command_queue_ is a valid host command-queue. + The kernel will be queued for execution on the device associated with + _command_queue_. + * _kernel_ is a valid kernel object. + The OpenCL context associated with _kernel_ and _command_queue_ must be the + same. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete.
+ If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +{clEnqueueTask} is equivalent to calling {clEnqueueNDRangeKernel} with +_work_dim_ set to 1, _global_work_offset_ set to `NULL`, _global_work_size[0]_ +set to 1, and _local_work_size[0]_ set to 1. + +// refError + +{clEnqueueTask} returns {CL_SUCCESS} if the kernel-instance was successfully +queued. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program + executable available for device associated with _command_queue_. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and + _kernel_ are not the same or if the context associated with + _command_queue_ and events in _event_wait_list_ are not the same. + * {CL_INVALID_KERNEL_ARGS} if the kernel argument values have not been + specified. + * {CL_INVALID_WORK_GROUP_SIZE} if a work-group size is specified for _kernel_ + in the program source and it is not (1, 1, 1). +// TODO I'm not sure if the next error makes sense for a 'task'. + * {CL_INVALID_WORK_GROUP_SIZE} if the required number of sub-groups is + specified for _kernel_ in the program source and is not consistent with a + work-group size of (1, 1, 1). + * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is specified as + the value for an argument that is a buffer object and the _offset_ + specified when the sub-buffer object is created is not aligned to + {CL_DEVICE_MEM_BASE_ADDR_ALIGN} value for device associated with _command_queue_. + This error code is <<unified-spec, missing before>> version 1.1.
+ * {CL_INVALID_IMAGE_SIZE} if an image object is specified as an argument + value and the image dimensions (image width, height, specified or + computed row and/or slice pitch) are not supported by device associated + with _command_queue_. + * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is specified as an + argument value and the image format (image channel order and data type) + is not supported by device associated with _command_queue_. + * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution + instance of _kernel_ on the command-queue because of insufficient + resources needed to execute the kernel. See how this error code is used + with {clEnqueueNDRangeKernel} for examples. + * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate + memory for data store associated with image or buffer objects specified + as arguments to _kernel_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel + and the device does not support SVM or if system pointers are passed as + arguments to a kernel and/or stored inside SVM allocations passed as + kernel arguments and the device does not support fine grain system SVM + allocations. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host.
+-- + +[open,refpage='clEnqueueNativeKernel',desc='Enqueues a command to execute a native C/C++ function not compiled using the OpenCL compiler.',type='protos'] +-- +To enqueue a command to execute a native C/{cpp} function not compiled using +the OpenCL compiler, call the function + +include::{generated}/api/protos/clEnqueueNativeKernel.txt[] +include::{generated}/api/version-notes/clEnqueueNativeKernel.asciidoc[] + + * _command_queue_ is a valid host command-queue. + A native user function can only be executed on a command-queue created on a + device that has {CL_EXEC_NATIVE_KERNEL} capability set in + {CL_DEVICE_EXECUTION_CAPABILITIES} as specified in the + <> table. + * _user_func_ is a pointer to a host-callable user function. + It is the application's responsibility to ensure that the host-callable user + function is thread-safe. + * _args_ is a pointer to the args list that _user_func_ should be called with. + * _cb_args_ is the size in bytes of the args list that _args_ points to. + * _num_mem_objects_ is the number of buffer objects that are passed in _args_. + * _mem_list_ is a list of valid buffer objects, if _num_mem_objects_ > 0. + The buffer object values specified in _mem_list_ are memory object handles + (`{cl_mem_TYPE}` values) returned by {clCreateBuffer} or {clCreateBufferWithProperties}, + or `NULL`. + * _args_mem_loc_ is a pointer to appropriate locations that _args_ points to + where memory object handles ({cl_mem_TYPE} values) are stored. + Before the user function is executed, the memory object handles are replaced + by pointers to global memory. + * _event_wait_list_, _num_events_in_wait_list_ and _event_ are as described in + {clEnqueueNDRangeKernel}. + +The data pointed to by _args_ and _cb_args_ bytes in size will be copied and +a pointer to this copied region will be passed to _user_func_. 
+The copy needs to be done because the memory objects ({cl_mem_TYPE} values) that +_args_ may contain need to be modified and replaced by appropriate pointers +to global memory. +When {clEnqueueNativeKernel} returns, the memory region pointed to by _args_ +can be reused by the application. + +// refError + +{clEnqueueNativeKernel} returns {CL_SUCCESS} if the user function execution +instance was successfully queued. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _user_func_ is `NULL`. + * {CL_INVALID_VALUE} if _args_ is a `NULL` value and _cb_args_ > 0, or if + _args_ is a `NULL` value and _num_mem_objects_ > 0. + * {CL_INVALID_VALUE} if _args_ is not `NULL` and _cb_args_ is 0. + * {CL_INVALID_VALUE} if _num_mem_objects_ > 0 and _mem_list_ or + _args_mem_loc_ are `NULL`. + * {CL_INVALID_VALUE} if _num_mem_objects_ = 0 and _mem_list_ or + _args_mem_loc_ are not `NULL`. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ + cannot execute the native kernel. + * {CL_INVALID_MEM_OBJECT} if one or more memory objects specified in + _mem_list_ are not valid or are not buffer objects. + * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution + instance of _kernel_ on the command-queue because of insufficient + resources needed to execute the kernel. + * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate + memory for data store associated with buffer objects specified as + arguments to _kernel_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. 
+ * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel + and the device does not support SVM or if system pointers are passed as + arguments to a kernel and/or stored inside SVM allocations passed as + kernel arguments and the device does not support fine grain system SVM + allocations. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +[NOTE] +==== +The total number of read-only images specified as arguments to a kernel +cannot exceed {CL_DEVICE_MAX_READ_IMAGE_ARGS}. +Each image array argument to a kernel declared with the `read_only` +qualifier counts as one image. +The total number of write-only images specified as arguments to a kernel +cannot exceed {CL_DEVICE_MAX_WRITE_IMAGE_ARGS}. +Each image array argument to a kernel declared with the `write_only` +qualifier counts as one image. + +The total number of read-write images specified as arguments to a kernel +cannot exceed {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS}. +Each image array argument to a kernel declared with the `read_write` +qualifier counts as one image. +==== +-- + + +[[event-objects]] +== Event Objects + +An event object can be used to track the execution status of a command. +The API calls that enqueue commands to a command-queue create a new event +object that is returned in the _event_ argument. +In case of an error enqueuing the command in the command-queue the event +argument does not return an event object. + +The execution status of an enqueued command at any given point in time can +be one of the following: + + * {CL_QUEUED_anchor}: Indicates that the command has been enqueued in a + command-queue. + This is the initial state of all events except user events. + * {CL_SUBMITTED_anchor}: The initial state for all user events. 
+ For all other events, indicates that the command has been submitted + by the host to the device. + * {CL_RUNNING_anchor}: Indicates that the device has started executing this + command. + In order for the execution status of an enqueued command to change from + {CL_SUBMITTED} to {CL_RUNNING}, all events that this command is waiting on + must have completed successfully i.e. their execution status must be + {CL_COMPLETE}. + * {CL_COMPLETE_anchor}: Indicates that the command has successfully completed. + * An Error Code: A negative integer value indicating that the command was + abnormally terminated. Abnormal termination may occur for a number of reasons, + such as a bad memory access. + +[NOTE] +==== +A command is considered to be complete if its execution status is +{CL_COMPLETE} or is a negative integer value. + +If the execution of a command is terminated, the command-queue associated +with this terminated command, and the associated context (and all other +command-queues in this context) may no longer be available. +The behavior of OpenCL API calls that use this context (and command-queues +associated with this context) are now considered to be +implementation-defined. +The user registered callback function specified when context is created can +be used to report appropriate error information. +==== + + +=== Creating, Waiting for, and Releasing Event Objects + +[open,refpage='clCreateUserEvent',desc='Creates a user event object.',type='protos'] +-- +To create a user event object, call the function + +include::{generated}/api/protos/clCreateUserEvent.txt[] +include::{generated}/api/version-notes/clCreateUserEvent.asciidoc[] + + * _context_ must be a valid OpenCL context. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +User events allow applications to enqueue commands that wait on a user event +to finish before the command is executed by the device. 
+ +// refError + +{clCreateUserEvent} returns a valid non-zero event object and _errcode_ret_ +is set to {CL_SUCCESS} if the user event object is created successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +The initial execution status for the user event object is {CL_SUBMITTED}. +-- + +[open,refpage='clSetUserEventStatus',desc='Sets the execution status of a user event object.',type='protos'] +-- +To set the execution status of a user event object, call the function + +include::{generated}/api/protos/clSetUserEventStatus.txt[] +include::{generated}/api/version-notes/clSetUserEventStatus.asciidoc[] + + * _event_ is a user event object created using {clCreateUserEvent}. + * _execution_status_ specifies the new execution status to be set and can be + {CL_COMPLETE} or a negative integer value to indicate an error. + A negative integer value causes all enqueued commands that wait on this user + event to be terminated. + {clSetUserEventStatus} can only be called once to change the execution + status of _event_. + +[NOTE] +==== +If there are enqueued commands with user events in the _event_wait_list_ +argument of *+clEnqueue*+* commands, the user must ensure that the status of +these user events being waited on are set using {clSetUserEventStatus} +before any OpenCL APIs that release OpenCL objects except for event objects +are called; otherwise the behavior is undefined. + +For example, the following code sequence will result in undefined behavior +of {clReleaseMemObject}. 
+ +[source,opencl] +---- +ev1 = clCreateUserEvent(ctx, NULL); +clEnqueueWriteBuffer(cq, buf1, CL_FALSE, ..., 1, &ev1, NULL); +clEnqueueWriteBuffer(cq, buf2, CL_FALSE, ...); +clReleaseMemObject(buf2); +clSetUserEventStatus(ev1, CL_COMPLETE); +---- + +The following code sequence, however, works correctly. + +[source,opencl] +---- +ev1 = clCreateUserEvent(ctx, NULL); +clEnqueueWriteBuffer(cq, buf1, CL_FALSE, ..., 1, &ev1, NULL); +clEnqueueWriteBuffer(cq, buf2, CL_FALSE, ...); +clSetUserEventStatus(ev1, CL_COMPLETE); +clReleaseMemObject(buf2); +---- +==== + +// refError + +{clSetUserEventStatus} returns {CL_SUCCESS} if the function was executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_EVENT} if _event_ is not a valid user event object. + * {CL_INVALID_VALUE} if the _execution_status_ is not {CL_COMPLETE} or a + negative integer value. + * {CL_INVALID_OPERATION} if the _execution_status_ for _event_ has already + been changed by a previous call to {clSetUserEventStatus}. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clWaitForEvents',desc='Waits on the host thread for commands identified by event objects to complete.',type='protos'] +-- +To wait for events to complete, call the function + +include::{generated}/api/protos/clWaitForEvents.txt[] +include::{generated}/api/version-notes/clWaitForEvents.asciidoc[] + + * _num_events_ is the number of events in _event_list_. + * _event_list_ is a pointer to a list of event object handles. + +This function waits on the host thread for commands identified by event +objects in _event_list_ to complete. +A command is considered complete if its execution status is {CL_COMPLETE} or a +negative value. +The events specified in _event_list_ act as synchronization points. 
+ +// refError + +{clWaitForEvents} returns {CL_SUCCESS} if the execution status of all events +in _event_list_ is {CL_COMPLETE}. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_events_ is zero or _event_list_ is `NULL`. + * {CL_INVALID_CONTEXT} if events specified in _event_list_ do not belong to + the same context. + * {CL_INVALID_EVENT} if event objects specified in _event_list_ are not + valid event objects. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of + any of the events in _event_list_ is a negative integer value. + This error code is <<unified-spec, missing before>> version 1.1. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clGetEventInfo',desc='Returns information about the event object.',type='protos'] +-- +To return information about an event object, call the function + +include::{generated}/api/protos/clGetEventInfo.txt[] +include::{generated}/api/version-notes/clGetEventInfo.asciidoc[] + + * _event_ specifies the event object being queried. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetEventInfo} is described in the + <<event-info-table,Event Object Queries>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <<event-info-table,Event Object Queries>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored.
+ +[[event-info-table]] +.List of supported param_names by {clGetEventInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Event Info | Return Type | Description +| {CL_EVENT_COMMAND_QUEUE_anchor} + +include::{generated}/api/version-notes/CL_EVENT_COMMAND_QUEUE.asciidoc[] + | {cl_command_queue_TYPE} + | Return the command-queue associated with _event_. + For user event objects, a `NULL` value is returned. + +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is + supported, for events returned by a command-buffer enqueue operation + to multiple command-queues, `NULL` is returned. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_EVENT_CONTEXT_anchor} + +include::{generated}/api/version-notes/CL_EVENT_CONTEXT.asciidoc[] + | {cl_context_TYPE} + | Return the context associated with _event_. +| {CL_EVENT_COMMAND_TYPE_anchor} + +include::{generated}/api/version-notes/CL_EVENT_COMMAND_TYPE.asciidoc[] + | {cl_command_type_TYPE} + | Return the command type associated with _event_ as described in the + <<event-command-type-table,Event Command Types>> table. + +| {CL_EVENT_COMMAND_EXECUTION_STATUS_anchor} footnote:[{fn-event-status-order}] + +include::{generated}/api/version-notes/CL_EVENT_COMMAND_EXECUTION_STATUS.asciidoc[] + | {cl_int_TYPE} + | Return the execution status of the command identified by event. + Valid values are: + + {CL_QUEUED} - Command has been enqueued in the command-queue. + + {CL_SUBMITTED} - Enqueued command has been submitted by the host to the + device associated with the command-queue. + + {CL_RUNNING} - Device is currently executing this command. + + {CL_COMPLETE} - The command has completed. + + Or an error code given by a negative integer value (command was + abnormally terminated - this may be caused by a bad memory access + etc.). + These error codes come from the same set of error codes that are + returned from the platform or runtime API calls as return values or + *errcode_ret* values.
+ +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is + supported, for events returned by a command-buffer enqueue operation + to multiple command-queues the semantics of execution status is as + follows: + + {CL_QUEUED} - Command-buffer has been enqueued across the + command-queues. + + {CL_SUBMITTED} - Commands from the command-buffer have been + submitted by the host to any device associated with one of the + command-queues. + + {CL_RUNNING} - Any command from the command-buffer has started + execution on a device. + + {CL_COMPLETE} - All commands have completed on all devices. +endif::cl_khr_command_buffer_multi_device[] +| {CL_EVENT_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] + +include::{generated}/api/version-notes/CL_EVENT_REFERENCE_COUNT.asciidoc[] + | {cl_uint_TYPE} + | Return the _event_ reference count. +|==== + +[[event-command-type-table]] +.List of supported event command types +[width="100%",cols="2,3",options="header"] +|==== +| Events Created By | Event Command Type + +| {clEnqueueNDRangeKernel} + | {CL_COMMAND_NDRANGE_KERNEL_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_NDRANGE_KERNEL.asciidoc[] + +| {clEnqueueTask} + | {CL_COMMAND_TASK_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_TASK.asciidoc[] + +| {clEnqueueNativeKernel} + | {CL_COMMAND_NATIVE_KERNEL_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_NATIVE_KERNEL.asciidoc[] + +| {clEnqueueReadBuffer} + | {CL_COMMAND_READ_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_READ_BUFFER.asciidoc[] + +| {clEnqueueWriteBuffer} + | {CL_COMMAND_WRITE_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_WRITE_BUFFER.asciidoc[] + +| {clEnqueueCopyBuffer} + | {CL_COMMAND_COPY_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER.asciidoc[] + +| {clEnqueueReadImage} + | {CL_COMMAND_READ_IMAGE_anchor} +
+include::{generated}/api/version-notes/CL_COMMAND_READ_IMAGE.asciidoc[] + +| {clEnqueueWriteImage} + | {CL_COMMAND_WRITE_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_WRITE_IMAGE.asciidoc[] + +| {clEnqueueCopyImage} + | {CL_COMMAND_COPY_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_IMAGE.asciidoc[] + +| {clEnqueueCopyBufferToImage} + | {CL_COMMAND_COPY_BUFFER_TO_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER_TO_IMAGE.asciidoc[] + +| {clEnqueueCopyImageToBuffer} + | {CL_COMMAND_COPY_IMAGE_TO_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_IMAGE_TO_BUFFER.asciidoc[] + +| {clEnqueueMapBuffer} + | {CL_COMMAND_MAP_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_MAP_BUFFER.asciidoc[] + +| {clEnqueueMapImage} + | {CL_COMMAND_MAP_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_MAP_IMAGE.asciidoc[] + +| {clEnqueueUnmapMemObject} + | {CL_COMMAND_UNMAP_MEM_OBJECT_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_UNMAP_MEM_OBJECT.asciidoc[] + +| {clEnqueueMarker}, + + {clEnqueueMarkerWithWaitList} + | {CL_COMMAND_MARKER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_MARKER.asciidoc[] + +| {clEnqueueReadBufferRect} + | {CL_COMMAND_READ_BUFFER_RECT_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_READ_BUFFER_RECT.asciidoc[] + +| {clEnqueueWriteBufferRect} + | {CL_COMMAND_WRITE_BUFFER_RECT_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_WRITE_BUFFER_RECT.asciidoc[] + +| {clEnqueueCopyBufferRect} + | {CL_COMMAND_COPY_BUFFER_RECT_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER_RECT.asciidoc[] + +| {clCreateUserEvent} + | {CL_COMMAND_USER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_USER.asciidoc[] + +| {clEnqueueBarrier}, + + {clEnqueueBarrierWithWaitList} + | {CL_COMMAND_BARRIER_anchor} + 
+include::{generated}/api/version-notes/CL_COMMAND_BARRIER.asciidoc[] + +| {clEnqueueMigrateMemObjects} + | {CL_COMMAND_MIGRATE_MEM_OBJECTS_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_MIGRATE_MEM_OBJECTS.asciidoc[] + +| {clEnqueueFillBuffer} + | {CL_COMMAND_FILL_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_FILL_BUFFER.asciidoc[] + +| {clEnqueueFillImage} + | {CL_COMMAND_FILL_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_FILL_IMAGE.asciidoc[] + +| {clEnqueueSVMFree} + | {CL_COMMAND_SVM_FREE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_FREE.asciidoc[] + +| {clEnqueueSVMMemcpy} + | {CL_COMMAND_SVM_MEMCPY_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_MEMCPY.asciidoc[] + +| {clEnqueueSVMMemFill} + | {CL_COMMAND_SVM_MEMFILL_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_MEMFILL.asciidoc[] + +| {clEnqueueSVMMap} + | {CL_COMMAND_SVM_MAP_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_MAP.asciidoc[] + +| {clEnqueueSVMUnmap} + | {CL_COMMAND_SVM_UNMAP_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_UNMAP.asciidoc[] + +| {clEnqueueSVMMigrateMem} + | {CL_COMMAND_SVM_MIGRATE_MEM_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_MIGRATE_MEM.asciidoc[] + +Prior to OpenCL 3.0, implementations should return +{CL_COMMAND_MIGRATE_MEM_OBJECTS}, but may return an implementation-defined +event command type for {clEnqueueSVMMigrateMem}. 
+ +ifdef::cl_khr_command_buffer[] +| {clEnqueueCommandBufferKHR} + | {CL_COMMAND_COMMAND_BUFFER_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COMMAND_BUFFER_KHR.asciidoc[] +endif::cl_khr_command_buffer[] + +ifdef::cl_khr_dx9_media_sharing[] +| {clEnqueueAcquireDX9MediaSurfacesKHR} + | {CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR.asciidoc[] + +| {clEnqueueReleaseDX9MediaSurfacesKHR} + | {CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR.asciidoc[] +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +| {clEnqueueAcquireD3D10ObjectsKHR} + | {CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR.asciidoc[] + +| {clEnqueueReleaseD3D10ObjectsKHR} + | {CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR.asciidoc[] +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {clEnqueueAcquireD3D11ObjectsKHR} + | {CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR.asciidoc[] + +| {clEnqueueReleaseD3D11ObjectsKHR} + | {CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR.asciidoc[] +endif::cl_khr_d3d11_sharing[] + +ifdef::cl_khr_egl_image[] +| {clEnqueueAcquireEGLObjectsKHR} + | {CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR.asciidoc[] +| {clEnqueueReleaseEGLObjectsKHR} + | {CL_COMMAND_RELEASE_EGL_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_EGL_OBJECTS_KHR.asciidoc[] +endif::cl_khr_egl_image[] + +ifdef::cl_khr_egl_event[] +| {clCreateEventFromEGLSyncKHR} + | 
{CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR.asciidoc[] +endif::cl_khr_egl_event[] + +ifdef::cl_khr_gl_sharing[] +| {clEnqueueAcquireGLObjects} + | {CL_COMMAND_ACQUIRE_GL_OBJECTS_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_GL_OBJECTS.asciidoc[] +| {clEnqueueReleaseGLObjects} + | {CL_COMMAND_RELEASE_GL_OBJECTS_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_GL_OBJECTS.asciidoc[] +endif::cl_khr_gl_sharing[] + +ifdef::cl_khr_gl_event[] +| {clCreateEventFromGLsyncKHR} + | {CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR.asciidoc[] +endif::cl_khr_gl_event[] + +|==== + +Using {clGetEventInfo} to determine if a command identified by _event_ has +finished execution (i.e. {CL_EVENT_COMMAND_EXECUTION_STATUS} returns +{CL_COMPLETE}) is not a synchronization point. +There are no guarantees that the memory objects being modified by the command +associated with _event_ will be visible to other enqueued commands. + +// refError + +{clGetEventInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<event-info-table,Event Object Queries>> table and + _param_value_ is not `NULL`. + * {CL_INVALID_VALUE} if information to query given in _param_name_ cannot be + queried for _event_. + * {CL_INVALID_EVENT} if _event_ is not a valid event object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host.
+-- + +[open,refpage='clSetEventCallback',desc='Registers a user callback function for a specific command execution status.',type='protos'] +-- +To register a user callback function for a specific command execution +status, call the function + +include::{generated}/api/protos/clSetEventCallback.txt[] +include::{generated}/api/version-notes/clSetEventCallback.asciidoc[] + + * _event_ is a valid event object. + * _command_exec_callback_type_ specifies the command execution status for + which the callback is registered. + The command execution status types for which a callback can be registered + are {CL_SUBMITTED}, {CL_RUNNING}, or {CL_COMPLETE}. + The callback function registered for a _command_exec_callback_type_ value of + {CL_COMPLETE} will be called when the command has completed successfully or + is abnormally terminated. + * _pfn_event_notify_ is the event callback function that can be registered by + the application. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + The parameters to this callback function are: + ** _event_ is the event object for which the callback function is invoked. + ** _event_command_status_ is equal to the _command_exec_callback_type_ + used while registering the callback. + Refer to the <<event-info-table,Event Object Queries>> + table for the command execution status values. + If the callback is called as the result of the command associated with + _event_ being abnormally terminated, an appropriate error code for the + error that caused the termination will be passed to + _event_command_status_ instead. + ** _user_data_ is a pointer to user supplied data. + * _user_data_ will be passed as the _user_data_ argument when _pfn_event_notify_ is + called. + _user_data_ can be `NULL`. + +Each call to {clSetEventCallback} registers the specified user callback +function on a callback stack associated with _event_.
+The order in which the registered user callback functions are called is +undefined. + +The registered callback function will be called when the execution status of the +command associated with _event_ changes to an execution status equal to or past +the status specified by _command_exec_status_, or for the execution status +{CL_COMPLETE}, if the command is abnormally terminated. +There is no guarantee that the callback functions registered for various command +execution status values for an event will be called in the exact order that the +execution status of a command changes. +Furthermore, it should be noted that calling a callback for an event execution +status other than {CL_COMPLETE} in no way implies that the memory model or +execution model as defined by the OpenCL specification has changed. For example, +it is not valid to assume that a corresponding memory transfer has completed +unless the event is in the state {CL_COMPLETE}. + +All callbacks registered for an event object must be called before the event +object is destroyed. + +Callbacks should return promptly. +Behavior is undefined when calling expensive system routines, OpenCL APIs to +create contexts or command-queues, or blocking OpenCL APIs in an event callback. +Rather than calling a blocking OpenCL API in an event callback, applications +may call a non-blocking OpenCL API, then register a completion callback +for the non-blocking OpenCL API with the remainder of the work. + +Because commands in a command-queue are not required to begin execution +until the command-queue is flushed, callbacks that enqueue commands on a +command-queue should either call {clFlush} on the queue before returning, +or arrange for the command-queue to be flushed later. + +// refError + +{clSetEventCallback} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_EVENT} if _event_ is not a valid event object. 
+ * {CL_INVALID_VALUE} if _pfn_event_notify_ is `NULL` or if + _command_exec_callback_type_ is not {CL_SUBMITTED}, {CL_RUNNING}, or + {CL_COMPLETE}. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +[open,refpage='clRetainEvent',desc='Increments the event reference count.',type='protos'] +-- +To retain an event object, call the function + +include::{generated}/api/protos/clRetainEvent.txt[] +include::{generated}/api/version-notes/clRetainEvent.asciidoc[] + + * _event_ is the event object to be retained. + +The _event_ reference count is incremented. +The OpenCL commands that return an event perform an implicit retain. + +// refError + +{clRetainEvent} returns {CL_SUCCESS} if the function is executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_EVENT} if _event_ is not a valid event object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clReleaseEvent',desc='Decrements the event reference count.',type='protos'] +-- +To release an event object, call the function + +include::{generated}/api/protos/clReleaseEvent.txt[] +include::{generated}/api/version-notes/clReleaseEvent.asciidoc[] + + * _event_ is the event object to be released. + +The _event_ reference count is decremented. + +The event object is deleted once the reference count becomes zero, the +specific command identified by this event has completed (or terminated) and +there are no commands in the command-queues of a context that require a wait +for this event to complete. 
+Using this function to release a reference that was not obtained by creating +the object or by calling {clRetainEvent} causes undefined behavior. + +[NOTE] +==== +Developers should be careful when releasing their last reference count on +events created by {clCreateUserEvent} that have not yet been set to status +of {CL_COMPLETE} or an error. +If the user event was used in the event_wait_list argument passed to a +*+clEnqueue*+* API or another application host thread is waiting for it in +{clWaitForEvents}, those commands and host threads will continue to wait for +the event status to reach {CL_COMPLETE} or error, even after the application +has released the object. +Since in this scenario the application has released its last reference count +to the user event, it would be in principle no longer valid for the +application to change the status of the event to unblock all the other +machinery. +As a result the waiting tasks will wait forever, and associated events, +{cl_mem_TYPE} objects, command-queues and contexts are likely to leak. +In-order command-queues caught up in this deadlock may cease to do any work. +==== + +// refError + +{clReleaseEvent} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_EVENT} if _event_ is not a valid event object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +ifdef::cl_khr_egl_event[] +==== Linking Event Objects to EGL Fence Sync Objects + +An event object may be created by linking to an EGL *fence sync object*. 
+ +[open,refpage='clCreateEventFromEGLSyncKHR',desc='Link OpenCL event object to an EGL sync object',type='protos'] +-- +To create an OpenCL event object linked to an EGL fence sync object, call +the function + +include::{generated}/api/protos/clCreateEventFromEGLSyncKHR.txt[] +include::{generated}/api/version-notes/clCreateEventFromEGLSyncKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context or + share group, using the `<>` extension. + * _sync_ is the name of a sync object of type `EGL_SYNC_FENCE_KHR` created + with respect to `EGLDisplay` _display_. + * _display_ is the `EGLDisplay` handle. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +Completion of such an event object is equivalent to waiting for completion +of the fence command associated with the linked EGL sync object. + +The parameters of an event object linked to an EGL sync object will return +the following values when queried with {clGetEventInfo}: + + * The {CL_EVENT_COMMAND_QUEUE} of a linked event is `NULL`, because the + event is not associated with any OpenCL command-queue. + * The {CL_EVENT_COMMAND_TYPE} of a linked event is + {CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR}, indicating that the event is + associated with a EGL sync object, rather than an OpenCL command. + * The {CL_EVENT_COMMAND_EXECUTION_STATUS} of a linked event is either + {CL_SUBMITTED}, indicating that the fence command associated with the + sync object has not yet completed, or {CL_COMPLETE}, indicating that the + fence command has completed. + +{clCreateEventFromEGLSyncKHR} performs an implicit {clRetainEvent} on the +returned event object. +Creating a linked event object also places a reference on the linked EGL +sync object. +When the event object is deleted, the reference will be removed from the EGL +sync object. + +Events returned from {clCreateEventFromEGLSyncKHR} may only be consumed by +{clEnqueueAcquire}*** commands. 
+Passing such events to any other CL API that enqueues commands will generate +a {CL_INVALID_EVENT} error. + +// refError + +{clCreateEventFromEGLSyncKHR} returns a valid OpenCL event object and +_errcode_ret_ is set to {CL_SUCCESS} if the event object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context, or was not + created from a GL context. + * {CL_INVALID_EGL_OBJECT_KHR} if _sync_ is not a valid EGLSyncKHR object + of type `EGL_SYNC_FENCE_KHR` created with respect to `EGLDisplay` + _display_. +-- + + +[[explicit-sync-using-egl-fences]] +===== Explicit Synchronization Using EGL Fence Sync Objects + +If the `<<cl_khr_egl_event>>` extension is supported, event objects created +with {clCreateEventFromEGLSyncKHR} provide another method of coordinating +sharing between EGL / EGL client API objects, and OpenCL. + +Completion of EGL and EGL client API commands may be determined by + + * placing an EGL fence command after commands using `eglCreateSyncKHR`; + * creating an event from the resulting EGL sync object using + {clCreateEventFromEGLSyncKHR}; and + * determining completion of that event object via + {clEnqueueAcquireGLObjects}. + +This method may be considerably more efficient than calling operations like +`glFinish`, and is referred to as _explicit synchronization_. +The application is responsible for ensuring the command stream associated +with the EGL fence is flushed to ensure the CL queue is submitted to the +device. +Explicit synchronization is most useful when an EGL client API context bound +to another thread is accessing the memory objects. + +endif::cl_khr_egl_event[] + + +ifdef::cl_khr_gl_event[] +==== Linking Event Objects to OpenGL Fence Sync Objects + +An event object may be created by linking to an OpenGL *fence sync object*.
+ +[open,refpage='clCreateEventFromGLsyncKHR',desc='Create OpenCL event object linked to an OpenGL sync object',type='protos'] +-- +To create an OpenCL event object linked to an OpenGL fence sync object, call +the function + +include::{generated}/api/protos/clCreateEventFromGLsyncKHR.txt[] +include::{generated}/api/version-notes/clCreateEventFromGLsyncKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context or + share group, using the `<<cl_khr_gl_sharing>>` extension. + * _sync_ is the name of a sync object in the GL share group associated + with _context_. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +Completion of such an event object is equivalent to waiting for completion +of the fence command associated with the linked GL sync object. + +// refError + +{clCreateEventFromGLsyncKHR} returns a valid OpenCL event object and +_errcode_ret_ is set to {CL_SUCCESS} if the event object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context, or was not + created from a GL context. + * {CL_INVALID_GL_OBJECT} if _sync_ is not the name of a sync object in the + GL share group associated with _context_. + +The parameters of an event object linked to a GL sync object will return the +following values when queried with {clGetEventInfo}: + + * The {CL_EVENT_COMMAND_QUEUE} of a linked event is `NULL`, because the + event is not associated with any OpenCL command-queue. + * The {CL_EVENT_COMMAND_TYPE} of a linked event is + {CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR}, indicating that the event is + associated with a GL sync object, rather than an OpenCL command.
+ * The {CL_EVENT_COMMAND_EXECUTION_STATUS} of a linked event is either + {CL_SUBMITTED}, indicating that the fence command associated with the + sync object has not yet completed, or {CL_COMPLETE}, indicating that the + fence command has completed. + +{clCreateEventFromGLsyncKHR} performs an implicit {clRetainEvent} on the +returned event object. +Creating a linked event object also places a reference on the linked GL sync +object. +When the event object is deleted, the reference will be removed from the GL +sync object. + +Events returned from {clCreateEventFromGLsyncKHR} can be used in the +_event_wait_list_ argument to {clEnqueueAcquireGLObjects} and CL APIs that +take a {cl_event} as an argument but do not enqueue commands. +Passing such events to any other CL API that enqueues commands will generate +a {CL_INVALID_EVENT} error. +-- + + +[[explicit-sync-using-opengl-fences]] +===== Explicit Synchronization Using OpenGL Fence Sync Objects + +If the `<<cl_khr_gl_event>>` extension is supported, event objects created +with {clCreateEventFromGLsyncKHR} provide another method of coordinating +sharing of buffers and images between OpenGL and OpenCL. + +Completion of OpenGL commands may be determined by + + * placing an OpenGL fence command after commands using `glFenceSync`; + * creating an event from the resulting OpenGL sync object using + {clCreateEventFromGLsyncKHR}; and + * determining completion of that event object via + {clEnqueueAcquireGLObjects}. + +This method may be considerably more efficient than calling `glFinish`, and +is referred to as _explicit synchronization_. +Explicit synchronization is most useful when an OpenGL context bound to +another thread is accessing the memory objects. + +Explicit synchronization is most useful when an OpenGL context bound to +another thread is accessing the memory objects.
+ +endif::cl_khr_gl_event[] + + +[[markers-barriers-waiting-for-events]] +== Markers, Barriers and Waiting for Events + +[open,refpage='clEnqueueMarkerWithWaitList',desc='Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.',type='protos'] +-- +To enqueue a marker command which waits for events or commands to complete, +call the function + +include::{generated}/api/protos/clEnqueueMarkerWithWaitList.txt[] +include::{generated}/api/version-notes/clEnqueueMarkerWithWaitList.asciidoc[] + + * _command_queue_ is a valid host command-queue. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. +If _event_wait_list_ is not `NULL`, the list of events pointed to by +_event_wait_list_ must be valid and _num_events_in_wait_list_ must be +greater than 0. +The events specified in _event_wait_list_ act as synchronization points. +The context associated with events in _event_wait_list_ and _command_queue_ +must be the same. +The memory associated with _event_wait_list_ can be reused or freed after +the function returns. + +If _event_wait_list_ is `NULL`, then this particular command waits until all +previous enqueued commands to _command_queue_ have completed. 
+ +The marker command either waits for a list of events to complete, or if the +list is empty it waits for all commands previously enqueued in +_command_queue_ to complete before it completes. +This command returns an _event_ which can be waited on, i.e. this event can +be waited on to ensure that all events either in the _event_wait_list_ or +all previously enqueued commands, queued before this command to +_command_queue_, have completed. + +// refError + +{clEnqueueMarkerWithWaitList} returns {CL_SUCCESS} if the function is +successfully executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueMarker',desc='Enqueues a marker command which waits for all previously enqueued commands to complete.',type='protos'] +-- +To enqueue a marker command which waits for previous commands to complete, call +the function + +include::{generated}/api/protos/clEnqueueMarker.txt[] +include::{generated}/api/version-notes/clEnqueueMarker.asciidoc[] + + * _command_queue_ is a valid host command-queue. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete.
+ If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +The marker command waits for all commands previously enqueued in _command_queue_ to complete before it completes. +This command returns an _event_ which can be waited on, i.e. this event can be +waited on to ensure that all previously enqueued commands, queued before this +command to _command_queue_, have completed. + +// refError + +{clEnqueueMarker} returns {CL_SUCCESS} if the function is successfully +executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_VALUE} if _event_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueWaitForEvents',desc='Enqueues a wait on a list of events to complete.',type='protos'] +-- +To enqueue a wait for a specific event or a list of events to complete before any future commands queued in a command-queue are executed, call the function + +include::{generated}/api/protos/clEnqueueWaitForEvents.txt[] +include::{generated}/api/version-notes/clEnqueueWaitForEvents.asciidoc[] + + * _command_queue_ is a valid host command-queue. + * _event_list_ and _num_events_ specify events that need to complete before + this particular command can be executed. + +// Note, this parameter is called event_list (like clWaitForEvents) rather than +// event_wait_list(like clEnqueueMarkerWithWaitList etc.) because the function +// predates wait lists (and CL_INVALID_EVENT_WAIT_LIST).
+ +The events specified in _event_list_ act as synchronization points. +The context associated with events in _event_list_ and _command_queue_ must be +the same. +The memory associated with _event_list_ can be reused or freed after the +function returns. + +// refError + +{clEnqueueWaitForEvents} returns {CL_SUCCESS} if the function is successfully +executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_list_ are not the same. + * {CL_INVALID_VALUE} if _num_events_ is 0 or _event_list_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueBarrierWithWaitList',desc='A synchronization point that enqueues a barrier operation.',type='protos'] +-- +To enqueue a barrier command which waits for events or commands to complete, +call the function + +include::{generated}/api/protos/clEnqueueBarrierWithWaitList.txt[] +include::{generated}/api/version-notes/clEnqueueBarrierWithWaitList.asciidoc[] + + * _command_queue_ is a valid host command-queue. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + * If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. 
+ The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +If _event_wait_list_ is `NULL`, then this particular command waits until all +previously enqueued commands to _command_queue_ have completed. + +The barrier command either waits for a list of events to complete, or if the +list is empty it waits for all commands previously enqueued in +_command_queue_ to complete before it completes. +This command blocks command execution, that is, any following commands +enqueued after it do not execute until it completes. +This command returns an _event_ which can be waited on, i.e. this event can +be waited on to ensure that all events either in the _event_wait_list_ or +all previously enqueued commands, queued before this command to +_command_queue_, have completed. + +// refError + +{clEnqueueBarrierWithWaitList} returns {CL_SUCCESS} if the function is +successfully executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events.
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueBarrier',desc='A synchronization point that enqueues a barrier operation.',type='protos'] +-- +To enqueue a barrier command which waits for commands to complete, call the +function + +include::{generated}/api/protos/clEnqueueBarrier.txt[] +include::{generated}/api/version-notes/clEnqueueBarrier.asciidoc[] + + * _command_queue_ is a valid host command-queue. + +The barrier command waits for all commands previously enqueued in +_command_queue_ to complete before it completes. +This command blocks command execution, that is, any following commands +enqueued after it do not execute until it completes. +// TODO clEnqueueBarrierWithWaitList doesn't say synchronization point, should +// it, or should the next line be removed? The main difference is that +// clEnqueueBarrierWithWaitList returns an event, which is the synchronization +// point. +The barrier command is a synchronization point. + +// refError + +{clEnqueueBarrier} returns {CL_SUCCESS} if the function is successfully +executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +ifdef::cl_khr_semaphore[] +== Semaphores + +This section describes the semaphore types and functions defined by the +`<<cl_khr_semaphore>>` extension. + +=== Semaphore Types + +// TODO this is very rough, does not include API definitions or define +// refpage content, etc.
+ + * {cl_semaphore_type_khr_TYPE} represents the different types of + semaphores. + ** It is mandatory to support {CL_SEMAPHORE_TYPE_BINARY_KHR}. + * {cl_semaphore_properties_khr_TYPE} represents properties associated with + semaphores. + ** {CL_SEMAPHORE_TYPE_KHR} must be supported. + * {cl_semaphore_info_khr_TYPE} represents queries for additional + information about semaphores. + ** All enums described in the "`New API Enums`" section of the + `<<cl_khr_semaphore>>` extension for {cl_semaphore_info_khr_TYPE} must + be supported. + * {cl_semaphore_payload_khr_TYPE} represents payload values of semaphores. + * {cl_semaphore_khr_TYPE} represents semaphore objects. + + +=== Creating Semaphores + +[open,refpage='clCreateSemaphoreWithPropertiesKHR',desc='Create a semaphore object',type='protos'] +-- +To create a *semaphore object*, call the function + +include::{generated}/api/protos/clCreateSemaphoreWithPropertiesKHR.txt[] +include::{generated}/api/version-notes/clCreateSemaphoreWithPropertiesKHR.asciidoc[] + + * _context_ identifies a valid OpenCL context that the created + {cl_semaphore_khr_TYPE} will belong to. + * _sema_props_ specifies additional semaphore properties in the form of a list + of <property_name, property_value> pairs terminated with 0. + {CL_SEMAPHORE_TYPE_KHR} must be part of the list of properties specified + by _sema_props_. + +The following new properties are added to the list of possible supported +properties by {cl_semaphore_properties_khr_TYPE} that can be passed to +{clCreateSemaphoreWithPropertiesKHR}: + +.List of supported semaphore creation properties by {clCreateSemaphoreWithPropertiesKHR} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Semaphore Property | Property Value | Description +| {CL_SEMAPHORE_TYPE_KHR_anchor} + | {cl_semaphore_type_khr_TYPE} + | Specifies the type of semaphore to create. + This property is always required.
+| {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR_anchor} + | {cl_device_id_TYPE}[] + | Specifies the list of OpenCL devices (terminated with + {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR}) to associate with the + semaphore. + Only a single device is permitted in the list. + +ifdef::cl_khr_external_semaphore[] +| {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Specifies the list of semaphore handle type properties (terminated + with {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR}) that can be + used to export the semaphore being created. +endif::cl_khr_external_semaphore[] +|==== + +If {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not specified as part of +_sema_props_, the semaphore object created by +{clCreateSemaphoreWithPropertiesKHR} is by default accessible to all devices +in the _context_. +For a multi-device context {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} must be +specified in _sema_props_. + +// refError + +_errcode_ret_ returns an appropriate error code. +If _errcode_ret_ is `NULL`, no error code is returned. + +{clCreateSemaphoreWithPropertiesKHR} returns a valid semaphore object in an +un-signaled state and _errcode_ret_ is set to {CL_SUCCESS} if the +function is executed successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_PROPERTY} if a property name in _sema_props_ is not a + supported property name, if the value specified for a supported property + name is not valid, or if the same property name is specified more than + once. + Additionally, if _context_ is a multiple device context and _sema_props_ + does not specify {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR}.
+ * {CL_INVALID_DEVICE} if {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is + specified as part of _sema_props_, but it does not identify exactly one + valid device; or if a device identified by + {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not one of the devices within + _context_. + * {CL_INVALID_VALUE} + ** if _sema_props_ is `NULL`, or + ** if _sema_props_ does not specify <property_name, property_value> pairs for the minimum set + of properties (i.e. {CL_SEMAPHORE_TYPE_KHR}) required for successful + creation of a {cl_semaphore_khr_TYPE}. + + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +ifdef::cl_khr_external_semaphore[] + * {CL_INVALID_DEVICE} if one or more devices identified by properties + {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} cannot import the requested + external semaphore handle type. + * {CL_INVALID_VALUE} if more than one semaphore handle type is specified + in the {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} list. + * {CL_INVALID_OPERATION} if _sema_props_ specifies a + {cl_external_semaphore_handle_type_khr_TYPE} followed by a handle as + well as {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR}. Exporting a semaphore + handle from a semaphore that was created by importing an external + semaphore handle is not permitted. +endif::cl_khr_external_semaphore[] +-- + + +ifdef::cl_khr_external_semaphore[] +=== Exporting Semaphore External Handles + +[open,refpage='clGetSemaphoreHandleForTypeKHR',desc='Export external handle from a semaphore',type='protos'] +-- +To export an external handle from a semaphore, call the function + +include::{generated}/api/protos/clGetSemaphoreHandleForTypeKHR.txt[] + + * _sema_object_ specifies a valid semaphore object with exportable + properties. + * _device_ specifies a valid device for which a semaphore handle is being + requested.
+ * _handle_type_ specifies the type of semaphore handle that should be + returned for this exportable _sema_object_, and must be one of the + values specified when _sema_object_ was created. + * _handle_size_ specifies the size of memory pointed to by _handle_ptr_. + * _handle_ptr_ is a pointer to memory where the exported external handle + is returned. + If _handle_ptr_ is `NULL`, it is ignored. + * _handle_size_ret_ returns the actual size in bytes for the external + handle. + If _handle_size_ret_ is `NULL`, it is ignored. + +// refError + +{clGetSemaphoreHandleForTypeKHR} returns {CL_SUCCESS} if the semaphore +handle is queried successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} + ** if _sema_object_ is not a valid semaphore, or +// This is redundant with the error below. + ** if _sema_object_ is not exportable. + * {CL_INVALID_DEVICE} + ** if _device_ is not a valid device, or + ** if _sema_object_ belongs to a context that is not associated with + _device_, or + ** if _sema_object_ cannot be shared with _device_. + * {CL_INVALID_VALUE} if the requested external semaphore handle type was + not specified when _sema_object_ was created. + * {CL_INVALID_VALUE} if _handle_size_ is less than the size needed to + store the returned handle. +// I don't think this can happen. This would have been checked when the semaphore was created. +// ** if CL_SEMAPHORE_HANDLE_*_KHR is specified as one of the _sema_props_ and +// the property CL_SEMAPHORE_HANDLE_*_KHR does not identify a valid external +// memory handle property reported by +// {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} or +// {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} queries. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host.
+-- + + +=== Importing Semaphore External Handles + +Applications can import a semaphore payload into an existing semaphore using +an external semaphore handle. +The effects of the import operation will be either temporary or permanent, +as specified by the application. +If the import is temporary, the implementation must restore the semaphore to +its prior permanent state after submitting the next semaphore wait +operation. +Performing a subsequent temporary import on a semaphore before performing a +semaphore wait has no effect on this requirement; the next wait submitted on +the semaphore must still restore its last permanent state. +A permanent payload import behaves as if the target semaphore was destroyed, +and a new semaphore was created with the same handle but the imported +payload. +Because importing a semaphore payload temporarily or permanently detaches +the existing payload from a semaphore, similar usage restrictions to those +applied to {clReleaseSemaphoreKHR} are applied to any command that imports a +semaphore payload. +Which of these import types is used is referred to as the import operation's +permanence. +Each handle type supports either one or both types of permanence. + +The implementation must perform the import operation by either referencing +or copying the payload referred to by the specified external semaphore +handle, depending on the handle's type. +The import method used is referred to as the handle type's transference. +When using handle types with reference transference, importing a payload to +a semaphore adds the semaphore to the set of all semaphores sharing that +payload. +This set includes the semaphore from which the payload was exported. +Semaphore signaling and waiting operations performed on any semaphore in the +set must behave as if the set were a single semaphore. 
+Importing a payload using handle types with copy transference creates a +duplicate copy of the payload at the time of import, but makes no further +reference to it. +Semaphore signaling and waiting operations performed on the target of copy +imports must not affect any other semaphore or payload. + +Export operations have the same transference as the specified handle type's +import operations. +Additionally, exporting a semaphore payload to a handle with copy +transference has the same side effects on the source semaphore's payload as +executing a semaphore wait operation. +If the semaphore was using a temporarily imported payload, the semaphore's +prior permanent payload will be restored. + +Please refer to handle specific specifications for more details on +transference and permanence requirements specific to handle type. + + +=== Descriptions of External Semaphore Handle Types + +This section describes the external semaphore handle types that are added by +related extensions. + +Applications can import the same semaphore payload into multiple OpenCL +contexts, into the same context from which it was exported, and multiple +times into a given OpenCL context. +In all cases, each import operation must create a distinct semaphore object. + + +ifdef::cl_khr_external_semaphore_opaque_fd,cl_khr_external_semaphore_sync_fd[] + +==== File Descriptor Handle Types + +ifdef::cl_khr_external_semaphore_opaque_fd[] + +The `<<cl_khr_external_semaphore_opaque_fd>>` extension extends +{cl_external_semaphore_handle_type_khr_TYPE} to support the following new +types of handles, and adds as a property that may be specified when creating +a semaphore from an external handle: + + * {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR} specifies a POSIX file descriptor + handle that has only limited valid usage outside of OpenCL and other + compatible APIs. + It must be compatible with the POSIX system calls `dup`, `dup2`, + `close`, and the non-standard system call `dup3`.
+ Additionally, it must be transportable over a socket using an + `SCM_RIGHTS` control message. + It owns a reference to the underlying synchronization primitive + represented by its semaphore object. + +endif::cl_khr_external_semaphore_opaque_fd[] + +ifdef::cl_khr_external_semaphore_sync_fd[] + +The `<<cl_khr_external_semaphore_sync_fd>>` extension extends +{cl_external_semaphore_handle_type_khr_TYPE} to support the following new +types of handles, and adds as a property that may be specified when creating +a semaphore from an external handle: + + * {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} specifies a POSIX file descriptor + handle to a Linux Sync File or Android Fence object. + It can be used with any native API accepting a valid sync file or fence + as input. + It owns a reference to the underlying synchronization primitive + associated with the file descriptor. + Implementations which support importing this handle type must accept any + type of sync or fence FD supported by the native system they are running + on. + +The special value -1 for fd is treated like a valid sync file descriptor +referring to an object that has already signaled. +The import operation will succeed and the semaphore will have a temporarily +imported payload as if a valid file descriptor had been provided. + +Note: This special behavior for importing an invalid sync file descriptor +allows easier interoperability with other system APIs which use the +convention that an invalid sync file descriptor represents work that has +already completed and does not need to be waited for. +It is consistent with the option for implementations to return a -1 file +descriptor when exporting a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} from a +{cl_semaphore_khr_TYPE} which is signaled.
+ +endif::cl_khr_external_semaphore_sync_fd[] + +.Transference and Permanence Properties for File Descriptor Handles +[width="100%",cols="60%,<20%,<20%",options="header"] +|==== +| Handle Type | Transference | Permanence + +ifdef::cl_khr_external_semaphore_opaque_fd[] +| {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR.asciidoc[] + | Reference + | Temporary, Permanent +endif::cl_khr_external_semaphore_opaque_fd[] + +ifdef::cl_khr_external_semaphore_sync_fd[] +| {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_SYNC_FD_KHR.asciidoc[] + | Copy + | Temporary +endif::cl_khr_external_semaphore_sync_fd[] +|==== + +Importing a semaphore payload from a file descriptor transfers ownership of +the file descriptor from the application to the OpenCL implementation. +The application must not perform any operations on the file descriptor after +a successful import. + +ifdef::cl_khr_external_semaphore_sync_fd[] +[open,refpage='clReImportSemaphoreSyncFdKHR',desc='Re-import sync fd handle into an existing semaphore',type='protos'] +-- +To re-import a handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} into an +existing semaphore, call the function: + +include::{generated}/api/protos/clReImportSemaphoreSyncFdKHR.txt[] + + * _sema_object_ specifies a valid semaphore object with importable + properties. + * _reimport_props_ must be `NULL`, and is reserved for future use. + * _fd_ specifies an external file descriptor handle to import. + +Calling {clReImportSemaphoreSyncFdKHR} is equivalent to destroying +_sema_object_ and re-creating it with the original _sema_props_ from +{clCreateSemaphoreWithPropertiesKHR}, except a handle specified by _fd_ will +be imported. +The semaphore _sema_object_ must have originally imported an external handle +of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR}.
+ +// refError + +{clReImportSemaphoreSyncFdKHR} returns {CL_SUCCESS} if the semaphore +handle is re-imported successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} + ** if _sema_object_ is not a valid semaphore + * {CL_INVALID_SEMAPHORE_KHR} if a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} handle + was not imported when _sema_object_ was created. + * {CL_INVALID_VALUE} if _fd_ is invalid. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. +-- +endif::cl_khr_external_semaphore_sync_fd[] + +endif::cl_khr_external_semaphore_opaque_fd,cl_khr_external_semaphore_sync_fd[] + + +ifdef::cl_khr_external_semaphore_dx_fence,cl_khr_external_semaphore_win32[] + +==== NT Handle Types + +ifdef::cl_khr_external_semaphore_dx_fence[] + +The `<<cl_khr_external_semaphore_dx_fence>>` extension extends +{cl_external_semaphore_handle_type_khr_TYPE} to support the following new +types of handles, and adds as a property that may be specified when creating +a semaphore from an external handle: + + * {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} specifies an NT handle returned by + `ID3D12Device::CreateSharedHandle` referring to a Direct3D 12 fence, or + `ID3D11Device5::CreateFence` referring to a Direct3D 11 fence. + It owns a reference to the underlying synchronization primitive + associated with the Direct3D fence. + +When waiting on semaphores using {clEnqueueWaitSemaphoresKHR} or signaling +semaphores using {clEnqueueSignalSemaphoresKHR}, the semaphore payload must +be provided for semaphores created from +{CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR}.
+ + * If _sema_objects_ list has a mix of semaphores obtained from + {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and other handle types, then the + _sema_payload_list_ should point to a list of _num_sema_objects_ payload + values for each semaphore in _sema_objects_. + However, the payload values corresponding to semaphores with type + {CL_SEMAPHORE_TYPE_BINARY_KHR} can be set to 0 or will be ignored. + +{clEnqueueWaitSemaphoresKHR} and {clEnqueueSignalSemaphoresKHR} may return +{CL_INVALID_VALUE} if _sema_objects_ list has one or more semaphores +obtained from {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and _sema_payload_list_ +is `NULL`. + +endif::cl_khr_external_semaphore_dx_fence[] + +ifdef::cl_khr_external_semaphore_win32[] + +The `<<cl_khr_external_semaphore_win32>>` extension extends +{cl_external_semaphore_handle_type_khr_TYPE} to support the following new +types of handles, and adds as a property that may be specified when creating +a semaphore from an external handle: + + * {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} specifies an NT handle that has + only limited valid usage outside of OpenCL and other compatible APIs. + It must be compatible with the functions `DuplicateHandle`, + `CloseHandle`, `CompareObjectHandles`, `GetHandleInformation`, and + `SetHandleInformation`. + It owns a reference to the underlying synchronization primitive + represented by its semaphore object. + * {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} specifies a global share + handle that has only limited valid usage outside of OpenCL and other + compatible APIs. + It is not compatible with any native APIs. + It does not own a reference to the underlying synchronization primitive + represented by its semaphore object, and will therefore become invalid + when all semaphore objects associated with it are destroyed.
+ +endif::cl_khr_external_semaphore_win32[] + +.Transference and Permanence Properties for NT Handle Types +[width="100%",cols="60%,<20%,<20%",options="header"] +|==== +| Handle Type | Transference | Permanence + +ifdef::cl_khr_external_semaphore_dx_fence[] +| {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR.asciidoc[] + | Reference + | Temporary, Permanent +endif::cl_khr_external_semaphore_dx_fence[] + +ifdef::cl_khr_external_semaphore_win32[] +| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR.asciidoc[] + | Reference + | Temporary, Permanent +| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR.asciidoc[] + | Reference + | Temporary, Permanent +endif::cl_khr_external_semaphore_win32[] +|==== + +// TODO Why "Windows handles" here but "NT handles" elsewhere? + +Importing a semaphore payload from Windows handles does not transfer +ownership of the handle to the OpenCL implementation. +For handle types defined as NT handles, the application must release +ownership using the `CloseHandle` system call when the handle is no longer +needed. + +endif::cl_khr_external_semaphore_dx_fence,cl_khr_external_semaphore_win32[] + +endif::cl_khr_external_semaphore[] + + +=== Waiting On and Signaling Semaphores + +[open,refpage='clEnqueueWaitSemaphoresKHR',desc='Enqueue a command to wait on a set of semaphores',type='protos'] +-- +To enqueue a command to wait on a set of semaphores, call the function + +include::{generated}/api/protos/clEnqueueWaitSemaphoresKHR.txt[] +include::{generated}/api/version-notes/clEnqueueWaitSemaphoresKHR.asciidoc[] + + * _command_queue_ specifies a valid command-queue. + * _num_sema_objects_ specifies the number of semaphore objects to wait on. + * _sema_objects_ points to the list of semaphore objects to wait on. 
+ The length of the list must be at least _num_sema_objects_. + * _sema_payload_list_ points to the list of values of type + {cl_semaphore_payload_khr_TYPE} containing valid semaphore payload + values to wait on. + This can be set to `NULL` or will be ignored when all semaphores in the + list of _sema_objects_ are of type {CL_SEMAPHORE_TYPE_BINARY_KHR}. + * _num_events_in_wait_list_ specifies the number of events in + _event_wait_list_. + * _event_wait_list_ specifies list of events that need to complete before + {clEnqueueWaitSemaphoresKHR} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueWaitSemaphoresKHR} does + not wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and that + associated with _command_queue_ must be the same. + * _event_ returns an event object that identifies this particular command + and can be used to query or queue a wait for this particular command to + complete. + _event_ can be `NULL`, in which case it will not be possible for the + application to query the status of this command or queue a wait for this + command to complete. + +The semaphore wait command waits for a list of events to complete and a list +of semaphore objects to become signaled. +The semaphore wait command returns an _event_ which can be waited on to +ensure that all events in the _event_wait_list_ have completed and all +semaphores in _sema_objects_ have been signaled. +{clEnqueueWaitSemaphoresKHR} will not return until the binary semaphores in +_sema_objects_ are in a state that makes them safe to re-signal. 
+If necessary, implementations may block in {clEnqueueWaitSemaphoresKHR} to +ensure the correct state of semaphores when returning. +There are no implications from this behavior for the state of _event_ or the +events in _event_wait_list_ when {clEnqueueWaitSemaphoresKHR} returns. +Waiting on the same binary semaphore twice without an interleaving signal +may lead to undefined behavior. + +// refError + +{clEnqueueWaitSemaphoresKHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} + ** if _command_queue_ is not a valid command-queue, or + ** if the device associated with _command_queue_ is not the same as one of the + devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time + of creating one or more of _sema_objects_. + * {CL_INVALID_VALUE} if _num_sema_objects_ is 0. + * {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by + _sema_objects_ is not valid. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + any of the semaphore objects in _sema_objects_ are not the same, or if + the context associated with _command_queue_ and that associated with + events in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if any of the semaphore objects specified by + _sema_objects_ requires a semaphore payload and _sema_payload_list_ is + `NULL`. + * {CL_INVALID_EVENT_WAIT_LIST} + ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, + or + ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, + or + ** if event objects in _event_wait_list_ are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status + of any of the events in _event_wait_list_ is a negative integer value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device.
+ * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueSignalSemaphoresKHR',desc='Enqueue a command to signal a set of semaphores',type='protos'] +-- +To enqueue a command to signal a set of semaphores, call the function + +include::{generated}/api/protos/clEnqueueSignalSemaphoresKHR.txt[] +include::{generated}/api/version-notes/clEnqueueSignalSemaphoresKHR.asciidoc[] + + * _command_queue_ specifies a valid command-queue. + * _num_sema_objects_ specifies the number of semaphore objects to signal. + * _sema_objects_ points to the list of semaphore objects to signal. + The length of the list must be at least _num_sema_objects_. + * _sema_payload_list_ points to the list of values of type + {cl_semaphore_payload_khr_TYPE} containing semaphore payload values to + signal. + This can be set to `NULL` or will be ignored when all semaphores in the + list of _sema_objects_ are of type {CL_SEMAPHORE_TYPE_BINARY_KHR}. + * _num_events_in_wait_list_ specifies the number of events in _event_wait_list_. + * _event_wait_list_ points to the list of events that need to complete + before {clEnqueueSignalSemaphoresKHR} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueSignalSemaphoresKHR} does + not wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and that + associated with _command_queue_ must be the same. ++ +_event_ returns an event object that identifies this particular command +and can be used to query or queue a wait for this particular command to +complete.
+_event_ can be `NULL`, in which case it will not be possible for the +application to query the status of this command or queue a wait for this +command to complete. + +The semaphore signal command waits for a list of events to complete and then +signals a list of semaphore objects. +The semaphore signal command returns an _event_ which can be waited on to +ensure that all events in the _event_wait_list_ have completed and all +semaphores in _sema_objects_ have been signaled. +The successful completion of the event generated by +{clEnqueueSignalSemaphoresKHR} called on one or more semaphore objects of +type {CL_SEMAPHORE_TYPE_BINARY_KHR} changes the state of the corresponding +semaphore objects to signaled. +{clEnqueueSignalSemaphoresKHR} will not return until the binary semaphores +in _sema_objects_ are in a state that makes them safe to wait on again. +If necessary, implementations may block in {clEnqueueSignalSemaphoresKHR} to +ensure the correct state of semaphores when returning. +There are no implications from this behavior for the state of _event_ or the +events in _event_wait_list_ when {clEnqueueSignalSemaphoresKHR} returns. +Signaling the same binary semaphore twice without an interleaving wait may +lead to undefined behavior. + +// refError + +{clEnqueueSignalSemaphoresKHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} + ** if _command_queue_ is not a valid command-queue, or + ** if the device associated with _command_queue_ is not the same as one of the + devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of + creating one or more of _sema_objects_, or + ** if one or more of _sema_objects_ belong to a context that does not + contain a device associated with _command_queue_. + * {CL_INVALID_VALUE} if _num_sema_objects_ is 0. + * {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by + _sema_objects_ is not valid.
+ * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + any of the semaphore objects in _sema_objects_ are not the same, or if + the context associated with _command_queue_ and that associated with + events in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if any of the semaphore objects specified by + _sema_objects_ requires a semaphore payload and _sema_payload_list_ is + `NULL`. + * {CL_INVALID_EVENT_WAIT_LIST} + ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not + 0, or + ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is + 0, or + ** if event objects in _event_wait_list_ are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status + of any of the events in _event_wait_list_ is a negative integer value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +=== Retaining and Releasing Semaphores + +[open,refpage='clReleaseSemaphoreKHR',desc='Release a semaphore object',type='protos'] +-- +To release a semaphore object, call the function + +include::{generated}/api/protos/clReleaseSemaphoreKHR.txt[] +include::{generated}/api/version-notes/clReleaseSemaphoreKHR.asciidoc[] + + * _sema_object_ specifies the semaphore object to be released. + +The _sema_object_ reference count is decremented. + +// refError + +{clReleaseSemaphoreKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore + object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. 
+ * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +After the reference count becomes zero and commands queued for execution on +a command-queue(s) that use _sema_object_ have finished, the semaphore +object is deleted. +Using this function to release a reference that was not obtained by creating +the object via {clCreateSemaphoreWithPropertiesKHR} or by calling +{clRetainSemaphoreKHR} causes undefined behavior. +-- + +[open,refpage='clRetainSemaphoreKHR',desc='Retain a semaphore object',type='protos'] +-- +To retain a semaphore object, call the function + +include::{generated}/api/protos/clRetainSemaphoreKHR.txt[] +include::{generated}/api/version-notes/clRetainSemaphoreKHR.asciidoc[] + + * _sema_object_ specifies the semaphore object to be retained. + +{clRetainSemaphoreKHR} increments the reference count of _sema_object_. + +// refError + +{clRetainSemaphoreKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore + object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +=== Semaphore Queries + +[open,refpage='clGetSemaphoreInfoKHR',desc='Query information about a semaphore object',type='protos'] +-- +To query information about a semaphore object, call the function + +include::{generated}/api/protos/clGetSemaphoreInfoKHR.txt[] +include::{generated}/api/version-notes/clGetSemaphoreInfoKHR.asciidoc[] + + * _sema_object_ specifies the semaphore object being queried. + * _param_name_ is a constant that specifies the semaphore information to + query, and must be one of the values shown in the + <> table. 
+ * _param_value_ is a pointer to memory where the result of the query is + returned as described in the <<cl_khr_semaphore_info-table,Semaphore Object Queries>> table. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + described in the <<cl_khr_semaphore_info-table,Semaphore Object Queries>> + table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[cl_khr_semaphore_info-table]] +.List of parameter names supported by {clGetSemaphoreInfoKHR} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Semaphore Info | Return Type | Description +| {CL_SEMAPHORE_CONTEXT_KHR_anchor} + | {cl_context_TYPE} + | Returns the context specified when the semaphore is created. +| {CL_SEMAPHORE_REFERENCE_COUNT_KHR_anchor} footnote:[{fn-reference-count-usage}] + | {cl_uint_TYPE} + | Returns the semaphore reference count. +| {CL_SEMAPHORE_PROPERTIES_KHR_anchor} + | {cl_semaphore_properties_khr_TYPE}[] + | Returns the properties argument specified in + {clCreateSemaphoreWithPropertiesKHR}. + + The implementation must return the values specified in the + properties argument in the same order and without including + additional properties. +| {CL_SEMAPHORE_TYPE_KHR_anchor} + | {cl_semaphore_type_khr_TYPE} + | Returns the semaphore type. +| {CL_SEMAPHORE_PAYLOAD_KHR_anchor} + | {cl_semaphore_payload_khr_TYPE} + | Returns the semaphore payload value. + For semaphores of type {CL_SEMAPHORE_TYPE_BINARY_KHR} the payload + value returned will be `0` if the semaphore is in an un-signaled + state, and `1` if it is in a signaled state. +| {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR_anchor} + | {cl_device_id_TYPE}[] + | Returns the list of OpenCL devices the semaphore is associated with.
+ +ifdef::cl_khr_external_semaphore[] +| {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR_anchor} + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of external semaphore handle types that may be used + for exporting. + The size of this query may be 0 indicating that this semaphore does + not support any handle types for exporting. +| {CL_SEMAPHORE_EXPORTABLE_KHR_anchor} + | {cl_bool_TYPE} + | Returns {CL_TRUE} if the semaphore is exportable and {CL_FALSE} + otherwise. +endif::cl_khr_external_semaphore[] +|==== + +// refError + +{clGetSemaphoreInfoKHR} returns {CL_SUCCESS} if the information is queried +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} + ** if _sema_object_ is not a valid semaphore + * {CL_INVALID_VALUE} + ** if _param_name_ is not one of the attributes defined in the + <<cl_khr_semaphore_info-table,Semaphore Object Queries>> table, or + ** if _param_value_size_ is less than the size of Return Type of the + corresponding _param_name_ attribute as defined in the + <<cl_khr_semaphore_info-table,Semaphore Object Queries>> table. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +endif::cl_khr_semaphore[] + + +== Out-of-Order Execution of Kernels and Memory Object Commands + +The OpenCL functions that are submitted to a command-queue are enqueued in +the order the calls are made but can be configured to execute in-order or +out-of-order. +The _properties_ argument in {clCreateCommandQueueWithProperties} or +{clCreateCommandQueue} can be used to specify the execution order. + +If the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property of a command-queue is +not set, the commands enqueued to a command-queue execute in-order.
+For example, if an application calls {clEnqueueNDRangeKernel} to execute +kernel A followed by a {clEnqueueNDRangeKernel} to execute kernel B, the +application can assume that kernel A finishes first and then kernel B is +executed. +If the memory objects output by kernel A are inputs to kernel B then kernel +B will see the correct data in memory objects produced by execution of +kernel A. +If the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property of a command-queue is +set, then there is no guarantee that kernel A will finish before kernel B +starts execution. + +Applications can configure the commands enqueued to a command-queue to +execute out-of-order by setting the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} +property of the command-queue. +This can be specified when the command-queue is created. +In out-of-order execution mode there is no guarantee that the enqueued +commands will finish execution in the order they were queued. +As there is no guarantee that kernels will be executed in-order, i.e. based +on when the {clEnqueueNDRangeKernel} or {clEnqueueTask} calls are made within a +command-queue, it is therefore possible that an earlier +{clEnqueueNDRangeKernel} call to execute kernel A identified by event A may +execute and/or finish later than a {clEnqueueNDRangeKernel} call to execute +kernel B which was called by the application at a later point in time. +To guarantee a specific order of execution of kernels, a wait on a +particular event (in this case event A) can be used. +The wait for event A can be specified in the _event_wait_list_ argument to +{clEnqueueNDRangeKernel} for kernel B. + +In addition, a marker ({clEnqueueMarker} or {clEnqueueMarkerWithWaitList}) or a +barrier ({clEnqueueBarrier} or {clEnqueueBarrierWithWaitList}) command can be +enqueued to the command-queue. +The marker command ensures that previously enqueued commands identified by +the list of events to wait for (or all previous commands) have finished. 
+A barrier command is similar to a marker command, but additionally +guarantees that no later-enqueued commands will execute until the waited-for +commands have executed. + +Similarly, commands to read, write, copy or map memory objects that are +enqueued after {clEnqueueNDRangeKernel}, {clEnqueueTask} or +{clEnqueueNativeKernel} commands are not guaranteed to wait for kernels +scheduled for execution to have completed (if the +{CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property is set). +To ensure correct ordering of commands, the event object returned by +{clEnqueueNDRangeKernel}, {clEnqueueTask} or {clEnqueueNativeKernel} can be +used to enqueue a wait for event or a barrier command can be enqueued that must +complete before reads or writes to the memory object(s) occur. + + +[[profiling-operations]] +== Profiling Operations on Memory Objects and Kernels + +This section describes the profiling of OpenCL functions that are enqueued +as commands to a command-queue. Profiling of OpenCL commands can be enabled +by using a command-queue created with the {CL_QUEUE_PROFILING_ENABLE} +flag set in the {CL_QUEUE_PROPERTIES} bitfield in the _properties_ argument to +{clCreateCommandQueueWithProperties}, or in the _properties_ argument to +{clCreateCommandQueue}. +When profiling is enabled, the event objects that are created from +enqueuing a command store a timestamp for each of their state transitions. + +[open,refpage='clGetEventProfilingInfo',desc='Returns profiling information for the command associated with event if profiling is enabled.',type='protos'] +-- +To return profiling information for a command associated with an event when +profiling is enabled, call the function + +include::{generated}/api/protos/clGetEventProfilingInfo.txt[] +include::{generated}/api/version-notes/clGetEventProfilingInfo.asciidoc[] + + * _event_ specifies the event object. + * _param_name_ specifies the profiling data to query. 
+ The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetEventProfilingInfo} is described in the + <<event-profiling-info-table,Event Profiling Queries>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} the size of the return type as described in the + <<event-profiling-info-table,Event Profiling Queries>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[event-profiling-info-table]] +.List of supported param_names by {clGetEventProfilingInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Event Profiling Info | Return Type | Description +| {CL_PROFILING_COMMAND_QUEUED_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_QUEUED.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event is enqueued in a + command-queue by the host. + +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time when the command-buffer has been enqueued + across the command-queues is used. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_PROFILING_COMMAND_SUBMIT_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_SUBMIT.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event that has been + enqueued is submitted by the host to the device associated with the + command-queue.
+ +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time is used when command-buffer commands have + been submitted to any command-queue. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_PROFILING_COMMAND_START_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_START.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event starts execution on + the device. + +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time is used when any device starts executing a + command-buffer command. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_PROFILING_COMMAND_END_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_END.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event has finished + execution on the device. + +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time is used when the last command-buffer + command finishes execution on any device. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_PROFILING_COMMAND_COMPLETE_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_COMPLETE.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event and any child + commands enqueued by this command on the device have finished + execution.
+ +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time is used when the command-buffer has + completed execution across all command-queues. +endif::cl_khr_command_buffer_multi_device[] + +|==== + +The unsigned 64-bit values returned can be used to measure the time in +nanoseconds consumed by OpenCL commands. + +OpenCL devices are required to correctly track time across changes in device +frequency and power states. +The {CL_DEVICE_PROFILING_TIMER_RESOLUTION} specifies the resolution of the +timer i.e. the number of nanoseconds elapsed before the timer is +incremented. + +ifdef::cl_khr_command_buffer_multi_device[] +[NOTE] +==== +If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, and +if no reliable device timer sources are available to inform the host side, +or parallel runtime scheduling makes it impossible to identify a first/last +command, then an implementation may fall back to reporting +{CL_PROFILING_COMMAND_SUBMIT} and {CL_PROFILING_COMMAND_COMPLETE} for +{CL_PROFILING_COMMAND_START} and {CL_PROFILING_COMMAND_END} respectively. +==== +endif::cl_khr_command_buffer_multi_device[] + +// refError + +{clGetEventProfilingInfo} returns {CL_SUCCESS} if the function is executed +successfully and the profiling information has been recorded. +Otherwise, it returns one of the following errors: + + * {CL_PROFILING_INFO_NOT_AVAILABLE} if the {CL_QUEUE_PROFILING_ENABLE} flag is + not set for the command-queue, if the execution status of the command + identified by _event_ is not {CL_COMPLETE} or if _event_ is a user event + object. + Prior to OpenCL 3.0, implementations may return + {CL_PROFILING_INFO_NOT_AVAILABLE} for an event created by + {clEnqueueSVMFree}.
+ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + and if _event_ was created from a call to {clEnqueueCommandBufferKHR}, + {CL_PROFILING_INFO_NOT_AVAILABLE} is returned if all the queues passed + do not have {CL_QUEUE_PROFILING_ENABLE} set. +endif::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<event-profiling-info-table,Event Profiling Queries>> table and + _param_value_ is not `NULL`. + * {CL_INVALID_EVENT} if _event_ is not a valid event object. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clEnqueueTask',desc='Enqueues a command to execute a kernel, using a single work-item, on a device.',type='protos'] + +== Flush and Finish + +[open,refpage='clFlush',desc='Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.',type='protos'] -- -To enqueue a command to execute a kernel on a device, using a single work-item, -call the function +To flush commands to a device, call the function -include::{generated}/api/protos/clEnqueueTask.txt[] -include::{generated}/api/version-notes/clEnqueueTask.asciidoc[] +include::{generated}/api/protos/clFlush.txt[] +include::{generated}/api/version-notes/clFlush.asciidoc[] - * _command_queue_ is a valid host command-queue. - The kernel will be queued for execution on the device associated with - _command_queue_. - * _kernel_ is a valid kernel object. - The OpenCL context associated with _kernel_ and _command-queue_ must be the - same. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed.
- If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. - If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. - If _event_wait_list_ is not `NULL`, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. + * _command_queue_ is the command-queue to flush. -{clEnqueueTask} is equivalent to calling {clEnqueueNDRangeKernel} with -_work_dim_ set to 1, _global_work_offset_ set to `NULL`, _global_work_size[0]_ -set to 1, and _local_work_size[0]_ set to 1. +All previously queued OpenCL commands in _command_queue_ are issued to the +device associated with _command_queue_. +{clFlush} only guarantees that all queued commands to _command_queue_ will +eventually be submitted to the appropriate device. +There is no guarantee that they will be complete after {clFlush} returns. + +Any blocking commands queued in a command-queue and {clReleaseCommandQueue} +perform an implicit flush of the command-queue. 
+These blocking commands are {clEnqueueReadBuffer}, +{clEnqueueReadBufferRect}, {clEnqueueReadImage}, with _blocking_read_ set to +{CL_TRUE}; {clEnqueueWriteBuffer}, {clEnqueueWriteBufferRect}, +{clEnqueueWriteImage} with _blocking_write_ set to {CL_TRUE}; +{clEnqueueMapBuffer}, {clEnqueueMapImage} with _blocking_map_ set to +{CL_TRUE}; {clEnqueueSVMMemcpy} with _blocking_copy_ set to {CL_TRUE}; +{clEnqueueSVMMap} with _blocking_map_ set to {CL_TRUE} or {clWaitForEvents}. + +To use event objects that refer to commands enqueued in a command-queue as +event objects to wait on by commands enqueued in a different command-queue, +the application must call a {clFlush} or any blocking commands that perform +an implicit flush of the command-queue where the commands that refer to +these event objects are enqueued. // refError -{clEnqueueTask} returns {CL_SUCCESS} if the kernel-instance was successfully -queued. +{clFlush} returns {CL_SUCCESS} if the function call was executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program - executable available for device associated with _command_queue_. * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host command-queue. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and - _kernel_ are not the same or if the context associated with - _command_queue_ and events in _event_wait_list_ are not the same. - * {CL_INVALID_KERNEL_ARGS} if the kernel argument values have not been - specified. - * {CL_INVALID_WORK_GROUP_SIZE} if a work-group size is specified for _kernel_ - in the program source and it is not (1, 1, 1). -// TODO I'm not sure if the next error makes sense for a 'task'. 
- * {CL_INVALID_WORK_GROUP_SIZE} if the required number of sub-groups is - specified for _kernel_ in the program source and is not consistent with a - work-group size of (1, 1, 1). - * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is specified as - the value for an argument that is a buffer object and the _offset_ - specified when the sub-buffer object is created is not aligned to - {CL_DEVICE_MEM_BASE_ADDR_ALIGN} value for device associated with _queue_. - This error code is <> version 1.1. - * {CL_INVALID_IMAGE_SIZE} if an image object is specified as an argument - value and the image dimensions (image width, height, specified or - compute row and/or slice pitch) are not supported by device associated - with _queue_. - * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is specified as an - argument value and the image format (image channel order and data type) - is not supported by device associated with _queue_. - * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution - instance of _kernel_ on the command-queue because of insufficient - resources needed to execute the kernel. See how this error code is used - with {clEnqueueNDRangeKernel} for examples. - * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate - memory for data store associated with image or buffer objects specified - as arguments to _kernel_. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel - and the device does not support SVM or if system pointers are passed as - arguments to a kernel and/or stored inside SVM allocations passed as - kernel arguments and the device does not support fine grain system SVM - allocations. 
* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clEnqueueNativeKernel',desc='Enqueues a command to execute a native C/C++ function not compiled using the OpenCL compiler.',type='protos'] +[open,refpage='clFinish',desc='Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.',type='protos'] -- -To enqueue a command to execute a native C/{cpp} function not compiled using -the OpenCL compiler, call the function +To wait for completion of commands on a device, call the function -include::{generated}/api/protos/clEnqueueNativeKernel.txt[] -include::{generated}/api/version-notes/clEnqueueNativeKernel.asciidoc[] +include::{generated}/api/protos/clFinish.txt[] +include::{generated}/api/version-notes/clFinish.asciidoc[] - * _command_queue_ is a valid host command-queue. - A native user function can only be executed on a command-queue created on a - device that has {CL_EXEC_NATIVE_KERNEL} capability set in - {CL_DEVICE_EXECUTION_CAPABILITIES} as specified in the - <> table. - * _user_func_ is a pointer to a host-callable user function. - It is the application's responsibility to ensure that the host-callable user - function is thread-safe. - * _args_ is a pointer to the args list that _user_func_ should be called with. - * _cb_args_ is the size in bytes of the args list that _args_ points to. - * _num_mem_objects_ is the number of buffer objects that are passed in _args_. - * _mem_list_ is a list of valid buffer objects, if _num_mem_objects_ > 0. - The buffer object values specified in _mem_list_ are memory object handles - (`{cl_mem_TYPE}` values) returned by {clCreateBuffer} or {clCreateBufferWithProperties}, - or `NULL`. 
- * _args_mem_loc_ is a pointer to appropriate locations that _args_ points to - where memory object handles ({cl_mem_TYPE} values) are stored. - Before the user function is executed, the memory object handles are replaced - by pointers to global memory. - * _event_wait_list_, _num_events_in_wait_list_ and _event_ are as described in - {clEnqueueNDRangeKernel}. + * _command_queue_ is the command-queue to wait for. -The data pointed to by _args_ and _cb_args_ bytes in size will be copied and -a pointer to this copied region will be passed to _user_func_. -The copy needs to be done because the memory objects ({cl_mem_TYPE} values) that -_args_ may contain need to be modified and replaced by appropriate pointers -to global memory. -When {clEnqueueNativeKernel} returns, the memory region pointed to by _args_ -can be reused by the application. +All previously queued OpenCL commands in _command_queue_ are issued to the +associated device, and the function blocks until all previously queued +commands have completed. +{clFinish} does not return until all previously queued commands in +_command_queue_ have been processed and completed. +{clFinish} is also a synchronization point. // refError -{clEnqueueNativeKernel} returns {CL_SUCCESS} if the user function execution -instance was successfully queued. +{clFinish} returns {CL_SUCCESS} if the function call was executed +successfully. Otherwise, it returns one of the following errors: * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _user_func_ is `NULL`. - * {CL_INVALID_VALUE} if _args_ is a `NULL` value and _cb_args_ > 0, or if - _args_ is a `NULL` value and _num_mem_objects_ > 0. - * {CL_INVALID_VALUE} if _args_ is not `NULL` and _cb_args_ is 0. - * {CL_INVALID_VALUE} if _num_mem_objects_ > 0 and _mem_list_ or - _args_mem_loc_ are `NULL`. 
- * {CL_INVALID_VALUE} if _num_mem_objects_ = 0 and _mem_list_ or - _args_mem_loc_ are not `NULL`. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ - cannot execute the native kernel. - * {CL_INVALID_MEM_OBJECT} if one or more memory objects specified in - _mem_list_ are not valid or are not buffer objects. - * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution - instance of _kernel_ on the command-queue because of insufficient - resources needed to execute the kernel. - * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate - memory for data store associated with buffer objects specified as - arguments to _kernel_. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel - and the device does not support SVM or if system pointers are passed as - arguments to a kernel and/or stored inside SVM allocations passed as - kernel arguments and the device does not support fine grain system SVM - allocations. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +-- + +ifdef::cl_khr_command_buffer[] + +== Command-Buffers + +A _command-buffer_ object represents a series of operations to be enqueued +on one or more command-queues without any application code interaction. +Grouping the operations together allows efficient enqueuing of repetitive +operations, as well as enabling driver optimizations. + +Command-buffers are _sequential use_ by default, but may also be set to +_simultaneous use_ on creation if the device optionally supports this +capability. 
+A sequential use command-buffer must have a <> +of 0 or 1. +The simultaneous use capability removes this restriction and allows +command-buffers to have a <> greater than 1. + +[[compatible]] +Command-buffers are created using an ordered list of command-queues that +commands are recorded to and execute on by default. +These command-queues can be replaced on command-buffer enqueue with +different command-queues, provided for each element in the replacement list +the substitute command-queue is compatible with the command-queue used on +command-buffer creation. +A _compatible_ command-queue is defined as a command-queue with +identical properties targeting the same device and in the same OpenCL +context. + +While constructing a command-buffer it is valid for the user to interleave +calls to the same queue which create commands, such as +{clCommandNDRangeKernelKHR}, with queue submission calls, such as +{clEnqueueNDRangeKernel} or {clEnqueueCommandBufferKHR}. +That is, there is no effect on queue state from recording commands. +The purpose of the queue parameter is to define the device and properties of +the command, which are constant queries on the queue object. + +A command-buffer object should increment the reference count of attached +OpenCL objects such as queues, buffers, images, and kernels referenced in +commands recorded to the command-buffer. +This enables correct behavior of the command-buffer when its attached +objects have been released. +On destruction of the command-buffer it should decrement these reference +counts, allowing the attached objects to be freed if appropriate. + +[[command-buffer-kernel-argument-ref-counting]] [NOTE] ==== -The total number of read-only images specified as arguments to a kernel -cannot exceed {CL_DEVICE_MAX_READ_IMAGE_ARGS}. -Each image array argument to a kernel declared with the `read_only` -qualifier counts as one image. 
-The total number of write-only images specified as arguments to a kernel -cannot exceed {CL_DEVICE_MAX_WRITE_IMAGE_ARGS}. -Each image array argument to a kernel declared with the `write_only` -qualifier counts as one image. - -The total number of read-write images specified as arguments to a kernel -cannot exceed {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS}. -Each image array argument to a kernel declared with the `read_write` -qualifier counts as one image. +A command-buffer object does not update the reference count of objects set +as arguments on kernels recorded into the command-buffer. +This is consistent with the reference counting behavior of {clSetKernelArg}. + +Applications should ensure that objects passed as arguments to kernels +recorded to a command-buffer are not deleted until the command-buffer has +been released. +Undefined behavior may result from the failure to follow this usage +requirement for all the command-buffers an object is used as a kernel +argument in. + +If using layered extension `<>`, +<>. ==== --- -[[event-objects]] -== Event Objects +ifdef::cl_khr_command_buffer_multi_device[] +=== Command-Buffers and Multiple Devices + +If the `<>` extension is supported, a +command-buffer can contain commands recorded to the queues of different +devices if a vendor provides support for inter-device +{cl_sync_point_khr_TYPE} synchronization. +This feature is reported either through +{CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}, which informs the user what +devices can synchronize with each other natively on the device-side, or +through {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR}, which allows +synchronization between all devices in a platform, falling back to host-side +synchronization when device-side synchronization is not available. +These two mechanisms are referred to as **device-side sync** and **universal +sync** respectively. 
+ +If these mechanisms do not report that more than one device can be used in a +command-buffer, it will still be possible to perform multiple queue +recording in a command-buffer if the +{CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR} capability is reported for +a device. +However, with this capability all the queues commands are recorded to must +target the same device. + +Commands recorded to different command-queues in the same command-buffer may +be executed concurrently to each other unless synchronized explicitly with +sync-points. +Ordering of other commands submitted to the same command-queues as used to +enqueue a command-buffer is the responsibility of the programmer. +A command-buffer enqueue spanning multiple queues can return an event to use +for synchronization, which will complete once all commands in the +command-buffer have completed. +If ordering restrictions are required, this event (or command-queue +barriers) may be used by the user to synchronize the command-buffer enqueue +with regular commands, or another command-buffer enqueue. + +endif::cl_khr_command_buffer_multi_device[] + + + +=== Command-Buffer Lifecycle + +A command-buffer is always in one of the following states: + +[[recording]] +Recording:: Initial state of a command-buffer on creation, where commands can be +recorded to the command-buffer. + +[[executable]] +Executable:: State after command recording has finished with +{clFinalizeCommandBufferKHR} and the command-buffer may be enqueued. + +[[pending]] +Pending:: Once a command-buffer has been enqueued to a command-queue it enters +the Pending state until completion, at which point it moves back to the +<> state. + +// Image generated from the following mermaid diagram description using https://mermaid.live +// Ideally we'd use the asciidoctor-diagram extension to generate the rendered diagram, but +// there are issues installing the gem with ruby 2.3.3 +// +// [mermaid, "Lifecycle of a command-buffer", png] +// .... 
+// stateDiagram-v2 +// [*] --> Recording: Create +// Recording -->Executable: Finalize +// Executable --> Pending: Enqueue +// Pending --> Executable: Completion +// .... + +image::images/commandbuffer_lifecycle.png[align="center", title="Lifecycle of a command-buffer."] + +[[pending_count]] +The Pending Count is the number of copies of the command +buffer in the <> state. +By default a command-buffer's Pending Count must be 0 or 1. +If the command-buffer was created with +{CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} then the command-buffer may have a +Pending Count greater than 1. + + +=== Creating Command-Buffer Objects + +[open,refpage='clCreateCommandBufferKHR',desc='Create a command-buffer',type='protos'] +-- +To create a command-buffer that can record commands to the specified +queues, call the function + +include::{generated}/api/protos/clCreateCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clCreateCommandBufferKHR.asciidoc[] + + * _num_queues_ is the number of command-queues listed in _queues_. + If the `<>` extension is not + supported, this **must** be one. + * _queues_ is a pointer to a list of command-queues that the + command-buffer commands will be recorded to. + _queues_ must be a non-`NULL` value and the length of the list equal to + _num_queues_. + * _properties_ specifies a list of properties for the command-buffer and + their corresponding values. + Each property name is immediately followed by the corresponding desired + value. + The list is terminated with 0. + The list of supported properties is described in the table below. + If a supported property and its value is not specified in properties, + its default value will be used. + _properties_ can be `NULL` in which case the default values for + supported command-buffer properties will be used. 
++ +[[commandbuffer-properties]] +.{clCreateCommandBufferKHR} properties +[cols=",,",options="header",] +|==== +| Recording Properties | Property Value | Description -// Check: Is this list of event object APIs really necessary? - -//Event objects can be used to refer to a kernel-instance command -//({clEnqueueNDRangeKernel}, {clEnqueueTask}, {clEnqueueNativeKernel}), read, -//write, map and copy commands on memory objects ( -//{clEnqueueReadBuffer}, {clEnqueueWriteBuffer}, -//{clEnqueueMapBuffer}, {clEnqueueUnmapMemObject}, -//{clEnqueueReadBufferRect}, {clEnqueueWriteBufferRect}, -//{clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueMapImage}, -//{clEnqueueCopyBuffer}, {clEnqueueCopyImage}, -//{clEnqueueCopyBufferRect}, -//{clEnqueueCopyBufferToImage}, {clEnqueueCopyImageToBuffer}), -//{clEnqueueSVMMemcpy}, {clEnqueueSVMMemFill}, -//{clEnqueueSVMMap}, {clEnqueueSVMUnmap}, {clEnqueueSVMFree}, {clEnqueueMarker}, -//{clEnqueueMarkerWithWaitList}, {clEnqueueWaitForEvents}, {clEnqueueBarrier}, -//{clEnqueueBarrierWithWaitList}, (refer to -//<>) or user events. +| {CL_COMMAND_BUFFER_FLAGS_KHR_anchor} -An event object can be used to track the execution status of a command. -The API calls that enqueue commands to a command-queue create a new event -object that is returned in the _event_ argument. -In case of an error enqueuing the command in the command-queue the event -argument does not return an event object. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_FLAGS_KHR.asciidoc[] + | {cl_command_buffer_flags_khr_TYPE} + | This is a bitfield and can be set to a combination of the following values: -The execution status of an enqueued command at any given point in time can -be one of the following: + {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR_anchor} - Allow multiple + instances of the command-buffer to be submitted to the device for + execution. + If set, devices must support + {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR}. 
- * {CL_QUEUED_anchor}: Indicates that the command has been enqueued in a - command-queue. - This is the initial state of all events except user events. - * {CL_SUBMITTED_anchor}: The initial state for all user events. - For all other events, indicates that the command has been submitted - by the host to the device. - * {CL_RUNNING_anchor}: Indicates that the device has started executing this - command. - In order for the execution status of an enqueued command to change from - {CL_SUBMITTED} to {CL_RUNNING}, all events that this command is waiting on - must have completed successfully i.e. their execution status must be - {CL_COMPLETE}. - * {CL_COMPLETE_anchor}: Indicates that the command has successfully completed. - * An Error Code: A negative integer value indicating that the command was - abnormally terminated. Abnormal termination may occur for a number of reasons, - such as a bad memory access. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR.asciidoc[] -[NOTE] -==== -A command is considered to be complete if its execution status is -{CL_COMPLETE} or is a negative integer value. +ifdef::cl_khr_command_buffer_multi_device[] + {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR_anchor} - All commands in the + command-buffer must use native synchronization, as reported by + {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. + This can be used as a safeguard for performant applications that do not + want to accidentally fallback to host synchronization when passing + multiple queues. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR.asciidoc[] + +endif::cl_khr_command_buffer_multi_device[] -If the execution of a command is terminated, the command-queue associated -with this terminated command, and the associated context (and all other -command-queues in this context) may no longer be available. 
-The behavior of OpenCL API calls that use this context (and command-queues -associated with this context) are now considered to be -implementation-defined. -The user registered callback function specified when context is created can -be used to report appropriate error information. -==== +ifdef::cl_khr_command_buffer_mutable_dispatch[] + {CL_COMMAND_BUFFER_MUTABLE_KHR} - Enables modification of the + command-buffer, by default command-buffers are immutable. + If set, commands in the command-buffer may be updated via + {clUpdateMutableCommandsKHR}. -[open,refpage='clCreateUserEvent',desc='Creates a user event object.',type='protos'] --- -To create a user event object, call the function +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_MUTABLE_KHR.asciidoc[] +endif::cl_khr_command_buffer_mutable_dispatch[] -include::{generated}/api/protos/clCreateUserEvent.txt[] -include::{generated}/api/version-notes/clCreateUserEvent.asciidoc[] + The default value of this property is `0`. - * _context_ must be a valid OpenCL context. +ifdef::cl_khr_command_buffer_mutable_dispatch[] +| {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR.asciidoc[] + | {cl_mutable_dispatch_asserts_khr_TYPE} + | This is a bitfield and can be set to a combination of the following values: + + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR_anchor} - + An assertion by the user that the number of work-groups of any + ND-range kernel recorded in this command buffer will not be updated + beyond the number defined when the ND-range kernel was recorded. + If the user's update to the values of _local_work_size_ and/or + _global_work_size_ result in an increase in the number of + work-groups in the ND-range over the number specified when the + ND-range kernel was recorded, the behavior is undefined. 
+ +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR.asciidoc[] +endif::cl_khr_command_buffer_mutable_dispatch[] +|==== * _errcode_ret_ will return an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. -User events allow applications to enqueue commands that wait on a user event -to finish before the command is executed by the device. +ifdef::cl_khr_command_buffer_multi_device[] +.Summary of command-buffer creation configurations, for the `<>` extension +[width="100%",options="header"] +|==== +| All Devices Associated With `Queues` can Device-side Sync | Platform Supports Universal Sync | Condition | Result +.3+| Yes +.3+| Yes or No +| Any device does not support the multi-queue capability, and has more than + one queue targeting it +| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} +| User sets {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag +| OK +| Otherwise +| OK + +.3+| No +.3+| Yes +| Any device does not support the multi-queue capability, and has more than + one queue targeting it +| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} +| User sets {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag +| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} +| Otherwise +| OK - May be performance implications when synchronizing commands between + devices without device-side sync support. + +| No +| No +| Always +| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} +|==== +endif::cl_khr_command_buffer_multi_device[] + +[NOTE] +==== +Upon creation the command-buffer is defined as being in the +<> state, in order for the command-buffer to be enqueued +it must first be finalized using {clFinalizeCommandBufferKHR} after which no +further commands can be recorded. +A command-buffer is submitted for execution on command-queues with a call to +{clEnqueueCommandBufferKHR}. 
+==== // refError -{clCreateUserEvent} returns a valid non-zero event object and _errcode_ret_ -is set to {CL_SUCCESS} if the user event object is created successfully. +{clCreateCommandBufferKHR} returns a valid non-zero command-buffer and +_errcode_ret_ is set to {CL_SUCCESS} if the command-buffer is created +successfully. Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a + valid command-queue. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any command-queue in _queues_ is + an out-of-order command-queue and the device associated with the + command-queue does not support the + {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} capability. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any + command-queue in _queues_ does not contain the minimum properties + specified by {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR}. + * {CL_INVALID_CONTEXT} if all the command-queues in _queues_ do not have + the same OpenCL context. + * {CL_INVALID_VALUE} if _num_queues_ is zero. + * {CL_INVALID_VALUE} if _queues_ is `NULL`. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid, or + if the same property name is specified more than once. + * {CL_INVALID_PROPERTY} if values specified in _properties_ are valid but + are not supported by all the devices associated with command-queues in + _queues_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -The initial execution status for the user event object is {CL_SUBMITTED}. 
+ifdef::cl_khr_command_buffer_multi_device[] +If the `<>` extension is supported: + + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if _queues_ includes more than one + command-queue associated with a device that does not support capability + {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR}. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the + {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag is set, and any device + associated with a command-queue in _queues_ cannot natively synchronize + with the other devices associated with _queues_ as reported by + {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the platform does not support the + {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR} capability, and any + device associated with a command-queue in _queues_ cannot natively + synchronize with the other devices associated with _queues_ as reported + by {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. +endif::cl_khr_command_buffer_multi_device[] -- -[open,refpage='clSetUserEventStatus',desc='Sets the execution status of a user event object.',type='protos'] +[open,refpage='clRetainCommandBufferKHR',desc='Increment a command-buffer\'s reference count',type='protos'] -- -To set the execution status of a user event object, call the function +To increment a command-buffer's reference count, call the function -include::{generated}/api/protos/clSetUserEventStatus.txt[] -include::{generated}/api/version-notes/clSetUserEventStatus.asciidoc[] +include::{generated}/api/protos/clRetainCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clRetainCommandBufferKHR.asciidoc[] - * _event_ is a user event object created using {clCreateUserEvent}. - * _execution_status_ specifies the new execution status to be set and can be - {CL_COMPLETE} or a negative integer value to indicate an error. - A negative integer value causes all enqueued commands that wait on this user - event to be terminated. 
- {clSetUserEventStatus} can only be called once to change the execution - status of _event_. + * _command_buffer_ specifies the command-buffer to retain. + +// refError + +{clRetainCommandBufferKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clReleaseCommandBufferKHR',desc='Decrement a command-buffer\'s reference count',type='protos'] +-- +To decrement a command-buffer's reference count, call the function + +include::{generated}/api/protos/clReleaseCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clReleaseCommandBufferKHR.asciidoc[] + + * _command_buffer_ specifies the command-buffer to release. [NOTE] ==== -If there are enqueued commands with user events in the _event_wait_list_ -argument of *+clEnqueue*+* commands, the user must ensure that the status of -these user events being waited on are set using {clSetUserEventStatus} -before any OpenCL APIs that release OpenCL objects except for event objects -are called; otherwise the behavior is undefined. +After the _command_buffer_ reference count becomes zero and has finished +execution, the command-buffer is deleted. +==== -For example, the following code sequence will result in undefined behavior -of {clReleaseMemObject}. +// refError -[source,opencl] ----- -ev1 = clCreateUserEvent(ctx, NULL); -clEnqueueWriteBuffer(cq, buf1, CL_FALSE, ..., 1, &ev1, NULL); -clEnqueueWriteBuffer(cq, buf2, CL_FALSE, ...); -clReleaseMemObject(buf2); -clSetUserEventStatus(ev1, CL_COMPLETE); ----- +{clReleaseCommandBufferKHR} returns {CL_SUCCESS} if the function is executed +successfully. 
+Otherwise, it returns one of the following errors: -The following code sequence, however, works correctly. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- -[source,opencl] ----- -ev1 = clCreateUserEvent(ctx, NULL); -clEnqueueWriteBuffer(cq, buf1, CL_FALSE, ..., 1, &ev1, NULL); -clEnqueueWriteBuffer(cq, buf2, CL_FALSE, ...); -clSetUserEventStatus(ev1, CL_COMPLETE); -clReleaseMemObject(buf2); ----- + +=== Enqueuing a Command-Buffer + +[open,refpage='clFinalizeCommandBufferKHR',desc='Finalize command recording for a command-buffer',type='protos'] +-- +To finalize command recording ready for enqueuing a command-buffer on a +command-queue, call the function + +include::{generated}/api/protos/clFinalizeCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clFinalizeCommandBufferKHR.asciidoc[] + + * _command_buffer_ refers to a valid command-buffer object. + +[NOTE] +==== +{clFinalizeCommandBufferKHR} places the command-buffer in the +<<executable, Executable>> state where commands can no longer be recorded, at +this point the command-buffer is ready to be enqueued. ==== // refError -{clSetUserEventStatus} returns {CL_SUCCESS} if the function was executed +{clFinalizeCommandBufferKHR} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_EVENT} if _event_ is not a valid user event object. - * {CL_INVALID_VALUE} if the _execution_status_ is not {CL_COMPLETE} or a - negative integer value. - * {CL_INVALID_OPERATION} if the _execution_status_ for _event_ has already - been changed by a previous call to {clSetUserEventStatus}.
- * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ is not in the <> state. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clWaitForEvents',desc='Waits on the host thread for commands identified by event objects to complete.',type='protos'] +[open,refpage='clEnqueueCommandBufferKHR',desc='Enqueue a command-buffer to execute on command-queues',type='protos'] -- -To wait for events to complete, call the function +To enqueue a command-buffer to execute on command-queues, call the function -include::{generated}/api/protos/clWaitForEvents.txt[] -include::{generated}/api/version-notes/clWaitForEvents.asciidoc[] +include::{generated}/api/protos/clEnqueueCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clEnqueueCommandBufferKHR.asciidoc[] - * _num_events_ is the number of events in _event_list_. - * _event_list_ is a pointer to a list of event object handles. + * _num_queues_ is the number of command-queues listed in _queues_. + * _queues_ is a pointer to an ordered list of command-queues <> with the command-queues used on recording. + _queues_ can be `NULL`, in which case the default command-queues used on + command-buffer creation are used and _num_queues_ must be 0. + * _command_buffer_ refers to a valid command-buffer object. + * _event_wait_list_, _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. 
+ If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and + command_queue must be the same. + The memory associated with _event_wait_list_ can be reused or freed + after the function returns. + * _event_ will return an event object that identifies this command and can be + used to query for profiling information or queue a wait for this + particular command to complete. + _event_ can be `NULL` in which case it will not be possible for the + application to wait on this command or query it for profiling + information. -This function waits on the host thread for commands identified by event -objects in _event_list_ to complete. -A command is considered complete if its execution status is {CL_COMPLETE} or a -negative value. -The events specified in _event_list_ act as synchronization points. +[NOTE] +==== +To enqueue a command-buffer it must be in a <<executable, Executable>> state, +see {clFinalizeCommandBufferKHR}. +==== // refError -{clWaitForEvents} returns {CL_SUCCESS} if the execution status of all events -in _event_list_ is {CL_COMPLETE}. +{clEnqueueCommandBufferKHR} returns {CL_SUCCESS} if the command-buffer +execution was successfully queued, or one of the errors below: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. + * {CL_INVALID_OPERATION} if _command_buffer_ was not created with the + {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag and is in the <<pending, Pending>> state. + * {CL_INVALID_VALUE} if _queues_ is `NULL` and _num_queues_ is > 0, or + _queues_ is not `NULL` and _num_queues_ is 0. + * {CL_INVALID_VALUE} if _num_queues_ is > 0 and not the same value as + _num_queues_ set on _command_buffer_ creation.
+ * {CL_INVALID_COMMAND_QUEUE} if any element of _queues_ is not a valid + command-queue. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any element of _queues_ is not + <<compatible, compatible>> with the command-queue set on + _command_buffer_ creation at the same list index. + * {CL_INVALID_CONTEXT} if any element of _queues_ does not have the same + context as the command-queue set on _command_buffer_ creation at the + same list index. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_ and + events in _event_wait_list_ are not the same. + * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution + instance of _command_buffer_ on the command-queues because of + insufficient resources needed to execute _command_buffer_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +=== Recording Commands to a Command-Buffer + +[open,refpage='clCommandBarrierWithWaitListKHR',desc='Record a barrier operation to a command-queue',type='protos'] +-- +To record a barrier operation used as a synchronization point, call the +function + +include::{generated}/api/protos/clCommandBarrierWithWaitListKHR.txt[] +include::{generated}/api/version-notes/clCommandBarrierWithWaitListKHR.asciidoc[] + + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded + to. + This parameter is unused, as only a single + command-queue is supported, and **must** be `NULL`.
+ * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. ++ +If _sync_point_wait_list_ is `NULL`, then this particular command +waits until all previous recorded commands to _command_queue_ have +completed. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this barrier command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. + +[NOTE] +==== +{clCommandBarrierWithWaitListKHR} waits for either a list of +synchronization-points to complete, or if the list is empty it waits for all +commands previously recorded in _command_buffer_ to complete before it +completes. +This command blocks command execution, that is, any following commands +recorded after it do not execute until it completes. 
+==== + +// refError + +{clCommandBarrierWithWaitListKHR} returns {CL_SUCCESS} if the function is +executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_VALUE} if _num_events_ is zero or _event_list_ is `NULL`. - * {CL_INVALID_CONTEXT} if events specified in _event_list_ do not belong to - the same context. - * {CL_INVALID_EVENT} if event objects specified in _event_list_ are not - valid event objects. - * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of - any of the events in _event_list_ is a negative integer value. - This error code is <> version 1.1. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + _command_buffer_ is not the same. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- -[open,refpage='clGetEventInfo',desc='Returns information about the event object.',type='protos'] +[open,refpage='clCommandCopyBufferKHR',desc='Record a command to copy between two buffer objects',type='protos'] -- -To return information about an event object, call the function +To record a command to copy from one buffer object to another, call the +function -include::{generated}/api/protos/clGetEventInfo.txt[] -include::{generated}/api/version-notes/clGetEventInfo.asciidoc[] +include::{generated}/api/protos/clCommandCopyBufferKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyBufferKHR.asciidoc[] - * _event_ specifies the event object being queried. - * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetEventInfo} is described in the - <<event-info-table,Event Object Queries>> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <<event-info-table,Event Object Queries>> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`.
+endif::cl_khr_command_buffer_multi_device[] + * _src_buffer_, _dst_buffer_, _src_offset_, _dst_offset_, _size_ refer + to {clEnqueueCopyBuffer}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -[[event-info-table]] -.List of supported param_names by {clGetEventInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Event Info | Return Type | Description -| {CL_EVENT_COMMAND_QUEUE_anchor} +// refError -include::{generated}/api/version-notes/CL_EVENT_COMMAND_QUEUE.asciidoc[] - | {cl_command_queue_TYPE} - | Return the command-queue associated with _event_. - For user event objects, a `NULL` value is returned. 
-| {CL_EVENT_CONTEXT_anchor} +{clCommandCopyBufferKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyBuffer} except: -include::{generated}/api/version-notes/CL_EVENT_CONTEXT.asciidoc[] - | {cl_context_TYPE} - | Return the context associated with _event_. -| {CL_EVENT_COMMAND_TYPE_anchor} +{CL_INVALID_COMMAND_QUEUE} is replaced with: -include::{generated}/api/version-notes/CL_EVENT_COMMAND_TYPE.asciidoc[] - | {cl_command_type_TYPE} - | Return the command type associated with _event_ as described in the - <> table. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -| {CL_EVENT_COMMAND_EXECUTION_STATUS_anchor} footnote:[{fn-event-status-order}] +{CL_INVALID_CONTEXT} is replaced with: -include::{generated}/api/version-notes/CL_EVENT_COMMAND_EXECUTION_STATUS.asciidoc[] - | {cl_int_TYPE} - | Return the execution status of the command identified by event. - Valid values are: + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. - {CL_QUEUED} (command has been enqueued in the command-queue), +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - {CL_SUBMITTED} (enqueued command has been submitted by the host to the - device associated with the command-queue), + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. 
- {CL_RUNNING} (device is currently executing this command), +New errors: - {CL_COMPLETE} (the command has completed), or + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- - Error code given by a negative integer value. (command was - abnormally terminated - this may be caused by a bad memory access - etc.). - These error codes come from the same set of error codes that are - returned from the platform or runtime API calls as return values or - errcode_ret values. -| {CL_EVENT_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] +[open,refpage='clCommandCopyBufferRectKHR',desc='Record a command to copy a rectangular region from one buffer object to another',type='protos'] +-- +To record a command to copy a rectangular region from a buffer object to +another buffer object, call the function -include::{generated}/api/version-notes/CL_EVENT_REFERENCE_COUNT.asciidoc[] - | {cl_uint_TYPE} - | Return the _event_ reference count. -|==== +include::{generated}/api/protos/clCommandCopyBufferRectKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyBufferRectKHR.asciidoc[] -[[event-command-type-table]] -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. 
+endif::cl_khr_command_buffer_multi_device[] + * _src_origin_, _dst_origin_, _region_, _src_row_pitch_, + _src_slice_pitch_, _dst_row_pitch_, _dst_slice_pitch_ refer to + {clEnqueueCopyBufferRect}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must +be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. + +[NOTE] +==== +{clCommandCopyBufferRectKHR} records a command to copy a 2D or 3D rectangular +region from the buffer object identified by _src_buffer_ to a 2D or 3D region +in the buffer object identified by _dst_buffer_. +Copying begins at the source offset and destination offset which are +computed as described in the description for _src_origin_ and _dst_origin_. 
+ +Each byte of the region's width is copied from the source offset to the +destination offset. +After copying each width, the source and destination offsets are incremented +by their respective source and destination row pitches. +After copying each 2D rectangle, the source and destination offsets are +incremented by their respective source and destination slice pitches. +==== + +// refError + +{clCommandCopyBufferRectKHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyBufferRect} +except: + +{CL_INVALID_COMMAND_QUEUE} is replaced with: + + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. + +{CL_INVALID_CONTEXT} is replaced with: + + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. -| {clEnqueueNDRangeKernel} -| {CL_COMMAND_NDRANGE_KERNEL_anchor} +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_NDRANGE_KERNEL.asciidoc[] + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -| {clEnqueueTask} -| {CL_COMMAND_TASK_anchor} +New errors: -include::{generated}/api/version-notes/CL_COMMAND_TASK.asciidoc[] + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. 
+ * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -| {clEnqueueNativeKernel} -| {CL_COMMAND_NATIVE_KERNEL_anchor} +[open,refpage='clCommandCopyBufferToImageKHR',desc='Record a command to copy a buffer object to an image object',type='protos'] +-- +To record a command to copy a buffer object to an image object, call the +function -include::{generated}/api/version-notes/CL_COMMAND_NATIVE_KERNEL.asciidoc[] +include::{generated}/api/protos/clCommandCopyBufferToImageKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyBufferToImageKHR.asciidoc[] -| {clEnqueueReadBuffer} -| {CL_COMMAND_READ_BUFFER_anchor} + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _src_buffer_, _dst_image_, _src_offset_, _dst_origin_, _region_ refer to + {clEnqueueCopyBufferToImage}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must +be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_.
+The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -include::{generated}/api/version-notes/CL_COMMAND_READ_BUFFER.asciidoc[] +// refError -| {clEnqueueWriteBuffer} -| {CL_COMMAND_WRITE_BUFFER_anchor} +{clCommandCopyBufferToImageKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyBufferToImage} +except: -include::{generated}/api/version-notes/CL_COMMAND_WRITE_BUFFER.asciidoc[] +{CL_INVALID_COMMAND_QUEUE} is replaced with: -| {clEnqueueCopyBuffer} -| {CL_COMMAND_COPY_BUFFER_anchor} + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: -| {clEnqueueReadImage} -| {CL_COMMAND_READ_IMAGE_anchor} + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_buffer_, and _dst_image_ are not the same. 
-include::{generated}/api/version-notes/CL_COMMAND_READ_IMAGE.asciidoc[] +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -| {clEnqueueWriteImage} -| {CL_COMMAND_WRITE_IMAGE_anchor} + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -include::{generated}/api/version-notes/CL_COMMAND_WRITE_IMAGE.asciidoc[] +New errors: -| {clEnqueueCopyImage} -| {CL_COMMAND_COPY_IMAGE_anchor} + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -include::{generated}/api/version-notes/CL_COMMAND_COPY_IMAGE.asciidoc[] +[open,refpage='clCommandCopyImageKHR',desc='Record a command to copy between two image objects',type='protos'] +-- +To record a command to copy between two image objects, call the function -| {clEnqueueCopyBufferToImage} -| {CL_COMMAND_COPY_BUFFER_TO_IMAGE_anchor} +include::{generated}/api/protos/clCommandCopyImageKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyImageKHR.asciidoc[] -include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER_TO_IMAGE.asciidoc[] + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. 
+endif::cl_khr_command_buffer_multi_device[] + * _src_image_, _dst_image_, _src_origin_, _dst_origin_, _region_ refer to + {clEnqueueCopyImage}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must +be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -| {clEnqueueCopyImageToBuffer} -| {CL_COMMAND_COPY_IMAGE_TO_BUFFER_anchor} +[NOTE] +==== +It is currently a requirement that the _src_image_ and _dst_image_ image +memory objects for {clCommandCopyImageKHR} must have the exact same image +format, i.e. the {cl_image_format_TYPE} descriptor specified when +_src_image_ and _dst_image_ are created must match. 
+==== -include::{generated}/api/version-notes/CL_COMMAND_COPY_IMAGE_TO_BUFFER.asciidoc[] +// refError -| {clEnqueueMapBuffer} -| {CL_COMMAND_MAP_BUFFER_anchor} +{clCommandCopyImageKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyImage} except: -include::{generated}/api/version-notes/CL_COMMAND_MAP_BUFFER.asciidoc[] +{CL_INVALID_COMMAND_QUEUE} is replaced with: -| {clEnqueueMapImage} -| {CL_COMMAND_MAP_IMAGE_anchor} + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/version-notes/CL_COMMAND_MAP_IMAGE.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: -| {clEnqueueUnmapMemObject} -| {CL_COMMAND_UNMAP_MEM_OBJECT_anchor} + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_image_, and _dst_image_ are not the same. -include::{generated}/api/version-notes/CL_COMMAND_UNMAP_MEM_OBJECT.asciidoc[] +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -| {clEnqueueMarker}, + - {clEnqueueMarkerWithWaitList} -| {CL_COMMAND_MARKER_anchor} + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -include::{generated}/api/version-notes/CL_COMMAND_MARKER.asciidoc[] +New errors: -| {clEnqueueReadBufferRect} -| {CL_COMMAND_READ_BUFFER_RECT_anchor} + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. 
+ * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -include::{generated}/api/version-notes/CL_COMMAND_READ_BUFFER_RECT.asciidoc[] +[open,refpage='clCommandCopyImageToBufferKHR',desc='Record a command to copy an image object to a buffer object',type='protos'] +-- +To record a command to copy an image object to a buffer object, call the +function -| {clEnqueueWriteBufferRect} -| {CL_COMMAND_WRITE_BUFFER_RECT_anchor} +include::{generated}/api/protos/clCommandCopyImageToBufferKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyImageToBufferKHR.asciidoc[] -include::{generated}/api/version-notes/CL_COMMAND_WRITE_BUFFER_RECT.asciidoc[] + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _src_image_, _dst_buffer_, _src_origin_, _region_, _dst_offset_ refer to + {clEnqueueCopyImageToBuffer}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. 
+The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -| {clEnqueueCopyBufferRect} -| {CL_COMMAND_COPY_BUFFER_RECT_anchor} +// refError -include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER_RECT.asciidoc[] +{clCommandCopyImageToBufferKHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyImageToBuffer} +except: -| {clCreateUserEvent} -| {CL_COMMAND_USER_anchor} +{CL_INVALID_COMMAND_QUEUE} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_USER.asciidoc[] + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. 
-| {clEnqueueBarrier}, + - {clEnqueueBarrierWithWaitList} -| {CL_COMMAND_BARRIER_anchor} +{CL_INVALID_CONTEXT} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_BARRIER.asciidoc[] + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_image_, and _dst_buffer_ are not the same. -| {clEnqueueMigrateMemObjects} -| {CL_COMMAND_MIGRATE_MEM_OBJECTS_anchor} +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_MIGRATE_MEM_OBJECTS.asciidoc[] + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -| {clEnqueueFillBuffer} -| {CL_COMMAND_FILL_BUFFER_anchor} +New errors: -include::{generated}/api/version-notes/CL_COMMAND_FILL_BUFFER.asciidoc[] + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. 
+-- -| {clEnqueueFillImage} -| {CL_COMMAND_FILL_IMAGE_anchor} +[open,refpage='clCommandFillBufferKHR',desc='Record a command to fill a buffer object with a pattern',type='protos'] +-- +To record a command to fill a buffer object with a pattern of a given +pattern size, call the function -include::{generated}/api/version-notes/CL_COMMAND_FILL_IMAGE.asciidoc[] +include::{generated}/api/protos/clCommandFillBufferKHR.txt[] +include::{generated}/api/version-notes/clCommandFillBufferKHR.asciidoc[] -| {clEnqueueSVMFree} -| {CL_COMMAND_SVM_FREE_anchor} +[NOTE] +==== +The usage information which indicates whether the memory object can be read or +written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument +value specified when _buffer_ is created is ignored by +{clCommandFillBufferKHR}. +==== -include::{generated}/api/version-notes/CL_COMMAND_SVM_FREE.asciidoc[] + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _buffer_, _pattern_, _pattern_size_, _offset_, _size_ refer to + {clEnqueueFillBuffer}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. 
+The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -| {clEnqueueSVMMemcpy} -| {CL_COMMAND_SVM_MEMCPY_anchor} +// refError -include::{generated}/api/version-notes/CL_COMMAND_SVM_MEMCPY.asciidoc[] +{clCommandFillBufferKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns the errors defined by {clEnqueueFillBuffer} except: -| {clEnqueueSVMMemFill} -| {CL_COMMAND_SVM_MEMFILL_anchor} +{CL_INVALID_COMMAND_QUEUE} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_SVM_MEMFILL.asciidoc[] + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. 
-| {clEnqueueSVMMap} -| {CL_COMMAND_SVM_MAP_anchor} +{CL_INVALID_CONTEXT} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_SVM_MAP.asciidoc[] + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, and _buffer_ are not the same. -| {clEnqueueSVMUnmap} -| {CL_COMMAND_SVM_UNMAP_anchor} +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_SVM_UNMAP.asciidoc[] + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -| {clEnqueueSVMMigrateMem} -| {CL_COMMAND_SVM_MIGRATE_MEM_anchor} +New errors: -include::{generated}/api/version-notes/CL_COMMAND_SVM_MIGRATE_MEM.asciidoc[] + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -Prior to OpenCL 3.0, implementations should return -{CL_COMMAND_MIGRATE_MEM_OBJECTS}, but may return an implementation-defined -event command type for {clEnqueueSVMMigrateMem}. +[open,refpage='clCommandFillImageKHR',desc='Record a command to fill an image object with a specified color',type='protos'] +-- +To record a command to fill an image object with a specified color, call the +function -|==== +include::{generated}/api/protos/clCommandFillImageKHR.txt[] +include::{generated}/api/version-notes/clCommandFillImageKHR.asciidoc[] -Using {clGetEventInfo} to determine if a command identified by _event_ has -finished execution (i.e. {CL_EVENT_COMMAND_EXECUTION_STATUS} returns -{CL_COMPLETE}) is not a synchronization point. 
-There are no guarantees that the memory objects being modified by command -associated with _event_ will be visible to other enqueued commands. +[NOTE] +==== +The usage information which indicates whether the memory object can be read or +written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument +value specified when _image_ is created is ignored by {clCommandFillImageKHR}. +==== + + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _image_, _fill_color_, _origin_, _region_ refer to {clEnqueueFillImage}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command.
+ Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. // refError -{clGetEventInfo} returns {CL_SUCCESS} if the function is executed +{clCommandFillImageKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: +Otherwise, it returns the errors defined by {clEnqueueFillImage} except: - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_VALUE} if information to query given in _param_name_ cannot be - queried for _event_. - * {CL_INVALID_EVENT} if _event_ is a not a valid event object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +{CL_INVALID_COMMAND_QUEUE} is replaced with: -[open,refpage='clSetEventCallback',desc='Registers a user callback function for a specific command execution status.',type='protos'] --- -To register a user callback function for a specific command execution -status, call the function + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. 
+ * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/protos/clSetEventCallback.txt[] -include::{generated}/api/version-notes/clSetEventCallback.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: - * _event_ is a valid event object. - * _command_exec_callback_type_ specifies the command execution status for - which the callback is registered. - The command execution status types for which a callback can be registered - are {CL_SUBMITTED}, {CL_RUNNING}, or {CL_COMPLETE}. - The callback function registered for a _command_exec_callback_type_ value of - {CL_COMPLETE} will be called when the command has completed successfully or - is abnormally terminated. - * _pfn_event_notify_ is the event callback function that can be registered by - the application. - This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - The parameters to this callback function are: - ** _event_ is the event object for which the callback function is invoked. - ** _event_command_status_ is equal to the _command_exec_callback_type_ - used while registering the callback. - Refer to the <> - table for the command execution status values. - If the callback is called as the result of the command associated with - event being abnormally terminated, an appropriate error code for the - error that caused the termination will be passed to - _event_command_status_ instead. - ** _user_data_ is a pointer to user supplied data. - * _user_data_ will be passed as the _user_data_ argument when _pfn_notify_ is - called. - _user_data_ can be `NULL`. 
+ * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, and _image_ are not the same. -Each call to {clSetEventCallback} registers the specified user callback -function on a callback stack associated with _event_. -The order in which the registered user callback functions are called is -undefined. +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -The registered callback function will be called when the execution status of the -command associated with _event_ changes to an execution status equal to or past -the status specified by _command_exec_status_, or for the execution status -{CL_COMPLETE}, if the command is abnormally terminated. -There is no guarantee that the callback functions registered for various command -execution status values for an event will be called in the exact order that the -execution status of a command changes. -Furthermore, it should be noted that calling a callback for an event execution -status other than {CL_COMPLETE} in no way implies that the memory model or -execution model as defined by the OpenCL specification has changed. For example, -it is not valid to assume that a corresponding memory transfer has completed -unless the event is in the state {CL_COMPLETE}. + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -All callbacks registered for an event object must be called before the event -object is destroyed. +New errors: -Callbacks should return promptly. -Behavior is undefined when calling expensive system routines, OpenCL APIs to -create contexts or command-queues, or blocking OpenCL APIs in an event callback. 
-Rather than calling a blocking OpenCL API in an event callback, applications -may call a non-blocking OpenCL API, then register a completion callback -for the non-blocking OpenCL API with the remainder of the work. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -Because commands in a command-queue are not required to begin execution -until the command-queue is flushed, callbacks that enqueue commands on a -command-queue should either call {clFlush} on the queue before returning, -or arrange for the command-queue to be flushed later. +[open,refpage='clCommandNDRangeKernelKHR',desc='Record a command to execute a kernel on a device',type='protos'] +-- +To record a command to execute a kernel on a device, call the function + +include::{generated}/api/protos/clCommandNDRangeKernelKHR.txt[] +include::{generated}/api/version-notes/clCommandNDRangeKernelKHR.asciidoc[] + + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the kernel command and + their corresponding values. + Each property name is immediately followed by the corresponding desired + value. + The list is terminated with 0. 
+ifdef::cl_khr_command_buffer_mutable_dispatch[] + If a supported property and its value is not specified in _properties_, its + default value will be used. + _properties_ may be `NULL`, in which case the default values for supported + properties will be used. + The `<>` extension does not define any + properties, but supported properties defined by extensions are defined + in the <> table. +endif::cl_khr_command_buffer_mutable_dispatch[] + * _kernel_ is a valid kernel object which **must** have its arguments set. + Any changes to _kernel_ after calling {clCommandNDRangeKernelKHR}, such + as with {clSetKernelArg} or {clSetKernelExecInfo}, have no effect on the + recorded command. + If _kernel_ is recorded to a following {clCommandNDRangeKernelKHR} + command however, then that command will capture the updated state of + _kernel_. + * _work_dim_, _global_work_offset_, _global_work_size_, _local_work_size_ + Refer to {clEnqueueNDRangeKernel}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must +be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. 
+ _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. +ifdef::cl_khr_command_buffer_mutable_dispatch[] + If the `<>` extension is + supported, and _mutable_handle_ is not `NULL`, it can be used in the + {cl_mutable_dispatch_config_khr_TYPE} struct to update the command + configuration between recordings. + The lifetime of this handle is tied to the parent command-buffer, such + that freeing the command-buffer will also free this handle. +endif::cl_khr_command_buffer_mutable_dispatch[] +ifndef::cl_khr_command_buffer_mutable_dispatch[] + If the `<>` extension is not + supported, this parameter is unused, and **must** be `NULL`. +endif::cl_khr_command_buffer_mutable_dispatch[] + +[[ndrange-kernel-properties-table]] +.List of supported properties by {clCommandNDRangeKernelKHR} +[cols=",,",options="header",] +|==== +| Recording Properties | Property Value | Description + +ifdef::cl_khr_command_buffer_mutable_dispatch[] +| {CL_MUTABLE_DISPATCH_ASSERTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ASSERTS_KHR.asciidoc[] + | {cl_mutable_dispatch_asserts_khr_TYPE} + | This is a bitfield and can be set to a combination of the following + values: + + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR_anchor} + + An assertion by the user that the number of work-groups of this + ND-range kernel will not be updated beyond the number defined when the + ND-range kernel was recorded. + The number of work-groups is defined as the product for each _i_ from + _0_ to _work_dim - 1_ of + _ceil(global_work_size[i]/local_work_size[i])_. 
+ +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR.asciidoc[] + +| {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR.asciidoc[] + | {cl_mutable_dispatch_fields_khr_TYPE} + | This is a bitfield and can be set to a combination of the following + values: + + {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR_anchor} determines whether the + _global_work_offset_ of kernel execution can be modified after + recording. + If set, the _global_work_offset_ of the kernel execution can be + changed with {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the _global_work_offset_ cannot be modified. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR_anchor} determines whether the + _global_work_size_ of kernel execution can be modified after + recording. + If set, the _global_work_size_ of the kernel execution can be changed + with {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the _global_work_size_ cannot be modified. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR_anchor} determines whether the + _local_work_size_ of kernel execution can be modified after recording. + If set, the _local_work_size_ of the kernel execution can be changed + with {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the _local_work_size_ cannot be modified. 
+ +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR_anchor} determines whether the + kernel arguments set on _kernel_ can be updated between executions. + If set, the kernel arguments normally set with {clSetKernelArg} and + {clSetKernelArgSVMPointer} can be changed with + {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the kernel arguments cannot be modified between executions. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ARGUMENTS_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR_anchor} determines whether the + information passed to _kernel_ can be updated between executions. + If set, the execution information of the kernel can be changed with + {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the kernel execution information cannot be modified. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_EXEC_INFO_KHR.asciidoc[] + + If {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR_anchor} is not specified + then it defaults to the value returned by the + {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} device query. +endif::cl_khr_command_buffer_mutable_dispatch[] +|==== + +[NOTE] +==== +The work-group size to be used for _kernel_ can also be specified in the +program source using the +`+__attribute__((reqd_work_group_size(X, Y, Z)))+` qualifier. +In this case the size of work-group specified by _local_work_size_ must +match the value specified by the `reqd_work_group_size` `+__attribute__+` +qualifier. + +These work-group instances are executed in parallel across multiple compute +units or concurrently on the same compute unit. + +Each work-item is uniquely identified by a global identifier. 
+The global ID, which can be read inside the kernel, is computed using the +value given by _global_work_size_ and _global_work_offset_. +In addition, a work-item is also identified within a work-group by a unique +local ID. +The local ID, which can also be read by the kernel, is computed using the +value given by _local_work_size_. +The starting local ID is always (0, 0, ... 0). +==== // refError -{clSetEventCallback} returns {CL_SUCCESS} if the function is executed +{clCommandNDRangeKernelKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_EVENT} if _event_ is not a valid event object. - * {CL_INVALID_VALUE} if _pfn_event_notify_ is `NULL` or if - _command_exec_callback_type_ is not {CL_SUBMITTED}, {CL_RUNNING}, or - {CL_COMPLETE}. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +Otherwise, it returns the errors defined by {clEnqueueNDRangeKernel} except: +{CL_INVALID_COMMAND_QUEUE} is replaced with: -[open,refpage='clRetainEvent',desc='Increments the event reference count.',type='protos'] --- -To retain an event object, call the function + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/protos/clRetainEvent.txt[] -include::{generated}/api/version-notes/clRetainEvent.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: - * _event_ is the event object to be retained. 
+ * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, and _kernel_ are not the same. -The _event_ reference count is incremented. -The OpenCL commands that return an event perform an implicit retain. +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -// refError + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -{clRetainEvent} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: +New errors: - * {CL_INVALID_EVENT} if _event_ is not a valid event object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if the `<>` + extension is not supported and _mutable_handle_ is not `NULL`. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ + does not support {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} and + _kernel_ contains a printf call. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ + does not support {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} + and _kernel_ contains a kernel-enqueue call.
-[open,refpage='clReleaseEvent',desc='Decrements the event reference count.',type='protos'] --- -To release an event object, call the function +ifdef::cl_khr_command_buffer_mutable_dispatch[] +If the `<>` extension is supported: -include::{generated}/api/protos/clReleaseEvent.txt[] -include::{generated}/api/version-notes/clReleaseEvent.asciidoc[] + * {CL_INVALID_OPERATION} if the requested + {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} properties are not reported + by {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} for the device + associated with _command_queue_. + If _command_queue_ is `NULL`, the device associated with + _command_buffer_ must report support for these properties. + * {CL_INVALID_VALUE} if _command_buffer_ was created with the + {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property with + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and + _local_work_size_ is `NULL`, or if _properties_ includes the + {CL_MUTABLE_DISPATCH_ASSERTS_KHR} property with + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and + _local_work_size_ is `NULL`. +endif::cl_khr_command_buffer_mutable_dispatch[] - * _event_ is the event object to be released. +-- -The _event_ reference count is decremented. +[open,refpage='clCommandSVMMemcpyKHR',desc='Record a command to do an SVM memcpy operation',type='protos'] +-- +To record a command to do an SVM memcpy operation, call the function -The event object is deleted once the reference count becomes zero, the -specific command identified by this event has completed (or terminated) and -there are no commands in the command-queues of a context that require a wait -for this event to complete. -Using this function to release a reference that was not obtained by creating -the object or by calling {clRetainEvent} causes undefined behavior. 
+include::{generated}/api/protos/clCommandSVMMemcpyKHR.txt[] +include::{generated}/api/version-notes/clCommandSVMMemcpyKHR.asciidoc[] -[NOTE] -==== -Developers should be careful when releasing their last reference count on -events created by {clCreateUserEvent} that have not yet been set to status -of {CL_COMPLETE} or an error. -If the user event was used in the event_wait_list argument passed to a -*+clEnqueue*+* API or another application host thread is waiting for it in -{clWaitForEvents}, those commands and host threads will continue to wait for -the event status to reach {CL_COMPLETE} or error, even after the application -has released the object. -Since in this scenario the application has released its last reference count -to the user event, it would be in principle no longer valid for the -application to change the status of the event to unblock all the other -machinery. -As a result the waiting tasks will wait forever, and associated events, -{cl_mem_TYPE} objects, command-queues and contexts are likely to leak. -In-order command-queues caught up in this deadlock may cease to do any work. -==== + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _dst_ptr_ is the pointer to a host (if the device supports system SVM) + or SVM memory allocation where data is copied to. + * _src_ptr_ is the pointer to a host (if the device supports system SVM) + or SVM memory allocation where data is copied from. 
+ * _size_ is the size in bytes of data being copied. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must be +0. +If _sync_point_wait_list_ is not `NULL`, the list of synchronization-points +pointed to by _sync_point_wait_list_ must be valid and +_num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. // refError -{clReleaseEvent} returns {CL_SUCCESS} if the function is executed +{clCommandSVMMemcpyKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: +Otherwise, it returns the errors defined by {clEnqueueSVMMemcpy} except: - * {CL_INVALID_EVENT} if _event_ is not a valid event object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. 
- * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +{CL_INVALID_COMMAND_QUEUE} is replaced with: + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -[[markers-barriers-waiting-for-events]] -== Markers, Barriers and Waiting for Events +{CL_INVALID_CONTEXT} is replaced with: -[open,refpage='clEnqueueMarkerWithWaitList',desc='Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.',type='protos'] --- -To enqueue a marker command which waits for events or commands to complete, -call the function + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + _command_buffer_ are not the same. -include::{generated}/api/protos/clEnqueueMarkerWithWaitList.txt[] -include::{generated}/api/version-notes/clEnqueueMarkerWithWaitList.asciidoc[] +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - * _command_queue_ is a valid host command-queue. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array.
+ * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and _command_queue_ -must be the same. -The memory associated with _event_wait_list_ can be reused or freed after -the function returns. +New errors: -If _event_wait_list_ is `NULL`, then this particular command waits until all -previous enqueued commands to _command_queue_ have completed. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -The marker command either waits for a list of events to complete, or if the -list is empty it waits for all commands previously enqueued in -_command_queue_ to complete before it completes. -This command returns an _event_ which can be waited on, i.e. this event can -be waited on to insure that all events either in the _event_wait_list_ or -all previously enqueued commands, queued before this command to -_command_queue_, have completed. 
+[open,refpage='clCommandSVMMemFillKHR',desc='Record a command to fill a region in SVM with a pattern of a given pattern size',type='protos'] +-- +To record a command to fill a region in SVM with a pattern of a given +pattern size, call the function + +include::{generated}/api/protos/clCommandSVMMemFillKHR.txt[] +include::{generated}/api/version-notes/clCommandSVMMemFillKHR.asciidoc[] + + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _svm_ptr_ is a pointer to a host memory region (if the device supports + system SVM) or an SVM memory region that will be filled with _pattern_. + It must be aligned to _pattern_size_ bytes. + If _svm_ptr_ is allocated using {clSVMAlloc}, then it must be allocated + from the same context from which _command_queue_ was created. + Otherwise the behavior is undefined. + * _pattern_ is a pointer to the data pattern of size _pattern_size_ in + bytes. + _pattern_ will be used to fill the memory region that starts at + _svm_ptr_ and is _size_ bytes in size. + The data pattern must be a scalar or vector integer or floating-point + data type supported by OpenCL. + For example, if the region pointed to by _svm_ptr_ is to be filled with + a pattern of `float4` values, then _pattern_ will be a pointer to a + `cl_float4` value and _pattern_size_ will be `sizeof(cl_float4)`. + The maximum value of _pattern_size_ is the size of the largest integer + or floating-point vector data type supported by the OpenCL device.
+ The memory associated with _pattern_ can be reused or freed after the + function returns. + * _size_ is the size in bytes of region being filled starting with + _svm_ptr_ and must be a multiple of _pattern_size_. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this + particular command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must be +0. +If _sync_point_wait_list_ is not `NULL`, the list of synchronization-points +pointed to by _sync_point_wait_list_ must be valid and +_num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. // refError -{clEnqueueMarkerWithWaitList} returns {CL_SUCCESS} if the function is -successfully executed. -Otherwise, it returns one of the following errors: +{clCommandSVMMemFillKHR} returns {CL_SUCCESS} if the function is executed +successfully. 
Otherwise, it returns the errors defined by +{clEnqueueSVMMemFill} except: - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +{CL_INVALID_COMMAND_QUEUE} is replaced with: -[open,refpage='clEnqueueMarker',desc='Enqueues a marker command which waits for all previously enqueued commands to complete.',type='protos'] --- -To enqueue a marker command which waits for previous commands to complete, call -the function + * {CL_INVALID_COMMAND_QUEUE} if the + `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/protos/clEnqueueMarker.txt[] -include::{generated}/api/version-notes/clEnqueueMarker.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: - * _command_queue_ is a valid host command-queue. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete.
- If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ + and _command_buffer_ is not the same. + +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: + + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. + +New errors: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- + + +ifdef::cl_khr_command_buffer_multi_device[] +=== Remapping Command-Buffers + +If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, +platforms reporting the {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} +capability support generating a deep copy of a command-buffer with its +commands remapped to a list of command-queues that are potentially +incompatible with the queues used to create the +command-buffer. +That is, the remapped command-buffer can execute on queues that differ in +terms of properties and/or associated device from the original +command-buffer queues. + +This functionality is invoked through a new synchronous entry-point +{clRemapCommandBufferKHR} which takes a list of queues that the commands +should now target. +It then returns a command-buffer containing the same commands as the +original, with the same command dependencies, but targeting different +queues. +A list of command handles may also be passed to the entry-point, which +allows handles to the equivalent commands in the remapped command-buffer to +be returned by an output parameter.
+ +Device properties restrict remapping possibilities, as existing commands can +have a configuration which is not supported by another device, and so +remapping may fail with an error relating to this incompatibility. +Examples of command configurations which can introduce incompatibilities +when trying to map to a new device are: + + * Program language features used in a kernel not supported by the new + device. + * ND-Range configuration, e.g. exceeding the new device's maximum + work-group size. + * Misalignment of sub-buffers based on the minimum alignment of the new + device. + +In addition to this functionality, platforms reporting +{CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} allow the user to create a +remapped command-buffer where the mapping of queues to commands is +determined by the OpenCL runtime in a way it determines as optimal. +This is particularly useful in hot plugging environments where devices may +appear and disappear during runtime. + +[open,refpage='clRemapCommandBufferKHR',desc='Create copy of a command-buffer remapped to specified command-queues',type='protos'] +-- +To create a deep copy of the input command-buffer with the copied commands +remapped to target the passed command-queues, call the function + +include::{generated}/api/protos/clRemapCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clRemapCommandBufferKHR.asciidoc[] + + * _command_buffer_ specifies the command-buffer to create a remapped deep + copy of. + * _automatic_ indicates if the remapping is done explicitly by the user, + or automatically by the OpenCL runtime. + If _automatic_ is {CL_FALSE}, then each element of _queues_ will replace + the queue used on _command_buffer_ creation at the same index. + If {CL_TRUE} and {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} is + supported, then the OpenCL runtime will decide in a way it determines + optimal which of the elements in _queues_ each command in the returned + command-buffer will be associated with.
+ * _num_queues_ is the number of command-queues listed in _queues_, must + not be 0. + * _queues_ is a pointer to an ordered list of command-queues for the + returned command-buffer to target, must be a non-`NULL` value. + * _num_handles_ is the number of command handles passed in both _handles_ + and _handles_ret_ lists, may be 0. + * _handles_ is an ordered list of handles belonging to _command_buffer_ to + create remapped copies of, may be `NULL`. + * _handles_ret_ returns an ordered list of handles where each handle is + equivalent to the handle at the same index in _handles_, but belonging + to the returned command-buffer. + * _errcode_ret_ returns an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -The marker command waits for all commands previously enqueued in _command_queue_ to complete before it completes. -This command returns an _event_ which can be waited on, i.e. this event can be -waited on to insure that all previously enqueued commands, queued before this -command to _command_queue_, have completed. +The returned command-buffer has the same state as the input command-buffer, +unless the input command-buffer is in the <<pending, Pending>> state, in +which case the returned command-buffer has state <<executable, Executable>>. // refError -{clEnqueueMarker} returns {CL_SUCCESS} if the function is successfully -executed. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_VALUE} if _event_ is `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. +{clRemapCommandBufferKHR} returns a valid command-buffer with _errcode_ret_ +set to {CL_SUCCESS} if the command-buffer is created successfully.
+Otherwise, it returns a `NULL` value without setting _handles_ret_, and with +one of the following error values returned in _errcode_ret_: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_VALUE} if _num_queues_ is 0, or if _queues_ is `NULL`. + * {CL_INVALID_VALUE} if _automatic_ is {CL_FALSE} and _num_queues_ is not + equal to the number of queues used on creation of _command_buffer_. + * {CL_INVALID_VALUE} if _handles_ or _handles_ret_ is `NULL` and + _num_handles_ is > 0, or either _handles_ or _handles_ret_ is not `NULL` + and _num_handles_ is 0. + * {CL_INVALID_VALUE} if any handle in _handles_ is not a valid command + handle belonging to _command_buffer_. + * {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a + valid command-queue. + * {CL_INVALID_CONTEXT} if _command_buffer_ and all the command-queues in + _queues_ do not have the same OpenCL context. + * {CL_INVALID_OPERATION} if the platform does not support the + {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} flag. + * {CL_INVALID_OPERATION} if the platform does not support the + {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} flag and _automatic_ is + {CL_TRUE}. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if such an error would be returned + by passing _queues_ to {clCreateCommandBufferKHR}. + * Any error relating to device support that can be returned by a command + recording entry-point may also be returned. + As a command in _command_buffer_ can have a configuration that is not + supported by a device that is associated with the queue in _queues_ the + command is being remapped to. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- +endif::cl_khr_command_buffer_multi_device[] + +ifdef::cl_khr_command_buffer_mutable_dispatch[] +[[mutable-commands]] +=== Mutable Commands + +A generic {cl_mutable_command_khr_TYPE} handle is called a _mutable-command_ +object as it can be returned from any command recording entry-point in the +`<<cl_khr_command_buffer>>` family of extensions. +The mutable-command handles returned by {clCommandNDRangeKernelKHR} in +particular are referred to as _mutable-dispatch_ objects, and can be +modified through the fields of {cl_mutable_dispatch_config_khr_TYPE}. + +Mutable-command handles are updated between enqueues using entry-point +{clUpdateMutableCommandsKHR}. +To enable performant usage, all aspects of mutation are encapsulated inside +a single {cl_mutable_base_config_khr_TYPE} parameter. +This means that the runtime has access to all the information about how the +command-buffer will change, allowing the command-buffer to be rebuilt as +efficiently as possible. +Any modifications to the arguments or execution info of a mutable-dispatch +handle using {cl_mutable_dispatch_arg_khr_TYPE} or +{cl_mutable_dispatch_exec_info_khr_TYPE} have no effect on the original +kernel object used when the command was recorded, and only influence the +{clCommandNDRangeKernelKHR} command associated with the mutable-dispatch. + +[[mutable-dispatch-kernel-argument-safe-usage]] +[NOTE] +==== +The base `<<cl_khr_command_buffer>>` extension +specifies that a command-buffer +does not update the reference count of objects set as arguments on kernels +recorded into the command-buffer. + +The implication for applications using {clUpdateMutableCommandsKHR} is that +it is safe to delete objects used as kernel command arguments, if all the +kernel commands using that object as an argument have had their arguments +replaced with a different object.
+==== -[open,refpage='clEnqueueWaitForEvents',desc='Enqueues a wait on a list of events to complete.',type='protos'] --- -To enqueue a wait for a specific event or a list of events to complete before any future commands queued in a command-queue are executed, call the function - -include::{generated}/api/protos/clEnqueueWaitForEvents.txt[] -include::{generated}/api/version-notes/clEnqueueWaitForEvents.asciidoc[] +To facilitate performant usage for pipelined work flows, where applications +repeatedly call command-buffer update then enqueue, implementations may +defer some of the work to allow {clUpdateMutableCommandsKHR} to return +immediately. +Deferring any recompilation until {clEnqueueCommandBufferKHR} avoids +blocking in host code and keeps device occupancy high. +This is only possible with a command-buffer created with the +{CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag, as without this the enqueued +command-buffer must complete before any modification occurs. - * _command_queue_ is a valid host command-queue. - * _event_list_ and _num_events_ specify events that need to complete before - this particular command can be executed. +[open,refpage='clUpdateMutableCommandsKHR',desc='Modify configuration of mutable-command handles to update behavior for future enqueues',type='protos'] +-- +To modify the configuration of mutable-command handles returned during +_command_buffer_ recording, updating the behavior of those commands in +future enqueues of _command_buffer_, call the function -// Note, this parameter is called event_list (like clWaitForEvents) rather than -// event_wait_list(like clEnqueueMarkerWithWaitList etc.) because the function -// predates wait lists (and CL_INVALID_EVENT_WAIT_LIST). +include::{generated}/api/protos/clUpdateMutableCommandsKHR.txt[] +include::{generated}/api/version-notes/clUpdateMutableCommandsKHR.asciidoc[] -The events specified in _event_list_ act as synchronization points. 
-The context associated with events in _event_list_ and _command_queue_ must be -the same. -The memory associated with _event_list_ can be reused or freed after the -function returns. + * _command_buffer_ refers to a valid command-buffer object. + * _mutable_config_ is a pointer to a {cl_mutable_base_config_khr_TYPE} + structure defining updates to make to mutable-commands. // refError -{clEnqueueWaitForEvents} returns {CL_SUCCESS} if the function is successfully -executed. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_list_ are not the same. - * {CL_INVALID_VALUE} if _num_events_ is 0 or _event_list_ is `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. +{clUpdateMutableCommandsKHR} returns {CL_SUCCESS} if all the mutable-command +objects were updated successfully. +Otherwise, none of the updates to mutable-command objects are preserved and +one of the errors below is returned: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. + * {CL_INVALID_OPERATION} if _command_buffer_ was not created with the + {CL_COMMAND_BUFFER_MUTABLE_KHR} flag. + * {CL_INVALID_VALUE} if the _type_ member of _mutable_config_ is not + {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR}. + * {CL_INVALID_VALUE} if the _mutable_dispatch_list_ member of + _mutable_config_ is `NULL` and _num_mutable_dispatch_ > 0, or + _mutable_dispatch_list_ is not `NULL` and _num_mutable_dispatch_ is 0. + * {CL_INVALID_VALUE} if the _next_ member of _mutable_config_ is not + `NULL` and any iteration of the structure pointer chain does not contain + valid _type_ and _next_ members. 
+ * {CL_INVALID_VALUE} if _mutable_config_ is `NULL`, or if both _next_ and + _mutable_dispatch_list_ members of _mutable_config_ are `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. + +Using this function when _command_buffer_ is in the <<pending, Pending>> +state and was not created with the {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag +causes undefined behavior. + +[NOTE] +==== +Performant usage is to call {clUpdateMutableCommandsKHR} only when the +desired state of all commands is known, rather than iteratively updating +each command individually. +==== + +[NOTE] +==== +If the command-buffer has been created with +{CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, or the updated +ND-range command has been recorded with this flag, and the ND-range +parameters are updated so that the new number of work-groups exceeds the +number when the ND-range command was recorded, the behavior is undefined. +==== + +If the _mutable_dispatch_list_ member of _mutable_config_ is non-`NULL`, +then errors defined by {clEnqueueNDRangeKernel}, {clSetKernelExecInfo}, +{clSetKernelArg}, and {clSetKernelArgSVMPointer} are returned by +{clUpdateMutableCommandsKHR} if any of the array elements are set to an +invalid value. +Additionally, the following errors are returned if any +{cl_mutable_dispatch_config_khr_TYPE} element of the array violates the +defined conditions: + + * {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable + command object, or was not created from _command_buffer_. + * {CL_INVALID_VALUE} if _type_ is not + {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. + * {CL_INVALID_OPERATION} if the values of _local_work_size_ and/or + _global_work_size_ result in a change to work-group uniformity.
+ * {CL_INVALID_OPERATION} if the _work_dim_ is different from the + _work_dim_ set on _command_ recording. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} + property was not set on _command_ recording and _global_work_offset_ is + not `NULL`. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} + property was not set on _command_ recording and _global_work_size_ is + not `NULL`. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} + property was not set on _command_ recording and _local_work_size_ is not + `NULL`. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} + property was not set on _command_ recording and _num_args_ or + _num_svm_args_ is non-zero. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} + property was not set on _command_ recording and _num_exec_infos_ is + non-zero. + * {CL_INVALID_VALUE} if _arg_list_ is `NULL` and _num_args_ > 0, or + _arg_list_ is not `NULL` and _num_args_ is 0. + * {CL_INVALID_VALUE} if _arg_svm_list_ is `NULL` and _num_svm_args_ > 0, + or _arg_svm_list_ is not `NULL` and _num_svm_args_ is 0. + * {CL_INVALID_VALUE} if _exec_info_list_ is `NULL` and _num_exec_infos_ > + 0, or _exec_info_list_ is not `NULL` and _num_exec_infos_ is 0. +-- + +[open,refpage='cl_mutable_base_config_khr',desc='DESC',type='structs'] +-- +The {cl_mutable_base_config_khr_TYPE} structure is TODO Add fuller +description here and is defined as: + +include::{generated}/api/structs/cl_mutable_base_config_khr.txt[] + + * _type_ is the type of this structure, and must be + {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR} + * _next_ is `NULL` or a pointer to an extending structure. + * _num_mutable_dispatch_ is the number of mutable-dispatch objects to + configure in this enqueue of the command-buffer. 
+ * _mutable_dispatch_list_ is an array containing _num_mutable_dispatch_ + elements describing the configurations of mutable kernel execution + commands in the command-buffer. + For a description of struct members making up each array element see + {cl_mutable_dispatch_config_khr_TYPE}. +-- + +[open,refpage='cl_mutable_dispatch_config_khr',desc='Set kernel configuration of a mutable clCommandNDRangeKernelKHR command',type='structs'] +-- +The {cl_mutable_dispatch_config_khr_TYPE} structure is passed to +{clUpdateMutableCommandsKHR} to set the kernel configuration of a mutable +{clCommandNDRangeKernelKHR} command, and is defined as: + +include::{generated}/api/structs/cl_mutable_dispatch_config_khr.txt[] + + * _type_ is the type of this structure, and must be + {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. + * _next_ is `NULL` or a pointer to an extending structure. + * _command_ is a mutable-command object returned by + {clCommandNDRangeKernelKHR} representing a kernel execution as part of a + command-buffer. + * _num_args_ is the number of kernel arguments being changed. + * _num_svm_args_ is the number of SVM kernel arguments being changed. + * _num_exec_infos_ is the number of kernel execution info objects to set + for this dispatch. + * _work_dim_ is the number of dimensions used to specify the global + work-items and work-items in the work-group. + See {clEnqueueNDRangeKernel} for valid usage. + * _arg_list_ is an array describing the new kernel arguments for this + enqueue. + It must contain _num_args_ array elements, each of which encapsulates + parameters passed to {clSetKernelArg}. + See {clSetKernelArg} for usage of {cl_mutable_dispatch_arg_khr_TYPE} + members. + * _arg_svm_list_ is an array describing the new SVM kernel arguments for + this enqueue. + It must contain _num_svm_args_ array elements, each of which + encapsulates parameters passed to {clSetKernelArgSVMPointer}.
+ See {clSetKernelArgSVMPointer} for usage of + {cl_mutable_dispatch_arg_khr_TYPE} members; `arg_size` is ignored. + * _exec_info_list_ is an array containing _num_exec_infos_ elements + specifying the list of execution info objects used for this + command-buffer enqueue. + See {clSetKernelExecInfo} for usage of + {cl_mutable_dispatch_exec_info_khr_TYPE} members. + * _global_work_offset_ can be used to specify an array of _work_dim_ + unsigned values that describe the offset used to calculate the global ID + of a work-item. + If _global_work_offset_ is `NULL` then the global offset of the dispatch + is not changed. + See {clEnqueueNDRangeKernel} for valid usage. + * _global_work_size_ points to an array of _work_dim_ unsigned values that + describe the number of global work-items in _work_dim_ dimensions that + will execute the kernel function. + If _global_work_size_ is `NULL` then the number of global work-items in + the dispatch is not changed. + See {clEnqueueNDRangeKernel} for valid usage. + * _local_work_size_ points to an array of _work_dim_ unsigned values that + describe the number of work-items that make up a work-group that will + execute the kernel. + If _local_work_size_ is `NULL` then the number of local work-items in + the dispatch is not changed. + See {clEnqueueNDRangeKernel} for valid usage.
-- -[open,refpage='clEnqueueBarrierWithWaitList',desc='A synchronization point that enqueues a barrier operation.',type='protos'] +[open,refpage='cl_mutable_dispatch_arg_khr',desc='Set kernel arguments normally passed using clSetKernelArg and clSetKernelArgSVMPointer',type='structs'] -- -To enqueue a barrier command which waits for events or commands to complete, -call the function +The {cl_mutable_dispatch_arg_khr_TYPE} structure sets kernel arguments +normally passed using {clSetKernelArg} and {clSetKernelArgSVMPointer}, and +is defined as: -include::{generated}/api/protos/clEnqueueBarrierWithWaitList.txt[] -include::{generated}/api/version-notes/clEnqueueBarrierWithWaitList.asciidoc[] +include::{generated}/api/structs/cl_mutable_dispatch_arg_khr.txt[] +-- - * _command_queue_ is a valid host command-queue. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - * If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. - If _event_wait_list_ is not `NULL`, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array.
+[open,refpage='cl_mutable_dispatch_exec_info_khr',desc='Specify kernel execution info',type='structs'] +-- +The {cl_mutable_dispatch_exec_info_khr_TYPE} structure sets kernel execution +info normally passed using {clSetKernelExecInfo}, and is defined as: -If _event_wait_list_ is `NULL`, then this particular command waits until all -previous enqueued commands to _command_queue_ have completed. +include::{generated}/api/structs/cl_mutable_dispatch_exec_info_khr.txt[] -The barrier command either waits for a list of events to complete, or if the -list is empty it waits for all commands previously enqueued in -_command_queue_ to complete before it completes. -This command blocks command execution, that is, any following commands -enqueued after it do not execute until it completes. -This command returns an _event_ which can be waited on, i.e. this event can -be waited on to insure that all events either in the _event_wait_list_ or -all previously enqueued commands, queued before this command to -_command_queue_, have completed. +[NOTE] +==== +_param_name_ is of type {cl_uint_TYPE} rather than +{cl_kernel_exec_info_TYPE} so that the extension can be implemented on +OpenCL 1.2 where the {cl_kernel_exec_info_TYPE} typedef is unavailable. +==== +-- +endif::cl_khr_command_buffer_mutable_dispatch[] -// refError -{clEnqueueBarrierWithWaitList} returns {CL_SUCCESS} if the function is -successfully executed. -Otherwise, it returns one of the following errors: +=== Command-Buffer Queries - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. 
- * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +[open,refpage='clGetCommandBufferInfoKHR',desc='Query information about a command-buffer',type='protos'] -- +To query information about a command-buffer, call the function + +include::{generated}/api/protos/clGetCommandBufferInfoKHR.txt[] +include::{generated}/api/version-notes/clGetCommandBufferInfoKHR.asciidoc[] + + * _command_buffer_ specifies the command-buffer being queried. + * _param_name_ specifies the information to query. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be {geq} size of return type as described in the table + below. + If _param_value_ is `NULL`, it is ignored. + * _param_value_ is a pointer to a memory location where the appropriate + result being queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_value_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +The list of supported _param_name_ values and the information returned in +_param_value_ by {clGetCommandBufferInfoKHR} is described in the table below. + +.{clGetCommandBufferInfoKHR} values +[cols=",,",options="header",] +|==== +| Command Buffer Info | Return Type | Description +| {CL_COMMAND_BUFFER_NUM_QUEUES_KHR_anchor} -[open,refpage='clEnqueueBarrier',desc='A synchronization point that enqueues a barrier operation.',type='protos'] --- -To enqueue a barrier command which waits for commands to complete, call the -function +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_NUM_QUEUES_KHR.asciidoc[] + | {cl_uint_TYPE} + | The number of command-queues specified when _command_buffer_ was created. 
-include::{generated}/api/protos/clEnqueueBarrier.txt[] -include::{generated}/api/version-notes/clEnqueueBarrier.asciidoc[] +| {CL_COMMAND_BUFFER_QUEUES_KHR_anchor} - * _command_queue_ is a valid host command-queue. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_QUEUES_KHR.asciidoc[] + | {cl_command_queue_TYPE}[] + | Return the list of command-queues specified when the + _command_buffer_ was created. -The barrier command waits for all commands previously enqueued in -_command_queue_ to complete before it completes. -This command blocks command execution, that is, any following commands -enqueued after it do not execute until it completes. -// TODO clEnqueueBarrierWithWaitList doesn't say synchronization point, should -// it, or should the next line be removed? The main difference is that -// clEnqueueBarrierWithWaitList returns an event, which is the synchronization -// point. -The barrier command is a synchronization point. +| {CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR_anchor} footnote:[{fn-reference-count-usage}] -// refError +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR.asciidoc[] + | {cl_uint_TYPE} + | Return the _command_buffer_ reference count. -{clEnqueueBarrier} returns {CL_SUCCESS} if the function is successfully -executed. -Otherwise, it returns one of the following errors: +| {CL_COMMAND_BUFFER_STATE_KHR_anchor} - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_STATE_KHR.asciidoc[] + | {cl_command_buffer_state_khr_TYPE} + | Return the state of _command_buffer_. 
-== Out-of-order Execution of Kernels and Memory Object Commands + {CL_COMMAND_BUFFER_STATE_RECORDING_KHR_anchor} is returned when + _command_buffer_ has not been finalized. -The OpenCL functions that are submitted to a command-queue are enqueued in -the order the calls are made but can be configured to execute in-order or -out-of-order. -The _properties_ argument in {clCreateCommandQueueWithProperties} or -{clCreateCommandQueue} can be used to specify the execution order. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_STATE_RECORDING_KHR.asciidoc[] -If the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property of a command-queue is -not set, the commands enqueued to a command-queue execute in-order. -For example, if an application calls {clEnqueueNDRangeKernel} to execute -kernel A followed by a {clEnqueueNDRangeKernel} to execute kernel B, the -application can assume that kernel A finishes first and then kernel B is -executed. -If the memory objects output by kernel A are inputs to kernel B then kernel -B will see the correct data in memory objects produced by execution of -kernel A. -If the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property of a command-queue is -set, then there is no guarantee that kernel A will finish before kernel B -starts execution. + {CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR_anchor} is returned when + _command_buffer_ has been finalized and there is not a <> instance of _command_buffer_ awaiting completion on a + command_queue. -Applications can configure the commands enqueued to a command-queue to -execute out-of-order by setting the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} -property of the command-queue. -This can be specified when the command-queue is created. -In out-of-order execution mode there is no guarantee that the enqueued -commands will finish execution in the order they were queued. -As there is no guarantee that kernels will be executed in-order, i.e. 
based -on when the {clEnqueueNDRangeKernel} or {clEnqueueTask} calls are made within a -command-queue, it is therefore possible that an earlier -{clEnqueueNDRangeKernel} call to execute kernel A identified by event A may -execute and/or finish later than a {clEnqueueNDRangeKernel} call to execute -kernel B which was called by the application at a later point in time. -To guarantee a specific order of execution of kernels, a wait on a -particular event (in this case event A) can be used. -The wait for event A can be specified in the _event_wait_list_ argument to -{clEnqueueNDRangeKernel} for kernel B. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR.asciidoc[] -In addition, a marker ({clEnqueueMarker} or {clEnqueueMarkerWithWaitList}) or a -barrier ({clEnqueueBarrier} or {clEnqueueBarrierWithWaitList}) command can be -enqueued to the command-queue. -The marker command ensures that previously enqueued commands identified by -the list of events to wait for (or all previous commands) have finished. -A barrier command is similar to a marker command, but additionally -guarantees that no later-enqueued commands will execute until the waited-for -commands have executed. + {CL_COMMAND_BUFFER_STATE_PENDING_KHR_anchor} is returned when an + instance of _command_buffer_ has been enqueued for execution but not + yet completed. -Similarly, commands to read, write, copy or map memory objects that are -enqueued after {clEnqueueNDRangeKernel}, {clEnqueueTask} or -{clEnqueueNativeKernel} commands are not guaranteed to wait for kernels -scheduled for execution to have completed (if the -{CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property is set). -To ensure correct ordering of commands, the event object returned by -{clEnqueueNDRangeKernel}, {clEnqueueTask} or {clEnqueueNativeKernel} can be -used to enqueue a wait for event or a barrier command can be enqueued that must -complete before reads or writes to the memory object(s) occur. 
+include::{generated}/api/version-notes/CL_COMMAND_BUFFER_STATE_PENDING_KHR.asciidoc[] +| {CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR_anchor} -[[profiling-operations]] -== Profiling Operations on Memory Objects and Kernels +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR.asciidoc[] + | {cl_command_buffer_properties_khr_TYPE}[] + | Return the _properties_ argument specified in + {clCreateCommandBufferKHR}. -This section describes the profiling of OpenCL functions that are enqueued -as commands to a command-queue. Profiling of OpenCL commands can be enabled -by using a command-queue created with the {CL_QUEUE_PROFILING_ENABLE} -flag set in the {CL_QUEUE_PROPERTIES} bitfield in the _properties_ argument to -{clCreateCommandQueueWithProperties}, or in the _properties_ argument to -{clCreateCommandQueue}. -When profiling is enabled, the event objects that are created from -enqueuing a command store a timestamp for each of their state transitions. + If the _properties_ argument specified in {clCreateCommandBufferKHR} + used to create _command_buffer_ was not `NULL`, the implementation + must return the values specified in the properties argument. -[open,refpage='clGetEventProfilingInfo',desc='Returns profiling information for the command associated with event if profiling is enabled.',type='protos'] + If the _properties_ argument specified in {clCreateCommandBufferKHR} + used to create _command_buffer_ was `NULL`, the implementation may + return either a _param_value_size_ret_ of 0 (i.e. there are no + properties to be returned), or the implementation may return a + property value of 0 (where 0 is used to terminate the properties + list). + +| {CL_COMMAND_BUFFER_CONTEXT_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CONTEXT_KHR.asciidoc[] + | {cl_context_TYPE} + | Return the context associated with _command_buffer_.
+ +|==== + +// refError + +{clGetCommandBufferInfoKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values or + if size in bytes specified by _param_value_size_ is less than size of + return type and _param_value_ is not a `NULL` value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- -To return profiling information for a command associated with an event when -profiling is enabled, call the function -include::{generated}/api/protos/clGetEventProfilingInfo.txt[] -include::{generated}/api/version-notes/clGetEventProfilingInfo.asciidoc[] +ifdef::cl_khr_command_buffer_mutable_dispatch[] +[open,refpage='clGetMutableCommandInfoKHR',desc='Query information about a mutable command object',type='protos'] +-- +To query information about a mutable command object, call the function - * _event_ specifies the event object. - * _param_name_ specifies the profiling data to query. +include::{generated}/api/protos/clGetMutableCommandInfoKHR.txt[] +include::{generated}/api/version-notes/clGetMutableCommandInfoKHR.asciidoc[] + + * _command_ specifies the mutable-command object being queried. + * _param_name_ specifies the information to query. The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetEventProfilingInfo} is described in the - <> table. + _param_value_ by {clGetMutableCommandInfoKHR} is described in the + <<mutable-command-object-queries>> + table. + * _param_value_size_ is used to specify the size in bytes of memory + pointed to by _param_value_. + This size must be {geq} size of return type as described in the + <<mutable-command-object-queries>> + table.
* _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. -[[event-profiling-info-table]] -.List of supported param_names by {clGetEventProfilingInfo} +[[mutable-command-object-queries]] +._Mutable Command Object Queries_ [width="100%",cols="<33%,<17%,<50%",options="header"] |==== -| Event Profiling Info | Return Type | Description -| {CL_PROFILING_COMMAND_QUEUED_anchor} - -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_QUEUED.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event is enqueued in a - command-queue by the host. -| {CL_PROFILING_COMMAND_SUBMIT_anchor} - -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_SUBMIT.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event that has been - enqueued is submitted by the host to the device associated with the - command-queue. -| {CL_PROFILING_COMMAND_START_anchor} +| Mutable Command Info | Return Type | Description +| {CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR_anchor} -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_START.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event starts execution on - the device. 
-| {CL_PROFILING_COMMAND_END_anchor} +include::{generated}/api/version-notes/CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR.asciidoc[] + | {cl_command_queue_TYPE} + | Return the command-queue associated with _command_. + If `NULL` was passed as the queue when _command_ was recorded, then + the queue associated with the command-buffer that _command_ belongs to + is returned. +| {CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR.asciidoc[] + | {cl_command_buffer_khr_TYPE} + | Return the command-buffer associated with _command_. +| {CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR.asciidoc[] + | {cl_command_type_TYPE} + | Return the command-type associated with _command_. + + The list of supported event command types defined by {clGetEventInfo} + is used with the matching command. +| {CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR.asciidoc[] + | {cl_ndrange_kernel_command_properties_khr_TYPE}[] + | Return the properties argument specified on _command_ recording with + {clCommandNDRangeKernelKHR}. + + If the properties argument specified on creation of _command_ was not + `NULL`, the implementation must return the values specified in the + properties argument in the same order and without including additional + properties. + + If the properties argument specified on creation of _command_ was + `NULL`, or _command_ was not recorded from a + {clCommandNDRangeKernelKHR} command, the implementation must return + _param_value_size_ret_ equal to 0, indicating that there are no + properties to be returned. +| {CL_MUTABLE_DISPATCH_KERNEL_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_KERNEL_KHR.asciidoc[] + | {cl_kernel_TYPE} + | Return the kernel associated with _command_ when recorded with + {clCommandNDRangeKernelKHR}. 
+ + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. +| {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_DIMENSIONS_KHR.asciidoc[] + | {cl_uint_TYPE} + | Return the number of work-item dimensions specified when _command_ was + created. -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_END.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event has finished - execution on the device. -| {CL_PROFILING_COMMAND_COMPLETE_anchor} + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. +| {CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR_anchor} -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_COMPLETE.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event and any child - commands enqueued by this command on the device have finished - execution. +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR.asciidoc[] + | {size_t_TYPE}[] + | Return the global work-item offset set on _command_ creation, or from + the most recent update via {clUpdateMutableCommandsKHR} where this + value was modified. + The output array contains _work_dim_ values, where _work_dim_ is + returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. + If a global work-item offset was not set, zero is returned for each + element in the array. 
+ + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. +| {CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR.asciidoc[] + | {size_t_TYPE}[] + | Return the global work-item size set on _command_ creation, or from + the most recent update via {clUpdateMutableCommandsKHR} where this + value was modified. + The output array contains _work_dim_ values, where _work_dim_ is + returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. + If a global work-item size was not set, zero is returned for each + element in the array. + + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. +| {CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR.asciidoc[] + | {size_t_TYPE}[] + | Return the local work-item size set on _command_ creation, or from the + most recent update via {clUpdateMutableCommandsKHR} where this value + was modified. + The output array contains _work_dim_ values, where _work_dim_ is + returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. + If a local work-item size was not set, zero is returned for each + element in the array. + + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. |==== -The unsigned 64-bit values returned can be used to measure the time in -nano-seconds consumed by OpenCL commands. - -OpenCL devices are required to correctly track time across changes in device -frequency and power states. 
-The {CL_DEVICE_PROFILING_TIMER_RESOLUTION} specifies the resolution of the -timer i.e. the number of nanoseconds elapsed before the timer is -incremented. - // refError -{clGetEventProfilingInfo} returns {CL_SUCCESS} if the function is executed -successfully and the profiling information has been recorded. +{clGetMutableCommandInfoKHR} returns {CL_SUCCESS} if the function is +executed successfully. Otherwise, it returns one of the following errors: - * {CL_PROFILING_INFO_NOT_AVAILABLE} if the {CL_QUEUE_PROFILING_ENABLE} flag is - not set for the command-queue, if the execution status of the command - identified by _event_ is not {CL_COMPLETE} or if _event_ is a user event - object. - Prior to OpenCL 3.0, implementations may return - {CL_PROFILING_INFO_NOT_AVAILABLE} for an event created by - {clEnqueueSvmFree}. * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_EVENT} if _event_ is a not a valid event object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + the <<mutable-command-object-queries>> + table and _param_value_ is not `NULL`. + * {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable + command object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host.
-- +endif::cl_khr_command_buffer_mutable_dispatch[] +endif::cl_khr_command_buffer[] -== Flush and Finish - -[open,refpage='clFlush',desc='Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.',type='protos'] --- -To flush commands to a device, call the function - -include::{generated}/api/protos/clFlush.txt[] -include::{generated}/api/version-notes/clFlush.asciidoc[] - - * _command_queue_ is the command-queue to flush. - -All previously queued OpenCL commands in _command_queue_ are issued to the -device associated with _command_queue_. -{clFlush} only guarantees that all queued commands to _command_queue_ will -eventually be submitted to the appropriate device. -There is no guarantee that they will be complete after {clFlush} returns. - -Any blocking commands queued in a command-queue and {clReleaseCommandQueue} -perform an implicit flush of the command-queue. -These blocking commands are {clEnqueueReadBuffer}, -{clEnqueueReadBufferRect}, {clEnqueueReadImage}, with _blocking_read_ set to -{CL_TRUE}; {clEnqueueWriteBuffer}, {clEnqueueWriteBufferRect}, -{clEnqueueWriteImage} with _blocking_write_ set to {CL_TRUE}; -{clEnqueueMapBuffer}, {clEnqueueMapImage} with _blocking_map_ set to -{CL_TRUE}; {clEnqueueSVMMemcpy} with _blocking_copy_ set to {CL_TRUE}; -{clEnqueueSVMMap} with _blocking_map_ set to {CL_TRUE} or {clWaitForEvents}. - -To use event objects that refer to commands enqueued in a command-queue as -event objects to wait on by commands enqueued in a different command-queue, -the application must call a {clFlush} or any blocking commands that perform -an implicit flush of the command-queue where the commands that refer to -these event objects are enqueued. -// refError +ifdef::cl_khr_gl_sharing[] +[[querying-devices-that-support-sharing-with-opengl]] +== Querying Devices That Support Sharing With OpenGL -{clFlush} returns {CL_SUCCESS} if the function call was executed successfully. 
-Otherwise, it returns one of the following errors: +OpenCL device(s) corresponding to an OpenGL context may be queried. +Such a device may not always exist (for example, if an OpenGL context is +specified on a GPU not supporting OpenCL command-queues, but which does +support shared OpenCL/OpenGL memory objects), and if it does exist, may +change over time. +When such a device does exist, acquiring and releasing shared OpenCL/OpenGL +memory objects may be faster on a command-queue corresponding to this device +than on command-queues corresponding to other devices available to an OpenCL +context. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +[open,refpage='clGetGLContextInfoKHR',desc='Query OpenCL device corresponding to an OpenGL context',type='protos'] -- +To query the OpenCL device corresponding to an OpenGL context, call the +function -[open,refpage='clFinish',desc='Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.',type='protos'] --- -To wait for completion of commands on a device, call the function +include::{generated}/api/protos/clGetGLContextInfoKHR.txt[] -include::{generated}/api/protos/clFinish.txt[] -include::{generated}/api/version-notes/clFinish.asciidoc[] + * _properties_ points to a property list whose format and valid contents + are identical to the _properties_ argument of {clCreateContext}. + _properties_ must identify a single valid GL context or GL share group + object. + * _param_name_ is a constant that specifies the device types to query, and + must be one of the values shown in the <<gl-context-info-table>> table below.
+ * _param_value_ is a pointer to memory where the result of the query is + returned, as described in the <<gl-context-info-table>> table. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + described in the table below. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. - * _command_queue_ is the command-queue to wait for. +[[gl-context-info-table]] +.Supported Device Types for {clGetGLContextInfoKHR} +[cols="2,1,2",options="header",] +|==== +| param_name | Return Type | Information returned in param_value +| {CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR_anchor} -All previously queued OpenCL commands in _command_queue_ are issued to the -associated device, and the function blocks until all previously queued -commands have completed. -{clFinish} does not return until all previously queued commands in -_command_queue_ have been processed and completed. -{clFinish} is also a synchronization point. +include::{generated}/api/version-notes/CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR.asciidoc[] + | {cl_device_id_TYPE} + | Return the OpenCL device currently associated with the specified + OpenGL context. +| {CL_DEVICES_FOR_GL_CONTEXT_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICES_FOR_GL_CONTEXT_KHR.asciidoc[] + | {cl_device_id_TYPE}[] + | Return all OpenCL devices which may be associated with the specified + OpenGL context. +|==== // refError -{clFinish} returns {CL_SUCCESS} if the function call was executed +{clGetGLContextInfoKHR} returns {CL_SUCCESS} if the function is executed successfully. +If no device(s) exist corresponding to _param_name_, the call will not fail, +but the value of _param_value_size_ret_ will be zero.
Otherwise, it returns one of the following errors: - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a context was specified for + an OpenGL or OpenGL ES implementation using the EGL, GLX, or WGL binding + APIs, as <>; and + any of the following conditions hold: + ** The specified display and context properties do not identify a valid + OpenGL or OpenGL ES context. + ** The specified context does not support buffer and renderbuffer objects. + ** The specified context is not compatible with the OpenCL context being + created (for example, it exists in a physically distinct address space, + such as another hardware device; or it does not support sharing data + with OpenCL due to implementation restrictions). + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a share group was specified + for a CGL-based OpenGL implementation by setting the property + {CL_CGL_SHAREGROUP_KHR}, and the specified share group does not identify + a valid CGL share group object. + * {CL_INVALID_OPERATION} if a context was specified as described above and + any of the following conditions hold: + ** A context or share group object was specified for one of CGL, EGL, GLX, + or WGL and the OpenGL implementation does not support that + window-system binding API. + ** More than one of the properties {CL_CGL_SHAREGROUP_KHR}, + {CL_EGL_DISPLAY_KHR}, {CL_GLX_DISPLAY_KHR}, and {CL_WGL_HDC_KHR} is set + to a non-default value. + ** Both of the properties {CL_CGL_SHAREGROUP_KHR} and {CL_GL_CONTEXT_KHR} + are set to non-default values. + ** Any of the devices specified in the argument cannot support + OpenCL objects which share the data store of an OpenGL object. + * {CL_INVALID_VALUE} if a property name other than those specified in + _table 4.5_ is specified in _properties_.
+ * {CL_INVALID_VALUE} if _param_name_ is not one of the values listed in + the <<gl-context-info-table>> table, or if the size in bytes + specified by _param_value_size_ is less than the size of the return type + shown in the table and _param_value_ is not a `NULL` value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- +endif::cl_khr_gl_sharing[] diff --git a/api/provisional_notice.asciidoc b/api/provisional_notice.asciidoc new file mode 100644 index 000000000..7f0720c55 --- /dev/null +++ b/api/provisional_notice.asciidoc @@ -0,0 +1,11 @@ +// Copyright 2023-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +[NOTE] +==== +This is a provisional OpenCL extension specification that has been Ratified under the Khronos Intellectual Property Framework. +It is being made publicly available as a provisional extension to enable review and feedback from the community. +While it is a provisional extension, features may be added, removed, or changed in non-backward compatible ways.
+ +If you have feedback please create an issue on: https://github.com/KhronosGroup/OpenCL-Docs/ +==== diff --git a/c/feature-dictionary.asciidoc b/c/feature-dictionary.asciidoc index ce7767038..4943b36b5 100644 --- a/c/feature-dictionary.asciidoc +++ b/c/feature-dictionary.asciidoc @@ -129,3 +129,19 @@ endif::[] ifndef::backend-html5[] :opencl_c_work_group_collective_functions: pass:q[`\__opencl_c_​work_​group_​collective_​functions`] endif::[] + +// opencl_c_integer_dot_product_input_4x8bit +ifdef::backend-html5[] +:opencl_c_integer_dot_product_input_4x8bit: pass:q[`\__opencl_c_integer_dot_product_input_4x8bit`] +endif::[] +ifndef::backend-html5[] +:opencl_c_integer_dot_product_input_4x8bit: pass:q[`\__opencl_c_​integer_​dot_​product_​input_​4x8bit`] +endif::[] + +// opencl_c_integer_dot_product_input_4x8bit_packed +ifdef::backend-html5[] +:opencl_c_integer_dot_product_input_4x8bit_packed: pass:q[`\__opencl_c_integer_dot_product_input_4x8bit_packed`] +endif::[] +ifndef::backend-html5[] +:opencl_c_integer_dot_product_input_4x8bit_packed: pass:q[`\__opencl_c_​integer_​dot_​product_​input_​4x8bit_​packed`] +endif::[] diff --git a/c/footnotes.asciidoc b/c/footnotes.asciidoc index 6047fd0a2..4045e8e60 100644 --- a/c/footnotes.asciidoc +++ b/c/footnotes.asciidoc @@ -177,7 +177,10 @@ An OpenCL C 3.0 compiler must also define the `+__opencl_c_int64+` feature macro ] :fn-mad-caution: pass:n[ \ -The user is cautioned that for some usages, e.g. *mad*(a, b, -a*b), the definition of *mad*() is loose enough in the embedded profile that almost any result is allowed from *mad*() for some values of a and b. \ +The user is cautioned that for some usages, e.g. *mad*(a, b, -a*b), the \ +definition of *mad*() is loose enough in the embedded profile \ +or with half-precision arguments \ +that almost any result is allowed from *mad*() for some values of a and b. 
\ ] :fn-memory-scope-restrictions: pass:n[ \ diff --git a/config/opencl.asciidoc b/config/opencl.asciidoc index db190b495..62f166b4f 100644 --- a/config/opencl.asciidoc +++ b/config/opencl.asciidoc @@ -6,6 +6,7 @@ :khronos-opencl-repo: https://github.com/KhronosGroup/OpenCL-Docs :khronos-opencl-pr: {khronos-opencl-repo}/pull +:OpenCLCSpecURL: OpenCL_C.html :blank: pass:[ +] :pp: ++ diff --git a/config/rouge_opencl.rb b/config/rouge_opencl.rb index e7c26d47b..d30913eb0 100644 --- a/config/rouge_opencl.rb +++ b/config/rouge_opencl.rb @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # # frozen_string_literal: true -# Copyright (c) 2011-2024 The Khronos Group, Inc. +# Copyright 2011-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 #puts "Loading rouge_opencl extensions for source code highlighting..." diff --git a/ext/cl_khr_3d_image_writes.asciidoc b/ext/cl_khr_3d_image_writes.asciidoc deleted file mode 100644 index 2ebfa10e4..000000000 --- a/ext/cl_khr_3d_image_writes.asciidoc +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_3d_image_writes]] -== Writing to 3D Image Objects - -This section describes the *cl_khr_3d_image_writes* extension. - -This extension adds built-in functions that allow a kernel to write to 3D image objects in addition to 2D image objects. - -This extension became a core feature in OpenCL 2.0. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -The new built-in functions are described in the table below: - -// Editors note: There are no access qualifiers on these built-in -// functions, because read-write images did not exist pre-OpenCL 2.0. 
- -._3D Image Built-in Image Write Functions_ -[cols=",",options="header",] -|======================================================================= -|*Function* -|*Description* - -|void *write_imagef* ( + -image3d_t _image_, + -int4 _coord_, + -float4 _color_) + -{blank} -void *write_imagei* ( + -image3d_t _image_, + -int4 _coord_, + -int4 _color_) + -{blank} -void *write_imageui* ( + -image3d_t _image_, + -int4 _coord_, + -uint4 _color_) -|Write _color_ value to the location specified by coordinate (_x_, _y_, _z_) in the 3D image specified by _image_. -The appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_, _coord.y_, and _coord.z_ are considered to be unnormalized coordinates and must be in the range 0 ... image width - 1, 0 ... image height - 1, and 0 ... image depth - 1. + -{blank} -*write_imagef* can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16}, {CL_HALF_FLOAT}, or {CL_FLOAT}. Appropriate data format conversion will be done to convert the channel data from a floating-point value to the actual data format in which the channels are stored. + -{blank} -*write_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: + -{CL_SIGNED_INT8}, + -{CL_SIGNED_INT16}, or + -{CL_SIGNED_INT32}. + -{blank} -*write_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: + -{CL_UNSIGNED_INT8}, + -{CL_UNSIGNED_INT16}, or + -{CL_UNSIGNED_INT32}. + -{blank} -The behavior of *write_imagef*, *write_imagei*, and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above, or with (_x_, _y_, _z_) coordinate values that are not in the range (0 ... image width - 1, 0 ... 
image height - 1, 0 ... image depth - 1) respectively, is undefined. - -|======================================================================= diff --git a/ext/cl_khr_async_work_group_copy_fence.asciidoc b/ext/cl_khr_async_work_group_copy_fence.asciidoc deleted file mode 100644 index 5a2656c65..000000000 --- a/ext/cl_khr_async_work_group_copy_fence.asciidoc +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_async_work_group_copy_fence]] -== Async Work-group Copy Fence - -This section describes the *cl_khr_async_work_group_copy_fence* extension. -The extension adds a new built-in function to OpenCL C to establish a memory synchronization ordering of asynchronous copies. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 0.9.0 | First assigned version (provisional). -| 2021-11-10 | 1.0.0 | First non-provisional version. -|==== - -[[cl_khr_async_work_group_copy_fence-additions-to-chapter-6-of-the-opencl-specification]] -=== Additions to Chapter 6 of the OpenCL C Specification - -The following new built-in function is added to the _Async Copies from Global to -Local Memory, Local to Global Memory, and Prefetch_ functions described in _section 6.12.10_ -and _section 6.13.10_ of the OpenCL 1.2 and OpenCL 2.0 C specifications: - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* |*Description* -|[source,opencl_c] ----- -void async_work_group_copy_fence( - cl_mem_fence_flags flags) ----- -| Orders async copies produced by the work-items of a work-group executing -a kernel. 
Async copies preceding the *async_work_group_copy_fence* must -complete their access to the designated memory or memories, -including both reads-from and writes-to it, before async copies -following the fence are allowed to start accessing these memories. -In other words, every async copy preceding the *async_work_group_copy_fence* -must happen-before every async copy following the fence, with respect to -the designated memory or memories. - -The _flags_ argument specifies the memory address space and can be set to a -combination of the following literal values: - -`CLK_LOCAL_MEM_FENCE` + -`CLK_GLOBAL_MEM_FENCE` - -The async fence is performed by all work-items in a work-group and this -built-in function must therefore be encountered by all work-items in a -work-group executing the kernel with the same argument values; -otherwise the results are undefined. This rule applies to ND-ranges -implemented with uniform and non-uniform work-groups. -|======================================================================= diff --git a/ext/cl_khr_byte_addressable_store.asciidoc b/ext/cl_khr_byte_addressable_store.asciidoc deleted file mode 100644 index 0386a9818..000000000 --- a/ext/cl_khr_byte_addressable_store.asciidoc +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_byte_addressable_store]] -== Byte Addressable Stores - -This section describes the *cl_khr_byte_addressable_store* extension. -This extension relaxes restrictions on pointers to `char`, `uchar`, `char2`, `uchar2`, `short`, `ushort` and `half` that were present in _Section 6.8m: Restrictions_ of the OpenCL 1.0 specification. -With this extension, applications are able to read from and write to pointers to these types. - -This extension became a core feature in OpenCL 1.1. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== diff --git a/ext/cl_khr_command_buffer.asciidoc b/ext/cl_khr_command_buffer.asciidoc deleted file mode 100644 index 2c543eea6..000000000 --- a/ext/cl_khr_command_buffer.asciidoc +++ /dev/null @@ -1,2020 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_command_buffer]] -== Command Buffers (Provisional) - -This extension adds the ability to record and replay buffers of OpenCL commands. - -=== General Information - -==== Name Strings - -`cl_khr_command_buffer` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-11-10 | 0.9.0 | First assigned version (provisional). -| 2022-08-24 | 0.9.1 | Specify an error if a command-buffer is finalized multiple times (provisional). -| 2023-03-31 | 0.9.2 | Introduce context query {CL_COMMAND_BUFFER_CONTEXT_KHR} (provisional). -| 2023-04-04 | 0.9.3 | Remove Invalid command-buffer state (provisional). -| 2023-05-11 | 0.9.4 | Add clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR command entries (provisional). -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension is written against the OpenCL Specification version 3.0.6. - -This extension requires OpenCL 1.2 or later. Buffering of SVM commands -requires OpenCL 2.0 or later. - -==== Contributors - -Ewan Crawford, Codeplay Software Ltd. + -Gordon Brown, Codeplay Software Ltd. + -Kenneth Benzie, Codeplay Software Ltd. + -Alastair Murray, Codeplay Software Ltd. + -Jack Frankland, Codeplay Software Ltd. + -Balaji Calidas, Qualcomm Technologies Inc. + -Joshua Kelly, Qualcomm Technologies, Inc. + -Kevin Petit, Arm Ltd. + -Aharon Abramson, Intel. 
+ -Ben Ashbaugh, Intel. + -Boaz Ouriel, Intel. + -Chris Gearing, Intel. + -Pekka Jääskeläinen, Tampere University and Intel. + -Jan Solanti, Tampere University + -Nikhil Joshi, NVIDIA + -James Price, Google + -Brice Videau, Argonne National Laboratory + - -=== Overview - -Command-buffers enable a reduction in overhead when enqueuing the same -workload multiple times. By separating the command-queue setup from dispatch, -the ability to replay a set of previously created commands is introduced. - -Device-side _cl_sync_point_khr_ synchronization-points can be used within -command-buffers to define command dependencies. This allows the commands of a -command-buffer to execute out-of-order on a single <> -command-queue. The command-buffer itself has no inherent in-order/out-of-order -property, this ordering is inferred from the command-queue used on command -recording. Out-of-order enqueues without event dependencies of both regular -commands, such as {clEnqueueFillBuffer}, and command-buffers are allowed to -execute concurrently, and it is up to the user to express any dependencies using -events. - -The command-queues a command-buffer will be executed on can be set on replay via -parameters to {clEnqueueCommandBufferKHR}, provided they are -<> with the command-queues used on command-buffer -recording. - -==== Background - -On embedded devices where building a command stream accounts for a significant -expenditure of resources and where workloads are often required to be pipelined, -a solution that minimizes driver overhead can significantly improve the -utilization of accelerators by removing a bottleneck in repeated command stream -generation. - -An additional motivator is lowering task execution latency, as devices can be -kept occupied with work by repeated submissions, without having to wait on -the host to construct commands again for a similar workload. 
- -==== Rationale - -The command-buffer abstraction over the generation of command streams is a -proven approach which facilitates a significant reduction in driver overhead in -existing real-world applications with repetitive pipelined workloads which are -built on top of Vulkan, DirectX 12, and Metal. - -A primary goal is for a command-buffer to avoid any interaction with -application code after being enqueued until all recorded commands have -completed. As such, any command which maps or migrates memory objects; reads -or writes memory objects; or enqueues a native kernel, is not available for -command-buffer recording. Finally, commands recorded into a command buffer do -not wait for or return event objects, these are instead replaced with -device-side synchronization-point identifiers which enable out-of-order -execution when enqueued on <<compatible, compatible>> command-queues. - -Adding new entry-points for individual commands, rather than recording existing -command-queue APIs with begin/end markers was a design decision made for the -following reasons: - -* Individually specified entry points make it clearer to the user what's - supported, as opposed to adding a large number of error conditions - throughout the specification with all the restrictions. - -* Prevents code forking in existing entry points for the implementer, as - otherwise separate paths in each entry point need to be maintained for both - the recording and normal cases. - -* Allows the definition of a new device-side synchronization primitive rather - than overloading {cl_event_TYPE}. As use of {cl_event_TYPE} in individual commands - allows host interaction from callback and user-events, as well as introducing - complexities when a command-buffer is enqueued multiple times regarding - profiling and execution status. - -* New entry points facilitate returning handles to individual commands, allowing - those commands to be modified between enqueues of the command buffer.
Not all - command handles are used in this extension, but providing them facilitates - other extensions layered on top to take advantage of them to provide additional - mutable functionality. - -==== Simultaneous Use - -The optional simultaneous use capability was added to the extension so that -vendors can support pipelined workflows, where command-buffers are repeatedly -enqueued without blocking in user code. However, simultaneous use may result in -command-buffers being more expensive to enqueue than in a sequential model, so -the capability is optional to enable optimizations on command-buffer recording. - -=== Interactions with Other Extensions - -The introduction of the command-buffer abstraction enables functionality -beyond what the `cl_khr_command_buffer` extension currently provides, i.e. -the recording of immutable commands to a single queue which can then be -executed without commands synchronizing outside the command-buffer. It is -intended that extra functionality expanding on this will be provided as layered -extensions on top of `cl_khr_command_buffer`. - -Having `cl_khr_command_buffer` as a minimal base specification means that the -API defines mechanisms for functionality that is not enabled by this extension, -these are described in the following sub-sections. `cl_khr_command_buffer` will -retain its provisional extension status until other layered extensions are -released, as these may reveal modifications needed to the base specification to -support their intended use cases. - -==== ND-range Kernel Command Properties - -The {clCommandNDRangeKernelKHR} entry-point defines a `properties` parameter of -new type {cl_ndrange_kernel_command_properties_khr_TYPE}. No properties are defined -in `cl_khr_command_buffer`, but the parameter is intended to enable future -functionality that would change the characteristics of the kernel command. 
- -==== Command Handles - -All command recording entry-points define a {cl_mutable_command_khr_TYPE} output -parameter which provides a handle to the specific command being recorded. Use of -these output handles is not enabled by the `cl_khr_command_buffer` extension, -but the handles will allow individual commands in a command-buffer to be -referenced by the user. In particular, the capability for an application to use -these handles to modify commands between enqueues of a command-buffer is -envisaged. - -==== List of Queues - -Only a single command-queue can be associated with a command-buffer in the -`cl_khr_command_buffer` extension, but the API is designed with the intention -that a future extension will allow commands to be recorded across multiple -queues in the same command-buffer, providing replay of heterogeneous task -graphs. - -Using multiple queue functionality will result in an error without any layered -extensions to relax usage of the following API features: - -* When a command-buffer is created the API enables passing a list of queues - that the command-buffer will record commands to. Only a single queue is - permitted in `cl_khr_command_buffer`. - -* Individual command recording entry-points define a {cl_command_queue_TYPE} - parameter for which of the queues set on command-buffer creation that command - should be recorded to. This must be passed as NULL in `cl_khr_command_buffer`. - -* {clEnqueueCommandBufferKHR} takes a list of queues for command-buffer execution, - corresponding to those set on creation. Only a single queue is permitted in - `cl_khr_command_buffer`.
- -=== New Types - -==== Command Buffer Types - -Bitfield for querying command-buffer capabilities of an OpenCL device with -{clGetDeviceInfo}, see <<command-buffer-queries, device queries table>>: -[source] ----- -typedef cl_bitfield cl_device_command_buffer_capabilities_khr; ----- - -Types describing <<command-buffers, command-buffers>>: - -[source] ----- -// Returned by clCreateCommandBufferKHR() -typedef struct _cl_command_buffer_khr* cl_command_buffer_khr; - -// Unique ID to a device-side synchronization-point used to describe the -// ordering of commands when recording a command-buffer. Valid for use -// only within the same command-buffer during recording. -typedef cl_uint cl_sync_point_khr; - -// Handle returned on command recording -typedef struct _cl_mutable_command_khr* cl_mutable_command_khr; - -// Properties of a clCommandNDRangeKernelKHR command -typedef cl_properties cl_ndrange_kernel_command_properties_khr; - -// Properties for command-buffer creation -typedef cl_properties cl_command_buffer_properties_khr; - -// Bitfield representing flags for command-buffers -typedef cl_bitfield cl_command_buffer_flags_khr; - -// Enumerated type for use in clGetCommandBufferInfoKHR() -typedef cl_uint cl_command_buffer_info_khr; - -// Return type for CL_COMMAND_BUFFER_STATE_KHR in clGetCommandBufferInfoKHR() -typedef cl_uint cl_command_buffer_state_khr; ----- - -=== New API Functions - -Command-buffer entry points from <<command-buffers, Section 5.X>>: -[source] ----- -cl_command_buffer_khr clCreateCommandBufferKHR( - cl_uint num_queues, - const cl_command_queue* queues, - const cl_command_buffer_properties_khr* properties, - cl_int* errcode_ret); - -cl_int clRetainCommandBufferKHR(cl_command_buffer_khr command_buffer); - -cl_int clReleaseCommandBufferKHR(cl_command_buffer_khr command_buffer); - -cl_int clFinalizeCommandBufferKHR(cl_command_buffer_khr command_buffer); - -cl_int clEnqueueCommandBufferKHR( - cl_uint num_queues, - cl_command_queue* queues, - cl_command_buffer_khr command_buffer, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event*
event); - -cl_int clCommandBarrierWithWaitListKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyBufferKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_buffer, - size_t src_offset, - size_t dst_offset, - size_t size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyBufferRectKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_buffer, - const size_t* src_origin, - const size_t* dst_origin, - const size_t* region, - size_t src_row_pitch, - size_t src_slice_pitch, - size_t dst_row_pitch, - size_t dst_slice_pitch, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyBufferToImageKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_image, - size_t src_offset, - const size_t* dst_origin, - const size_t* region, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyImageKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_image, - cl_mem dst_image, - const size_t* src_origin, - const size_t* dst_origin, - const size_t* region, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyImageToBufferKHR( - 
cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_image, - cl_mem dst_buffer, - const size_t* src_origin, - const size_t* region, - size_t dst_offset, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandFillBufferKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem buffer, - const void* pattern, - size_t pattern_size, - size_t offset, - size_t size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandFillImageKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem image, - const void* fill_color, - const size_t* origin, - const size_t* region, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandNDRangeKernelKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - const cl_ndrange_kernel_command_properties_khr* properties, - cl_kernel kernel, - cl_uint work_dim, - const size_t* global_work_offset, - const size_t* global_work_size, - const size_t* local_work_size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clGetCommandBufferInfoKHR( - cl_command_buffer_khr command_buffer, - cl_command_buffer_info_khr param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); ----- - -The following SVM entry points are supported only with at least OpenCL 2.0 and -starting from 0.9.4 of this extension: - -[source] ----- - -cl_int clCommandSVMMemcpyKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue 
command_queue, - void* dst_ptr, - const void* src_ptr, - size_t size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandSVMMemFillKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - void* svm_ptr, - const void* pattern, - size_t pattern_size, - size_t size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); ----- - -=== New API Enums - -Enums for querying device command-buffer capabilities with -{clGetDeviceInfo}, see <>: - -[source] ----- -// Accepted values for the param_name parameter to clGetDeviceInfo -CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR 0x12A9 -CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR 0x12AA - -// Bits for cl_device_command_buffer_capabilities_khr bitfield -CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR (0x1 << 0) -CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR (0x1 << 1) -CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR (0x1 << 2) -CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR (0x1 << 3) - -// Values for cl_command_buffer_state_khr -CL_COMMAND_BUFFER_STATE_RECORDING_KHR 0x0 -CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR 0x1 -CL_COMMAND_BUFFER_STATE_PENDING_KHR 0x2 ----- - -Enums for base <> functionality: - -[source] ----- -// Error codes -CL_INVALID_COMMAND_BUFFER_KHR -1138 -CL_INVALID_SYNC_POINT_WAIT_LIST_KHR -1139 -CL_INCOMPATIBLE_COMMAND_QUEUE_KHR -1140 - -// Bitfield to clCreateCommandBufferKHR -CL_COMMAND_BUFFER_FLAGS_KHR 0x1293 - -// Bits for cl_command_buffer_flags_khr bitfield -CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR (0x1 << 0) - -// cl_command_buffer_info_khr queries to clGetCommandBufferInfoKHR -CL_COMMAND_BUFFER_QUEUES_KHR 0x1294 -CL_COMMAND_BUFFER_NUM_QUEUES_KHR 0x1295 -CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR 0x1296 -CL_COMMAND_BUFFER_STATE_KHR 0x1297 
-CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR 0x1298 -CL_COMMAND_BUFFER_CONTEXT_KHR 0x1299 - -// cl_event command-buffer enqueue command type -CL_COMMAND_COMMAND_BUFFER_KHR 0x12A8 ----- - -=== Modifications to section 4.2 of the OpenCL API Specification - -Add to *Table 5*, _Device Queries_, of section 4.2: - -[[command-buffer-queries]] -[caption="Table 5. "] -.List of supported param_names by {clGetDeviceInfo} -[cols="1,1,4",options="header"] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} -| {cl_device_command_buffer_capabilities_khr_TYPE} -| Describes device command-buffer capabilities, encoded as bits in a bitfield. - Supported capabilities are: - - {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} Device supports the ability - to record commands that execute kernels which contain printf calls. - - {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} Device supports the - ability to record commands that execute kernels which contain device-side - kernel-enqueue calls. - - {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR} Device supports the - command-buffers having a <> that exceeds 1. - - {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} Device supports the ability - to record command-buffers to out-of-order command-queues. - -| {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR} -| {cl_command_queue_properties_TYPE} -| Bitmask of the minimum properties with which a command-queue must be created - to allow a command-buffer to be executed on it. It is valid for a - command-queue to be created with extra properties in addition to this - base requirement and still be compatible with command-buffer execution. -|==== - -[[command-buffers]] -=== Add new section "Section 5.X - Command Buffers" to OpenCL API Specification - -A _command-buffer_ object represents a series of operations to be enqueued -on one or more command-queues without any application code interaction. 
-Grouping the operations together allows efficient enqueuing of repetitive -operations, as well as enabling driver optimizations. - -Command-buffers are _sequential use_ by default, but may also be set to -_simultaneous use_ on creation if the device optionally supports this -capability. A sequential use command-buffer must have a <> of 0 or 1. The simultaneous use capability removes this -restriction and allows command-buffers to have a <> greater than 1. - -[[compatible]] -Command-buffers are created using an ordered list of command-queues that -commands are recorded to and execute on by default. These command-queues can be -replaced on command-buffer enqueue with different command-queues, provided for -each element in the replacement list the substitute command-queue is compatible -with the command-queue used on command-buffer creation. Where a _compatible_ -command-queue is defined as a command-queue with identical properties targeting -the same device and in the same OpenCL context. - -While constructing a command-buffer it is valid for the user to interleave calls -to the same queue which create commands, such as {clCommandNDRangeKernelKHR}, with -queue submission calls, such as {clEnqueueNDRangeKernel} or -{clEnqueueCommandBufferKHR}. That is, there is no effect on queue state from -recording commands. The purpose of the queue parameter is to define the device -and properties of the command, which are constant queries on the queue object. - -A command-buffer object should increment the reference count of attached OpenCL -objects such as queues, buffers, images, and kernels referenced in commands -recorded to the command-buffer. This enables correct behavior of the -command-buffer when its attached objects have been released. On destruction of -the command-buffer it should decrement these reference counts, allowing the -attached objects to be freed if appropriate. 
- -[[command-buffer-kernel-argument-ref-counting]] -[NOTE] -==== -A command-buffer object does not update the reference count of objects set as -arguments on kernels recorded into the command-buffer. This is consistent with -the reference counting behavior of {clSetKernelArg}. - -Applications should ensure that objects passed as arguments to kernels recorded -to a command-buffer are not deleted until the command-buffer has been released. -Undefined behavior may result from the failure to follow this usage requirement -for all the command-buffers an object is used as a kernel argument in. - -If using layered extension `cl_khr_command_buffer_mutable_dispatch`, -<>. -==== - -==== Add new section "Section 5.X.1 - Command Buffer Lifecycle" - -A command-buffer is always in one of the following states: - -[[recording]] -Recording:: Initial state of a command-buffer on creation, where commands can be -recorded to the command-buffer. - -[[executable]] -Executable:: State after command recording has finished with -{clFinalizeCommandBufferKHR} and the command-buffer may be enqueued. - -[[pending]] -Pending:: Once a command-buffer has been enqueued to a command-queue it enters -the Pending state until completion, at which point it moves back to the -<> state. - -//// -Image generated from the following mermaid diagram description using https://mermaid.live -Ideally we'd use the asciidoctor-diagram extension to generate the rendered diagram, but -there are issues installing the gem with ruby 2.3.3 - -[mermaid, "Lifecycle of a command-buffer", png] -.... -stateDiagram-v2 - [*] --> Recording: Create - Recording -->Executable: Finalize - Executable --> Pending: Enqueue - Pending --> Executable: Completion -.... -//// - -image::images/commandbuffer_lifecycle.png[align="center", title="Lifecycle of a command-buffer."] - -[[pending_count]] -The Pending Count is the number of copies of the command -buffer in the <> state. By default a command-buffer's Pending -Count must be 0 or 1. 
If the command-buffer was created with -{CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} then the command-buffer may have a -Pending Count greater than 1. - -==== Add new section "Section 5.X.2 - Creating Command Buffer Objects" - -The function - -include::{generated}/api/protos/clCreateCommandBufferKHR.txt[] - -Is used to create a command-buffer that can record commands to the specified -queues. - -[NOTE] -==== -Upon creation the command-buffer is defined as being in the -<> state, in order for the command-buffer to be enqueued -it must first be finalized using {clFinalizeCommandBufferKHR} after which no -further commands can be recorded. A command-buffer is submitted for execution -on command-queues with a call to {clEnqueueCommandBufferKHR}. -==== - -_num_queues_ The number of command-queues listed in _queues_. This extension -only supports a single command-queue, so this **must** be one. - -_queues_ Is a pointer to a command-queue that the command-buffer commands will -be recorded to. _queues_ must be a non-`NULL` value. - -_properties_ Specifies a list of properties for the command-buffer and their -corresponding values. Each property name is immediately followed by the -corresponding desired value. The list is terminated with 0. -The list of supported properties is described in the table below. If a -supported property and its value is not specified in properties, its -default value will be used. _properties_ can be `NULL` in which case the -default values for supported command-buffer properties will be used. - -[[commandbuffer-properties]] -.{clCreateCommandBufferKHR} properties -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_COMMAND_BUFFER_FLAGS_KHR} -| {cl_command_buffer_flags_khr_TYPE} -| This is a bitfield and can be set to a combination of the following values: - - {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} - Allow multiple instances of the - command-buffer to be submitted to the device for execution. 
If set, devices - must support {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR}. - - The default value of this property is `0`. -|==== - -_errcode_ret_ Returns an appropriate error code. If _errcode_ret_ is `NULL`, no -error code is returned. - -{clCreateCommandBufferKHR} returns a valid non-zero command-buffer and -_errcode_ret_ is set to {CL_SUCCESS} if the command-buffer is created -successfully. Otherwise, it returns a `NULL` value with one of the following -error values returned in _errcode_ret_: - -* {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a valid - command-queue. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any command-queue in _queues_ is an - out-of-order command-queue and the device associated with the command-queue - does not support the {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} - capability. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any command-queue in - _queues_ does not contain the minimum properties specified by - {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR}. - -* {CL_INVALID_CONTEXT} if all the command-queues in _queues_ do not have the - same OpenCL context. - -* {CL_INVALID_VALUE} if _num_queues_ is not one. - -* {CL_INVALID_VALUE} if _queues_ is `NULL`. - -* {CL_INVALID_VALUE} if values specified in _properties_ are not valid, or if - the same property name is specified more than once. - -* {CL_INVALID_PROPERTY} if values specified in _properties_ are valid but are - not supported by all the devices associated with command-queues in _queues_. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources - required by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clRetainCommandBufferKHR.txt[] - -Increments the _command_buffer_ reference count. - -_command_buffer_ Specifies the command-buffer to retain. 
- -{clRetainCommandBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by - the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clReleaseCommandBufferKHR.txt[] - -Decrements the _command_buffer_ reference count. - -[NOTE] -==== -After the _command_buffer_ reference count becomes zero and has finished -execution, the command-buffer is deleted. -==== - -_command_buffer_ Specifies the command-buffer to release. - -{clReleaseCommandBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources - required by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -==== Add new section "Section 5.X.2 - Enqueuing a Command Buffer" - -The function - -include::{generated}/api/protos/clFinalizeCommandBufferKHR.txt[] - -Finalizes command recording ready for enqueuing the command-buffer on a -command-queue. - -[NOTE] -==== -{clFinalizeCommandBufferKHR} places the command-buffer in the -<> state where commands can no longer be recorded, at -this point the command-buffer is ready to be enqueued. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -{clFinalizeCommandBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. 
Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ is not in the - <<recording, Recording>> state. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required - by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clEnqueueCommandBufferKHR.txt[] - -Enqueues a command-buffer to execute on command-queues specified by _queues_, -or on default command-queues used during recording if _queues_ is empty. - -[NOTE] -==== -To enqueue a command-buffer it must be in a <<executable, Executable>> state, -see {clFinalizeCommandBufferKHR}. -==== - -_num_queues_ The number of command-queues listed in _queues_. - -_queues_ A pointer to an ordered list of command-queues -<<compatible, compatible>> with the command-queues used on recording. _queues_ -can be `NULL` in which case the default command-queues used on command-buffer -creation are used and _num_queues_ must be 0. - -_command_buffer_ Refers to a valid command-buffer object. - -_event_wait_list_, _num_events_in_wait_list_ Specify events that need to -complete before this particular command can be executed. If -_event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. If _event_wait_list_ is `NULL`, -_num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, -the list of events pointed to by _event_wait_list_ must be valid and -_num_events_in_wait_list_ must be greater than 0. The events specified -in _event_wait_list_ act as synchronization points. The context associated -with events in _event_wait_list_ and command_queue must be the same. The memory -associated with _event_wait_list_ can be reused or freed after the function -returns.
- -_event_ Returns an event object that identifies this command and -can be used to query for profiling information or queue a wait for this -particular command to complete. _event_ can be `NULL` in which case it will not -be possible for the application to wait on this command or query it for -profiling information. - -{clEnqueueCommandBufferKHR} returns {CL_SUCCESS} if the command-buffer -execution was successfully queued, or one of the errors below: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. - -* {CL_INVALID_OPERATION} if _command_buffer_ was not created with the - {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag and is in the <<pending, Pending>> state. - -* {CL_INVALID_VALUE} if _queues_ is `NULL` and _num_queues_ is > 0, or _queues_ - is not `NULL` and _num_queues_ is 0. - -* {CL_INVALID_VALUE} if _num_queues_ is > 0 and not the same value as - _num_queues_ set on _command_buffer_ creation. - -* {CL_INVALID_COMMAND_QUEUE} if any element of _queues_ is not a valid - command-queue. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any element of _queues_ is not - <<compatible, compatible>> with the command-queue set on _command_buffer_ - creation at the same list index. - -* {CL_INVALID_CONTEXT} if any element of _queues_ does not have the same - context as the command-queue set on _command_buffer_ creation at the same list - index. - -* {CL_INVALID_CONTEXT} if the context associated with _command_buffer_ and - events in _event_wait_list_ are not the same. - -* {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution instance of - _command_buffer_ on the command-queues because of insufficient resources - needed to execute _command_buffer_. - -* {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` - and _num_events_in_wait_list_ is 0, or if event objects in - _event_wait_list_ are not valid events.
- -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required - by the OpenCL implementation on the host. - -==== Add new section "Section 5.X.3 - Recording Commands to a Command Buffer" - -The function - -include::{generated}/api/protos/clCommandBarrierWithWaitListKHR.txt[] - -Records a barrier operation used as a synchronization point. - -[NOTE] -==== -{clCommandBarrierWithWaitListKHR} Waits for either a list of -synchronization-points to complete, or if the list is empty it waits for all -commands previously recorded in _command_buffer_ to complete before it -completes. This command blocks command execution, that is, any following -commands recorded after it do not execute until it completes. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. 
- -If _sync_point_wait_list_ is `NULL`, then this particular command -waits until all previous recorded commands to _command_queue_ have -completed. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this barrier command later on. _sync_point_ can be `NULL` in -which case it will not be possible for the application to record a wait -for this command to complete. If the _sync_point_wait_list_ and the -_sync_point_ arguments are not `NULL`, the _sync_point_ argument -should not refer to an element of the _sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandBarrierWithWaitListKHR} returns {CL_SUCCESS} if the function is -executed successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - _command_buffer_ is not the same. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by - the OpenCL implementation on the host. 
- -The function - -include::{generated}/api/protos/clCommandCopyBufferKHR.txt[] - -Records a command to copy from one buffer object to another. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_buffer_, _dst_buffer_, _src_offset_, _dst_offset_, _size_ Refer to -{clEnqueueCopyBuffer}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. 
Otherwise, it returns the errors defined by -{clEnqueueCopyBuffer} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandCopyBufferRectKHR.txt[] - -Records a command to copy a rectangular region from a buffer object to another -buffer object. - -[NOTE] -==== -{clCommandCopyBufferRectKHR} records a command to copy a 2D or 3D rectangular -region from the buffer object identified by _src_buffer_ to a 2D or 3D region -in the buffer object identified by _dst_buffer_. Copying begins at the source -offset and destination offset which are computed as described in the -description for _src_origin_ and _dst_origin_. - -Each byte of the region's width is copied from the source offset to the -destination offset. After copying each width, the source and destination -offsets are incremented by their respective source and destination row -pitches. After copying each 2D rectangle, the source and destination offsets -are incremented by their respective source and destination slice pitches. -==== - -_command_buffer_ Refers to a valid command-buffer object. 
- -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_origin_, _dst_origin_, _region_, _src_row_pitch_, _src_slice_pitch_, -_dst_row_pitch_, _dst_slice_pitch_ Refer to {clEnqueueCopyBufferRect}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyBufferRectKHR} returns {CL_SUCCESS} if the function is executed -successfully. 
Otherwise, it returns the errors defined by -{clEnqueueCopyBufferRect} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandCopyBufferToImageKHR.txt[] - -Records a command to copy a buffer object to an image object. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_buffer_, _dst_image_, _src_offset_, _dst_origin_, _region_ Refer to -{clEnqueueCopyBufferToImage}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0.
-The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyBufferToImageKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueCopyBufferToImage} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_image_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. 
- -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandCopyImageKHR.txt[] - -Records a command to copy image objects. - -[NOTE] -==== -It is currently a requirement that the _src_image_ and _dst_image_ image -memory objects for {clCommandCopyImageKHR} must have the exact same image -format, i.e. the {cl_image_format_TYPE} descriptor specified when _src_image_ and -_dst_image_ are created must match. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_image_, _dst_image_, _src_origin_, _dst_origin_, _region_ Refer to -{clEnqueueCopyImage}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. 
If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyImageKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueCopyImage} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_image_, and _dst_image_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandCopyImageToBufferKHR.txt[] - -Records a command to copy an image object to a buffer object. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_image_, _dst_buffer_, _src_origin_, _region_, _dst_offset_ -Refer to {clEnqueueCopyImageToBuffer}. 
- -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyImageToBufferKHR} returns {CL_SUCCESS} if the function is -executed successfully. Otherwise, it returns the errors defined by -{clEnqueueCopyImageToBuffer} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_image_, and _dst_buffer_ are not the same. 
- -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandFillBufferKHR.txt[] - -Records a command to fill a buffer object with a pattern of a given pattern -size. - -[NOTE] -==== -The usage information which indicates whether the memory object can be read or -written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument -value specified when _buffer_ is created is ignored by -{clCommandFillBufferKHR}. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_buffer_, _pattern_, _pattern_size_, _offset_, _size_ Refer to -{clEnqueueFillBuffer}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. 
The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandFillBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueFillBuffer} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _buffer_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. 
- -The function - -include::{generated}/api/protos/clCommandFillImageKHR.txt[] - -Records a command to fill an image object with a specified color. - -[NOTE] -==== -The usage information which indicates whether the memory object can be read or -written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument -value specified when _image_ is created is ignored by {clCommandFillImageKHR}. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_image_, _fill_color_, _origin_, _region_ Refer to {clEnqueueFillImage}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array.
- -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandFillImageKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueFillImage} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _image_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandNDRangeKernelKHR.txt[] - -Records a command to execute a kernel on a device. - -[NOTE] -==== -The work-group size to be used for _kernel_ can also be specified in the -program source using the -`+__attribute__((reqd_work_group_size(X, Y, Z)))+` qualifier. In this case the -size of work-group specified by _local_work_size_ must match the value -specified by the `reqd_work_group_size` `+__attribute__+` qualifier. - -These work-group instances are executed in parallel across multiple compute -units or concurrently on the same compute unit. - -Each work-item is uniquely identified by a global identifier. The global ID, -which can be read inside the kernel, is computed using the value given by -_global_work_size_ and _global_work_offset_. 
In addition, a work-item is -also identified within a work-group by a unique local ID. The local ID, -which can also be read by the kernel, is computed using the value given by -_local_work_size_. The starting local ID is always (0, 0, ... 0). -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_properties_ Specifies a list of properties for the kernel command and their -corresponding values. Each property name is immediately followed by the -corresponding desired value. The list is terminated with 0. If no properties are -required, _properties_ may be `NULL`. This extension does not define any -properties. - -_kernel_ A valid kernel object which **must** have its arguments set. Any -changes to _kernel_ after calling {clCommandNDRangeKernelKHR}, such as with -{clSetKernelArg} or {clSetKernelExecInfo}, have no effect on the recorded -command. If _kernel_ is recorded to a following {clCommandNDRangeKernelKHR} -command however, then that command will capture the updated state of _kernel_. - -_work_dim_, _global_work_offset_, _global_work_size_, _local_work_size_ Refer -to {clEnqueueNDRangeKernel}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. 
The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandNDRangeKernelKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueNDRangeKernel} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_VALUE} if values specified in _properties_ are not valid. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`.
- -* {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not - support {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} and _kernel_ contains - a printf call. - -* {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not - support {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} and _kernel_ - contains a kernel-enqueue call. - - -The function - -include::{generated}/api/protos/clCommandSVMMemcpyKHR.txt[] - -Records a command to do an SVM memcpy operation. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_dst_ptr_ is the pointer to a host (if the device supports system SVM) -or SVM memory allocation where data is copied to. - -_src_ptr_ is the pointer to a host (if the device supports system SVM) -or SVM memory allocation where data is copied from. - -_size_ is the size in bytes of data being copied. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. 
Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandSVMMemcpyKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueSVMMemcpy} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - _command_buffer_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - - -The function - -include::{generated}/api/protos/clCommandSVMMemFillKHR.txt[] - -Records a command to fill a region in SVM with a pattern of a given pattern size. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`.
- -_svm_ptr_ is a pointer to a host (if the device supports system SVM) -or SVM memory region that will be filled with _pattern_. It must be -aligned to _pattern_size_ bytes. -If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from -the same context from which _command_queue_ was created. -Otherwise the behavior is undefined. - -_pattern_ is a pointer to the data pattern of size _pattern_size_ in bytes. -_pattern_ will be used to fill a region in _buffer_ starting at _svm_ptr_ -and is _size_ bytes in size. -The data pattern must be a scalar or vector integer or floating-point data -type supported by OpenCL. -For example, if region pointed to by _svm_ptr_ is to be filled with a -pattern of float4 values, then _pattern_ will be a pointer to a cl_float4 -value and _pattern_size_ will be `sizeof(cl_float4)`. -The maximum value of _pattern_size_ is the size of the largest integer or -floating-point vector data type supported by the OpenCL device. -The memory associated with _pattern_ can be reused or freed after the -function returns. - -_size_ is the size in bytes of region being filled starting with _svm_ptr_ -and must be a multiple of _pattern_size_. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns.
- -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandSVMMemFillKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueSVMMemFill} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - _command_buffer_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -==== Add new section "Section 5.X.4 - Command Buffer Queries" - -The function - -include::{generated}/api/protos/clGetCommandBufferInfoKHR.txt[] - -Queries information about a command-buffer. - -_command_buffer_ Specifies the command-buffer being queried. - -_param_name_ Specifies the information to query.
- -_param_value_size_ Specifies the size in bytes of memory pointed to by -_param_value_. This size must be ≥ size of return type as described in the table -below. If _param_value_ is `NULL`, it is ignored. - -_param_value_ A pointer to memory where the appropriate result being queried is -returned. If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ret_ Returns the actual size in bytes of data being queried by -_param_value_. If _param_value_size_ret_ is `NULL`, it is ignored. - -The list of supported _param_name_ values and the information returned in -_param_value_ by {clGetCommandBufferInfoKHR} is described in the table below. - -.{clGetCommandBufferInfoKHR} values -[cols=",,",options="header",] -|==== -| Command Buffer Info -| Return Type -| Description - -| {CL_COMMAND_BUFFER_NUM_QUEUES_KHR} -| {cl_uint_TYPE} -| The number of command-queues specified when _command_buffer_ was created. - -| {CL_COMMAND_BUFFER_QUEUES_KHR} -| {cl_command_queue_TYPE}[] -| Return the list of command-queues specified when the _command_buffer_ was - created. - -| {CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR} footnote:[{fn-reference-count-usage}] -| {cl_uint_TYPE} -| Return the _command_buffer_ reference count. - -| {CL_COMMAND_BUFFER_STATE_KHR} -| {cl_command_buffer_state_khr_TYPE} -| Return the state of _command_buffer_. - - {CL_COMMAND_BUFFER_STATE_RECORDING_KHR} is returned when _command_buffer_ has - not been finalized. - - {CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR} is returned when _command_buffer_ - has been finalized and there is not a pending instance of - _command_buffer_ awaiting completion on a command_queue. - - {CL_COMMAND_BUFFER_STATE_PENDING_KHR} is returned when an instance of - _command_buffer_ has been enqueued for execution but not yet completed. - -| {CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR} -| {cl_command_buffer_properties_khr_TYPE}[] -| Return the _properties_ argument specified in {clCreateCommandBufferKHR}.
- - If the _properties_ argument specified in {clCreateCommandBufferKHR} used to - create _command_buffer_ was not `NULL`, the implementation must return the - values specified in the properties argument. - - If the _properties_ argument specified in {clCreateCommandBufferKHR} used to - create _command_buffer_ was `NULL`, the implementation may return either a - _param_value_size_ret_ of 0 (i.e. there are no properties to be returned), - or the implementation may return a property value of 0 (where 0 is used to - terminate the properties list). - -| {CL_COMMAND_BUFFER_CONTEXT_KHR} -| {cl_context_TYPE} -| Return the context associated with _command_buffer_. - -|==== - -{clGetCommandBufferInfoKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_VALUE} if _param_name_ is not one of the supported values - or if size in bytes specified by _param_value_size_ is less than size of - return type and _param_value_ is not a `NULL` value. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by - the OpenCL implementation on the host. - -=== Modifications to section 5.11 of the OpenCL API Specification - -In the opening paragraph add {clEnqueueCommandBufferKHR} to list of commands that -can return an event object.
- -Add to Table 37, _Event Command Types_: -[cols=",",options="header"] -|==== -| Events Created By -| Event Command Type - -| {clEnqueueCommandBufferKHR} -| {CL_COMMAND_COMMAND_BUFFER_KHR} -|==== - -=== Sample Code - -[source] ----- - #define CL_CHECK(ERROR) \ - if (ERROR) { \ - std::cerr << "OpenCL error: " << ERROR << "\n"; \ - return ERROR; \ - } - - int main() { - cl_platform_id platform; - CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); - cl_device_id device; - CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr)); - - cl_int error; - cl_context context = - clCreateContext(nullptr, 1, &device, nullptr, nullptr, &error); - CL_CHECK(error); - - const char* code = R"OpenCLC( - kernel void vector_addition(global int* tile1, global int* tile2, - global int* res) { - size_t index = get_global_id(0); - res[index] = tile1[index] + tile2[index]; - } - )OpenCLC"; - const size_t length = std::strlen(code); - - cl_program program = - clCreateProgramWithSource(context, 1, &code, &length, &error); - CL_CHECK(error); - - CL_CHECK(clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr)); - - cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); - CL_CHECK(error); - - constexpr size_t frame_count = 60; - constexpr size_t frame_elements = 1024; - constexpr size_t frame_size = frame_elements * sizeof(cl_int); - - constexpr size_t tile_count = 16; - constexpr size_t tile_elements = frame_elements / tile_count; - constexpr size_t tile_size = tile_elements * sizeof(cl_int); - - cl_mem buffer_tile1 = - clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, nullptr, &error); - CL_CHECK(error); - cl_mem buffer_tile2 = - clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, nullptr, &error); - CL_CHECK(error); - cl_mem buffer_res = - clCreateBuffer(context, CL_MEM_WRITE_ONLY, tile_size, nullptr, &error); - CL_CHECK(error); - - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(buffer_tile1), &buffer_tile1)); - CL_CHECK(clSetKernelArg(kernel, 1, 
sizeof(buffer_tile2), &buffer_tile2)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(buffer_res), &buffer_res)); - - cl_command_queue command_queue = - clCreateCommandQueue(context, device, - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); - CL_CHECK(error); - - cl_command_buffer_khr command_buffer = - clCreateCommandBufferKHR(1, &command_queue, nullptr, &error); - CL_CHECK(error); - - cl_mem buffer_src1 = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - cl_mem buffer_src2 = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - cl_mem buffer_dst = - clCreateBuffer(context, CL_MEM_WRITE_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - - cl_sync_point_khr tile_sync_point = 0; - for (size_t tile_index = 0; tile_index < tile_count; tile_index++) { - std::array<cl_sync_point_khr, 2> copy_sync_points; - CL_CHECK(clCommandCopyBufferKHR(command_buffer, - command_queue, buffer_src1, buffer_tile1, tile_index * tile_size, 0, - tile_size, tile_sync_point ? 1 : 0, - tile_sync_point ? &tile_sync_point : nullptr, &copy_sync_points[0], - nullptr)); - CL_CHECK(clCommandCopyBufferKHR(command_buffer, - command_queue, buffer_src2, buffer_tile2, tile_index * tile_size, 0, - tile_size, tile_sync_point ? 1 : 0, - tile_sync_point ?
&tile_sync_point : nullptr, &copy_sync_points[1], - nullptr)); - - cl_sync_point_khr nd_sync_point; - CL_CHECK(clCommandNDRangeKernelKHR(command_buffer, - command_queue, nullptr, kernel, 1, nullptr, &tile_elements, nullptr, - copy_sync_points.size(), copy_sync_points.data(), &nd_sync_point, - nullptr)); - - CL_CHECK(clCommandCopyBufferKHR(command_buffer, - command_queue, buffer_res, buffer_dst, 0, tile_index * tile_size, - tile_size, 1, &nd_sync_point, &tile_sync_point, nullptr)); - } - - CL_CHECK(clFinalizeCommandBufferKHR(command_buffer)); - - std::random_device random_device; - std::mt19937 random_engine{random_device()}; - std::uniform_int_distribution<cl_int> random_distribution{ - 0, std::numeric_limits<cl_int>::max() / 2}; - auto random_generator = [&]() { return random_distribution(random_engine); }; - - for (size_t frame_index = 0; frame_index < frame_count; frame_index++) { - std::array<cl_event, 2> write_src_events; - std::vector<cl_int> src1(frame_elements); - std::generate(src1.begin(), src1.end(), random_generator); - CL_CHECK(clEnqueueWriteBuffer(command_queue, buffer_src1, CL_FALSE, 0, - frame_size, src1.data(), 0, nullptr, - &write_src_events[0])); - std::vector<cl_int> src2(frame_elements); - std::generate(src2.begin(), src2.end(), random_generator); - CL_CHECK(clEnqueueWriteBuffer(command_queue, buffer_src2, CL_FALSE, 0, - frame_size, src2.data(), 0, nullptr, - &write_src_events[1])); - - CL_CHECK(clEnqueueCommandBufferKHR(0, NULL, command_buffer, 2, - write_src_events.data(), nullptr)); - - CL_CHECK(clFinish(command_queue)); - - CL_CHECK(clReleaseEvent(write_src_events[0])); - CL_CHECK(clReleaseEvent(write_src_events[1])); - } - - CL_CHECK(clReleaseCommandBufferKHR(command_buffer)); - CL_CHECK(clReleaseCommandQueue(command_queue)); - - CL_CHECK(clReleaseMemObject(buffer_src1)); - CL_CHECK(clReleaseMemObject(buffer_src2)); - CL_CHECK(clReleaseMemObject(buffer_dst)); - - CL_CHECK(clReleaseMemObject(buffer_tile1)); - CL_CHECK(clReleaseMemObject(buffer_tile2)); -
CL_CHECK(clReleaseMemObject(buffer_res)); - - CL_CHECK(clReleaseKernel(kernel)); - CL_CHECK(clReleaseProgram(program)); - CL_CHECK(clReleaseContext(context)); - - return 0; - } ----- - -=== Issues - -. Introduce a `clCloneCommandBufferKHR` entry-point for cloning a - command-buffer. -+ --- -*UNRESOLVED* --- -. Enable detached command-buffer execution, where command-buffers are executed - on their own internal queue to prevent locking user created queues for the - duration of their execution. -+ --- -*UNRESOLVED* --- diff --git a/ext/cl_khr_command_buffer_multi_device.asciidoc b/ext/cl_khr_command_buffer_multi_device.asciidoc deleted file mode 100644 index cd1c638a7..000000000 --- a/ext/cl_khr_command_buffer_multi_device.asciidoc +++ /dev/null @@ -1,767 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_command_buffer_multi_device]] -== Command Buffers - Multiple Devices (Provisional) - -This extension enables users to record commands across multiple queues in the same command-buffer, -providing execution of heterogeneous task graphs from command-queues associated with different devices. - -=== General Information - -==== Name Strings - -`cl_khr_command_buffer_multi_device` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2023-04-14 | 0.9.0 | First assigned version (provisional). -| 2024-04-30 | 0.9.1 | Added clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR as affected functions (provisional). -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension requires the `cl_khr_command_buffer` extension version 0.9.3. - -==== Contributors - -Ewan Crawford, Codeplay Software Ltd. + -Gordon Brown, Codeplay Software Ltd. + -Kenneth Benzie, Codeplay Software Ltd. + -Alastair Murray, Codeplay Software Ltd. 
+ -Jack Frankland, Codeplay Software Ltd. + -Balaji Calidas, Qualcomm Technologies Inc. + -Joshua Kelly, Qualcomm Technologies, Inc. + -Kevin Petit, Arm Ltd. + -Aharon Abramson, Intel. + -Ben Ashbaugh, Intel. + -Boaz Ouriel, Intel. + -Pekka Jääskeläinen, Tampere University and Intel. + -Jan Solanti, Tampere University + -Nikhil Joshi, NVIDIA + -James Price, Google + - -=== Overview - -The `cl_khr_command_buffer` extension separates command construction from -enqueue by providing a mechanism to record a set of commands which can then be -repeatedly enqueued. However, the commands in a command-buffer can -only be recorded to a single command-queue specified on command-buffer creation. - -`cl_khr_command_buffer_multi_device` extends the scope of a command-buffer to -allow commands to be recorded across multiple queues in the same command-buffer, -providing execution of heterogeneous task graphs from command-queues associated -with different devices. - -The ability for a user to deep copy an existing command-buffer so that the -commands target a different device is also made possible by -`cl_khr_command_buffer_multi_device`. Depending on platform support the mapping -of commands to the new target device can be done either explicitly by the user, -or automatically by the OpenCL runtime. 
- -=== New Types - -Bitfield for querying command-buffer capabilities of an OpenCL Platform with -{clGetPlatformInfo}, see the -<<cl_khr_command_buffer_multi_device-platform-queries, platform queries table>>: -[source,opencl] ----- -typedef cl_bitfield cl_platform_command_buffer_capabilities_khr; ----- - -=== New API Functions - -[source,opencl] ----- -cl_command_buffer_khr clRemapCommandBufferKHR( - cl_command_buffer_khr command_buffer, - cl_bool automatic, - cl_uint num_queues, - const cl_command_queue* queues, - cl_uint num_handles, - const cl_mutable_command_khr* handles, - cl_mutable_command_khr* handles_ret, - cl_int* errcode_ret); ----- - -=== New API Enums - -Enums for querying device command-buffer capabilities with -{clGetDeviceInfo}, see the -<<cl_khr_command_buffer_multi_device-device-queries, device queries table>>: - -[source,opencl] ----- -// Accepted values for the param_name parameter to clGetDeviceInfo -CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR 0x12AB -CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR 0x12AC - -// Bits for cl_device_command_buffer_capabilities_khr bitfield -CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR (0x1 << 4) - -// Bits for cl_command_buffer_flags_khr -CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR (0x1 << 2) ----- - -Enums for querying platform command-buffer capabilities with -{clGetPlatformInfo}, see the -<<cl_khr_command_buffer_multi_device-platform-queries, platform queries table>>: - -[source,opencl] ----- -// Accepted values for the param_name parameter to clGetPlatformInfo -CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR 0x0908 - -// Bits for cl_platform_command_buffer_capabilities_khr bitfield -CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR (0x1 << 0) -CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR (0x1 << 1) -CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR (0x1 << 2) ----- - -=== Modifications to section 4.1 of the OpenCL API Specification - -Add to *Table 3*, _Platform Queries_, - -[[cl_khr_command_buffer_multi_device-platform-queries]] -[cols="1,1,4",options="header"] -|==== -| Platform Info -| Return Type -| Description - -| {CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR} -| {cl_platform_command_buffer_capabilities_khr_TYPE} -| Describes platform
command-buffer capabilities, encoded as bits in a bitfield. - Supported capabilities are: - - {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR} - Platform supports the ability - to synchronize all commands in a command-buffer using sync-points, irrespective - of the queue the individual commands are recorded to. - - {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} - Platform supports the ability - to create a deep copy of an existing command-buffer with the commands - explicitly remapped to different, potentially incompatible, - queues. - - {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} - Platform supports the - ability to create a remapped command-buffer where the mapping of commands to - queues is done by the OpenCL runtime in a way it determines as optimal. If - {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} is reported, - {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} must also be reported. - -|==== - -=== Modifications to section 4.2 of the OpenCL API Specification - -Add {CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR} and -{CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} rows to *Table 5*, _Device Queries_, -of section 4.2. Also, add additional text to the -{CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} row: - -[[cl_khr_command_buffer_multi_device-device-queries]] -[cols="1,1,4",options="header"] -|==== -| {cl_device_info_TYPE} -| Return Type -| Description - -| {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} -| {cl_device_command_buffer_capabilities_khr_TYPE} -| Describes device command-buffer capabilities, encoded as bits in a bitfield. - Supported capabilities are: - - {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR} Device supports the ability - to record commands to more than one command-queue associated with _device_ in - a single command-buffer. - -| {CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR} -| {cl_uint_TYPE} -| Return the number of root devices listed in - {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} that _device_ can use device-side - synchronization with.
- -| {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} -| {cl_device_id_TYPE}[] -| Return the list of root devices _device_ can use device-side synchronization - with. A device should list itself only if it has native support for - synchronizing commands. Sub-devices are not listed to avoid non-deterministic - results as sub-devices are created, instead if a root device is listed, then - any of its partitioned sub-devices can also be natively synchronized with. - -|==== - -=== Modifications to section 5.11 of the OpenCL API Specification - -Add additional wording to the description column of *Table 36*, _Event Object -Queries_: - -{CL_EVENT_COMMAND_QUEUE} - For events returned by a command-buffer enqueue -operation to multiple command-queues, `NULL` is returned. - -{CL_EVENT_COMMAND_EXECUTION_STATUS} - For events returned by a command-buffer -enqueue operation to multiple command-queues the semantics of execution status -is as follows: - - * {CL_QUEUED} - Command-buffer has been enqueued across the command-queues. - - * {CL_SUBMITTED} - Commands from the command-buffer have been submitted by - the host to any device associated with one of the command-queues. - - * {CL_RUNNING} - Any command from the command-buffer has started execution on - a device. - - * {CL_COMPLETE} - All commands have completed on all devices. - -=== Modifications to section 5.14 of the OpenCL API Specification - -==== Query Updates - -Add additional wording to description column of *Table 38*, _Event Profiling -Queries_: - -* {CL_PROFILING_COMMAND_QUEUED} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time when the -command-buffer has been enqueued across the command-queues is used. - -* {CL_PROFILING_COMMAND_SUBMIT} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time is used when -command-buffer commands have been submitted to any command-queue. 
- -* {CL_PROFILING_COMMAND_START} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time is used when -any device starts executing a command-buffer command. - -* {CL_PROFILING_COMMAND_END} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time is used when -the last command-buffer command finishes execution on any device. - -* {CL_PROFILING_COMMAND_COMPLETE} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time is used when the -command-buffer has completed execution across all command-queues. - -[NOTE] -==== -If no reliable device timer sources are available to inform the host side, -or parallel runtime scheduling makes it impossible to identify a first/last -command, then an implementation may fallback to reporting -{CL_PROFILING_COMMAND_SUBMIT} and {CL_PROFILING_COMMAND_COMPLETE} for -{CL_PROFILING_COMMAND_START} and {CL_PROFILING_COMMAND_END} respectively. -==== - -==== Error Updates - -Extend the wording defining the {CL_PROFILING_INFO_NOT_AVAILABLE} error return -code from {clGetEventProfilingInfo} to append the following sentence: - -* If _event_ was created from a call to {clEnqueueCommandBufferKHR}, - {CL_PROFILING_INFO_NOT_AVAILABLE} is returned if all the queues passed - do not have {CL_QUEUE_PROFILING_ENABLE} set. - -=== Modifications to Section 5.X - Command Buffers of the OpenCL API Specification - -==== Additional Section 5.X Introduction Text - -A command-buffer can contain commands recorded to the queues of different -devices if a vendor provides support for inter-device {cl_sync_point_khr_TYPE} -synchronization. 
This feature is reported either through -{CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}, which informs the user what devices can -synchronize with each other natively on the device-side, or through -{CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR}, which allows synchronization -between all devices in a platform, falling back to host-side synchronization -when device-side synchronization isn't available. These two mechanisms are -referred to as **device-side sync** and **universal sync** respectively. - -If these mechanisms don't report that more than one device can be used in a -command-buffer, it will still be possible to perform multiple queue recording in a -command-buffer if the {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR} -capability is reported for a device. However, with this capability all the -queues commands are recorded to must target the same device. - -Commands recorded to different command-queues in the same command-buffer may be -executed concurrently to each other unless synchronized explicitly with -sync-points. Ordering of other commands submitted to the same command-queues as -used to enqueue a command-buffer is the responsibility of the programmer. A -command-buffer enqueue spanning multiple queues can return an event to use for -synchronization, which will complete once all commands in the command-buffer -have completed. If ordering restrictions are required, this event (or -command-queue barriers) may be used by the user to synchronize the -command-buffer enqueue with regular commands, or another command-buffer enqueue. - -==== Add new section "Section 5.X.Y - Remapping Command Buffers" - -Platforms reporting the {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} capability -support generating a deep copy of a command-buffer with its commands remapped to a -list of command-queues that are potentially incompatible with the queues -used to create the command-buffer.
That is, the remapped command-buffer can -execute on queues that differ in terms of properties and/or associated device -from the original command-buffer queues. - -This functionality is invoked through a new synchronous entry-point -{clRemapCommandBufferKHR} which takes a list of queues to which the commands -should now target. It then returns a command-buffer containing the same -commands as the original, with the same command dependencies, but targeting -different queues. A list of command handles may also be passed to the -entry-point, which allows handles to the equivalent commands in the remapped -command-buffer to be returned by an output parameter. - -Device properties restrict remapping possibilities, as existing commands -can have a configuration which is not supported by another device, and so -remapping may fail with an error relating to this incompatibility. Examples -of command configurations which can introduce incompatibilities when trying to -map to a new device are: - -* Program language features used in a kernel not supported by the new device. -* ND-Range configuration, e.g. exceeds the new device's max work-group size. -* Misalignment of sub-buffers based on minimum alignment of new device. - -In addition to this functionality, platforms reporting -{CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} allow the user to create a -remapped command-buffer where the mapping of queues to commands is determined by -the OpenCL runtime in a way it determines as optimal. This is particularly -useful in hot plugging environments where devices may appear and disappear -during runtime. - -The function -include::{generated}/api/protos/clRemapCommandBufferKHR.txt[] - -Creates a deep copy of the input command-buffer with the copied commands -remapped to target the passed command-queues.
The returned command-buffer -has the same state as the input command-buffer, unless the input -command-buffer is in the Pending state, in which case the returned -command-buffer has state Executable. - -_command_buffer_ Specifies the command-buffer to create a remapped deep copy of. - -_automatic_ Indicates if the remapping is done explicitly by the user, or -automatically by the OpenCL runtime. If _automatic_ is {CL_FALSE}, then each -element of _queues_ will replace the queue used on _command_buffer_ creation at -the same index. If {CL_TRUE} and {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} -is supported, then the OpenCL runtime will decide in a way it determines optimal -which of the elements in _queues_ each command in the returned command-buffer -will be associated with. - -_num_queues_ The number of command-queues listed in _queues_, must not be 0. - -_queues_ A pointer to an ordered list of command-queues for the returned -command-buffer to target, must be a non-`NULL` value. - -_num_handles_ The number of command handles passed in both _handles_ and -_handles_ret_ lists, may be 0. - -_handles_ An ordered list of handles belonging to _command_buffer_ to create -remapped copies of, may be `NULL`. - -_handles_ret_ Returns an ordered list of handles where each handle is equivalent -to the handle at the same index in _handles_, but belonging to the returned -command-buffer. - -_errcode_ret_ Returns an appropriate error code. If _errcode_ret_ is `NULL`, no -error code is returned. - -{clRemapCommandBufferKHR} returns a valid command-buffer with _errcode_ret_ set -to {CL_SUCCESS} if the command-buffer is created successfully. Otherwise, it -returns a `NULL` value without setting _handles_ret_, and with one of the -following error values returned in _errcode_ret_: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_VALUE} if _num_queues_ is 0, or if _queues_ is `NULL`.
- -* {CL_INVALID_VALUE} if _automatic_ is {CL_FALSE} and _num_queues_ is not equal - to the number of queues used on creation of _command_buffer_. - -* {CL_INVALID_VALUE} if _handles_ or _handles_ret_ is `NULL` and - _num_handles_ is > 0, or either _handles_ or _handles_ret_ is not - `NULL` and _num_handles_ is 0. - -* {CL_INVALID_VALUE} if any handle in _handles_ is not a valid command handle - belonging to _command_buffer_. - -* {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a valid - command-queue. - -* {CL_INVALID_CONTEXT} if _command_buffer_ and all the command-queues in - _queues_ do not have the same OpenCL context. - -* {CL_INVALID_OPERATION} if the platform does not support the - {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} flag. - -* {CL_INVALID_OPERATION} if the platform does not support the - {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} flag and _automatic_ is - {CL_TRUE}. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if such an error would be returned by - passing _queues_ to {clCreateCommandBufferKHR}. - -* Any error relating to device support that can be returned by a command - recording entry-point may also be returned. As a command in _command_buffer_ - can have a configuration that is not supported by a device that is associated - with the queue in _queues_ the command is being remapped to. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources - required by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -==== Modifications to clCreateCommandBufferKHR - -==== New Property Flag - -Modify the {CL_COMMAND_BUFFER_FLAGS_KHR} property in the -{clCreateCommandBufferKHR} properties table to introduce a new flag to the -bitfield. The following text is now included in the description of property -values. 
- -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_COMMAND_BUFFER_FLAGS_KHR} -| {cl_command_buffer_flags_khr_TYPE} -| {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} - All commands in the command-buffer - must use native synchronization, as reported by - {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. This can be used as a safeguard - for performant applications that don't want to accidentally fallback to host - synchronization when passing multiple queues. -|==== - -==== Add to clCreateCommandBufferKHR description - -.Summary of command-buffer creation configurations -[width="100%",options="header"] - -|==== -| All devices associated with `queues` can device-side sync | Platform supports universal sync | Condition | Result - -.3+| Yes -.3+| Yes or No -| Any device does not support the multi-queue capability, and has more than one - queue targeting it -| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} -| User sets {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag -| OK -| Otherwise -| OK - -.3+| No -.3+| Yes -| Any device does not support the multi-queue capability, and has more than one - queue targeting it -| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} -| User sets {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag -| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} -| Otherwise -| OK - May be performance implications when synchronizing commands between devices - without device-side sync support. - -| No -| No -| Always -| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} - -|==== - -===== Parameter Updates - -Parameter descriptions changed to: - -_num_queues_ The number of command-queues listed in _queues_. - -_queues_ Is a pointer to a list of command-queues that the command-buffer may be -executed on. _queues_ must be a non-`NULL` value and length of the list equal to -_num_queues_. - -===== Error Updates - -The returned error: - -* {CL_INVALID_VALUE} if _num_queues_ is not one. 
- -Is changed to: - -* {CL_INVALID_VALUE} if _num_queues_ is zero. - -Additional errors: - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if _queues_ includes more than one - command-queue associated with a device that does not support capability - {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR}. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the - {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag is set, and any device - associated with a command-queue in _queues_ cannot natively synchronize with - the other devices associated with _queues_ as reported by - {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the platform doesn't support the - {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR} capability, and any device - associated with a command-queue in _queues_ cannot natively synchronize with - the other devices associated with _queues_ as reported by - {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. - -==== Command recording entry points - -The descriptions of command recording entry-points are modified as described in -this section. These changes apply to all of {clCommandCopyBufferKHR}, -{clCommandCopyBufferRectKHR}, {clCommandCopyBufferToImageKHR}, -{clCommandCopyImageKHR}, {clCommandCopyImageToBufferKHR}, -{clCommandFillBufferKHR}, {clCommandFillImageKHR}, -{clCommandNDRangeKernelKHR}, {clCommandSVMMemcpyKHR} and -{clCommandSVMMemFillKHR}. - -===== Parameter Update - -Parameter description of _command_queue_ is changed to: - -_command_queue_ Specifies the command-queue the command will be recorded to. -If _command_queue_ is `NULL` then only one command-queue must have been set -on _command_buffer_ creation, otherwise _command_queue_ must be a non-`NULL` -value. - -===== Error Update - -The error condition: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. 
- -Is changed to: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is `NULL` and _command_buffer_ - was created with more than one queue, or if _command_queue_ is non-`NULL` and - not a command-queue listed on _command_buffer_ creation. - -=== Sample Code - -[source,opencl] ----- -#define CL_CHECK(ERROR) \ - if (ERROR) { \ - std::cerr << "OpenCL error: " << ERROR << "\n"; \ - return ERROR; \ - } - -int main() { - cl_platform_id platform; - CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); - cl_platform_command_buffer_capabilities_khr platform_caps; - CL_CHECK(clGetPlatformInfo(platform, - CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR, - sizeof(platform_caps), &platform_caps, NULL)); - if (!(platform_caps & CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR)) { - std::cerr << "Command-buffer remapping not supported but used in example, " - "skipping\n"; - return 0; - } - - cl_uint num_devices = 0; - CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices)); - std::vector<cl_device_id> devices(num_devices); - CL_CHECK( - clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, devices.data(), nullptr)); - - // Checks omitted for brevity that either a) the platform supports - // CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR or b) each device is listed - // in the others CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR - - cl_int error; - cl_context context = - clCreateContext(NULL, num_devices, devices.data(), NULL, NULL, &error); - CL_CHECK(error); - - std::vector<cl_command_queue> queues(num_devices); - for (cl_uint i = 0; i < num_devices; i++) { - queues[i] = clCreateCommandQueue(context, devices[i], 0, &error); - CL_CHECK(error); - } - - const char *code = R"OpenCLC( - kernel void vector_addition(global int* tile1, global int* tile2, - global int* res) { - size_t index = get_global_id(0); - res[index] = tile1[index] + tile2[index]; - } - )OpenCLC"; - const size_t length = std::strlen(code); - - cl_program program = - clCreateProgramWithSource(context, 1, &code, &length, &error); -
CL_CHECK(error); - - CL_CHECK( - clBuildProgram(program, num_devices, devices.data(), NULL, NULL, NULL)); - - cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); - CL_CHECK(error); - - constexpr size_t frame_count = 60; - constexpr size_t frame_elements = 1024; - constexpr size_t frame_size = frame_elements * sizeof(cl_int); - - constexpr size_t tile_count = 16; - constexpr size_t tile_elements = frame_elements / tile_count; - constexpr size_t tile_size = tile_elements * sizeof(cl_int); - - cl_mem buffer_tile1 = - clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, NULL, &error); - CL_CHECK(error); - - cl_mem buffer_tile2 = - clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, NULL, &error); - CL_CHECK(error); - - cl_mem buffer_res = - clCreateBuffer(context, CL_MEM_WRITE_ONLY, tile_size, NULL, &error); - CL_CHECK(error); - - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(buffer_tile1), &buffer_tile1)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(buffer_tile2), &buffer_tile2)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(buffer_res), &buffer_res)); - - cl_command_buffer_khr original_cmdbuf = - clCreateCommandBufferKHR(num_devices, queues.data(), nullptr, &error); - CL_CHECK(error); - - cl_mem buffer_src1 = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, NULL, &error); - CL_CHECK(error); - - cl_mem buffer_src2 = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, NULL, &error); - CL_CHECK(error); - - cl_mem buffer_dst = - clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size, NULL, &error); - CL_CHECK(error); - - cl_sync_point_khr tile_sync_point = 0; - for (size_t tile_index = 0; tile_index < tile_count; tile_index++) { - cl_sync_point_khr copy_sync_points[2]; - CL_CHECK(clCommandCopyBufferKHR( - original_cmdbuf, queues[tile_index % num_devices], buffer_src1, - buffer_tile1, tile_index * tile_size, 0, tile_size, - tile_sync_point ? 1 : 0, tile_sync_point ? 
&tile_sync_point : NULL, - &copy_sync_points[0], NULL)); - - CL_CHECK(clCommandCopyBufferKHR( - original_cmdbuf, queues[tile_index % num_devices], buffer_src2, - buffer_tile2, tile_index * tile_size, 0, tile_size, - tile_sync_point ? 1 : 0, - tile_sync_point ? &tile_sync_point : nullptr, - &copy_sync_points[1], NULL)); - - cl_sync_point_khr nd_sync_point; - CL_CHECK(clCommandNDRangeKernelKHR( - original_cmdbuf, queues[tile_index % num_devices], NULL, kernel, 1, - NULL, &tile_elements, NULL, 2, copy_sync_points, &nd_sync_point, NULL)); - - CL_CHECK(clCommandCopyBufferKHR( - original_cmdbuf, queues[tile_index % num_devices], buffer_res, - buffer_dst, 0, tile_index * tile_size, tile_size, 1, &nd_sync_point, - &tile_sync_point, NULL)); - } - - CL_CHECK(clFinalizeCommandBufferKHR(original_cmdbuf)); - - std::random_device random_device; - std::mt19937 random_engine{random_device()}; - std::uniform_int_distribution<cl_int> random_distribution{ - 0, std::numeric_limits<cl_int>::max() / 2}; - auto random_generator = [&]() { return random_distribution(random_engine); }; - - auto enqueue_frame = [&](cl_command_buffer_khr command_buffer) { - for (size_t frame_index = 0; frame_index < frame_count; frame_index++) { - std::array<cl_event, 3> enqueue_events; - std::vector<cl_int> src1(frame_elements); - std::generate(src1.begin(), src1.end(), random_generator); - CL_CHECK(clEnqueueWriteBuffer(queues[0], buffer_src1, CL_FALSE, 0, - frame_size, src1.data(), 0, nullptr, - &enqueue_events[0])); - std::vector<cl_int> src2(frame_elements); - std::generate(src2.begin(), src2.end(), random_generator); - CL_CHECK(clEnqueueWriteBuffer(queues[0], buffer_src2, CL_FALSE, 0, - frame_size, src2.data(), 0, nullptr, - &enqueue_events[1])); - - CL_CHECK(clEnqueueCommandBufferKHR(0, NULL, command_buffer, 2, - enqueue_events.data(), - &enqueue_events[2])); - - CL_CHECK(clWaitForEvents(1, enqueue_events[2])); - - for (auto e : enqueue_events) { - CL_CHECK(clReleaseEvent(e)); - } - } - return 0; - }; - - error = enqueue_frame(original_cmdbuf); -
CL_CHECK(error); - - // Remap from N queues to 1 queue and run again - cl_command_buffer_khr remapped_cmdbuf = clRemapCommandBufferKHR( - original_cmdbuf, CL_TRUE, 1, queues.data(), 0, NULL, NULL, &error); - CL_CHECK(error); - - error = enqueue_frame(remapped_cmdbuf); - CL_CHECK(error); - - for (unsigned i = 0; i < num_devices; ++i) { - CL_CHECK(clReleaseCommandQueue(queues[i])); - } - CL_CHECK(clReleaseMemObject(buffer_src1)); - CL_CHECK(clReleaseMemObject(buffer_src2)); - CL_CHECK(clReleaseMemObject(buffer_dst)); - - CL_CHECK(clReleaseMemObject(buffer_tile1)); - CL_CHECK(clReleaseMemObject(buffer_tile2)); - CL_CHECK(clReleaseMemObject(buffer_res)); - - CL_CHECK(clReleaseCommandBufferKHR(original_cmdbuf)); - CL_CHECK(clReleaseCommandBufferKHR(remapped_cmdbuf)); - - CL_CHECK(clReleaseKernel(kernel)); - CL_CHECK(clReleaseProgram(program)); - CL_CHECK(clReleaseContext(context)); - - return 0; -} ----- - -=== Issues - -. In cl_event profiling info for a command-buffer running across the queues for - several devices, how do we know what the first & last commands executed are - if there is concurrent execution across devices. -+ --- -*RESOLVED*: Allowed an implementation to fallback to {CL_PROFILING_COMMAND_SUBMIT} -and {CL_PROFILING_COMMAND_COMPLETE} when reporting {CL_PROFILING_COMMAND_START} & -{CL_PROFILING_COMMAND_END}. --- -. Is an atomic constraint required? This would forbid regular clEnqueue* commands, -from interleaving execution on a queue which a command-buffer is being executed -on. -+ --- -*RESOLVED*: This behavior can block parallelism, and constraint is expressible -by the user through existing synchronization mechanisms if they require it. --- -. It is currently an error if a set of command-queues passed to -{clEnqueueCommandBufferKHR} aren't compatible with those set on recording. -Should we relax this as an optional capability that allows an implementation to -do a more expensive command-buffer enqueue for this case? 
-+ --- -*RESOLVED*: Added as an optional feature. --- diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc deleted file mode 100644 index b189c9c88..000000000 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ /dev/null @@ -1,1010 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_command_buffer_mutable_dispatch]] -== Command Buffers - Mutable Dispatch (Provisional) - -This extension enables users to modify the configuration of kernel execution -commands between command-buffer enqueues. - -=== General Information - -==== Name Strings - -`cl_khr_command_buffer_mutable_dispatch` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2022-08-31 | 0.9.0 | First assigned version (provisional). -| 2023-11-07 | 0.9.1 | Add type {cl_mutable_dispatch_asserts_khr_TYPE} and its possible values (provisional). -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension requires the `cl_khr_command_buffer` extension version 0.9.0. - -==== Contributors - -Ewan Crawford, Codeplay Software Ltd. + -Gordon Brown, Codeplay Software Ltd. + -Kenneth Benzie, Codeplay Software Ltd. + -Alastair Murray, Codeplay Software Ltd. + -Jack Frankland, Codeplay Software Ltd. + -Balaji Calidas, Qualcomm Technologies Inc. + -Joshua Kelly, Qualcomm Technologies, Inc. + -Kevin Petit, Arm Ltd. + -Aharon Abramson, Intel. + -Ben Ashbaugh, Intel. + -Boaz Ouriel, Intel. + -Pekka Jääskeläinen, Tampere University + -Jan Solanti, Tampere University + -Nikhil Joshi, NVIDIA + -James Price, Google + - -=== Overview - -The `cl_khr_command_buffer` extension separates command construction from -enqueue by providing a mechanism to record a set of commands which can then be -repeatedly enqueued. 
However, the commands recorded to the command-buffer are -immutable between enqueues. - -`cl_khr_command_buffer_mutable_dispatch` removes this restriction, in particular, -this extension allows the configuration of a kernel execution command in a -command-buffer, called a _mutable-dispatch_, to be modified. This allows inputs -and outputs to the kernel, as well as work-item sizes and offsets, to change -without having to re-record the entire command sequence in a new command-buffer. - -=== Interactions with Other Extensions - -The {cl_command_buffer_structure_type_khr_TYPE} type has been added to this -extension for the purpose of allowing expansion of mutable functionality in -future extensions layered on top of `cl_khr_command_buffer_mutable_dispatch`. -Any parameter that is a structure containing a `void* next` member *must* have -a value of `next` that is either `NULL`, or is a pointer to a valid structure -defined by `cl_khr_command_buffer_mutable_dispatch` or an extension layered on -top. To be a valid structure in the pointer chain the first member of the -structure *must* be a {cl_command_buffer_structure_type_khr_TYPE} identifier for the -structure being iterated through, and the second member a `void* next` pointer -to the next structure in the chain. - -[NOTE] -==== -This approach is based on structure pointer chains in Vulkan, for more details -see the "Valid Usage for Structure Pointer Chains" section of the Vulkan -specification. -==== - -This is designed so that another extension layered on -`cl_khr_command_buffer_mutable_dispatch` could allow modification of commands -recorded to a command-buffer other than kernel execution commands. As all -command recording entry-points return a {cl_mutable_command_khr_TYPE} handle, and -aspects like which {cl_mem_TYPE} object a command uses could also be updated between -enqueues of the command-buffer. 
- -=== New Types - -==== Mutable Command Types - -Types for using mutable-command objects from -<<mutable-commands, Section 5.X.5>>: - -[source,opencl] ----- -// Bitfield covering each aspect of a mutable-dispatch which can be updated -typedef cl_bitfield cl_mutable_dispatch_fields_khr; - -// For querying mutable-command objects with clGetMutableCommandInfoKHR -typedef cl_uint cl_mutable_command_info_khr; - -// Identifies the type of a structure to allow structure pointer chains -typedef cl_uint cl_command_buffer_structure_type_khr; - -// Bitfield describing mutable-dispatch assertions, enabling possible optimizations -typedef cl_bitfield cl_mutable_dispatch_asserts_khr; ----- - -Struct type for setting kernel arguments normally passed using {clSetKernelArg} -and {clSetKernelArgSVMPointer}: - -include::{generated}/api/structs/cl_mutable_dispatch_arg_khr.txt[] - -Struct type for setting kernel execution info normally passed using -{clSetKernelExecInfo}: - -include::{generated}/api/structs/cl_mutable_dispatch_exec_info_khr.txt[] - -[NOTE] -==== -_param_name_ is of type {cl_uint_TYPE} rather than {cl_kernel_exec_info_TYPE} so that the -extension can be implemented on OpenCL 1.2 where the {cl_kernel_exec_info_TYPE} -typedef is unavailable. -==== - -Struct type passed to {clUpdateMutableCommandsKHR} for setting the kernel -configuration of a mutable {clCommandNDRangeKernelKHR} command: - -include::{generated}/api/structs/cl_mutable_dispatch_config_khr.txt[] - -_type_ Type of this structure, must be -{CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. - -_next_ Is `NULL` or a pointer to an extending structure. - -_command_ A mutable-command object returned by {clCommandNDRangeKernelKHR} -representing a kernel execution as part of a command-buffer. - -_num_args_ Is the number of kernel arguments being changed. - -_num_svm_args_ Is the number of SVM kernel arguments being changed. - -_num_exec_infos_ Is the number of kernel execution info objects to set for -this dispatch.
- -_work_dim_ Is the number of dimensions used to specify the global work-items -and work-items in the work-group. See {clEnqueueNDRangeKernel} for valid usage. - -_arg_list_ Is an array describing the new kernel arguments for this enqueue. It -must contain _num_args_ array elements, each of which encapsulates parameters -passed to {clSetKernelArg}. See {clSetKernelArg} for usage of -{cl_mutable_dispatch_arg_khr_TYPE} members. - -_arg_svm_list_ is an array describing the new SVM kernel arguments for this -enqueue. It must contain _num_svm_args_ array elements, each of which -encapsulates parameters passed to {clSetKernelArgSVMPointer}. See -{clSetKernelArgSVMPointer} for usage of -{cl_mutable_dispatch_arg_khr_TYPE} members, -`arg_size` is ignored. - -_exec_info_list_ Is an array containing _num_exec_infos_ elements -specifying the list of execution info objects use for this command-buffer -enqueue. See {clSetKernelExecInfo} for usage of -{cl_mutable_dispatch_exec_info_khr_TYPE} -members. - -_global_work_offset_ Can be used to specify an array of _work_dim_ unsigned -values that describe the offset used to calculate the global ID of a work-item. -If _global_work_offset_ is `NULL` then the global offset of the dispatch is not -changed. See {clEnqueueNDRangeKernel} for valid usage. - -_global_work_size_ Points to an array of _work_dim_ unsigned values that -describe the number of global work-items in _work_dim_ dimensions that will -execute the kernel function. If _global_work_size_ is `NULL` then the number of -global work-items in the dispatch is not changed. See {clEnqueueNDRangeKernel} -for valid usage. - -_local_work_size_ Points to an array of _work_dim_ unsigned values that -describe the number of work-items that make up a work-group that will execute -the kernel. If _local_work_size_ is `NULL` then the number of local work-items -in the dispatch is not changed. See {clEnqueueNDRangeKernel} for valid usage. 
- -[[cl_mutable_base_config_khr]] -[source,opencl] ----- -typedef struct _cl_mutable_base_config_khr { - cl_command_buffer_structure_type_khr type, - const void* next, - cl_uint num_mutable_dispatch, - const cl_mutable_dispatch_config_khr* mutable_dispatch_list -} cl_mutable_base_config_khr; ----- - -_type_ Type of this structure, must be -{CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR} - -_next_ Is `NULL` or a pointer to an extending structure. - -_num_mutable_dispatch_ Is the number of mutable-dispatch objects to configure -in this enqueue of the command-buffer. - -_mutable_dispatch_list_ Is an array containing _num_mutable_dispatch_ elements -describing the configurations of mutable kernel execution commands in the -command-buffer. For a description of struct members making up each array -element see {cl_mutable_dispatch_config_khr_TYPE}. - -=== New API Functions - -Mutable-handle entry points from <>: -[source,opencl] ----- -cl_int clUpdateMutableCommandsKHR( - cl_command_buffer_khr command_buffer, - const cl_mutable_base_config_khr* mutable_config); - -cl_int clGetMutableCommandInfoKHR( - cl_mutable_command_khr command, - cl_mutable_command_info_khr param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); ----- - -=== New API Enums - -Enums for working with mutable-command objects from -<>: - -[source,opencl] ----- -// Error code -CL_INVALID_MUTABLE_COMMAND_KHR -1141 - -// Accepted values for the param_name parameter to clGetDeviceInfo -CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 - -// Accepted command buffer property to clCreateCommandBufferKHR -CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 - -// Bits for cl_command_buffer_flags_khr -CL_COMMAND_BUFFER_MUTABLE_KHR (0x1 << 1) - -// Accepted ND-range kernel command properties to clCommandNDRangeKernelKHR -CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 -CL_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B8 - -// Bits for cl_mutable_dispatch_fields_khr bitfield 
-CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (0x1 << 0) -CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR (0x1 << 1) -CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR (0x1 << 2) -CL_MUTABLE_DISPATCH_ARGUMENTS_KHR (0x1 << 3) -CL_MUTABLE_DISPATCH_EXEC_INFO_KHR (0x1 << 4) - -// Bits for cl_mutable_dispatch_asserts_khr bitfield -CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR (0x1 << 0) - -// cl_mutable_command_info_khr -CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR 0x12A0 -CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR 0x12A1 -CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR 0x12A2 -CL_MUTABLE_DISPATCH_KERNEL_KHR 0x12A3 -CL_MUTABLE_DISPATCH_DIMENSIONS_KHR 0x12A4 -CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR 0x12A5 -CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR 0x12A6 -CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR 0x12A7 -CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR 0x12AD ----- - -Enum values for {cl_command_buffer_structure_type_khr_TYPE} allowing the structure -types used for mutating commands between enqueues to be extended by future -extensions built on top of `cl_khr_command_buffer_mutable_dispatch`. Based on -structure pointer chains in Vulkan. -[source,opencl] ----- -CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR 0 -CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR 1 ----- - -=== Modifications to section 4.2 of the OpenCL API Specification - -Add to *Table 5*, _Device Queries_, of section 4.2: - -[[command-dispatch-queries]] -[caption="Table 5. "] -.List of supported param_names by {clGetDeviceInfo} -[cols="1,1,4",options="header"] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} -| {cl_mutable_dispatch_fields_khr_TYPE} -| Describes device mutable-dispatch capabilities, encoded as bits in a bitfield. - Supported capabilities are: - - {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} Device supports the ability to modify - the _global_work_offset_ of kernel execution after command recording. 
- - {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} Device supports the ability to modify - the _global_work_size_ of kernel execution after command recording. - - {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} Device supports the ability to modify - the _local_work_size_ of kernel execution after command recording. - - {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} Device supports the ability to modify - arguments set on a kernel after command recording. - - {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} Device supports the ability to modify - execution information set on a kernel after command recording. - -|==== - -=== Modifications to Section 5.X - Command Buffers of the OpenCL API Specification - -==== Modifications to clCreateCommandBufferKHR - -Modify the {CL_COMMAND_BUFFER_FLAGS_KHR} property in the -<> table to -introduce a new flag to the bitfield. The following text is now included in the -description of property values. - -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_COMMAND_BUFFER_FLAGS_KHR} -| {cl_command_buffer_flags_khr_TYPE} -| {CL_COMMAND_BUFFER_MUTABLE_KHR} - Enables modification of the - command-buffer, by default command-buffers are immutable. If set, - commands in the command-buffer may be updated via {clUpdateMutableCommandsKHR}. - -|==== - -Add a {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property to the -<> table. - -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} -| {cl_mutable_dispatch_asserts_khr_TYPE} -| This is a bitfield and can be set to a combination of the following values: - - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} - An assertion by the user that the number of work-groups of any ND-range kernel recorded in this command - buffer will not be updated beyond the number defined when the ND-range kernel was recorded. 
- If the user's update to the values of _local_work_size_ and/or _global_work_size_ result in an increase - in the number of work-groups in the ND-range over the number specified when the ND-range kernel was - recorded, the behavior is undefined. - -|==== - -==== Modifications to clCommandNDRangeKernelKHR - -===== Properties Parameter - -Description of the _properties_ parameter is changed to: - -_properties_ Specifies a list of properties for the kernel command and their -corresponding values. Each property name is immediately followed by the -corresponding desired value. The list is terminated with 0. If a supported -property and its value is not specified in _properties_, its default value will -be used. _properties_ may be `NULL` in which case the default values for -supported properties will be used. The list of supported properties is described -in the table below. - -.{clCommandNDRangeKernelKHR} properties -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} -| {cl_mutable_dispatch_fields_khr_TYPE} -| This is a bitfield and can be set to a combination of the following values: - - {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} - Determines whether the _global_work_offset_ of kernel execution can be - modified after recording. If set, the _global_work_offset_ of the kernel - execution can be changed with {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field - of the _mutable_config_ parameter. Otherwise, the _global_work_offset_ cannot - be modified. - - {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} - Determines whether the _global_work_size_ of kernel execution can be - modified after recording. If set, the _global_work_size_ of the kernel - execution can be changed with {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field - of the _mutable_config_ parameter. Otherwise, the _global_work_size_ cannot be - modified. 
- - {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} - Determines whether the _local_work_size_ of kernel execution can be - modified after recording. If set, the _local_work_size_ of the kernel - execution can be changed with {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field - of the _mutable_config_ parameter. Otherwise, the _local_work_size_ cannot be - modified. - - {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} - Determines whether the kernel arguments set on _kernel_ can be updated - between executions. If set, the kernel arguments normally set with - {clSetKernelArg} and {clSetKernelArgSVMPointer} can be changed with - {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field - of the _mutable_config_ parameter. Otherwise, the kernel arguments cannot be - modified between executions. - - {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} - Determines whether the information passed to _kernel_ can be updated between - executions. If set, the execution information of the kernel can be changed - with {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field of - the _mutable_config_ parameter. Otherwise, the kernel execution information - cannot be modified. - - If {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} is not specified then it - defaults to the value returned by the - {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} device query. - -| {CL_MUTABLE_DISPATCH_ASSERTS_KHR} -| {cl_mutable_dispatch_asserts_khr_TYPE} -| This is a bitfield and can be set to a combination of the following values: - - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} - An assertion by the user that the number of work-groups of this ND-range kernel will not be updated beyond - the number defined when the ND-range kernel was recorded. The number of work-groups is - defined as the product for each _i_ from _0_ to _work_dim - 1_ of - _ceil(global_work_size[i]/local_work_size[i])_. 
-|==== - -===== Mutable Handle Parameter - -Description of the _mutable_handle_ parameter is changed to: - -_mutable_handle_ Returns a handle to the command that can be used in the -{cl_mutable_dispatch_config_khr_TYPE} struct -to update the command configuration between recordings, may be `NULL`. The -lifetime of this handle is tied to the parent command-buffer, such that freeing -the command-buffer will also free this handle. - -===== Additional Errors - -The error condition: - -* {CL_INVALID_OPERATION} if _mutable_handle_ is not `NULL`. - -Is replaced with - -* {CL_INVALID_OPERATION} if the requested - {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} properties are not reported by - {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} for the device associated with - _command_queue_. If _command_queue_ is `NULL`, the device associated with - _command_buffer_ must report support for these properties. - -The following error condition is added: - -* {CL_INVALID_VALUE} if _command_buffer_ was created with the - {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property with - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and - _local_work_size_ is `NULL`, or if _properties_ includes the - {CL_MUTABLE_DISPATCH_ASSERTS_KHR} property with - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and - _local_work_size_ is `NULL`. - -[[mutable-commands]] -==== New Section in the OpenCL API specification 5.X.5 - Mutable Commands: - -A generic {cl_mutable_command_khr_TYPE} handle is called a _mutable-command_ object -as it can be returned from any command recording entry-point in the -`cl_khr_command_buffer` family of extensions. The mutable-command handles -returned by {clCommandNDRangeKernelKHR} in particular are referred to as -_mutable-dispatch_ objects, and can be modified through the fields of -{cl_mutable_dispatch_config_khr_TYPE}. - -Mutable-command handles are updated between enqueues using entry-point -{clUpdateMutableCommandsKHR}. 
To enable performant usage, all aspects of -mutation are encapsulated inside a single -{cl_mutable_base_config_khr_TYPE} parameter. This means -that the runtime has access to all the information about how the command-buffer -will change, allowing the command-buffer to be rebuilt as efficiently as -possible. Any modifications to the arguments or execution info of a mutable-dispatch -handle using {cl_mutable_dispatch_arg_khr_TYPE} or -{cl_mutable_dispatch_exec_info_khr_TYPE} have no -effect on the original kernel object used when the command was recorded, and -only influence the {clCommandNDRangeKernelKHR} command associated with the -mutable-dispatch. - -[[mutable-dispatch-kernel-argument-safe-usage]] -[NOTE] -==== -The base `cl_khr_command_buffer` extension -states that a command-buffer -does not update the reference count of objects set as arguments on kernels -recorded into the command-buffer. - -The implication for applications using {clUpdateMutableCommandsKHR} is -that it is safe to delete objects used as kernel command arguments, if all the -kernel commands using that object as an argument have had their arguments -replaced with a different object. -==== - -To facilitate performant usage for pipelined work flows, where applications -repeatedly call command-buffer update then enqueue, implementations may defer -some of the work to allow {clUpdateMutableCommandsKHR} to return immediately. -Deferring any recompilation until {clEnqueueCommandBufferKHR} avoids blocking -in host code and keeps device occupancy high. This is only possible with a -command-buffer created with the {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag, -as without this the enqueued command-buffer must complete before any modification -occurs.
- -The function - -include::{generated}/api/protos/clUpdateMutableCommandsKHR.txt[] - -Modifies the configuration of mutable-command handles returned during -_command_buffer_ recording, updating the behavior of those commands in future -enqueues of _command_buffer_. Using this function when _command_buffer_ is in -the <> state and not created with the -{CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag causes undefined behavior. - -[NOTE] -==== -Performant usage is to call {clUpdateMutableCommandsKHR} only when the desired -state of all commands is known, rather than iteratively updating each command -individually. -==== - -[NOTE] -==== -If the command buffer has been created with {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, or -the updated ND-range command has been recorded with this flag, and the ND-range parameters are updated so -that the new number of work-groups exceeds the number when the ND-range command was recorded, the behavior -is undefined. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_mutable_config_ Is a pointer to a -{cl_mutable_base_config_khr_TYPE} structure defining -updates to make to mutable-commands. - -{clUpdateMutableCommandsKHR} returns {CL_SUCCESS} if all the mutable-command -objects were updated successfully. Otherwise, none of the updates to -mutable-command objects are preserved and one of the errors below is returned: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. - -* {CL_INVALID_OPERATION} if _command_buffer_ was not created with the - {CL_COMMAND_BUFFER_MUTABLE_KHR} flag. - -* {CL_INVALID_VALUE} if the _type_ member of _mutable_config_ is not - {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR}. 
- -* {CL_INVALID_VALUE} if the _mutable_dispatch_list_ member of _mutable_config_ - is `NULL` and _num_mutable_dispatch_ > 0, or _mutable_dispatch_list_ is not - `NULL` and _num_mutable_dispatch_ is 0. - -* {CL_INVALID_VALUE} if the _next_ member of _mutable_config_ is not `NULL` and - any iteration of the structure pointer chain does not contain valid _type_ - and _next_ members. - -* {CL_INVALID_VALUE} if _mutable_config_ is `NULL`, or if both _next_ and - _mutable_dispatch_list_ members of _mutable_config_ are `NULL`. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by - the OpenCL implementation on the host. - -If the _mutable_dispatch_list_ member of _mutable_config_ is non-`NULL`, then -errors defined by {clEnqueueNDRangeKernel}, {clSetKernelExecInfo}, -{clSetKernelArg}, and {clSetKernelArgSVMPointer} are returned by -{clUpdateMutableCommandsKHR} if any of the array elements are set to an invalid -value. Additionally, the following errors are returned if any -{cl_mutable_dispatch_config_khr_TYPE} element of -the array violates the defined conditions: - -* {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable - command object, or was not created from _command_buffer_. - -* {CL_INVALID_VALUE} if _type_ is not - {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. - -* {CL_INVALID_OPERATION} if the values of _local_work_size_ and/or - _global_work_size_ result in a change to work-group uniformity. - -* {CL_INVALID_OPERATION} if the _work_dim_ is different from the _work_dim_ set - on _command_ recording. - -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} property - was not set on _command_ recording and _global_work_offset_ is not `NULL`.
- -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} property - was not set on _command_ recording and _global_work_size_ is not `NULL`. - -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} property - was not set on _command_ recording and _local_work_size_ is not `NULL`. - -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} property was - not set on _command_ recording and _num_args_ or _num_svm_args_ is non-zero. - -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} property was - not set on _command_ recording and _num_exec_infos_ is non-zero. - -* {CL_INVALID_VALUE} if _arg_list_ is `NULL` and _num_args_ > 0, or _arg_list_ - is not `NULL` and _num_args_ is 0. - -* {CL_INVALID_VALUE} if _arg_svm_list_ is `NULL` and _num_svm_args_ > 0, or - _arg_svm_list_ is not `NULL` and _num_svm_args_ is 0. - -* {CL_INVALID_VALUE} if _exec_info_list_ is `NULL` and _num_exec_infos_ > 0, or - _exec_info_list_ is not `NULL` and _num_exec_infos_ is 0. - -The function - -include::{generated}/api/protos/clGetMutableCommandInfoKHR.txt[] - -Queries information about the _command_ object. - -_command_ Specifies the mutable-command object being queried. - -_param_name_ Specifies the information to query. The list of supported -_param_name_ types and the information returned in _param_value_ by -{clGetMutableCommandInfoKHR} is described in the -<<mutable-command-object-queries, Mutable Command Object Queries>> table. - -_param_value_size_ Is used to specify the size in bytes of memory pointed to by -_param_value_. This size must be ≥ size of return type as described in the -<<mutable-command-object-queries, Mutable Command Object Queries>> table. - -_param_value_ Is a pointer to memory where the appropriate result being queried -is returned. If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ret_ Returns the actual size in bytes of data being queried -by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored.
- -[[mutable-command-object-queries]] -._Mutable Command Object Queries_ -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Mutable Command Info -| Return Type -| Description - -| {CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR} -| {cl_command_queue_TYPE} -| Return the command-queue associated with _command_. If `NULL` was passed as - the queue when _command_ was recorded, then the queue associated with the - command-buffer that _command_ belongs to is returned. - -| {CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR} -| {cl_command_buffer_khr_TYPE} -| Return the command-buffer associated with _command_. - -| {CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR} -| {cl_command_type_TYPE} -| Return the command-type associated with _command_. - - The list of supported event command types defined by {clGetEventInfo} is used - with the matching command. - -| {CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR} -| {cl_ndrange_kernel_command_properties_khr_TYPE}[] -| Return the properties argument specified on _command_ recording with - {clCommandNDRangeKernelKHR}. - - If the properties argument specified on creation of _command_ was not - `NULL`, the implementation must return the values specified in the - properties argument in the same order and without including additional - properties. - - If the properties argument specified on creation of _command_ was `NULL`, - or _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - there are no properties to be returned. - -| {CL_MUTABLE_DISPATCH_KERNEL_KHR} -| {cl_kernel_TYPE} -| Return the kernel associated with _command_ when recorded with - {clCommandNDRangeKernelKHR}. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. 
- -| {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR} -| {cl_uint_TYPE} -| Return the number of work-item dimensions specified when _command_ was - created. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. - -| {CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR} -| {size_t_TYPE}[] -| Return the global work-item offset set on _command_ creation, or from - the most recent update via {clUpdateMutableCommandsKHR} where this value - was modified. The output array contains _work_dim_ values, where _work_dim_ is - returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. If a global work-item - offset was not set, zero is returned for each element in the array. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. - -| {CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR} -| {size_t_TYPE}[] -| Return the global work-item size set on _command_ creation, or from - the most recent update via {clUpdateMutableCommandsKHR} where this value - was modified. The output array contains _work_dim_ values, where _work_dim_ is - returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. - If a global work-item size was not set, zero is returned for each element in - the array. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. - -| {CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR} -| {size_t_TYPE}[] -| Return the local work-item size set on _command_ creation, or from - the most recent update via {clUpdateMutableCommandsKHR} where this value - was modified. 
The output array contains _work_dim_ values, where _work_dim_ is - returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. If a local work-item - size was not set, zero is returned for each element in the array. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. -|==== - -{clGetMutableCommandInfoKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in the - <<mutable-command-object-queries, Mutable Command Object Queries>> table - and _param_value_ is not `NULL`. - -* {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable - command object. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources - required by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -=== Sample Code - -Sample application updating the arguments to a mutable-dispatch between -command-buffer submissions.
- -[source,opencl] ----- - #define CL_CHECK(ERROR) \ - if (ERROR) { \ - std::cerr << "OpenCL error: " << ERROR << "\n"; \ - return ERROR; \ - } - - int main() { - cl_platform_id platform; - CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); - cl_device_id device; - CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr)); - - cl_mutable_dispatch_fields_khr mutable_capabilities; - CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, - sizeof(mutable_capabilities), &mutable_capabilities, - nullptr)); - if (!(mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR)) { - std::cerr - << "Device does not support update arguments to a mutable-dispatch, " - "skipping example.\n"; - return 0; - } - - cl_int error; - cl_context context = - clCreateContext(nullptr, 1, &device, nullptr, nullptr, &error); - CL_CHECK(error); - - const char* code = R"OpenCLC( - kernel void vector_addition(global int* tile1, global int* tile2, - global int* res) { - size_t index = get_global_id(0); - res[index] = tile1[index] + tile2[index]; - } - )OpenCLC"; - const size_t length = std::strlen(code); - - cl_program program = - clCreateProgramWithSource(context, 1, &code, &length, &error); - CL_CHECK(error); - - CL_CHECK(clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr)); - - cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); - CL_CHECK(error); - - // Set the parameters of the frames - constexpr size_t iterations = 60; - constexpr size_t elem_size = sizeof(cl_int); - constexpr size_t frame_width = 32; - constexpr size_t frame_count = frame_width * frame_width; - constexpr size_t frame_size = frame_count * elem_size; - - cl_mem input_A_buffers[2] = {nullptr, nullptr}; - cl_mem input_B_buffers[2] = {nullptr, nullptr}; - cl_mem output_buffers[2] = {nullptr, nullptr}; - - // Create the buffer to swap between even and odd kernel iterations - for (size_t i = 0; i < 2; i++) { - input_A_buffers[i] = - clCreateBuffer(context, 
CL_MEM_READ_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - - input_B_buffers[i] = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - - output_buffers[i] = - clCreateBuffer(context, CL_MEM_WRITE_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - } - - cl_command_queue command_queue = - clCreateCommandQueue(context, device, 0, &error); - CL_CHECK(error); - - // Create command-buffer with mutable flag so we can update it - cl_command_buffer_properties_khr properties[3] = { - CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0}; - cl_command_buffer_khr command_buffer = - clCreateCommandBufferKHR(1, &command_queue, properties, &error); - CL_CHECK(error); - - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_A_buffers[0])); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &input_B_buffers[0])); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_buffers[0])); - - // Instruct the nd-range command to allow for mutable kernel arguments - cl_ndrange_kernel_command_properties_khr mutable_properties[] = { - CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, - CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0}; - - // Create command handle for mutating nd-range command - cl_mutable_command_khr command_handle = nullptr; - - // Add the nd-range kernel command - error = clCommandNDRangeKernelKHR( - command_buffer, command_queue, mutable_properties, kernel, 1, nullptr, - &frame_count, nullptr, 0, nullptr, nullptr, &command_handle); - CL_CHECK(error); - - CL_CHECK(clFinalizeCommandBufferKHR(command_buffer)); - - // Prepare for random input generation - std::random_device random_device; - std::mt19937 random_engine{random_device()}; - std::uniform_int_distribution random_distribution{ - std::numeric_limits::min() / 2, - std::numeric_limits::max() / 2}; - - // Iterate over each frame - for (size_t i = 0; i < iterations; i++) { - // Set the buffers for the current frame - cl_mem input_A_buffer = input_A_buffers[i % 
2]; - cl_mem input_B_buffer = input_B_buffers[i % 2]; - cl_mem output_buffer = output_buffers[i % 2]; - - // Generate input A data - std::vector input_a(frame_count); - std::generate(std::begin(input_a), std::end(input_a), - [&]() { return random_distribution(random_engine); }); - - // Write the generated data to the input A buffer - error = - clEnqueueWriteBuffer(command_queue, input_A_buffer, CL_FALSE, 0, - frame_size, input_a.data(), 0, nullptr, nullptr); - CL_CHECK(error); - - // Generate input B data - std::vector input_b(frame_count); - std::generate(std::begin(input_b), std::end(input_b), - [&]() { return random_distribution(random_engine); }); - - // Write the generated data to the input B buffer - error = - clEnqueueWriteBuffer(command_queue, input_B_buffer, CL_FALSE, 0, - frame_size, input_b.data(), 0, nullptr, nullptr); - CL_CHECK(error); - - // If not executing the first frame - if (i != 0) { - // Configure the mutable configuration to update the kernel arguments - cl_mutable_dispatch_arg_khr arg_0{0, sizeof(cl_mem), &input_A_buffer}; - cl_mutable_dispatch_arg_khr arg_1{1, sizeof(cl_mem), &input_B_buffer}; - cl_mutable_dispatch_arg_khr arg_2{2, sizeof(cl_mem), &output_buffer}; - cl_mutable_dispatch_arg_khr args[] = {arg_0, arg_1, arg_2}; - cl_mutable_dispatch_config_khr dispatch_config{ - CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, - nullptr, - command_handle, - 3 /* num_args */, - 0 /* num_svm_arg */, - 0 /* num_exec_infos */, - 0 /* work_dim - 0 means no change to dimensions */, - args /* arg_list */, - nullptr /* arg_svm_list - nullptr means no change*/, - nullptr /* exec_info_list */, - nullptr /* global_work_offset */, - nullptr /* global_work_size */, - nullptr /* local_work_size */}; - cl_mutable_base_config_khr mutable_config{ - CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, - &dispatch_config}; - - // Update the command buffer with the mutable configuration - error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); - 
CL_CHECK(error); - } - - // Enqueue the command buffer - error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr, - nullptr); - CL_CHECK(error); - - // Allocate memory for the output data - std::vector output(frame_count); - - // Read the output data from the output buffer - error = clEnqueueReadBuffer(command_queue, output_buffer, CL_TRUE, 0, - frame_size, output.data(), 0, nullptr, nullptr); - CL_CHECK(error); - - // Flush and execute the read buffer - error = clFinish(command_queue); - CL_CHECK(error); - - // Verify the results of the frame - for (size_t i = 0; i < frame_count; ++i) { - const cl_int result = input_a[i] + input_b[i]; - if (output[i] != result) { - std::cerr << "Error: Incorrect result at index " << i << " - Expected " - << output[i] << " was " << result << std::endl; - std::exit(1); - } - } - } - - std::cout << "Result verified\n"; - - CL_CHECK(clReleaseCommandBufferKHR(command_buffer)); - for (size_t i = 0; i < 2; i++) { - CL_CHECK(clReleaseMemObject(input_A_buffers[i])); - CL_CHECK(clReleaseMemObject(input_B_buffers[i])); - CL_CHECK(clReleaseMemObject(output_buffers[i])); - } - CL_CHECK(clReleaseCommandQueue(command_queue)); - CL_CHECK(clReleaseKernel(kernel)); - CL_CHECK(clReleaseProgram(program)); - CL_CHECK(clReleaseContext(context)); - CL_CHECK(clReleaseDevice(device)); - return 0; - } ----- - -=== Issues - -. Include simpler, more user friendly, entry-points for updating kernel - arguments? -+ --- -*RESOLVED*: Can be implemented in the ecosystem as a layer on top, if -that layer proves popular then can be introduced, possibly as another -extension on top. --- - -. Add a command-buffer clone entry-point for deep copying a command-buffer? - Arguments could then be updated and both command-buffers used. Useful for - techniques like double buffering. 
-+ --- -*RESOLVED*: In the use-case we're targeting a user would only have a handle to -the original command-buffer, but not the clone, which may limit the usefulness -of this capability. Additionally, an implementation could be complicated by -non-trivial deep copying of the underlying objects contained in the -command-buffer. As a result of this new entry-point being an additive change to -the specification it is omitted, and if its functionality has demand later, it -may be introduced as a standalone extension. --- diff --git a/ext/cl_khr_create_command_queue.asciidoc b/ext/cl_khr_create_command_queue.asciidoc deleted file mode 100644 index de1cf6e4f..000000000 --- a/ext/cl_khr_create_command_queue.asciidoc +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_create_command_queue]] -== Creating Command-Queues with Properties - -=== Overview - -This section describes the *cl_khr_create_command_queue* extension. - -This extension allows OpenCL 1.x devices to support an equivalent of the -{clCreateCommandQueueWithProperties} API that was added in OpenCL 2.0. -This allows OpenCL 1.x devices to support other optional extensions or -features that use the {clCreateCommandQueueWithProperties} API to specify -additional command-queue properties that cannot be specified using the -OpenCL 1.x {clCreateCommandQueue} API. - -No new command-queue properties are required by this extension. -Applications may use the existing {CL_DEVICE_QUEUE_PROPERTIES} query to -determine command-queue properties that are supported by the device. - -OpenCL 2.x devices may support this extension for compatibility. In -this scenario, the function added by this extension will have the same -capabilities as the core {clCreateCommandQueueWithProperties} API.
-Applications that only target OpenCL 2.x devices should use the core -OpenCL 2.x {clCreateCommandQueueWithProperties} API instead of this -extension API. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -=== New API Functions - -[source,opencl] ----- -cl_command_queue clCreateCommandQueueWithPropertiesKHR( - cl_context context, - cl_device_id device, - const cl_queue_properties_khr *properties, - cl_int *errcode_ret); ----- - -=== New API Types - -[source,opencl] ----- -typedef cl_properties cl_queue_properties_khr; ----- - -=== Modifications to the OpenCL 1.2 Specification - -(Add to Table 5.2 for {CL_QUEUE_PROPERTIES} in Section 5.1) :: -+ --- - -[caption="Table 5.2 "] -.List of supported param_names by {clGetCommandQueueInfo} -[cols="2,1,3",options="header",] -|======================================================================= -| Queue Info | Return Type | Description - -| {CL_QUEUE_PROPERTIES} -| {cl_command_queue_properties_TYPE} -| Returns the currently specified properties for the command-queue. -These properties are specified by the _properties_ argument in -{clCreateCommandQueue}, or by the {CL_QUEUE_PROPERTIES} property value in -{clCreateCommandQueueWithPropertiesKHR}. - -|======================================================================= --- - -(Add a new Section 5.1.1, *Creating Command-Queues With Properties*) :: -+ --- - -The function - -include::{generated}/api/protos/clCreateCommandQueueWithPropertiesKHR.txt[] - -allows creation of a command-queue from an array of properties -for the specified device. - -_context_ must be a valid OpenCL context. - -_device_ must be a device or sub-device associated with _context_. 
It -can either be in the list of devices and sub-devices specified when -_context_ is created using {clCreateContext} or -be a root device with the same device type as specified when _context_ -is created using {clCreateContextFromType}. - -_properties_ specifies a list of properties for the command-queue and -their corresponding values. Each property name is immediately followed -by the corresponding desired value. The list is terminated with 0. The -list of supported properties is described in the table below. If a -supported property and its value is not specified in _properties_, its -default value will be used. _properties_ can be NULL in which case the -default values for supported command-queue properties will be used. - -[caption="Table X.Y "] -.List of supported param_names by {clCreateCommandQueueWithPropertiesKHR} -|======================================================================= -|*Queue Properties* |*Property Value* |*Description* - -|{CL_QUEUE_PROPERTIES} -|{cl_bitfield_TYPE} -| This is a bitfield and can be set to a combination of the following -values: + -{blank} -{CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} - Determines whether the -commands queued in the command-queue are executed in-order or out-of-order. If -set, the commands in the command-queue are executed out-of-order. Otherwise, -commands are executed in-order. + -{blank} -{CL_QUEUE_PROFILING_ENABLE} - Enable or disable profiling of commands in -the command-queue. If set, the profiling of commands is enabled. Otherwise, -profiling of commands is disabled. + -{blank} -If {CL_QUEUE_PROPERTIES} is not specified an in-order command-queue that -does not support profiling of commands is created for the specified device. - -|======================================================================= - -_errcode_ret_ will return an appropriate error code. If _errcode_ret_ -is NULL, no error code is returned. 
- -{clCreateCommandQueueWithPropertiesKHR} returns a valid non-zero -command-queue and _errcode_ret_ is set to {CL_SUCCESS} if the -command-queue is created successfully. Otherwise, it returns a NULL -value with one of the following error values returned in _errcode_ret_: - -* {CL_INVALID_CONTEXT} if _context_ is not a valid context. - -* {CL_INVALID_DEVICE} if _device_ is not a valid device or is not associated -with _context_. - -* {CL_INVALID_VALUE} if values specified in _properties_ are not valid. - -* {CL_INVALID_QUEUE_PROPERTIES} if values specified in _properties_ are -valid but are not supported by the device. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required -by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required -by the OpenCL implementation on the host. --- diff --git a/ext/cl_khr_d3d10_sharing.asciidoc b/ext/cl_khr_d3d10_sharing.asciidoc deleted file mode 100644 index 25e89a890..000000000 --- a/ext/cl_khr_d3d10_sharing.asciidoc +++ /dev/null @@ -1,889 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_d3d10_sharing]] -== Creating OpenCL Memory Objects from Direct3D 10 Buffers and Textures - -[[cl_khr_d3d10_sharing-overview]] -=== Overview - -This section describes the *cl_khr_d3d10_sharing* extension. -The goal of this extension is to provide interoperability between OpenCL and -Direct3D 10. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. 
-|==== - -[[cl_khr_d3d10_sharing-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_int clGetDeviceIDsFromD3D10KHR(cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices); - -cl_mem clCreateFromD3D10BufferKHR(cl_context context, - cl_mem_flags flags, - ID3D10Buffer *resource, - cl_int *errcode_ret); - -cl_mem clCreateFromD3D10Texture2DKHR(cl_context context, - cl_mem_flags flags, - ID3D10Texture2D *resource, - UINT subresource, - cl_int *errcode_ret); - -cl_mem clCreateFromD3D10Texture3DKHR(cl_context context, - cl_mem_flags flags, - ID3D10Texture3D *resource, - UINT subresource, - cl_int *errcode_ret); - -cl_int clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); ----- - -[[cl_khr_d3d10_sharing-new-tokens]] -=== New Tokens - -Accepted as a Direct3D 10 device source in the _d3d_device_source_ parameter -of {clGetDeviceIDsFromD3D10KHR}: - ----- -CL_D3D10_DEVICE_KHR -CL_D3D10_DXGI_ADAPTER_KHR ----- - -Accepted as a set of Direct3D 10 devices in the _d3d_device_set_ parameter -of {clGetDeviceIDsFromD3D10KHR}: - ----- -CL_PREFERRED_DEVICES_FOR_D3D10_KHR -CL_ALL_DEVICES_FOR_D3D10_KHR ----- - -Accepted as a property name in the _properties_ parameter of -{clCreateContext} and {clCreateContextFromType}: - ----- -CL_CONTEXT_D3D10_DEVICE_KHR ----- - -Accepted as a property name in the _param_name_ parameter of -{clGetContextInfo}: - ----- -CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR ----- - -Accepted as the property 
being queried in the _param_name_ parameter of -{clGetMemObjectInfo}: - ----- -CL_MEM_D3D10_RESOURCE_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetImageInfo}: - ----- -CL_IMAGE_D3D10_SUBRESOURCE_KHR ----- - -Returned in the _param_value_ parameter of {clGetEventInfo} when -_param_name_ is {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR -CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR ----- - -Returned by {clCreateContext} and {clCreateContextFromType} if the Direct3D -10 device specified for interoperability is not compatible with the devices -against which the context is to be created: - ----- -CL_INVALID_D3D10_DEVICE_KHR ----- - -Returned by {clCreateFromD3D10BufferKHR} when _resource_ is not a Direct3D -10 buffer object, and by {clCreateFromD3D10Texture2DKHR} and -{clCreateFromD3D10Texture3DKHR} when _resource_ is not a Direct3D 10 texture -object: - ----- -CL_INVALID_D3D10_RESOURCE_KHR ----- - -Returned by {clEnqueueAcquireD3D10ObjectsKHR} when any of _mem_objects_ are -currently acquired by OpenCL: - ----- -CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR ----- - -Returned by {clEnqueueReleaseD3D10ObjectsKHR} when any of _mem_objects_ are -not currently acquired by OpenCL: - ----- -CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR ----- - -[[cl_khr_d3d10_sharing-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.4_, replace the description of _properties_ under -{clCreateContext} with: - -"_properties_ specifies a list of context property names and their -corresponding values. -Each property is followed immediately by the corresponding desired value. -The list is terminated with zero. -If a property is not specified in _properties_, then its default value -(listed in _table 4.5_) is used (it is said to be specified implicitly). -If _properties_ is `NULL` or empty (points to a list whose first value is -zero), all attributes take on their default values." 
- -Add the following to _table 4.5_: - -[cols="2,1,2",options="header",] -|==== -| *cl_context_properties enum* -| *Property value* -| *Description* - -| {CL_CONTEXT_D3D10_DEVICE_KHR} -| ID3D10Device * -| Specifies the ID3D10Device * to use for Direct3D 10 interoperability. - - The default value is `NULL`. - -|==== - -Add to the list of errors for {clCreateContext}: - - * {CL_INVALID_D3D10_DEVICE_KHR} if the value of the property - {CL_CONTEXT_D3D10_DEVICE_KHR} is non-`NULL` and does not specify a valid - Direct3D 10 device with which the _cl_device_ids_ against which this - context is to be created may interoperate. - * {CL_INVALID_OPERATION} if Direct3D 10 interoperability is specified by - setting {CL_CONTEXT_D3D10_DEVICE_KHR} to a non-`NULL` value, and - interoperability with another graphics API is also specified. - -Add to the list of errors for {clCreateContextFromType} the same new errors -described above for {clCreateContext}. - -Add the following row to _table 4.6_: - -[cols="2,1,2",options="header",] -|==== -| *cl_context_info* -| *Return Type* -| *Information returned in param_value* - -| {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR} -| {cl_bool_TYPE} -| Returns {CL_TRUE} if Direct3D 10 resources created as shared by setting - _MiscFlags_ to include D3D10_RESOURCE_MISC_SHARED will perform faster when - shared with OpenCL, compared with resources which have not set this flag. - Otherwise returns {CL_FALSE}. -|==== - -[[cl_khr_d3d10_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add to the list of errors for {clGetMemObjectInfo}: - - * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is - {CL_MEM_D3D10_RESOURCE_KHR} and _memobj_ was not created by the function - {clCreateFromD3D10BufferKHR}, {clCreateFromD3D10Texture2DKHR}, or - {clCreateFromD3D10Texture3DKHR}. - -Extend _table 5.12_ to include the following entry. - -[cols="2,1,2",options="header",] -|==== -| *cl_mem_info* -| *Return type* -| *Info.
returned in _param_value_* - -| {CL_MEM_D3D10_RESOURCE_KHR} -| ID3D10Resource * -| If _memobj_ was created using {clCreateFromD3D10BufferKHR}, - {clCreateFromD3D10Texture2DKHR}, or {clCreateFromD3D10Texture3DKHR}, - returns the _resource_ argument specified when _memobj_ was created. - -|==== - -Add to the list of errors for {clGetImageInfo}: - - * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is - {CL_IMAGE_D3D10_SUBRESOURCE_KHR} and _image_ was not created by the function - {clCreateFromD3D10Texture2DKHR}, or {clCreateFromD3D10Texture3DKHR}. - -Extend _table 5.9_ to include the following entry. - -[cols="2,1,2",options="header",] -|==== -| *cl_image_info* -| *Return type* -| *Info. returned in _param_value_* - -| {CL_IMAGE_D3D10_SUBRESOURCE_KHR} -| UINT -| If _image_ was created using {clCreateFromD3D10Texture2DKHR}, or - {clCreateFromD3D10Texture3DKHR}, returns the _subresource_ argument - specified when _image_ was created. -|==== - -Add to _table 5.22_ in the *Info returned in param_value* column for -_cl_event_info_ = {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR -CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR ----- - -[[cl_khr_d3d10_sharing-sharing-memory-objects-with-direct3d-10-resources]] -=== Sharing Memory Objects with Direct3D 10 Resources - -This section discusses OpenCL functions that allow applications to use -Direct3D 10 resources as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and Direct3D 10. -The OpenCL API may be used to execute kernels that read and/or write memory -objects that are also Direct3D 10 resources. -An OpenCL image object may be created from a Direct3D 10 texture resource. -An OpenCL buffer object may be created from a Direct3D 10 buffer resource. -OpenCL memory objects may be created from Direct3D 10 objects if and only if -the OpenCL context has been created from a Direct3D 10 device.
- -[[cl_khr_d3d10_sharing-querying-opencl-devices-corresponding-to-direct3d-10-devices]] -==== Querying OpenCL Devices Corresponding to Direct3D 10 Devices - -The OpenCL devices corresponding to a Direct3D 10 device may be queried. -The OpenCL devices corresponding to a DXGI adapter may also be queried. -The OpenCL devices corresponding to a Direct3D 10 device will be a subset of -the OpenCL devices corresponding to the DXGI adapter against which the -Direct3D 10 device was created. - -The OpenCL devices corresponding to a Direct3D 10 device or a DXGI device -may be queried using the function - -include::{generated}/api/protos/clGetDeviceIDsFromD3D10KHR.txt[] - -_platform_ refers to the platform ID returned by {clGetPlatformIDs}. - -_d3d_device_source_ specifies the type of _d3d_object_, and must be one of -the values shown in the table below. - -_d3d_object_ specifies the object whose corresponding OpenCL devices are -being queried. -The type of _d3d_object_ must be as specified in the table below. - -_d3d_device_set_ specifies the set of devices to return, and must be one of -the values shown in the table below. - -_num_entries_ is the number of {cl_device_id_TYPE} entries that can be added to -_devices_. -If _devices_ is not `NULL` then _num_entries_ must be greater than zero. - -_devices_ returns a list of OpenCL devices found. -The {cl_device_id_TYPE} values returned in _devices_ can be used to identify a -specific OpenCL device. -If _devices_ is `NULL`, this argument is ignored. -The number of OpenCL devices returned is the minimum of the value specified -by _num_entries_ and the number of OpenCL devices corresponding to -_d3d_object_. - -_num_devices_ returns the number of OpenCL devices available that correspond -to _d3d_object_. -If _num_devices_ is `NULL`, this argument is ignored. - -{clGetDeviceIDsFromD3D10KHR} returns {CL_SUCCESS} if the function is executed -successfully. 
-Otherwise it may return - - * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. - * {CL_INVALID_VALUE} if _d3d_device_source_ is not a valid value, - _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero - and _devices_ is not `NULL`, or if both _num_devices_ and _devices_ are - `NULL`. - * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to _d3d_object_ - were found. - -[[cl_khr_d3d10_sharing-clGetDeviceIDsFromD3D10KHR-object-type]] -.Direct3D 10 object types that may be used by {clGetDeviceIDsFromD3D10KHR} -[cols=",",options="header",] -|==== -| {cl_d3d10_device_source_khr_TYPE} -| Type of _d3d_object_ - -| {CL_D3D10_DEVICE_KHR} -| ID3D10Device * - -| {CL_D3D10_DXGI_ADAPTER_KHR} -| IDXGIAdapter * - -|==== - -[[cl_khr_d3d10_sharing-clGetDeviceIDsFromD3D10KHR-devices]] -.Sets of devices queriable using {clGetDeviceIDsFromD3D10KHR} -[cols=",",options="header",] -|==== -| {cl_d3d10_device_set_khr_TYPE} -| Devices returned in _devices_ - -| {CL_PREFERRED_DEVICES_FOR_D3D10_KHR} -| The preferred OpenCL devices associated with the specified Direct3D - object. - -| {CL_ALL_DEVICES_FOR_D3D10_KHR} -| All OpenCL devices which may interoperate with the specified Direct3D - object. - Performance of sharing data on these devices may be considerably less than - on the preferred devices. - -|==== - -[[cl_khr_d3d10_sharing-lifetime-of-shared-objects]] -==== Lifetime of Shared Objects - -An OpenCL memory object created from a Direct3D 10 resource remains valid as -long as the corresponding Direct3D 10 resource has not been deleted. -If the Direct3D 10 resource is deleted through the Direct3D 10 API, -subsequent use of the OpenCL memory object will result in undefined -behavior, including but not limited to possible OpenCL errors, data -corruption, and program termination. 
- -The successful creation of a {cl_context_TYPE} against a Direct3D 10 device -specified via the context create parameter {CL_CONTEXT_D3D10_DEVICE_KHR} will -increment the internal Direct3D reference count on the specified Direct3D 10 -device. -The internal Direct3D reference count on that Direct3D 10 device will be -decremented when the OpenCL reference count on the returned OpenCL context -drops to zero. - -The OpenCL context and corresponding command-queues are dependent on the -existence of the Direct3D 10 device from which the OpenCL context was -created. -If the Direct3D 10 device is deleted through the Direct3D 10 API, subsequent -use of the OpenCL context will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -[[cl_khr_d3d10_sharing-sharing-direct3d-10-buffer-resources-as-opencl-buffer-objects]] -==== Sharing Direct3D 10 Buffer Resources as OpenCL Buffer Objects - -The function - -include::{generated}/api/protos/clCreateFromD3D10BufferKHR.txt[] - -creates an OpenCL buffer object from a Direct3D 10 buffer. - -_context_ is a valid OpenCL context created from a Direct3D 10 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 10 buffer to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D10BufferKHR} returns a valid non-zero OpenCL buffer object -and _errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. 
- * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 buffer - resource, if _resource_ was created with the D3D10_USAGE flag - D3D10_USAGE_IMMUTABLE, if a {cl_mem_TYPE} from _resource_ has already been - created using {clCreateFromD3D10BufferKHR}, or if _context_ was not - created against the same Direct3D 10 device from which _resource_ was - created. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The size of the returned OpenCL buffer object is the same as the size of -_resource_. -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -[[cl_khr_d3d10_sharing-sharing-direct3d-10-texture-and-resources-as-opencl-image-objects]] -==== Sharing Direct3D 10 Texture and Resources as OpenCL Image Objects - -The function - -include::{generated}/api/protos/clCreateFromD3D10Texture2DKHR.txt[] - -creates an OpenCL 2D image object from a subresource of a Direct3D 10 2D -texture. - -_context_ is a valid OpenCL context created from a Direct3D 10 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 10 2D texture to share. - -_subresource_ is the subresource of _resource_ to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D10Texture2DKHR} returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created -successfully. 
-Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _subresource_ is not a valid subresource index for _resource_. - * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 texture - resource, if _resource_ was created with the D3D10_USAGE flag - D3D10_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a - {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already been - created using {clCreateFromD3D10Texture2DKHR}, or if _context_ was not - created against the same Direct3D 10 device from which _resource_ was - created. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 10 texture format of - _resource_ is not listed in the table - <<cl_khr_d3d10_sharing-mapping-of-image-formats,Direct3D 10 formats and corresponding OpenCL image formats>> or if the Direct3D 10 texture - format of _resource_ does not map to a supported OpenCL image format. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The width and height of the returned OpenCL 2D image object are determined -by the width and height of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 2D image object is -determined by the format of _resource_ by the table -<<cl_khr_d3d10_sharing-mapping-of-image-formats,Direct3D 10 formats and corresponding OpenCL image formats>>. - -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -The function - -include::{generated}/api/protos/clCreateFromD3D10Texture3DKHR.txt[] - -creates an OpenCL 3D image object from a subresource of a Direct3D 10 3D -texture. - -_context_ is a valid OpenCL context created from a Direct3D 10 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_.
-Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 10 3D texture to share. - -_subresource_ is the subresource of _resource_ to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D10Texture3DKHR} returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _subresource_ is not a valid subresource index for _resource_. - * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 texture - resource, if _resource_ was created with the D3D10_USAGE flag - D3D10_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a - {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already been - created using {clCreateFromD3D10Texture3DKHR}, or if _context_ was not - created against the same Direct3D 10 device from which _resource_ was - created. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 10 texture format of - _resource_ is not listed in the table - <<cl_khr_d3d10_sharing-mapping-of-image-formats,Direct3D 10 formats and corresponding OpenCL image formats>> or if the Direct3D 10 texture - format of _resource_ does not map to a supported OpenCL image format. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The width, height and depth of the returned OpenCL 3D image object are -determined by the width, height and depth of subresource _subresource_ of -_resource_. -The channel type and order of the returned OpenCL 3D image object is -determined by the format of _resource_ by the table -<<cl_khr_d3d10_sharing-mapping-of-image-formats,Direct3D 10 formats and corresponding OpenCL image formats>>.
- -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -[[cl_khr_d3d10_sharing-mapping-of-image-formats]] -._Direct3D 10 formats and corresponding OpenCL image formats_ -[cols=",",options="header",] -|==== -| *DXGI format* -| *CL image format* - -*(channel order, channel data type)* - -| DXGI_FORMAT_R32G32B32A32_FLOAT | `CL_RGBA`, `CL_FLOAT` -| DXGI_FORMAT_R32G32B32A32_UINT | `CL_RGBA`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32G32B32A32_SINT | `CL_RGBA`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16G16B16A16_FLOAT | `CL_RGBA`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16G16B16A16_UNORM | `CL_RGBA`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16G16B16A16_UINT | `CL_RGBA`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16G16B16A16_SNORM | `CL_RGBA`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16G16B16A16_SINT | `CL_RGBA`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_B8G8R8A8_UNORM | `CL_BGRA`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_UNORM | `CL_RGBA`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_UINT | `CL_RGBA`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8G8B8A8_SNORM | `CL_RGBA`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_SINT | `CL_RGBA`, `CL_SIGNED_INT8` -| | -| DXGI_FORMAT_R32G32_FLOAT | `CL_RG`, `CL_FLOAT` -| DXGI_FORMAT_R32G32_UINT | `CL_RG`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32G32_SINT | `CL_RG`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16G16_FLOAT | `CL_RG`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16G16_UNORM | `CL_RG`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16G16_UINT | `CL_RG`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16G16_SNORM | `CL_RG`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16G16_SINT | `CL_RG`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_R8G8_UNORM | `CL_RG`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8_UINT | `CL_RG`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8G8_SNORM | `CL_RG`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8G8_SINT | `CL_RG`, `CL_SIGNED_INT8` -| | -| DXGI_FORMAT_R32_FLOAT | 
`CL_R`, `CL_FLOAT` -| DXGI_FORMAT_R32_UINT | `CL_R`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32_SINT | `CL_R`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16_FLOAT | `CL_R`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16_UNORM | `CL_R`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16_UINT | `CL_R`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16_SNORM | `CL_R`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16_SINT | `CL_R`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_R8_UNORM | `CL_R`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8_UINT | `CL_R`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8_SNORM | `CL_R`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8_SINT | `CL_R`, `CL_SIGNED_INT8` -|==== - -[[cl_khr_d3d10_sharing-querying-direct3d-properties-of-memory-objects-created-from-direct3d-10-resources]] -==== Querying Direct3D properties of memory objects created from Direct3D 10 resources - -Properties of Direct3D 10 objects may be queried using {clGetMemObjectInfo} -and {clGetImageInfo} with _param_name_ {CL_MEM_D3D10_RESOURCE_KHR} and -{CL_IMAGE_D3D10_SUBRESOURCE_KHR} respectively as described in _sections 5.4.3_ -and _5.3.6_. - -[[cl_khr_d3d10_sharing-sharing-memory-objects-created-from-direct3d-10-resources-between-direct3d-10-and-opencl-contexts]] -==== Sharing memory objects created from Direct3D 10 resources between Direct3D 10 and OpenCL contexts - -The function - -include::{generated}/api/protos/clEnqueueAcquireD3D10ObjectsKHR.txt[] - -is used to acquire OpenCL memory objects that have been created from -Direct3D 10 resources. -The Direct3D 10 objects are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context. - -OpenCL memory objects created from Direct3D 10 resources must be acquired -before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a Direct3D 10 resource is used while -it is not currently acquired by OpenCL, the behavior is undefined. 
-Implementations may fail the execution of commands attempting to use that -OpenCL memory object and set their associated event's execution status to -{CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR}. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueAcquireD3D10ObjectsKHR} provides the synchronization -guarantee that any Direct3D 10 calls involving the interop device(s) used in -the OpenCL context made before {clEnqueueAcquireD3D10ObjectsKHR} is called -will complete executing before _event_ reports completion and before the -execution of any subsequent OpenCL work issued in _command_queue_ begins. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any Direct3D 10 calls involving the interop device(s) used -in the OpenCL context made before {clEnqueueAcquireD3D10ObjectsKHR} is -called have completed before calling {clEnqueueAcquireD3D10ObjectsKHR}. - -_command_queue_ is a valid command-queue. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from Direct3D 10 resources. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. 
-If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueAcquireD3D10ObjectsKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from Direct3D 10 resources. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a Direct3D 10 device. - * {CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR} if memory objects in - _mem_objects_ have previously been acquired using - {clEnqueueAcquireD3D10ObjectsKHR} but have not been released using - {clEnqueueReleaseD3D10ObjectsKHR}. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clEnqueueReleaseD3D10ObjectsKHR.txt[] - -is used to release OpenCL memory objects that have been created from -Direct3D 10 resources. -The Direct3D 10 objects are released by the OpenCL context associated with -_command_queue_.
- -OpenCL memory objects created from Direct3D 10 resources which have been -acquired by OpenCL must be released by OpenCL before they may be accessed by -Direct3D 10. -Accessing a Direct3D 10 resource while its corresponding OpenCL memory -object is acquired is in error and will result in undefined behavior, -including but not limited to possible OpenCL errors, data corruption, and -program termination. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueReleaseD3D10ObjectsKHR} provides the synchronization -guarantee that any Direct3D 10 calls involving the interop -device(s) used in the OpenCL context made after the call to -{clEnqueueReleaseD3D10ObjectsKHR} will not start executing until after all -events in _event_wait_list_ are complete and all work already submitted to -_command_queue_ completes execution. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any Direct3D 10 calls involving the interop device(s) used -in the OpenCL context made after {clEnqueueReleaseD3D10ObjectsKHR} will not -start executing until after the event returned by -{clEnqueueReleaseD3D10ObjectsKHR} reports completion. - -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from Direct3D 10 resources. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0.
- -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueReleaseD3D10ObjectsKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing -and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from Direct3D 10 resources. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a Direct3D 10 device. - * {CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ - have not previously been acquired using - {clEnqueueAcquireD3D10ObjectsKHR}, or have been released using - {clEnqueueReleaseD3D10ObjectsKHR} since the last time that they were - acquired. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in - _event_wait_list_ are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host.
- -[[cl_khr_d3d10_sharing-event-command-types]] -==== Event Command Types for Sharing memory objects that map to Direct3D 10 objects - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -Direct3D 10 objects: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireD3D10ObjectsKHR} -| {CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR_anchor} - -| {clEnqueueReleaseD3D10ObjectsKHR} -| {CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR_anchor} - -|==== - -[[cl_khr_d3d10_sharing-issues]] -=== Issues - - . Should this extension be KHR or EXT? -+ --- -PROPOSED: KHR. -If this extension is to be approved by Khronos then it should be KHR, -otherwise EXT. -Not all platforms can support this extension, but that is also true of -OpenGL interop. - -RESOLVED: KHR. --- - - . Requiring SharedHandle on ID3D10Resource -+ --- -Requiring this can largely simplify things at the DDI level and make some -implementations faster. -However, the DirectX spec only defines the shared handle for a subset of the -resources we would like to support: - ----- -D3D10_RESOURCE_MISC_SHARED - Enables the sharing of resource data between -two or more Direct3D devices. -The only resources that can be shared are 2D non-mipmapped textures. ----- - -PROPOSED A: Add wording to the spec about some implementations needing the -resource setup as shared: - -"`Some implementations may require the resource to be shared on the D3D10 -side of the API`" - -If we do that, do we need another enum to describe this failure case? - -PROPOSED B: Require that all implementations support both shared and -non-shared resources. -The restrictions prohibiting multisample textures and the flag -D3D10_USAGE_IMMUTABLE guarantee software access to all shareable resources. 
- -RESOLVED: Require that implementations support both -D3D10_RESOURCE_MISC_SHARED being set and not set. -Add the query for {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR} to determine -on a per-context basis which method will be faster. --- - - . Texture1D support -+ --- -There is not a matching CL type, so do we want to support this and map to -buffer or Texture2D? - -RESOLVED: We will not add support for ID3D10Texture1D objects unless a -corresponding OpenCL 1D Image type is created. --- - - . CL/D3D10 queries -+ --- -The GL interop has {clGetGLObjectInfo} and {clGetGLTextureInfo}. -It is unclear if these are needed on the D3D10 interop side since the D3D10 -spec makes these queries trivial on the D3D10 object itself. -Also, not all of the semantics of the GL call map across. - -PROPOSED: Add the {clGetMemObjectInfo} and {clGetImageInfo} parameter names -{CL_MEM_D3D10_RESOURCE_KHR} and {CL_IMAGE_D3D10_SUBRESOURCE_KHR} to query the -D3D10 resource from which a {cl_mem_TYPE} was created. -From this data, any D3D10 side information may be queried using the D3D10 -API. - -RESOLVED: We will use {clGetMemObjectInfo} and {clGetImageInfo} to access -this information. --- diff --git a/ext/cl_khr_d3d11_sharing.asciidoc b/ext/cl_khr_d3d11_sharing.asciidoc deleted file mode 100644 index 6d14a3f50..000000000 --- a/ext/cl_khr_d3d11_sharing.asciidoc +++ /dev/null @@ -1,813 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_d3d11_sharing]] -== Creating OpenCL Memory Objects from Direct3D 11 Buffers and Textures - -[[cl_khr_d3d11_sharing-overview]] -=== Overview - -This section describes the *cl_khr_d3d11_sharing* extension. -The goal of this extension is to provide interoperability between OpenCL and -Direct3D 11. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_d3d11_sharing-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_int clGetDeviceIDsFromD3D11KHR(cl_platform_id platform, - cl_d3d11_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices); - -cl_mem clCreateFromD3D11BufferKHR(cl_context context, - cl_mem_flags flags, - ID3D11Buffer *resource, - cl_int *errcode_ret); - -cl_mem clCreateFromD3D11Texture2DKHR(cl_context context, - cl_mem_flags flags, - ID3D11Texture2D *resource, - UINT subresource, - cl_int *errcode_ret); - -cl_mem clCreateFromD3D11Texture3DKHR(cl_context context, - cl_mem_flags flags, - ID3D11Texture3D *resource, - UINT subresource, - cl_int *errcode_ret); - -cl_int clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); ----- - -[[cl_khr_d3d11_sharing-new-tokens]] -=== New Tokens - -Accepted as a Direct3D 11 device source in the _d3d_device_source_ parameter -of {clGetDeviceIDsFromD3D11KHR}: - ----- -CL_D3D11_DEVICE_KHR -CL_D3D11_DXGI_ADAPTER_KHR ----- - -Accepted as a set of Direct3D 11 devices in the _d3d_device_set_ parameter of -{clGetDeviceIDsFromD3D11KHR}: - ----- -CL_PREFERRED_DEVICES_FOR_D3D11_KHR -CL_ALL_DEVICES_FOR_D3D11_KHR ----- - -Accepted as a property name in the _properties_ parameter of -{clCreateContext} and {clCreateContextFromType}: - ----- -CL_CONTEXT_D3D11_DEVICE_KHR
----- - -Accepted as a property name in the _param_name_ parameter of -{clGetContextInfo}: - ----- -CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetMemObjectInfo}: - ----- -CL_MEM_D3D11_RESOURCE_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetImageInfo}: - ----- -CL_IMAGE_D3D11_SUBRESOURCE_KHR ----- - -Returned in the _param_value_ parameter of {clGetEventInfo} when -_param_name_ is {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR -CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR ----- - -Returned by {clCreateContext} and {clCreateContextFromType} if the Direct3D -11 device specified for interoperability is not compatible with the devices -against which the context is to be created: - ----- -CL_INVALID_D3D11_DEVICE_KHR ----- - -Returned by {clCreateFromD3D11BufferKHR} when _resource_ is not a Direct3D -11 buffer object, and by {clCreateFromD3D11Texture2DKHR} and -{clCreateFromD3D11Texture3DKHR} when _resource_ is not a Direct3D 11 texture -object. - ----- -CL_INVALID_D3D11_RESOURCE_KHR ----- - -Returned by {clEnqueueAcquireD3D11ObjectsKHR} when any of _mem_objects_ are -currently acquired by OpenCL: - ----- -CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR ----- - -Returned by {clEnqueueReleaseD3D11ObjectsKHR} when any of _mem_objects_ are -not currently acquired by OpenCL: - ----- -CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR ----- - -[[cl_khr_d3d11_sharing-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.4_, replace the description of _properties_ under -{clCreateContext} with: - -"_properties_ specifies a list of context property names and their -corresponding values. -Each property is followed immediately by the corresponding desired value. -The list is terminated with zero. 
-If a property is not specified in _properties_, then its default value -(listed in _table 4.5_) is used (it is said to be specified implicitly). -If _properties_ is `NULL` or empty (points to a list whose first value is -zero), all attributes take on their default values." - -Add the following to _table 4.5_: - -[cols="2,1,2",options="header",] -|==== -| *cl_context_properties enum* -| *Property value* -| *Description* - -| {CL_CONTEXT_D3D11_DEVICE_KHR} -| ID3D11Device * -| Specifies the ID3D11Device * to use for Direct3D 11 interoperability. - - The default value is `NULL`. - -|==== - -Add to the list of errors for {clCreateContext}: - - * {CL_INVALID_D3D11_DEVICE_KHR} if the value of the property - {CL_CONTEXT_D3D11_DEVICE_KHR} is non-`NULL` and does not specify a valid - Direct3D 11 device with which the _cl_device_ids_ against which this - context is to be created may interoperate. - * {CL_INVALID_OPERATION} if Direct3D 11 interoperability is specified by - setting {CL_CONTEXT_D3D11_DEVICE_KHR} to a non-`NULL` value, and - interoperability with another graphics API is also specified. - -Add to the list of errors for {clCreateContextFromType} the same new errors -described above for {clCreateContext}. - -Add the following row to _table 4.6_: - -[cols="2,1,2",options="header",] -|==== -| *cl_context_info* -| *Return Type* -| *Information returned in param_value* - -| {CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR} -| {cl_bool_TYPE} -| Returns {CL_TRUE} if Direct3D 11 resources created as shared by setting - _MiscFlags_ to include D3D11_RESOURCE_MISC_SHARED will perform faster when - shared with OpenCL, compared with resources which have not set this flag. - Otherwise returns {CL_FALSE}.
-|==== - -[[cl_khr_d3d11_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add to the list of errors for {clGetMemObjectInfo}: - - * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is - {CL_MEM_D3D11_RESOURCE_KHR} and _memobj_ was not created by the function - {clCreateFromD3D11BufferKHR}, {clCreateFromD3D11Texture2DKHR}, or - {clCreateFromD3D11Texture3DKHR}. - -Extend _table 5.12_ to include the following entry. - -[cols="2,1,2",options="header",] -|==== -| *cl_mem_info* -| *Return type* -| *Info. returned in _param_value_* - -| {CL_MEM_D3D11_RESOURCE_KHR} -| ID3D11Resource * -| If _memobj_ was created using {clCreateFromD3D11BufferKHR}, - {clCreateFromD3D11Texture2DKHR}, or {clCreateFromD3D11Texture3DKHR}, - returns the _resource_ argument specified when _memobj_ was created. - -|==== - -Add to the list of errors for {clGetImageInfo}: - - * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is - {CL_IMAGE_D3D11_SUBRESOURCE_KHR} and _image_ was not created by the function - {clCreateFromD3D11Texture2DKHR}, or {clCreateFromD3D11Texture3DKHR}. - -Extend _table 5.9_ to include the following entry. - -[cols="2,1,2",options="header",] -|==== -| *cl_image_info* -| *Return type* -| *Info. returned in _param_value_* - -| {CL_IMAGE_D3D11_SUBRESOURCE_KHR} -| UINT -| If _image_ was created using {clCreateFromD3D11Texture2DKHR}, or - {clCreateFromD3D11Texture3DKHR}, returns the _subresource_ argument - specified when _image_ was created. -|==== - -Add to _table 5.22_ in the *Info returned in param_value* column for -_cl_event_info_ = {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR -CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR ----- - -[[cl_khr_d3d11_sharing-sharing-memory-objects-with-direct3d-11-resources]] -=== Sharing Memory Objects with Direct3D 11 Resources - -This section discusses OpenCL functions that allow applications to use -Direct3D 11 resources as OpenCL memory objects. 
-This allows efficient sharing of data between OpenCL and Direct3D 11. -The OpenCL API may be used to execute kernels that read and/or write memory -objects that are also Direct3D 11 resources. -An OpenCL image object may be created from a Direct3D 11 texture resource. -An OpenCL buffer object may be created from a Direct3D 11 buffer resource. -OpenCL memory objects may be created from Direct3D 11 objects if and only if -the OpenCL context has been created from a Direct3D 11 device. - -[[cl_khr_d3d11_sharing-querying-opencl-devices-corresponding-to-direct3d-11-devices]] -==== Querying OpenCL Devices Corresponding to Direct3D 11 Devices - -The OpenCL devices corresponding to a Direct3D 11 device may be queried. -The OpenCL devices corresponding to a DXGI adapter may also be queried. -The OpenCL devices corresponding to a Direct3D 11 device will be a subset of -the OpenCL devices corresponding to the DXGI adapter against which the -Direct3D 11 device was created. - -The OpenCL devices corresponding to a Direct3D 11 device or a DXGI device -may be queried using the function - -include::{generated}/api/protos/clGetDeviceIDsFromD3D11KHR.txt[] - -_platform_ refers to the platform ID returned by {clGetPlatformIDs}. - -_d3d_device_source_ specifies the type of _d3d_object_, and must be one of -the values shown in the table below. - -_d3d_object_ specifies the object whose corresponding OpenCL devices are -being queried. -The type of _d3d_object_ must be as specified in the table below. - -_d3d_device_set_ specifies the set of devices to return, and must be one of -the values shown in the table below. - -_num_entries_ is the number of {cl_device_id_TYPE} entries that can be added to -_devices_. -If _devices_ is not `NULL` then _num_entries_ must be greater than zero. - -_devices_ returns a list of OpenCL devices found. -The {cl_device_id_TYPE} values returned in _devices_ can be used to identify a -specific OpenCL device. -If _devices_ is `NULL`, this argument is ignored. 
-The number of OpenCL devices returned is the minimum of the value specified -by _num_entries_ and the number of OpenCL devices corresponding to -_d3d_object_. - -_num_devices_ returns the number of OpenCL devices available that correspond -to _d3d_object_. -If _num_devices_ is `NULL`, this argument is ignored. - -{clGetDeviceIDsFromD3D11KHR} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise it may return - - * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. - * {CL_INVALID_VALUE} if _d3d_device_source_ is not a valid value, - _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero - and _devices_ is not `NULL`, or if both _num_devices_ and _devices_ are - `NULL`. - * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to _d3d_object_ - were found. - -[[cl_khr_d3d11_sharing-clGetDeviceIDsFromD3D11KHR-object-type]] -._Direct3D 11 object types that may be used by_ {clGetDeviceIDsFromD3D11KHR} -[cols=",",options="header",] -|==== -| {cl_d3d11_device_source_khr_TYPE} -| Type of _d3d_object_ - -| {CL_D3D11_DEVICE_KHR} -| ID3D11Device * - -| {CL_D3D11_DXGI_ADAPTER_KHR} -| IDXGIAdapter * - -|==== - -[[cl_khr_d3d11_sharing-clGetDeviceIDsFromD3D11KHR-devices]] -._Sets of devices queriable using_ {clGetDeviceIDsFromD3D11KHR} -[cols=",",options="header",] -|==== -| {cl_d3d11_device_set_khr_TYPE} -| Devices returned in _devices_ - -| {CL_PREFERRED_DEVICES_FOR_D3D11_KHR} -| The preferred OpenCL devices associated with the specified Direct3D - object. - -| {CL_ALL_DEVICES_FOR_D3D11_KHR} -| All OpenCL devices which may interoperate with the specified Direct3D - object. - Performance of sharing data on these devices may be considerably less than - on the preferred devices. - -|==== - -[[cl_khr_d3d11_sharing-lifetime-of-shared-objects]] -==== Lifetime of Shared Objects - -An OpenCL memory object created from a Direct3D 11 resource remains valid as -long as the corresponding Direct3D 11 resource has not been deleted. 
-If the Direct3D 11 resource is deleted through the Direct3D 11 API, -subsequent use of the OpenCL memory object will result in undefined -behavior, including but not limited to possible OpenCL errors, data -corruption, and program termination. - -The successful creation of a {cl_context_TYPE} against a Direct3D 11 device -specified via the context create parameter {CL_CONTEXT_D3D11_DEVICE_KHR} will -increment the internal Direct3D reference count on the specified Direct3D 11 -device. -The internal Direct3D reference count on that Direct3D 11 device will be -decremented when the OpenCL reference count on the returned OpenCL context -drops to zero. - -The OpenCL context and corresponding command-queues are dependent on the -existence of the Direct3D 11 device from which the OpenCL context was -created. -If the Direct3D 11 device is deleted through the Direct3D 11 API, subsequent -use of the OpenCL context will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -[[cl_khr_d3d11_sharing-sharing-direct3d-11-buffer-resources-as-opencl-buffer-objects]] -==== Sharing Direct3D 11 Buffer Resources as OpenCL Buffer Objects - -The function - -include::{generated}/api/protos/clCreateFromD3D11BufferKHR.txt[] - -creates an OpenCL buffer object from a Direct3D 11 buffer. - -_context_ is a valid OpenCL context created from a Direct3D 11 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to table 5.3 for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 11 buffer to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. 
- -{clCreateFromD3D11BufferKHR} returns a valid non-zero OpenCL buffer object -and _errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. - * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 buffer - resource, if _resource_ was created with the D3D11_USAGE flag - D3D11_USAGE_IMMUTABLE, if a {cl_mem_TYPE} from _resource_ has already been - created using {clCreateFromD3D11BufferKHR}, or if _context_ was not - created against the same Direct3D 11 device from which _resource_ was - created. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The size of the returned OpenCL buffer object is the same as the size of -_resource_. -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -[[cl_khr_d3d11_sharing-sharing-direct3d-11-texture-and-resources-as-opencl-image-objects]] -==== Sharing Direct3D 11 Texture and Resources as OpenCL Image Objects - -The function - -include::{generated}/api/protos/clCreateFromD3D11Texture2DKHR.txt[] - -creates an OpenCL 2D image object from a subresource of a Direct3D 11 2D -texture. - -_context_ is a valid OpenCL context created from a Direct3D 11 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 11 2D texture to share. 
- -_subresource_ is the subresource of _resource_ to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D11Texture2DKHR} returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _subresource_ is not a valid subresource index for _resource_. - * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 texture - resource, if _resource_ was created with the D3D11_USAGE flag - D3D11_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a - {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already been - created using {clCreateFromD3D11Texture2DKHR}, or if _context_ was not - created against the same Direct3D 11 device from which _resource_ was - created. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 11 texture format of - _resource_ is not listed in the table - <<cl_khr_d3d11_sharing-mapping-of-image-formats>> or if the Direct3D 11 texture - format of _resource_ does not map to a supported OpenCL image format. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The width and height of the returned OpenCL 2D image object are determined -by the width and height of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 2D image object is -determined by the format of _resource_ by the table -<<cl_khr_d3d11_sharing-mapping-of-image-formats>>. - -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero.
- -The function - -include::{generated}/api/protos/clCreateFromD3D11Texture3DKHR.txt[] - -creates an OpenCL 3D image object from a subresource of a Direct3D 11 3D -texture. - -_context_ is a valid OpenCL context created from a Direct3D 11 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 11 3D texture to share. - -_subresource_ is the subresource of _resource_ to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D11Texture3DKHR} returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _subresource_ is not a valid subresource index for _resource_. - * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 texture - resource, if _resource_ was created with the D3D11_USAGE flag - D3D11_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a - {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already been - created using {clCreateFromD3D11Texture3DKHR}, or if _context_ was not - created against the same Direct3D 11 device from which _resource_ was - created. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 11 texture format of - _resource_ is not listed in the table - <<cl_khr_d3d11_sharing-mapping-of-image-formats>> or if the Direct3D 11 texture - format of _resource_ does not map to a supported OpenCL image format. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host.
- -The width, height and depth of the returned OpenCL 3D image object are -determined by the width, height and depth of subresource _subresource_ of -_resource_. -The channel type and order of the returned OpenCL 3D image object is -determined by the format of _resource_ by the table -<<cl_khr_d3d11_sharing-mapping-of-image-formats>>. - -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -[[cl_khr_d3d11_sharing-mapping-of-image-formats]] -._Direct3D 11 formats and corresponding OpenCL image formats_ -[cols=",",options="header",] -|==== -| *DXGI format* -| *CL image format* - -*(channel order, channel data type)* - -| DXGI_FORMAT_R32G32B32A32_FLOAT | `CL_RGBA`, `CL_FLOAT` -| DXGI_FORMAT_R32G32B32A32_UINT | `CL_RGBA`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32G32B32A32_SINT | `CL_RGBA`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16G16B16A16_FLOAT | `CL_RGBA`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16G16B16A16_UNORM | `CL_RGBA`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16G16B16A16_UINT | `CL_RGBA`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16G16B16A16_SNORM | `CL_RGBA`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16G16B16A16_SINT | `CL_RGBA`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_B8G8R8A8_UNORM | `CL_BGRA`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_UNORM | `CL_RGBA`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_UINT | `CL_RGBA`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8G8B8A8_SNORM | `CL_RGBA`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_SINT | `CL_RGBA`, `CL_SIGNED_INT8` -| | -| DXGI_FORMAT_R32G32_FLOAT | `CL_RG`, `CL_FLOAT` -| DXGI_FORMAT_R32G32_UINT | `CL_RG`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32G32_SINT | `CL_RG`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16G16_FLOAT | `CL_RG`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16G16_UNORM | `CL_RG`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16G16_UINT | `CL_RG`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16G16_SNORM | `CL_RG`, `CL_SNORM_INT16` -|
DXGI_FORMAT_R16G16_SINT | `CL_RG`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_R8G8_UNORM | `CL_RG`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8_UINT | `CL_RG`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8G8_SNORM | `CL_RG`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8G8_SINT | `CL_RG`, `CL_SIGNED_INT8` -| | -| DXGI_FORMAT_R32_FLOAT | `CL_R`, `CL_FLOAT` -| DXGI_FORMAT_R32_UINT | `CL_R`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32_SINT | `CL_R`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16_FLOAT | `CL_R`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16_UNORM | `CL_R`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16_UINT | `CL_R`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16_SNORM | `CL_R`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16_SINT | `CL_R`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_R8_UNORM | `CL_R`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8_UINT | `CL_R`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8_SNORM | `CL_R`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8_SINT | `CL_R`, `CL_SIGNED_INT8` -|==== - -[[cl_khr_d3d11_sharing-querying-direct3d-properties-of-memory-objects-created-from-direct3d-11-resources]] -==== Querying Direct3D properties of memory objects created from Direct3D 11 resources - -Properties of Direct3D 11 objects may be queried using {clGetMemObjectInfo} -and {clGetImageInfo} with _param_name_ {CL_MEM_D3D11_RESOURCE_KHR} and -{CL_IMAGE_D3D11_SUBRESOURCE_KHR} respectively as described in _sections 5.4.3_ -and _5.3.6_. - -[[cl_khr_d3d11_sharing-sharing-memory-objects-created-from-direct3d-11-resources-between-direct3d-11-and-opencl-contexts]] -==== Sharing memory objects created from Direct3D 11 resources between Direct3D 11 and OpenCL contexts - -The function - -include::{generated}/api/protos/clEnqueueAcquireD3D11ObjectsKHR.txt[] - -is used to acquire OpenCL memory objects that have been created from -Direct3D 11 resources. -The Direct3D 11 objects are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context. 
- -OpenCL memory objects created from Direct3D 11 resources must be acquired -before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a Direct3D 11 resource is used while -it is not currently acquired by OpenCL, the behavior is undefined. -Implementations may fail the execution of commands attempting to use that -OpenCL memory object and set their associated event's execution status to -{CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR}. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueAcquireD3D11ObjectsKHR} provides the synchronization -guarantee that any Direct3D 11 calls involving the interop device(s) used in -the OpenCL context made before {clEnqueueAcquireD3D11ObjectsKHR} is called -will complete executing before _event_ reports completion and before the -execution of any subsequent OpenCL work issued in _command_queue_ begins. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any Direct3D 11 calls involving the interop device(s) used -in the OpenCL context made before {clEnqueueAcquireD3D11ObjectsKHR} is -called have completed before calling {clEnqueueAcquireD3D11ObjectsKHR}. - -_command_queue_ is a valid command-queue. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from Direct3D 11 resources. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. 
-If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueAcquireD3D11ObjectsKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from Direct3D 11 resources. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a Direct3D 11 device. - * {CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR} if memory objects in - _mem_objects_ have previously been acquired using - {clEnqueueAcquireD3D11ObjectsKHR} but have not been released using - {clEnqueueReleaseD3D11ObjectsKHR}. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events.
- * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clEnqueueReleaseD3D11ObjectsKHR.txt[] - -is used to release OpenCL memory objects that have been created from -Direct3D 11 resources. -The Direct3D 11 objects are released by the OpenCL context associated with -_command_queue_. - -OpenCL memory objects created from Direct3D 11 resources which have been -acquired by OpenCL must be released by OpenCL before they may be accessed by -Direct3D 11. -Accessing a Direct3D 11 resource while its corresponding OpenCL memory -object is acquired is in error and will result in undefined behavior, -including but not limited to possible OpenCL errors, data corruption, and -program termination. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueReleaseD3D11ObjectsKHR} provides the synchronization -guarantee that any Direct3D 11 calls involving the interop -device(s) used in the OpenCL context made after the call to -{clEnqueueReleaseD3D11ObjectsKHR} will not start executing until after all -events in _event_wait_list_ are complete and all work already submitted to -_command_queue_ completes execution. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any Direct3D 11 calls involving the interop device(s) used -in the OpenCL context made after {clEnqueueReleaseD3D11ObjectsKHR} will not -start executing until after the event returned by -{clEnqueueReleaseD3D11ObjectsKHR} reports completion. - -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from Direct3D 11 resources.
- -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueReleaseD3D11ObjectsKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing -and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from Direct3D 11 resources. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a Direct3D 11 device. - * {CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ - have not previously been acquired using - {clEnqueueAcquireD3D11ObjectsKHR}, or have been released using - {clEnqueueReleaseD3D11ObjectsKHR} since the last time that they were - acquired. 
- * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in - _event_wait_list_ are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_d3d11_sharing-event-command-types]] -==== Event Command Types for Sharing memory objects that map to Direct3D 11 objects - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -Direct3D 11 objects: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireD3D11ObjectsKHR} -| {CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR_anchor} - -| {clEnqueueReleaseD3D11ObjectsKHR} -| {CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR_anchor} - -|==== diff --git a/ext/cl_khr_depth_images.asciidoc b/ext/cl_khr_depth_images.asciidoc deleted file mode 100644 index c11c695ee..000000000 --- a/ext/cl_khr_depth_images.asciidoc +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_depth_images]] -== Depth Images - -This section describes the *cl_khr_depth_images* extension. - -This extension adds support for depth images. - -This extension became a core feature in OpenCL 2.0. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version.
-|==== - -[[cl_khr_depth_images-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 1.2 Specification - -This extension adds the following new image formats for depth images to _tables 5.6 and 5.7_ of the OpenCL 1.2 specification. - -[cols="",options="header",] -|======================================================================= -|*Enum values that can be specified in channel_order* - -|{CL_DEPTH}. This format can only be used if channel data type = {CL_UNORM_INT16} or {CL_FLOAT}. - -|======================================================================= - -[cols="2,3",options="header",] -|======================================================================= -|*Image Channel Data Type* -|*Description* - -|{CL_UNORM_INT16} -|Each channel component is a normalized unsigned 16-bit integer value - -|{CL_FLOAT} -|Each channel component is a single precision floating-point value -|======================================================================= - -This extension adds the following new image format to the minimum list of supported image formats described in _table 5.8_: - -[[cl_khr_depth_images-required-image-formats]] -._Required Image Formats for_ *cl_khr_depth_images* -[cols=",,",options="header",] -|==================================================================== -|*num_channels* -|*channel_order* -|*channel_data_type* - -|1 -|{CL_DEPTH} -|{CL_UNORM_INT16} + -{CL_FLOAT} - -|==================================================================== - -NOTE: - -Depth image objects can be initialized, read and written using the appropriate CL APIs i.e. {clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer}, {clEnqueueCopyBufferToImage}, {clEnqueueMapImage} and {clEnqueueFillImage}. - -For {clEnqueueFillImage}, the fill color is a 4-component value where the R component refers to the depth value if the image format is {CL_DEPTH}. 
The fill color will be converted to the appropriate image channel format and order associated with image. - -Update text that describes the _arg_value_ argument to {clSetKernelArg} with the following: - -If the kernel argument is declared to be of type image2d_depth_t or image2d_array_depth_t, the _arg_value_ entry will be a pointer to a depth image or depth image array object. - -Add the following error condition for {clSetKernelArg}: - -{CL_INVALID_MEM_OBJECT} for an argument declared to be a depth image or a depth image -array and the argument value specified in _arg_value_ does not follow the rules described above -for a depth memory object or memory array object argument. - -[[cl_khr_depth_images-additions-to-chapter-6]] -=== Additions to Chapter 6 of the OpenCL 1.2 Specification - -Add the following new data types to _table 6.3_ in _section 6.1.3_ of the OpenCL 1.2 specification: - -[cols="2,3",options="header",] -|==== -|*Type* -|*Description* - -|*image2d_depth_t* -|A 2D depth image. Refer to _section 6.12.14_ for a detailed -description of the built-in functions that use this type. - -|*image2d_array_depth_t* -|A 2D depth image array. Refer to _section 6.12.14_ for a -detailed description of the built-in functions that use this -type. - -|==== - -Add the following to the bulleted list in section 6.12.14.1.1 - Determining the border color: - - * If the image channel order is {CL_DEPTH}, the border value is `0.0f`. - -Add the following built-in functions to section 6.12.14.2 - Built-in Image Read Functions: - -[cols="2,3",options="header",] -|==== -|*Function* -|*Description* - -| float *read_imagef*(read_only image2d_depth_t _image_, - sampler_t _sampler_, int2 _coord_) + - float *read_imagef*(read_only image2d_depth_t _image_, - sampler_t _sampler_, float2 _coord_) - | Use the coordinate (_coord.x_, _coord.y_) to do an element lookup in - the 2D depth image object specified by _image_.
- - *read_imagef* returns a floating-point value in the range [0.0, 1.0] - for depth image objects created with _image_channel_data_type_ set to - {CL_UNORM_INT16} or {CL_UNORM_INT24}. - - *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to {CL_FLOAT}. - - The *read_imagef* calls that take integer coordinates must use a - sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized - coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode - set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or - `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. - - Values returned by *read_imagef* for depth image objects with - _image_channel_data_type_ values not specified in the description - above are undefined. -| | -| float *read_imagef*(read_only image2d_array_depth_t _image_, - sampler_t _sampler_, int4 _coord_) + - float *read_imagef*(read_only image2d_array_depth_t _image_, - sampler_t _sampler_, float4 _coord_) - | Use _coord.xy_ to do an element lookup in the 2D image identified by - _coord.z_ in the 2D depth image array specified by _image_. - - *read_imagef* returns a floating-point value in the range [0.0, 1.0] - for depth image objects created with _image_channel_data_type_ set to - {CL_UNORM_INT16} or {CL_UNORM_INT24}. - - *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to {CL_FLOAT}. - - The *read_imagef* calls that take integer coordinates must use a - sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized - coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode - set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or - `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. - - Values returned by *read_imagef* for image objects with - _image_channel_data_type_ values not specified in the description - above are undefined. 
- -|==== - -Add the following built-in functions to section 6.12.14.3 - Built-in Image Sampler-less Read Functions: - -[cols="2,3",options="header",] -|==== -|*Function* -|*Description* - -| float *read_imagef*(image2d_depth_t _image_, int2 _coord_) - | Use the coordinate (_coord.x_, _coord.y_) to do an element lookup in - the 2D depth image object specified by _image_. - - *read_imagef* returns a floating-point value in the range [0.0, 1.0] - for depth image objects created with _image_channel_data_type_ set to - {CL_UNORM_INT16} or {CL_UNORM_INT24}. - - *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to {CL_FLOAT}. - - Values returned by *read_imagef* for image objects with - _image_channel_data_type_ values not specified in the description - above are undefined. -| | -| float *read_imagef*(image2d_array_depth_t _image_, int4 _coord_) - | Use _coord.xy_ to do an element lookup in the 2D image identified by - _coord.z_ in the 2D depth image array specified by _image_. - - *read_imagef* returns a floating-point value in the range [0.0, 1.0] - for depth image objects created with _image_channel_data_type_ set to - {CL_UNORM_INT16} or {CL_UNORM_INT24}. - - *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to {CL_FLOAT}. - - Values returned by *read_imagef* for image objects with - _image_channel_data_type_ values not specified in the description - above are undefined. - -|==== - -Add the following built-in functions to section 6.12.14.4 – Built-in Image Write Functions: - -[cols="2,3",options="header",] -|==== -|*Function* -|*Description* - -| void *write_imagef*(image2d_depth_t _image_, int2 _coord_, - float _depth_) - | Write _depth_ value to location specified by _coord.xy_ in the 2D - depth image object specified by _image_. - Appropriate data format conversion to the specified image format is - done before writing the depth value. 
- _coord.x_ and _coord.y_ are considered to be unnormalized coordinates, - and must be in the range [0, image width-1], and [0, image height-1], - respectively. - - *write_imagef* can only be used with image objects created with - _image_channel_data_type_ set to {CL_UNORM_INT16}, {CL_UNORM_INT24} or - {CL_FLOAT}. - Appropriate data format conversion will be done to convert depth value - from a floating-point value to actual data format associated with the - image. - - The behavior of *write_imagef*, *write_imagei* and *write_imageui* for - image objects created with _image_channel_data_type_ values not - specified in the description above or with (_x_, _y_) coordinate - values that are not in the range [0, image width-1] and [0, image - height-1], respectively, is undefined. -| | -| void *write_imagef*(image2d_array_depth_t _image_, int4 _coord_, - float _depth_) - | Write _depth_ value to location specified by _coord.xy_ in the 2D - image identified by _coord.z_ in the 2D depth image array specified by - _image_. - Appropriate data format conversion to the specified image format is - done before writing the depth value. - _coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized - coordinates, and must be in the range [0, image width-1], [0, image - height-1], and [0, image number of layers-1], respectively. - - *write_imagef* can only be used with image objects created with - _image_channel_data_type_ set to {CL_UNORM_INT16}, {CL_UNORM_INT24} or - {CL_FLOAT}. - Appropriate data format conversion will be done to convert depth value - from a floating-point value to actual data format associated with the - image.
- - The behavior of *write_imagef*, *write_imagei* and *write_imageui* for - image objects created with _image_channel_data_type_ values not - specified in the description above or with (_x_, _y_, _z_) coordinate - values that are not in the range [0, image width-1], [0, image - height-1], [0, image number of layers-1], respectively, is undefined. - -|==== - -Add the following built-in functions to section 6.12.14.5 – Built-in Image Query Functions: - -[cols="2,3",] -|==== -| *Function* | *Description* -| int *get_image_width*(image2d_depth_t _image_) + - int *get_image_width*(image2d_array_depth_t _image_) - | Return the image width in pixels. -| int *get_image_height*(image2d_depth_t _image_) + - int *get_image_height*(image2d_array_depth_t _image_) - | Return the image height in pixels. -| | -| int *get_image_channel_data_type*(image2d_depth_t _image_) + - int *get_image_channel_data_type*(image2d_array_depth_t _image_) - | Return the channel data type. Valid values are: - - `CLK_UNORM_INT16` + - `CLK_FLOAT` -| int *get_image_channel_order*(image2d_depth_t _image_) + - int *get_image_channel_order*(image2d_array_depth_t _image_) - | Return the image channel order. Valid values are: - - `CLK_DEPTH` -| | -| int2 *get_image_dim*(image2d_depth_t _image_) + - int2 *get_image_dim*(image2d_array_depth_t _image_) - | Return the 2D image width and height as an int2 type. - The width is returned in the _x_ component, and the height in the _y_ - component. -| | -| size_t *get_image_array_size*(image2d_array_depth_t _image_) - | Return the number of images in the 2D image array. -|==== - -Add the following text below the table in section 6.12.14.6 - Mapping image channels to color values returned by read_image -and color values passed to write_image to image channels: - -For {CL_DEPTH} images, a scalar value is returned by *read_imagef* or -supplied to *write_imagef*. 
diff --git a/ext/cl_khr_device_enqueue_local_arg_types.asciidoc b/ext/cl_khr_device_enqueue_local_arg_types.asciidoc deleted file mode 100644 index 19f341989..000000000 --- a/ext/cl_khr_device_enqueue_local_arg_types.asciidoc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_device_enqueue_local_arg_types]] -== Device Enqueue Local Argument Types - -This extension allows arguments to blocks that are passed to the *enqueue_kernel* built-in -function to be pointers to any type (built-in or user-defined) in local memory, instead of -requiring arguments to blocks to be pointers to void in local memory. - -The name of this extension is *cl_khr_device_enqueue_local_arg_types*. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_device_enqueue_local_arg_types-additions-to-chapter-6-of-the-opencl-2.0-specification]] -=== Additions to Chapter 6 of the OpenCL 2.0 C Specification - -Modify the second paragraph of Section 6.13.17: Enqueuing Kernels: - -"The following table describes the list of built-in functions that can be used to enqueue a -kernel. We use the generic type name +gentype+ to indicate the built-in OpenCL C scalar or -vector integer or floating-point data types, or any user defined type built from these scalar and -vector data types, which can be used as the type of the pointee of the arguments of the kernel -enqueue functions listed in table 6.31." - -Then, replace all occurrences of +local void *+ in table 6.31 with +local gentype *+. For example: - -[source,opencl_c] ----- -int enqueue_kernel(queue_t queue, - kernel_enqueue_flags_t flags, - const ndrange_t ndrange, - void (^block)(local gentype *, ...), - uint size0, ... 
) ----- - -Additionally, replace all occurrences of +local void*+ in table 6.33 with +local gentype *+. For example: - -[source,opencl_c] ----- -uint get_kernel_work_group_size( - void (^block)(local gentype *, ...)) ----- diff --git a/ext/cl_khr_device_uuid.asciidoc b/ext/cl_khr_device_uuid.asciidoc deleted file mode 100644 index e4005dcaf..000000000 --- a/ext/cl_khr_device_uuid.asciidoc +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_device_uuid]] -== Unique Device Identifiers - -This section describes the *cl_khr_device_uuid* extension. - -This extension adds the ability to query a universally unique identifier -(UUID) for an OpenCL driver and OpenCL device. -The UUIDs returned by the query may be used to identify drivers and devices -across processes or APIs. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-08-27 | 1.0.0 | First assigned version. -|==== - -// == New API Enums -// -// Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: -// -// [source,opencl] -// ---- -// #define CL_DEVICE_UUID_KHR 0x106A -// #define CL_DRIVER_UUID_KHR 0x106B -// #define CL_DEVICE_LUID_VALID_KHR 0x106C -// #define CL_DEVICE_LUID_KHR 0x106D -// #define CL_DEVICE_NODE_MASK_KHR 0x106E -// ---- -// -// Constants describing the size of the driver and device UUIDs, and the device LUID: -// -// [source,opencl] -// ---- -// #define CL_UUID_SIZE_KHR 16 -// #define CL_LUID_SIZE_KHR 8 -// ---- - -=== Additions to Chapter 4 of the OpenCL 3.0 API Specification - -Add to Table 5 - OpenCL Device Queries: - -[caption="Table 5. 
"] -.List of supported param_names by {clGetDeviceInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Device Info | Return Type | Description -| {CL_DEVICE_UUID_KHR} - | {cl_uchar_TYPE}[{CL_UUID_SIZE_KHR}] - | Returns a universally unique identifier (UUID) for the device. - - Device UUIDs must be immutable for a given device across processes, driver APIs, driver versions, and system reboots. - -| {CL_DRIVER_UUID_KHR} - | {cl_uchar_TYPE}[{CL_UUID_SIZE_KHR}] - | Returns a universally unique identifier (UUID) for the software driver for the device. - -| {CL_DEVICE_LUID_VALID_KHR} - | {cl_bool_TYPE} - | Returns {CL_TRUE} if the device has a valid LUID and {CL_FALSE} otherwise. - -| {CL_DEVICE_LUID_KHR} - | {cl_uchar_TYPE}[{CL_LUID_SIZE_KHR}] - | Returns a locally unique identifier (LUID) for the device. - - It is not an error to query {CL_DEVICE_LUID_KHR} when {CL_DEVICE_LUID_VALID_KHR} returns {CL_FALSE}, but in this case the returned LUID value is undefined. - - When {CL_DEVICE_LUID_VALID_KHR} returns {CL_TRUE}, and the OpenCL device is running on the Windows operating system, the returned LUID value can be cast to an `LUID` object and must be equal to the locally unique identifier of an `IDXGIAdapter1` object that corresponds to the OpenCL device. - -| {CL_DEVICE_NODE_MASK_KHR} - | {cl_uint_TYPE} - | Returns a node mask for the device. - - It is not an error to query {CL_DEVICE_NODE_MASK_KHR} when {CL_DEVICE_LUID_VALID_KHR} returns {CL_FALSE}, but in this case the returned node mask is undefined. - - When {CL_DEVICE_LUID_VALID_KHR} returns {CL_TRUE}, the returned node mask must contain exactly one bit. - If the OpenCL device is running on an operating system that supports the Direct3D 12 API and the OpenCL device corresponds to an individual device in a linked device adapter, the returned node mask identifies the Direct3D 12 node corresponding to the OpenCL device. - Otherwise, the returned node mask must be `1`. 
- -|==== - -NOTE: While {CL_DEVICE_UUID_KHR} is specified to remain consistent across driver versions and system reboots, it is not intended to be usable as a serializable persistent identifier for a device. -It may change when a device is physically added to, removed from, or moved to a different connector in a system while that system is powered down. -Further, there is no reasonable way to verify with conformance testing that a given device retains the same UUID in a given system across all driver versions supported in that system. -While implementations should make every effort to report consistent device UUIDs across driver versions, applications should avoid relying on the persistence of this value for uses other than identifying compatible devices for external object sharing purposes. diff --git a/ext/cl_khr_dx9_media_sharing.asciidoc b/ext/cl_khr_dx9_media_sharing.asciidoc deleted file mode 100644 index a350af40a..000000000 --- a/ext/cl_khr_dx9_media_sharing.asciidoc +++ /dev/null @@ -1,737 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_dx9_media_sharing]] -== Creating OpenCL Memory Objects from DirectX 9 Media Surfaces - -[[cl_khr_dx9_media_sharing-overview]] -=== Overview - -This section describes the *cl_khr_dx9_media_sharing* extension. -The goal of this extension is to allow applications to use media surfaces as -OpenCL memory objects. -This allows efficient sharing of data between OpenCL and selected adapter -APIs (only DX9 for now). -If this extension is supported, an OpenCL image object can be created from a -media surface and the OpenCL API can be used to execute kernels that read -and/or write memory objects that are media surfaces. -Note that OpenCL memory objects may be created from the adapter media -surface if and only if the OpenCL context has been created from that -adapter. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_dx9_media_sharing-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_int clGetDeviceIDsFromDX9MediaAdapterKHR( - cl_platform_id platform, - cl_uint num_media_adapters, - cl_dx9_media_adapter_type_khr *media_adapters_type, - void *media_adapters, - cl_dx9_media_adapter_set_khr media_adapter_set, - cl_uint num_entries, - cl_device_id *devices, - cl_int *num_devices); - -cl_mem clCreateFromDX9MediaSurfaceKHR(cl_context context, - cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, - void *surface_info, - cl_uint plane, - cl_int *errcode_ret); - -cl_int clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); ----- - -[[cl_khr_dx9_media_sharing-new-tokens]] -=== New Tokens - -Accepted by the _media_adapter_type_ parameter of -{clGetDeviceIDsFromDX9MediaAdapterKHR}: - ----- -CL_ADAPTER_D3D9_KHR -CL_ADAPTER_D3D9EX_KHR -CL_ADAPTER_DXVA_KHR ----- - -Accepted by the _media_adapter_set_ parameter of -{clGetDeviceIDsFromDX9MediaAdapterKHR}: - ----- -CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR -CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR ----- - -Accepted as a property name in the _properties_ parameter of -{clCreateContext} and {clCreateContextFromType}: - ----- -CL_CONTEXT_ADAPTER_D3D9_KHR -CL_CONTEXT_ADAPTER_D3D9EX_KHR -CL_CONTEXT_ADAPTER_DXVA_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetMemObjectInfo}: - ----- 
-CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR -CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetImageInfo}: - ----- -CL_IMAGE_DX9_MEDIA_PLANE_KHR ----- - -Returned in the _param_value_ parameter of {clGetEventInfo} when -_param_name_ is {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR -CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR ----- - -Returned by {clCreateContext} and {clCreateContextFromType} if the media -adapter specified for interoperability is not compatible with the devices -against which the context is to be created: - ----- -CL_INVALID_DX9_MEDIA_ADAPTER_KHR ----- - -Returned by {clCreateFromDX9MediaSurfaceKHR} when _adapter_type_ is set to a -media adapter and the _surface_info_ does not reference a media surface of -the required type, or if _adapter_type_ is set to a media adapter type and -_surface_info_ does not contain a valid reference to a media surface on that -adapter, by {clGetMemObjectInfo} when _param_name_ is a surface or handle -when the image was not created from an appropriate media surface, and from -{clGetImageInfo} when _param_name_ is {CL_IMAGE_DX9_MEDIA_PLANE_KHR} and image -was not created from an appropriate media surface. - ----- -CL_INVALID_DX9_MEDIA_SURFACE_KHR ----- - -Returned by {clEnqueueAcquireDX9MediaSurfacesKHR} when any of _mem_objects_ -are currently acquired by OpenCL: - ----- -CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR ----- - -Returned by {clEnqueueReleaseDX9MediaSurfacesKHR} when any of _mem_objects_ -are not currently acquired by OpenCL: - ----- -CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR ----- - -[[cl_khr_dx9_media_sharing-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.4_, replace the description of _properties_ under -{clCreateContext} with: - -"`_properties_ specifies a list of context property names and their -corresponding values. 
-Each property is followed immediately by the corresponding desired value. -The list is terminated with zero. -If a property is not specified in _properties_, then its default value -(listed in _table 4.5_) is used (it is said to be specified implicitly). -If _properties_ is `NULL` or empty (points to a list whose first value is -zero), all attributes take on their default values.`" - -Add the following to _table 4.5_: - -[cols=",,",options="header",] -|==== -| *cl_context_properties enum* -| *Property value* -| *Description* - -| {CL_CONTEXT_ADAPTER_D3D9_KHR} -| IDirect3DDevice9 * -| Specifies an IDirect3DDevice9 to use for D3D9 interop. - -| {CL_CONTEXT_ADAPTER_D3D9EX_KHR} -| IDirect3DDeviceEx* -| Specifies an IDirect3DDevice9Ex to use for D3D9 interop. - -| {CL_CONTEXT_ADAPTER_DXVA_KHR} -| IDXVAHD_Device * -| Specifies an IDXVAHD_Device to use for DXVA interop. - -|==== - -Add to the list of errors for {clCreateContext}: - - * {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} if any of the values of the properties - {CL_CONTEXT_ADAPTER_D3D9_KHR}, {CL_CONTEXT_ADAPTER_D3D9EX_KHR} or - {CL_CONTEXT_ADAPTER_DXVA_KHR} is non-`NULL` and does not specify a valid - media adapter with which the _cl_device_ids_ against which this context - is to be created may interoperate. - -Add to the list of errors for {clCreateContextFromType} the same new errors -described above for {clCreateContext}. - -[[cl_khr_dx9_media_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add to the list of errors for {clGetMemObjectInfo}: - - * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is - {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} and _memobj_ was not created by the - function {clCreateFromDX9MediaSurfaceKHR} from a Direct3D9 surface. - -Extend _table 5.12_ to include the following entry: - -[cols=",,",options="header",] -|==== -| *cl_mem_info* -| *Return type* -| *Info. 
returned in _param_value_* - -| {CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR} -| {cl_dx9_media_adapter_type_khr_TYPE} -| Returns the {cl_dx9_media_adapter_type_khr_TYPE} argument value specified when - _memobj_ is created using {clCreateFromDX9MediaSurfaceKHR}. - -| {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} -| {cl_dx9_surface_info_khr_TYPE} -| Returns the {cl_dx9_surface_info_khr_TYPE} argument value specified when - _memobj_ is created using {clCreateFromDX9MediaSurfaceKHR}. - -|==== - -Add to the list of errors for {clGetImageInfo}: - - * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is - {CL_IMAGE_DX9_MEDIA_PLANE_KHR} and _image_ was not created by the function - {clCreateFromDX9MediaSurfaceKHR}. - -Extend _table 5.9_ to include the following entry. - -[cols=",,",options="header",] -|==== -| *cl_image_info* -| *Return type* -| *Info. returned in _param_value_* - -| {CL_IMAGE_DX9_MEDIA_PLANE_KHR} -| {cl_uint_TYPE} -| Returns the _plane_ argument value specified when _memobj_ is created - using {clCreateFromDX9MediaSurfaceKHR}. - -|==== - -Add to _table 5.22_ in the *Info returned in param_value* column for -_cl_event_info_ = {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR -CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR ----- - -[[cl_khr_dx9_media_sharing-sharing-media-surfaces-with-opencl]] -=== Sharing Media Surfaces with OpenCL - -This section discusses OpenCL functions that allow applications to use media -surfaces as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and media surface APIs. -The OpenCL API may be used to execute kernels that read and/or write memory -objects that are also media surfaces. -An OpenCL image object may be created from a media surface. -OpenCL memory objects may be created from media surfaces if and only if the -OpenCL context has been created from a media adapter. 
- -[[cl_khr_dx9_media_sharing-querying-opencl-devices-corresponding-to-media-adapters]] -==== Querying OpenCL Devices corresponding to Media Adapters - -Media adapters are an abstraction associated with devices that provide media -capabilities. - -The function - -include::{generated}/api/protos/clGetDeviceIDsFromDX9MediaAdapterKHR.txt[] - -queries a media adapter for any associated OpenCL devices. -Adapters with associated OpenCL devices can enable media surface sharing -between the two. - -_platform_ refers to the platform ID returned by {clGetPlatformIDs}. - -_num_media_adapters_ specifies the number of media adapters. - -_media_adapters_type_ is an array of _num_media_adapters_ entries. -Each entry specifies the type of media adapter and must be one of the values -described in the table below. - -[[cl_khr_dx9_media_sharing-media-adapter-types]] -.DirectX 9 object types that may be used by {clGetDeviceIDsFromDX9MediaAdapterKHR} -[cols=",",options="header",] -|==== -| {cl_dx9_media_adapter_type_khr_TYPE} -| Type of media adapter - -| {CL_ADAPTER_D3D9_KHR} -| IDirect3DDevice9 * - -| {CL_ADAPTER_D3D9EX_KHR} -| IDirect3DDevice9Ex * - -| {CL_ADAPTER_DXVA_KHR} -| IDXVAHD_Device * - -|==== - -[[cl_khr_dx9_media_sharing-media-adapter-sets]] -.Sets of devices queriable using {clGetDeviceIDsFromDX9MediaAdapterKHR} -[cols=",",options="header",] -|==== -| {cl_dx9_media_adapter_set_khr_TYPE} -| Description - -| {CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR} -| The preferred OpenCL devices associated with the media adapter. - -| {CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR} -| All OpenCL devices that may interoperate with the media adapter -|==== - -_media_adapters_ is an array of _num_media_adapters_ entries. -Each entry specifies the actual adapter whose type is specified by -_media_adapter_type_. -The _media_adapters_ must be one of the types described in the table -<>. 
-_media_adapter_set_ specifies the set of adapters to return and must be one -of the values described in the table -<<cl_khr_dx9_media_sharing-media-adapter-sets,_cl_dx9_media_adapter_set_khr -values_>>. - -_num_entries_ is the number of {cl_device_id_TYPE} entries that can be added to -_devices_. -If _devices_ is not `NULL`, then _num_entries_ must be greater than zero. - -_devices_ returns a list of OpenCL devices found that support the list of -media adapters specified. -The {cl_device_id_TYPE} values returned in _devices_ can be used to identify a -specific OpenCL device. -If _devices_ argument is `NULL`, this argument is ignored. -The number of OpenCL devices returned is the minimum of the value specified -by _num_entries_ or the number of OpenCL devices whose type matches -_device_type_. - -_num_devices_ returns the number of OpenCL devices. -If _num_devices_ is `NULL`, this argument is ignored. - -{clGetDeviceIDsFromDX9MediaAdapterKHR} returns {CL_SUCCESS} if the function is -executed successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. - * {CL_INVALID_VALUE} if _num_media_adapters_ is zero or if - _media_adapters_type_ is `NULL` or if _media_adapters_ is `NULL`. - * {CL_INVALID_VALUE} if any of the entries in _media_adapters_type_ or - _media_adapters_ is not a valid value. - * {CL_INVALID_VALUE} if _media_adapter_set_ is not a valid value. - * {CL_INVALID_VALUE} if _num_entries_ is equal to zero and _devices_ is not - `NULL` or if both _num_devices_ and _devices_ are `NULL`. - * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to adapters - specified in _media_adapters_ and _media_adapters_type_ were found. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host.
- -[[cl_khr_dx9_media_sharing-creating-media-resources-as-opencl-image-objects]] -==== Creating Media Resources as OpenCL Image Objects - -The function - -include::{generated}/api/protos/clCreateFromDX9MediaSurfaceKHR.txt[] - -creates an OpenCL image object from a media surface. - -_context_ is a valid OpenCL context created from a media adapter. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of flags. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_adapter_type_ is a value from enumeration of supported adapters described -in the table -<<cl_khr_dx9_media_sharing-media-adapter-types,_cl_dx9_media_adapter_type_khr values_>>. -The type of _surface_info_ is determined by the adapter type. -The implementation does not need to support all adapter types. -This approach provides flexibility to support additional adapter types in -the future. -Supported adapter types are {CL_ADAPTER_D3D9_KHR}, {CL_ADAPTER_D3D9EX_KHR} and -{CL_ADAPTER_DXVA_KHR}. - -If _adapter_type_ is {CL_ADAPTER_D3D9_KHR}, {CL_ADAPTER_D3D9EX_KHR} or -{CL_ADAPTER_DXVA_KHR}, the _surface_info_ points to the following structure: - -include::{generated}/api/structs/cl_dx9_surface_info_khr.txt[] - -For DX9 surfaces, we need both the handle to the resource and the resource -itself to have a sufficient amount of information to eliminate a copy of the -surface for sharing in cases where this is possible. -Elimination of the copy is driver dependent. -_shared_handle_ may be `NULL` and this may result in sub-optimal -performance. - -_surface_info_ is a pointer to one of the structures defined in the -_adapter_type_ description above passed in as a void *. - -_plane_ is the plane of resource to share for planar surface formats. -For planar formats, we use the plane parameter to obtain a handle to this -specific plane (Y, U or V for example). -For non-planar formats used by media, _plane_ must be 0. - -_errcode_ret_ will return an appropriate error code.
-If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromDX9MediaSurfaceKHR} returns a valid non-zero 2D image object -and _errcode_ret_ is set to {CL_SUCCESS} if the 2D image object is created -successfully. -Otherwise it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _plane_ is not a valid plane of _resource_ specified in _surface_info_. - * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _resource_ specified in - _surface_info_ is not a valid resource or is not associated with - _adapter_type_ (e.g., _adapter_type_ is set to {CL_ADAPTER_D3D9_KHR} and - _resource_ is not a Direct3D 9 surface created in D3DPOOL_DEFAULT). - * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _shared_handle_ specified in - _surface_info_ is not `NULL` or a valid handle value. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the texture format of _resource_ - is not listed in <> or - <>. - * {CL_INVALID_OPERATION} if there are no devices in _context_ that support - _adapter_type_. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The width and height of the returned OpenCL 2D image object are determined -by the width and height of the plane of resource. -The channel type and order of the returned image object is determined by the -format and plane of resource and are described in the table -<> or -<>. - -This call will increment the internal media surface count on _resource_. -The internal media surface reference count on _resource_ will be decremented -when the OpenCL reference count on the returned OpenCL memory object drops -to zero. 
- -[[cl_khr_dx9_media_sharing-querying-media-surface-properties-of-memory-objects-created-from-media-surfaces]] -==== Querying Media Surface Properties of Memory Objects created from Media Surfaces - -Properties of media surface objects may be queried using -{clGetMemObjectInfo} and {clGetImageInfo} with _param_name_ -{CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR}, {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} and -{CL_IMAGE_DX9_MEDIA_PLANE_KHR} as described in _sections 5.4.3_ and _5.3.6_. - -[[cl_khr_dx9_media_sharing-sharing-memory-objects-created-from-media-surfaces-between-a-media-adapter-and-opencl]] -==== Sharing Memory Objects created from Media Surfaces between a Media Adapter and OpenCL - -The function - -include::{generated}/api/protos/clEnqueueAcquireDX9MediaSurfacesKHR.txt[] - -is used to acquire OpenCL memory objects that have been created from a media -surface. -The media surfaces are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context. - -OpenCL memory objects created from media surfaces must be acquired before -they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a media surface is used while it is -not currently acquired by OpenCL, the call attempting to use that OpenCL -memory object will return {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR}. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueAcquireDX9MediaSurfacesKHR} provides the synchronization -guarantee that any media adapter API calls involving the interop device(s) -used in the OpenCL context made before {clEnqueueAcquireDX9MediaSurfacesKHR} -is called will complete executing before _event_ reports completion and -before the execution of any subsequent OpenCL work issued in _command_queue_ -begins. 
-If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any media adapter API calls involving the interop -device(s) used in the OpenCL context made before -{clEnqueueAcquireDX9MediaSurfacesKHR} is called have completed before -calling {clEnqueueAcquireDX9MediaSurfacesKHR}. - -_command_queue_ is a valid command-queue. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from media surfaces. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueAcquireDX9MediaSurfacesKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`.
- * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from media surfaces. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a device that can share the media surface referenced by - _mem_objects_. - * {CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR} if memory objects in - _mem_objects_ have previously been acquired using - {clEnqueueAcquireDX9MediaSurfacesKHR} but have not been released using - {clEnqueueReleaseDX9MediaSurfacesKHR}. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clEnqueueReleaseDX9MediaSurfacesKHR.txt[] - -is used to release OpenCL memory objects that have been created from media -surfaces. -The media surfaces are released by the OpenCL context associated with -_command_queue_. - -OpenCL memory objects created from media surfaces which have been acquired -by OpenCL must be released by OpenCL before they may be accessed by the -media adapter API. -Accessing a media surface while its corresponding OpenCL memory object is -acquired is in error and will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. 
- -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueReleaseDX9MediaSurfacesKHR} provides the synchronization -guarantee that any calls to media adapter APIs involving the interop -device(s) used in the OpenCL context made after the call to -{clEnqueueReleaseDX9MediaSurfacesKHR} will not start executing until after -all events in _event_wait_list_ are complete and all work already submitted -to _command_queue_ completes execution. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any media adapter API calls involving the interop -device(s) used in the OpenCL context made after -{clEnqueueReleaseDX9MediaSurfacesKHR} will not start executing until after -event returned by {clEnqueueReleaseDX9MediaSurfacesKHR} reports completion. - -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from media surfaces. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. 
- -{clEnqueueReleaseDX9MediaSurfacesKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does -nothing and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from valid media surfaces. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a media object. - * {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ - have not previously been acquired using - {clEnqueueAcquireDX9MediaSurfacesKHR}, or have been released using - {clEnqueueReleaseDX9MediaSurfacesKHR} since the last time that they were - acquired. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in - _event_wait_list_ are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host.
- -[[cl_khr_dx9_media_sharing-event-command-types]] -==== Event Command Types for Sharing Memory Objects created from Media Surfaces - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -media surfaces: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireDX9MediaSurfacesKHR} -| {CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR_anchor} - -| {clEnqueueReleaseDX9MediaSurfacesKHR} -| {CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR_anchor} - -|==== - -[[cl_khr_dx9_media_sharing-surface-formats-for-media-surface-sharing]] -==== Surface formats for Media Surface Sharing - -This section includes the D3D surface formats that are supported when the -adapter type is one of the Direct3D lineage. -Using a D3D surface format not listed here is an error. -To extend the use of this extension to support media adapters beyond -DirectX 9, tables similar to the ones in this section will need to be defined -for the surface formats supported by the new media adapter. -All implementations that support this extension are required to support the -NV12 surface format; the other surface formats supported are the same -surface formats that the adapter you are sharing with supports, as long as -they are listed in the table -<<cl_khr_dx9_media_sharing-fourcc-image-formats>> or in the table -<<cl_khr_dx9_media_sharing-d3d-image-formats>>.
- -[[cl_khr_dx9_media_sharing-fourcc-image-formats]] -._YUV FourCC codes and corresponding OpenCL image format_ -[cols=",",options="header",] -|==== -| *FOUR CC code* -| *CL image format* - -*(channel order, channel data type)* - -| FOURCC('N','V','1','2'), Plane 0 | `CL_R`, `CL_UNORM_INT8` -| FOURCC('N','V','1','2'), Plane 1 | `CL_RG`, `CL_UNORM_INT8` -| FOURCC('Y','V','1','2'), Plane 0 | `CL_R`, `CL_UNORM_INT8` -| FOURCC('Y','V','1','2'), Plane 1 | `CL_R`, `CL_UNORM_INT8` -| FOURCC('Y','V','1','2'), Plane 2 | `CL_R`, `CL_UNORM_INT8` -|==== - -In the table <<cl_khr_dx9_media_sharing-fourcc-image-formats>> above, NV12 Plane 0 -corresponds to the luminance (Y) channel and Plane 1 corresponds to the UV -channels. -The YV12 Plane 0 corresponds to the Y channel, Plane 1 corresponds to the V -channel and Plane 2 corresponds to the U channel. -Note that the YUV formats map to `CL_R` and `CL_RG` but do not perform any YUV -to RGB conversion and vice-versa. - -[[cl_khr_dx9_media_sharing-d3d-image-formats]] -._Direct3D formats and corresponding OpenCL image formats_ -[cols=",",options="header",] -|==== -| *D3D format* -| *CL image format* + -*(channel order, channel data type)* - -| D3DFMT_R32F | `CL_R`, `CL_FLOAT` -| D3DFMT_R16F | `CL_R`, `CL_HALF_FLOAT` -| D3DFMT_L16 | `CL_R`, `CL_UNORM_INT16` -| D3DFMT_A8 | `CL_A`, `CL_UNORM_INT8` -| D3DFMT_L8 | `CL_R`, `CL_UNORM_INT8` -| | -| D3DFMT_G32R32F | `CL_RG`, `CL_FLOAT` -| D3DFMT_G16R16F | `CL_RG`, `CL_HALF_FLOAT` -| D3DFMT_G16R16 | `CL_RG`, `CL_UNORM_INT16` -| D3DFMT_A8L8 | `CL_RG`, `CL_UNORM_INT8` -| | -| D3DFMT_A32B32G32R32F | `CL_RGBA`, `CL_FLOAT` -| D3DFMT_A16B16G16R16F | `CL_RGBA`, `CL_HALF_FLOAT` -| D3DFMT_A16B16G16R16 | `CL_RGBA`, `CL_UNORM_INT16` -| D3DFMT_A8B8G8R8 | `CL_RGBA`, `CL_UNORM_INT8` -| D3DFMT_X8B8G8R8 | `CL_RGBA`, `CL_UNORM_INT8` -| D3DFMT_A8R8G8B8 | `CL_BGRA`, `CL_UNORM_INT8` -| D3DFMT_X8R8G8B8 | `CL_BGRA`, `CL_UNORM_INT8` -|==== - -Note: The D3D9 format names in the table above seem to imply that the -order of the color channels are switched relative
to OpenCL but this is -not the case. -For example, the layout of channels for each pixel for D3DFMT_A32B32G32R32F -is the same as `CL_RGBA`, `CL_FLOAT`. diff --git a/ext/cl_khr_egl_event.asciidoc b/ext/cl_khr_egl_event.asciidoc deleted file mode 100644 index d04fb0d8b..000000000 --- a/ext/cl_khr_egl_event.asciidoc +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_egl_event]] -== Creating OpenCL Event Objects from EGL Sync Objects - -[[cl_khr_egl_event-overview]] -=== Overview - -This section describes the *cl_khr_egl_event* extension. -This extension allows creating OpenCL event objects linked to EGL fence sync -objects, potentially improving efficiency of sharing images and buffers -between the two APIs. -The companion *EGL_KHR_cl_event* extension provides the complementary -functionality of creating an EGL sync object from an OpenCL event object. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version.
-|==== - -[[cl_khr_egl_event-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_event clCreateEventFromEGLSyncKHR(cl_context context, - CLeglSyncKHR sync, - CLeglDisplayKHR display, - cl_int *errcode_ret); ----- - -[[cl_khr_egl_event-new-tokens]] -=== New Tokens - -Returned by clCreateEventFromEGLSyncKHR if _sync_ is not a valid EGLSyncKHR -handle created with respect to EGLDisplay _display_: - ----- -CL_INVALID_EGL_OBJECT_KHR ----- - -Returned by *clGetEventInfo* when _param_name_ is CL_EVENT_COMMAND_TYPE: - ----- -CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR ----- - -[[cl_khr_egl_event-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add following to the fourth paragraph of _section 5.11_ (prior to the -description of *clWaitForEvents*): - -"`Event objects can also be used to reflect the status of an EGL fence sync -object. -The sync object in turn refers to a fence command executing in an EGL client -API command stream. -This provides another method of coordinating sharing of EGL / EGL client API -objects with OpenCL. -Completion of EGL / EGL client API commands may be determined by placing an -EGL fence command after commands using eglCreateSyncKHR, creating an event -from the resulting EGL sync object using clCreateEventFromEGLSyncKHR and -then specifying it in the _event_wait_list_ of a clEnqueueAcquire*** -command. -This method may be considerably more efficient than calling operations like -glFinish, and is referred to as _explicit synchronization_. -The application is responsible for ensuring the command stream associated -with the EGL fence is flushed to ensure the CL queue is submitted to the -device. 
-Explicit synchronization is most useful when an EGL client API context bound -to another thread is accessing the memory objects.`" - -Add CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR to the valid _param_value_ values -returned by *clGetEventInfo* for _param_name_ CL_EVENT_COMMAND_TYPE (in the -third row and third column of _table 5.22_). - -Add new _subsection 5.11.2_: - -"`*5.11.2 Linking Event Objects to EGL Synchronization Objects* - -An event object may be created by linking to an EGL *sync object*. -Completion of such an event object is equivalent to waiting for completion -of the fence command associated with the linked EGL sync object. - -The function -indexterm:[clCreateEventFromEGLSyncKHR] -[source,opencl] ----- -cl_event clCreateEventFromEGLSyncKHR(cl_context context, - CLeglSyncKHR sync, - CLeglDisplayKHR display, - cl_int *errcode_ret) ----- - -creates a linked event object. - -_context_ is a valid OpenCL context created from an OpenGL context or share -group, using the *cl_khr_gl_sharing* extension. - -_sync_ is the name of a sync object of type EGL_SYNC_FENCE_KHR created with -respect to EGLDisplay _display_. - -*clCreateEventFromEGLSyncKHR* returns a valid OpenCL event object and -_errcode_ret_ is set to CL_SUCCESS if the event object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context, or was not - created from a GL context. - * CL_INVALID_EGL_OBJECT_KHR if _sync_ is not a valid EGLSyncKHR object of - type EGL_SYNC_FENCE_KHR created with respect to EGLDisplay _display_. - -The parameters of an event object linked to an EGL sync object will return -the following values when queried with *clGetEventInfo*: - - * The CL_EVENT_COMMAND_QUEUE of a linked event is `NULL`, because the - event is not associated with any OpenCL command-queue. 
- * The CL_EVENT_COMMAND_TYPE of a linked event is - CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR, indicating that the event is - associated with a EGL sync object, rather than an OpenCL command. - * The CL_EVENT_COMMAND_EXECUTION_STATUS of a linked event is either - CL_SUBMITTED, indicating that the fence command associated with the sync - object has not yet completed, or CL_COMPLETE, indicating that the fence - command has completed. - -*clCreateEventFromEGLSyncKHR* performs an implicit *clRetainEvent* on the -returned event object. -Creating a linked event object also places a reference on the linked EGL -sync object. -When the event object is deleted, the reference will be removed from the EGL -sync object. - -Events returned from *clCreateEventFromEGLSyncKHR* may only be consumed by -*clEnqueueAcquire**** commands. -Passing such events to any other CL API that enqueues commands will generate -a CL_INVALID_EVENT error.`" - -[[cl_khr_egl_event-additions-to-extension-specification]] -=== Additions to the OpenCL Extension Specification - -Replace the second paragraph of -<> with: - -"`Prior to calling *clEnqueueAcquireGLObjects*, the application must ensure -that any pending EGL or EGL client API operations which access the objects -specified in _mem_objects_ have completed. - -If the *cl_khr_egl_event* extension is supported and the EGL context in -question supports fence sync objects, _explicit synchronization_ can be -achieved as set out in _section 5.7.1_. - -If the *cl_khr_egl_event* extension is not supported, completion of EGL -client API commands may be determined by issuing and waiting for completion -of commands such as glFinish or vgFinish on all client API contexts with -pending references to these objects. -Some implementations may offer other efficient synchronization methods. -If such methods exist they will be described in platform-specific -documentation. 
- -Note that no synchronization methods other than glFinish and vgFinish are -portable between all EGL client API implementations and all OpenCL -implementations. -While this is the only way to ensure completion that is portable to all -platforms, these are expensive operations and their use should be avoided if -the cl_khr_egl_event extension is supported on a platform.`" - -[[cl_khr_egl_event-issues]] -=== Issues - -Most issues are shared with *cl_khr_gl_event* and are resolved as described -in that extension. - - . Should we support implicit synchronization? -+ --- -RESOLVED: No, as this may be very difficult since the synchronization would -not be with EGL, it would be with currently bound EGL client APIs. -It would be necessary to know which client APIs might be bound, to validate -that they're associated with the EGLDisplay associated with the OpenCL -context, and to reach into each such context. --- - - . Do we need to have typedefs to use EGL handles in OpenCL? -+ --- -RESOLVED: Using typedefs for EGL handles. --- - - . Should we restrict which CL APIs can be used with this cl_event? -+ --- -RESOLVED: Use is limited to clEnqueueAcquire*** calls only. --- - - . What is the desired behaviour for this extension when EGLSyncKHR is of a - type other than EGL_SYNC_FENCE_KHR? -+ --- -RESOLVED: This extension only requires support for EGL_SYNC_FENCE_KHR. -Support of other types is an implementation choice, and will result in -CL_INVALID_EGL_OBJECT_KHR if unsupported. --- diff --git a/ext/cl_khr_egl_image.asciidoc b/ext/cl_khr_egl_image.asciidoc deleted file mode 100644 index da56b9ea6..000000000 --- a/ext/cl_khr_egl_image.asciidoc +++ /dev/null @@ -1,432 +0,0 @@ -// Copyright 2017-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_egl_image]] -== Creating OpenCL Memory Objects from EGL Images - -[[cl_khr_egl_image-overview]] -=== Overview - -This section describes the *cl_khr_egl_image* extension. -This extension provides a mechanism for creating OpenCL memory objects from -EGLImages. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_egl_image-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_mem clCreateFromEGLImageKHR(cl_context context, - CLeglDisplayKHR display, - CLeglImageKHR image, - cl_mem_flags flags, - const cl_egl_image_properties_khr *properties, - cl_int *errcode_ret); - -cl_int clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) - -cl_int clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -[[cl_khr_egl_image-new-tokens]] -=== New Tokens - -New error codes: - ----- -CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -CL_INVALID_EGL_OBJECT_KHR ----- - -New command types: - ----- -CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR -CL_COMMAND_RELEASE_EGL_OBJECTS_KHR ----- - -[[cl_khr_egl_image-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -In section 5.2.4, add the following text after the paragraph defining -clCreateImage: - -"`The function -indexterm:[clCreateFromEGLImageKHR] -[source,opencl] ----- -cl_mem clCreateFromEGLImageKHR(cl_context context, - CLeglDisplayKHR display, - CLeglImageKHR image, - cl_mem_flags flags, -
const cl_egl_image_properties_khr *properties, - cl_int *errcode_ret); ----- - -creates an EGLImage target of type cl_mem from the EGLImage source provided -as _image_. - -_display_ should be of type EGLDisplay, cast into the type CLeglDisplayKHR. - -_image_ should be of type EGLImageKHR, cast into the type CLeglImageKHR. -Assuming no errors are generated in this function, the resulting image -object will be an EGLImage target of the specified EGLImage _image_. -The resulting cl_mem is an image object which may be used normally by all -OpenCL operations. -This maps to an image2d_t type in OpenCL kernel code. - -_flags_ is a bit-field that is used to specify usage information about the -memory object being created. - -The possible values for _flags_ are: CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and -CL_MEM_READ_WRITE. - -For OpenCL 1.2 _flags_ also accepts: CL_MEM_HOST_WRITE_ONLY, -CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS. - -This extension only requires support for CL_MEM_READ_ONLY, and for OpenCL -1.2 CL_MEM_HOST_NO_ACCESS. -For OpenCL 1.1, a CL_INVALID_OPERATION will be returned for images which do -not support host mapping. - -If the value passed in _flags_ is not supported by the OpenCL implementation -it will return CL_INVALID_VALUE. -The accepted _flags_ may be dependent upon the texture format used. - -_properties_ specifies a list of property names and their corresponding -values. -Each property name is immediately followed by the corresponding desired -value. -The list is terminated with 0. -No properties are currently supported with this version of the extension. -_properties_ can be `NULL`. - -*clCreateFromEGLImageKHR* returns a valid non-zero OpenCL image object and -_errcode_ret_ is set to CL_SUCCESS if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid OpenCL context.
- * CL_INVALID_VALUE if _properties_ contains invalid values, if _display_ - is not a valid display object or if _flags_ are not in the set defined - above. - * CL_INVALID_EGL_OBJECT_KHR if _image_ is not a valid EGLImage object. - * CL_IMAGE_FORMAT_NOT_SUPPORTED if the OpenCL implementation is not able - to create a cl_mem compatible with the provided CLeglImageKHR for an - implementation-dependent reason (this could be caused by, but not - limited to, reasons such as unsupported texture formats, etc). - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_INVALID_OPERATION if there are no devices in _context_ that support - images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in table 4.3 is CL_FALSE) - or if the flags passed are not supported for that image type.`" - -[[cl_khr_egl_image-lifetime-of-shared-objects]] -==== Lifetime of Shared Objects - -An OpenCL memory object created from an EGL image remains valid according to -the lifetime behavior as described in EGL_KHR_image_base. - -"`Any EGLImage siblings exist in any client API context`" - -For OpenCL this means that while the application retains a reference on the -cl_mem (the EGL sibling), the image remains valid. - -[[cl_khr_egl_image-synchronizing-opengl-and-egl-access-to-shared-objects]] -==== Synchronizing OpenCL and EGL Access to Shared Objects - -In order to ensure data integrity, the application is responsible for -synchronizing access to shared CL/EGL objects by their respective APIs. -Failure to provide such synchronization may result in race conditions and -other undefined behavior including non-portability between implementations. - -Prior to calling clEnqueueAcquireEGLObjectsKHR, the application must ensure -that any pending operations which access the objects specified in -mem_objects have completed. 
-This may be accomplished in a portable way by ceasing all client operations -on the resource, and issuing and waiting for completion of a glFinish -command on all GL contexts with pending references to these objects. -Implementations may offer more efficient synchronization methods, such as -synchronization primitives or fence operations. - -Similarly, after calling clEnqueueReleaseEGLObjectsKHR, the application is -responsible for ensuring that any pending OpenCL operations which access the -objects specified in mem_objects have completed prior to executing -subsequent commands in other APIs which reference these objects. -This may be accomplished in a portable way by calling clWaitForEvents with -the event object returned by clEnqueueReleaseEGLObjectsKHR, or by calling -clFinish. -As above, some implementations may offer more efficient methods. - -Attempting to access the data store of an EGLImage object after it has been -acquired by OpenCL and before it has been released will result in undefined -behavior. -Similarly, attempting to access a shared EGLImage object from OpenCL before -it has been acquired by the OpenCL command-queue or after it has been -released, will result in undefined behavior. - -[[cl_khr_egl_image-sharing-memory-objects-created-from-egl-resources-between-egldisplays-and-opencl-contexts]] -==== Sharing memory objects created from EGL resources between EGLDisplays and OpenCL contexts - -The function -indexterm:[clEnqueueAcquireEGLObjectsKHR] -[source,opencl] ----- -cl_int clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -is used to acquire OpenCL memory objects that have been created from EGL -resources. -The EGL objects are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context.
- -OpenCL memory objects created from EGL resources must be acquired before -they can be used by any OpenCL commands queued to a command-queue. If an -OpenCL memory object created from an EGL resource is used while it is not -currently acquired by OpenCL, the behavior is undefined. Implementations -may fail the execution of commands attempting to use that OpenCL memory -object and set their associated event's execution status to -{CL_EGL_RESOURCE_NOT_ACQUIRED_KHR}. - - -_command_queue_ is a valid command-queue. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from EGL resources, within the context associated with _command_queue_. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -*clEnqueueAcquireEGLObjectsKHR* returns CL_SUCCESS if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns CL_SUCCESS.
-Otherwise it returns one of the following errors: - - * CL_INVALID_VALUE if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * CL_INVALID_MEM_OBJECT if memory objects in _mem_objects_ are not valid - OpenCL memory objects in the context associated with _command_queue_. - * CL_INVALID_EGL_OBJECT_KHR if memory objects in _mem_objects_ have not - been created from EGL resources. - * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid - command-queue. - * CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function -indexterm:[clEnqueueReleaseEGLObjectsKHR] -[source,opencl] ----- -cl_int clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -is used to release OpenCL memory objects that have been created from EGL -resources. -The EGL objects are released by the OpenCL context associated with -_command_queue_. - -OpenCL memory objects created from EGL resources which have been acquired by -OpenCL must be released by OpenCL before they may be accessed by EGL or by -EGL client APIs. -Accessing an EGL resource while its corresponding OpenCL memory object is -acquired is in error and will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -_command_queue_ is a valid command-queue.
- -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from EGL resources, within the context associated with _command_queue_. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -*clEnqueueReleaseEGLObjectsKHR* returns CL_SUCCESS if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns CL_SUCCESS. -Otherwise it returns one of the following errors: - - * CL_INVALID_VALUE if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * CL_INVALID_MEM_OBJECT if memory objects in _mem_objects_ are not valid - OpenCL memory objects in the context associated with _command_queue_. - * CL_INVALID_EGL_OBJECT_KHR if memory objects in _mem_objects_ have not - been created from EGL resources. - * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid - command-queue.
- * CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_egl_image-event-command-types]] -==== Event Command Types for Sharing memory objects created from EGL resources - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -EGL resources: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireEGLObjectsKHR} -| {CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR_anchor} - -| {clEnqueueReleaseEGLObjectsKHR} -| {CL_COMMAND_RELEASE_EGL_OBJECTS_KHR_anchor} - -|==== - -[[cl_khr_egl_image-issues]] -=== Issues - - . This extension does not support reference counting of the images, so the - onus is on the application to behave sensibly and not release the - underlying cl_mem object while the EGLImage is still being used. - . In order to ensure data integrity, the application is responsible for - synchronizing access to shared CL/EGL image objects by their respective - APIs. - Failure to provide such synchronization may result in race conditions - and other undefined behavior. - This may be accomplished by calling clWaitForEvents with the event - objects returned by any OpenCL commands which use the shared image - object or by calling clFinish. - . Currently CL_MEM_READ_ONLY is the only supported flag for _flags_. 
-+ --- -RESOLVED: Implementation will now return an error if writing to a shared -object that is not supported rather than disallowing it entirely. --- - . Currently restricted to 2D image objects. - . What should happen for YUV color-space conversion, multi plane images, - and chroma-siting, and channel mapping? -+ --- -RESOLVED: YUV is no longer explicitly described in this extension. -Before this removal the behavior was dependent on the platform. -This extension explicitly leaves the YUV layout to the platform and EGLImage -source extension (i.e. is implementation specific). -Colorspace conversion must be applied by the application using a color -conversion matrix. - -The expected extension path if YUV color-space conversion is to be supported -is to introduce a YUV image type and provide overloaded versions of the -read_image built-in functions. - -Getting image information for a YUV image should return the original image -size (non quantized size) when all of Y U and V are present in the image. -If the planes have been separated then the actual dimensionality of the -separated plane should be reported. -For example with YUV 4:2:0 (NV12) with a YUV image of 256x256, the Y only -image would return 256x256 whereas the UV only image would return 128x128. --- - . Should an attribute list be used instead? -+ --- -RESOLVED: function has been changed to use an attribute list. --- - . What should happen for EGLImage extensions which introduce formats - without a mapping to an OpenCL image channel data type or channel order? -+ --- -RESOLVED: This extension does not define those formats. -It is expected that as additional EGL extensions are added to create EGL -images from other sources, an extension to CL will be introduced where -needed to represent those image types. --- - . What are the guarantees to synchronization behavior provided by the - implementation? -+ --- -The basic portable form of synchronization is to use a clFinish, as is the -case for GL interop. 
-In addition implementations which support the synchronization extensions -cl_khr_egl_event and EGL_KHR_cl_event can interoperate more efficiently as -described in those extensions. --- diff --git a/ext/cl_khr_expect_assume.asciidoc b/ext/cl_khr_expect_assume.asciidoc deleted file mode 100644 index 274d73b1c..000000000 --- a/ext/cl_khr_expect_assume.asciidoc +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_expect_assume]] -== Kernel Optimization Hints - -This extension adds mechanisms to provide information to the compiler that may improve the performance of some kernels. -Specifically, this extension adds the ability to: - -* Tell the compiler the _expected_ value of a variable. -* Allow the compiler to _assume_ a condition is true. - -These functions are not required for functional correctness. - -The initial version of this extension extends the OpenCL SPIR-V environment to support new instructions for offline compilation tool chains. -Similar functionality may be provided by some OpenCL C online compilation tool chains, but formal support in OpenCL C is not required by the initial version of the extension. - -=== General Information - -==== Name Strings - -`cl_khr_expect_assume` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-11-10 | 1.0.0 | First assigned version. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specifications Version V3.0.8. - -The initial version of this extension extends the OpenCL SPIR-V environment to support new instructions. -Please refer to the OpenCL SPIR-V Environment Specification that describes how this extension modifies the OpenCL SPIR-V environment. 
- -=== Sample Code - -Although this extension does not formally extend OpenCL C, the ability to provide _expect_ and _assume_ information is supported by many OpenCL C compiler tool chains. -The sample code below describes how to test for and provide _expect_ and _assume_ information to compilers based on Clang: - -[source,opencl_c] ----- -// __has_builtin is an optional compiler feature that is supported by Clang. -// If this feature is not supported, we will assume the builtin is not present. -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - -kernel void test(global int* dst, global int* src) -{ - int value = src[get_global_id(0)]; - - // Tell the compiler that the most likely source value is zero. -#if __has_builtin(__builtin_expect) - value = __builtin_expect(value, 0); -#endif - - // Tell the compiler that the source value is non-negative. - // Behavior is undefined if the source value is actually negative. -#if __has_builtin(__builtin_assume) - __builtin_assume(value >= 0); -#endif - - dst[get_global_id(0)] = value % 4; -} ----- diff --git a/ext/cl_khr_extended_async_copies.asciidoc b/ext/cl_khr_extended_async_copies.asciidoc deleted file mode 100644 index 9bde9244c..000000000 --- a/ext/cl_khr_extended_async_copies.asciidoc +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_extended_async_copies]] -== Extended Async Copies - -This section describes the *cl_khr_extended_async_copies* extension. -This extension augments built-in asynchronous copy functions to OpenCL C -to support more patterns: - -1. for async copy between 2D source and 2D destination. -2. for async copy between 3D source and 3D destination. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 0.9.0 | First assigned version (provisional). -| 2021-09-06 | 0.9.1 | Elements-based proposal update. -| 2021-11-10 | 1.0.0 | First non-provisional version. -|==== - -[[cl_khr_extended_async_copies-additions-to-chapter-6-of-the-opencl-specification]] -=== Additions to Chapter 6 of the OpenCL C Specification - -The following new built-in functions are added to the _Async Copies from Global to -Local Memory, Local to Global Memory, and Prefetch_ functions described in _section 6.12.10_ -and _section 6.13.10_ of the OpenCL 1.2 and OpenCL 2.0 C specifications. - -Note that *async_work_group_strided_copy* is a special case of -*async_work_group_copy_2D2D*, namely one which copies a single column to a -single line or vice versa. -For example: + -`async_work_group_strided_copy(dst, src, num_gentypes, src_stride, event)` is equal to -`async_work_group_copy_2D2D(dst, 0, src, 0, sizeof(gentype), 1, num_gentypes, src_stride, 1, event)` - -The async copy built-in functions described in this section support arbitrary -`gentype`-based buffers by casting pointers to `void*`. - -These async copy built-in functions do not perform any implicit synchronization -of source data such as using a *barrier* before performing the copy. - -These async copy built-in functions are performed by all work-items in a -work-group and must therefore be encountered by all work-items in a work-group -executing the kernel with the same argument values; otherwise the results are -undefined. - -The _src_offset_, _dst_offset_, _src_total_line_length_, -_dst_total_line_length_, _src_total_plane_area_ and _dst_total_plane_area_ -function arguments are expressed in elements. - -Both _src_total_line_length_ and _dst_total_line_length_ describe the number of -elements between the beginning of the current line and the beginning of the next -line. 
- -Both _src_total_plane_area_ and _dst_total_plane_area_ describe the number of -elements between the beginning of the current plane and the beginning of the -next plane. - -These async copy built-in functions return an event object that can be used by -*wait_group_events* to wait for the async copy to finish. The _event_ argument -can also be used to associate the async copy with a previous async copy allowing -an event to be shared by multiple async copies; otherwise _event_ should be -zero. If the _event_ argument is non-zero, the event object supplied as the -_event_ argument will be returned. - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* |*Description* -|[source,opencl_c] ----- -event_t async_work_group_copy_2D2D( - __local void *dst, - size_t dst_offset, - const __global void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t src_total_line_length, - size_t dst_total_line_length, - event_t event) - -event_t async_work_group_copy_2D2D( - __global void *dst, - size_t dst_offset, - const __local void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t src_total_line_length, - size_t dst_total_line_length, - event_t event) ----- -| Perform an async copy of (_num_elements_per_line_ * _num_lines_) elements -of size _num_bytes_per_element_ from -(_src_ + (_src_offset_ * _num_bytes_per_element_)) to -(_dst_ + (_dst_offset_ * _num_bytes_per_element_)). All pointer arithmetic -is performed with implicit casting to `char*` by the implementation. -Each line contains _num_elements_per_line_ elements of size -_num_bytes_per_element_. -After each line of transfer, the _src_ address is incremented by -_src_total_line_length_ elements -(i.e. 
_src_total_line_length_ * _num_bytes_per_element_ bytes), -and the _dst_ address is incremented by _dst_total_line_length_ elements -(i.e. _dst_total_line_length_ * _num_bytes_per_element_ bytes), -for the next line of transfer. - -The behavior of *async_work_group_copy_2D2D* is undefined if the -source or destination addresses exceed the upper bounds of the address space -during the copy. - -The behavior of *async_work_group_copy_2D2D* is also undefined if the -_src_total_line_length_ or _dst_total_line_length_ values are smaller -than _num_elements_per_line_, i.e. overlapping of lines is undefined. - -|[source,opencl_c] ----- -event_t async_work_group_copy_3D3D( - __local void *dst, - size_t dst_offset, - const __global void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t num_planes, - size_t src_total_line_length, - size_t src_total_plane_area, - size_t dst_total_line_length, - size_t dst_total_plane_area, - event_t event) - -event_t async_work_group_copy_3D3D( - __global void *dst, - size_t dst_offset, - const __local void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t num_planes, - size_t src_total_line_length, - size_t src_total_plane_area, - size_t dst_total_line_length, - size_t dst_total_plane_area, - event_t event) ----- -| Perform an async copy of \((_num_elements_per_line_ * _num_lines_) * _num_planes_) elements -of size _num_bytes_per_element_ from -(_src_ + (_src_offset_ * _num_bytes_per_element_)) to -(_dst_ + (_dst_offset_ * _num_bytes_per_element_)), -arranged in _num_planes_ planes. All pointer arithmetic -is performed with implicit casting to `char*` by the implementation. -Each plane contains _num_lines_ lines. -Each line contains _num_elements_per_line_ elements. -After each line of transfer, the _src_ address is incremented by -_src_total_line_length_ elements -(i.e. 
_src_total_line_length_ * _num_bytes_per_element_ bytes), -and the _dst_ address is incremented by _dst_total_line_length_ elements -(i.e. _dst_total_line_length_ * _num_bytes_per_element_ bytes), -for the next line of transfer. - -The behavior of *async_work_group_copy_3D3D* is undefined if the -source or destination addresses exceed the upper bounds of the address space -during the copy. - -The behavior of *async_work_group_copy_3D3D* is also undefined if the -_src_total_line_length_ or _dst_total_line_length_ values are smaller -than _num_elements_per_line_, i.e. overlapping of lines is undefined. - -The behavior of *async_work_group_copy_3D3D* is also undefined if -_src_total_plane_area_ is smaller than (_num_lines_ * _src_total_line_length_), -or _dst_total_plane_area_ is smaller than (_num_lines_ * _dst_total_line_length_), -i.e. overlapping of planes is undefined. - -|======================================================================= diff --git a/ext/cl_khr_extended_bit_ops.asciidoc b/ext/cl_khr_extended_bit_ops.asciidoc deleted file mode 100644 index b65194cf9..000000000 --- a/ext/cl_khr_extended_bit_ops.asciidoc +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_extended_bit_ops]] -== Extended Bit Operations - -This extension adds OpenCL C functions for performing extended bit operations. -Specifically, the following functions are added: - -* bitfield insert: insert bits from one source operand into another source operand. -* bitfield extract: extract bits from a source operand, with sign- or zero-extension. -* bit reverse: reverse the bits of a source operand. 
- -=== General Information - -==== Name Strings - -`cl_khr_extended_bit_ops` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-04-22 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL 3.0 C Language Specification and the OpenCL SPIR-V Environment Specification Version V3.0.6. - -This extension requires OpenCL 1.0. - -=== New OpenCL C Functions - -[source,opencl_c] ----- -gentype bitfield_insert( gentype base, gentype insert, uint offset, uint count ) -igentype bitfield_extract_signed( gentype base, uint offset, uint count ) -ugentype bitfield_extract_unsigned( gentype base, uint offset, uint count ) -gentype bit_reverse( gentype base ) ----- - -=== Modifications to the OpenCL C Specification - -==== Modify Section 6.15.3. Integer Functions: - -Add a new Section 6.15.3.X. Extended Bit Operations: :: -+ --- -The functions described in the following table can be used with built-in scalar or vector integer types to perform extended bit operations. -The functions that operate on vector types operate component-wise. -The description is per-component. - -In the table below, the generic type name `gentype` refers to the built-in integer types `char`, `char__n__`, `uchar`, `uchar__n__`, `short`, `short__n__`, `ushort`, `ushort__n__`, `int`, `int__n__`, `uint`, `uint__n__`, `long`, `long__n__`, `ulong`, and `ulong__n__`. -The generic type name `igentype` refers to the built-in signed integer types `char`, `char__n__`, `short`, `short__n__`, `int`, `int__n__`, `long`, and `long__n__`. -The generic type name `ugentype` refers to the built-in unsigned integer types `uchar`, `uchar__n__`, `ushort`, `ushort__n__`, `uint`, `uint__n__`, `ulong`, and `ulong__n__`. -_n_ is 2, 3, 4, 8, or 16. 
- -.Built-in Scalar and Vector Extended Bit Operations -[cols="1a,1", options="header"] -|=== -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype bitfield_insert( - gentype base, gentype insert, - uint offset, uint count) ----- - -|Returns a copy of _base_, with a modified bitfield that comes from _insert_. - -Any bits of the result value numbered outside [_offset_, _offset_ + _count_ - 1] (inclusive) will come from the corresponding bits in _base_. - -Any bits of the result value numbered inside [_offset_, _offset_ + _count_ - 1] (inclusive) will come from the bits numbered [0, _count_ - 1] (inclusive) of _insert_. - -_count_ is the number of bits to be modified. -If _count_ equals 0, the return value will be equal to _base_. - -If _count_ or _offset_ or _offset_ + _count_ is greater than number of bits in `gentype` (for scalar types) or components of `gentype` (for vector types), the result is undefined. - -|[source,opencl_c] ----- -igentype bitfield_extract_signed( - gentype base, - uint offset, uint count) ----- - -|Returns an extracted bitfield from _base_ with sign extension. -The type of the return value is always a signed type. - -The bits of _base_ numbered in [_offset_, _offset_ + _count_ - 1] (inclusive) are returned as the bits numbered in [0, _count_ - 1] (inclusive) of the result. -The remaining bits in the result will be sign extended by replicating the bit numbered _offset_ + _count_ - 1 of _base_. - -_count_ is the number of bits to be extracted. -If _count_ equals 0, the result is 0. - -If the _count_ or _offset_ or _offset_ + _count_ is greater than number of bits in `gentype` (for scalar types) or components of `gentype` (for vector types), the result is undefined. - -|[source,opencl_c] ----- -ugentype bitfield_extract_unsigned( - gentype base, - uint offset, uint count) ----- - -|Returns an extracted bitfield from _base_ with zero extension. -The type of the return value is always an unsigned type. 
- -The bits of _base_ numbered in [_offset_, _offset_ + _count_ - 1] (inclusive) are returned as the bits numbered in [0, _count_ - 1] (inclusive) of the result. -The remaining bits in the result will be zero. - -_count_ is the number of bits to be extracted. -If _count_ equals 0, the result is 0. - -If the _count_ or _offset_ or _offset_ + _count_ is greater than number of bits in `gentype` (for scalar types) or components of `gentype` (for vector types), the result is undefined. - -|[source,opencl_c] ----- -gentype bit_reverse( - gentype base) ----- - -|Returns the value of _base_ with reversed bits. -That is, the bit numbered _n_ of the result value will be taken from the bit numbered _width_ - _n_ - 1 of _base_ (for scalar types) or a component of _base_ (for vector types), where _width_ is number of bits of `gentype` (for scalar types) or components of `gentype` (for vector types). - -|=== --- diff --git a/ext/cl_khr_extended_versioning.asciidoc b/ext/cl_khr_extended_versioning.asciidoc deleted file mode 100644 index 115f5ce7c..000000000 --- a/ext/cl_khr_extended_versioning.asciidoc +++ /dev/null @@ -1,283 +0,0 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_extended_versioning]] -== Extended versioning - -This extension introduces new platform and device queries that return detailed -version information to applications. It makes it possible to return the exact -revision of the specification or intermediate languages supported by an -implementation. It also enables implementations to communicate a version -number for each of the extensions they support and remove the requirement -for applications to process strings to test for the presence of an extension or -intermediate language or built-in kernel. 
- -Extended versioning was promoted to a core feature in OpenCL 3.0, however note -that the query for {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} was replaced by the -query for {CL_DEVICE_OPENCL_C_ALL_VERSIONS}. - -=== General Information - -==== Name Strings - -`cl_khr_extended_versioning` - -==== Contributors - -Kévin Petit, Arm Ltd. + -Ben Ashbaugh, Intel + -Alastair Murray, Codeplay Software Ltd. + -Einar Hov, Arm Ltd. - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-02-12 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specification -Version 2.2, Revision 11. - -This extension requires OpenCL 1.0. - -=== New API Types - -==== Version - -This extension introduces a new scheme to encode detailed -(major, minor, patch/revision) version information into a single 32-bit unsigned -integer: - -* The major version is using bits 31-22 -* The minor version is using bits 21-12 -* The patch version is using bits 11-0 - -This scheme enables two versions to be ordered using the standard C/C++ operators. -Macros are provided to extract individual fields or compose a full version -from the individual fields. 
- -[source,opencl] ----- - -typedef cl_uint cl_version_khr; - -#define CL_VERSION_MAJOR_BITS_KHR (10) -#define CL_VERSION_MINOR_BITS_KHR (10) -#define CL_VERSION_PATCH_BITS_KHR (12) - -#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) -#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) -#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1) - -#define CL_VERSION_MAJOR_KHR(version) \ - ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) -#define CL_VERSION_MINOR_KHR(version) \ - (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR) -#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR) - -#define CL_MAKE_VERSION_KHR(major, minor, patch) \ - ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ - (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ - ((patch) & CL_VERSION_PATCH_MASK_KHR)) ----- - -==== Name and version - -This extension adds a structure that can be used to describe a combination of a -name alongside a version number: - -[source,opencl] ----- -#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64 - -typedef struct _cl_name_version_khr { - cl_version_khr version; - char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR]; -} cl_name_version_khr; ----- - -The `name` field is an array of {CL_NAME_VERSION_MAX_NAME_SIZE_KHR} bytes used as -storage for a NUL-terminated string whose maximum length is therefore -{CL_NAME_VERSION_MAX_NAME_SIZE_KHR} `- 1`. 
- -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetPlatformInfo}: - -[source,opencl] ----- -CL_PLATFORM_NUMERIC_VERSION_KHR -CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR ----- - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: - -[source,opencl] ----- -CL_DEVICE_NUMERIC_VERSION_KHR -CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR -CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR -CL_DEVICE_ILS_WITH_VERSION_KHR -CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR ----- - -=== Modifications to the OpenCL API Specification - -(Modify Section 4.1, *Querying Platform Info*) :: -+ --- - -(Add the following to Table 3, _List of supported param_names by {clGetPlatformInfo}_) :: -+ -[cols="3,2,3",options="header"] -|==== -| Platform Info -| Return Type -| Description - -| {CL_PLATFORM_NUMERIC_VERSION_KHR} -| {cl_version_khr_TYPE} -| Returns detailed (major, minor, patch) numeric version information. The major - and minor version numbers returned must match those returned via - {CL_PLATFORM_VERSION}. - -| {CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR} -| {cl_name_version_khr_TYPE}[] -| Returns an array of description (name and version) structures. The same - extension name must not be reported more than once. The list of extensions - reported must match the list reported via {CL_PLATFORM_EXTENSIONS}. - -|==== --- - -(Modify Section 4.2, *Querying Devices*) :: -+ --- - -(Add the following to Table 5, _List of supported param_names by {clGetDeviceInfo}_) :: -+ -[cols="3,2,3",options="header"] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_NUMERIC_VERSION_KHR} -| {cl_version_khr_TYPE} -| Returns detailed (major, minor, patch) numeric version information. The major - and minor version numbers returned must match those returned via - {CL_DEVICE_VERSION}. - -| {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} -| {cl_version_khr_TYPE} -| Returns detailed (major, minor, patch) numeric version information. 
The major - and minor version numbers returned must match those returned via - {CL_DEVICE_OPENCL_C_VERSION}. - -| {CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR} -| {cl_name_version_khr_TYPE}[] -| Returns an array of description (name and version) structures. The same - extension name must not be reported more than once. The list of extensions - reported must match the list reported via {CL_DEVICE_EXTENSIONS}. - -| {CL_DEVICE_ILS_WITH_VERSION_KHR} -| {cl_name_version_khr_TYPE}[] -| Returns an array of descriptions (name and version) for all supported - Intermediate Languages. Intermediate Languages with the same name may be - reported more than once but each name and major/minor version combination - may only be reported once. The list of intermediate languages reported must - match the list reported via {CL_DEVICE_IL_VERSION}. - -| {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR} -| {cl_name_version_khr_TYPE}[] -| Returns an array of descriptions for the built-in kernels supported by the device. - Each built-in kernel may only be reported once. The list of reported kernels must - match the list returned via {CL_DEVICE_BUILT_IN_KERNELS}. - -|==== --- - -=== Conformance tests - -. Each of the new queries described in this extension must be attempted and - succeed. -. It must be verified that the information returned by all queries that - extend existing queries is consistent with the information returned - by existing queries. -. Some of the queries introduced by this extension impose uniqueness constraints - on the list of returned values. It must be verified that these constraints are - satisfied. - -=== Issues - -. What compatibility policy should we define? e.g. a _revision_ has to be - backwards-compatible with previous ones -+ --- -*RESOLVED*: No general rules as that wouldn't be testable. Here's a recommended policy: - -- Patch version bump: only clarifications and small/obvious bugfixes. -- Minor version bump: backwards-compatible changes only. 
-- Major version bump: backwards compatibility may break. - --- - -. Do we want versioning for built-in kernels as returned by {CL_DEVICE_BUILT_IN_KERNELS}? -+ --- -*RESOLVED*: No immediate use-case for versioning but being able to get a list of - individual kernels without parsing a string is desirable. Adding - {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR}. --- - -. What is the behaviour of the queries that return an array of structures when -there are no elements to return? -+ --- -*RESOLVED*: The query succeeds and the size returned is zero. --- - -. What value should be returned when version information is not available? -+ --- -*RESOLVED*: If a patch version is not available, it should be reported as 0. - If no version information is available, 0.0.0 should be reported. - These values have been chosen as they are guaranteed to be lower - than or equal to any other version. --- - -. Should we add a query to report SPIR-V extended instruction sets? -+ --- -*RESOLVED*: It is unlikely that we will introduce many SPIR-V extended - instruction sets without an accompanying API extension. Decided - not to do this. --- - -. Should the queries for which the old-style query doesn't exist in a given -OpenCL version be present (e.g. {CL_DEVICE_ILS_WITH_VERSION_KHR} -prior to OpenCL 2.1 or without support for `cl_khr_il_program` or -{CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} on OpenCL 1.0)? -+ --- -*RESOLVED*: All the queries are always present. - {CL_DEVICE_ILS_WITH_VERSION_KHR} returns an empty set - when Intermediate Languages are not supported. - {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} always returns 1.0 on an - OpenCL 1.0 platform. --- - -. Is reporting multiple Intermediate Languages with the same name and major/minor -versions but differing patch versions allowed? -+ --- -*RESOLVED*: No.
This isn't aligned with the intended use for patch versions and - makes it harder for implementations to guarantee consistency with - the existing IL queries. --- - diff --git a/ext/cl_khr_external_memory.asciidoc b/ext/cl_khr_external_memory.asciidoc deleted file mode 100644 index 3eeae8908..000000000 --- a/ext/cl_khr_external_memory.asciidoc +++ /dev/null @@ -1,608 +0,0 @@ -// Copyright 2021-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_external_memory]] -== External Memory (Provisional) - -This extension defines a generic mechanism to share buffer and image objects between OpenCL and many other APIs. - -In particular, the `cl_khr_external_memory` extension defines: - -* Optional properties to import external memory exported by other APIs into OpenCL for a set of devices. - -* Routines to explicitly hand off memory ownership between OpenCL and other APIs. - -Other related extensions define specific external memory types that may be imported into OpenCL. - -=== General Information - -==== Name Strings - -`cl_khr_external_memory` + -`cl_khr_external_memory_dma_buf` + -`cl_khr_external_memory_dx` + -`cl_khr_external_memory_opaque_fd` + -`cl_khr_external_memory_win32` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-09-10 | 0.9.0 | Initial version (provisional). -| 2023-05-04 | 0.9.1 | Clarified device handle list enum cannot be specified without an external memory handle (provisional). -| 2023-08-01 | 0.9.2 | Changed device handle list enum to the memory-specific {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). -| 2023-08-29 | 0.9.3 | Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} (provisional). 
-|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.8. - -Because this extension adds new properties for {clCreateBufferWithProperties} -and {clCreateImageWithProperties} this extension requires OpenCL 3.0. - -==== Contributors - -// spell-checker: disable -Ajit Hakke-Patil, NVIDIA + -Amit Rao, NVIDIA + -Balaji Calidas, QUALCOMM + -Ben Ashbaugh, INTEL + -Carsten Rohde, NVIDIA + -Christoph Kubisch, NVIDIA + -Debalina Bhattacharjee, NVIDIA + -Faith Ekstrand, INTEL + -James Jones, NVIDIA + -Jeremy Kemp, IMAGINATION + -Joshua Kelly, QUALCOMM + -Karthik Raghavan Ravi, NVIDIA + -Kedar Patil, NVIDIA + -Kevin Petit, ARM + -Nikhil Joshi, NVIDIA + -Sharan Ashwathnarayan, NVIDIA + -Vivek Kini, NVIDIA + -// spell-checker: enable - -=== New Types - -[source] ----- -typedef cl_uint cl_external_memory_handle_type_khr; ----- - -=== New API Functions - -[source] ----- -cl_int clEnqueueAcquireExternalMemObjectsKHR( - cl_command_queue command_queue, - cl_uint num_mem_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueReleaseExternalMemObjectsKHR( - cl_command_queue command_queue, - cl_uint num_mem_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); ----- - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetPlatformInfo} to query external memory handle types that may be imported by all devices in an OpenCL platform: - -[source] ----- -CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x2044 ----- - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo} to query external memory handle types that may be imported by an OpenCL device: - -[source] ----- -CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x204F -CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR 0x2052 
----- - -New properties accepted as _properties_ to {clCreateBufferWithProperties} and {clCreateImageWithProperties}: - -[source] ----- -CL_MEM_DEVICE_HANDLE_LIST_KHR 0x2051 -CL_MEM_DEVICE_HANDLE_LIST_END_KHR 0 ----- - -New return values from {clGetEventInfo} when _param_name_ is {CL_EVENT_COMMAND_TYPE}: - -[source] ----- -CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR 0x2047 -CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR 0x2048 ----- - -External memory handle type added by `cl_khr_external_memory_dma_buf`: - -[source] ----- -CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR 0x2067 ----- - -External memory handle types added by `cl_khr_external_memory_dx`: - -[source] ----- -CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR 0x2063 -CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR 0x2064 -CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR 0x2065 -CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR 0x2066 ----- - -External memory handle type added by `cl_khr_external_memory_opaque_fd`: - -[source] ----- -CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR 0x2060 ----- - -External memory handle types added by `cl_khr_external_memory_win32`: - -[source] ----- -CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR 0x2061 -CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2062 ----- - -=== Modifications to existing APIs added by this spec - -Following new enums are added to the list of supported _param_names_ by {clGetPlatformInfo}: - -.List of supported param_names by clGetPlatformInfo -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Platform Info | Return Type | Description -| {CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} - | {cl_external_memory_handle_type_khr_TYPE}[] - | Returns the list of importable external memory handle types supported by all devices in _platform_. -|==== - -{clGetPlatformInfo} when called with _param_name_ {CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} must return a common list of external memory handle types supported by all devices in the platform. 
- -Following new enums are added to the list of supported _param_names_ by {clGetDeviceInfo}: - -.List of supported param_names by clGetDeviceInfo -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Device Info | Return Type | Description -| {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} - | {cl_external_memory_handle_type_khr_TYPE}[] - | Returns the list of importable external memory handle types supported by _device_. -| {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} - | {cl_external_memory_handle_type_khr_TYPE}[] - | Returns the list of importable external memory handle types supported by _device_, that are assumed to apply linear layout to imported images when no other tiling information is provided. -|==== - -{clGetDeviceInfo} when called with param_name {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} must return a non-empty list of external memory handle types for at least one of the devices in the platform. - -{clGetDeviceInfo} when called with param_name {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} returns a list of external memory handle types that are assumed to have a linear memory layout when no other tiling information is provided. This list contains a subset of {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR}. The returned list may be empty. - -External memory handle types not in {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} may have any memory layout. The layout interpretation of images imported with these handle types is implementation defined. - -Following new properties are added to the list of supported properties by {clCreateBufferWithProperties} and {clCreateImageWithProperties}. 
- -[[external-memory-properties-table]] -.List of supported buffer and image creation properties -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Property | Property Value | Description -| {CL_MEM_DEVICE_HANDLE_LIST_KHR} - | {cl_device_id_TYPE}[] - | Specifies the list of OpenCL devices (terminated with {CL_MEM_DEVICE_HANDLE_LIST_END_KHR}) to associate with the external memory handle. -|==== - -If {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_, the memory object created by {clCreateBufferWithProperties} or {clCreateImageWithProperties} is by default accessible to all devices in the _context_. - -The properties used to create a buffer or image from an external memory handle are described by related extensions. -When a buffer or image is created from an external memory handle, the _flags_ used to specify usage information for the buffer or image must not include {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or {CL_MEM_COPY_HOST_PTR}, and the _host_ptr_ argument must be `NULL`. -When images are created from an external memory handle, implementations may acquire information about image attributes such as format and layout at the time of creation. When such information is acquired at image creation time, it is used for the lifetime of the image object. - -Add to the list of error conditions for {clCreateBufferWithProperties} and {clCreateImageWithProperties}: - -* {CL_INVALID_DEVICE} - ** if a device identified by the property {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not a valid device or is not associated with _context_, or - ** if a device identified by property {CL_MEM_DEVICE_HANDLE_LIST_KHR} cannot import the requested external memory object type, or - ** if {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_ and one or more devices in _context_ cannot import the requested external memory object type. 
-* {CL_INVALID_VALUE} - ** if _properties_ includes a supported external memory handle and _flags_ includes {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or {CL_MEM_COPY_HOST_PTR}. -* {CL_INVALID_HOST_PTR} - ** if _properties_ includes a supported external memory handle and _host_ptr_ is not `NULL`. -* {CL_INVALID_PROPERTY} - ** if _properties_ does not include a supported external memory handle and {CL_MEM_DEVICE_HANDLE_LIST_KHR} is specified as part of _properties_. - -Add images created from an external memory handle to the description of `image_row_pitch` and `image_slice_pitch` for {cl_image_desc_TYPE}: - -* `image_row_pitch` is the scan-line pitch in bytes. -The `image_row_pitch` must be zero if _host_ptr_ is `NULL`, the image is not a -2D image created from a buffer, and the image is not an image created from an -external memory handle. -If `image_row_pitch` is zero and _host_ptr_ is not `NULL` then the image row -pitch is calculated as `image_width` {times} the size of an image element in -bytes. -If `image_row_pitch` is zero and the image is created from an external memory -handle then the image row pitch is implementation-defined. -The image row pitch must be {geq} `image_width` {times} the size of an image -element in bytes and must be a multiple of the size of an image element in -bytes. -For a 2D image created from a buffer the image row pitch must also be a multiple -of the maximum of the {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value for all devices in -the context that support images. - -* `image_slice_pitch` is the size in bytes of each 2D slice in a 3D image or the -size in bytes of each image in a 1D or 2D image array. -The `image_slice_pitch` must be zero if _host_ptr_ is `NULL` and the image is -not created from an external memory handle. 
-If `image_slice_pitch` is zero and _host_ptr_ is not `NULL` then the image slice -pitch is calculated as the image row pitch {times} `image_height` for a 2D image -array or a 3D image, and as the image row pitch for a 1D image array. -If `image_slice_pitch` is zero and the image is created from an external memory -handle then the image slice pitch is implementation-defined. -The image slice pitch must be {geq} the image row pitch {times} -`image_height` for a 2D image array or a 3D image, must be {geq} the image row -pitch for a 1D image array, and must be a multiple of the image row pitch. - -=== Description of new types added by this spec - -The following new APIs are added as part of this spec. The details of each are described below: - -==== Acquiring and Releasing External Memory Objects - -To enqueue a command to acquire OpenCL memory objects created from external memory handles, call the function - -include::{generated}/api/protos/clEnqueueAcquireExternalMemObjectsKHR.txt[] - -_command_queue_ specifies a valid command-queue. - -_num_mem_objects_ specifies the number of memory objects to acquire. - -_mem_objects_ points to a list of valid memory objects. - -_num_events_in_wait_list_ specifies the number of events in _event_wait_list_. - -_event_wait_list_ points to the list of events that need to complete before {clEnqueueAcquireExternalMemObjectsKHR} can be executed. -If _event_wait_list_ is `NULL`, then {clEnqueueAcquireExternalMemObjectsKHR} does not explicitly wait on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and that of _command_queue_ must be the same.
- -_event_ returns an event object that identifies this particular command and can be used to query or queue a wait for this particular command to complete. -_event_ can be `NULL` in which case it will not be possible for the application to query the status of this command or queue a wait for this command to complete. - -Applications must acquire the memory objects that are created using external handles before they can be used by any OpenCL commands queued to a command-queue. -Behavior is undefined if a memory object created from an external memory handle is used by an OpenCL command queued to a command-queue without being acquired. -This is to guarantee that the state of the memory objects is up-to-date and they are accessible to OpenCL. -See "Example with Acquire / Release" provided in <<cl_khr_external_memory-Sample-Code>> for more details on how to use this API. - -If _num_mem_objects_ is 0 and _mem_objects_ is `NULL`, the command will trivially succeed after its event dependencies are satisfied and will update its completion event. - -{clEnqueueAcquireExternalMemObjectsKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_VALUE} if _num_mem_objects_ is zero and _mem_objects_ is not a `NULL` value or if _num_mem_objects_ is greater than 0 and _mem_objects_ is `NULL`. -* {CL_INVALID_MEM_OBJECT} if any of the memory objects in _mem_objects_ is not a valid OpenCL memory object created using an external memory handle. -* {CL_INVALID_COMMAND_QUEUE} -** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not one of the devices specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _mem_objects_, or -** if one or more of _mem_objects_ belong to a context that does not contain a device associated with _command_queue_.
-* {CL_INVALID_EVENT_WAIT_LIST} - ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or - ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or - ** if event objects in _event_wait_list_ are not valid events. -* {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of any of the events in _event_wait_list_ is a negative integer value. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -To enqueue a command to release OpenCL memory objects created from external memory handles, call the function - -include::{generated}/api/protos/clEnqueueReleaseExternalMemObjectsKHR.txt[] - -_command_queue_ specifies a valid command-queue. - -_num_mem_objects_ specifies the number of memory objects to release. - -_mem_objects_ points to a list of valid memory objects. - -_num_events_in_wait_list_ specifies the number of events in _event_wait_list_. - -_event_wait_list_ points to the list of events that need to complete before {clEnqueueReleaseExternalMemObjectsKHR} can be executed. -If _event_wait_list_ is `NULL`, then {clEnqueueReleaseExternalMemObjectsKHR} does not wait on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and that of _command_queue_ must be the same. - -_event_ returns an event object that identifies this particular command and can be used to query or queue a wait for this particular command to complete. 
-_event_ can be `NULL` in which case it will not be possible for the application to query the status of this command or queue a wait for this command to complete. - -Applications must release acquired memory objects using {clEnqueueReleaseExternalMemObjectsKHR} before using them through any commands in the other API. -This is to guarantee that the state of memory objects is up-to-date and they are accessible to the other API. -See "Example with Acquire / Release" provided in <<cl_khr_external_memory-Sample-Code>> for more details on how to use this API. - -If _num_mem_objects_ is 0 and _mem_objects_ is `NULL`, the command will trivially succeed after its event dependencies are satisfied and will update its completion event. - -{clEnqueueReleaseExternalMemObjectsKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_VALUE} if _num_mem_objects_ is zero and _mem_objects_ is not a `NULL` value or if _num_mem_objects_ is greater than 0 and _mem_objects_ is `NULL`. -* {CL_INVALID_MEM_OBJECT} if any of the memory objects in _mem_objects_ is not a valid OpenCL memory object created using an external memory handle. -* {CL_INVALID_COMMAND_QUEUE} -** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not one of the devices specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _mem_objects_, or -** if one or more of _mem_objects_ belong to a context that does not contain a device associated with _command_queue_. -* {CL_INVALID_EVENT_WAIT_LIST} - ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or - ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or - ** if event objects in _event_wait_list_ are not valid events. -* {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of any of the events in _event_wait_list_ is a negative integer value.
-* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -=== Descriptions of External Memory Handle Types - -This section describes the external memory handle types that are added by related extensions. - -Applications can import the same payload into multiple OpenCL contexts and multiple times into a given OpenCL context. In all cases, each import operation must create a distinct memory object. - -==== File Descriptor Handle Types - -The `cl_khr_external_memory_opaque_fd` extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - --- - * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR} specifies a POSIX file descriptor handle that has only limited valid usage outside of OpenCL and other compatible APIs. It must be compatible with the POSIX system calls dup, dup2, close, and the non-standard system call dup3. Additionally, it must be transportable over a socket using an SCM_RIGHTS control message. It owns a reference to the underlying memory resource represented by its memory object. --- - -The `cl_khr_external_memory_dma_buf` extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - --- - * {CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR} is a file descriptor for a Linux dma_buf. It owns a reference to the underlying memory resource represented by its memory object. --- - -For these extensions, importing memory from a file descriptor transfers ownership of the file descriptor from the application to the OpenCL implementation. 
The application must not perform any operations on the file descriptor after a successful import. The imported memory object holds a reference to its payload. - -==== NT Handle Types - -The `cl_khr_external_memory_dx` extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - --- - * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR} specifies an NT handle returned by IDXGIResource1::CreateSharedHandle referring to a Direct3D 10 or 11 texture resource. It owns a reference to the memory used by the Direct3D resource. - - * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR} specifies a global share handle returned by IDXGIResource::GetSharedHandle referring to a Direct3D 10 or 11 texture resource. It does not own a reference to the underlying Direct3D resource, and will therefore become invalid when all memory objects and Direct3D resources associated with it are destroyed. - - * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR} specifies an NT handle returned by ID3D12Device::CreateSharedHandle referring to a Direct3D 12 heap resource. It owns a reference to the resources used by the Direct3D heap. - - * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR} specifies an NT handle returned by ID3D12Device::CreateSharedHandle referring to a Direct3D 12 committed resource. It owns a reference to the memory used by the Direct3D resource. --- - -The `cl_khr_external_memory_win32` extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - --- - * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR} specifies an NT handle that has only limited valid usage outside of OpenCL and other compatible APIs. 
It must be compatible with the functions DuplicateHandle, CloseHandle, CompareObjectHandles, GetHandleInformation, and SetHandleInformation. It owns a reference to the underlying memory resource represented by its memory object. - * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR} specifies a global share handle that has only limited valid usage outside of OpenCL and other compatible APIs. It is not compatible with any native APIs. It does not own a reference to the underlying memory resource represented by its memory object, and will therefore become invalid when all memory objects associated with it are destroyed. --- - -For these extensions, importing memory object payloads from Windows handles does not transfer ownership of the handle to the OpenCL implementation. For handle types defined as NT handles, the application must release handle ownership using the CloseHandle system call when the handle is no longer needed. For handle types defined as NT handles, the imported memory object holds a reference to its payload. - -Note: Non-NT handle import operations do not add a reference to their associated payload. If the original object owning the payload is destroyed, all resources and handles sharing that payload will become invalid. - -[[cl_khr_external_memory-Sample-Code]] -=== Sample Code - -. Example for creating a CL buffer from an exported external buffer in a single device context. -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with just first device -clCreateContext(..., 1, devices, ...); - -// Obtain fd/win32 or similar handle for external memory to be imported -// from other API. -int fd = getFdForExternalMemory(); - -// Create extMemBuffer of type cl_mem from fd. 
-cl_mem_properties_khr extMemProperties[] = -{ - (cl_mem_properties_khr)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, - (cl_mem_properties_khr)fd, - 0 -}; - -cl_mem extMemBuffer = clCreateBufferWithProperties(/*context*/ clContext, - /*properties*/ extMemProperties, - /*flags*/ 0, - /*size*/ size, - /*host_ptr*/ NULL, - /*errcode_ret*/ &errcode_ret); ----- --- -. Example for creating a CL Image from an exported external Image for single device usage in a multi-device context -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with first two devices -clCreateContext(..., 2, devices, ...); - -// Create img of type cl_mem usable only on devices[0] - -// Create img of type cl_mem. -// Obtain fd/win32 or similar handle for external memory to be imported -// from other API. -int fd = getFdForExternalMemory(); - -// Set cl_image_format based on external image info -cl_image_format clImgFormat = { }; -clImageFormat.image_channel_order = CL_RGBA; -clImageFormat.image_channel_data_type = CL_UNORM_INT8; - -// Set cl_image_desc based on external image info -size_t clImageFormatSize; -cl_image_desc image_desc = { }; -image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; -image_desc.image_width = width; -image_desc.image_height = height; -image_desc.image_depth = depth; -image_desc.image_array_size = num_slices; -image_desc.image_row_pitch = width * 8 * 4; // May need alignment -image_desc.image_slice_pitch = image_desc.image_row_pitch * height; -image_desc.num_mip_levels = 1; -image_desc.num_samples = 0; -image_desc.buffer = NULL; - -cl_mem_properties_khr extMemProperties[] = { - (cl_mem_properties_khr)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, - (cl_mem_properties_khr)fd, - (cl_mem_properties_khr)CL_MEM_DEVICE_HANDLE_LIST_KHR, - (cl_mem_properties_khr)devices[0], - CL_MEM_DEVICE_HANDLE_LIST_END_KHR, - 0 -}; - -cl_mem img = clCreateImageWithProperties(/*context*/ clContext, - /*properties*/ extMemProperties, - 
/*flags*/ 0, - /*image_format*/ &clImgFormat, - /*image_desc*/ &image_desc, - /*errcode_ret*/ &errcode_ret); - -// Use clGetImageInfo to get cl_image_format details. -size_t clImageFormatSize; -clGetImageInfo(img, - CL_IMAGE_FORMAT, - sizeof(cl_image_format), - &clImageFormat, - &clImageFormatSize); ----- --- -. Example for synchronization using Wait and Signal -+ --- -[source] ----- -// Start the main rendering loop - -// Create extSem of type cl_semaphore_khr using clCreateSemaphoreWithPropertiesKHR - -// Create extMem of type cl_mem using clCreateBufferWithProperties or clCreateImageWithProperties - -while (true) { - // (not shown) Signal the semaphore from the other API - - // Wait for the semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on 'extSem' - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &extSem, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel that accesses extMem - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &extSem, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch work in other API that waits on 'extSem' -} ----- --- -. Example with memory sharing using acquire/release -+ --- -[source] ----- -// Create extSem of type cl_semaphore_khr using -// clCreateSemaphoreWithPropertiesKHR with CL_SEMAPHORE_HANDLE_*_KHR. - -// Create extMem1 and extMem2 of type cl_mem using clCreateBufferWithProperties -// or clCreateImageWithProperties - -while (true) { - // (not shown) Signal the semaphore from the other API. 
Wait for the - // semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on extSem - clEnqueueWaitSemaphoresKHR(/*command_queue*/ cq1, - /*num_sema_objects*/ 1, - /*sema_objects*/ &extSem, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Get explicit ownership of extMem1 - clEnqueueAcquireExternalMemObjectsKHR(/*command_queue*/ cq1, - /*num_mem_objects*/ 1, - /*mem_objects*/ &extMem1, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel that accesses extMem1 on cq1 on cl_device1 - clEnqueueNDRangeKernel(cq1, ..., &event1); - - // Launch kernel that accesses both extMem1 and extMem2 on cq2 on cl_device2 - // Migration of extMem1 and extMem2 is handled through regular CL memory - // migration. - clEnqueueNDRangeKernel(cq2, ..., &event1, &event2); - - // Give up ownership of extMem1 before you signal the semaphore. Handle - // memory migration here. - clEnqueueReleaseExternalMemObjectsKHR(/*command_queue*/ cq2, - /*num_mem_objects*/ 1, - /*mem_objects*/ &extMem1, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Signal the semaphore from OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ cq2, - /*num_sema_objects*/ 1, - /*sema_objects*/ &extSem, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch work in other API that waits on 'extSem' - // Other API accesses extMem1, but not extMem2 on device-1 -} ----- --- - - -=== Issues - -. How should the import of images that are created in external APIs with non-linear tiling be robustly handled? -+ --- -*UNRESOLVED* --- diff --git a/ext/cl_khr_external_semaphore.asciidoc b/ext/cl_khr_external_semaphore.asciidoc deleted file mode 100644 index 064ebd754..000000000 --- a/ext/cl_khr_external_semaphore.asciidoc +++ /dev/null @@ -1,662 +0,0 @@ -// Copyright 2021-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_external_semaphore]] -== External Semaphores (Provisional) - -`cl_khr_semaphore` introduced semaphores as a new type along with a set of APIs for create, release, retain, wait and signal operations on it. -This extension defines APIs and mechanisms to share semaphores created in an external API by importing into and exporting from OpenCL. - -This extension defines: - -* New attributes that can be passed as part of {cl_semaphore_properties_khr_TYPE} for specifying properties of external semaphores to be imported or exported. - -* New attributes that can be passed as part of {cl_semaphore_info_khr_TYPE} for specifying properties of external semaphores to be exported. - -* An extension to {clCreateSemaphoreWithPropertiesKHR} to accept external semaphore properties allowing to import or export an external semaphore into or from OpenCL. - -* Semaphore handle types required for importing and exporting semaphores. - -* Modifications to Wait and Signal API behavior when dealing with external semaphores created from different handle types. - -* An API to query exportable semaphore handles using a specified handle type. - -Other related extensions define specific external semaphores that may be imported into or exported from OpenCL. - -=== General Information - -==== Name Strings - -`cl_khr_external_semaphore` + -`cl_khr_external_semaphore_dx_fence` + -`cl_khr_external_semaphore_opaque_fd` + -`cl_khr_external_semaphore_sync_fd` + -`cl_khr_external_semaphore_win32` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-09-10 | 0.9.0 | Initial version (provisional). -| 2023-11-16 | 0.9.1 | Added CL_SEMAPHORE_EXPORTABLE_KHR.
-| 2023-11-21 | 0.9.2 | Added re-import function call to cl_khr_external_semaphore_sync_fd -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.8. - -This extension requires OpenCL 1.2. - -The `cl_khr_semaphore` extension is required as it defines semaphore objects as well as for wait and signal operations on semaphores. - -For OpenCL to be able to import external semaphores from other APIs using this extension, the other API is required to provide below mechanisms: - -* Ability to export semaphore handles -* Ability to query semaphore handle in the form of one of the handle type supported by OpenCL. - -The other APIs that want to use semaphore exported by OpenCL using this extension are required to provide below mechanism: - -* Ability to import semaphore handles using handle types exported by OpenCL. - -==== Contributors - -// spell-checker: disable -Ajit Hakke-Patil, NVIDIA + -Amit Rao, NVIDIA + -Balaji Calidas, QUALCOMM + -Ben Ashbaugh, INTEL + -Carsten Rohde, NVIDIA + -Christoph Kubisch, NVIDIA + -Debalina Bhattacharjee, NVIDIA + -Faith Ekstrand, INTEL + -James Jones, NVIDIA + -Jeremy Kemp, IMAGINATION + -Joshua Kelly, QUALCOMM + -Karthik Raghavan Ravi, NVIDIA + -Kedar Patil, NVIDIA + -Kevin Petit, ARM + -Nikhil Joshi, NVIDIA + -Sharan Ashwathnarayan, NVIDIA + -Vivek Kini, NVIDIA + -// spell-checker: enable - -=== New Types - -[source] ----- -typedef cl_uint cl_external_semaphore_handle_type_khr; ----- - -The `cl_khr_external_semaphore_sync_fd` extension adds: - -[source] ----- -typedef cl_properties cl_semaphore_reimport_properties_khr; ----- - -=== New API Functions - -[source] ----- -cl_int clGetSemaphoreHandleForTypeKHR( - cl_semaphore_khr sema_object, - cl_device_id device, - cl_external_semaphore_handle_type_khr handle_type, - size_t handle_size, - void *handle_ptr, - size_t *handle_size_ret); ----- - -The `cl_khr_external_semaphore_sync_fd` extension adds: - ----- 
-cl_int clReImportSemaphoreSyncFdKHR( - cl_semaphore_khr sema_object, - cl_semaphore_reimport_properties_khr *reimport_props, - int fd); ----- - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetPlatformInfo} to query external semaphore handle types that may be imported or exported by all devices in an OpenCL platform: - -[source] ----- -CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x2037 -CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x2038 ----- - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo} to query external semaphore handle types that may be imported or exported by an OpenCL device: - -[source] ----- -CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x204D -CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x204E ----- - -Following new attributes can be passed as part of {cl_semaphore_properties_khr_TYPE} and {cl_semaphore_info_khr_TYPE}: - -[source] ----- -CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x203F -CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR 0 ----- - -The following new attribute that can be passed as part of {cl_semaphore_info_khr_TYPE}: - -[source] ----- -CL_SEMAPHORE_EXPORTABLE_KHR 0x2054 ----- - -External semaphore handle type added by `cl_khr_external_semaphore_dx_fence`: - -[source] ----- -CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR 0x2059 ----- - -External semaphore handle type added by `cl_khr_external_semaphore_opaque_fd`: - -[source] ----- -CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR 0x2055 ----- - -External semaphore handle type added by `cl_khr_external_semaphore_sync_fd`: - -[source] ----- -CL_SEMAPHORE_HANDLE_SYNC_FD_KHR 0x2058 ----- - -External semaphore handle types added by `cl_khr_external_semaphore_win32`: - -[source] ----- -CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR 0x2056 -CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2057 ----- - -=== Modifications to existing APIs added by this spec - -Following new enums are added to the list of supported _param_names_ by {clGetPlatformInfo}: - -.List of supported param_names by 
{clGetPlatformInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Platform Info | Return Type | Description -| {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of importable external semaphore handle types supported by all devices in _platform_. - The size of this query may be 0 if no importable external semaphore handle types are supported by all devices in _platform_. -| {CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of exportable external semaphore handle types supported by all devices in _platform_. - The size of this query may be 0 if no exportable external semaphore handle types are supported by all devices in _platform_. -|==== - -{clGetPlatformInfo} when called with _param_name_ {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} returns a common list of external semaphore handle types supported for importing by all devices in the platform. - -{clGetPlatformInfo} when called with _param_name_ {CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} returns a common list of external semaphore handle types supported for exporting by all devices in the platform. - -Following new enums are added to the list of supported _param_names_ by {clGetDeviceInfo}: - -.List of supported param_names by {clGetDeviceInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Device Info | Return Type | Description -| {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of importable external semaphore handle types supported by _device_. - The size of this query may be 0 indicating that the device does not support importing semaphores. -| {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of exportable external semaphore handle types supported by _device_.
- The size of this query may be 0 indicating that the device does not support exporting semaphores. -|==== - -{clGetDeviceInfo} when called with _param_name_ {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} returns a list of external semaphore handle types supported for importing. - -{clGetDeviceInfo} when called with _param_name_ {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} returns a list of external semaphore handle types supported for exporting. - -One of the above two queries {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} and {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} must return a non-empty list indicating support for at least one of the valid semaphore handle types either for import or for export or both. - -Following new properties are added to the list of possible supported properties by {clCreateSemaphoreWithPropertiesKHR}: - -.List of supported semaphore creation properties by {clCreateSemaphoreWithPropertiesKHR} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Semaphore Property | Property Value | Description -| {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Specifies the list of semaphore handle type properties terminated with - {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR} that can be used to export - the semaphore being created. -|==== - -Add to the list of error conditions for {clCreateSemaphoreWithPropertiesKHR}: - -* {CL_INVALID_DEVICE} if one or more devices identified by properties {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} can not import the requested external semaphore handle type. -* {CL_INVALID_VALUE} if more than one semaphore handle type is specified in the {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} list. -* {CL_INVALID_OPERATION} if _props_list_ specifies a {cl_external_semaphore_handle_type_khr_TYPE} followed by a handle as well as -{CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR}.
Exporting a semaphore handle from a semaphore that was created by importing -an external semaphore handle is not permitted. - -Add to the list of supported _param_names_ by {clGetSemaphoreInfoKHR}: - -.List of supported param_names by {clGetSemaphoreInfoKHR} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Semaphore Info | Return Type | Description -| {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of external semaphore handle types that may be used for - exporting. The size of this query may be 0 indicating that this - semaphore does not support any handle types for exporting. -| {CL_SEMAPHORE_EXPORTABLE_KHR} - | {cl_bool_TYPE} - | Returns {CL_TRUE} if the semaphore is exportable and {CL_FALSE} otherwise. -|==== - -=== Exporting semaphore external handles - -To export an external handle from a semaphore, call the function - -include::{generated}/api/protos/clGetSemaphoreHandleForTypeKHR.txt[] - -_sema_object_ specifies a valid semaphore object with exportable properties. - -_device_ specifies a valid device for which a semaphore handle is being requested. - -_handle_type_ specifies the type of semaphore handle that should be returned for this exportable _sema_object_ and must be one of the values specified when _sema_object_ was created. - -_handle_size_ specifies the size of memory pointed to by _handle_ptr_. - -_handle_ptr_ is a pointer to memory where the exported external handle is returned. -If _handle_ptr_ is `NULL`, it is ignored. - -_handle_size_ret_ returns the actual size in bytes for the external handle. -If _handle_size_ret_ is `NULL`, it is ignored. - -{clGetSemaphoreHandleForTypeKHR} returns {CL_SUCCESS} if the semaphore handle is queried successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_SEMAPHORE_KHR} -** if _sema_object_ is not a valid semaphore -// This is redundant with the error below.
-** if _sema_object_ is not exportable -* {CL_INVALID_DEVICE} -** if _device_ is not a valid device, or -** if _sema_object_ belongs to a context that is not associated with _device_, or -** if _sema_object_ can not be shared with _device_. -* {CL_INVALID_VALUE} if the requested external semaphore handle type was not specified when _sema_object_ was created. -* {CL_INVALID_VALUE} if _handle_size_ is less than the size needed to store the returned handle. -// I don't think this can happen. This would have been checked when the semaphore was created. -// ** if CL_SEMAPHORE_HANDLE_*_KHR is specified as one of the _sema_props_ and -// the property CL_SEMAPHORE_HANDLE_*_KHR does not identify a valid external -// memory handle property reported by -// CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR or -// CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR queries. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -=== Importing semaphore external handles - -Applications can import a semaphore payload into an existing semaphore using an -external semaphore handle. The effects of the import operation will be either -temporary or permanent, as specified by the application. If the import is -temporary, the implementation must restore the semaphore to its prior permanent -state after submitting the next semaphore wait operation. Performing a -subsequent temporary import on a semaphore before performing a semaphore wait -has no effect on this requirement; the next wait submitted on the semaphore must -still restore its last permanent state. A permanent payload import behaves as if -the target semaphore was destroyed, and a new semaphore was created with the -same handle but the imported payload.
Because importing a semaphore payload -temporarily or permanently detaches the existing payload from a semaphore, -similar usage restrictions to those applied to {clReleaseSemaphoreKHR} are -applied to any command that imports a semaphore payload. Which of these import -types is used is referred to as the import operation's permanence. Each handle -type supports either one or both types of permanence. - -The implementation must perform the import operation by either referencing or -copying the payload referred to by the specified external semaphore handle, -depending on the handle's type. The import method used is referred to as the -handle type's transference. When using handle types with reference transference, -importing a payload to a semaphore adds the semaphore to the set of all -semaphores sharing that payload. This set includes the semaphore from which the -payload was exported. Semaphore signaling and waiting operations performed on -any semaphore in the set must behave as if the set were a single semaphore. -Importing a payload using handle types with copy transference creates a -duplicate copy of the payload at the time of import, but makes no further -reference to it. Semaphore signaling and waiting operations performed on the -target of copy imports must not affect any other semaphore or payload. - -Export operations have the same transference as the specified handle type's -import operations. Additionally, exporting a semaphore payload to a handle with -copy transference has the same side effects on the source semaphore's payload as -executing a semaphore wait operation. If the semaphore was using a temporarily -imported payload, the semaphore's prior permanent payload will be restored. - -Please refer to handle specific specifications for more details on transference and -permanence requirements specific to handle type. 
- -=== Descriptions of External Semaphore Handle Types - -This section describes the external semaphore handle types that are added by related extensions. - -Applications can import the same semaphore payload into multiple OpenCL contexts, into the same context from which it was exported, and multiple times into a given OpenCL context. -In all cases, each import operation must create a distinct semaphore object. - -==== File Descriptor Handle Types - -The `cl_khr_external_semaphore_opaque_fd` extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: - --- - * {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR} specifies a POSIX file descriptor handle that has only limited valid usage outside of OpenCL and other compatible APIs. It must be compatible with the POSIX system calls dup, dup2, close, and the non-standard system call dup3. Additionally, it must be transportable over a socket using an SCM_RIGHTS control message. It owns a reference to the underlying synchronization primitive represented by its semaphore object. --- - -Transference and permanence properties for handle types added by `cl_khr_external_semaphore_opaque_fd`: - -.Transference and Permanence Properties for `cl_khr_external_semaphore_opaque_fd` handles -[width="100%",cols="60%,<20%,<20%",options="header"] -|==== -| Handle Type | Transference | Permanence -| {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR} - | Reference - | Temporary, Permanent -|==== - -The `cl_khr_external_semaphore_sync_fd` extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: - --- - * {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} specifies a POSIX file descriptor handle to a Linux Sync File or Android Fence object. 
It can be used with any native API accepting a valid sync file or fence as input. It owns a reference to the underlying synchronization primitive associated with the file descriptor. Implementations which support importing this handle type must accept any type of sync or fence FD supported by the native system they are running on. --- - -The special value -1 for fd is treated like a valid sync file descriptor referring to an object that has already signaled. The import operation will succeed and the semaphore will have a temporarily imported payload as if a valid file descriptor had been provided. - -Note: This special behavior for importing an invalid sync file descriptor allows easier interoperability with other system APIs which use the convention that an invalid sync file descriptor represents work that has already completed and does not need to be waited for. It is consistent with the option for implementations to return a -1 file descriptor when exporting a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} from a {cl_semaphore_khr_TYPE} which is signaled. - -Transference and permanence properties for handle types added by `cl_khr_external_semaphore_sync_fd`: - -.Transference and Permanence Properties for `cl_khr_external_semaphore_sync_fd` handles -[width="100%",cols="60%,<20%,<20%",options="header"] -|==== -| Handle Type | Transference | Permanence -| {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} - | Copy - | Temporary -|==== - -For these extensions, importing a semaphore payload from a file descriptor transfers ownership of the file descriptor from the application to the OpenCL implementation. The application must not perform any operations on the file descriptor after a successful import. - -A handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} may be re-imported into an existing semaphore using {clReImportSemaphoreSyncFdKHR}: - -include::{generated}/api/protos/clReImportSemaphoreSyncFdKHR.txt[] - -_sema_object_ specifies a valid semaphore object with importable properties. 
- -_reimport_props_ Must be `NULL`. Reserved for future use. - -_fd_ external file descriptor handle to import - -Calling {clReImportSemaphoreSyncFdKHR} is equivalent to destroying _sema_object_ and re-creating it with the original _sema_props_ -from {clCreateSemaphoreWithPropertiesKHR}, except a handle specified by _fd_ will be imported. -The semaphore _sema_object_ must have originally imported an external handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR}. - -* {CL_INVALID_SEMAPHORE_KHR} -** if _sema_object_ is not a valid semaphore -* {CL_INVALID_SEMAPHORE_KHR} if a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} handle was not imported when _sema_object_ was created. -* {CL_INVALID_VALUE} if _fd_ is invalid. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. - -==== NT Handle Types - -The `cl_khr_external_semaphore_dx_fence` extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: - --- - * {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} specifies an NT handle returned by ID3D12Device::CreateSharedHandle referring to a Direct3D 12 fence, or ID3D11Device5::CreateFence referring to a Direct3D 11 fence. It owns a reference to the underlying synchronization primitive associated with the Direct3D fence. --- - -When waiting on semaphores using {clEnqueueWaitSemaphoresKHR} or signaling semaphores using {clEnqueueSignalSemaphoresKHR}, the semaphore payload must be provided for semaphores created from {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR}. 
- - -* If _sema_objects_ list has a mix of semaphores obtained from {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and other handle types, -then the _sema_payload_list_ should point to a list of _num_sema_objects_ payload values for each semaphore in _sema_objects_. -However, the payload values corresponding to semaphores with type {CL_SEMAPHORE_TYPE_BINARY_KHR} can be set to 0 or will be ignored. - -{clEnqueueWaitSemaphoresKHR} and {clEnqueueSignalSemaphoresKHR} may return {CL_INVALID_VALUE} if _sema_objects_ list has one or more semaphores obtained from {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and _sema_payload_list_ is NULL. - - - -Transference and permanence properties for handle types added by `cl_khr_external_semaphore_dx_fence`: - --- -.Transference and Permanence Properties for `cl_khr_external_semaphore_dx_fence` handles -[width="100%",cols="60%,<20%,<20%",options="header"] -|==== -| Handle Type | Transference | Permanence -| {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} - | Reference - | Temporary, Permanent -|==== --- - -The `cl_khr_external_semaphore_win32` extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: - --- - * {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} specifies an NT handle that has only limited valid usage outside of OpenCL and other compatible APIs. It must be compatible with the functions DuplicateHandle, CloseHandle, CompareObjectHandles, GetHandleInformation, and SetHandleInformation. It owns a reference to the underlying synchronization primitive represented by its semaphore object. - * {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} specifies a global share handle that has only limited valid usage outside of OpenCL and other compatible APIs. It is not compatible with any native APIs. 
It does not own a reference to the underlying synchronization primitive represented by its semaphore object, and will therefore become invalid when all semaphore objects associated with it are destroyed. --- - -Transference and permanence properties for handle types added by `cl_khr_external_semaphore_win32`: - -.Transference and Permanence Properties for `cl_khr_external_semaphore_win32` handles -[width="100%",cols="60%,<20%,<20%",options="header"] -|==== -| Handle Type | Transference | Permanence -| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} - | Reference - | Temporary, Permanent -| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} - | Reference - | Temporary, Permanent -|==== - -For these extensions, importing a semaphore payload from Windows handles does not transfer ownership of the handle to the OpenCL implementation. For handle types defined as NT handles, the application must release ownership using the CloseHandle system call when the handle is no longer needed. - -[[cl_khr_external_semaphore-Sample-Code]] -=== Sample Code - -. Example for importing a semaphore created by another API in OpenCL in a single-device context. -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with just first device -clCreateContext(..., 1, devices, ...); - -// Obtain fd/win32 or similar handle for external semaphore to be imported -// from the other API. -int fd = getFdForExternalSemaphore(); - -// Create clSema of type cl_semaphore_khr usable on the only available device -// assuming the semaphore was imported from the same device. 
- -cl_semaphore_properties_khr sema_props[] = - {(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, - (cl_semaphore_properties_khr)fd, - 0}; - - -int errcode_ret = 0; -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - ----- --- -. Example for importing a semaphore created by another API in OpenCL in a multi-device context for single device usage. -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with first two devices -clCreateContext(..., 2, devices, ...); - -// Obtain fd/win32 or similar handle for external semaphore to be imported -// from the other API. -int fd = getFdForExternalSemaphore(); - -// Create clSema of type cl_semaphore_khr usable only on device 1 -// assuming the semaphore was imported from the same device. -cl_semaphore_properties_khr sema_props[] = { - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, - (cl_semaphore_properties_khr)fd, - (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, - (cl_semaphore_properties_khr)devices[1], - CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, - 0 -}; - -int errcode_ret = 0; -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - ----- --- -. Example for synchronization using a semaphore created by another API and imported in OpenCL -+ --- -[source] ----- -// Create clSema using one of the above examples of external semaphore creation. 
- -int errcode_ret = 0; -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - -// Start the main loop - -while (true) { - // (not shown) Signal the semaphore from the other API - - // Wait for the semaphore in OpenCL - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch work in the other API that waits on 'clSema' - -} ----- --- -. Example for synchronization using semaphore exported by OpenCL -+ --- -[source] ----- - -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with first two devices -clCreateContext(..., 2, devices, ...); - -// Create clSema of type cl_semaphore_khr usable only on device 1 -cl_semaphore_properties_khr sema_props[] = { - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, - (cl_semaphore_properties_khr)devices[1], - CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, - 0 -}; - -int errcode_ret = 0; -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - -// Application queries handle-type and the exportable handle associated with the semaphore. 
-clGetSemaphoreInfoKHR(clSema, - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, - sizeof(cl_external_semaphore_handle_type_khr), - &handle_type, - &handle_type_size); - -// The other API or process can use the exported semaphore handle -// to import -int fd = -1; -if (handle_type == CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR) { - clGetSemaphoreHandleForTypeKHR(clSema, - device, - CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, - sizeof(int), - &fd, - NULL); -} - -// Start the main rendering loop - -while (true) { - // (not shown) Signal the semaphore from the other API - - // Wait for the semaphore in OpenCL - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch work in the other API that waits on 'clSema' -} ----- --- diff --git a/ext/cl_khr_fp16.asciidoc b/ext/cl_khr_fp16.asciidoc deleted file mode 100644 index 595ff95c5..000000000 --- a/ext/cl_khr_fp16.asciidoc +++ /dev/null @@ -1,1928 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_fp16]] -== Half Precision Floating-Point - -This section describes the *cl_khr_fp16* extension. -This extension adds support for half scalar and vector types as built-in -types that can be used for arithmetic operations, conversions etc. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. 
-|==== - -[[cl_khr_fp16-additions-to-chapter-6-of-the-opencl-2.0-specification]] -=== Additions to Chapter 6 of the OpenCL 2.0 C Specification - -The list of built-in scalar, and vector data types defined in _tables 6.1_, -and _6.2_ is extended to include the following: - -[cols="1,3",options="header",] -|==== -| *Type* | *Description* -| *half2* | A 2-component half-precision floating-point vector. -| *half3* | A 3-component half-precision floating-point vector. -| *half4* | A 4-component half-precision floating-point vector. -| *half8* | An 8-component half-precision floating-point vector. -| *half16* | A 16-component half-precision floating-point vector. -|==== - -The built-in vector data types for `halfn` are also declared as appropriate -types in the OpenCL API (and header files) that can be used by an -application. -The following table describes the built-in vector data types for `halfn` as -defined in the OpenCL C programming language and the corresponding data type -available to the application: - -[cols=",",options="header",] -|==== -| *Type in OpenCL Language* | *API type for application* -| *half2* | *cl_half2* -| *half3* | *cl_half3* -| *half4* | *cl_half4* -| *half8* | *cl_half8* -| *half16* | *cl_half16* -|==== - -The relational, equality, logical and logical unary operators described in -_section 6.3_ can be used with `half` scalar and `halfn` vector types and -shall produce a scalar `int` and vector `shortn` result respectively. - -The OpenCL compiler accepts an h and H suffix on floating-point literals, -indicating the literal is typed as a half. - -[[cl_khr_fp16-conversions]] -==== Conversions - -The implicit conversion rules specified in _section 6.2.1_ now include the -`half` scalar and `halfn` vector data types. - -The explicit casts described in _section 6.2.2_ are extended to take a -`half` scalar data type and a `halfn` vector data type.
- -The explicit conversion functions described in _section 6.2.3_ are extended -to take a `half` scalar data type and a `halfn` vector data type. - -The `as_typen()` function for re-interpreting types as described in _section -6.2.4.2_ is extended to allow conversion-free casts between `shortn`, -`ushortn`, and `halfn` scalar and vector data types. - -[[cl_khr_fp16-math-functions]] -==== Math Functions - -The built-in math functions defined in _table 6.8_ (also listed below) are -extended to include appropriate versions of functions that take `half` and -`half{2|3|4|8|16}` as arguments and return values. -`gentype` now also includes `half`, `half2`, `half3`, `half4`, `half8`, and -`half16`. - -For any specific use of a function, the actual type has to be the same for -all arguments and the return type. - -._Half Precision Built-in Math Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype *acos* (gentype _x_) -| Arc cosine function. - -| gentype *acosh* (gentype _x_) -| Inverse hyperbolic cosine. - -| gentype *acospi* (gentype _x_) -| Compute *acos* (_x_) / {pi}. - -| gentype *asin* (gentype _x_) -| Arc sine function. - -| gentype *asinh* (gentype _x_) -| Inverse hyperbolic sine. - -| gentype *asinpi* (gentype _x_) -| Compute *asin* (_x_) / {pi}. - -| gentype *atan* (gentype _y_over_x_) -| Arc tangent function. - -| gentype *atan2* (gentype _y_, gentype _x_) -| Arc tangent of _y_ / _x_. - -| gentype *atanh* (gentype _x_) -| Hyperbolic arc tangent. - -| gentype *atanpi* (gentype _x_) -| Compute *atan* (_x_) / {pi}. - -| gentype *atan2pi* (gentype _y_, gentype _x_) -| Compute *atan2* (_y_, _x_) / {pi}. - -| gentype *cbrt* (gentype _x_) -| Compute cube-root. - -| gentype *ceil* (gentype _x_) -| Round to integral value using the round to positive infinity rounding - mode. - -| gentype *copysign* (gentype _x_, gentype _y_) -| Returns _x_ with its sign changed to match the sign of _y_. 
- -| gentype *cos* (gentype _x_) -| Compute cosine. - -| gentype *cosh* (gentype _x_) -| Compute hyperbolic cosine. - -| gentype *cospi* (gentype _x_) -| Compute *cos* ({pi} _x_). - -| gentype *erfc* (gentype _x_) -| Complementary error function. - -| gentype *erf* (gentype _x_) -| Error function encountered in integrating the normal distribution. - -| gentype *exp* (gentype _x_) -| Compute the base- e exponential of _x_. - -| gentype *exp2* (gentype _x_) -| Exponential base 2 function. - -| gentype *exp10* (gentype _x_) -| Exponential base 10 function. - -| gentype *expm1* (gentype _x_) -| Compute _e^x^_- 1.0. - -| gentype *fabs* (gentype _x_) -| Compute absolute value of a floating-point number. - -| gentype *fdim* (gentype _x_, gentype _y_) -| _x_ - _y_ if _x_ > _y_, +0 if x is less than or equal to y. - -| gentype *floor* (gentype _x_) -| Round to integral value using the round to negative infinity rounding - mode. - -| gentype *fma* (gentype _a_, gentype _b_, gentype _c_) -| Returns the correctly rounded floating-point representation of the sum of - _c_ with the infinitely precise product of _a_ and _b_. - Rounding of intermediate products shall not occur. - Edge case behavior is per the IEEE 754-2008 standard. - -| gentype *fmax* (gentype _x_, gentype _y_) + - gentype *fmax* (gentype _x_, half _y_) -| Returns _y_ if _x_ < _y_, otherwise it returns _x_. - If one argument is a NaN, *fmax()* returns the other argument. - If both arguments are NaNs, *fmax()* returns a NaN. - -| gentype *fmin* (gentype _x_, gentype _y_) + - gentype *fmin* (gentype _x_, half _y_) -| Returns _y_ if _y_ < _x_, otherwise it returns _x_. - If one argument is a NaN, *fmin()* returns the other argument. - If both arguments are NaNs, *fmin()* returns a NaN. - -| gentype *fmod* (gentype _x_, gentype _y_) -| Modulus. - Returns _x_ - _y_ * *trunc* (_x_/_y_) . 
- -| gentype **fract** (gentype _x_, {global} gentype *_iptr_) + - gentype **fract** (gentype _x_, {local} gentype *_iptr_) + - gentype **fract** (gentype _x_, {private} gentype *_iptr_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - gentype **fract** (gentype _x_, gentype *_iptr_) -| Returns *fmin*( _x_ - *floor* (_x_), 0x1.ffcp-1f ). - - *floor*(x) is returned in _iptr_. - -| half__n__ **frexp** (half__n x__, {global} int__n__ *exp) + - half **frexp** (half _x_, {global} int *exp) + - - half__n__ **frexp** (half__n x__, {local} int__n__ *exp) + - half **frexp** (half _x_, {local} int *exp) + - - half__n__ **frexp** (half__n x__, {private} int__n__ *exp) + - half **frexp** (half _x_, {private} int *exp) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - half__n__ **frexp** (half__n__ _x_, int__n__ *exp) + - half **frexp** (half _x_, int *exp) -| Extract mantissa and exponent from _x_. - For each component the mantissa returned is a `half` with magnitude - in the interval [1/2, 1) or 0. - Each component of _x_ equals mantissa returned * 2__^exp^__. - -| gentype *hypot* (gentype _x_, gentype _y_) -| Compute the value of the square root of __x__^2^+ __y__^2^ without undue - overflow or underflow. - -| int__n__ *ilogb* (half__n__ _x_) + - int *ilogb* (half _x_) -| Return the exponent as an integer value. - -| half__n__ *ldexp* (half__n__ _x_, int__n__ _k_) + - half__n__ *ldexp* (half__n__ _x_, int _k_) + - half *ldexp* (half _x_, int _k_) -| Multiply _x_ by 2 to the power _k_. 
- -| gentype **lgamma** (gentype _x_) + - - half__n__ **lgamma_r** (half__n__ _x_, {global} int__n__ *_signp_) + - half **lgamma_r** (half _x_, {global} int *_signp_) + - - half__n__ **lgamma_r** (half__n__ _x_, {local} int__n__ *_signp_) + - half **lgamma_r** (half _x_, {local} int *_signp_) + - - half__n__ **lgamma_r** (half__n__ _x_, {private} int__n__ *_signp_) + - half **lgamma_r** (half _x_, {private} int *_signp_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - half__n__ **lgamma_r** (half__n__ _x_, int__n__ *_signp_) + - half **lgamma_r** (half _x_, int *_signp_) -| Log gamma function. - Returns the natural logarithm of the absolute value of the gamma function. - The sign of the gamma function is returned in the _signp_ argument of - *lgamma_r*. - -| gentype *log* (gentype _x_) -| Compute natural logarithm. - -| gentype *log2* (gentype _x_) -| Compute a base 2 logarithm. - -| gentype *log10* (gentype _x_) -| Compute a base 10 logarithm. - -| gentype *log1p* (gentype _x_) -| Compute log~e~(1.0 + _x_) . - -| gentype *logb* (gentype _x_) -| Compute the exponent of _x_, which is the integral part of - log__~r~__\|_x_\|. - -| gentype *mad* (gentype _a_, gentype _b_, gentype _c_) -| *mad* computes _a_ * _b_ + _c_. - The function may compute _a_ * _b_ + _c_ with reduced accuracy - in the embedded profile. See the OpenCL SPIR-V Environment Specification - for details. On some hardware the mad instruction may provide better - performance than expanded computation of _a_ * _b_ + _c_. - - Note: For some usages, e.g. *mad*(a, b, -a*b), the half precision - definition of *mad*() is loose enough that almost any result is allowed - from *mad*() for some values of a and b. - -| gentype *maxmag* (gentype _x_, gentype _y_) -| Returns _x_ if \|_x_\| > \|_y_\|, _y_ if \|_y_\| > \|_x_\|, otherwise - *fmax*(_x_, _y_). 
- -| gentype *minmag* (gentype _x_, gentype _y_) -| Returns _x_ if \|_x_\| < \|_y_\|, _y_ if \|_y_\| < \|_x_\|, otherwise - *fmin*(_x_, _y_). - -| gentype **modf** (gentype _x_, {global} gentype *_iptr_) + - gentype **modf** (gentype _x_, {local} gentype *_iptr_) + - gentype **modf** (gentype _x_, {private} gentype *_iptr_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - gentype **modf** (gentype _x_, gentype *_iptr_) -| Decompose a floating-point number. - The *modf* function breaks the argument _x_ into integral and fractional - parts, each of which has the same sign as the argument. - It stores the integral part in the object pointed to by _iptr_. - -| half__n__ *nan* (ushort__n__ _nancode_) + - half *nan* (ushort _nancode_) -| Returns a quiet NaN. - The _nancode_ may be placed in the significand of the resulting NaN. - -| gentype *nextafter* (gentype _x_, gentype _y_) -| Computes the next representable half-precision floating-point value - following _x_ in the direction of _y_. - Thus, if _y_ is less than _x_, *nextafter*() returns the largest - representable floating-point number less than _x_. - -| gentype *pow* (gentype _x_, gentype _y_) -| Compute _x_ to the power _y_. - -| half__n__ *pown* (half__n__ _x_, int__n__ _y_) + - half *pown* (half _x_, int _y_) -| Compute _x_ to the power _y_, where _y_ is an integer. - -| gentype *powr* (gentype _x_, gentype _y_) -| Compute _x_ to the power _y_, where _x_ is >= 0. - -| gentype *remainder* (gentype _x_, gentype _y_) -| Compute the value _r_ such that _r_ = _x_ - _n_*_y_, where _n_ is the - integer nearest the exact value of _x_/_y_. - If there are two integers closest to _x_/_y_, _n_ shall be the even one. - If _r_ is zero, it is given the same sign as _x_. 
- -| half__n__ **remquo** (half__n__ _x_, half__n__ _y_, {global} int__n__ *_quo_) + - half **remquo** (half _x_, half _y_, {global} int *_quo_) + - - half__n__ **remquo** (half__n__ _x_, half__n__ _y_, {local} int__n__ *_quo_) + - half **remquo** (half _x_, half _y_, {local} int *_quo_) + - - half__n__ **remquo** (half__n__ _x_, half__n__ _y_, {private} int__n__ *_quo_) + - half **remquo** (half _x_, half _y_, {private} int *_quo_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - half__n__ **remquo** (half__n__ _x_, half__n__ _y_, int__n__ *_quo_) + - half **remquo** (half _x_, half _y_, int *_quo_) -| The *remquo* function computes the value r such that _r_ = _x_ - _k_*_y_, - where _k_ is the integer nearest the exact value of _x_/_y_. - If there are two integers closest to _x_/_y_, _k_ shall be the even one. - If _r_ is zero, it is given the same sign as _x_. - This is the same value that is returned by the *remainder* function. - *remquo* also calculates the lower seven bits of the integral quotient - _x_/_y_, and gives that value the same sign as _x_/_y_. - It stores this signed value in the object pointed to by _quo_. - -| gentype *rint* (gentype _x_) -| Round to integral value (using round to nearest even rounding mode) in - floating-point format. - Refer to section 7.1 for description of rounding modes. - -| half__n__ *rootn* (half__n__ _x_, int__n__ _y_) + - half *rootn* (half _x_, int _y_) -| Compute _x_ to the power 1/_y_. - -| gentype *round* (gentype _x_) -| Return the integral value nearest to _x_ rounding halfway cases away from - zero, regardless of the current rounding direction. - -| gentype *rsqrt* (gentype _x_) -| Compute inverse square root. - -| gentype *sin* (gentype _x_) -| Compute sine. 
- -| gentype **sincos** (gentype _x_, {global} gentype *_cosval_) + - gentype **sincos** (gentype _x_, {local} gentype *_cosval_) + - gentype **sincos** (gentype _x_, {private} gentype *_cosval_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - gentype **sincos** (gentype _x_, gentype *_cosval_) -| Compute sine and cosine of x. - The computed sine is the return value and computed cosine is returned in - _cosval_. - -| gentype *sinh* (gentype _x_) -| Compute hyperbolic sine. - -| gentype *sinpi* (gentype _x_) -| Compute *sin* ({pi} _x_). - -| gentype *sqrt* (gentype _x_) -| Compute square root. - -| gentype *tan* (gentype _x_) -| Compute tangent. - -| gentype *tanh* (gentype _x_) -| Compute hyperbolic tangent. - -| gentype *tanpi* (gentype _x_) -| Compute *tan* ({pi} _x_). - -| gentype *tgamma* (gentype _x_) -| Compute the gamma function. - -| gentype *trunc* (gentype _x_) -| Round to integral value using the round to zero rounding mode. -|==== - -The *FP_FAST_FMA_HALF* macro indicates whether the *fma()* family of -functions are fast compared with direct code for half precision -floating-point. -If defined, the *FP_FAST_FMA_HALF* macro shall indicate that the *fma()* -function generally executes about as fast as, or faster than, a multiply and -an add of *half* operands. - -The macro names given in the following list must use the values specified. -These constant expressions are suitable for use in #if preprocessing -directives. - -[source,opencl_c] ----- -#define HALF_DIG 3 -#define HALF_MANT_DIG 11 -#define HALF_MAX_10_EXP +4 -#define HALF_MAX_EXP +16 -#define HALF_MIN_10_EXP -4 -#define HALF_MIN_EXP -13 -#define HALF_RADIX 2 -#define HALF_MAX 0x1.ffcp15h -#define HALF_MIN 0x1.0p-14h -#define HALF_EPSILON 0x1.0p-10h ----- - -The following table describes the built-in macro names given above in the -OpenCL C programming language and the corresponding macro names available to -the application. 
- -[cols=",",options="header",] -|==== -| *Macro in OpenCL Language* | *Macro for application* -| `HALF_DIG` | {CL_HALF_DIG} -| `HALF_MANT_DIG` | {CL_HALF_MANT_DIG} -| `HALF_MAX_10_EXP` | {CL_HALF_MAX_10_EXP} -| `HALF_MAX_EXP` | {CL_HALF_MAX_EXP} -| `HALF_MIN_10_EXP` | {CL_HALF_MIN_10_EXP} -| `HALF_MIN_EXP` | {CL_HALF_MIN_EXP} -| `HALF_RADIX` | {CL_HALF_RADIX} -| `HALF_MAX` | {CL_HALF_MAX} -| `HALF_MIN` | {CL_HALF_MIN} -| `HALF_EPSILON` | {CL_HALF_EPSILON} -|==== - -The following constants are also available. -They are of type `half` and are accurate within the precision of the `half` -type. - -[cols=",",options="header",] -|==== -| *Constant* | *Description* -| `M_E_H` | Value of e -| `M_LOG2E_H` | Value of log~2~e -| `M_LOG10E_H` | Value of log~10~e -| `M_LN2_H` | Value of log~e~2 -| `M_LN10_H` | Value of log~e~10 -| `M_PI_H` | Value of {pi} -| `M_PI_2_H` | Value of {pi} / 2 -| `M_PI_4_H` | Value of {pi} / 4 -| `M_1_PI_H` | Value of 1 / {pi} -| `M_2_PI_H` | Value of 2 / {pi} -| `M_2_SQRTPI_H` | Value of 2 / {sqrt}{pi} -| `M_SQRT2_H` | Value of {sqrt}2 -| `M_SQRT1_2_H` | Value of 1 / {sqrt}2 -|==== - -[[cl_khr_fp16-common-functions]] -==== Common Functions - -The built-in common functions defined in _table 6.12_ (also listed below) -are extended to include appropriate versions of functions that take `half` -and `half{2|3|4|8|16}` as arguments and return values. -gentype now also includes `half`, `half2`, `half3`, `half4`, `half8` and -`half16`. -These are described below. - -._Half Precision Built-in Common Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype *clamp* ( + - gentype _x_, gentype _minval_, gentype _maxval_) - - gentype *clamp* ( + - gentype _x_, half _minval_, half _maxval_) -| Returns *fmin*(*fmax*(_x_, _minval_), _maxval_). - - Results are undefined if _minval_ > _maxval_. - -| gentype *degrees* (gentype _radians_) -| Converts _radians_ to degrees, + - i.e. (180 / {pi}) * _radians_.
- -| gentype *max* (gentype _x_, gentype _y_) + - gentype *max* (gentype _x_, half _y_) -| Returns _y_ if _x_ < _y_, otherwise it returns _x_. - If _x_ and _y_ are infinite or NaN, the return values are undefined. - -| gentype *min* (gentype _x_, gentype _y_) + - gentype *min* (gentype _x_, half _y_) -| Returns _y_ if _y_ < _x_, otherwise it returns _x_. - If _x_ and _y_ are infinite or NaN, the return values are undefined. - -| gentype *mix* (gentype _x_, gentype _y_, gentype _a_) + - gentype *mix* (gentype _x_, gentype _y_, half _a_) -| Returns the linear blend of _x_ and _y_ implemented as: - - _x_ + (_y_ - _x_) * _a_ - - _a_ must be a value in the range 0.0 ... 1.0. - If _a_ is not in the range 0.0 ... 1.0, the return values are undefined. - - Note: The half precision *mix* function can be implemented using contractions such as *mad* or *fma*. - -| gentype *radians* (gentype _degrees_) -| Converts _degrees_ to radians, i.e. ({pi} / 180) * _degrees_. - -| gentype *step* (gentype _edge_, gentype _x_) + - gentype *step* (half _edge_, gentype _x_) -| Returns 0.0 if _x_ < _edge_, otherwise it returns 1.0. - -| gentype *smoothstep* ( + - gentype _edge0_, gentype _edge1_, gentype _x_) - - gentype *smoothstep* ( + - half _edge0_, half _edge1_, gentype _x_) -| Returns 0.0 if _x_ \<= _edge0_ and 1.0 if _x_ >= _edge1_ and performs - smooth Hermite interpolation between 0 and 1 when _edge0_ < _x_ < _edge1_. - This is useful in cases where you would want a threshold function with a - smooth transition. - - This is equivalent to: - - gentype _t_; + - _t_ = clamp ((_x_ - _edge0_) / (_edge1_ - _edge0_), 0, 1); + - return _t_ * _t_ * (3 - 2 * _t_); + - - Results are undefined if _edge0_ >= _edge1_. - - Note: The half precision *smoothstep* function can be implemented using contractions such as *mad* or *fma*. - -| gentype *sign* (gentype _x_) -| Returns 1.0 if _x_ > 0, -0.0 if _x_ = -0.0, +0.0 if _x_ = +0.0, or -1.0 if - _x_ < 0. - Returns 0.0 if _x_ is a NaN.
- -|==== - -[[cl_khr_fp16-geometric-functions]] -==== Geometric Functions - -The built-in geometric functions defined in _table 6.13_ (also listed below) -are extended to include appropriate versions of functions that take `half` -and `half{2|3|4}` as arguments and return values. -gentype now also includes `half`, `half2`, `half3` and `half4`. -These are described below. - -Note: The half precision geometric functions can be implemented using -contractions such as *mad* or *fma*. - -._Half Precision Built-in Geometric Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| half4 *cross* (half4 _p0_, half4 _p1_) + - half3 *cross* (half3 _p0_, half3 _p1_) -| Returns the cross product of _p0.xyz_ and _p1.xyz_. - The _w_ component of the result will be 0.0. - -| half *dot* (gentype _p0_, gentype _p1_) -| Compute the dot product of _p0_ and _p1_. - -| half *distance* (gentype _p0_, gentype _p1_) -| Returns the distance between _p0_ and _p1_. - This is calculated as *length*(_p0_ - _p1_). - -| half *length* (gentype _p_) -| Return the length of vector _p_, i.e., + - sqrt( __p.x__^2^ + __p.y__^2^ + ... ) - -| gentype *normalize* (gentype _p_) -| Returns a vector in the same direction as _p_ but with a length of 1. - -|==== - -[[cl_khr_fp16-relational-functions]] -==== Relational Functions - -The scalar and vector relational functions described in _table 6.14_ are -extended to include versions that take `half`, `half2`, `half3`, `half4`, -`half8` and `half16` as arguments. - -The relational and equality operators (<, \<=, >, >=, !=, ==) can be used -with `halfn` vector types and shall produce a vector `shortn` result as -described in _section 6.3_.
- -The functions *isequal*, *isnotequal*, *isgreater*, *isgreaterequal*, -*isless*, *islessequal*, *islessgreater*, *isfinite*, *isinf*, *isnan*, -*isnormal*, *isordered*, *isunordered* and *signbit* shall return a 0 if the -specified relation is _false_ and a 1 if the specified relation is true for -scalar argument types. -These functions shall return a 0 if the specified relation is _false_ and a --1 (i.e. all bits set) if the specified relation is _true_ for vector -argument types. - -The relational functions *isequal*, *isgreater*, *isgreaterequal*, *isless*, -*islessequal*, and *islessgreater* always return 0 if either argument is not -a number (NaN). -*isnotequal* returns 1 if one or both arguments are not a number (NaN) and -the argument type is a scalar and returns -1 if one or both arguments are -not a number (NaN) and the argument type is a vector. - -The functions described in _table 6.14_ are extended to include the `halfn` -vector types. - -._Half Precision Relational Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| int *isequal* (half _x_, half _y_) + - short__n__ *isequal* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ == _y_. - -| int *isnotequal* (half _x_, half _y_) + - short__n__ *isnotequal* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ != _y_. - -| int *isgreater* (half _x_, half _y_) + - short__n__ *isgreater* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ > _y_. - -| int *isgreaterequal* (half _x_, half _y_) + - short__n__ *isgreaterequal* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ >= _y_. - -| int *isless* (half _x_, half _y_) + - short__n__ *isless* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ < _y_. - -| int *islessequal* (half _x_, half _y_) + - short__n__ *islessequal* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ \<= _y_. 
- -| int *islessgreater* (half _x_, half _y_) + - short__n__ *islessgreater* (half__n x__, half__n y__) -| Returns the component-wise compare of (_x_ < _y_) \|\| (_x_ > _y_) . - -| | - -| int *isfinite* (half) + - short__n__ *isfinite* (half__n__) -| Test for finite value. - -| int *isinf* (half) + - short__n__ *isinf* (half__n__) -| Test for infinity value (positive or negative) . - -| int *isnan* (half) + - short__n__ *isnan* (half__n__) -| Test for a NaN. - -| int *isnormal* (half) + - short__n__ *isnormal* (half__n__) -| Test for a normal value. - -| int *isordered* (half _x_, half _y_) + - short__n__ *isordered* (half__n x__, half__n y__) -| Test if arguments are ordered. - *isordered*() takes arguments _x_ and _y_, and returns the result - *isequal*(_x_, _x_) && *isequal*(_y_, _y_). - -| int *isunordered* (half _x_, half _y_) + - short__n__ *isunordered* (half__n x__, half__n y__) -| Test if arguments are unordered. - *isunordered*() takes arguments _x_ and _y_, returning non-zero if _x_ or - _y_ is a NaN, and zero otherwise. - -| int *signbit* (half) + - short__n__ *signbit* (half__n__) -| Test for sign bit. - The scalar version of the function returns a 1 if the sign bit in the half - is set else returns 0. - The vector version of the function returns the following for each - component in half__n__: -1 (i.e all bits set) if the sign bit in the half - is set else returns 0. - -| | - -| half__n__ *bitselect* (half__n a__, half__n b__, half__n c__) -| Each bit of the result is the corresponding bit of _a_ if the - corresponding bit of _c_ is 0. - Otherwise it is the corresponding bit of _b_. - -| half__n__ *select* (half__n a__, half__n b__, short__n__ _c_) + - half__n__ *select* (half__n a__, half__n b__, ushort__n__ _c_) -| For each component, + - _result[i]_ = if MSB of _c[i]_ is set ? _b[i]_ : _a[i]_. 
+ - -|==== - -[[cl_khr_fp16-vector-data-load-and-store-functions]] -==== Vector Data Load and Store Functions - -The vector data load (*vload__n__*) and store (*vstore__n__*) functions -described in _table 6.13_ (also listed below) are extended to include -versions that read or write half vector values. -The generic type `gentype` is extended to include `half`. -The generic type `gentypen` is extended to include `half2`, `half3`, -`half4`, `half8`, and `half16`. - -Note: *vload3* reads _x_, _y_, _z_ components from address -(_p_ + (_offset_ * 3)) into a 3-component vector and *vstore3* writes _x_, _y_, _z_ -components from a 3-component vector to address (_p_ + (_offset_ * 3)). - -._Half Precision Vector Data Load and Store Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype__n__ **vload__n__**(size_t _offset_, const {global} gentype *_p_) + - gentype__n__ **vload__n__**(size_t _offset_, const {local} gentype *_p_) + - gentype__n__ **vload__n__**(size_t _offset_, const {constant} gentype *_p_) + - gentype__n__ **vload__n__**(size_t _offset_, const {private} gentype *_p_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - gentype__n__ **vload__n__**(size_t _offset_, const gentype *_p_) -| Return sizeof (gentype__n__) bytes of data read from address - (_p_ + (_offset * n_)). - If gentype is half, the read address computed as (_p_ + (_offset * n_)) - must be 16-bit aligned. - -| void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {global} gentype *_p_) + - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {local} gentype *_p_) + - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {private} gentype *_p_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, gentype *_p_) -| Write sizeof (gentype__n__) bytes given by _data_ to address - (_p_ + (_offset * n_)). 
- If gentype is half, the write address computed as (_p_ + (_offset * n_)) - must be 16-bit aligned. - -|==== - -[[cl_khr_fp16-async-copies-from-global-to-local-memory-local-to-global-memory-and-prefetch]] -==== Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch - -The OpenCL C programming language implements the following functions that -provide asynchronous copies between global and local memory and a prefetch -from global memory. - -The generic type `gentype` is extended to include `half`, `half2`, `half3`, -`half4`, `half8`, and `half16`. - -._Half Precision Built-in Async Copy and Prefetch Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| event_t **async_work_group_copy** ( + - {local} gentype *_dst_, + - const {global} gentype *_src_, + - size_t _num_gentypes_, event_t _event_) - - event_t **async_work_group_copy** ( + - {global} gentype _*dst_, + - const {local} gentype *_src_, + - size_t _num_gentypes_, event_t _event_) -| Perform an async copy of _num_gentypes_ gentype elements from _src_ to - _dst_. - The async copy is performed by all work-items in a work-group and this - built-in function must therefore be encountered by all work-items in a - work-group executing the kernel with the same argument values; otherwise - the results are undefined. - - Returns an event object that can be used by *wait_group_events* to wait - for the async copy to finish. - The _event_ argument can also be used to associate the - *async_work_group_copy* with a previous async copy allowing an event to be - shared by multiple async copies; otherwise _event_ should be zero. - - If _event_ argument is not zero, the event object supplied in _event_ - argument will be returned. - - This function does not perform any implicit synchronization of source data - such as using a *barrier* before performing the copy. 
- -| | - -| event_t **async_work_group_strided_copy** ( + - {local} gentype _*dst_, + - const {global} gentype *_src_, + - size_t _num_gentypes_, + - size_t _src_stride_, event_t _event_) - - event_t **async_work_group_strided_copy** ( + - {global} gentype _*dst_, + - const {local} gentype *_src_, + - size_t _num_gentypes_, + - size_t _dst_stride_, event_t _event_) -| Perform an async gather of _num_gentypes_ gentype elements from _src_ to - _dst_. - The _src_stride_ is the stride in elements for each gentype element read - from _src_. - The async gather is performed by all work-items in a work-group and this - built-in function must therefore be encountered by all work-items in a - work-group executing the kernel with the same argument values; otherwise - the results are undefined. - - Returns an event object that can be used by *wait_group_events* to wait - for the async copy to finish. - The _event_ argument can also be used to associate the - *async_work_group_strided_copy* with a previous async copy allowing an - event to be shared by multiple async copies; otherwise _event_ should be - zero. - - If _event_ argument is not zero, the event object supplied in _event_ - argument will be returned. - - This function does not perform any implicit synchronization of source data - such as using a *barrier* before performing the copy. - - The behavior of *async_work_group_strided_copy* is undefined if - _src_stride_ or _dst_stride_ is 0, or if the _src_stride_ or _dst_stride_ - values cause the _src_ or _dst_ pointers to exceed the upper bounds of the - address space during the copy. - -| | - -| void *wait_group_events* ( + - int _num_events_, event_t *_event_list_) -| Wait for events that identify the *async_work_group_copy* operations to - complete. - The event objects specified in _event_list_ will be released after the - wait is performed. 
- - This function must be encountered by all work-items in a work-group - executing the kernel with the same _num_events_ and event objects - specified in _event_list_; otherwise the results are undefined. - -| void *prefetch* ( + - const {global} gentype *__p__, size_t _num_gentypes_) -| Prefetch _num_gentypes_ * sizeof(gentype) bytes into the global cache. - The prefetch instruction is applied to a work-item in a work-group and - does not affect the functional behavior of the kernel. - -|==== - -[[cl_khr_fp16-image-read-and-write-functions]] -==== Image Read and Write Functions - -The image read and write functions defined in _tables 6.23_, _6.24_ and -_6.25_ are extended to support image color values that are a `half` type. - -==== Built-in Image Read Functions - -._Half Precision Built-in Image Read Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| half4 *read_imageh* ( + - read_only image2d_t _image_, + - sampler_t _sampler_, + - int2 _coord_) - - half4 *read_imageh* ( + - read_only image2d_t _image_, + - sampler_t _sampler_, + - float2 _coord_) -| Use the coordinate _(coord.x, coord.y)_ to do an element lookup in the 2D - image object specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats, {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. 
- - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| | - -| half4 *read_imageh* ( + - read_only image3d_t _image_, + - sampler_t _sampler_, + - int4 _coord_ ) - - half4 *read_imageh* ( + - read_only image3d_t _image_, + - sampler_t _sampler_, + - float4 _coord_) -| Use the coordinate _(coord.x_, _coord.y_, _coord.z)_ to do an - element lookup in the 3D image object specified by _image_. _coord.w_ is - ignored. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description are - undefined.
- -| | - -| half4 *read_imageh* ( + - read_only image2d_array_t _image_, + - sampler_t _sampler_, + - int4 _coord_) - - half4 *read_imageh* ( + - read_only image2d_array_t _image_, + - sampler_t _sampler_, + - float4 _coord_) -| Use _coord.xy_ to do an element lookup in the 2D image identified by - _coord.z_ in the 2D image array specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with image_channel_data_type set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with image_channel_data_type set - to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with image_channel_data_type set to {CL_HALF_FLOAT}. - - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - image_channel_data_type values not specified in the description above are - undefined. - -| | - -| half4 *read_imageh* ( + - read_only image1d_t _image_, + - sampler_t _sampler_, + - int _coord_) - - half4 *read_imageh* ( + - read_only image1d_t _image_, + - sampler_t _sampler_, + - float _coord_) -| Use _coord_ to do an element lookup in the 1D image object specified by - _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. 
- - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| | - -| half4 *read_imageh* ( + - read_only image1d_array_t _image_, + - sampler_t _sampler_, + - int2 _coord_) - - half4 *read_imageh* ( + - read_only image1d_array_t _image_, + - sampler_t _sampler_, + - float2 _coord_) -| Use _coord.x_ to do an element lookup in the 1D image identified by - _coord.y_ in the 1D image array specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with image_channel_data_type set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with image_channel_data_type set - to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with image_channel_data_type set to {CL_HALF_FLOAT}. 
- - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - image_channel_data_type values not specified in the description above are - undefined. - -|==== - -==== Built-in Image Sampler-less Read Functions - -_aQual_ in Table 6.24 refers to one of the access qualifiers. -For sampler-less read functions this may be _read_only_ or _read_write_. - -._Half Precision Built-in Image Sampler-less Read Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| half4 *read_imageh* ( + - _aQual_ image2d_t _image_, + - int2 _coord_) -| Use the coordinate _(coord.x, coord.y)_ to do an element lookup in the 2D - image object specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| half4 *read_imageh* ( + - _aQual_ image3d_t _image_, + - int4 _coord_ ) -| Use the coordinate _(coord.x_, _coord.y_, _coord.z)_ to do an element - lookup in the 3D image object specified by _image_. _coord.w_ is ignored. 
- - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description are - undefined. - -| half4 *read_imageh* ( + - _aQual_ image2d_array_t _image_, + - int4 _coord_) -| Use _coord.xy_ to do an element lookup in the 2D image identified by - _coord.z_ in the 2D image array specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| half4 *read_imageh* ( + - _aQual_ image1d_t _image_, + - int _coord_) - - half4 *read_imageh* ( + - _aQual_ image1d_buffer_t _image_, + - int _coord_) -| Use _coord_ to do an element lookup in the 1D image or 1D image buffer - object specified by _image_. 
- - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| half4 *read_imageh* ( + - _aQual_ image1d_array_t _image_, + - int2 _coord_) -| Use _coord.x_ to do an element lookup in the 1D image identified by - _coord.y_ in the 1D image array specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -|==== - -==== Built-in Image Write Functions - -_aQual_ in Table 6.25 refers to one of the access qualifiers. -For write functions this may be _write_only_ or _read_write_. 
- -._Half Precision Built-in Image Write Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| void *write_imageh* ( + - _aQual_ image2d_t _image_, + - int2 _coord_, + - half4 _color_) -| Write _color_ value to location specified by _coord.xy_ in the 2D image - specified by _image_. - - Appropriate data format conversion to the specified image format is done - before writing the color value. _x_ & _y_ are considered to be - unnormalized coordinates and must be in the range 0 ... width - 1, and 0 - ... height - 1. - - *write_imageh* can only be used with image objects created with - _image_channel_data_type_ set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. - - The behavior of *write_imageh* for image objects created with - _image_channel_data_type_ values not specified in the description above or - with (_x_, _y_) coordinate values that are not in the range (0 ... width - - 1, 0 ... height - 1) respectively, is undefined. - -| void *write_imageh* ( + - _aQual_ image2d_array_t _image_, + - int4 _coord_, + - half4 _color_) -| Write _color_ value to location specified by _coord.xy_ in the 2D image - identified by _coord.z_ in the 2D image array specified by _image_. - - Appropriate data format conversion to the specified image format is done - before writing the color value. _coord.x_, _coord.y_ and _coord.z_ are - considered to be unnormalized coordinates and must be in the range 0 ... - image width - 1, 0 ... image height - 1 and 0 ... image number of layers - - 1. - - *write_imageh* can only be used with image objects created with - _image_channel_data_type_ set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. 
- - The behavior of *write_imageh* for image objects created with - _image_channel_data_type_ values not specified in the description above or - with (_x_, _y_, _z_) coordinate values that are not in the range (0 ... - image width - 1, 0 ... image height - 1, 0 ... image number of layers - - 1), respectively, is undefined. - -| void *write_imageh* ( + - _aQual_ image1d_t _image_, + - int _coord_, + - half4 _color_) - - void *write_imageh* ( + - _aQual_ image1d_buffer_t _image_, + - int _coord_, + - half4 _color_) -| Write _color_ value to location specified by _coord_ in the 1D image or 1D - image buffer object specified by _image_. - Appropriate data format conversion to the specified image format is done - before writing the color value. - _coord_ is considered to be an unnormalized coordinate and must be in the - range 0 ... image width - 1. - - *write_imageh* can only be used with image objects created with - _image_channel_data_type_ set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. - Appropriate data format conversion will be done to convert channel data - from a floating-point value to actual data format in which the channels - are stored. - - The behavior of *write_imageh* for image objects created with - _image_channel_data_type_ values not specified in the description above or - with a coordinate value that is not in the range (0 ... image width - 1), - is undefined. - -| void *write_imageh* ( + - _aQual_ image1d_array_t _image_, + - int2 _coord_, + - half4 _color_) -| Write _color_ value to location specified by _coord.x_ in the 1D image - identified by _coord.y_ in the 1D image array specified by _image_. - Appropriate data format conversion to the specified image format is done - before writing the color value. _coord.x_ and _coord.y_ are considered to - be unnormalized coordinates and must be in the range 0 ... image width - 1 - and 0 ... 
image number of layers - 1. - - *write_imageh* can only be used with image objects created with - _image_channel_data_type_ set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. - Appropriate data format conversion will be done to convert channel data - from a floating-point value to actual data format in which the channels - are stored. - - The behavior of *write_imageh* for image objects created with - _image_channel_data_type_ values not specified in the description above or - with (_x_, _y_) coordinate values that are not in the range (0 ... image - width - 1, 0 ... image number of layers - 1), respectively, is undefined. - -| void *write_imageh* ( + - _aQual_ image3d_t _image_, + - int4 _coord_, + - half4 _color_) -| Write color value to location specified by coord.xyz in the 3D image - object specified by _image_. - - Appropriate data format conversion to the specified image format is done - before writing the color value. - coord.x, coord.y and coord.z are considered to be unnormalized coordinates - and must be in the range 0 ... image width - 1, 0 ... image height - 1 and - 0 ... image depth - 1. - - *write_imageh* can only be used with image objects created with - image_channel_data_type set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. - - The behavior of *write_imageh* for image objects created with - image_channel_data_type values not specified in the description above or - with (x, y, z) coordinate values that are not in the range (0 ... image - width - 1, 0 ... image height - 1, 0 ... image depth - 1), respectively, - is undefined. - - Note: This built-in function is only available if the - cl_khr_3d_image_writes extension is also supported by the device. 
- -|==== - -[[cl_khr_fp16-ieee754-compliance]] -==== IEEE754 Compliance - -The following table entry describes the additions to _table 4.3,_ which -allows applications to query the configuration information using -{clGetDeviceInfo} for an OpenCL device that supports half precision -floating-point. - -[cols="1,1,2",options="header",] -|==== -| *Op-code* -| *Return Type* -| *Description* - -| {CL_DEVICE_HALF_FP_CONFIG} -| {cl_device_fp_config_TYPE} -| Describes half precision floating-point capability of the OpenCL device. - This is a bit-field that describes one or more of the following values: - - {CL_FP_DENORM} -- denorms are supported - - {CL_FP_INF_NAN} -- INF and NaNs are supported - - {CL_FP_ROUND_TO_NEAREST} -- round to nearest even rounding mode supported - - {CL_FP_ROUND_TO_ZERO} -- round to zero rounding mode supported - - {CL_FP_ROUND_TO_INF} -- round to positive and negative infinity rounding - modes supported - - {CL_FP_FMA} -- IEEE754-2008 fused multiply-add is supported - - {CL_FP_SOFT_FLOAT} -- Basic floating-point operations (such as addition, - subtraction, multiplication) are implemented in software. - - The required minimum half precision floating-point capability as - implemented by this extension is: - - {CL_FP_ROUND_TO_ZERO}, or {CL_FP_ROUND_TO_NEAREST} \| {CL_FP_INF_NAN}. - -|==== - -[[cl_khr_fp16-rounding-modes]] -==== Rounding Modes - -If {CL_FP_ROUND_TO_NEAREST} is supported, the default rounding mode for -half-precision floating-point operations will be round to nearest even; -otherwise the default rounding mode will be round to zero. - -Conversions to half floating-point format must be correctly rounded using -the indicated `convert` operator rounding mode or the default rounding mode -for half-precision floating-point operations if no rounding mode is -specified by the operator, or a C-style cast is used. 
- -Conversions from half to integer format shall correctly round using the -indicated `convert` operator rounding mode, or towards zero if no rounding -mode is specified by the operator or a C-style cast is used. -All conversions from half to floating-point formats are exact. - -[[cl_khr_fp16-relative-error-as-ulps]] -==== Relative Error as ULPs - -In this section we discuss the maximum relative error defined as _ulp_ -(units in the last place). - -Addition, subtraction, multiplication, fused multiply-add operations on half -types are required to be correctly rounded using the default rounding mode -for half-precision floating-point operations. - -The following table describes the minimum accuracy of half precision -floating-point arithmetic operations given as ULP values. -0 ULP is used for math functions that do not require rounding. -The reference value used to compute the ULP value of an arithmetic operation -is the infinitely precise result. - -._ULP Values for Half Precision Floating-Point Arithmetic Operations_ -[cols=",,",options="header",] -|==== -| *Function* -| *Min Accuracy - Full Profile* -| *Min Accuracy - Embedded Profile* - -| *_x_ + _y_* -| Correctly rounded -| Correctly rounded - -| *_x_ - _y_* -| Correctly rounded -| Correctly rounded - -| *_x_ * _y_* -| Correctly rounded -| Correctly rounded - -| *1.0 / _x_* -| Correctly rounded -| \<= 1 ulp - -| *_x_ / _y_* -| Correctly rounded -| \<= 1 ulp - -| | | - -| *acos* -| \<= 2 ulp -| \<= 3 ulp - -| *acosh* -| \<= 2 ulp -| \<= 3 ulp - -| *acospi* -| \<= 2 ulp -| \<= 3 ulp - -| *asin* -| \<= 2 ulp -| \<= 3 ulp - -| *asinh* -| \<= 2 ulp -| \<= 3 ulp - -| *asinpi* -| \<= 2 ulp -| \<= 3 ulp - -| *atan* -| \<= 2 ulp -| \<= 3 ulp - -| *atanh* -| \<= 2 ulp -| \<= 3 ulp - -| *atanpi* -| \<= 2 ulp -| \<= 3 ulp - -| *atan2* -| \<= 2 ulp -| \<= 3 ulp - -| *atan2pi* -| \<= 2 ulp -| \<= 3 ulp - -| *cbrt* -| \<= 2 ulp -| \<= 2 ulp - -| *ceil* -| Correctly rounded -| Correctly rounded - -| *clamp* -| 0 ulp -| 0 ulp - 
-| *copysign* -| 0 ulp -| 0 ulp - -| *cos* -| \<= 2 ulp -| \<= 2 ulp - -| *cosh* -| \<= 2 ulp -| \<= 3 ulp - -| *cospi* -| \<= 2 ulp -| \<= 2 ulp - -// 3 operations from the 2 multiplications and 1 subtraction per component -| *cross* -| absolute error tolerance of 'max * max * (3 * HALF_EPSILON)' per vector component, where _max_ is the maximum input operand magnitude -| Implementation-defined - -| *degrees* -| \<= 2 ulp -| \<= 2 ulp - -// 0.5 ULP error in sqrt -// 1.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// = 0.5 + (1.5 * n) + (0.5 * (n - 1)) -// = 0.5 + 1.5n + (0.5n - 0.5) -// = 2n -| *distance* -| \<= 2n ulp, for gentype with vector width _n_ -| Implementation-defined - -// n + n-1 Number of operations from n multiples and (n-1) additions -// 2n - 1 -| *dot* -| absolute error tolerance of 'max * max * (2n - 1) * HALF_EPSILON', for vector width _n_ and maximum input operand magnitude _max_ across all vector components -| Implementation-defined - -| *erfc* -| \<= 4 ulp -| \<= 4 ulp - -| *erf* -| \<= 4 ulp -| \<= 4 ulp - -| *exp* -| \<= 2 ulp -| \<= 3 ulp - -| *exp2* -| \<= 2 ulp -| \<= 3 ulp - -| *exp10* -| \<= 2 ulp -| \<= 3 ulp - -| *expm1* -| \<= 2 ulp -| \<= 3 ulp - -| *fabs* -| 0 ulp -| 0 ulp - -| *fdim* -| Correctly rounded -| Correctly rounded - -| *floor* -| Correctly rounded -| Correctly rounded - -| *fma* -| Correctly rounded -| Correctly rounded - -| *fmax* -| 0 ulp -| 0 ulp - -| *fmin* -| 0 ulp -| 0 ulp - -| *fmod* -| 0 ulp -| 0 ulp - -| *fract* -| Correctly rounded -| Correctly rounded - -| *frexp* -| 0 ulp -| 0 ulp - -| *hypot* -| \<= 2 ulp -| \<= 3 ulp - -| *ilogb* -| 0 ulp -| 0 ulp - -| *ldexp* -| Correctly rounded -| Correctly rounded - -// 0.5 ULP error in sqrt -// 0.5 effect on e of taking sqrt(x + e) -// 0.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// = (0.5 + 0.5 * ((0.5 * n) + (0.5 * (n - 1)))) -// = 0.5 + 0.5 * (n - 0.5) -// = 0.25 + 
0.5n -| *length* -| \<= 0.25 + 0.5n ulp, for gentype with vector width _n_ -| Implementation-defined - -| *log* -| \<= 2 ulp -| \<= 3 ulp - -| *log2* -| \<= 2 ulp -| \<= 3 ulp - -| *log10* -| \<= 2 ulp -| \<= 3 ulp - -| *log1p* -| \<= 2 ulp -| \<= 3 ulp - -| *logb* -| 0 ulp -| 0 ulp - -| *mad* -| Implementation-defined -| Implementation-defined - -| *max* -| 0 ulp -| 0 ulp - -| *maxmag* -| 0 ulp -| 0 ulp - -| *min* -| 0 ulp -| 0 ulp - -| *minmag* -| 0 ulp -| 0 ulp - -| *mix* -| Implementation-defined -| Implementation-defined - -| *modf* -| 0 ulp -| 0 ulp - -| *nan* -| 0 ulp -| 0 ulp - -| *nextafter* -| 0 ulp -| 0 ulp - -// 1.5 error in rsqrt + error in multiply -// 0.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// = 1.5 + (0.5 * n) + (0.5 * (n - 1)) -// = 1.5 + 0.5n + (0.5n - 0.5) -// = 1.0 + n -| *normalize* -| \<= 1 + n ulp, for gentype with vector width _n_ -| Implementation-defined - -| *pow(x, y)* -| \<= 4 ulp -| \<= 5 ulp - -| *pown(x, y)* -| \<= 4 ulp -| \<= 5 ulp - -| *powr(x, y)* -| \<= 4 ulp -| \<= 5 ulp - -| *radians* -| \<= 2 ulp -| \<= 2 ulp - -| *remainder* -| 0 ulp -| 0 ulp - -| *remquo* -| 0 ulp for the remainder, at least the lower 7 bits of the integral quotient -| 0 ulp for the remainder, at least the lower 7 bits of the integral quotient - -| *rint* -| Correctly rounded -| Correctly rounded - -| *rootn* -| \<= 4 ulp -| \<= 5 ulp - -| *round* -| Correctly rounded -| Correctly rounded - -| *rsqrt* -| \<=1 ulp -| \<=1 ulp - -| *sign* -| 0 ulp -| 0 ulp - -| *sin* -| \<= 2 ulp -| \<= 2 ulp - -| *sincos* -| \<= 2 ulp for sine and cosine values -| \<= 2 ulp for sine and cosine values - -| *sinh* -| \<= 2 ulp -| \<= 3 ulp - -| *sinpi* -| \<= 2 ulp -| \<= 2 ulp - -| *smoothstep* -| Implementation-defined -| Implementation-defined - -| *sqrt* -| Correctly rounded -| \<= 1 ulp - -| *step* -| 0 ulp -| 0 ulp - -| *tan* -| \<= 2 ulp -| \<= 3 ulp - -| *tanh* -| \<= 2 ulp -| \<= 3 ulp - -| *tanpi* -| \<= 2 ulp 
-| \<= 3 ulp - -| *tgamma* -| \<= 4 ulp -| \<= 4 ulp - -| *trunc* -| Correctly rounded -| Correctly rounded - -|==== - -Note: _Implementations may perform floating-point operations on_ `half` -_scalar or vector data types by converting the_ `half` _values to single -precision floating-point values and performing the operation in single -precision floating-point. -In this case, the implementation will use the_ `half` _scalar or vector data -type as a storage only format_. - -[[cl_khr_fp16-additions-to-chapter-8-of-the-opencl-2.0-specification]] -=== Additions to Chapter 8 of the OpenCL 2.0 C Specification - -Add new sub-sections to _section 8.3.1. Conversion rules for normalized integer channel data types_: - -[[cl_khr_fp16-converting-normalized-integer-channel-data-types-to-floating-point-values]] -==== Converting normalized integer channel data types to half precision floating-point values - -For images created with image channel data type of {CL_UNORM_INT8} and -{CL_UNORM_INT16}, *read_imageh* will convert the channel values from an -8-bit or 16-bit unsigned integer to normalized half precision -floating-point values in the range [`0.0h`, `1.0h`]. - -For images created with image channel data type of {CL_SNORM_INT8} and -{CL_SNORM_INT16}, *read_imageh* will convert the channel values from an -8-bit or 16-bit signed integer to normalized half precision floating-point -values in the range [`-1.0h`, `1.0h`]. 
- -These conversions are performed as follows: - -{CL_UNORM_INT8} (8-bit unsigned integer) {rightarrow} `half` - -[none] -* normalized `half` value = `round_to_half(c / 255)` - -{CL_UNORM_INT_101010} (10-bit unsigned integer) {rightarrow} `half` - -[none] -* normalized `half` value = `round_to_half(c / 1023)` - -{CL_UNORM_INT16} (16-bit unsigned integer) {rightarrow} `half` - -[none] -* normalized `half` value = `round_to_half(c / 65535)` - -{CL_SNORM_INT8} (8-bit signed integer) {rightarrow} `half` - -[none] -* normalized `half` value = `max(-1.0h, round_to_half(c / 127))` - -{CL_SNORM_INT16} (16-bit signed integer) {rightarrow} `half` - -[none] -* normalized `half` value = `max(-1.0h, round_to_half(c / 32767))` - -The accuracy of the above conversions must be \<= 1.5 ulp except for the -following cases. - -For {CL_UNORM_INT8} - -[none] -* 0 must convert to `0.0h` and -* 255 must convert to `1.0h` - -For {CL_UNORM_INT_101010} - -[none] -* 0 must convert to `0.0h` and -* 1023 must convert to `1.0h` - -For {CL_UNORM_INT16} - -[none] -* 0 must convert to `0.0h` and -* 65535 must convert to `1.0h` - -For {CL_SNORM_INT8} - -[none] -* -128 and -127 must convert to `-1.0h`, -* 0 must convert to `0.0h` and -* 127 must convert to `1.0h` - -For {CL_SNORM_INT16} - -[none] -* -32768 and -32767 must convert to `-1.0h`, -* 0 must convert to `0.0h` and -* 32767 must convert to `1.0h` - - -[[cl_khr_fp16-converting-floating-point-values-to-normalized-integer-channel-data-types]] -==== Converting half precision floating-point values to normalized integer channel data types - -For images created with image channel data type of {CL_UNORM_INT8} and -{CL_UNORM_INT16}, *write_imageh* will convert the floating-point color value -to an 8-bit or 16-bit unsigned integer. - -For images created with image channel data type of {CL_SNORM_INT8} and -{CL_SNORM_INT16}, *write_imageh* will convert the floating-point color value -to an 8-bit or 16-bit signed integer. 
- -The preferred conversion uses the round to nearest even (`_rte`) rounding -mode, but OpenCL implementations may choose to approximate the rounding mode -used in the conversions described below. -When approximate rounding is used instead of the preferred rounding, -the result of the conversion must satisfy the bound given below. - -`half` {rightarrow} {CL_UNORM_INT8} (8-bit unsigned integer) - -[none] -* Let f~exact~ = *max*(`0`, *min*(`f * 255`, `255`)) -* Let f~preferred~ = *convert_uchar_sat_rte*(`f * 255.0f`) -* Let f~approx~ = *convert_uchar_sat_*(`f * 255.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 - -`half` {rightarrow} {CL_UNORM_INT_101010} (10-bit unsigned integer) - -[none] -* Let f~exact~ = *max*(`0`, *min*(`f * 1023`, `1023`)) -* Let f~preferred~ = *min*(*convert_ushort_sat_rte*(`f * 1023.0f`), `1023`) -* Let f~approx~ = *convert_ushort_sat_*(`f * 1023.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 - -`half` {rightarrow} {CL_UNORM_INT16} (16-bit unsigned integer) - -[none] -* Let f~exact~ = *max*(`0`, *min*(`f * 65535`, `65535`)) -* Let f~preferred~ = *convert_ushort_sat_rte*(`f * 65535.0f`) -* Let f~approx~ = *convert_ushort_sat_*(`f * 65535.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 - -`half` {rightarrow} {CL_SNORM_INT8} (8-bit signed integer) - -[none] -* Let f~exact~ = *max*(`-128`, *min*(`f * 127`, `127`)) -* Let f~preferred~ = *convert_char_sat_rte*(`f * 127.0f`) -* Let f~approx~ = *convert_char_sat_*(`f * 127.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 - -`half` {rightarrow} {CL_SNORM_INT16} (16-bit signed integer) - -[none] -* Let f~exact~ = *max*(`-32768`, *min*(`f * 32767`, `32767`)) -* Let f~preferred~ = *convert_short_sat_rte*(`f * 32767.0f`) -* Let f~approx~ = *convert_short_sat_*(`f * 32767.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 diff --git a/ext/cl_khr_fp64.asciidoc b/ext/cl_khr_fp64.asciidoc deleted file mode 100644 index 467830cd6..000000000 --- a/ext/cl_khr_fp64.asciidoc +++ /dev/null @@ 
-1,1303 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_fp64]] -== Double Precision Floating-Point - -This section describes the *cl_khr_fp64* extension. -This extension became an optional core feature in OpenCL 1.2. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_fp64-additions-to-chapter-6]] -=== Additions to Chapter 6 - -The lists of built-in scalar and vector data types defined in _tables 6.1_ -and _6.2_ are extended to include the following: - -[cols="1,3",options="header",] -|==== -|*Type* | *Description* -|*double* | A double precision float. -|*double2* | A 2-component double-precision floating-point vector. -|*double3* | A 3-component double-precision floating-point vector. -|*double4* | A 4-component double-precision floating-point vector. -|*double8* | An 8-component double-precision floating-point vector. -|*double16* | A 16-component double-precision floating-point vector. -|==== - -The built-in scalar and vector data types for `doublen` are also declared as appropriate -types in the OpenCL API (and header files) that can be used by an -application. -The following table describes the built-in scalar and vector data types for `doublen` as -defined in the OpenCL C programming language and the corresponding data type -available to the application: - -[cols=",",options="header",] -|==== -|*Type in OpenCL Language* | *API type for application* -|*double* | *cl_double* -|*double2* | *cl_double2* -|*double3* | *cl_double3* -|*double4* | *cl_double4* -|*double8* | *cl_double8* -|*double16* | *cl_double16* -|==== - -The double data type must conform to the IEEE-754 double precision storage format. 
- -The following text is added to _Section 6.1.1.1 The half data type_: - -Conversions from double to half are correctly rounded. -Conversions from half to double are lossless. - -[[cl_khr_fp64-conversions]] -==== Conversions - -The implicit conversion rules specified in _section 6.2.1_ now include the -`double` scalar and `doublen` vector data types. - -The explicit casts described in _section 6.2.2_ are extended to take a -`double` scalar data type and a `doublen` vector data type. - -The explicit conversion functions described in _section 6.2.3_ are extended -to take a `double` scalar data type and a `doublen` vector data type. - -The `as_typen()` function for re-interpreting types as described in _section -6.2.4.2_ is extended to allow conversion-free casts between `longn`, -`ulongn` and `doublen` scalar and vector data types. - -[[cl_khr_fp64-math-functions]] -==== Math Functions - -The built-in math functions defined in _table 6.8_ (also listed below) are -extended to include appropriate versions of functions that take `double` and -`double{2|3|4|8|16}` as arguments and return values. -`gentype` now also includes `double`, `double2`, `double3`, `double4`, `double8` and -`double16`. - -For any specific use of a function, the actual type has to be the same for -all arguments and the return type. - -._Double Precision Built-in Math Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype *acos* (gentype _x_) -| Arc cosine function. - -| gentype *acosh* (gentype _x_) -| Inverse hyperbolic cosine. - -| gentype *acospi* (gentype _x_) -| Compute *acos* (_x_) / {pi}. - -| gentype *asin* (gentype _x_) -| Arc sine function. - -| gentype *asinh* (gentype _x_) -| Inverse hyperbolic sine. - -| gentype *asinpi* (gentype _x_) -| Compute *asin* (_x_) / {pi}. - -| gentype *atan* (gentype _y_over_x_) -| Arc tangent function. - -| gentype *atan2* (gentype _y_, gentype _x_) -| Arc tangent of _y_ / _x_. 
- -| gentype *atanh* (gentype _x_) -| Hyperbolic arc tangent. - -| gentype *atanpi* (gentype _x_) -| Compute *atan* (_x_) / {pi}. - -| gentype *atan2pi* (gentype _y_, gentype _x_) -| Compute *atan2* (_y_, _x_) / {pi}. - -| gentype *cbrt* (gentype _x_) -| Compute cube-root. - -| gentype *ceil* (gentype _x_) -| Round to integral value using the round to positive infinity rounding - mode. - -| gentype *copysign* (gentype _x_, gentype _y_) -| Returns _x_ with its sign changed to match the sign of _y_. - -| gentype *cos* (gentype _x_) -| Compute cosine. - -| gentype *cosh* (gentype _x_) -| Compute hyperbolic cosine. - -| gentype *cospi* (gentype _x_) -| Compute *cos* ({pi} _x_). - -| gentype *erfc* (gentype _x_) -| Complementary error function. - -| gentype *erf* (gentype _x_) -| Error function encountered in integrating the normal distribution. - -| gentype *exp* (gentype _x_) -| Compute the base-_e_ exponential of _x_. - -| gentype *exp2* (gentype _x_) -| Exponential base 2 function. - -| gentype *exp10* (gentype _x_) -| Exponential base 10 function. - -| gentype *expm1* (gentype _x_) -| Compute _e^x^_ - 1.0. - -| gentype *fabs* (gentype _x_) -| Compute absolute value of a floating-point number. - -| gentype *fdim* (gentype _x_, gentype _y_) -| _x_ - _y_ if _x_ > _y_, +0 if _x_ is less than or equal to _y_. - -| gentype *floor* (gentype _x_) -| Round to integral value using the round to negative infinity rounding - mode. - -| gentype *fma* (gentype _a_, gentype _b_, gentype _c_) -| Returns the correctly rounded floating-point representation of the sum of - _c_ with the infinitely precise product of _a_ and _b_. - Rounding of intermediate products shall not occur. - Edge case behavior is per the IEEE 754-2008 standard. - -| gentype *fmax* (gentype _x_, gentype _y_) + - gentype *fmax* (gentype _x_, double _y_) -| Returns _y_ if _x_ < _y_, otherwise it returns _x_. - If one argument is a NaN, *fmax()* returns the other argument. 
- If both arguments are NaNs, *fmax()* returns a NaN. - -| gentype *fmin* (gentype _x_, gentype _y_) + - gentype *fmin* (gentype _x_, double _y_) -| Returns _y_ if _y_ < _x_, otherwise it returns _x_. - If one argument is a NaN, *fmin()* returns the other argument. - If both arguments are NaNs, *fmin()* returns a NaN. - -| gentype *fmod* (gentype _x_, gentype _y_) -| Modulus. - Returns _x_ - _y_ * *trunc* (_x_/_y_). - -| gentype **fract** (gentype _x_, {global} gentype *_iptr_) + - gentype **fract** (gentype _x_, {local} gentype *_iptr_) + - gentype **fract** (gentype _x_, {private} gentype *_iptr_) -| Returns *fmin*( _x_ - *floor* (_x_), 0x1.fffffffffffffp-1 ). - - *floor*(_x_) is returned in _iptr_. - -| double__n__ **frexp** (double__n x__, {global} int__n__ *exp) + - double__n__ **frexp** (double__n x__, {local} int__n__ *exp) + - double__n__ **frexp** (double__n x__, {private} int__n__ *exp) + - double **frexp** (double _x_, {global} int *exp) + - double **frexp** (double _x_, {local} int *exp) + - double **frexp** (double _x_, {private} int *exp) -| Extract mantissa and exponent from _x_. - For each component the mantissa returned is a `double` with magnitude - in the interval [1/2, 1) or 0. - Each component of _x_ equals mantissa returned * 2__^exp^__. - -| gentype *hypot* (gentype _x_, gentype _y_) -| Compute the value of the square root of __x__^2^+ __y__^2^ without undue - overflow or underflow. - -| int__n__ *ilogb* (double__n__ _x_) + - int *ilogb* (double _x_) -| Return the exponent as an integer value. - -| double__n__ *ldexp* (double__n__ _x_, int__n__ _k_) + - double__n__ *ldexp* (double__n__ _x_, int _k_) + - double *ldexp* (double _x_, int _k_) -| Multiply _x_ by 2 to the power _k_. 
- -| gentype **lgamma** (gentype _x_) + - double__n__ **lgamma_r** (double__n__ _x_, {global} int__n__ *_signp_) + - double__n__ **lgamma_r** (double__n__ _x_, {local} int__n__ *_signp_) + - double__n__ **lgamma_r** (double__n__ _x_, {private} int__n__ *_signp_) + - double **lgamma_r** (double _x_, {global} int *_signp_) + - double **lgamma_r** (double _x_, {local} int *_signp_) + - double **lgamma_r** (double _x_, {private} int *_signp_) -| Log gamma function. - Returns the natural logarithm of the absolute value of the gamma function. - The sign of the gamma function is returned in the _signp_ argument of - *lgamma_r*. - -| gentype *log* (gentype _x_) -| Compute natural logarithm. - -| gentype *log2* (gentype _x_) -| Compute a base 2 logarithm. - -| gentype *log10* (gentype _x_) -| Compute a base 10 logarithm. - -| gentype *log1p* (gentype _x_) -| Compute log~e~(1.0 + _x_) . - -| gentype *logb* (gentype _x_) -| Compute the exponent of _x_, which is the integral part of - log__~r~__\|_x_\|. - -| gentype *mad* (gentype _a_, gentype _b_, gentype _c_) -| *mad* computes _a_ * _b_ + _c_. - The function may compute _a_ * _b_ + _c_ with reduced accuracy - in the embedded profile. See the OpenCL SPIR-V Environment Specification - for details. On some hardware the mad instruction may provide better - performance than expanded computation of _a_ * _b_ + _c_. - -| gentype *maxmag* (gentype _x_, gentype _y_) -| Returns _x_ if \|_x_\| > \|_y_\|, _y_ if \|_y_\| > \|_x_\|, otherwise - *fmax*(_x_, _y_). - -| gentype *minmag* (gentype _x_, gentype _y_) -| Returns _x_ if \|_x_\| < \|_y_\|, _y_ if \|_y_\| < \|_x_\|, otherwise - *fmin*(_x_, _y_). - -| gentype **modf** (gentype _x_, {global} gentype *_iptr_) + - gentype **modf** (gentype _x_, {local} gentype *_iptr_) + - gentype **modf** (gentype _x_, {private} gentype *_iptr_) -| Decompose a floating-point number. 
- The *modf* function breaks the argument _x_ into integral and fractional - parts, each of which has the same sign as the argument. - It stores the integral part in the object pointed to by _iptr_. - -| double__n__ *nan* (ulong__n nancode__) + - double *nan* (ulong _nancode_) -| Returns a quiet NaN. - The _nancode_ may be placed in the significand of the resulting NaN. - -| gentype *nextafter* (gentype _x_, gentype _y_) -| Computes the next representable double-precision floating-point value - following _x_ in the direction of _y_. - Thus, if _y_ is less than _x_, *nextafter*() returns the largest - representable floating-point number less than _x_. - -| gentype *pow* (gentype _x_, gentype _y_) -| Compute _x_ to the power _y_. - -| double__n__ *pown* (double__n__ _x_, int__n__ _y_) + - double *pown* (double _x_, int _y_) -| Compute _x_ to the power _y_, where _y_ is an integer. - -| gentype *powr* (gentype _x_, gentype _y_) -| Compute _x_ to the power _y_, where _x_ is >= 0. - -| gentype *remainder* (gentype _x_, gentype _y_) -| Compute the value _r_ such that _r_ = _x_ - _n_*_y_, where _n_ is the - integer nearest the exact value of _x_/_y_. - If there are two integers closest to _x_/_y_, _n_ shall be the even one. - If _r_ is zero, it is given the same sign as _x_. - -| double__n__ **remquo** (double__n__ _x_, double__n__ _y_, {global} int__n__ *_quo_) + - double__n__ **remquo** (double__n__ _x_, double__n__ _y_, {local} int__n__ *_quo_) + - double__n__ **remquo** (double__n__ _x_, double__n__ _y_, {private} int__n__ *_quo_) + - double **remquo** (double _x_, double _y_, {global} int *_quo_) + - double **remquo** (double _x_, double _y_, {local} int *_quo_) + - double **remquo** (double _x_, double _y_, {private} int *_quo_) -| The *remquo* function computes the value r such that _r_ = _x_ - _k_*_y_, - where _k_ is the integer nearest the exact value of _x_/_y_. - If there are two integers closest to _x_/_y_, _k_ shall be the even one. 
- If _r_ is zero, it is given the same sign as _x_. - This is the same value that is returned by the *remainder* function. - *remquo* also calculates the lower seven bits of the integral quotient - _x_/_y_, and gives that value the same sign as _x_/_y_. - It stores this signed value in the object pointed to by _quo_. - -| gentype *rint* (gentype _x_) -| Round to integral value (using round to nearest even rounding mode) in - floating-point format. - Refer to section 7.1 for description of rounding modes. - -| double__n__ *rootn* (double__n__ _x_, int__n__ _y_) + - double *rootn* (double _x_, int _y_) -| Compute _x_ to the power 1/_y_. - -| gentype *round* (gentype _x_) -| Return the integral value nearest to _x_ rounding halfway cases away from - zero, regardless of the current rounding direction. - -| gentype *rsqrt* (gentype _x_) -| Compute inverse square root. - -| gentype *sin* (gentype _x_) -| Compute sine. - -| gentype **sincos** (gentype _x_, {global} gentype *_cosval_) + - gentype **sincos** (gentype _x_, {local} gentype *_cosval_) + - gentype **sincos** (gentype _x_, {private} gentype *_cosval_) -| Compute sine and cosine of x. - The computed sine is the return value and computed cosine is returned in - _cosval_. - -| gentype *sinh* (gentype _x_) -| Compute hyperbolic sine. - -| gentype *sinpi* (gentype _x_) -| Compute *sin* ({pi} _x_). - -| gentype *sqrt* (gentype _x_) -| Compute square root. - -| gentype *tan* (gentype _x_) -| Compute tangent. - -| gentype *tanh* (gentype _x_) -| Compute hyperbolic tangent. - -| gentype *tanpi* (gentype _x_) -| Compute *tan* ({pi} _x_). - -| gentype *tgamma* (gentype _x_) -| Compute the gamma function. - -| gentype *trunc* (gentype _x_) -| Round to integral value using the round to zero rounding mode. -|==== - -In addition, the following symbolic constant will also be available: - -*HUGE_VAL* - A positive double expression that evaluates to infinity. -Used as an error value returned by the built-in math functions. 
- -The *FP_FAST_FMA* macro indicates whether the *fma()* family of -functions are fast compared with direct code for double precision -floating-point. -If defined, the *FP_FAST_FMA* macro shall indicate that the *fma()* -function generally executes about as fast as, or faster than, a multiply and -an add of *double* operands. - -The macro names given in the following list must use the values specified. -These constant expressions are suitable for use in #if preprocessing -directives. - -[source,opencl_c] ----- -#define DBL_DIG 15 -#define DBL_MANT_DIG 53 -#define DBL_MAX_10_EXP +308 -#define DBL_MAX_EXP +1024 -#define DBL_MIN_10_EXP -307 -#define DBL_MIN_EXP -1021 -#define DBL_RADIX 2 -#define DBL_MAX 0x1.fffffffffffffp1023 -#define DBL_MIN 0x1.0p-1022 -#define DBL_EPSILON 0x1.0p-52 ----- - -The following table describes the built-in macro names given above in the -OpenCL C programming language and the corresponding macro names available to -the application. - -[cols=",",options="header",] -|==== -| *Macro in OpenCL Language* | *Macro for application* -| `DBL_DIG` | {CL_DBL_DIG} -| `DBL_MANT_DIG` | {CL_DBL_MANT_DIG} -| `DBL_MAX_10_EXP` | {CL_DBL_MAX_10_EXP} -| `DBL_MAX_EXP` | {CL_DBL_MAX_EXP} -| `DBL_MIN_10_EXP` | {CL_DBL_MIN_10_EXP} -| `DBL_MIN_EXP` | {CL_DBL_MIN_EXP} -| `DBL_RADIX` | {CL_DBL_RADIX} -| `DBL_MAX` | {CL_DBL_MAX} -| `DBL_MIN` | {CL_DBL_MIN} -| `DBL_EPSILON` | {CL_DBL_EPSILON} -|==== - -// TODO: DBL_RADIX / CL_DBL_RADIX? - -The following constants are also available. -They are of type `double` and are accurate within the precision of the `double` -type.
- -[cols=",",options="header",] -|==== -| *Constant* | *Description* -| `M_E` | Value of e -| `M_LOG2E` | Value of log~2~e -| `M_LOG10E` | Value of log~10~e -| `M_LN2` | Value of log~e~2 -| `M_LN10` | Value of log~e~10 -| `M_PI` | Value of {pi} -| `M_PI_2` | Value of {pi} / 2 -| `M_PI_4` | Value of {pi} / 4 -| `M_1_PI` | Value of 1 / {pi} -| `M_2_PI` | Value of 2 / {pi} -| `M_2_SQRTPI` | Value of 2 / {sqrt}{pi} -| `M_SQRT2` | Value of {sqrt}2 -| `M_SQRT1_2` | Value of 1 / {sqrt}2 -|==== - -[[cl_khr_fp64-common-functions]] -==== Common Functions - -The built-in common functions defined in _table 6.12_ (also listed below) -are extended to include appropriate versions of functions that take `double` -and `double{2|3|4|8|16}` as arguments and return values. -gentype now also includes `double`, `double2`, `double3`, `double4`, `double8` and -`double16`. -These are described below. - -.Double Precision Built-in Common Functions -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype *clamp* ( + - gentype _x_, gentype _minval_, gentype _maxval_) - - gentype *clamp* ( + - gentype _x_, double _minval_, double _maxval_) -| Returns *fmin*(*fmax*(_x_, _minval_), _maxval_). - - Results are undefined if _minval_ > _maxval_. - -| gentype *degrees* (gentype _radians_) -| Converts _radians_ to degrees, + - i.e. (180 / {pi}) * _radians_. - -| gentype *max* (gentype _x_, gentype _y_) + - gentype *max* (gentype _x_, double _y_) -| Returns _y_ if _x_ < _y_, otherwise it returns _x_. - If _x_ and _y_ are infinite or NaN, the return values are undefined. - -| gentype *min* (gentype _x_, gentype _y_) + - gentype *min* (gentype _x_, double _y_) -| Returns _y_ if _y_ < _x_, otherwise it returns _x_. - If _x_ and _y_ are infinite or NaN, the return values are undefined. 
- -| gentype *mix* (gentype _x_, gentype _y_, gentype _a_) + - gentype *mix* (gentype _x_, gentype _y_, double _a_) -| Returns the linear blend of _x_ and _y_ implemented as: - - _x_ + (_y_ - _x_) * _a_ - - _a_ must be a value in the range 0.0 ... 1.0. - If _a_ is not in the range 0.0 ... 1.0, the return values are undefined. - - Note: The double precision *mix* function can be implemented using contractions such as *mad* or *fma*. - -| gentype *radians* (gentype _degrees_) -| Converts _degrees_ to radians, i.e. ({pi} / 180) * _degrees_. - -| gentype *step* (gentype _edge_, gentype _x_) + - gentype *step* (double _edge_, gentype _x_) -| Returns 0.0 if _x_ < _edge_, otherwise it returns 1.0. - -| gentype *smoothstep* ( + - gentype _edge0_, gentype _edge1_, gentype _x_) + - - gentype *smoothstep* ( + - double _edge0_, double _edge1_, gentype _x_) -| Returns 0.0 if _x_ \<= _edge0_ and 1.0 if _x_ >= _edge1_ and performs - smooth Hermite interpolation between 0 and 1 when _edge0_ < _x_ < _edge1_. - This is useful in cases where you would want a threshold function with a - smooth transition. - - This is equivalent to: - - gentype _t_; + - _t_ = clamp ((_x_ - _edge0_) / (_edge1_ - _edge0_), 0, 1); + - return _t_ * _t_ * (3 - 2 * _t_); + - - Results are undefined if _edge0_ >= _edge1_. - - Note: The double precision *smoothstep* function can be implemented using contractions such as *mad* or *fma*. - -| gentype *sign* (gentype _x_) -| Returns 1.0 if _x_ > 0, -0.0 if _x_ = -0.0, +0.0 if _x_ = +0.0, or -1.0 if - _x_ < 0. - Returns 0.0 if _x_ is a NaN. - -|==== - -[[cl_khr_fp64-geometric-functions]] -==== Geometric Functions - -The built-in geometric functions defined in _table 6.13_ (also listed below) -are extended to include appropriate versions of functions that take `double` -and `double{2|3|4}` as arguments and return values. -gentype now also includes `double`, `double2`, `double3` and `double4`. -These are described below.
- -Note: The double precision geometric functions can be implemented using -contractions such as *mad* or *fma*. - -._Double Precision Built-in Geometric Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| double4 *cross* (double4 _p0_, double4 _p1_) + - double3 *cross* (double3 _p0_, double3 _p1_) -| Returns the cross product of _p0.xyz_ and _p1.xyz_. - The _w_ component of the result will be 0.0. - -| double *dot* (gentype _p0_, gentype _p1_) -| Compute the dot product of _p0_ and _p1_. - -| double *distance* (gentype _p0_, gentype _p1_) -| Returns the distance between _p0_ and _p1_. - This is calculated as *length*(_p0_ - _p1_). - -| double *length* (gentype _p_) -| Return the length of vector _p_, i.e., + - sqrt( __p.x__^2^ + __p.y__^2^ + ... ) - -| gentype *normalize* (gentype _p_) -| Returns a vector in the same direction as _p_ but with a length of 1. - -|==== - -[[cl_khr_fp64-relational-functions]] -==== Relational Functions - -The scalar and vector relational functions described in _table 6.14_ are -extended to include versions that take `double`, `double2`, `double3`, `double4`, -`double8` and `double16` as arguments. - -The relational and equality operators (<, \<=, >, >=, !=, ==) can be used -with `doublen` vector types and shall produce a vector `longn` result as -described in _section 6.3_. - -The functions *isequal*, *isnotequal*, *isgreater*, *isgreaterequal*, -*isless*, *islessequal*, *islessgreater*, *isfinite*, *isinf*, *isnan*, -*isnormal*, *isordered*, *isunordered* and *signbit* shall return a 0 if the -specified relation is _false_ and a 1 if the specified relation is true for -scalar argument types. -These functions shall return a 0 if the specified relation is _false_ and a --1 (i.e. all bits set) if the specified relation is _true_ for vector -argument types.
- -The relational functions *isequal*, *isgreater*, *isgreaterequal*, *isless*, -*islessequal*, and *islessgreater* always return 0 if either argument is not -a number (NaN). -*isnotequal* returns 1 if one or both arguments are not a number (NaN) and -the argument type is a scalar and returns -1 if one or both arguments are -not a number (NaN) and the argument type is a vector. - -The functions described in _table 6.14_ are extended to include the `doublen` -vector types. - -._Double Precision Relational Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| int *isequal* (double _x_, double _y_) + - long__n__ *isequal* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ == _y_. - -| int *isnotequal* (double _x_, double _y_) + - long__n__ *isnotequal* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ != _y_. - -| int *isgreater* (double _x_, double _y_) + - long__n__ *isgreater* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ > _y_. - -| int *isgreaterequal* (double _x_, double _y_) + - long__n__ *isgreaterequal* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ >= _y_. - -| int *isless* (double _x_, double _y_) + - long__n__ *isless* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ < _y_. - -| int *islessequal* (double _x_, double _y_) + - long__n__ *islessequal* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ \<= _y_. - -| int *islessgreater* (double _x_, double _y_) + - long__n__ *islessgreater* (double__n x__, double__n y__) -| Returns the component-wise compare of (_x_ < _y_) \|\| (_x_ > _y_) . - -| | - -| int *isfinite* (double) + - long__n__ *isfinite* (double__n__) -| Test for finite value. - -| int *isinf* (double) + - long__n__ *isinf* (double__n__) -| Test for infinity value (positive or negative) . 
- -| int *isnan* (double) + - long__n__ *isnan* (double__n__) -| Test for a NaN. - -| int *isnormal* (double) + - long__n__ *isnormal* (double__n__) -| Test for a normal value. - -| int *isordered* (double _x_, double _y_) + - long__n__ *isordered* (double__n x__, double__n y__) -| Test if arguments are ordered. - *isordered*() takes arguments _x_ and _y_, and returns the result - *isequal*(_x_, _x_) && *isequal*(_y_, _y_). - -| int *isunordered* (double _x_, double _y_) + - long__n__ *isunordered* (double__n x__, double__n y__) -| Test if arguments are unordered. - *isunordered*() takes arguments _x_ and _y_, returning non-zero if _x_ or - _y_ is a NaN, and zero otherwise. - -| int *signbit* (double) + - long__n__ *signbit* (double__n__) -| Test for sign bit. - The scalar version of the function returns a 1 if the sign bit in the double - is set else returns 0. - The vector version of the function returns the following for each - component in double__n__: -1 (i.e all bits set) if the sign bit in the double - is set else returns 0. - -| | - -| double__n__ *bitselect* (double__n a__, double__n b__, double__n c__) -| Each bit of the result is the corresponding bit of _a_ if the - corresponding bit of _c_ is 0. - Otherwise it is the corresponding bit of _b_. - -| double__n__ *select* (double__n a__, double__n b__, long__n c__) + - double__n__ *select* (double__n a__, double__n b__, ulong__n c__) -| For each component, + - _result[i]_ = if MSB of _c[i]_ is set ? _b[i]_ : _a[i]_. + - -|==== - -[[cl_khr_fp64-vector-data-load-and-store-functions]] -==== Vector Data Load and Store Functions - -The vector data load (*vload__n__*) and store (*vstore__n__*) functions -described in _table 6.13_ (also listed below) are extended to include -versions that read from or write to double scalar or vector values. -The generic type `gentype` is extended to include `double`. -The generic type `gentypen` is extended to include `double2`, `double3`, -`double4`, `double8` and `double16`. 
-The *vstore_half*, **vstore_half__n__** and **vstorea_half__n__** -functions are extended to allow a double precision scalar or vector -value to be written to memory as half values. - -Note: *vload3* reads (_x_,_y_,_z_) components from address -`(_p_ + (_offset_ * 3))` into a 3-component vector. -*vstore3*, and *vstore_half3* write (_x_,_y_,_z_) components from a -3-component vector to address `(_p_ + (_offset_ * 3))`. -In addition, *vloada_half3* reads (_x_,_y_,_z_) components from address -`(_p_ + (_offset_ * 4))` into a 3-component vector and *vstorea_half3* -writes (_x_,_y_,_z_) components from a 3-component vector to address -`(_p_ + (_offset_ * 4))`. -Whether *vloada_half3* and *vstorea_half3* read/write padding data -between the third vector element and the next alignment boundary is -implementation-defined. -*vloada_* and *vstorea_* variants are provided to access data that is -aligned to the size of the vector, and are intended to enable performance -on hardware that can take advantage of the increased alignment. - -._Double Precision Vector Data Load and Store Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype__n__ **vload__n__**(size_t _offset_, const {global} gentype *_p_) - - gentype__n__ **vload__n__**(size_t _offset_, const {local} gentype *_p_) - - gentype__n__ **vload__n__**(size_t _offset_, const {constant} gentype *_p_) - - gentype__n__ **vload__n__**(size_t _offset_, const {private} gentype *_p_) -| Return sizeof (gentype__n__) bytes of data read from address - (_p_ + (_offset * n_)). - If gentype is double, the read address computed as (_p_ + (_offset * n_)) - must be 64-bit aligned.
- -| void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {global} gentype *_p_) - - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {local} gentype *_p_) - - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {private} gentype *_p_) -| Write sizeof (gentype__n__) bytes given by _data_ to address - (_p_ + (_offset * n_)). - If gentype is double, the write address computed as (_p_ + (_offset * n_)) - must be 64-bit aligned. - -| void **vstore_half**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rte}**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtz}**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtp}**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {global} half *_p_) + - - void **vstore_half**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rte}**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtz}**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtp}**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {local} half *_p_) + - - void **vstore_half**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rte}**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtz}**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtp}**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {private} half *_p_) -| The double value given by _data_ is first converted to a half value - using the appropriate rounding mode. - The half value is then written to the address computed as - (_p_ + _offset_). - The address computed as (_p_ + _offset_) must be 16-bit aligned. - - *vstore_half* uses the current rounding mode. 
- The default current rounding mode is round to nearest even. - -| void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - - void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - - void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) -| The double__n__ value given by _data_ is converted to a half__n__ value - using the appropriate rounding mode. - The half__n __value is then written to the address computed as - (_p_ + (_offset * n_)). - The address computed as (_p_ + (_offset * n_)) must be 16-bit - aligned. - - **vstore_half__n __**uses the current rounding mode. - The default current rounding mode is round to nearest even. 
- -| void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - - void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - - void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) -| The double__n__ value is converted to a half__n__ value - using the appropriate rounding mode. - - For n = 1, 2, 4, 8 or 16, the half__n__ value is written to the - address computed as - (_p_ + (_offset * n_)). - The address computed as (_p_ + (_offset * n_)) must be aligned to - sizeof (half__n__) bytes. - - For n = 3, the half__3__ value is written to the address computed as - (_p_ + (_offset * 4_)). - The address computed as (_p_ + (_offset * 4_)) must be aligned to - sizeof (half) * 4 bytes. - - **vstorea_half__n__** uses the current rounding mode. 
- The default current rounding mode is round to nearest even. -|==== - -[[cl_khr_fp64-async-copies-from-global-to-local-memory-local-to-global-memory-and-prefetch]] -==== Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch - -The OpenCL C programming language implements the following functions that -provide asynchronous copies between global and local memory and a prefetch -from global memory. - -The generic type gentype is extended to include `double`, `double2`, `double3`, -`double4`, `double8` and `double16`. - -._Double Precision Built-in Async Copy and Prefetch Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| event_t **async_work_group_copy** ( + - {local} gentype *_dst_, + - const {global} gentype *_src_, + - size_t _num_gentypes_, event_t _event_) - - event_t **async_work_group_copy** ( + - {global} gentype _*dst_, + - const {local} gentype *_src_, + - size_t _num_gentypes_, event_t _event_) -| Perform an async copy of _num_gentypes_ gentype elements from _src_ to - _dst_. - The async copy is performed by all work-items in a work-group and this - built-in function must therefore be encountered by all work-items in a - work-group executing the kernel with the same argument values; otherwise - the results are undefined. - - Returns an event object that can be used by *wait_group_events* to wait - for the async copy to finish. - The _event_ argument can also be used to associate the - *async_work_group_copy* with a previous async copy allowing an event to be - shared by multiple async copies; otherwise _event_ should be zero. - - If _event_ argument is not zero, the event object supplied in _event_ - argument will be returned. - - This function does not perform any implicit synchronization of source data - such as using a *barrier* before performing the copy. 
- -| | - -| event_t **async_work_group_strided_copy** ( + - {local} gentype _*dst_, + - const {global} gentype *_src_, + - size_t _num_gentypes_, + - size_t _src_stride_, event_t _event_) - - event_t **async_work_group_strided_copy** ( + - {global} gentype _*dst_, + - const {local} gentype *_src_, + - size_t _num_gentypes_, + - size_t _dst_stride_, event_t _event_) -| Perform an async gather of _num_gentypes_ gentype elements from _src_ to - _dst_. - The _src_stride_ is the stride in elements for each gentype element read - from _src_. - The async gather is performed by all work-items in a work-group and this - built-in function must therefore be encountered by all work-items in a - work-group executing the kernel with the same argument values; otherwise - the results are undefined. - - Returns an event object that can be used by *wait_group_events* to wait - for the async copy to finish. - The _event_ argument can also be used to associate the - *async_work_group_strided_copy* with a previous async copy allowing an - event to be shared by multiple async copies; otherwise _event_ should be - zero. - - If _event_ argument is not zero, the event object supplied in _event_ - argument will be returned. - - This function does not perform any implicit synchronization of source data - such as using a *barrier* before performing the copy. - - The behavior of *async_work_group_strided_copy* is undefined if - _src_stride_ or _dst_stride_ is 0, or if the _src_stride_ or _dst_stride_ - values cause the _src_ or _dst_ pointers to exceed the upper bounds of the - address space during the copy. - -| | - -| void *wait_group_events* ( + - int _num_events_, event_t *_event_list_) -| Wait for events that identify the *async_work_group_copy* operations to - complete. - The event objects specified in _event_list_ will be released after the - wait is performed. 
- - This function must be encountered by all work-items in a work-group - executing the kernel with the same _num_events_ and event objects - specified in _event_list_; otherwise the results are undefined. - -| void *prefetch* ( + - const {global} gentype *__p__, size_t _num_gentypes_) -| Prefetch _num_gentypes_ * sizeof(gentype) bytes into the global cache. - The prefetch instruction is applied to a work-item in a work-group and - does not affect the functional behavior of the kernel. - -|==== - -[[cl_khr_fp64-ieee754-compliance]] -==== IEEE754 Compliance - -The following table entry describes the additions to _table 4.3,_ which -allows applications to query the configuration information using -{clGetDeviceInfo} for an OpenCL device that supports double precision -floating-point. - -[cols="1,1,2",options="header",] -|==== -| *Op-code* -| *Return Type* -| *Description* - -| {CL_DEVICE_DOUBLE_FP_CONFIG} -| {cl_device_fp_config_TYPE} -| Describes double precision floating-point capability of the OpenCL device. - This is a bit-field that describes one or more of the following values: - - {CL_FP_DENORM} -- denorms are supported - - {CL_FP_INF_NAN} -- INF and NaNs are supported - - {CL_FP_ROUND_TO_NEAREST} -- round to nearest even rounding mode supported - - {CL_FP_ROUND_TO_ZERO} -- round to zero rounding mode supported - - {CL_FP_ROUND_TO_INF} -- round to positive and negative infinity rounding - modes supported - - {CL_FP_FMA} -- IEEE754-2008 fused multiply-add is supported - - {CL_FP_SOFT_FLOAT} -- Basic floating-point operations (such as addition, - subtraction, multiplication) are implemented in software. - - The required minimum double precision floating-point capability as - implemented by this extension is: - - {CL_FP_FMA} \| + - {CL_FP_ROUND_TO_NEAREST} \| + - {CL_FP_ROUND_TO_ZERO} \| + - {CL_FP_ROUND_TO_INF} \| + - {CL_FP_INF_NAN} \| + - {CL_FP_DENORM}. 
- -|==== - -IEEE754 fused multiply-add, denorms, INF and NaNs are required to be -supported for double precision floating-point numbers and operations -on double precision floating-point numbers. - -[[cl_khr_fp64-relative-error-as-ulps]] -==== Relative Error as ULPs - -In this section we discuss the maximum relative error defined as _ulp_ -(units in the last place). - -Addition, subtraction, multiplication, fused multiply-add and conversion -between integer and a floating-point format are IEEE 754 compliant and -are therefore correctly rounded using round-to-nearest even rounding mode. - -The following table describes the minimum accuracy of double precision -floating-point arithmetic operations given as ULP values. -0 ULP is used for math functions that do not require rounding. -The reference value used to compute the ULP value of an arithmetic operation -is the infinitely precise result. - -._ULP Values for Double Precision Floating-Point Arithmetic Operations_ -[cols=",",options="header",] -|==== -| *Function* -| *Min Accuracy* - -| *_x_ + _y_* -| Correctly rounded - -| *_x_ - _y_* -| Correctly rounded - -| *_x_ * _y_* -| Correctly rounded - -| *1.0 / _x_* -| Correctly rounded - -| *_x_ / _y_* -| Correctly rounded - -| | - -| *acos* -| \<= 4 ulp - -| *acosh* -| \<= 4 ulp - -| *acospi* -| \<= 5 ulp - -| *asin* -| \<= 4 ulp - -| *asinh* -| \<= 4 ulp - -| *asinpi* -| \<= 5 ulp - -| *atan* -| \<= 5 ulp - -| *atanh* -| \<= 5 ulp - -| *atanpi* -| \<= 5 ulp - -| *atan2* -| \<= 6 ulp - -| *atan2pi* -| \<= 6 ulp - -| *cbrt* -| \<= 2 ulp - -| *ceil* -| Correctly rounded - -| *clamp* -| 0 ulp - -| *copysign* -| 0 ulp - -| *cos* -| \<= 4 ulp - -| *cosh* -| \<= 4 ulp - -| *cospi* -| \<= 4 ulp - -// 3 operations from the 2 multiplications and 1 subtraction per component -| *cross* -| absolute error tolerance of 'max * max * (3 * FLT_EPSILON)' per vector component, where _max_ is the maximum input operand magnitude - -| *degrees* -| \<= 2 ulp - -// 3 ULP error in sqrt -// 0.5 
effect on e of taking sqrt(x + e) -// 1.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// 2 accounts for error in reference code -// -// = 2 * (3 + 0.5 * ((1.5 * n) + (0.5 * (n - 1)))) -// = 2 * (3 + 0.5 * (1.5n + (0.5n - 0.5))) -// = 2 * (3 + 0.5 * (2n - 0.5)) -// = 2 * (3 + n - 0.25) -// = 2 * (2.75 + n) -// = 5.5 + 2n -| *distance* -| \<= 5.5 + 2n ulp, for gentype with vector width _n_ - -// n + n-1 Number of operations from n multiples and (n-1) additions -// 2n - 1 -| *dot* -| absolute error tolerance of 'max * max * (2n - 1) * FLT_EPSILON', for vector width _n_ and maximum input operand magnitude _max_ across all vector components - -| *erfc* -| \<= 16 ulp - -| *erf* -| \<= 16 ulp - -| *exp* -| \<= 3 ulp - -| *exp2* -| \<= 3 ulp - -| *exp10* -| \<= 3 ulp - -| *expm1* -| \<= 3 ulp - -| *fabs* -| 0 ulp - -| *fdim* -| Correctly rounded - -| *floor* -| Correctly rounded - -| *fma* -| Correctly rounded - -| *fmax* -| 0 ulp - -| *fmin* -| 0 ulp - -| *fmod* -| 0 ulp - -| *fract* -| Correctly rounded - -| *frexp* -| 0 ulp - -| *hypot* -| \<= 4 ulp - -| *ilogb* -| 0 ulp - -| *ldexp* -| Correctly rounded - -// 3 ULP error in sqrt -// 0.5 effect on e of taking sqrt(x + e) -// 0.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// 2 accounts for error in reference code -// -// = 2 * (3 + 0.5 * ((0.5 * n) + (0.5 * (n - 1)))) -// = 2 * (3 + 0.5 * (n - 0.5)) -// = 2 * (2.75 + 0.5n) -// = 5.5 + n -| *length* -| \<= 5.5 + n ulp, for gentype with vector width _n_ - -| *log* -| \<= 3 ulp - -| *log2* -| \<= 3 ulp - -| *log10* -| \<= 3 ulp - -| *log1p* -| \<= 2 ulp - -| *logb* -| 0 ulp - -| *mad* -| Implementation-defined - -| *max* -| 0 ulp - -| *maxmag* -| 0 ulp - -| *min* -| 0 ulp - -| *minmag* -| 0 ulp - -| *mix* -| Implementation-defined - -| *modf* -| 0 ulp - -| *nan* -| 0 ulp - -| *nextafter* -| 0 ulp - -// 2.5 error in rsqrt + error in multiply -// 0.5 effect on e of taking sqrt(x 
+ e) -// 0.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// 2 accounts for error in reference code -// -// = 2 * (2.5 + 0.5 * ((0.5 * n) + (0.5 * (n - 1)))) -// = 2 * (2.5 + 0.5 * (0.5n + (0.5n - 0.5))) -// = 2 * (2.5 + 0.5 * (n - 0.5)) -// = 2 * (2.5 + 0.5n - 0.25) -// = 2 * (2.25 + 0.5n) -// = 4.5 + n -| *normalize* -| \<= 4.5 + n ulp, for gentype with vector width _n_ - -| *pow(x, y)* -| \<= 16 ulp - -| *pown(x, y)* -| \<= 16 ulp - -| *powr(x, y)* -| \<= 16 ulp - -| *radians* -| \<= 2 ulp - -| *remainder* -| 0 ulp - -| *remquo* -| 0 ulp for the remainder, at least the lower 7 bits of the integral quotient - -| *rint* -| Correctly rounded - -| *rootn* -| \<= 16 ulp - -| *round* -| Correctly rounded - -| *rsqrt* -| \<= 2 ulp - -| *sign* -| 0 ulp - -| *sin* -| \<= 4 ulp - -| *sincos* -| \<= 4 ulp for sine and cosine values - -| *sinh* -| \<= 4 ulp - -| *sinpi* -| \<= 4 ulp - -| *smoothstep* -| Implementation-defined - -| *sqrt* -| Correctly rounded - -| *step* -| 0 ulp - -| *tan* -| \<= 5 ulp - -| *tanh* -| \<= 5 ulp - -| *tanpi* -| \<= 6 ulp - -| *tgamma* -| \<= 16 ulp - -| *trunc* -| Correctly rounded - -|==== diff --git a/ext/cl_khr_gl_depth_images.asciidoc b/ext/cl_khr_gl_depth_images.asciidoc deleted file mode 100644 index c958da903..000000000 --- a/ext/cl_khr_gl_depth_images.asciidoc +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_depth_images]] -== Sharing OpenGL and OpenGL ES Depth and Depth-Stencil Images - -This section describes the *cl_khr_gl_depth_images* extension. -The *cl_khr_gl_depth_images* extends OpenCL / OpenGL sharing (the -cl_khr_gl_sharing_extension) defined in -<> to allow an OpenCL image to be created from an OpenGL depth or -depth-stencil texture. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_gl_depth_images-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -The *cl_khr_gl_depth_images* extension extends OpenCL / OpenGL sharing by -allowing an OpenCL depth image to be created from an OpenGL depth or -depth-stencil texture. -Depth images with an image channel order of CL_DEPTH_STENCIL can only be -created using the *clCreateFromGLTexture* API. - -This extension adds the following new image format for depth-stencil images -to _table 5.6 and 5.7_ of the OpenCL 2.2 specification. - -[cols="",options="header",] -|==== -| *Enum values that can be specified in channel_order* - -| *CL_DEPTH_STENCIL*. - This format can only be used if channel data type = CL_UNORM_INT24 or - CL_FLOAT. - -|==== - -[cols=",",options="header",] -|==== -| *Image Channel Data Type* -| *Description* - -| *CL_UNORM_INT24* -| Each channel component is a normalized unsigned 24-bit integer value - -| *CL_FLOAT* -| Each channel component is a single precision floating-point value -|==== - -This extension adds the following new image format to the minimum list of -supported image formats described in _tables 5.8.a_ and _5.8.b_. - -[[cl_khr_gl_depth_images-required-image-formats]] -._Required Image Formats for_ *cl_khr_gl_depth_images* -[cols=",,,",] -|==== -| *num_channels* -| *channel_order* -| *channel_data_type* -| *read / write* - -| 1 -| CL_DEPTH_STENCIL -| CL_UNORM_INT24 + - CL_FLOAT -| read only - -|==== - -For the image format given by channel order of CL_DEPTH_STENCIL and channel -data type of CL_UNORM_INT24, the depth is stored as an unsigned normalized -24-bit value. - -For the image format given by channel order of CL_DEPTH_STENCIL and channel -data type of CL_FLOAT, each pixel is two 32-bit values. 
-The depth is stored as a single precision floating-point value followed by -the stencil which is stored as an 8-bit integer value. - -The stencil value cannot be read or written using the *read_imagef* and -*write_imagef* built-in functions in an OpenCL kernel. - -Depth image objects with an image channel order equal to CL_DEPTH_STENCIL -cannot be used as arguments to clEnqueueReadImage, clEnqueueWriteImage, -clEnqueueCopyImage, clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, -clEnqueueMapImage and clEnqueueFillImage and will return a -CL_INVALID_OPERATION error. - -[[cl_khr_gl_depth_images-additions-to-extension-specification]] -=== Additions to the OpenCL Extension Specification - -The following new image formats are added to the table of -<> in the OpenCL extension -specification. -If an OpenGL texture object with an internal format in this table is -successfully created by OpenGL, then there is guaranteed to be a mapping to -one of the corresponding OpenCL image format(s) in that table. - -[cols=",",options="header",] -|==== -| *GL internal format* -| *CL image format* - - *(channel order, channel data type)* - -| GL_DEPTH_COMPONENT32F | CL_DEPTH, CL_FLOAT -| GL_DEPTH_COMPONENT16 | CL_DEPTH, CL_UNORM_INT16 -| GL_DEPTH24_STENCIL8 | CL_DEPTH_STENCIL, CL_UNORM_INT24 -| GL_DEPTH32F_STENCIL8 | CL_DEPTH_STENCIL, CL_FLOAT -|==== - diff --git a/ext/cl_khr_gl_event.asciidoc b/ext/cl_khr_gl_event.asciidoc deleted file mode 100644 index d5c3b686f..000000000 --- a/ext/cl_khr_gl_event.asciidoc +++ /dev/null @@ -1,274 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_event]] -== Creating OpenCL Event Objects from OpenGL Sync Objects - -[[cl_khr_gl_event-overview]] -=== Overview - -This section describes the *cl_khr_gl_event* extension.
-This extension allows creating OpenCL event objects linked to OpenGL fence -sync objects, potentially improving efficiency of sharing images and buffers -between the two APIs. -The companion *GL_ARB_cl_event* extension provides the complementary -functionality of creating an OpenGL sync object from an OpenCL event object. - -In addition, this extension modifies the behavior of -*clEnqueueAcquireGLObjects* and *clEnqueueReleaseGLObjects* to implicitly -guarantee synchronization with an OpenGL context bound in the same thread as -the OpenCL context. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_gl_event-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_event clCreateEventFromGLsyncKHR(cl_context context, - GLsync sync, - cl_int *errcode_ret); ----- - -[[cl_khr_gl_event-new-tokens]] -=== New Tokens - -Returned by *clGetEventInfo* when _param_name_ is CL_EVENT_COMMAND_TYPE: - ----- -CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR ----- - -[[cl_khr_gl_event-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add following to the fourth paragraph of _section 5.11_ (prior to the -description of *clWaitForEvents*): - -"`Event objects can also be used to reflect the status of an OpenGL sync -object. -The sync object in turn refers to a fence command executing in an OpenGL -command stream. -This provides another method of coordinating sharing of buffers and images -between OpenGL and OpenCL.`" - -Add CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR to the valid _param_value_ values -returned by *clGetEventInfo* for _param_name_ CL_EVENT_COMMAND_TYPE (in the -third row and third column of _table 5.22_). 
- -Add new _subsection 5.11.1_: - -"`*5.11.1 Linking Event Objects to OpenGL Synchronization Objects* - -An event object may be created by linking to an OpenGL *sync object*. -Completion of such an event object is equivalent to waiting for completion -of the fence command associated with the linked GL sync object. - -The function -indexterm:[clCreateEventFromGLsyncKHR] -[source,opencl] ----- -cl_event clCreateEventFromGLsyncKHR(cl_context context, - GLsync sync, - cl_int *errcode_ret) ----- - -creates a linked event object. - -_context_ is a valid OpenCL context created from an OpenGL context or share -group, using the *cl_khr_gl_sharing* extension. - -_sync_ is the name of a sync object in the GL share group associated with -_context_. - -*clCreateEventFromGLsyncKHR* returns a valid OpenCL event object and -_errcode_ret_ is set to CL_SUCCESS if the event object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context, or was not - created from a GL context. - * CL_INVALID_GL_OBJECT if _sync_ is not the name of a sync object in the - GL share group associated with _context_. - -The parameters of an event object linked to a GL sync object will return the -following values when queried with *clGetEventInfo*: - - * The CL_EVENT_COMMAND_QUEUE of a linked event is `NULL`, because the - event is not associated with any OpenCL command-queue. - * The CL_EVENT_COMMAND_TYPE of a linked event is - CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR, indicating that the event is - associated with a GL sync object, rather than an OpenCL command. - * The CL_EVENT_COMMAND_EXECUTION_STATUS of a linked event is either - CL_SUBMITTED, indicating that the fence command associated with the sync - object has not yet completed, or CL_COMPLETE, indicating that the fence - command has completed. 
- -*clCreateEventFromGLsyncKHR* performs an implicit *clRetainEvent* on the -returned event object. -Creating a linked event object also places a reference on the linked GL sync -object. -When the event object is deleted, the reference will be removed from the GL -sync object. - -Events returned from *clCreateEventFromGLsyncKHR* can be used in the -_event_wait_list_ argument to *clEnqueueAcquireGLObjects* and CL APIs that -take a cl_event as an argument but do not enqueue commands. -Passing such events to any other CL API that enqueues commands will generate -a CL_INVALID_EVENT error.`" - -[[cl_khr_gl_event-additions-to-extension-specification]] -=== Additions to the OpenCL Extension Specification - -Add following the paragraph describing parameter _event_ to -*clEnqueueAcquireGLObjects*: - -"`If an OpenGL context is bound to the current thread, then any OpenGL -commands which - - . affect or access the contents of a memory object listed in the - _mem_objects_ list, and - . were issued on that OpenGL context prior to the call to - *clEnqueueAcquireGLObjects* - -will complete before execution of any OpenCL commands following the -*clEnqueueAcquireGLObjects* which affect or access any of those memory -objects. -If a non-`NULL` _event_ object is returned, it will report completion only -after completion of such OpenGL commands.`" - -Add following the paragraph describing parameter _event_ to -*clEnqueueReleaseGLObjects*: - -"`If an OpenGL context is bound to the current thread, then any OpenGL -commands which - - . affect or access the contents of the memory objects listed in the - _mem_objects_ list, and - . are issued on that context after the call to *clEnqueueReleaseGLObjects* - -will not execute until after execution of any OpenCL commands preceding the - -*clEnqueueReleaseGLObjects* which affect or access any of those memory -objects.
-If a non-`NULL` _event_ object is returned, it will report completion before -execution of such OpenGL commands.`" - -Replace the second paragraph of -<> with: - -"`Prior to calling *clEnqueueAcquireGLObjects*, the application must ensure -that any pending OpenGL operations which access the objects specified in -_mem_objects_ have completed. - -If the *cl_khr_gl_event* extension is supported, then the OpenCL -implementation will ensure that any such pending OpenGL operations are -complete for an OpenGL context bound to the same thread as the OpenCL -context. -This is referred to as _implicit synchronization_. - -If the *cl_khr_gl_event* extension is supported and the OpenGL context in -question supports fence sync objects, completion of OpenGL commands may also -be determined by placing a GL fence command after those commands using -*glFenceSync*, creating an event from the resulting GL sync object using -*clCreateEventFromGLsyncKHR*, and determining completion of that event -object via *clEnqueueAcquireGLObjects*. -This method may be considerably more efficient than calling *glFinish*, and -is referred to as _explicit synchronization_. -Explicit synchronization is most useful when an OpenGL context bound to -another thread is accessing the memory objects. - -If the *cl_khr_gl_event* extension is not supported, completion of OpenGL -commands may be determined by issuing and waiting for completion of a -*glFinish* command on all OpenGL contexts with pending references to these -objects. -Some implementations may offer other efficient synchronization methods. -If such methods exist they will be described in platform-specific -documentation. - -Note that no synchronization method other than *glFinish* is portable -between all OpenGL implementations and all OpenCL implementations. 
-While this is the only way to ensure completion that is portable to all -platforms, *glFinish* is an expensive operation and its use should be -avoided if the *cl_khr_gl_event* extension is supported on a platform.`" - -[[cl_khr_gl_event-issues]] -=== Issues - - . How are references between CL events and GL syncs handled? -+ --- -PROPOSED: The linked CL event places a single reference on the GL sync -object. -That reference is removed when the CL event is deleted. -A more expensive alternative would be to reflect changes in the CL event -reference count through to the GL sync. --- - - . How are linkages to synchronization primitives in other APIs handled? -+ --- -UNRESOLVED. -We will at least want to have a way to link events to EGL sync objects. -There is probably no analogous DX concept. -There would be an entry point for each type of synchronization primitive to -be linked to, such as clCreateEventFromEGLSyncKHR. - -An alternative is a generic clCreateEventFromExternalEvent taking an -attribute list. -The attribute list would include information defining the type of the -external primitive and additional information (GL sync object handle, EGL -display and sync object handle, etc.) specific to that type. -This allows a single entry point to be reused. - -These will probably be separate extensions following the API proposed here. --- - - . Should the CL_EVENT_COMMAND_TYPE correspond to the type of command - (fence) or the type of the linked sync object? -+ --- -PROPOSED: To the type of the linked sync object. --- - - . Should we support both explicit and implicit synchronization? -+ --- -PROPOSED: Yes. -Implicit synchronization is suitable when GL and CL are executing in the -same application thread. -Explicit synchronization is suitable when they are executing in different -threads but the expense of glFinish is too high. --- - - . Should this be a platform or device extension? -+ --- -PROPOSED: Platform extension. 
-This may result in considerable under-the-hood work to implement the -sync->event semantics using only the public GL API, however, when multiple -drivers and devices with different GL support levels coexist in the same -runtime. --- - - . Where can events generated from GL syncs be usable? -+ --- -PROPOSED: Only with clEnqueueAcquireGLObjects, and attempting to use such an -event elsewhere will generate an error. -There is no apparent use case for using such events elsewhere, and possibly -some cost to supporting it, balanced by the cost of checking the source of -events in all other commands accepting them as parameters. --- diff --git a/ext/cl_khr_gl_msaa_sharing.asciidoc b/ext/cl_khr_gl_msaa_sharing.asciidoc deleted file mode 100644 index 91fad53bb..000000000 --- a/ext/cl_khr_gl_msaa_sharing.asciidoc +++ /dev/null @@ -1,405 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_msaa_sharing]] -== Creating OpenCL Memory Objects from OpenGL MSAA Textures - -This extension extends the OpenCL / OpenGL sharing (the -cl_khr_gl_sharing_extension) defined in -<> to allow an OpenCL image to be created from an OpenGL -multi-sampled (a.k.a. -MSAA) texture (color or depth). - -This extension name is *cl_khr_gl_msaa_sharing*. -This extension requires *cl_khr_gl_depth_images*. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_gl_msaa_sharing-additions-to-extension-specification]] -=== Additions to the OpenCL Extension Specification - -Allow _texture_target_ argument to *clCreateFromGLTexture* to be -GL_TEXTURE_2D_MULTISAMPLE or GL_TEXTURE_2D_MULTISAMPLE_ARRAY. 
- -If _texture_target_ is GL_TEXTURE_2D_MULTISAMPLE, *clCreateFromGLTexture* -creates an OpenCL 2D multi-sample image object from an OpenGL 2D -multi-sample texture. - -If _texture_target_ is GL_TEXTURE_2D_MULTISAMPLE_ARRAY, -*clCreateFromGLTexture* creates an OpenCL 2D multi-sample array image object -from an OpenGL 2D multi-sample texture. - -Multi-sample OpenCL image objects can only be read from a kernel. -Multi-sample OpenCL image objects cannot be used as arguments to -clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage, -clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, clEnqueueMapImage -and clEnqueueFillImage and will return a CL_INVALID_OPERATION error. - -*Add the following entry to the table describing -<>:* - -[cols=",,",options="header",] -|==== -| *cl_gl_texture_info* -| *Return Type* -| *Info. returned in _param_value_* - -| *CL_GL_NUM_SAMPLES* -| GLsizei -| The _samples_ argument passed to *glTexImage2DMultisample* or - *glTexImage3DMultisample*. - - If _image_ is not an MSAA texture, 1 is returned. -|==== - -[[cl_khr_gl_msaa_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -The formats described in tables 5.8.a and 5.8.b of the OpenCL 2.2 -specification and the additional formats described in -<> also support OpenCL images created from an OpenGL -multi-sampled color or depth texture. - -*Update text that describes arg value argument to clSetKernelArg with the -following:* - -"`If the argument is a multi-sample 2D image, the _arg_value_ entry must be -a pointer to a multi-sample image object. -If the argument is a multi-sample 2D depth image, the _arg_value_ entry must -be a pointer to a multi-sample depth image object. -If the argument is a multi-sample 2D image array, the _arg_value_ entry must -be a pointer to a multi-sample image array object.
-If the argument is a multi-sample 2D depth image array, the _arg_value_ -entry must be a pointer to a multi-sample depth image array object.`" - -*Updated error code text for clSetKernelArg is:* - -*Add the following text:* - -"`CL_INVALID_MEM_OBJECT for an argument declared to be a multi-sample image, -multi-sample image array, multi-sample depth image or a multi-sample depth -image array and the argument value specified in _arg_value_ does not follow -the rules described above for a depth memory object or memory array object -argument.`" - -[[cl_khr_gl_msaa_sharing-additions-to-chapter-6]] -=== Additions to Chapter 6 of the OpenCL 2.2 Specification - -*Add the following new data types to _table 6.3_ in _section 6.1.3_ of the -OpenCL 2.2 specification:* - -[cols=",",options="header",] -|==== -| *Type* -| *Description* - -| *image2d_msaa_t* -| A 2D multi-sample color image. - Refer to _section 6.13.14_ for a detailed description of the built-in - functions that use this type. - -| *image2d_array_msaa_t* -| A 2D multi-sample color image array. - Refer to _section 6.13.14_ for a detailed description of the built-in - functions that use this type. - -| *image2d_msaa_depth_t* -| A 2D multi-sample depth image. - Refer to _section 6.13.14_ for a detailed description of the built-in - functions that use this type. - -| *image2d_array_msaa_depth_t* -| A 2D multi-sample depth image array. - Refer to _section 6.13.14_ for a detailed description of the built-in - functions that use this type. - -|==== - -*Add the following built-in functions to section 6.13.14.3 -- Built-in Image -Sampler-less Read Functions:* - -[source,opencl_c] ----- -float4 read_imagef( - image2d_msaa_t image, - int2 coord, - int sample) ----- - -Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element lookup -in the 2D image object specified by _image_. - -*read_imagef* returns floating-point values in the range [0.0 ... 
1.0] for -image objects created with _image_channel_data_type_ set to one of the -pre-defined packed formats or CL_UNORM_INT8, or CL_UNORM_INT16. - -*read_imagef* returns floating-point values in the range [-1.0 ... 1.0] for -image objects created with _image_channel_data_type_ set to CL_SNORM_INT8, -or CL_SNORM_INT16. - -*read_imagef* returns floating-point values for image objects created with -_image_channel_data_type_ set to CL_HALF_FLOAT or CL_FLOAT. - -Values returned by *read_imagef* for image objects with -_image_channel_data_type_ values not specified in the description above are -undefined. - -[source,opencl_c] ----- -int4 read_imagei(image2d_msaa_t image, - int2 coord, - int sample) - -uint4 read_imageui(image2d_msaa_t image, - int2 coord, - int sample) ----- - -Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element lookup -in the 2D image object specified by _image_. - -*read_imagei* and *read_imageui* return unnormalized signed integer and -unsigned integer values respectively. -Each channel will be stored in a 32-bit integer. - -*read_imagei* can only be used with image objects created with -_image_channel_data_type_ set to one of the following values: - - * CL_SIGNED_INT8, - * CL_SIGNED_INT16, and - * CL_SIGNED_INT32. - -If the _image_channel_data_type_ is not one of the above values, the values -returned by *read_imagei* are undefined. - -*read_imageui* can only be used with image objects created with -_image_channel_data_type_ set to one of the following values: - - * CL_UNSIGNED_INT8, - * CL_UNSIGNED_INT16, and - * CL_UNSIGNED_INT32. - -If the _image_channel_data_type_ is not one of the above values, the values -returned by *read_imageui* are undefined. - -[source,opencl_c] ----- -float4 read_imagef(image2d_array_msaa_t image, - int4 coord, - int sample) ----- - -Use _coord.xy_ and _sample_ to do an element lookup in the 2D image -identified by _coord.z_ in the 2D image array specified by _image_. 
- -*read_imagef* returns floating-point values in the range [0.0 ... 1.0] for -image objects created with _image_channel_data_type_ set to one of the -pre-defined packed formats or CL_UNORM_INT8, or CL_UNORM_INT16. - -*read_imagef* returns floating-point values in the range [-1.0 ... 1.0] for -image objects created with _image_channel_data_type_ set to CL_SNORM_INT8, -or CL_SNORM_INT16. - -*read_imagef* returns floating-point values for image objects created with -_image_channel_data_type_ set to CL_HALF_FLOAT or CL_FLOAT. - -Values returned by *read_imagef* for image objects with -_image_channel_data_type_ values not specified in the description above are -undefined. - - -[source,opencl_c] ----- -int4 read_imagei(image2d_array_msaa_t image, - int4 coord, - int sample) - -uint4 read_imageui(image2d_array_msaa_t image, - int4 coord, - int sample) ----- - -Use _coord.xy_ and _sample_ to do an element lookup in the 2D image -identified by _coord.z_ in the 2D image array specified by _image_. - -*read_imagei* and *read_imageui* return unnormalized signed integer and -unsigned integer values respectively. -Each channel will be stored in a 32-bit integer. - -*read_imagei* can only be used with image objects created with -_image_channel_data_type_ set to one of the following values: - - * CL_SIGNED_INT8, - * CL_SIGNED_INT16, and - * CL_SIGNED_INT32. - -If the _image_channel_data_type_ is not one of the above values, the values -returned by *read_imagei* are undefined. - -*read_imageui* can only be used with image objects created with -_image_channel_data_type_ set to one of the following values: - - * CL_UNSIGNED_INT8, - * CL_UNSIGNED_INT16, and - * CL_UNSIGNED_INT32. - -If the _image_channel_data_type_ is not one of the above values, the values -returned by *read_imageui* are undefined. 
- -[source,opencl_c] ----- -float read_imagef(image2d_msaa_depth_t image, - int2 coord, - int sample) ----- - -Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element lookup -in the 2D depth image object specified by _image_. - -*read_imagef* returns a floating-point value in the range [0.0 ... 1.0] for -depth image objects created with _image_channel_data_type_ set to -CL_UNORM_INT16 or CL_UNORM_INT24. - -*read_imagef* returns a floating-point value for depth image objects created -with _image_channel_data_type_ set to CL_FLOAT. - -Values returned by *read_imagef* for image objects with -_image_channel_data_type_ values not specified in the description above are -undefined. - -[source,opencl_c] ----- -float read_imagef(image2d_array_msaa_depth_t image, - int4 coord, - int sample) ----- - -Use _coord.xy_ and _sample_ to do an element lookup in the 2D image -identified by _coord.z_ in the 2D depth image array specified by _image_. - -*read_imagef* returns a floating-point value in the range [0.0 ... 1.0] for -depth image objects created with _image_channel_data_type_ set to -CL_UNORM_INT16 or CL_UNORM_INT24. - -*read_imagef* returns a floating-point value for depth image objects created -with _image_channel_data_type_ set to CL_FLOAT. - -Values returned by *read_imagef* for image objects with -_image_channel_data_type_ values not specified in the description above are -undefined. - -Note: When a multisample image is accessed in a kernel, the access takes one -vector of integers describing which pixel to fetch and an integer -corresponding to the sample numbers describing which sample within the pixel -to fetch. -_sample_ identifies the sample position in the multi-sample image. - -*For best performance, we recommend that _sample_ be a literal value so it -is known at compile time and the OpenCL compiler can perform appropriate -optimizations for multi-sample reads on the device*. - -No standard sampling instructions are allowed on the multisample image.
-Accessing a coordinate outside the image and/or a sample that is outside the -number of samples associated with each pixel in the image is undefined - -*Add the following built-in functions to section 6.13.14.5 -- Built-in Image -Query Functions:* - -[source,opencl_c] ----- -int get_image_width(image2d_msaa_t image) - -int get_image_width(image2d_array_msaa_t image) - -int get_image_width(image2d_msaa_depth_t image) - -int get_image_width(image2d_array_msaa_depth_t image) ----- - -Return the image width in pixels. - -[source,opencl_c] ----- -int get_image_height(image2d_msaa_t image) - -int get_image_height(image2d_array_msaa_t image) - -int get_image_height(image2d_msaa_depth_t image) - -int get_image_height(image2d_array_msaa_depth_t image) ----- - -Return the image height in pixels. - -[source,opencl_c] ----- -int get_image_channel_data_type(image2d_msaa_t image) - -int get_image_channel_data_type(image2d_array_msaa_t image) - -int get_image_channel_data_type(image2d_msaa_depth_t image) - -int get_image_channel_data_type(image2d_array_msaa_depth_t image) ----- - -Return the channel data type. - -[source,opencl_c] ----- -int get_image_channel_order(image2d_msaa_t image) - -int get_image_channel_order(image2d_array_msaa_t image) - -int get_image_channel_order(image2d_msaa_depth_t image) - -int get_image_channel_order(image2d_array_msaa_depth_t image) ----- - -Return the image channel order. - -[source,opencl_c] ----- -int2 get_image_dim(image2d_msaa_t image) - -int2 get_image_dim(image2d_array_msaa_t image) - -int2 get_image_dim(image2d_msaa_depth_t image) - -int2 get_image_dim(image2d_array_msaa_depth_t image) ----- - -Return the 2D image width and height as an int2 type. -The width is returned in the _x_ component, and the height in the _y_ -component. - -[source,opencl_c] ----- -size_t get_image_array_size(image2d_array_msaa_depth_t image) ----- - -Return the number of images in the 2D image array. 
- -[source,opencl_c] ----- -int get_image_num_samples(image2d_msaa_t image) - -int get_image_num_samples(image2d_array_msaa_t image) - -int get_image_num_samples(image2d_msaa_depth_t image) - -int get_image_num_samples(image2d_array_msaa_depth_t image) ----- - -Return the number of samples in the 2D MSAA image diff --git a/ext/cl_khr_gl_sharing__context.asciidoc b/ext/cl_khr_gl_sharing__context.asciidoc deleted file mode 100644 index ac0cc1388..000000000 --- a/ext/cl_khr_gl_sharing__context.asciidoc +++ /dev/null @@ -1,459 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_sharing]] -== Creating an OpenCL Context from an OpenGL Context or Share Group - -[[cl_khr_gl_sharing-overview]] -=== Overview - -This section describes functionality in the *cl_khr_gl_sharing* extension -to associate an OpenCL context with an OpenGL context or share group object. -Once an OpenCL context is associated with an OpenGL context or share group -object, the functionality described in the section -<> -may be used to share OpenGL buffer, texture, and renderbuffer objects with the OpenCL context. - -An OpenGL implementation supporting buffer objects and sharing of texture -and buffer object images with OpenCL is required by this extension. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. 
-|==== - -[[cl_khr_gl_sharing-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_int clGetGLContextInfoKHR(const cl_context_properties *properties, - cl_gl_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); ----- - -[[cl_khr_gl_sharing-new-tokens]] -=== New Tokens - -Returned by *clCreateContext*, *clCreateContextFromType*, and -*clGetGLContextInfoKHR* when an invalid OpenGL context or share group object -handle is specified in _properties_: - ----- -CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR ----- - -Accepted as the _param_name_ argument of *clGetGLContextInfoKHR*: - ----- -CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR -CL_DEVICES_FOR_GL_CONTEXT_KHR ----- - -Accepted as an attribute name in the _properties_ argument of -*clCreateContext* and *clCreateContextFromType*: - ----- -CL_GL_CONTEXT_KHR -CL_EGL_DISPLAY_KHR -CL_GLX_DISPLAY_KHR -CL_WGL_HDC_KHR -CL_CGL_SHAREGROUP_KHR ----- - -[[cl_khr_gl_sharing-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.4_, replace the description of _properties_ under -*clCreateContext* with: - -"`_properties_ points to an attribute list, which is an array of ordered - pairs terminated with zero. -If an attribute is not specified in _properties_, then its default value -(listed in _table 4.5_) is used (it is said to be specified implicitly). -If _properties_ is `NULL` or empty (points to a list whose first value is -zero), all attributes take on their default values. - -Attributes control sharing of OpenCL memory objects with OpenGL buffer, -texture, and renderbuffer objects. -Depending on the platform-specific API used to bind OpenGL contexts to the -window system, the following attributes may be set to identify an OpenGL -context: - - * When the CGL binding API is supported, the attribute - CL_CGL_SHAREGROUP_KHR should be set to a CGLShareGroup handle to a CGL - share group object.
- * When the EGL binding API is supported, the attribute CL_GL_CONTEXT_KHR - should be set to an EGLContext handle to an OpenGL ES or OpenGL context, - and the attribute CL_EGL_DISPLAY_KHR should be set to the EGLDisplay - handle of the display used to create the OpenGL ES or OpenGL context. - * When the GLX binding API is supported, the attribute CL_GL_CONTEXT_KHR - should be set to a GLXContext handle to an OpenGL context, and the - attribute CL_GLX_DISPLAY_KHR should be set to the Display handle of the - X Window System display used to create the OpenGL context. - * When the WGL binding API is supported, the attribute CL_GL_CONTEXT_KHR - should be set to an HGLRC handle to an OpenGL context, and the attribute - CL_WGL_HDC_KHR should be set to the HDC handle of the display used to - create the OpenGL context. - -Memory objects created in the context so specified may be shared with the -specified OpenGL or OpenGL ES context (as well as with any other OpenGL -contexts on the share list of that context, according to the description of -sharing in the GLX 1.4 and EGL 1.4 specifications, and the WGL documentation -for OpenGL implementations on Microsoft Windows), or with the explicitly -identified OpenGL share group for CGL. -If no OpenGL or OpenGL ES context or share group is specified in the -attribute list, then memory objects may not be shared, and calling any of -the commands described in <> will result in a -CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR error.`" - -OpenCL / OpenGL sharing does not support the CL_CONTEXT_INTEROP_USER_SYNC -property defined in _table 4.5_. -Specifying this property when creating a context with OpenCL / OpenGL -sharing will return an appropriate error. 
- -Add to _table 4.5_: - -._OpenGL Sharing Context Creation Attributes_ -[cols=",,",options="header",] -|==== -| *Attribute Name* -| *Allowed Values* - - *(Default value is in bold)* -| *Description* - -| CL_GL_CONTEXT_KHR -| *0*, OpenGL context handle -| OpenGL context to associate the OpenCL context with - -| CL_CGL_SHAREGROUP_KHR -| *0*, CGL share group handle -| CGL share group to associate the OpenCL context with - -| CL_EGL_DISPLAY_KHR -| *EGL_NO_DISPLAY*, EGLDisplay handle -| EGLDisplay an OpenGL context was created with respect to - -| CL_GLX_DISPLAY_KHR -| *None*, X handle -| X Display an OpenGL context was created with respect to - -| CL_WGL_HDC_KHR -| *0*, HDC handle -| HDC an OpenGL context was created with respect to -|==== - -Replace the first error in the list for *clCreateContext* with: - -"`_errcode_ret_ returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a context -was specified by any of the following means: - - * A context was specified for an EGL-based OpenGL ES or OpenGL - implementation by setting the attributes CL_GL_CONTEXT_KHR and - CL_EGL_DISPLAY_KHR. - * A context was specified for a GLX-based OpenGL implementation by setting - the attributes CL_GL_CONTEXT_KHR and CL_GLX_DISPLAY_KHR. - * A context was specified for a WGL-based OpenGL implementation by setting - the attributes CL_GL_CONTEXT_KHR and CL_WGL_HDC_KHR - -and any of the following conditions hold: - - * The specified display and context attributes do not identify a valid - OpenGL or OpenGL ES context. - * The specified context does not support buffer and renderbuffer objects. - * The specified context is not compatible with the OpenCL context being - created (for example, it exists in a physically distinct address space, - such as another hardware device; or it does not support sharing data - with OpenCL due to implementation restrictions).
- -_errcode_ret_ returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a share -group was specified for a CGL-based OpenGL implementation by setting the -attribute CL_CGL_SHAREGROUP_KHR, and the specified share group does not -identify a valid CGL share group object. - -_errcode_ret_ returns CL_INVALID_OPERATION if a context was specified as -described above and any of the following conditions hold: - - * A context or share group object was specified for one of CGL, EGL, GLX, - or WGL and the OpenGL implementation does not support that window-system - binding API. - * More than one of the attributes CL_CGL_SHAREGROUP_KHR, - CL_EGL_DISPLAY_KHR, CL_GLX_DISPLAY_KHR, and CL_WGL_HDC_KHR is set to a - non-default value. - * Both of the attributes CL_CGL_SHAREGROUP_KHR and CL_GL_CONTEXT_KHR are - set to non-default values. - * Any of the devices specified in the _devices_ argument cannot support - OpenCL objects which share the data store of an OpenGL object. - -_errcode_ret_ returns CL_INVALID_PROPERTY if an attribute name other than -those specified in _table 4.5_ or if CL_CONTEXT_INTEROP_USER_SYNC is -specified in _properties_.`" - -Replace the description of _properties_ under *clCreateContextFromType* -with: - -"`_properties_ points to an attribute list whose format and valid contents -are identical to the *properties* argument of *clCreateContext*.`" - -Replace the first error in the list for *clCreateContextFromType* with the -same two new errors described above for *clCreateContext*. - -[[cl_khr_gl_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add a new section to describe the new API for querying OpenCL devices that -support sharing with OpenGL: - -"`OpenCL device(s) corresponding to an OpenGL context may be queried. 
-Such a device may not always exist (for example, if an OpenGL context is -specified on a GPU not supporting OpenCL command-queues, but which does -support shared CL/GL objects), and if it does exist, may change over time. -When such a device does exist, acquiring and releasing shared CL/GL objects -may be faster on a command-queue corresponding to this device than on -command-queues corresponding to other devices available to an OpenCL -context. - -To query the currently corresponding device, use the function -indexterm:[clGetGLContextInfoKHR] -[source,opencl] ----- -cl_int clGetGLContextInfoKHR(const cl_context_properties *properties, - cl_gl_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) ----- - -_properties_ points to an attribute list whose format and valid contents are -identical to the _properties_ argument of *clCreateContext*. -_properties_ must identify a single valid GL context or GL share group -object. - -_param_name_ is a constant that specifies the device types to query, and -must be one of the values shown in the table below. - -_param_value_ is a pointer to memory where the result of the query is -returned as described in the table below. -If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ specifies the size in bytes of memory pointed to by -_param_value_. -This size must be greater than or equal to the size of the return type -described in the table below. - -_param_value_size_ret_ returns the actual size in bytes of data being -queried by _param_value_. -If _param_value_size_ret_ is `NULL`, it is ignored. - -[[cl_khr_gl_sharing-clGetGLContextInfoKHR-table]] -._Supported Device Types for_ *clGetGLContextInfoKHR* -[cols="2,1,2",options="header",] -|==== -| *param_name* -| *Return Type* -| *Information returned in param_value* - -| CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR -| *cl_device_id* -| Return the OpenCL device currently associated with the specified OpenGL - context. 
- -| CL_DEVICES_FOR_GL_CONTEXT_KHR -| *cl_device_id[]* -| Return all OpenCL devices which may be associated with the specified - OpenGL context. -|==== - -*clGetGLContextInfoKHR* returns CL_SUCCESS if the function is executed -successfully. -If no device(s) exist corresponding to _param_name_, the call will not fail, -but the value of _param_value_size_ret_ will be zero. - -*clGetGLContextInfoKHR* returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a -context was specified by any of the following means: - - * A context was specified for an EGL-based OpenGL ES or OpenGL - implementation by setting the attributes CL_GL_CONTEXT_KHR and - CL_EGL_DISPLAY_KHR. - * A context was specified for a GLX-based OpenGL implementation by setting - the attributes CL_GL_CONTEXT_KHR and CL_GLX_DISPLAY_KHR. - * A context was specified for a WGL-based OpenGL implementation by setting - the attributes CL_GL_CONTEXT_KHR and CL_WGL_HDC_KHR. - -and any of the following conditions hold: - - * The specified display and context attributes do not identify a valid - OpenGL or OpenGL ES context. - * The specified context does not support buffer and renderbuffer objects. - * The specified context is not compatible with the OpenCL context being - created (for example, it exists in a physically distinct address space, - such as another hardware device; or it does not support sharing data - with OpenCL due to implementation restrictions). - -*clGetGLContextInfoKHR* returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a -share group was specified for a CGL-based OpenGL implementation by setting -the attribute CL_CGL_SHAREGROUP_KHR, and the specified share group does not -identify a valid CGL share group object. 
- -*clGetGLContextInfoKHR* returns CL_INVALID_OPERATION if a context was -specified as described above and any of the following conditions hold: - - * A context or share group object was specified for one of CGL, EGL, GLX, - or WGL and the OpenGL implementation does not support that window-system - binding API. - * More than one of the attributes CL_CGL_SHAREGROUP_KHR, - CL_EGL_DISPLAY_KHR, CL_GLX_DISPLAY_KHR, and CL_WGL_HDC_KHR is set to a - non-default value. - * Both of the attributes CL_CGL_SHAREGROUP_KHR and CL_GL_CONTEXT_KHR are - set to non-default values. - * Any of the devices specified in the argument cannot support - OpenCL objects which share the data store of an OpenGL object. - -*clGetGLContextInfoKHR* returns CL_INVALID_VALUE if an attribute name other -than those specified in _table 4.5_ is specified in _properties_. - -Additionally, *clGetGLContextInfoKHR* returns CL_INVALID_VALUE if -_param_name_ is not one of the values listed in the table -<<cl_khr_gl_sharing-clGetGLContextInfoKHR-table>>, or if the size in bytes -specified by _param_value_size_ is less than the size of the return type -shown in the table and _param_value_ is not a `NULL` value; -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by -the OpenCL implementation on the device; or CL_OUT_OF_HOST_MEMORY if there -is a failure to allocate resources required by the OpenCL implementation on -the host.`" - -[[cl_khr_gl_sharing-issues]] -=== Issues - - . How should the OpenGL context be identified when creating an associated - OpenCL context? -+ --- -RESOLVED: by using a (display,context handle) attribute pair to identify an -arbitrary OpenGL or OpenGL ES context with respect to one of the -window-system binding layers EGL, GLX, or WGL, or a share group handle to -identify a CGL share group. -If a context is specified, it need not be current to the thread calling -*clCreateContext*.
- -A previously suggested approach would use a single boolean attribute -CL_USE_GL_CONTEXT_KHR to allow creating a context associated with the -currently bound OpenGL context. -This may still be implemented as a separate extension, and might allow more -efficient acquire/release behavior in the special case where they are being -executed in the same thread as the bound GL context used to create the CL -context. --- - - . What should the format of an attribute list be? -+ --- -After considerable discussion, we think we can live with a list of -<attribute name,value> pairs terminated by zero. -The list is passed as 'cl_context_properties *_properties_', where -cl_context_properties is typedefed to be 'intptr_t' in cl.h. - -This effectively allows encoding all scalar integer, pointer, and handle -values in the host API into the argument list and is analogous to the -structure and type of EGL attribute lists. -`NULL` attribute lists are also allowed. -Again as for EGL, any attributes not explicitly passed in the list will take -on a defined default value that does something reasonable. - -Experience with EGL, GLX, and WGL has shown attribute lists to be a -sufficiently flexible and general mechanism to serve the needs of management -calls such as context creation. -It is not completely general (encoding floating-point and non-scalar -attribute values is not straightforward), and other approaches were -suggested such as opaque attribute lists with getter/setter methods, or -arrays of variadic structures. --- - - . What's the behavior of an associated OpenGL or OpenCL context when using - resources defined by the other associated context, and that context is - destroyed? -+ --- -RESOLVED: OpenCL objects place a reference on the data store underlying the -corresponding GL object when they're created. -The GL name corresponding to that data store may be deleted, but the data -store itself remains so long as any CL object has a reference to it.
-However, destroying all GL contexts in the share group corresponding to a CL -context results in implementation-dependent behavior when using a -corresponding CL object, up to and including program termination. --- - - . How about sharing with D3D? -+ --- -Sharing between D3D and OpenCL should use the same attribute list mechanism, -though obviously with different parameters, and be exposed as a similar -parallel OpenCL extension. -There may be an interaction between that extension and this one since it's -not yet clear if it will be possible to create a CL context simultaneously -sharing GL and D3D objects. --- - - . Under what conditions will context creation fail due to sharing? -+ --- -RESOLVED: Several cross-platform failure conditions are described (GL -context or CGL share group doesn't exist, GL context doesn't support types -of GL objects, GL context implementation doesn't allow sharing), but -additional failures may result due to implementation-dependent reasons and -should be added to this extension as such failures are discovered. -Sharing between OpenCL and OpenGL requires integration at the driver -internals level. --- - - . What command-queues can *clEnqueueAcquire/ReleaseGLObjects* be placed - on? -+ --- -RESOLVED: All command-queues. -This restriction is enforced at context creation time. -If any device passed to context creation cannot support shared CL/GL -objects, context creation will fail with a CL_INVALID_OPERATION error. --- - - . How can applications determine which command-queue to place an - Acquire/Release on? -+ --- -RESOLVED: The *clGetGLContextInfoKHR* returns either the CL device currently -corresponding to a specified GL context (typically the display it's running -on), or a list of all the CL devices the specified context might run on -(potentially useful in multiheaded / "`virtual screen`" environments). 
-This command is not simply placed in <> because it relies on the same -property-list method of specifying a GL context introduced by this -extension. - -If no devices are returned, it means that the GL context exists on an older -GPU not capable of running OpenCL, but still capable of sharing objects -between GL running on that GPU and CL running elsewhere. --- - - . What is the meaning of the CL_DEVICES_FOR_GL_CONTEXT_KHR query? -+ --- -RESOLVED: The list of all CL devices that may ever be associated with a -specific GL context. -On platforms such as MacOS X, the "`virtual screen`" concept allows multiple -GPUs to back a single virtual display. -Similar functionality might be implemented on other windowing systems, such -as a transparent heterogenous multiheaded X server. -Therefore the exact meaning of this query is interpreted relative to the -binding layer API in use. --- diff --git a/ext/cl_khr_gl_sharing__memobjs.asciidoc b/ext/cl_khr_gl_sharing__memobjs.asciidoc deleted file mode 100644 index 2de4b2927..000000000 --- a/ext/cl_khr_gl_sharing__memobjs.asciidoc +++ /dev/null @@ -1,778 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_sharing__memobjs]] -== Creating OpenCL Memory Objects from OpenGL Objects - -This section describes functionality in the *cl_khr_gl_sharing* extension -to use OpenGL buffer, texture, and renderbuffer objects as OpenCL memory objects. -OpenCL memory objects may be created from OpenGL objects if and only if the -OpenCL context is associated with an OpenGL context or share group object. -The section <> -describes how to create an OpenCL context associated with an OpenGL context or share group object. - -An OpenCL image object may be created from an OpenGL texture or renderbuffer object. -An OpenCL buffer object may be created from an OpenGL buffer object. 
- -Any supported OpenGL object defined within the associated OpenGL context -or share group object may be shared, with the exception of the default -OpenGL objects (i.e. objects named zero), which may not be shared. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_gl_sharing__memobjs-lifetime-of-shared-objects]] -=== Lifetime of Shared Objects - -An OpenCL memory object created from an OpenGL object (hereinafter referred -to as a "`shared CL/GL object`") remains valid as long as the corresponding -GL object has not been deleted. -If the GL object is deleted through the GL API (e.g. *glDeleteBuffers*, -*glDeleteTextures*, or *glDeleteRenderbuffers*), subsequent use of the CL -buffer or image object will result in undefined behavior, including but not -limited to possible CL errors and data corruption, but may not result in -program termination. - -The CL context and corresponding command-queues are dependent on the -existence of the GL share group object, or the share group associated with -the GL context from which the CL context is created. -If the GL share group object or all GL contexts in the share group are -destroyed, any use of the CL context or command-queue(s) will result in -undefined behavior, which may include program termination. -Applications should destroy the CL command-queue(s) and CL context before -destroying the corresponding GL share group or contexts. - -[[cl_khr_gl_sharing__memobjs-cl-buffer-objects-from-gl-buffer-objects]] -=== OpenCL Buffer Objects from OpenGL Buffer Objects - -The function -indexterm:[clCreateFromGLBuffer] -[source,opencl] ----- -cl_mem clCreateFromGLBuffer(cl_context context, - cl_mem_flags flags, - GLuint bufobj, - cl_int *errcode_ret) ----- - -creates an OpenCL buffer object from an OpenGL buffer object. - -_context_ is a valid OpenCL context created from an OpenGL context.
- -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values -specified in _table 5.3_ can be used. - -_bufobj_ is the name of a GL buffer object. -The data store of the GL buffer object must have been previously -created by calling *glBufferData*, although its contents need not be -initialized. -The size of the data store will be used to determine the size of the CL -buffer object. - -_errcode_ret_ will return an appropriate error code as described below. -If _errcode_ret_ is `NULL`, no error code is returned. - -*clCreateFromGLBuffer* returns a valid non-zero OpenCL buffer object and -_errcode_ret_ is set to CL_SUCCESS if the buffer object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context or was not - created from a GL context. - * CL_INVALID_VALUE if values specified in _flags_ are not valid. - * CL_INVALID_GL_OBJECT if _bufobj_ is not a GL buffer object or is a GL - buffer object but does not have an existing data store or the size of - the buffer is 0. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The size of the GL buffer object data store at the time -*clCreateFromGLBuffer* is called will be used as the size of buffer object -returned by *clCreateFromGLBuffer*. -If the state of a GL buffer object is modified through the GL API (e.g. -*glBufferData*) while there exists a corresponding CL buffer object, -subsequent use of the CL buffer object will result in undefined behavior.
- -The *clRetainMemObject* and *clReleaseMemObject* functions can be used to -retain and release the buffer object. - -The CL buffer object created using clCreateFromGLBuffer can also be used to -create a CL 1D image buffer object. - -[[cl_khr_gl_sharing__memobjs-cl-image-objects-from-gl-textures]] -=== OpenCL Image Objects from OpenGL Textures - -The function -indexterm:[clCreateFromGLTexture] -[source,opencl] ----- -cl_mem clCreateFromGLTexture(cl_context context, - cl_mem_flags flags, - GLenum texture_target, - GLint miplevel, - GLuint texture, - cl_int *errcode_ret) ----- - -creates the following: - - * an OpenCL 2D image object from an OpenGL 2D texture object or a single - face of an OpenGL cubemap texture object, - * an OpenCL 2D image array object from an OpenGL 2D texture array object, - * an OpenCL 1D image object from an OpenGL 1D texture object, - * an OpenCL 1D image buffer object from an OpenGL texture buffer object, - * an OpenCL 1D image array object from an OpenGL 1D texture array object, - * an OpenCL 3D image object from an OpenGL 3D texture object. - -_context_ is a valid OpenCL context created from an OpenGL context. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values -specified in _table 5.3_ may be used. - -_texture_target_ must be one of GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, -GL_TEXTURE_BUFFER, GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D, -GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_Y, -GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_X, -GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, or -GL_TEXTURE_RECTANGLE (Note: GL_TEXTURE_RECTANGLE requires OpenGL 3.1. -Alternatively, GL_TEXTURE_RECTANGLE_ARB may be specified if the OpenGL -extension *GL_ARB_texture_rectangle* is supported.). -_texture_target_ is used only to define the image type of _texture_. 
-No reference to a bound GL texture object is made or implied by this -parameter. - -_miplevel_ is the mipmap level to be used. -If _texture_target_ is GL_TEXTURE_BUFFER, _miplevel_ must be 0. -Note: Implementations may return CL_INVALID_OPERATION for miplevel -values > 0. - -_texture_ is the name of a GL 1D, 2D, 3D, 1D array, 2D array, cubemap, -rectangle or buffer texture object. -The texture object must be a complete texture as per OpenGL rules on texture -completeness. -The _texture_ format and dimensions defined by OpenGL for the specified -_miplevel_ of the texture will be used to create the OpenCL image memory -object. -Only GL texture objects with an internal format that maps to appropriate -image channel order and data type specified in _tables 5.5_ and _5.6_ may be -used to create the OpenCL image memory object. - -_errcode_ret_ will return an appropriate error code as described below. -If _errcode_ret_ is `NULL`, no error code is returned. - -*clCreateFromGLTexture* returns a valid non-zero OpenCL image object and -_errcode_ret_ is set to CL_SUCCESS if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context or was not - created from a GL context. - * CL_INVALID_VALUE if values specified in _flags_ are not valid or if - value specified in _texture_target_ is not one of the values specified - in the description of _texture_target_. - * CL_INVALID_MIP_LEVEL if _miplevel_ is less than the value of - _level~base~_ (for OpenGL implementations) or zero (for OpenGL ES - implementations); or greater than the value of _q_ (for both OpenGL and - OpenGL ES). - _level~base~_ and _q_ are defined for the texture in _section 3.8.10_ - (Texture Completeness) of the OpenGL 2.1 specification and _section - 3.7.10_ of the OpenGL ES 2.0. 
- * CL_INVALID_MIP_LEVEL if _miplevel_ is greater than zero and the OpenGL - implementation does not support creating from non-zero mipmap levels. - * CL_INVALID_GL_OBJECT if _texture_ is not a GL texture object whose type - matches _texture_target_, if the specified _miplevel_ of _texture_ is - not defined, or if the width or height of the specified _miplevel_ is - zero or if the GL texture object is incomplete. - * CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture internal format - does not map to a supported OpenCL image format. - * CL_INVALID_OPERATION if _texture_ is a GL texture object created with a - border width value greater than zero. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -If the state of a GL texture object is modified through the GL API (e.g. -*glTexImage2D*, *glTexImage3D* or the values of the texture parameters -GL_TEXTURE_BASE_LEVEL or GL_TEXTURE_MAX_LEVEL are modified) while there -exists a corresponding CL image object, subsequent use of the CL image -object will result in undefined behavior. - -The *clRetainMemObject* and *clReleaseMemObject* functions can be used to -retain and release the image objects. - -[[cl_khr_gl_sharing__memobjs-list-of-opengl-and-corresponding-opencl-image-formats]] -==== List of OpenGL and corresponding OpenCL Image Formats - -The table below describes the list of OpenGL texture internal formats and -the corresponding OpenCL image formats. -If an OpenGL texture object with an internal format from the table below is -successfully created by OpenGL, then there is guaranteed to be a mapping to -one of the corresponding OpenCL image format(s) in that table.
-Texture objects created with other OpenGL internal formats may (but are not -guaranteed to) have a mapping to an OpenCL image format; if such mappings -exist, they are guaranteed to preserve all color components, data types, and -at least the number of bits/component actually allocated by OpenGL for that -format. - -[[cl_khr_gl_sharing__memobjs-mapping-of-image-formats]] -._OpenGL internal formats and corresponding OpenCL internal formats_ -[cols=",",options="header",] -|==== -| *GL internal format* -| *CL image format* - - *(channel order, channel data type)* - -| GL_RGBA8 -| CL_RGBA, CL_UNORM_INT8 or - -CL_BGRA, CL_UNORM_INT8 - -| GL_SRGB8_ALPHA8 -| CL_sRGBA, CL_UNORM_INT8 - -| GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV -| CL_RGBA, CL_UNORM_INT8 - -| GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV -| CL_BGRA, CL_UNORM_INT8 - -| -| - -| GL_RGBA8I, GL_RGBA8I_EXT -| CL_RGBA, CL_SIGNED_INT8 - -| GL_RGBA16I, GL_RGBA16I_EXT -| CL_RGBA, CL_SIGNED_INT16 - -| GL_RGBA32I, GL_RGBA32I_EXT -| CL_RGBA, CL_SIGNED_INT32 - -| -| - -| GL_RGBA8UI, GL_RGBA8UI_EXT -| CL_RGBA, CL_UNSIGNED_INT8 - -| GL_RGBA16UI, GL_RGBA16UI_EXT -| CL_RGBA, CL_UNSIGNED_INT16 - -| GL_RGBA32UI, GL_RGBA32UI_EXT -| CL_RGBA, CL_UNSIGNED_INT32 - -| -| - -| GL_RGBA8_SNORM -| CL_RGBA, CL_SNORM_INT8 - -| GL_RGBA16 -| CL_RGBA, CL_UNORM_INT16 - -| GL_RGBA16_SNORM -| CL_RGBA, CL_SNORM_INT16 - -| GL_RGBA16F, GL_RGBA16F_ARB -| CL_RGBA, CL_HALF_FLOAT - -| GL_RGBA32F, GL_RGBA32F_ARB -| CL_RGBA, CL_FLOAT - -| -| - -| GL_R8 -| CL_R, CL_UNORM_INT8 - -| GL_R8_SNORM -| CL_R, CL_SNORM_INT8 - -| GL_R16 -| CL_R, CL_UNORM_INT16 - -| GL_R16_SNORM -| CL_R, CL_SNORM_INT16 - -| GL_R16F -| CL_R, CL_HALF_FLOAT - -| GL_R32F -| CL_R, CL_FLOAT - -| -| - -| GL_R8I -| CL_R, CL_SIGNED_INT8 - -| GL_R16I -| CL_R, CL_SIGNED_INT16 - -| GL_R32I -| CL_R, CL_SIGNED_INT32 - -| GL_R8UI -| CL_R, CL_UNSIGNED_INT8 - -| GL_R16UI -| CL_R, CL_UNSIGNED_INT16 - -| GL_R32UI -| CL_R, CL_UNSIGNED_INT32 - -| -| - -| GL_RG8 -| CL_RG, CL_UNORM_INT8 - -| GL_RG8_SNORM -| CL_RG, 
CL_SNORM_INT8 - -| GL_RG16 -| CL_RG, CL_UNORM_INT16 - -| GL_RG16_SNORM -| CL_RG, CL_SNORM_INT16 - -| GL_RG16F -| CL_RG, CL_HALF_FLOAT - -| GL_RG32F -| CL_RG, CL_FLOAT - -| -| - -| GL_RG8I -| CL_RG, CL_SIGNED_INT8 - -| GL_RG16I -| CL_RG, CL_SIGNED_INT16 - -| GL_RG32I -| CL_RG, CL_SIGNED_INT32 - -| GL_RG8UI -| CL_RG, CL_UNSIGNED_INT8 - -| GL_RG16UI -| CL_RG, CL_UNSIGNED_INT16 - -| GL_RG32UI -| CL_RG, CL_UNSIGNED_INT32 -|==== - -[[cl_khr_gl_sharing__memobjs-cl-image-objects-from-gl-renderbuffers]] -=== OpenCL Image Objects from OpenGL Renderbuffers - -The function -indexterm:[clCreateFromGLRenderbuffer] -[source,opencl] ----- -cl_mem clCreateFromGLRenderbuffer(cl_context context, - cl_mem_flags flags, - GLuint renderbuffer, - cl_int *errcode_ret) ----- - -creates an OpenCL 2D image object from an OpenGL renderbuffer object. - -_context_ is a valid OpenCL context created from an OpenGL context. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values -specified in _table 5.3_ can be used. - -_renderbuffer_ is the name of a GL renderbuffer object. -The renderbuffer storage must be specified before the image object can be -created. -The _renderbuffer_ format and dimensions defined by OpenGL will be used to -create the 2D image object. -Only GL renderbuffers with internal formats that maps to appropriate image -channel order and data type specified in _tables 5.5_ and _5.6_ can be used -to create the 2D image object. - -_errcode_ret_ will return an appropriate error code as described below. -If _errcode_ret_ is `NULL`, no error code is returned. - -*clCreateFromGLRenderbuffer* returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to CL_SUCCESS if the image object is created -successfully. 
-Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context or was not - created from a GL context. - * CL_INVALID_VALUE if values specified in _flags_ are not valid. - * CL_INVALID_GL_OBJECT if _renderbuffer_ is not a GL renderbuffer object - or if the width or height of _renderbuffer_ is zero. - * CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL renderbuffer internal - format does not map to a supported OpenCL image format. - * CL_INVALID_OPERATION if _renderbuffer_ is a multi-sample GL renderbuffer - object. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -If the state of a GL renderbuffer object is modified through the GL API -(i.e. changes to the dimensions or format used to represent pixels of the GL -renderbuffer using appropriate GL API calls such as *glRenderbufferStorage*) -while there exists a corresponding CL image object, subsequent use of the CL -image object will result in undefined behavior. - -The *clRetainMemObject* and *clReleaseMemObject* functions can be used to -retain and release the image objects. - -The table <<cl_khr_gl_sharing__memobjs-mapping-of-image-formats,OpenGL internal formats and corresponding OpenCL internal formats>> describes the -list of OpenGL renderbuffer internal formats and the corresponding OpenCL -image formats. -If an OpenGL renderbuffer object with an internal format from the table is -successfully created by OpenGL, then there is guaranteed to be a mapping to -one of the corresponding OpenCL image format(s) in that table.
-Renderbuffer objects created with other OpenGL internal formats may (but are -not guaranteed to) have a mapping to an OpenCL image format; if such -mappings exist, they are guaranteed to preserve all color components, data -types, and at least the number of bits/component actually allocated by -OpenGL for that format. - -[[cl_khr_gl_sharing__memobjs-querying-gl-object-information-from-a-cl-memory-object]] -=== Querying OpenGL object information from an OpenCL memory object - -The OpenGL object used to create the OpenCL memory object and information -about the object type i.e. whether it is a texture, renderbuffer or buffer -object can be queried using the following function. -indexterm:[clGetGLObjectInfo] -[source,opencl] ----- -cl_int clGetGLObjectInfo(cl_mem memobj, - cl_gl_object_type *gl_object_type, - GLuint *gl_object_name) ----- - -_gl_object_type_ returns the type of GL object attached to _memobj_ and can -be CL_GL_OBJECT_BUFFER, CL_GL_OBJECT_TEXTURE2D, CL_GL_OBJECT_TEXTURE3D, -CL_GL_OBJECT_TEXTURE2D_ARRAY, CL_GL_OBJECT_TEXTURE1D, -CL_GL_OBJECT_TEXTURE1D_ARRAY, CL_GL_OBJECT_TEXTURE_BUFFER, or -CL_GL_OBJECT_RENDERBUFFER. -If _gl_object_type_ is `NULL`, it is ignored - -_gl_object_name_ returns the GL object name used to create _memobj_. -If _gl_object_name_ is `NULL`, it is ignored. - -*clGetGLObjectInfo* returns CL_SUCCESS if the call was executed -successfully. -Otherwise, it returns one of the following errors: - - * CL_INVALID_MEM_OBJECT if _memobj_ is not a valid OpenCL memory object. - * CL_INVALID_GL_OBJECT if there is no GL object associated with _memobj_. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. 
- -The function -indexterm:[clGetGLTextureInfo] -[source,opencl] ----- -cl_int clGetGLTextureInfo(cl_mem memobj, - cl_gl_texture_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) ----- - -returns additional information about the GL texture object associated with -_memobj_. - -_param_name_ specifies what additional information about the GL texture -object associated with _memobj_ to query. -The list of supported _param_name_ types and the information returned in -_param_value_ by *clGetGLTextureInfo* is described in the table below. - -_param_value_ is a pointer to memory where the result being queried is -returned. -If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ is used to specify the size in bytes of memory pointed to -by _param_value_. -This size must be >= size of return type as described in the table below. - -_param_value_size_ret_ returns the actual size in bytes of data copied to -_param_value_. -If _param_value_size_ret_ is `NULL`, it is ignored. - -[[cl_khr_gl_sharing__memobjs-clGetGLTextureInfo-queries]] -._OpenGL texture info that may be queried with_ *clGetGLTextureInfo* -[cols=",,",options="header",] -|==== -| *cl_gl_texture_info* -| *Return Type* -| *Info. returned in _param_value_* - -| *CL_GL_TEXTURE_TARGET* -| GLenum -| The _texture_target_ argument specified in *clCreateFromGLTexture*. - -| *CL_GL_MIPMAP_LEVEL* -| GLint -| The _miplevel_ argument specified in *clCreateFromGLTexture*. -|==== - -*clGetGLTextureInfo* returns CL_SUCCESS if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * CL_INVALID_MEM_OBJECT if _memobj_ is not a valid OpenCL memory object. - * CL_INVALID_GL_OBJECT if there is no GL texture object associated with - _memobj_. 
- * CL_INVALID_VALUE if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is less than the size of the return type - as described in the table above and _param_value_ is not `NULL`, or if - _param_value_ and _param_value_size_ret_ are `NULL`. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_gl_sharing__memobjs-sharing-memory-objects-that-map-to-gl-objects-between-gl-and-cl-contexts]] -=== Sharing memory objects that map to GL objects between GL and CL contexts - -The function -indexterm:[clEnqueueAcquireGLObjects] -[source,opencl] ----- -cl_int clEnqueueAcquireGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -is used to acquire OpenCL memory objects that have been created from OpenGL -objects. -These objects need to be acquired before they can be used by any OpenCL -commands queued to a command-queue or the behaviour is undefined. -The OpenGL objects are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context. - -_command_queue_ is a valid command-queue. -All devices used to create the OpenCL context associated with -_command_queue_ must support acquiring shared CL/GL objects. -This constraint is enforced at context creation time. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of CL memory objects that correspond to -GL objects. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. 
-If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command -and can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -*clEnqueueAcquireGLObjects* returns CL_SUCCESS if the function is executed -successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing -and returns CL_SUCCESS. -Otherwise, it returns one of the following errors: - - * CL_INVALID_VALUE if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * CL_INVALID_MEM_OBJECT if memory objects in _mem_objects_ are not valid - OpenCL memory objects. - * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid - command-queue. - * CL_INVALID_CONTEXT if context associated with _command_queue_ was not - created from an OpenGL context. - * CL_INVALID_GL_OBJECT if memory objects in _mem_objects_ have not been - created from a GL object(s). - * CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device.
- * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function -indexterm:[clEnqueueReleaseGLObjects] -[source,opencl] ----- -cl_int clEnqueueReleaseGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -is used to release OpenCL memory objects that have been created from OpenGL -objects. -These objects need to be released before they can be used by OpenGL. -The OpenGL objects are released by the OpenCL context associated with -_command_queue_. - -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of CL memory objects that correspond to -GL objects. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command -and can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -*clEnqueueReleaseGLObjects* returns CL_SUCCESS if the function is executed -successfully. 
-If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing -and returns CL_SUCCESS. -Otherwise, it returns one of the following errors: - - * CL_INVALID_VALUE if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * CL_INVALID_MEM_OBJECT if memory objects in _mem_objects_ are not valid - OpenCL memory objects. - * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid - command-queue. - * CL_INVALID_CONTEXT if context associated with _command_queue_ was not - created from an OpenGL context - * CL_INVALID_GL_OBJECT if memory objects in _mem_objects_ have not been - created from a GL object(s). - * CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_gl_sharing__memobjs-synchronizing-opencl-and-opengl-access-to-shared-objects]] -==== Synchronizing OpenCL and OpenGL Access to Shared Objects - -In order to ensure data integrity, the application is responsible for -synchronizing access to shared CL/GL objects by their respective APIs. -Failure to provide such synchronization may result in race conditions and -other undefined behavior including non-portability between implementations. - -Prior to calling *clEnqueueAcquireGLObjects*, the application must ensure -that any pending GL operations which access the objects specified in -_mem_objects_ have completed. -This may be accomplished portably by issuing and waiting for completion of a -*glFinish* command on all GL contexts with pending references to these -objects. 
-Implementations may offer more efficient synchronization methods; for -example on some platforms calling *glFlush* may be sufficient, or -synchronization may be implicit within a thread, or there may be -vendor-specific extensions that enable placing a fence in the GL command -stream and waiting for completion of that fence in the CL command-queue. -Note that no synchronization methods other than *glFinish* are portable -between OpenGL implementations at this time. - -Similarly, after calling *clEnqueueReleaseGLObjects*, the application is -responsible for ensuring that any pending OpenCL operations which access the -objects specified in _mem_objects_ have completed prior to executing -subsequent GL commands which reference these objects. -This may be accomplished portably by calling *clWaitForEvents* with the -event object returned by *clEnqueueReleaseGLObjects,* or by calling -*clFinish*. -As above, some implementations may offer more efficient methods. - -The application is responsible for maintaining the proper order of -operations if the CL and GL contexts are in separate threads. - -If a GL context is bound to a thread other than the one in which -*clEnqueueReleaseGLObjects* is called, changes to any of the objects in -_mem_objects_ may not be visible to that context without additional steps -being taken by the application. -For an OpenGL 3.1 (or later) context, the requirements are described in -Appendix D ("`Shared Objects and Multiple Contexts`") of the OpenGL 3.1 -Specification. -For prior versions of OpenGL, the requirements are implementation-dependent. - -Attempting to access the data store of an OpenGL object after it has been -acquired by OpenCL and before it has been released will result in undefined -behavior. -Similarly, attempting to access a shared CL/GL object from OpenCL before it -has been acquired by the OpenCL command-queue, or after it has been -released, will result in undefined behavior. 
- -[[cl_khr_gl_sharing__memobjs-event-command-types]] -==== Event Command Types for Sharing memory objects that map to GL objects - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -OpenGL objects: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireGLObjects} -| {CL_COMMAND_ACQUIRE_GL_OBJECTS_anchor} - -include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_GL_OBJECTS.asciidoc[] - -| {clEnqueueReleaseGLObjects} -| {CL_COMMAND_RELEASE_GL_OBJECTS_anchor} - -include::{generated}/api/version-notes/CL_COMMAND_RELEASE_GL_OBJECTS.asciidoc[] - -|==== diff --git a/ext/cl_khr_il_program.asciidoc b/ext/cl_khr_il_program.asciidoc deleted file mode 100644 index 05201a14f..000000000 --- a/ext/cl_khr_il_program.asciidoc +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_il_program]] -== Intermediate Language Programs - -This section describes the *cl_khr_il_program* extension. - -This extension adds the ability to create programs with intermediate language (IL), -usually SPIR-V. Further information about the format and contents of SPIR-V may be -found in the SPIR-V Specification. Information about how SPIR-V modules behave in -the OpenCL environment may be found in the OpenCL SPIR-V Environment Specification. - -The functionality described by this extension is a core feature in OpenCL 2.1. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version.
-|==== - -[[cl_khr_il_program-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_program clCreateProgramWithILKHR(cl_context context, - const void *il, - size_t length, - cl_int *errcode_ret); ----- - -[[cl_khr_il_program-new-tokens]] -=== New Tokens - -Accepted as a new _param_name_ argument to {clGetDeviceInfo}: - ----- -CL_DEVICE_IL_VERSION_KHR ----- - -Accepted as a new _param_name_ argument to {clGetProgramInfo}: - ----- -CL_PROGRAM_IL_KHR ----- - -[[cl_khr_il_program-additions-to-chapter-3]] -=== Additions to Chapter 3 of the OpenCL 2.0 Specification - -In section 3.1, replace the fourth paragraph with: - -"Programmers provide programs in the form of intermediate language binaries (usually SPIR-V), OpenCL C source strings, or implementation-defined binary objects. The OpenCL platform provides a compiler to translate programs represented as intermediate language binaries or OpenCL C source strings into device program executables. The compiler may be _online_ or _offline_. An _online compiler_ is available during host program execution using standard APIs. An _offline compiler_ is invoked outside of host program control, using platform-specific methods. The OpenCL runtime allows developers to get a previously compiled device program executable and to load and execute a previously compiled device program executable." - -[[cl_khr_il_program-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.0 Specification - -Add to Table 4.3 - OpenCL Device Queries: - -[caption="Table 4.3 "] -.List of supported param_names by {clGetDeviceInfo} -[width="100%",cols="2,1,3",options="header"] -|==== -| Device Info | Return Type | Description - -|{CL_DEVICE_IL_VERSION_KHR} -|char[] -|The intermediate languages that are supported by {clCreateProgramWithILKHR} for this device.
+ -{blank} -Returns a space separated list of IL version strings of the form: + -{blank} -+<IL_Prefix>_<Major_version>.<Minor_version>+ + -{blank} -A device that supports the *cl_khr_il_program* extension must support the “SPIR-V” IL prefix. - -|==== - -[[cl_khr_il_program-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.0 Specification - -Add to Section 5.8.1: Creating Program Objects: - -"The function - -include::{generated}/api/protos/clCreateProgramWithILKHR.txt[] - -creates a new program object for _context_ using the _length_ bytes of intermediate language pointed to by _il_. - -_context_ must be a valid OpenCL context. - -_il_ is a pointer to a _length_-byte block of memory containing intermediate language. - -_length_ is the length of the block of memory pointed to by _il_. - -_errcode_ret_ will return an appropriate error code. If _errcode_ret_ is NULL, no error code is returned. - -{clCreateProgramWithILKHR} returns a valid non-zero program object and _errcode_ret_ is set to {CL_SUCCESS} if the program object is created successfully. Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - -* {CL_INVALID_CONTEXT} if _context_ is not a valid context. -* {CL_INVALID_VALUE} if _il_ is NULL or if _length_ is zero. -* {CL_INVALID_VALUE} if the _length_-byte block of memory pointed to by _il_ does not contain well-formed intermediate language. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host." - -Add to Section 5.8.2: Building Program Executables: - -Add the following to the description of the _options_ parameter to {clBuildProgram}: - -"Certain options are ignored when _program_ is created with IL."
- -Additionally, replace the error: - -* {CL_INVALID_OPERATION} if _program_ was not created with {clCreateProgramWithSource} or {clCreateProgramWithBinary}. - -with: - -* {CL_INVALID_OPERATION} if _program_ was not created with {clCreateProgramWithSource}, {clCreateProgramWithILKHR} or {clCreateProgramWithBinary}. - -Add to Section 5.8.3: Separate Compilation and Linking of Programs: - -Add the following to the description of the _options_ parameter to {clCompileProgram}: - -"Certain options are ignored when _program_ is created with IL." - -Additionally, replace the error: - -* {CL_INVALID_OPERATION} if _program_ has no source i.e. it has not been created with {clCreateProgramWithSource}. - -with: - -* {CL_INVALID_OPERATION} if _program_ was not created with {clCreateProgramWithSource} or {clCreateProgramWithILKHR}. - -Add to Section 5.8.4.1: Preprocessor Options, + -Add to Section 5.8.4.2: Math Intrinsic Options (for -cl-single-precision-constant-only), + -Add to Section 5.8.4.3: Optimization Options, + -Add to Section 5.8.4.4: Options to Request or Suppress Warnings, and + -Add to Section 5.8.4.5: Options Controlling the OpenCL C Version: - -"These options are ignored for programs created with IL." - -Change one entry and add one new entry to Table 5.17 {clGetProgramInfo} parameter queries: - -[caption="Table 5.17 "] -.List of supported param_names by {clGetProgramInfo} -[width="100%",cols="2,1,3",options="header"] -|==== -| Program Info | Return Type | Description - -|{CL_PROGRAM_SOURCE} -|{char_TYPE}[] -|Return the program source code specified by {clCreateProgramWithSource}. The source string returned is a concatenation of all source strings -specified to {clCreateProgramWithSource} with a null terminator. The concatenation strips any nulls in the original source strings. 
+ -{blank} -If program is created using {clCreateProgramWithBinary}, {clCreateProgramWithBuiltInKernels}, or {clCreateProgramWithILKHR} a null string or the appropriate program source code is returned depending on whether or not the program source code is stored in the binary. + -{blank} -The actual number of characters that represents the program source code including the null terminator is returned in _param_value_size_ret_. - -|{CL_PROGRAM_IL_KHR} -|{unsigned_char_TYPE}[] -|Returns the program IL for programs created with {clCreateProgramWithILKHR}. + -{blank} -If program is created with {clCreateProgramWithSource}, {clCreateProgramWithBinary}, or {clCreateProgramWithBuiltInKernels}, the memory pointed to by _param_value_ will be unchanged and _param_value_size_ret_ will be set to zero. - -|==== diff --git a/ext/cl_khr_image2d_from_buffer.asciidoc b/ext/cl_khr_image2d_from_buffer.asciidoc deleted file mode 100644 index ce4e7f21f..000000000 --- a/ext/cl_khr_image2d_from_buffer.asciidoc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_image2d_from_buffer]] -== Creating a 2D Image From A Buffer - -This section describes the *cl_khr_image2d_from_buffer* extension. - -This extension allows a 2D image to be created from an existing OpenCL buffer memory object. - -This extension became a core feature in OpenCL 2.0. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -=== Additions to Chapter 4 of the OpenCL 1.2 Specification - -The following table entry describes the additions to _table 4.3,_ which allows applications to query the configuration information using {clGetDeviceInfo} for an OpenCL device that supports creating a 2D image from a buffer. 
- -[cols="2,1,2",options="header",] -|======================================================================= -|Device Info -|Return Type -|Description - -|{CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR} -|{cl_uint_TYPE} -|The row pitch alignment size in pixels for images created from a buffer. The value returned must be a power of 2. + -{blank} -If the device does not support images, this value should be 0. - -|{CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR} -|{cl_uint_TYPE} -|This query should be used when an image is created from a buffer which was created using {CL_MEM_USE_HOST_PTR}. The value returned must be a power of 2. + -{blank} -This query specifies the minimum alignment in pixels of the _host_ptr_ specified to {clCreateBuffer}. + -{blank} -If the device does not support images, this value should be 0. - -|======================================================================= - -=== Additions to Chapter 5 of the OpenCL 1.2 Specification - -Add to Section 5.3.1: Creating Image Objects: - -A 2D image can be created from a buffer by specifying a _buffer_ object in the _image_desc_ passed to {clCreateImage} for an _image_type_ equal to {CL_MEM_OBJECT_IMAGE2D}. When the 2D image from buffer is created, the client must specify the width, height and image format (i.e. channel order and channel data type). If these are not specified, {clCreateImage} returns a NULL value with _errcode_ret_ set to {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR}. The pitch can be optionally specified. If the pitch is not specified, the pitch is computed as width {times} bytes per pixel based on the image format. - -The pitch specified (or computed if pitch specified is 0) must be a multiple of the maximum of the {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR} value for all devices in the context associated with the _buffer_ that support images. Otherwise, {clCreateImage} returns a NULL value with _errcode_ret_ set to {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR}. 
- -If the _buffer_ was created with {CL_MEM_USE_HOST_PTR}, the _host_ptr_ specified to {clCreateBuffer} must be aligned to the maximum of the {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR} value for all devices in the context associated with the _buffer_ that support images. Otherwise, {clCreateImage} returns a NULL value with _errcode_ret_ set to {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR}. - -The minimum list of supported image formats described in _table 5.8_ of the OpenCL 1.2 specification must be supported for 2D images created from a buffer. - -The OpenCL runtime APIs that operate on images (i.e. {clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueFillImage}, {clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer}, {clEnqueueCopyBufferToImage} and {clEnqueueMapImage}) are supported for a 2D image created from a buffer. - -When the contents of a buffer object data store are modified, those changes are reflected in the contents of the 2D image object and vice-versa at corresponding synchronization points. The _image_height_ {times} _image_row_pitch_ specified in _image_desc_ must be less than or equal to the size of the buffer object data store. - -NOTE: Concurrent reading from, writing to, and copying between both a buffer object and the 2D image object associated with the buffer object is undefined. Only reading from both a buffer object and 2D image object associated with the buffer object is defined. A 2D image and a 2D image created from a buffer use the same image type in OpenCL C (`image2d_t`). The image built-in functions described in _section 6.12.14.2_, _6.12.14.3_, _6.12.14.4_ and _6.12.14.5_ for `image2d_t` behave the same way for a 2D image and a 2D image from a buffer. diff --git a/ext/cl_khr_initialize_memory.asciidoc b/ext/cl_khr_initialize_memory.asciidoc deleted file mode 100644 index 29a078bf5..000000000 --- a/ext/cl_khr_initialize_memory.asciidoc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2017-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_initialize_memory]] -== Local and Private Memory Initialization - -Memory is allocated in various forms in OpenCL both explicitly (global -memory) or implicitly (local, private memory). -This allocation so far does not provide a straightforward mechanism to -initialize the memory on allocation. -In other words what is lacking is the equivalent of calloc for the currently -supported malloc like capability. -This functionality is useful for a variety of reasons including ease of -debugging, application controlled limiting of visibility to previous -contents of memory and in some cases, optimization. - -This extension adds support for initializing local and private memory before -a kernel begins execution. -This extension name is *cl_khr_initialize_memory*. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_initialize_memory-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -Add a new context property to _table 4.5_ in _section 4.4_. - -// Note: Some of these extension enums and types are currently missing, -// see https://github.com/KhronosGroup/OpenCL-Docs/issues/872 - -.List of supported context creation properties by {clCreateContext} -[cols="3,2,4",options="header",] -|==== -| Context Property -| Property value -| Description - -| {CL_CONTEXT_MEMORY_INITIALIZE_KHR} -| {cl_context_memory_initialize_khr_TYPE} -| Describes which memory types for the context must be initialized. - This is a bit-field, where the following values are currently supported: - - {CL_CONTEXT_MEMORY_INITIALIZE_LOCAL_KHR} -- Initialize local memory to - zeros. - - {CL_CONTEXT_MEMORY_INITIALIZE_PRIVATE_KHR} -- Initialize private memory to - zeros. 
- -|==== - -[[cl_khr_initialize_memory-additions-to-chapter-6]] -=== Additions to Chapter 6 of the OpenCL 2.2 Specification - -Updates to _section 6.9_ -- Restrictions - -If the context is created with {CL_CONTEXT_MEMORY_INITIALIZE_KHR}, appropriate -memory locations as specified by the bit-field are initialized with zeroes, -prior to the start of execution of any kernel. -The driver chooses when, prior to kernel execution, the initialization of -local and/or private memory is performed. -The only requirement is there should be no values set from outside the -context, which can be read during a kernel execution. diff --git a/ext/cl_khr_int32_atomics.asciidoc b/ext/cl_khr_int32_atomics.asciidoc deleted file mode 100644 index f6b79ae81..000000000 --- a/ext/cl_khr_int32_atomics.asciidoc +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_int32_atomics]] -== 32-bit Atomics - -This section describes the extensions *cl_khr_global_int32_base_atomics*, *cl_khr_global_int32_extended_atomics*, *cl_khr_local_int32_base_atomics*, and *cl_khr_local_int32_extended_atomics*. -These extensions allow atomic operations to be performed on 32-bit signed and unsigned integers in global and local memory. - -These extensions became core features in OpenCL 1.1, except the built-in atomic function names are changed to use the **atomic_** prefix instead of **atom_** and the volatile qualifier was added to the pointer parameter _p_. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version.
-|==== - -=== Global Atomics for 32-bit Integers - -==== Base Atomics - -._Built-in Atomic Functions for_ *cl_khr_global_int32_base_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -int **atom_add** (volatile {global} int *_p_, int _val_) + -uint **atom_add** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_sub** (volatile {global} int *_p_, int _val_) + -uint **atom_sub** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_xchg** (volatile {global} int *_p_, int _val_) + -uint **atom_xchg** (volatile {global} uint *_p_, uint _val_) - -|Swaps the _old_ value stored at location _p_ with new value given by -_val_. Returns _old_ value. - -| -int **atom_inc** (volatile {global} int *_p_) + -uint **atom_inc** (volatile {global} uint *_p_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_dec** (volatile {global} int *_p_) + -uint **atom_dec** (volatile {global} uint *_p_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_cmpxchg** (volatile {global} int *_p_, int _cmp_, int _val_) + -uint **atom_cmpxchg** (volatile {global} uint *_p_, uint _cmp_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ == _cmp_) ? 
_val_ : _old_ and store -result at location pointed by _p_. The function returns _old_. - -|======================================================================= - -==== Extended Atomics - -._Built-in Atomic Functions for_ *cl_khr_global_int32_extended_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -int **atom_min** (volatile {global} int *_p_, int _val_) + -uint **atom_min** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *min*(_old_, _val_) and store minimum value at -location pointed by _p_. The function returns _old_. - -| -int **atom_max** (volatile {global} int *_p_, int _val_) + -uint **atom_max** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *max*(_old_, _val_) and store maximum value at -location pointed by _p_. The function returns _old_. - -| -int **atom_and** (volatile {global} int *_p_, int _val_) + -uint **atom_and** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ & val) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_or** (volatile {global} int *_p_, int _val_) + -uint **atom_or** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ \| val) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_xor** (volatile {global} int *_p_, int _val_) + -uint **atom_xor** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ ^ val) and store result at location -pointed by _p_. The function returns _old_. 
- -|======================================================================= - -=== Local Atomics for 32-bit Integers - -==== Base Atomics - -._Built-in Atomic Functions for_ *cl_khr_local_int32_base_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -int **atom_add** (volatile {local} int *_p_, int _val_) + -uint **atom_add** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_sub** (volatile {local} int *_p_, int _val_) + -uint **atom_sub** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_xchg** (volatile {local} int *_p_, int _val_) + -uint **atom_xchg** (volatile {local} uint *_p_, uint _val_) - -|Swaps the _old_ value stored at location _p_ with new value given by -_val_. Returns _old_ value. - -| -int **atom_inc** (volatile {local} int *_p_) + -uint **atom_inc** (volatile {local} uint *_p_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_dec** (volatile {local} int *_p_) + -uint **atom_dec** (volatile {local} uint *_p_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_cmpxchg** (volatile {local} int *_p_, int _cmp_, int _val_) + -uint **atom_cmpxchg** (volatile {local} uint *_p_, uint _cmp_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. 
Compute (_old_ == _cmp_) ? _val_ : _old_ and store -result at location pointed by _p_. The function returns _old_. - -|======================================================================= - -==== Extended Atomics - -._Built-in Atomic Functions for_ *cl_khr_local_int32_extended_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -int **atom_min** (volatile {local} int *_p_, int _val_) + -uint **atom_min** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *min*(_old_, _val_) and store minimum value at -location pointed by _p_. The function returns _old_. - -| -int **atom_max** (volatile {local} int *_p_, int _val_) + -uint **atom_max** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *max*(_old_, _val_) and store maximum value at -location pointed by _p_. The function returns _old_. - -| -int **atom_and** (volatile {local} int *_p_, int _val_) + -uint **atom_and** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ & val) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_or** (volatile {local} int *_p_, int _val_) + -uint **atom_or** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ \| val) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_xor** (volatile {local} int *_p_, int _val_) + -uint **atom_xor** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ ^ val) and store result at location -pointed by _p_. The function returns _old_. 
- -|======================================================================= - diff --git a/ext/cl_khr_int64_atomics.asciidoc b/ext/cl_khr_int64_atomics.asciidoc deleted file mode 100644 index ebed85229..000000000 --- a/ext/cl_khr_int64_atomics.asciidoc +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_int64_atomics]] -== 64-bit Atomics - -This section describes the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions. These extensions allow atomic operations to be performed on 64-bit signed and unsigned integers in global and local memory. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -._Built-in Atomic Functions for_ *cl_khr_int64_base_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -long **atom_add** (volatile {global} long *_p_, long _val_) + -long **atom_add** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_add** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_add** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_sub** (volatile {global} long *_p_, long _val_) + -long **atom_sub** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_sub** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_sub** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. 
Compute (_old_ - _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_xchg** (volatile {global} long *_p_, long _val_) + -long **atom_xchg** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_xchg** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_xchg** (volatile {local} ulong *_p_, ulong _val_) - -|Swaps the _old_ value stored at location _p_ with new value given by -_val_. Returns _old_ value. - -| -long **atom_inc** (volatile {global} long *_p_) + -long **atom_inc** (volatile {local} long *_p_) + -{blank} -ulong **atom_inc** (volatile {global} ulong *_p_) + -ulong **atom_inc** (volatile {local} ulong *_p_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_dec** (volatile {global} long *_p_) + -long **atom_dec** (volatile {local} long *_p_) + -{blank} -ulong **atom_dec** (volatile {global} ulong *_p_) + -ulong **atom_dec** (volatile {local} ulong *_p_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_cmpxchg** (volatile {global} long *_p_, long _cmp_, long _val_) + -long **atom_cmpxchg** (volatile {local} long *_p_, long _cmp_, long _val_) + -{blank} -ulong **atom_cmpxchg** (volatile {global} ulong *_p_, ulong _cmp_, ulong _val_) + -ulong **atom_cmpxchg** (volatile {local} ulong *_p_, ulong _cmp_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ == _cmp_) ? _val_ : _old_ and store -result at location pointed by _p_. The function returns _old_. 
- -|======================================================================= - -._Built-in Atomic Functions for_ *cl_khr_int64_extended_atomics* -[cols=",",options="header",] -|======================================================================= -|*Function* |*Description* - -| -long **atom_min** (volatile {global} long *_p_, long _val_) + -long **atom_min** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_min** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_min** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *min*(_old_, _val_) and store minimum value at -location pointed by _p_. The function returns _old_. - -| -long **atom_max** (volatile {global} long *_p_, long _val_) + -long **atom_max** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_max** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_max** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *max*(_old_, _val_) and store maximum value at -location pointed by _p_. The function returns _old_. - -| -long **atom_and** (volatile {global} long *_p_, long _val_) + -long **atom_and** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_and** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_and** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ & val) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_or** (volatile {global} long *_p_, long _val_) + -long **atom_or** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_or** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_or** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. 
Compute (_old_ \| val) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_xor** (volatile {global} long *_p_, long _val_) + -long **atom_xor** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_xor** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_xor** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ ^ val) and store result at location -pointed by _p_. The function returns _old_. - -|======================================================================= - -Note: Atomic operations on 64-bit integers and 32-bit integers (and -float) are also atomic w.r.t. each other. diff --git a/ext/cl_khr_integer_dot_product.asciidoc b/ext/cl_khr_integer_dot_product.asciidoc deleted file mode 100644 index 9ed542cd0..000000000 --- a/ext/cl_khr_integer_dot_product.asciidoc +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright 2020-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_integer_dot_product]] -== Integer dot product - -This extension adds support for SPIR-V instructions and OpenCL C built-in -functions to compute the dot product of vectors of integers. - -=== General Information - -==== Name Strings - -`cl_khr_integer_dot_product` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-06-17 | 1.0.0 | Initial version. -| 2021-06-23 | 2.0.0 | All 8-bit support is mandatory, added 8-bit acceleration properties. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.6, -and OpenCL C Specification Version 3.0.6 and OpenCL Environment Specification -Version 3.0.6. - -This extension requires OpenCL 1.0. - -==== Contributors - -Kévin Petit, Arm Ltd. 
+ -Jeremy Kemp, Imagination Technologies + -Ben Ashbaugh, Intel + -Ruihao Zhang, Qualcomm + -Stuart Brady, Arm Ltd + -Balaji Calidas, Qualcomm + -Ayal Zaks, Intel + - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: - -[source,opencl] ----- - -CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR (1 << 0) -CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR (1 << 1) - -CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR 0x1073 - -CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR 0x1074 -CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR 0x1075 ----- - -=== New OpenCL C Functions - -This extension defines a number of new functions that operate on vectors -of integers. The exact function overloads available depend on the features -supported by the target device. - -[source,opencl_c] ----- -uint dot(uchar4 a, uchar4 b); -int dot(char4 a, char4 b); -int dot(uchar4 a, char4 b); -int dot(char4 a, uchar4 b); - -uint dot_acc_sat(uchar4 a, uchar4 b, uint acc); -int dot_acc_sat(char4 a, char4 b, int acc); -int dot_acc_sat(uchar4 a, char4 b, int acc); -int dot_acc_sat(char4 a, uchar4 b, int acc); - -uint dot_4x8packed_uu_uint(uint a, uint b); -int dot_4x8packed_ss_int(uint a, uint b); -int dot_4x8packed_us_int(uint a, uint b); -int dot_4x8packed_su_int(uint a, uint b); - -uint dot_acc_sat_4x8packed_uu_uint(uint a, uint b, uint acc); -int dot_acc_sat_4x8packed_ss_int(uint a, uint b, int acc); -int dot_acc_sat_4x8packed_us_int(uint a, uint b, int acc); -int dot_acc_sat_4x8packed_su_int(uint a, uint b, int acc); ----- - -=== Modifications to the OpenCL API Specification - -(Modify Section 4.2, *Querying Devices*) :: -+ --- - -(Add the following to Table 4.3, _Device Queries_) :: -+ -[cols="2,2,4",options="header"] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR} -| {cl_device_integer_dot_product_capabilities_khr_type} -| Returns the integer dot product 
capabilities supported by the device. + - + -{CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} is always set - indicating that all implementations that support `cl_khr_integer_dot_product` - must support dot product built-in functions and, when SPIR-V is supported, - SPIR-V instructions that take four-component vectors of 8-bit integers packed - into 32-bit integers as input. + -{CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} is set when dot product - built-in functions and, when SPIR-V is supported, SPIR-V instructions that - take four-component vectors of 8-bit elements as input are supported. + - NOTE: {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} must be set in version - 2.x of the extension. - - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR} -| {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} -| Returns a structure describing the exact 8-bit dot product combinations - that are <<integer-dot-product-acceleration-properties,accelerated>> on the device. + - Each member is {CL_TRUE} if the combination it corresponds to is accelerated, - {CL_FALSE} otherwise. + - NOTE: {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR} is missing - before version 2.0 of the extension. - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR} -| {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} -| Returns a structure describing the exact 4x8-bit packed dot product combinations - that are <<integer-dot-product-acceleration-properties,accelerated>> on the device. + - Each member is {CL_TRUE} if the combination it corresponds to is accelerated, - {CL_FALSE} otherwise. + - NOTE: {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR} is missing - before version 2.0 of the extension.
-|==== - -OpenCL 3 devices must report the following feature macros via -{CL_DEVICE_OPENCL_C_FEATURES} when the corresponding bit is set in the bitfield -returned for {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR}: - -[cols="1,1",options="header"] -|==== -| Feature bit -| Feature macro - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} -| `__opencl_c_integer_dot_product_input_4x8bit_packed` - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} -| `__opencl_c_integer_dot_product_input_4x8bit` - -|==== - -[[integer-dot-product-acceleration-properties]] -The {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} structure -describes the exact dot product operations that are accelerated on the device: - -include::{generated}/api/structs/cl_device_integer_dot_product_acceleration_properties_khr.txt[] - - * _signed_accelerated_ is {CL_TRUE} when signed dot product operations are - accelerated, {CL_FALSE} otherwise. - * _unsigned_accelerated_ is {CL_TRUE} when unsigned dot product operations - are accelerated, {CL_FALSE} otherwise. - * _mixed_signedness_accelerated_ is {CL_TRUE} when mixed signedness dot - product operations are accelerated, {CL_FALSE} otherwise. - * _accumulating_saturating_signed_accelerated_ is {CL_TRUE} when accumulating - saturating signed dot product operations are accelerated, {CL_FALSE} - otherwise. - * _accumulating_saturating_unsigned_accelerated_ is {CL_TRUE} when accumulating - saturating unsigned dot product operations are accelerated, {CL_FALSE} - otherwise. - * _accumulating_saturating_mixed_signedness_accelerated_ is {CL_TRUE} when - accumulating saturating mixed signedness dot product operations are - accelerated, {CL_FALSE} otherwise. 
- -A dot product operation is deemed accelerated if its implementation provides -a performance advantage over application-provided code composed from elementary -instructions and/or other dot product instructions, either because the -implementation uses optimized machine code sequences whose generation from -application-provided code cannot be guaranteed or because it uses hardware -features that cannot otherwise be targeted from application-provided code. --- - -=== Modifications to the OpenCL C Specification - -(Modify section 6.13.3, *Integer Functions*) :: -+ --- - -The following built-in functions and preprocessor definitions are added: - -[source,opencl_c] ----- -#define cl_khr_integer_dot_product 1 - -if (CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR) { - #define __opencl_c_integer_dot_product_input_4x8bit_packed 1 - - uint dot_4x8packed_uu_uint(uint a, uint b); - int dot_4x8packed_ss_int(uint a, uint b); - int dot_4x8packed_us_int(uint a, uint b); - int dot_4x8packed_su_int(uint a, uint b); - - uint dot_acc_sat_4x8packed_uu_uint(uint a, uint b, uint acc); - int dot_acc_sat_4x8packed_ss_int(uint a, uint b, int acc); - int dot_acc_sat_4x8packed_us_int(uint a, uint b, int acc); - int dot_acc_sat_4x8packed_su_int(uint a, uint b, int acc); -} - -if (CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR) { - #define __opencl_c_integer_dot_product_input_4x8bit 1 - - uint dot(uchar4 a, uchar4 b); - int dot(char4 a, char4 b); - int dot(uchar4 a, char4 b); - int dot(char4 a, uchar4 b); - - uint dot_acc_sat(uchar4 a, uchar4 b, uint acc); - int dot_acc_sat(char4 a, char4 b, int acc); - int dot_acc_sat(uchar4 a, char4 b, int acc); - int dot_acc_sat(char4 a, uchar4 b, int acc); -} ----- - - -* `dot` returns the dot product of the two input vectors `a` and `b`. The -components of `a` and `b` are sign- or zero-extended to the width of the -destination type and the vectors with extended components are multiplied -component-wise. 
All the components of the resulting vectors are added -together to form the final result. - -* `dot_acc_sat` returns the saturating addition of the dot product of the two -input vectors `a` and `b` and the accumulator `acc`: - ----- -product = dot(a,b); -result = add_sat(product, acc); ----- - -* `dot_*_4x8packed_XY_R` returns the dot product of the two vectors packed -into `a` and `b` (lowest component in least significant byte). The components -are unpacked, sign- or zero-extended to the width of the destination type before -the multiplications and additions. `X` represents the signedness of the components -of `a`, `Y` that of the components of `b`. `R` is the return type. --- - -=== Modifications to the OpenCL SPIR-V Environment Specification - -See OpenCL SPIR-V Environment Specification. - -=== Interactions with Other Extensions - -If `cl_khr_il_program` is supported then the SPIR-V environment specification -modifications described above apply. - diff --git a/ext/cl_khr_mipmap_image.asciidoc b/ext/cl_khr_mipmap_image.asciidoc deleted file mode 100644 index c7a435922..000000000 --- a/ext/cl_khr_mipmap_image.asciidoc +++ /dev/null @@ -1,609 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_mipmap_image]] -== Mipmaps - -This section describes OpenCL support for mipmaps. - -There are two optional mipmap extensions. -The *cl_khr_mipmap_image* extension adds the ability to create a mip-mapped -image, enqueue commands to read/write/copy/map/unmap a region of a mipmapped -image, and built-in functions that can be used to read a mip-mapped image in -an OpenCL C program. -The *cl_khr_mipmap_image_writes* extension adds built-in functions that can -be used to write a mip-mapped image in an OpenCL C program. 
-If the *cl_khr_mipmap_image_writes* extension is supported by the OpenCL -device, the *cl_khr_mipmap_image* extension must also be supported. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_mipmap_image-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -[[cl_khr_mipmap_image-additions-to-section-5.3]] -==== Additions to section 5.3 -- Image Objects - -A mip-mapped 1D image, 1D image array, 2D image, 2D image array or 3D image -is created by specifying _num_mip_levels_ to be a value greater than one in -the _image_desc_ passed to {clCreateImage}. -The dimensions of a mip-mapped image can be a power of two or a non-power of -two. -Each successively smaller mipmap level is half the size of the previous -level. -If this half value is a fractional value, it is rounded down to the nearest -integer. - -*Restrictions* - -The following restrictions apply when mip-mapped images are created with -{clCreateImage}: - - * {CL_MEM_USE_HOST_PTR} or {CL_MEM_COPY_HOST_PTR} cannot be specified if a - mip-mapped image is created. - * The _host_ptr_ argument to {clCreateImage} must be a `NULL` value. - * Mip-mapped images cannot be created for {CL_MEM_OBJECT_IMAGE1D_BUFFER} - images, depth images or multi-sampled (i.e. msaa) images. - -Calls to {clEnqueueReadImage}, {clEnqueueWriteImage} and {clEnqueueMapImage} -can be used to read from or write to a specific mip-level of a mip-mapped -image. -If image argument is a 1D image, _origin_[1] specifies the mip-level to use. -If image argument is a 1D image array, _origin_[2] specifies the mip-level -to use. -If image argument is a 2D image, _origin_[2] specifies the mip-level to use. -If image argument is a 2D image array or a 3D image, _origin_[3] specifies -the mip-level to use. 
- -Calls to {clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer} and -{clEnqueueCopyBufferToImage} can also be used to copy from and to a specific -mip-level of a mip-mapped image. -If _src_image_ argument is a 1D image, _src_origin_[1] specifies the -mip-level to use. -If _src_image_ argument is a 1D image array, _src_origin_[2] specifies the -mip-level to use. -If _src_image_ argument is a 2D image, _src_origin_[2] specifies the -mip-level to use. -If _src_image_ argument is a 2D image array or a 3D image, _src_origin_[3] -specifies the mip-level to use. -If _dst_image_ argument is a 1D image, _dst_origin_[1] specifies the -mip-level to use. -If _dst_image_ argument is a 1D image array, _dst_origin_[2] specifies the -mip-level to use. -If _dst_image_ argument is a 2D image, _dst_origin_[2] specifies the -mip-level to use. -If _dst_image_ argument is a 2D image array or a 3D image, _dst_origin_[3] -specifies the mip-level to use. - -If the mip level specified is not a valid value, these functions return the -error {CL_INVALID_MIP_LEVEL}. - -Calls to {clEnqueueFillImage} can be used to write to a specific mip-level of -a mip-mapped image. -If image argument is a 1D image, origin[1] specifies the mip-level to use. -If image argument is a 1D image array, origin[2] specifies the mip-level to -use. -If image argument is a 2D image, origin[2] specifies the mip-level to use. -If image argument is a 2D image array or a 3D image, origin[3] specifies the -mip-level to use. - -[[cl_khr_mipmap_image-additions-to-section-5.7]] -==== Additions to section 5.7 -- Sampler Objects - -Add the following sampler properties _to table 5.14_ that can be specified -when a sampler object is created using {clCreateSamplerWithProperties}. 
- -[cols="3,1,2",options="header",] -|==== -| Sampler Property -| Property Value -| Default Value - -| {CL_SAMPLER_MIP_FILTER_MODE_KHR} -| {cl_filter_mode_TYPE} -| {CL_FILTER_NEAREST} - -| {CL_SAMPLER_LOD_MIN_KHR} -| {cl_float_TYPE} -| `0.0f` - -| {CL_SAMPLER_LOD_MAX_KHR} -| {cl_float_TYPE} -| `MAXFLOAT` - -|==== - -Note: The sampler properties {CL_SAMPLER_MIP_FILTER_MODE_KHR}, -{CL_SAMPLER_LOD_MIN_KHR} and {CL_SAMPLER_LOD_MAX_KHR} cannot be specified with -any samplers initialized in the OpenCL program source. -Only the default values for these properties will be used. -To create a sampler with specific values for these properties, a sampler -object must be created with {clCreateSamplerWithProperties} and passed as an -argument to a kernel. - -[[cl_khr_mipmap_image-additions-to-chapter-6-of-the-opencl-2.0-specification]] -=== Additions to Chapter 6 of the OpenCL 2.0 Specification - -[[cl_khr_mipmap_image-additions-to-section-6.13.14-image-read-write-and-query-functions]] -==== Additions to section 6.13.14 – Image Read, Write and Query Functions - -The image read and write functions described in _sections 6.13.14.2_, -_6.13.14.3_ and _6.13.14.4_ read from and write to mip-level 0 if the -image argument is a mip-mapped image. - -The following new built-in functions are added to _section 6.13.14.2_. 
- -[cols="5a,4",options="header",] -|======================================================================= -|*Function* |*Description* -|[source,opencl_c] ----- -float4 read_imagef( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float lod) - -int4 read_imagei( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float lod) - -uint4 read_imageui( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float lod) - -float read_imagef( - read_only image2d_depth_t image, - sampler_t sampler, - float2 coord, - float lod) ----- -| Use the coordinate _coord.xy_ to do an element lookup in the mip-level specified by _lod_ in the 2D image object specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) - -int4 read_imagei( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) - -uint4 read_imageui( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) - -float read_imagef( - read_only image2d_depth_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord.xy_ to do an element lookup in the mip-level specified by the computed lod in the 2D image object specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image1d_t image, - sampler_t sampler, - float coord, - float lod) - -int4 read_imagei( - read_only image1d_t image, - sampler_t sampler, - float coord, - float lod) - -uint4 read_imageui( - read_only image1d_t image, - sampler_t sampler, - float coord, - float lod) ----- -| Use the coordinate _coord_ to do an element lookup in the mip-level specified by _lod_ in the 1D image object specified by _image_. 
- -|[source,opencl_c] ----- -float4 read_imagef( - read_only image1d_t image, - sampler_t sampler, - float coord, - float gradient_x, - float gradient_y) - -int4 read_imagei( - read_only image1d_t image, - sampler_t sampler, - float coord, - float gradient_x, - float gradient_y) - -uint4 read_imageui( - read_only image1d_t image, - sampler_t sampler, - float coord, - float gradient_x, - float gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord_ to do an element lookup in the mip-level specified by the computed lod in the 1D image object specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float lod) - -int4 read_imagei( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float lod) - -uint4 read_imageui( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float lod) ----- -| Use the coordinate _coord.xyz_ to do an element lookup in the mip-level specified by _lod_ in the 3D image object specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float4 gradient_x, - float4 gradient_y) - -int4 read_imagei( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float4 gradient_x, - float4 gradient_y) - -uint4 read_imageui( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float4 gradient_x, - float4 gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord.xyz_ to do an element lookup in the mip-level specified by the computed lod in the 3D image object specified by _image_. 
- -|[source,opencl_c] ----- -float4 read_imagef( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float lod) - -int4 read_imagei( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float lod) - -uint4 read_imageui( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float lod) ----- -| Use the coordinate _coord.x_ to do an element lookup in the 1D image identified by _coord.y_ and mip-level specified by _lod_ in the 1D image array specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float gradient_x, - float gradient_y) - -int4 read_imagei( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float gradient_x, - float gradient_y) - -uint4 read_imageui( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float gradient_x, - float gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord.x_ to do an element lookup in the 1D image identified by _coord.y_ and mip-level specified by the computed lod in the 1D image array specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float lod) - -int4 read_imagei( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float lod) - -uint4 read_imageui( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float lod) - -float read_imagef( - read_only image2d_array_depth_t image, - sampler_t sampler, - float4 coord, - float lod) ----- -| Use the coordinate _coord.xy_ to do an element lookup in the 2D image identified by _coord.z_ and mip-level specified by _lod_ in the 2D image array specified by _image_.
- -|[source,opencl_c] ----- -float4 read_imagef( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) - -int4 read_imagei( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) - -uint4 read_imageui( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) - -float read_imagef( - read_only image2d_array_depth_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord.xy_ to do an element lookup in the 2D image identified by _coord.z_ and mip-level specified by the computed lod in the 2D image array specified by _image_. -|======================================================================= - -NOTE: {CL_SAMPLER_NORMALIZED_COORDS} must be {CL_TRUE} for built-in functions described in the table above that read from a mip-mapped image; otherwise the behavior is undefined. -The value specified in the _lod_ argument is clamped to the minimum of (actual number of mip-levels in the image – 1) and the value specified for {CL_SAMPLER_LOD_MAX_KHR}. - -The following new built-in functions are added to _section 6.13.14.4_. - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* |*Description* -|[source,opencl_c] ----- -void write_imagef( - write_only image2d_t image, - int2 coord, - int lod, - float4 color) - -void write_imagei( - write_only image2d_t image, - int2 coord, - int lod, - int4 color) - -void write_imageui( - write_only image2d_t image, - int2 coord, - int lod, - uint4 color) - -void write_imagef( - write_only image2d_depth_t image, - int2 coord, - int lod, - float depth) ----- -| Write _color_ value to location specified by _coord.xy_ in the mip-level specified by _lod_ in the 2D image object specified by _image_.
-Appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_ and _coord.y_ are considered to be unnormalized coordinates and must be in the range 0 .. image width of mip-level specified by _lod_ – 1, and 0 .. image height of mip-level specified by _lod_ – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if (_x_, _y_) coordinate values are not in the range (0 .. image width of the mip-level specified by _lod_ – 1, 0 .. image height of the mip-level specified by _lod_ – 1) or _lod_ value exceeds the (number of mip-levels in the image – 1) is undefined. - -|[source,opencl_c] ----- -void write_imagef( - write_only image1d_t image, - int coord, - int lod, - float4 color) - -void write_imagei( - write_only image1d_t image, - int coord, - int lod, - int4 color) - -void write_imageui( - write_only image1d_t image, - int coord, - int lod, - uint4 color) ----- -|Write _color_ value to location specified by _coord_ in the mip-level specified by _lod_ in the 1D image object specified by _image_. -Appropriate data format conversion to the specified image format is done before writing the color value. -_coord_ is considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level -specified by _lod_ – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if coordinate value is not in the range (0 .. image width of the mip-level specified by _lod_ – 1) or _lod_ value exceeds the (number of mip-levels in the image – 1), is undefined. 
- -|[source,opencl_c] ----- -void write_imagef( - write_only image1d_array_t image, - int2 coord, - int lod, - float4 color) - -void write_imagei( - write_only image1d_array_t image, - int2 coord, - int lod, - int4 color) - -void write_imageui( - write_only image1d_array_t image, - int2 coord, - int lod, - uint4 color) ----- -| Write _color_ value to location specified by _coord.x_ in the 1D image identified by _coord.y_ and mip-level _lod_ in the 1D image array specified by _image_. -Appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_ and _coord.y_ are considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level specified by _lod_ – 1 and 0 .. image number of layers – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if (_x_, _y_) coordinate values are not in the range (0 .. image width of the mip-level specified by _lod_ – 1, 0 .. image number of layers – 1), respectively or _lod_ value exceeds the (number of mip-levels in the image – 1), is undefined. - -|[source,opencl_c] ----- -void write_imagef( - write_only image2d_array_t image, - int4 coord, - int lod, - float4 color) - -void write_imagei( - write_only image2d_array_t image, - int4 coord, - int lod, - int4 color) - -void write_imageui( - write_only image2d_array_t image, - int4 coord, - int lod, - uint4 color) - -void write_imagef( - write_only image2d_array_depth_t image, - int4 coord, - int lod, - float depth) ----- -| Write _color_ value to location specified by _coord.xy_ in the 2D image identified by _coord.z_ and mip-level _lod_ in the 2D image array specified by _image_. -Appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level specified by _lod_ – 1, 0 .. 
image height of the mip-level specified by _lod_ – 1 and 0 .. image number of layers – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if (_x_, _y_, _z_) coordinate values are not in the range (0 .. image width of the mip-level specified by _lod_ – 1, 0 .. image height of the mip-level specified by _lod_ – 1, 0 .. image number of layers – 1), respectively or _lod_ value exceeds the (number of mip-levels in the image – 1), is undefined. - -|[source,opencl_c] ----- -void write_imagef( - write_only image3d_t image, - int4 coord, - int lod, - float4 color) - -void write_imagei( - write_only image3d_t image, - int4 coord, - int lod, - int4 color) - -void write_imageui( - write_only image3d_t image, - int4 coord, - int lod, - uint4 color) ----- -| Write _color_ value to location specified by _coord.xyz_ and mip-level _lod_ in the 3D image object specified by _image_. -Appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level -specified by _lod_ – 1, 0 .. image height of the mip-level specified by _lod_ – 1 and 0 .. image depth of the mip-level specified by _lod_ – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if (_x_, _y_, _z_) coordinate values are not in the range (0 .. image width of the mip-level specified by _lod_ – 1, 0 .. image height of the mip-level specified by _lod_ – 1, 0 .. image depth of the mip-level specified by _lod_ – 1), respectively or _lod_ value exceeds the (number of mip-levels in the image – 1), is undefined. - -|======================================================================= - -The following new built-in functions are added to _section 6.13.14.5_.
- -[cols="1a,1",options="header",] -|================================= -|*Function* |*Description* -|[source,opencl_c] ----- -int get_image_num_mip_levels( - image1d_t image) - -int get_image_num_mip_levels( - image2d_t image) - -int get_image_num_mip_levels( - image3d_t image) - -int get_image_num_mip_levels( - image1d_array_t image) - -int get_image_num_mip_levels( - image2d_array_t image) - -int get_image_num_mip_levels( - image2d_depth_t image) - -int get_image_num_mip_levels( - image2d_array_depth_t image) ----- -| Return the number of mip-levels. -|================================= - -[[cl_khr_mipmap_image-additions-to-creating-opencl-memory-objects-from-opengl-objects]] -=== Additions to <> - -If both the *cl_khr_mipmap_image* and *cl_khr_gl_sharing* extensions are -supported by the OpenCL device, the *cl_khr_gl_sharing* extension may also -be used to create a mipmapped OpenCL image from a mipmapped OpenGL texture. - -To create a mipmapped OpenCL image from a mipmapped OpenGL texture, pass a -negative value as the _miplevel_ argument to {clCreateFromGLTexture}. -If _miplevel_ is a negative value then an OpenCL mipmapped image object is -created from a mipmapped OpenGL texture object, instead of an OpenCL image -object for a specific miplevel of the OpenGL texture. - -Note: For a detailed description of how the level of detail is computed, -please refer to _section 3.9.7_ of the OpenGL 3.0 specification. diff --git a/ext/cl_khr_pci_bus_info.asciidoc b/ext/cl_khr_pci_bus_info.asciidoc deleted file mode 100644 index fc724ca1a..000000000 --- a/ext/cl_khr_pci_bus_info.asciidoc +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_pci_bus_info]] -== PCI Bus Information Query - -This extension adds a new query to obtain PCI bus information about an OpenCL -device. 
- -Not all OpenCL devices have PCI bus information, either due to the device not -being connected to the system through a PCI interface or due to platform -specific restrictions and policies. Thus this extension is only expected to be -supported by OpenCL devices which can provide the information. - -As a consequence, applications should always check for the presence of the -extension string for each individual OpenCL device for which they intend to -issue the new query for and should not have any assumptions about the -availability of the extension on any given platform. - -=== General Information - -==== Name Strings - -`cl_khr_pci_bus_info` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-04-19 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL API Specification Version V3.0.6. - -This extension requires OpenCL 1.0. - -=== New API Types - -Structure returned by the device info query for {CL_DEVICE_PCI_BUS_INFO_KHR}: - -include::{generated}/api/structs/cl_device_pci_bus_info_khr.txt[] - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: - -[source,opencl] ----- -#define CL_DEVICE_PCI_BUS_INFO_KHR 0x410F ----- - -=== Modifications to the OpenCL API Specification - -==== Section 4.2 - Querying Devices: - -Add to Table 5 - OpenCL Device Queries: - -[caption="Table 5. "] -.OpenCL Device Queries -[width="100%",cols="<30%,<20%,<50%",options="header"] -|==== -| DeviceInfo | Return Type | Description -| {CL_DEVICE_PCI_BUS_INFO_KHR} - | {cl_device_pci_bus_info_khr_TYPE} - | Returns PCI bus information for the device. - - The PCI bus information is returned as a single structure that includes - the PCI bus domain, the PCI bus identifier, the PCI device identifier, and - the PCI device function identifier. 
- -|==== diff --git a/ext/cl_khr_priority_hints.asciidoc b/ext/cl_khr_priority_hints.asciidoc deleted file mode 100644 index bdbcfe402..000000000 --- a/ext/cl_khr_priority_hints.asciidoc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_priority_hints]] -== Priority Hints - -This section describes the *cl_khr_priority_hints* extension. -This extension adds priority hints for OpenCL, but does not specify the -scheduling behavior or minimum guarantees. -It is expected that the user guides associated with each implementation -which supports this extension will describe the scheduling behavior -guarantees. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_priority_hints-host-side-api-modifications]] -=== Host-side API modifications - -The function {clCreateCommandQueueWithProperties} (Section 5.1) is -extended to support a priority value as part of the _properties_ argument. - -The priority property applies to OpenCL command-queues that belong to the -same OpenCL context. - -The properties field accepts the {CL_QUEUE_PRIORITY_KHR} property, with a -value of type {cl_queue_priority_khr_TYPE}, which can be one of: - - * {CL_QUEUE_PRIORITY_HIGH_KHR} - * {CL_QUEUE_PRIORITY_MED_KHR} - * {CL_QUEUE_PRIORITY_LOW_KHR} - -If {CL_QUEUE_PRIORITY_KHR} is not specified then the default priority is -{CL_QUEUE_PRIORITY_MED_KHR}. - -To the error section for {clCreateCommandQueueWithProperties}, the -following is added: - - * {CL_INVALID_QUEUE_PROPERTIES} if the {CL_QUEUE_PRIORITY_KHR} property is - specified and the queue is a {CL_QUEUE_ON_DEVICE}.
diff --git a/ext/cl_khr_select_fprounding_mode.asciidoc b/ext/cl_khr_select_fprounding_mode.asciidoc deleted file mode 100644 index c1285bd2c..000000000 --- a/ext/cl_khr_select_fprounding_mode.asciidoc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_select_fprounding_mode]] -== Selecting the Rounding Mode **(DEPRECATED)** - -This section describes the *cl_khr_select_fprounding_mode* extension. -It allows an application to specify the rounding mode for an instruction or group of instructions in the program source. - -**This extension was deprecated in OpenCL 1.1 and its use is not recommended.** - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -=== Changes to OpenCL C specification - -With this extension, the rounding mode may be specified using the following *#pragma* in the OpenCL program source: - -[source,opencl_c] ----- -#pragma OPENCL SELECT_ROUNDING_MODE <rounding-mode> ----- - -The _<rounding-mode>_ may be one of the following values: - -* *rte* - round to nearest even -* *rtz* - round to zero -* *rtp* - round to positive infinity -* *rtn* - round to negative infinity - -If this extension is supported then the OpenCL implementation must support all four rounding modes for single precision floating-point. - -The *#pragma* sets the rounding mode for all instructions that operate on floating-point types (scalar or vector types) or produce floating-point values that follow this pragma in the program source until the next *#pragma*. -Note that the rounding mode specified for a block of code is known at compile time.
-When inside a compound statement, the pragma takes effect from its occurrence until another *#pragma* is encountered (including within a nested compound statement), or until the end of the compound statement; at the end of a compound statement the state for the pragma is restored to its condition just before the compound statement. -Except where otherwise documented, the callee functions do not inherit the rounding mode of the caller function. - -If this extension is enabled, the `\\__ROUNDING_MODE__` preprocessor symbol shall be defined to be one of the following according to the current rounding mode: - -[source,opencl_c] ----- -#define __ROUNDING_MODE__ rte -#define __ROUNDING_MODE__ rtz -#define __ROUNDING_MODE__ rtp -#define __ROUNDING_MODE__ rtn ----- - -This is intended to enable remapping `foo()` to `foo_rte()` by the preprocessor by using: - -[source,opencl_c] ----- -#define foo foo ## __ROUNDING_MODE__ ----- - -The default rounding mode is round to nearest even. -The built-in math functions described in _section 6.11.2_, the common functions described in _section 6.11.4_ and the geometric functions described in _section 6.11.5_ are implemented with the round to nearest even rounding mode. -Various built-in conversions and the *vstore_half* and *vstorea_half* built-in functions that do not specify a rounding mode inherit the current rounding mode. -Conversions from floating-point to integer type always use `rtz` mode, except where the user specifically asks for another rounding mode. diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc deleted file mode 100644 index 95c41522a..000000000 --- a/ext/cl_khr_semaphore.asciidoc +++ /dev/null @@ -1,634 +0,0 @@ -// Copyright 2021-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_semaphore]] -== Semaphores (Provisional) - -OpenCL provides {cl_event_TYPE} as a primary mechanism of synchronization between host and device as well as across devices. -While events can be waited on or can be passed as dependencies across work-submissions, they suffer from following limitations: - -* They are immutable. - -* They are not reusable. - -This extension introduces a new type of synchronization object to represent semaphores that can be reused, waited on, and signaled multiple times by OpenCL work-submissions. - -In particular, this extension defines: - -* A new type called {cl_semaphore_khr_TYPE} to represent the semaphore objects. - -* A new type called {cl_semaphore_properties_khr_TYPE} to specify metadata associated with semaphores. - -* Routines to create, retain, and release semaphores. - -* Routines to wait on and signal semaphore objects. - -* Routine to query the properties of semaphore objects. - -=== General Information - -==== Name Strings - -`cl_khr_semaphore` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-09-10 | 0.9.0 | Initial version (provisional). -| 2023-08-01 | 0.9.1 | Changed device handle list enum to the semaphore-specific {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} (provisional). -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.8. - -This extension requires OpenCL 1.2. 
- -==== Contributors - -// spell-checker: disable -Ajit Hakke-Patil, NVIDIA + -Amit Rao, NVIDIA + -Balaji Calidas, QUALCOMM + -Ben Ashbaugh, INTEL + -Carsten Rohde, NVIDIA + -Christoph Kubisch, NVIDIA + -Debalina Bhattacharjee, NVIDIA + -Faith Ekstrand, INTEL + -Gorazd Sumkovski, ARM + -James Jones, NVIDIA + -Jeremy Kemp, IMAGINATION + -Joshua Kelly, QUALCOMM + -Karthik Raghavan Ravi, NVIDIA + -Kedar Patil, NVIDIA + -Kevin Petit, ARM + -Nikhil Joshi, NVIDIA + -Sharan Ashwathnarayan, NVIDIA + -Vivek Kini, NVIDIA + -// spell-checker: enable - -=== New Types - -[source] ----- -typedef struct _cl_semaphore_khr* cl_semaphore_khr; - -typedef cl_properties cl_semaphore_properties_khr; -typedef cl_uint cl_semaphore_info_khr; -typedef cl_uint cl_semaphore_type_khr; -typedef cl_ulong cl_semaphore_payload_khr; ----- - -=== New API Functions - -[source] ----- -cl_semaphore_khr clCreateSemaphoreWithPropertiesKHR( - cl_context context, - const cl_semaphore_properties_khr *sema_props, - cl_int *errcode_ret); - -cl_int clEnqueueWaitSemaphoresKHR( - cl_command_queue command_queue, - cl_uint num_sema_objects, - const cl_semaphore_khr *sema_objects, - const cl_semaphore_payload_khr *sema_payload_list, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueSignalSemaphoresKHR( - cl_command_queue command_queue, - cl_uint num_sema_objects, - const cl_semaphore_khr *sema_objects, - const cl_semaphore_payload_khr *sema_payload_list, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clGetSemaphoreInfoKHR( - cl_semaphore_khr sema_object, - cl_semaphore_info_khr param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -cl_int clReleaseSemaphoreKHR(cl_semaphore_khr sema_object); - -cl_int clRetainSemaphoreKHR(cl_semaphore_khr sema_object); ----- - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetPlatformInfo} to query the 
semaphore types supported by an OpenCL platform: - -[source] ----- -CL_PLATFORM_SEMAPHORE_TYPES_KHR 0x2036 ----- - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo} to query the semaphore types supported by an OpenCL device: - -[source] ----- -CL_DEVICE_SEMAPHORE_TYPES_KHR 0x204C ----- - -Semaphore types: - -[source] ----- -CL_SEMAPHORE_TYPE_BINARY_KHR 1 ----- - -New attributes that can be passed as part of {cl_semaphore_info_khr_TYPE}: - -[source] ----- -CL_SEMAPHORE_CONTEXT_KHR 0x2039 -CL_SEMAPHORE_REFERENCE_COUNT_KHR 0x203A -CL_SEMAPHORE_PROPERTIES_KHR 0x203B -CL_SEMAPHORE_PAYLOAD_KHR 0x203C ----- - -New attributes that can be passed as part of {cl_semaphore_info_khr_TYPE} or {cl_semaphore_properties_khr_TYPE}: - -[source] ----- -CL_SEMAPHORE_TYPE_KHR 0x203D -CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR 0x2053 -CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR 0 ----- - -New return values from {clGetEventInfo} when _param_name_ is {CL_EVENT_COMMAND_TYPE}: - -[source] ----- -CL_COMMAND_SEMAPHORE_WAIT_KHR 0x2042 -CL_COMMAND_SEMAPHORE_SIGNAL_KHR 0x2043 ----- - -The following error codes can be returned by APIs introduced as part of this specification or the specifications that depend on this: -[source] ----- -CL_INVALID_SEMAPHORE_KHR -1142 ----- - -=== Modifications to existing APIs added by this spec - -Following new enums are added to the list of supported _param_names_ by {clGetPlatformInfo}: - -.List of supported param_names by {clGetPlatformInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Platform Info | Return Type | Description -| {CL_PLATFORM_SEMAPHORE_TYPES_KHR} - | {cl_semaphore_type_khr_TYPE}[] - | Returns the list of the semaphore types supported by all devices in _platform_. -|==== - -{clGetPlatformInfo} when called with _param_name_ {CL_PLATFORM_SEMAPHORE_TYPES_KHR} must return the common list of semaphore types supported by all devices in the platform.
- -Following new enums are added to the list of supported _param_names_ by {clGetDeviceInfo}: - -.List of supported param_names by {clGetDeviceInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Device Info | Return Type | Description -| {CL_DEVICE_SEMAPHORE_TYPES_KHR} - | {cl_semaphore_type_khr_TYPE}[] - | Returns the list of the semaphore types supported by _device_. -|==== - -{clGetDeviceInfo} when called with param_name {CL_DEVICE_SEMAPHORE_TYPES_KHR} must return a non-empty list of semaphore types for at least one of the devices in the platform. -The results of this query should meet minimum requirements for {cl_semaphore_type_khr_TYPE} as described by <>. - -=== Description of new types added by this spec - -Following new types are added: - -* {cl_semaphore_type_khr_TYPE} to represent the different types of semaphores. - ** It is mandatory to support {CL_SEMAPHORE_TYPE_BINARY_KHR}. - -* {cl_semaphore_properties_khr_TYPE} to represent properties associated with semaphores. - ** {CL_SEMAPHORE_TYPE_KHR} must be supported. - -* {cl_semaphore_info_khr_TYPE} to represent queries to get additional information about semaphores. - ** All enums described in New API Enums for {cl_semaphore_info_khr_TYPE} must be supported. - -* {cl_semaphore_payload_khr_TYPE} to represent payload values of semaphores. - -* {cl_semaphore_khr_TYPE} to represent semaphore objects. - -Note that above types can be extended in future based on the need for additional types of semaphore and properties required by them. -The specifics of the same can be added as a newer version of this specification or by a separate specification that depends on this for basic semaphore support. - -=== Description of new APIs added by this spec - -The following new APIs are added as part of this spec. 
The details of each are described below: - -==== Creating semaphores - -A *semaphore object* may be created using the function - -include::{generated}/api/protos/clCreateSemaphoreWithPropertiesKHR.txt[] - -_context_ identifies a valid OpenCL context that the created {cl_semaphore_khr_TYPE} will belong to. - -_sema_props_ specifies additional semaphore properties in the form of a list of <property_name, property_value> pairs terminated with 0. -{CL_SEMAPHORE_TYPE_KHR} must be part of the list of properties specified by _sema_props_. - -Following new properties are added to the list of possible supported properties by {cl_semaphore_properties_khr_TYPE} that can be passed to {clCreateSemaphoreWithPropertiesKHR}: - -.List of supported semaphore creation properties by {clCreateSemaphoreWithPropertiesKHR} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Semaphore Property | Property Value | Description -| {CL_SEMAPHORE_TYPE_KHR} - | {cl_semaphore_type_khr_TYPE} - | Specifies the type of semaphore to create. This property is always required. -| {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} - | {cl_device_id_TYPE}[] - | Specifies the list of OpenCL devices (terminated with {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR}) to associate with the semaphore. Only a single device is permitted in the list. -|==== - -If {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not specified as part of _sema_props_, the semaphore object created by {clCreateSemaphoreWithPropertiesKHR} is by default accessible to all devices in the _context_. For a multi-device context {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} must be specified in _sema_props_. - -_errcode_ret_ returns an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateSemaphoreWithPropertiesKHR} returns a valid semaphore object in an un-signaled state and _errcode_ret_ is set to {CL_SUCCESS} if the function is executed successfully.
-Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: - -* {CL_INVALID_CONTEXT} if _context_ is not a valid context. -* {CL_INVALID_PROPERTY} if a property name in _sema_props_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than once. Additionally, if _context_ is a multiple device context and _sema_props_ does not specify {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR}. -* {CL_INVALID_DEVICE} if {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is specified as part of _sema_props_, but it does not identify exactly one valid device or if a device identified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not one of the devices within _context_. -* {CL_INVALID_VALUE} -** if _sema_props_ is `NULL`, or -** if _sema_props_ does not specify <property_name, property_value> pairs for the minimum set of properties (i.e. {CL_SEMAPHORE_TYPE_KHR}) required for successful creation of a {cl_semaphore_khr_TYPE}. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -==== Waiting on and signaling semaphores - -To enqueue a command to wait on a set of semaphores, call the function - -include::{generated}/api/protos/clEnqueueWaitSemaphoresKHR.txt[] - -_command_queue_ specifies a valid command-queue. - -_num_sema_objects_ specifies the number of semaphore objects to wait on. - -_sema_objects_ points to the list of semaphore objects to wait on. -The length of the list must be at least _num_sema_objects_. - -_sema_payload_list_ points to the list of values of type {cl_semaphore_payload_khr_TYPE} containing valid semaphore payload values to wait on. -This can be set to `NULL` or will be ignored when all semaphores in the list of _sema_objects_ are of type {CL_SEMAPHORE_TYPE_BINARY_KHR}.
- -_num_events_in_wait_list_ specifies the number of events in _event_wait_list_. - -_event_wait_list_ specifies list of events that need to complete before {clEnqueueWaitSemaphoresKHR} can be executed. -If _event_wait_list_ is `NULL`, then {clEnqueueWaitSemaphoresKHR} does not wait on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and that associated with _command_queue_ must be the same. - -_event_ returns an event object that identifies this particular command and can be used to query or queue a wait for this particular command to complete. -_event_ can be `NULL` in which case it will not be possible for the application to query the status of this command or queue a wait for this command to complete. - -The semaphore wait command waits for a list of events to complete and a list of semaphore objects to become signaled. -The semaphore wait command returns an _event_ which can be waited on to ensure that all events in the _event_wait_list_ have completed and all semaphores in _sema_objects_ have been signaled. -{clEnqueueWaitSemaphoresKHR} will not return until the binary semaphores in _sema_objects_ are in a state that makes them safe to re-signal. If necessary, implementations may block in {clEnqueueWaitSemaphoresKHR} to ensure the correct state of semaphores when returning. There are no implications from this behavior for the state of _event_ or the events in _event_wait_list_ when {clEnqueueWaitSemaphoresKHR} returns. Waiting on the same binary semaphore twice without an interleaving signal may lead to undefined behavior. - -{clEnqueueWaitSemaphoresKHR} returns {CL_SUCCESS} if the function is executed successfully. 
-Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_QUEUE} -** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not same as one of the devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _sema_objects_. -* {CL_INVALID_VALUE} if _num_sema_objects_ is 0. -* {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by _sema_objects_ is not valid. -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and any of the semaphore objects in _sema_objects_ are not the same or if the context associated with _command_queue_ and that associated with events in _event_wait_list_ are not the same. -* {CL_INVALID_VALUE} if any of the semaphore objects specified by _sema_objects_ requires a semaphore payload and _sema_payload_list_ is `NULL`. -* {CL_INVALID_EVENT_WAIT_LIST} - ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or - ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or - ** if event objects in _event_wait_list_ are not valid events. -* {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of any of the events in _event_wait_list_ is a negative integer value. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -To enqueue a command to signal a set of semaphores, call the function - -include::{generated}/api/protos/clEnqueueSignalSemaphoresKHR.txt[] - -_command_queue_ specifies a valid command-queue. - -_num_sema_objects_ specifies the number of semaphore objects to signal. - -_sema_objects_ points to the list of semaphore objects to signal. -The length of the list must be at least _num_sema_objects_. 
- -_sema_payload_list_ points to the list of values of type {cl_semaphore_payload_khr_TYPE} containing semaphore payload values to signal. -This can be set to `NULL` or will be ignored when all semaphores in the list of _sema_objects_ are of type {CL_SEMAPHORE_TYPE_BINARY_KHR}. - -_num_events_in_wait_list_ specifies the number of events in event_wait_list. - -_event_wait_list_ points to the list of events that need to complete before {clEnqueueSignalSemaphoresKHR} can be executed. -If _event_wait_list_ is `NULL`, then {clEnqueueSignalSemaphoresKHR} does not wait on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and -_num_events_in_wait_list_ must be greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and that associated with _command_queue_ must be the same. - -_event_ returns an event object that identifies this particular command and can be used to query or queue a wait for this particular command to complete. -_event_ can be `NULL` in which case it will not be possible for the application to query the status of this command -or queue a wait for this command to complete. - -The semaphore signal command waits for a list of events to complete and then signals a list of semaphore objects. -The semaphore signal command returns an _event_ which can be waited on to ensure that all events in the _event_wait_list_ have completed and all semaphores in _sema_objects_ have been signaled. -The successful completion of the event generated by {clEnqueueSignalSemaphoresKHR} called on one or more semaphore objects of type {CL_SEMAPHORE_TYPE_BINARY_KHR} changes the state of the corresponding semaphore objects to signaled. 
{clEnqueueSignalSemaphoresKHR} will not return until the binary semaphores in _sema_objects_ are in a state that makes them safe to wait on again. If necessary, implementations may block in {clEnqueueSignalSemaphoresKHR} to ensure the correct state of semaphores when returning. There are no implications from this behavior for the state of _event_ or the events in _event_wait_list_ when {clEnqueueSignalSemaphoresKHR} returns. Signaling the same binary semaphore twice without an interleaving wait may lead to undefined behavior. - -{clEnqueueSignalSemaphoresKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_QUEUE} -** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not same as one of the devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _sema_objects_. -* {CL_INVALID_VALUE} if _num_sema_objects_ is 0 -* {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by _sema_objects_ is not valid. -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and any of the semaphore objects in _sema_objects_ are not the same or if the context associated with _command_queue_ and that associated with events in _event_wait_list_ are not the same. -* {CL_INVALID_VALUE} if any of the semaphore objects specified by _sema_objects_ requires a semaphore payload and _sema_payload_list_ is `NULL`. -* {CL_INVALID_EVENT_WAIT_LIST} - ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or - ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or - ** if event objects in _event_wait_list_ are not valid events. -* {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of any of the events in _event_wait_list_ is a negative integer value. 
-* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -==== Semaphore Queries - -To query information about a semaphore object, call the function - -include::{generated}/api/protos/clGetSemaphoreInfoKHR.txt[] - -_sema_object_ specifies the semaphore object being queried. - -_param_name_ is a constant that specifies the semaphore information to query, and must be one of the values shown in the <<cl_khr_semaphore_info-table>> table. - -_param_value_ is a pointer to memory where the result of the query is returned as described in the <<cl_khr_semaphore_info-table>> table. If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ specifies the size in bytes of memory pointed to by _param_value_. This size must be greater than or equal to the size of the return type described in the <<cl_khr_semaphore_info-table>> table. - -_param_value_size_ret_ returns the actual size in bytes of data -being queried by _param_value_. If _param_value_size_ret_ is `NULL`, it is ignored. - -[[cl_khr_semaphore_info-table]] -.List of supported param_names by {clGetSemaphoreInfoKHR} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Semaphore Info | Return Type | Description -| {CL_SEMAPHORE_CONTEXT_KHR} - | {cl_context_TYPE} - | Returns the context specified when the semaphore is created. - -| {CL_SEMAPHORE_REFERENCE_COUNT_KHR} footnote:[{fn-reference-count-usage}] - | {cl_uint_TYPE} - | Returns the semaphore reference count. - -| {CL_SEMAPHORE_PROPERTIES_KHR} - | {cl_semaphore_properties_khr_TYPE}[] - | Returns the properties argument specified in - {clCreateSemaphoreWithPropertiesKHR}. - - The implementation must return the values specified in the properties - argument in the same order and without including additional properties. - -| {CL_SEMAPHORE_TYPE_KHR} - | {cl_semaphore_type_khr_TYPE} - | Returns the semaphore type.
- -| {CL_SEMAPHORE_PAYLOAD_KHR} - | {cl_semaphore_payload_khr_TYPE} - | Returns the semaphore payload value. For semaphores of type - {CL_SEMAPHORE_TYPE_BINARY_KHR}, the payload value returned will be `0` - if the semaphore is in an un-signaled state and `1` if it is in a - signaled state. - -| {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} - | {cl_device_id_TYPE}[] - | Returns the list of OpenCL devices the semaphore is associated with. -|==== - -{clGetSemaphoreInfoKHR} returns {CL_SUCCESS} if the information is queried successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_SEMAPHORE_KHR} -** if _sema_object_ is not a valid semaphore. -* {CL_INVALID_VALUE} -** if _param_name_ is not one of the attributes defined in the <<cl_khr_semaphore_info-table>> table, or -** if _param_value_size_ is less than the size of Return Type of the corresponding _param_name_ attribute as defined in the <<cl_khr_semaphore_info-table>> table. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -==== Retaining and Releasing Semaphores - -To release a semaphore object, call the function - -include::{generated}/api/protos/clReleaseSemaphoreKHR.txt[] - -_sema_object_ specifies the semaphore object to be released. - -The _sema_object_ reference count is decremented. - -{clReleaseSemaphoreKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore object. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host.
- -After the reference count becomes zero and commands queued for execution on -command-queue(s) that use _sema_object_ have finished, the semaphore object is -deleted. -Using this function to release a reference that was not obtained by creating the -object via {clCreateSemaphoreWithPropertiesKHR} or by calling -{clRetainSemaphoreKHR} causes undefined behavior. - -To retain a semaphore object, call the function - -include::{generated}/api/protos/clRetainSemaphoreKHR.txt[] - -_sema_object_ specifies the semaphore object to be retained. - -The _sema_object_ reference count is incremented. - -{clRetainSemaphoreKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore object. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -[[cl_khr_semaphore-Sample-Code]] -=== Sample Code - -. Example for semaphore creation in a single device context -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with just first device -context = clCreateContext(..., 1, devices, ...); - -// Create clSema of type cl_semaphore_khr usable on single device in the context - -cl_semaphore_properties_khr sema_props[] = - {(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - 0}; - -int errcode_ret = 0; - -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); ----- --- - -. Example for semaphore creation for a single device in a multi-device context -+ --- -[source] ----- -// Get cl_devices of the platform.
-clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with first two devices -clCreateContext(..., 2, devices, ...); - -// Create clSema of type cl_semaphore_khr usable only on device 0 -cl_semaphore_properties_khr sema_props[] = { - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, - (cl_semaphore_properties_khr)devices[0], - CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, - 0 -}; - -int errcode_ret = 0; - -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); ----- --- -. Example for synchronization using Wait and Signal -+ --- -[source] ----- -// clSema is created using clCreateSemaphoreWithPropertiesKHR -// using one of the examples for semaphore creation. - -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - -// Start the main loop - -while (true) { - // (not shown) Signal the semaphore from other work - - // Wait for the semaphore in OpenCL - // by calling clEnqueueWaitSemaphoresKHR on 'clSema' - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel that accesses extMem - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch other work that waits on 'clSema' -} ----- --- -. Example for {clGetSemaphoreInfoKHR} -+ --- -[source] ----- -// clSema is created using clCreateSemaphoreWithPropertiesKHR -// using one of the examples for semaphore creation. 
- -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - -// Start the main rendering loop - -while (true) { - // (not shown) Signal the semaphore from other work - - // Wait for the semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on 'clSema' - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel in OpenCL - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Query type of clSema - clGetSemaphoreInfoKHR(/*sema_object*/ clSema, - /*param_name*/ CL_SEMAPHORE_TYPE_KHR, - /*param_value_size*/ sizeof(cl_semaphore_type_khr), - /*param_value*/ &clSemaType, - /*param_value_ret_size*/ &clSemaTypeSize); - - if (clSemaType == CL_SEMAPHORE_TYPE_BINARY_KHR) { - // Do something - } - else { - // Do something else - } - // (not shown) Launch other work that waits on 'clSema' -} ----- --- diff --git a/ext/cl_khr_spir.asciidoc b/ext/cl_khr_spir.asciidoc deleted file mode 100644 index d5d0d47fd..000000000 --- a/ext/cl_khr_spir.asciidoc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_spir]] -== Standard Portable Intermediate Representation Binaries - -This extension adds the ability to create an OpenCL program object from a -Standard Portable Intermediate Representation (SPIR) instance. -A SPIR instance is a vendor-neutral non-source representation for OpenCL C -programs. 
- -The extension name is *cl_khr_spir*. -This extension has been superseded by the SPIR-V intermediate -representation, which is supported by the *cl_khr_il_program* extension, -and is a core feature in OpenCL 2.1. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_spir-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -*Add a new device property to _table 4.3_ in _section 4.2_:* - -.List of supported param_names by {clGetDeviceInfo} -[cols="2,1,4",options="header",] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_SPIR_VERSIONS} -| {char_TYPE}[] -| A space separated list of SPIR versions supported by the device. - - For example, returning `"1.2"` in this query implies that SPIR version 1.2 - is supported by the implementation. - -|==== - -[[cl_khr_spir-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -*Additions to _section 5.8.1_ -- Creating Program Objects:* - -"{clCreateProgramWithBinary} can be used to load a SPIR binary. -Once a program object has been created from a SPIR binary, {clBuildProgram} -can be called to build a program executable or {clCompileProgram} can be -called to compile the SPIR binary." - -Modify the {CL_PROGRAM_BINARY_TYPE} entry in _table 5.14_ -for {clGetProgramBuildInfo} to add a potential value -{CL_PROGRAM_BINARY_TYPE_INTERMEDIATE}: - -.List of supported param_names by {clGetProgramBuildInfo} -[cols="2,1,4",options="header",] -|==== -| Program Build Info -| Return Type -| Description - -| {CL_PROGRAM_BINARY_TYPE} -| {cl_program_binary_type_TYPE} -| {CL_PROGRAM_BINARY_TYPE_INTERMEDIATE} -- An intermediate (non-source) - representation for the program is loaded as a binary. - The program must be further processed with {clCompileProgram} or - {clBuildProgram}. 
- - If processed with {clCompileProgram}, the result will be a binary of type - {CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT} or {CL_PROGRAM_BINARY_TYPE_LIBRARY}. - If processed with {clBuildProgram}, the result will be a binary of type - {CL_PROGRAM_BINARY_TYPE_EXECUTABLE}. - -|==== - -*Additions to _section 5.8.4_ -- Compiler Options:* - -"The compile option `-x spir` must be specified to indicate that the binary -is in SPIR format, and the compile option `-spir-std` must be used to -specify the version of the SPIR specification that describes the format and -meaning of the binary. -For example, if the binary is as described in SPIR version 1.2, then -`-spir-std=1.2` must be specified. -Failing to specify these compile options may result in implementation-defined -behavior." - -*Additions to _section 5.8.5_ -- Separate Compilation and Linking of Programs:* - -Replace this error for {clCompileProgram}: - - * {CL_INVALID_OPERATION} if _program_ has no source or IL available, i.e. it - has not been created with {clCreateProgramWithSource} or - {clCreateProgramWithIL}. - -with: - - * {CL_INVALID_OPERATION} if _program_ has no source or IL available, i.e. it - has not been created with {clCreateProgramWithSource} or - {clCreateProgramWithIL} or {clCreateProgramWithBinary} where `-x spir` is present in _options_. - -*Additions to _section 5.9.3_ -- Kernel Object Queries:* - -Modify following text in {clGetKernelArgInfo} from: - -"Kernel argument information is only available if the program object -associated with _kernel_ is created with {clCreateProgramWithSource} and the -program executable is built with the -cl-kernel-arg-info option specified in -_options_ argument to {clBuildProgram} or {clCompileProgram}." 
- -to: - -"Kernel argument information is only available if the program object -associated with _kernel_ is created with {clCreateProgramWithSource} and the -program executable is built with the `-cl-kernel-arg-info` option specified in -_options_ argument to {clBuildProgram} or {clCompileProgram}, or if the -program object associated with _kernel_ is created with -{clCreateProgramWithBinary} and the program executable is built with the -`-cl-kernel-arg-info` and `-x spir` options specified in _options_ argument to -{clBuildProgram} or {clCompileProgram}." diff --git a/ext/cl_khr_srgb_image_writes.asciidoc b/ext/cl_khr_srgb_image_writes.asciidoc deleted file mode 100644 index 63c7444f7..000000000 --- a/ext/cl_khr_srgb_image_writes.asciidoc +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_srgb_image_writes]] -== sRGB Image Writes - -This section describes the *cl_khr_srgb_image_writes* extension. - -This extension enables kernels to write to sRGB images using the *write_imagef* built-in function. -The sRGB image formats that may be written to will be returned by {clGetSupportedImageFormats}. - -When the image is an sRGB image, the *write_imagef* built-in function will perform the linear to sRGB conversion. -Only the R, G, and B components are converted from linear to sRGB; the A component is written as-is. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version.
-|==== diff --git a/ext/cl_khr_subgroup_extensions.asciidoc b/ext/cl_khr_subgroup_extensions.asciidoc deleted file mode 100644 index 9f131cb0d..000000000 --- a/ext/cl_khr_subgroup_extensions.asciidoc +++ /dev/null @@ -1,1071 +0,0 @@ -== Extended Sub-group Functions - -[[extended-sub-groups]] -=== Overview - -This section describes a family of extensions that provide extended sub-group functionality. -The extensions in this family are: - -* `cl_khr_subgroup_extended_types` -* `cl_khr_subgroup_non_uniform_vote` -* `cl_khr_subgroup_ballot` -* `cl_khr_subgroup_non_uniform_arithmetic` -* `cl_khr_subgroup_shuffle` -* `cl_khr_subgroup_shuffle_relative` -* `cl_khr_subgroup_clustered_reduce` - -The functionality added by these extensions includes: - -* Additional data type support for sub-group broadcast, scan, and reduction functions; -* The ability to elect a single work item from a sub-group to perform a task; -* The ability to hold votes among work items in a sub-group; -* The ability to collect and operate on ballots from work items in the sub-group; -* The ability to use some sub-group functions, such as any, all, broadcasts, scans, and reductions within non-uniform flow control; -* Additional scan and reduction operators; -* Additional ways to exchange data among work items in a sub-group; -* Clustered reductions, that operate on a subset of work items in the sub-group. - -This section describes changes to the OpenCL C Language for these extensions. -There are no new API functions or enums added by these extensions. - -=== General Information - -==== Version History - -For all of the extensions described in this section: - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-12-15 | 1.0.0 | First assigned version. 
-|==== - -[[extended-sub-groups-summary]] -=== Summary of New OpenCL C Functions - -[source,opencl_c] ----- -// These functions are available to devices supporting -// cl_khr_subgroup_extended_types: - -// Note: Existing functions supporting additional data types. - -gentype sub_group_broadcast( gentype value, uint index ) - -gentype sub_group_reduce_add( gentype value ) -gentype sub_group_reduce_min( gentype value ) -gentype sub_group_reduce_max( gentype value ) - -gentype sub_group_scan_inclusive_add( gentype value ) -gentype sub_group_scan_inclusive_min( gentype value ) -gentype sub_group_scan_inclusive_max( gentype value ) - -gentype sub_group_scan_exclusive_add( gentype value ) -gentype sub_group_scan_exclusive_min( gentype value ) -gentype sub_group_scan_exclusive_max( gentype value ) - -// These functions are available to devices supporting -// cl_khr_subgroup_non_uniform_vote: - -int sub_group_elect() - -int sub_group_non_uniform_all( int predicate ) -int sub_group_non_uniform_any( int predicate ) -int sub_group_non_uniform_all_equal( gentype value ) - -// These functions are available to devices supporting -// cl_khr_subgroup_ballot: - -gentype sub_group_non_uniform_broadcast( gentype value, uint index ) -gentype sub_group_broadcast_first( gentype value ) - -uint4 sub_group_ballot( int predicate ) -int sub_group_inverse_ballot( uint4 value ) -int sub_group_ballot_bit_extract( uint4 value, uint index ) -uint sub_group_ballot_bit_count( uint4 value ) -uint sub_group_ballot_inclusive_scan( uint4 value ) -uint sub_group_ballot_exclusive_scan( uint4 value ) -uint sub_group_ballot_find_lsb( uint4 value ) -uint sub_group_ballot_find_msb( uint4 value ) - -uint4 get_sub_group_eq_mask() -uint4 get_sub_group_ge_mask() -uint4 get_sub_group_gt_mask() -uint4 get_sub_group_le_mask() -uint4 get_sub_group_lt_mask() - -// These functions are available to devices supporting -// cl_khr_subgroup_non_uniform_arithmetic: - -gentype sub_group_non_uniform_reduce_add( gentype value 
) -gentype sub_group_non_uniform_reduce_mul( gentype value ) -gentype sub_group_non_uniform_reduce_min( gentype value ) -gentype sub_group_non_uniform_reduce_max( gentype value ) -gentype sub_group_non_uniform_reduce_and( gentype value ) -gentype sub_group_non_uniform_reduce_or( gentype value ) -gentype sub_group_non_uniform_reduce_xor( gentype value ) -int sub_group_non_uniform_reduce_logical_and( int predicate ) -int sub_group_non_uniform_reduce_logical_or( int predicate ) -int sub_group_non_uniform_reduce_logical_xor( int predicate ) - -gentype sub_group_non_uniform_scan_inclusive_add( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_mul( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_min( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_max( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_and( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_or( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_xor( gentype value ) -int sub_group_non_uniform_scan_inclusive_logical_and( int predicate ) -int sub_group_non_uniform_scan_inclusive_logical_or( int predicate ) -int sub_group_non_uniform_scan_inclusive_logical_xor( int predicate ) - -gentype sub_group_non_uniform_scan_exclusive_add( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_mul( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_min( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_max( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_and( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_or( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_xor( gentype value ) -int sub_group_non_uniform_scan_exclusive_logical_and( int predicate ) -int sub_group_non_uniform_scan_exclusive_logical_or( int predicate ) -int sub_group_non_uniform_scan_exclusive_logical_xor( int predicate ) - -// These functions are available to devices supporting -// cl_khr_subgroup_shuffle: - 
-gentype sub_group_shuffle( gentype value, uint index ) -gentype sub_group_shuffle_xor( gentype value, uint mask ) - -// These functions are available to devices supporting -// cl_khr_subgroup_shuffle_relative: - -gentype sub_group_shuffle_up( gentype value, uint delta ) -gentype sub_group_shuffle_down( gentype value, uint delta ) - -// These functions are available to devices supporting -// cl_khr_subgroup_clustered_reduce: - -gentype sub_group_clustered_reduce_add( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_mul( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_min( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_max( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_and( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_or( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_xor( gentype value, uint clustersize ) -int sub_group_clustered_reduce_logical_and( int predicate, uint clustersize ) -int sub_group_clustered_reduce_logical_or( int predicate, uint clustersize ) -int sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize ) ----- - -[[cl_khr_subgroup_extended_types]] -=== Extended Types - -This section describes functionality added by `cl_khr_subgroup_extended_types`. -This extension adds additional supported data types to the existing sub-group broadcast, scan, and reduction functions. - -==== Modify the Existing Section Describing Sub-group Functions - -Modify the first paragraph in this section that describes `gentype` type support for the sub-group `broadcast`, `scan`, and `reduction` functions to add scalar `char`, `uchar`, `short`, and `ushort` support, and to additionally add built-in vector type support for `broadcast` specifically. 
-The functions in the table and their descriptions remain unchanged by this extension: - -The table below describes OpenCL C programming language built-in functions that operate on a sub-group level. -These built-in functions must be encountered by all work items in the sub-group executing the kernel. -We use the generic type name `gentype` to indicate the built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -For the `sub_group_broadcast` function, the generic type name `gentype` may additionally be one of the supported built-in vector data types `char__n__`, `uchar__n__`, `short__n__`, `ushort__n__`, `int__n__`, `uint__n__`, `long__n__`, `ulong__n__`, `float__n__`, `double__n__` (if double precision is supported), or `half__n__` (if half precision is supported). - -[[cl_khr_subgroup_non_uniform_vote]] -=== Votes and Elections - -This section describes functionality added by `cl_khr_subgroup_non_uniform_vote`. -This extension adds the ability to elect a single work item from a sub-group to perform a task and to hold votes among work items in a sub-group. - -==== Add a new Section 6.15.X - Sub-group Vote and Elect Built-in Functions - -The table below describes the OpenCL C programming language built-in functions to elect a single work item in a sub-group to perform a task and to collectively vote to determine a boolean condition for the sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported).
- -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -int sub_group_elect() ----- -| Elects a single work item in the sub-group to perform a task. -This function will return true (nonzero) for the active work item in the sub-group with the smallest sub-group local ID, and false (zero) for all other active work items in the sub-group. - -|[source,opencl_c] ----- -int sub_group_non_uniform_all( - int predicate ) ----- -| Examines _predicate_ for all active work items in the sub-group and returns a non-zero value if _predicate_ is non-zero for all active work items in the sub-group and zero otherwise. - -Note: This behavior is the same as `sub_group_all` from `cl_khr_subgroups` and OpenCL 2.1, except this function need not be encountered by all work items in the sub-group executing the kernel. - -|[source,opencl_c] ----- -int sub_group_non_uniform_any( - int predicate ) ----- -| Examines _predicate_ for all active work items in the sub-group and returns a non-zero value if _predicate_ is non-zero for any active work item in the sub-group and zero otherwise. - -Note: This behavior is the same as `sub_group_any` from `cl_khr_subgroups` and OpenCL 2.1, except this function need not be encountered by all work items in the sub-group executing the kernel. - -|[source,opencl_c] ----- -int sub_group_non_uniform_all_equal( - gentype value ) ----- -| Examines _value_ for all active work items in the sub-group and returns a non-zero value if _value_ is equivalent for all active invocations in the sub-group and zero otherwise. - -Integer types use a bitwise test for equality. Floating-point types use an ordered floating-point test for equality. - -|======================================================================= - -[[cl_khr_subgroup_ballot]] -=== Ballots - -This section describes functionality added by `cl_khr_subgroup_ballot`. 
-This extension adds the ability to collect and operate on ballots from work items in the sub-group. - -==== Add a new Section 6.15.X - Sub-group Ballot Built-in Functions - -The table below describes the OpenCL C programming language built-in functions to allow work items in a sub-group to collect and operate on ballots from work items in the sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. - -For the `sub_group_non_uniform_broadcast` and `sub_group_broadcast_first` functions, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -For the `sub_group_non_uniform_broadcast` function, the generic type name `gentype` may additionally be one of the supported built-in vector data types `char__n__`, `uchar__n__`, `short__n__`, `ushort__n__`, `int__n__`, `uint__n__`, `long__n__`, `ulong__n__`, `float__n__`, `double__n__` (if double precision is supported), or `half__n__` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_broadcast( - gentype value, - uint index ) ----- -| Returns _value_ for the work item with sub-group local ID equal to _index_. - -Behavior is undefined when the value of _index_ is not equivalent for all active work items in the sub-group. - -The return value is undefined if the work item with sub-group local ID equal to _index_ is inactive or if _index_ is greater than or equal to the size of the sub-group. - -|[source,opencl_c] ----- -gentype sub_group_broadcast_first( - gentype value ) ----- -| Returns _value_ for the work item with the smallest sub-group local ID among active work items in the sub-group. 
- -|[source,opencl_c] ----- -uint4 sub_group_ballot( - int predicate ) ----- -| Returns a bitfield combining the _predicate_ values from all work items in the sub-group. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. -The representative bit in the bitfield is set if the work item is active and the _predicate_ is non-zero, and is unset otherwise. - -|[source,opencl_c] ----- -int sub_group_inverse_ballot( - uint4 value ) ----- -| Returns the predicate value for this work item in the sub-group from the bitfield _value_ representing predicate values from all work items in the sub-group. -The predicate return value will be non-zero if the bit in the bitfield _value_ for this work item is set, and zero otherwise. - -Behavior is undefined when _value_ is not equivalent for all active work items in the sub-group. - -This is a specialized function that may perform better than the equivalent `sub_group_ballot_bit_extract` on some implementations. - -|[source,opencl_c] ----- -int sub_group_ballot_bit_extract( - uint4 value, - uint index ) ----- -| Returns the predicate value for the work item with sub-group local ID equal to _index_ from the bitfield _value_ representing predicate values from all work items in the sub-group. -The predicate return value will be non-zero if the bit in the bitfield _value_ for the work item with sub-group local ID equal to _index_ is set, and zero otherwise. - -The predicate return value is undefined if the work item with sub-group local ID equal to _index_ is greater than or equal to the size of the sub-group. 
- -|[source,opencl_c] ----- -uint sub_group_ballot_bit_count( - uint4 value ) ----- -| Returns the number of bits that are set in the bitfield _value_, only considering the bits in _value_ that represent predicate values corresponding to sub-group local IDs less than the maximum sub-group size within the dispatch (as returned by `get_max_sub_group_size`). - -|[source,opencl_c] ----- -uint sub_group_ballot_inclusive_scan( - uint4 value ) ----- -| Returns the number of bits that are set in the bitfield _value_, only considering the bits in _value_ representing work items with a sub-group local ID less than or equal to this work item's sub-group local ID. - -|[source,opencl_c] ----- -uint sub_group_ballot_exclusive_scan( - uint4 value ) ----- -| Returns the number of bits that are set in the bitfield _value_, only considering the bits in _value_ representing work items with a sub-group local ID less than this work item's sub-group local ID. - -|[source,opencl_c] ----- -uint sub_group_ballot_find_lsb( - uint4 value ) ----- -| Returns the smallest sub-group local ID with a bit set in the bitfield _value_, only considering the bits in _value_ that represent predicate values corresponding to sub-group local IDs less than the maximum sub-group size within the dispatch (as returned by `get_max_sub_group_size`). -If no bits representing predicate values from all work items in the sub-group are set in the bitfield _value_ then the return value is undefined. - -|[source,opencl_c] ----- -uint sub_group_ballot_find_msb( - uint4 value ) ----- -| Returns the largest sub-group local ID with a bit set in the bitfield _value_, only considering the bits in _value_ that represent predicate values corresponding to sub-group local IDs less than the maximum sub-group size within the dispatch (as returned by `get_max_sub_group_size`). -If no bits representing predicate values from all work items in the sub-group are set in the bitfield _value_ then the return value is undefined. 
- -|[source,opencl_c] ----- -uint4 get_sub_group_eq_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index equals the sub-group local ID and unset otherwise. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|[source,opencl_c] ----- -uint4 get_sub_group_ge_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index is greater than or equal to the sub-group local ID and less than the maximum sub-group size, and unset otherwise. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|[source,opencl_c] ----- -uint4 get_sub_group_gt_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index is greater than the sub-group local ID and less than the maximum sub-group size, and unset otherwise. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|[source,opencl_c] ----- -uint4 get_sub_group_le_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index is less than or equal to the sub-group local ID and unset otherwise. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|[source,opencl_c] ----- -uint4 get_sub_group_lt_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index is less than the sub-group local ID and unset otherwise. 
-Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|======================================================================= - -[[cl_khr_subgroup_non_uniform_arithmetic]] -=== Non-Uniform Arithmetic - -This section describes functionality added by `cl_khr_subgroup_non_uniform_arithmetic`. -This extension adds the ability to use some sub-group functions within non-uniform flow control, including additional scan and reduction operators. - -==== Add a new Section 6.15.X - Non Uniform Sub-group Scan and Reduction Built-in Functions - -===== Arithmetic Operations - -The table below describes the OpenCL C programming language built-in functions that perform simple arithmetic operations across work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols="3a,2",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_reduce_add( - gentype value ) -gentype sub_group_non_uniform_reduce_min( - gentype value ) -gentype sub_group_non_uniform_reduce_max( - gentype value ) -gentype sub_group_non_uniform_reduce_mul( - gentype value ) ----- -| Returns the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group. 
- -Note: This behavior is the same as the *add*, *min*, and *max* reduction built-in functions from `cl_khr_subgroups` and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the sub-group executing the kernel. - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_scan_inclusive_add( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_min( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_max( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_mul( - gentype value ) ----- -| Returns the result of an inclusive scan operation, which is the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group with a sub-group local ID less than or equal to this work item's sub-group local ID. - -Note: This behavior is the same as the *add*, *min*, and *max* inclusive scan built-in functions from `cl_khr_subgroups` and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the sub-group executing the kernel. - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_scan_exclusive_add( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_min( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_max( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_mul( - gentype value ) ----- -| Returns the result of an exclusive scan operation, which is the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group with a sub-group local ID less than this work item's sub-group local ID. - -If there is no active work item in the sub-group with a sub-group local ID less than this work item's sub-group local ID then an identity value `I` is returned. -For *add*, the identity value is `0`. -For *min*, the identity value is the largest representable value for integer types, or `+INF` for floating-point types. 
-For *max*, the identity value is the minimum representable value for integer types, or `-INF` for floating-point types. -For *mul*, the identity value is `1`. - -Note: This behavior is the same as the *add*, *min*, and *max* exclusive scan built-in functions from `cl_khr_subgroups` and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the sub-group executing the kernel. - -|======================================================================= - -Note: The order of floating-point operations is not guaranteed for the sub-group scan and reduction built-in functions that operate on floating-point types, and the order of operations may additionally be non-deterministic for a given sub-group. - -===== Bitwise Operations - -The table below describes the OpenCL C programming language built-in functions that perform simple bitwise integer operations across work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, and `ulong`. - -[cols="3a,2",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_reduce_and( - gentype value ) -gentype sub_group_non_uniform_reduce_or( - gentype value ) -gentype sub_group_non_uniform_reduce_xor( - gentype value ) ----- -| Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group. 
- -|[source,opencl_c] ----- -gentype sub_group_non_uniform_scan_inclusive_and( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_or( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_xor( - gentype value ) ----- -| Returns the result of an inclusive scan operation, which is the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group with a sub-group local ID less than or equal to this work item's sub-group local ID. - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_scan_exclusive_and( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_or( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_xor( - gentype value ) ----- -| Returns the result of an exclusive scan operation, which is the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group with a sub-group local ID less than this work item's sub-group local ID. - -If there is no active work item in the sub-group with a sub-group local ID less than this work item's sub-group local ID then an identity value `I` is returned. -For *and*, the identity value is `~0` (all bits set). -For *or* and *xor*, the identity value is `0`. - -|======================================================================= - -===== Logical Operations - -The table below describes the OpenCL C programming language built-in functions that perform simple logical operations across work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For these functions, a non-zero _predicate_ argument or return value is logically `true` and a zero _predicate_ argument or return value is logically `false`. 
- -[cols="2a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -int sub_group_non_uniform_reduce_logical_and( - int predicate ) -int sub_group_non_uniform_reduce_logical_or( - int predicate ) -int sub_group_non_uniform_reduce_logical_xor( - int predicate ) ----- -| Returns the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group. - -|[source,opencl_c] ----- -int sub_group_non_uniform_scan_inclusive_logical_and( - int predicate ) -int sub_group_non_uniform_scan_inclusive_logical_or( - int predicate ) -int sub_group_non_uniform_scan_inclusive_logical_xor( - int predicate ) ----- -| Returns the result of an inclusive scan operation, which is the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group with a sub-group local ID less than or equal to this work item's sub-group local ID. - -|[source,opencl_c] ----- -int sub_group_non_uniform_scan_exclusive_logical_and( - int predicate ) -int sub_group_non_uniform_scan_exclusive_logical_or( - int predicate ) -int sub_group_non_uniform_scan_exclusive_logical_xor( - int predicate ) ----- -| Returns the result of an exclusive scan operation, which is the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group with a sub-group local ID less than this work item's sub-group local ID. - -If there is no active work item in the sub-group with a sub-group local ID less than this work item's sub-group local ID then an identity value `I` is returned. -For *and*, the identity value is `true` (non-zero). -For *or* and *xor*, the identity value is `false` (zero). - -|======================================================================= - -[[cl_khr_subgroup_shuffle]] -=== General Purpose Shuffles - -This section describes functionality added by `cl_khr_subgroup_shuffle`. 
-This extension adds additional ways to exchange data among work items in a sub-group. - -==== Add a new Section 6.15.X - Sub-group Shuffle Built-in Functions - -The table below describes the OpenCL C programming language built-in functions that allow work items in a sub-group to exchange data. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_shuffle( - gentype value, uint index ) ----- -| Returns _value_ for the work item with sub-group local ID equal to _index_. -The shuffle _index_ need not be the same for all work items in the sub-group. - -The return value is undefined if the work item with sub-group local ID equal to _index_ is inactive or if _index_ is greater than or equal to the size of the sub-group. - -|[source,opencl_c] ----- -gentype sub_group_shuffle_xor( - gentype value, uint mask ) ----- -| Returns _value_ for the work item with sub-group local ID equal to this work item's sub-group local ID xor'd with _mask_. -The shuffle _mask_ need not be the same for all work items in the sub-group. - -The return value is undefined if the work item with sub-group local ID equal to the calculated index is inactive or if the calculated index is greater than or equal to the size of the sub-group. - -This is a specialized function that may perform better than the equivalent `sub_group_shuffle` on some implementations. 
- -|======================================================================= - -[[cl_khr_subgroup_shuffle_relative]] -=== Relative Shuffles - -This section describes functionality added by `cl_khr_subgroup_shuffle_relative`. -This extension adds specialized ways to exchange data among work items in a sub-group that may perform better on some implementations. - -==== Add a new Section 6.15.X - Sub-group Relative Shuffle Built-in Functions - -The table below describes specialized OpenCL C programming language built-in functions that allow work items in a sub-group to exchange data. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_shuffle_up( - gentype value, uint delta ) ----- -| Returns _value_ for the work item with sub-group local ID equal to this work item's sub-group local ID minus _delta_. -The shuffle _delta_ need not be the same for all work items in the sub-group. - -The return value is undefined if the work item with sub-group local ID equal to the calculated index is inactive, or _delta_ is greater than this work item's sub-group local ID. - -This is a specialized function that may perform better than the equivalent `sub_group_shuffle` on some implementations. - -|[source,opencl_c] ----- -gentype sub_group_shuffle_down( - gentype value, uint delta ) ----- -| Returns _value_ for the work item with sub-group local ID equal to this work item's sub-group local ID plus _delta_. -The shuffle _delta_ need not be the same for all work items in the sub-group. 
- -The return value is undefined if the work item with sub-group local ID equal to the calculated index is inactive, or this work item's sub-group local ID plus _delta_ is greater than or equal to the size of the sub-group. - -This is a specialized function that may perform better than the equivalent `sub_group_shuffle` on some implementations. - -|======================================================================= - -[[cl_khr_subgroup_clustered_reduce]] -=== Clustered Reductions - -This section describes functionality added by `cl_khr_subgroup_clustered_reduce`. -This extension adds support for clustered reductions that operate on a subset of work items in the sub-group. - -==== Add a new Section 6.15.X - Sub-group Clustered Reduction Built-in Functions - -This section describes arithmetic operations that are performed on a subset of work items in a sub-group, referred to as a cluster. -A cluster is described by a specified cluster size. -Work items in a sub-group are assigned to clusters such that for cluster size _n_, the _n_ work items in the sub-group with the smallest sub-group local IDs are assigned to the first cluster, then the _n_ remaining work items with the smallest sub-group local IDs are assigned to the next cluster, and so on. -Behavior is undefined if the specified cluster size is not an integer constant expression, is not a power-of-two, or is greater than the maximum size of a sub-group within the dispatch. - -===== Arithmetic Operations - -The table below describes the OpenCL C programming language built-in functions that perform simple arithmetic operations on a cluster of work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. 
-For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_clustered_reduce_add( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_mul( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_min( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_max( - gentype value, uint clustersize ) ----- -| Returns the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group within a cluster of the specified _clustersize_. - -|======================================================================= - -Note: The order of floating-point operations is not guaranteed for the sub-group clustered reduction built-in functions that operate on floating-point types, and the order of operations may additionally be non-deterministic for a given sub-group. - -===== Bitwise Operations - -The table below describes the OpenCL C programming language built-in functions to perform simple bitwise integer operations across a cluster of work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, or `ulong`.
- -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_clustered_reduce_and( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_or( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_xor( - gentype value, uint clustersize ) ----- -| Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group within a cluster of the specified _clustersize_. - -|======================================================================= - -===== Logical Operations - -The table below describes the OpenCL C programming language built-in functions to perform simple logical operations across a cluster of work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For these functions, a non-zero _predicate_ argument or return value is logically `true` and a zero _predicate_ argument or return value is logically `false`. - -[cols="3a,2",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -int sub_group_clustered_reduce_logical_and( - int predicate, uint clustersize ) -int sub_group_clustered_reduce_logical_or( - int predicate, uint clustersize ) -int sub_group_clustered_reduce_logical_xor( - int predicate, uint clustersize ) ----- -| Returns the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group within a cluster of the specified _clustersize_. - -|======================================================================= - -[[extended-sub-groups-mapping]] -=== Function Mapping and Capabilities - -This section describes a possible mapping between OpenCL built-in functions and SPIR-V instructions and required SPIR-V capabilities. - -This section is informational and non-normative. 
- -// Note: the Unicode "zero width space" (​) causes long function names to break much more sensibly. - -[cols="1,1,1",options="header"] -|======================================================================= -|*OpenCL C Function* -|*SPIR-V BuiltIn or Instruction* -|*Enabling SPIR-V Capability* - -3+| For OpenCL 2.1 or `cl_khr_subgroups`: - -| `get_​sub_​group_​size` - | *SubgroupSize* - | *Kernel* -| `get_​max_​sub_​group_​size` - | *SubgroupMaxSize* - | *Kernel* -| `get_​num_​sub_​groups` - | *NumSubgroups* - | *Kernel* -| `get_​enqueued_​num_​sub_​groups` - | *NumEnqueuedSubgroups* - | *Kernel* -| `get_​sub_​group_​id` - | *SubgroupId* - | *Kernel* -| `get_​sub_​group_​local_​id` - | *SubgroupLocalInvocationId* - | *Kernel* - -| `sub_​group_​barrier` - | *OpControlBarrier* - | None Needed - -| `sub_​group_​all` - | *OpGroupAll* - | *Groups* -| `sub_​group_​any` - | *OpGroupAny* - | *Groups* - -| `sub_​group_​broadcast` - | *OpGroupBroadcast* - | *Groups* - -| `sub_​group_​reduce_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​reduce_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​reduce_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​scan_​exclusive_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​scan_​exclusive_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​scan_​exclusive_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​scan_​inclusive_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​scan_​inclusive_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​scan_​inclusive_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​reserve_​read_​pipe` - | *OpGroupReserveReadPipePackets* - | *Pipes* -| `sub_​group_​reserve_​write_​pipe` - | *OpGroupReserveWritePipePackets* - | *Pipes* -| 
`sub_​group_​commit_​read_​pipe` - | *OpGroupCommitReadPipe* - | *Pipes* -| `sub_​group_​commit_​write_​pipe` - | *OpGroupCommitWritePipe* - | *Pipes* - -| `get_​kernel_​sub_​group_​count_​for_​ndrange` - | *OpGetKernelNDrangeSubGroupCount* - | *DeviceEnqueue* -| `get_​kernel_​max_​sub_​group_​size_​for_​ndrange` - | *OpGetKernelNDrangeMaxSubGroupSize* - | *DeviceEnqueue* - -3+| For `cl_khr_subgroup_extended_types`: + -Note: This extension adds new types to uniform sub-group operations. - -| `sub_​group_​broadcast` - | *OpGroupBroadcast* - | *Groups* - -| `sub_​group_​reduce_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​reduce_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​reduce_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​scan_​exclusive_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​scan_​exclusive_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​scan_​exclusive_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​scan_​inclusive_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​scan_​inclusive_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​scan_​inclusive_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -3+| For `cl_khr_subgroup_non_uniform_vote`: - -| `sub_​group_​elect` - | *OpGroupNonUniformElect* - | *GroupNonUniform* -| `sub_​group_​non_​uniform_​all` - | *OpGroupNonUniformAll* - | *GroupNonUniformVote* -| `sub_​group_​non_​uniform_​any` - | *OpGroupNonUniformAny* - | *GroupNonUniformVote* -| `sub_​group_​non_​uniform_​all_​equal` - | *OpGroupNonUniformAllEqual* - | *GroupNonUniformVote* - -3+| For `cl_khr_subgroup_ballot`: - -| `sub_​group_​non_​uniform_​broadcast` - | *OpGroupNonUniformBroadcast* - | *GroupNonUniformBallot* -| `sub_​group_​broadcast_​first` - | *OpGroupNonUniformBroadcastFirst* - | 
*GroupNonUniformBallot* - -| `sub_​group_​ballot` - | *OpGroupNonUniformBallot* - | *GroupNonUniformBallot* -| `sub_​group_​inverse_​ballot` - | *OpGroupNonUniformInverseBallot* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​bit_​extract` - | *OpGroupNonUniformBallotBitExtract* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​bit_​count` - | *OpGroupNonUniformBallotBitCount* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​inclusive_​scan` - | *OpGroupNonUniformBallotBitCount* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​exclusive_​scan` - | *OpGroupNonUniformBallotBitCount* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​find_​lsb` - | *OpGroupNonUniformBallotFindLSB* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​find_​msb` - | *OpGroupNonUniformBallotFindMSB* - | *GroupNonUniformBallot* - -| `get_​sub_​group_​eq_​mask` - | *SubgroupEqMask* - | *GroupNonUniformBallot* -| `get_​sub_​group_​ge_​mask` - | *SubgroupGeMask* - | *GroupNonUniformBallot* -| `get_​sub_​group_​gt_​mask` - | *SubgroupGtMask* - | *GroupNonUniformBallot* -| `get_​sub_​group_​le_​mask` - | *SubgroupLeMask* - | *GroupNonUniformBallot* -| `get_​sub_​group_​lt_​mask` - | *SubgroupLtMask* - | *GroupNonUniformBallot* - -3+| For `cl_khr_subgroup_non_uniform_arithmetic`: - -| `sub_​group_​non_​uniform_​reduce_​add` - | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​mul` - | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​min` - | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​max` - | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​and` - | *OpGroupNonUniformBitwiseAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​or` - 
| *OpGroupNonUniformBitwiseOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​xor` - | *OpGroupNonUniformBitwiseXor* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​logical_​and` - | *OpGroupNonUniformLogicalAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​logical_​or` - | *OpGroupNonUniformLogicalOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​logical_​xor` - | *OpGroupNonUniformLogicalXor* - | *GroupNonUniformArithmetic* - -| `sub_​group_​non_​uniform_​scan_​inclusive_​add` - | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​mul` - | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​min` - | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​max` - | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​and` - | *OpGroupNonUniformBitwiseAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​or` - | *OpGroupNonUniformBitwiseOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​xor` - | *OpGroupNonUniformBitwiseXor* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​and` - | *OpGroupNonUniformLogicalAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​or` - | *OpGroupNonUniformLogicalOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​xor` - | *OpGroupNonUniformLogicalXor* - | *GroupNonUniformArithmetic* - -| `sub_​group_​non_​uniform_​scan_​exclusive_​add` - | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* - | *GroupNonUniformArithmetic* -| 
`sub_​group_​non_​uniform_​scan_​exclusive_​mul` - | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​exclusive_​min` - | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​max` - | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​and` - | *OpGroupNonUniformBitwiseAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​or` - | *OpGroupNonUniformBitwiseOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​xor` - | *OpGroupNonUniformBitwiseXor* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​and` - | *OpGroupNonUniformLogicalAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​or` - | *OpGroupNonUniformLogicalOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​xor` - | *OpGroupNonUniformLogicalXor* - | *GroupNonUniformArithmetic* - -3+| For `cl_khr_subgroup_shuffle`: - -| `sub_​group_​shuffle` - | *OpGroupNonUniformShuffle* - | *GroupNonUniformShuffle* -| `sub_​group_​shuffle_​xor` - | *OpGroupNonUniformShuffleXor* - | *GroupNonUniformShuffle* - -3+| For `cl_khr_subgroup_shuffle_relative`: - -| `sub_​group_​shuffle_​up` - | *OpGroupNonUniformShuffleUp* - | *GroupNonUniformShuffleRelative* -| `sub_​group_​shuffle_​down` - | *OpGroupNonUniformShuffleDown* - | *GroupNonUniformShuffleRelative* - -3+| For `cl_khr_subgroup_clustered_reduce`: - -| `sub_​group_​clustered_​reduce_​add` - | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​mul` - | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* - | *GroupNonUniformClustered* -| 
`sub_​group_​clustered_​reduce_​min` - | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​max` - | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​and` - | *OpGroupNonUniformBitwiseAnd* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​or` - | *OpGroupNonUniformBitwiseOr* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​xor` - | *OpGroupNonUniformBitwiseXor* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​logical_​and` - | *OpGroupNonUniformLogicalAnd* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​logical_​or` - | *OpGroupNonUniformLogicalOr* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​logical_​xor` - | *OpGroupNonUniformLogicalXor* - | *GroupNonUniformClustered* - -|======================================================================= diff --git a/ext/cl_khr_subgroup_named_barrier.asciidoc b/ext/cl_khr_subgroup_named_barrier.asciidoc deleted file mode 100644 index 08636a76e..000000000 --- a/ext/cl_khr_subgroup_named_barrier.asciidoc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_subgroup_named_barrier]] -== Named Barriers for Sub-groups - -This section describes the *cl_khr_subgroup_named_barrier* extension. -This extension adds barrier operations that cover subsets of an OpenCL -work-group. -Only the OpenCL API changes are described in this section. 
-Please refer to the SPIR-V specification for information about using -sub-group named barriers in the SPIR-V intermediate representation, and to -the OpenCL {cpp} specification for descriptions of the sub-group named -barrier built-in functions in the OpenCL {cpp} kernel language. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -=== Changes to OpenCL specification - -Add to _table 4.3_: - -.List of supported param_names by {clGetDeviceInfo} -[cols="2,1,3",options="header",] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR} -| {cl_uint_TYPE} -| Maximum number of named barriers in a work-group for any given - kernel-instance running on the device. - The minimum value is 8. - -|==== - diff --git a/ext/cl_khr_subgroup_rotate.asciidoc b/ext/cl_khr_subgroup_rotate.asciidoc deleted file mode 100644 index 337f98887..000000000 --- a/ext/cl_khr_subgroup_rotate.asciidoc +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2022-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_subgroup_rotate]] -== Sub-group Rotation - -This extension adds support for a new sub-group data exchange operation that -makes it possible to rotate values through the work items in a sub-group. - -=== General Information - -==== Name Strings - -`cl_khr_subgroup_rotate` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2022-04-22 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.10, -and OpenCL C Specification Version 3.0.10 and OpenCL Environment Specification -Version 3.0.10. - -This extension requires OpenCL 2.0. - -==== Contributors - -Kévin Petit, Arm Ltd.
+ -Ben Ashbaugh, Intel + -Ruihao Zhang, Qualcomm + -Sven van Haastregt, Arm Ltd. + -Anastasia Stulova, Arm Ltd. + -Stuart Brady, Arm Ltd. + - -=== New OpenCL C Functions - -This extension adds the following built-in functions: - -[source,opencl_c] ----- -gentype sub_group_rotate(gentype value, int delta) -gentype sub_group_clustered_rotate(gentype value, int delta, uint clustersize) ----- - -=== Modifications to the OpenCL C Specification - -(Add a new section 6.15.x, *Sub-group Rotation*) :: -+ --- - -The following preprocessor definitions are added: - -[source,opencl_c] ----- -#define cl_khr_subgroup_rotate 1 ----- - -The table below describes specialized OpenCL C programming language built-in -functions that allow work items in a sub-group to exchange data. These functions -need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the -supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, -`uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), -or `half` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_rotate( - gentype value, int delta) ----- -| Returns _value_ for the work item with sub-group local ID equal to the remainder -of the division of the sum of this work item's sub-group local ID and _delta_ by -the maximum sub-group size. + -The value of _delta_ is required to be dynamically-uniform for all work items in -the sub-group, otherwise the behavior is undefined. - -The return value is undefined if the work item with sub-group local ID equal to the -calculated index is inactive.
- -|[source,opencl_c] ----- -gentype sub_group_clustered_rotate( - gentype value, int delta, - uint clustersize) ----- -| Returns _value_ for the work item with sub-group local ID equal to the sum of, the -remainder of the division of the sum of this work item's ID within the cluster and -_delta_ by _clustersize_, and the sub-group local ID of the first work-item of the -cluster to which the work-item executing the function belongs. + -The value of _delta_ is required to be dynamically-uniform for all work items in -the sub-group, otherwise the behavior is undefined. - -_clustersize_ must be an integer constant expression and a power of two, smaller -than or equal to the maximum sub-group size, otherwise the behavior is undefined. - -The return value is undefined if the work item with sub-group local ID equal to the -calculated index is inactive. -|======================================================================= --- - -=== Modifications to the OpenCL SPIR-V Environment Specification - -See OpenCL SPIR-V Environment Specification. - -=== Interactions with Other Extensions - -If `cl_khr_il_program` is supported then the SPIR-V environment specification -modifications described above apply. - diff --git a/ext/cl_khr_subgroups.asciidoc b/ext/cl_khr_subgroups.asciidoc deleted file mode 100644 index ae479e9b7..000000000 --- a/ext/cl_khr_subgroups.asciidoc +++ /dev/null @@ -1,426 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_subgroups]] -== Sub-groups - -This section describes the *cl_khr_subgroups* extension. - -This extension adds support for implementation-controlled groups of work items, known as sub-groups. -Sub-groups behave similarly to work-groups and have their own sets of built-ins and synchronization primitives. 
-Sub-groups within a work-group are independent, may make forward progress with respect to each other, and may map to optimized hardware structures where that makes sense. - -Sub-groups were promoted to a core feature in OpenCL 2.1, however note that: - -* The sub-group OpenCL C built-in functions described by this extension must still be accessed as an OpenCL C extension in OpenCL 2.1. -* Sub-group independent forward progress is an optional device property in OpenCL 2.1, see {CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS}. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_subgroups-additions-to-chapter-3-of-the-opencl-2.0-specification]] -=== Additions to Chapter 3 of the OpenCL 2.0 Specification - -[[cl_khr_subgroups-additions-to-section-3.2-execution-model]] -=== Additions to section 3.2 -- Execution Model - -Within a work-group work-items may be divided into sub-groups. -The mapping of work-items to sub-groups is implementation-defined and may be queried at runtime. -While sub-groups may be used in multi-dimensional work-groups, each sub-group is 1-dimensional and any given work-item may query which sub-group it is a member of. - -Work items are mapped into sub-groups through a combination of compile-time decisions and the parameters of the dispatch. -The mapping to sub-groups is invariant for the duration of a kernel’s execution, across dispatches of a given kernel with the same launch parameters, and from one work-group to another within the dispatch (excluding the trailing edge work-groups in the presence of non-uniform work-group sizes). -In addition, all sub-groups within a work-group will be the same size, apart from the sub-group with the maximum index which may be smaller if the size of the work-group is not evenly divisible by the size of the sub-group. 
- -Sub-groups execute concurrently within a given work-group and make independent forward progress with respect to each other even in the absence of work-group barrier operations. -Sub-groups are able to internally synchronize using barrier operations without synchronizing with each other. - -In the degenerate case, with the extension enabled, a single sub-group must be supported for each work-group. -In this situation all sub-group scope functions alias their work-group level equivalents. - -[[cl_khr_subgroups-additions-to-chapter-5-of-the-opencl-2.0-specification]] -=== Additions to Chapter 5 of the OpenCL 2.0 Specification - -The function - -include::{generated}/api/protos/clGetKernelSubGroupInfoKHR.txt[] - -returns information about the kernel object. - -_kernel_ specifies the kernel object being queried. - -_device_ identifies a specific device in the list of devices associated with -_kernel_. -The list of devices is the list of devices in the OpenCL context that is -associated with _kernel_. -If the list of devices associated with _kernel_ is a single device, _device_ -can be a `NULL` value. - -_param_name_ specifies the information to query. -The list of supported _param_name_ types and the information returned in -_param_value_ by {clGetKernelSubGroupInfoKHR} are described in the -<<cl_khr_subgroups-kernel-sub-group-info-table>> table. - -_input_value_size_ is used to specify the size in bytes of memory pointed to -by _input_value_. -This size must be == size of input type as described in the table below. - -_input_value_ is a pointer to memory where the appropriate parameterization -of the query is passed from. -If _input_value_ is `NULL`, it is ignored. - -_param_value_ is a pointer to memory where the appropriate result being -queried is returned. -If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ is used to specify the size in bytes of memory pointed to -by _param_value_. -This size must be {geq} size of return type as described in the -<<cl_khr_subgroups-kernel-sub-group-info-table>> table.
- -_param_value_size_ret_ returns the actual size in bytes of data being -queried by _param_name_. -If _param_value_size_ret_ is `NULL`, it is ignored. - -[[cl_khr_subgroups-kernel-sub-group-info-table]] -.List of supported param_names by {clGetKernelSubGroupInfoKHR} -[width="100%",cols="<25%,<25%,<25%,<25%",options="header"] -|==== -| Kernel Sub-group Info | Input Type | Return Type | Description -| {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR} - | {size_t_TYPE}* - | {size_t_TYPE} - | Returns the maximum sub-group size for this kernel. - All sub-groups must be the same size, while the last sub-group in - any work-group (i.e. the sub-group with the maximum index) could - be the same or smaller size. - - The _input_value_ must be an array of size_t values - corresponding to the local work size parameter of the intended - dispatch. - The number of dimensions in the ND-range will be inferred from - the value specified for _input_value_size_. -| {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR} - | {size_t_TYPE}* - | {size_t_TYPE} - | Returns the number of sub-groups that will be present in each - work-group for a given local work size. - All workgroups, apart from the last work-group in each dimension - in the presence of non-uniform work-group sizes, will have the - same number of sub-groups. - - The _input_value_ must be an array of size_t values - corresponding to the local work size parameter of the intended - dispatch. - The number of dimensions in the ND-range will be inferred from - the value specified for _input_value_size_. -|==== - -{clGetKernelSubGroupInfoKHR} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated - with _kernel_ or if _device_ is `NULL` but there is more than one device - associated with _kernel_. 
- * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <<cl_khr_subgroups-kernel-sub-group-info-table>> table - and _param_value_ is not `NULL`. - * {CL_INVALID_VALUE} if _param_name_ is - {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR} and the size in bytes specified by - _input_value_size_ is not valid or if _input_value_ is `NULL`. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_subgroups-additions-to-chapter-6-of-the-opencl-2.0-specification]] -=== Additions to Chapter 6 of the OpenCL 2.0 C Specification - -[[cl_khr_subgroups-additions-to-section-6.13.1-work-item-functions]] -==== Additions to section 6.13.1 -- Work Item Functions - -[cols="a,",options="header",] -|==== -| *Function* -| *Description* - -| uint *get_sub_group_size* () -| Returns the number of work items in the sub-group. - This value is no more than the maximum sub-group size and is - implementation-defined based on a combination of the compiled kernel and - the dispatch dimensions. - This will be a constant value for the lifetime of the sub-group. - -| uint *get_max_sub_group_size* () -| Returns the maximum size of a sub-group within the dispatch. - This value will be invariant for a given set of dispatch dimensions and a - kernel object compiled for a given device. - -| uint *get_num_sub_groups* () -| Returns the number of sub-groups that the current work-group is divided - into. - - This number will be constant for the duration of a work-group's execution. - If the kernel is executed with a non-uniform work-group size - (i.e.
the global_work_size values specified to {clEnqueueNDRangeKernel} - are not evenly divisible by the local_work_size values for any dimension), - calls to this built-in from some work-groups may return different values - than calls to this built-in from other work-groups. - -| uint *get_enqueued_num_sub_groups* () -| Returns the same value as that returned by *get_num_sub_groups* if the - kernel is executed with a uniform work-group size. - - If the kernel is executed with a non-uniform work-group size, returns the - number of sub-groups in each of the work-groups that make up the uniform - region of the global range. - -| uint *get_sub_group_id* () -| *get_sub_group_id* returns the sub-group ID which is a number from 0 .. - *get_num_sub_groups*() - 1. - - For {clEnqueueTask}, this returns 0. - -| uint *get_sub_group_local_id* () -| Returns the unique work item ID within the current sub-group. - The mapping from *get_local_id*(__dimindx__) to *get_sub_group_local_id* - will be invariant for the lifetime of the work-group. - -|==== - -[[cl_khr_subgroups-additions-to-section-6.13.8-synchronization-functions]] -==== Additions to section 6.13.8 -- Synchronization Functions - -[cols="3,7",options="header",] -|==== -| *Function* -| *Description* - -| void **sub_group_barrier** ( + - cl_mem_fence_flags _flags_) - - void **sub_group_barrier** ( + - cl_mem_fence_flags _flags_, memory_scope _scope_) - -| All work items in a sub-group executing the kernel on a processor must - execute this function before any are allowed to continue execution beyond - the sub-group barrier. - This function must be encountered by all work items in a sub-group - executing the kernel. - These rules apply to ND-ranges implemented with uniform and non-uniform - work-groups.
- - If *sub_group_barrier* is inside a conditional statement, then all work - items within the sub-group must enter the conditional if any work item in - the sub-group enters the conditional statement and executes the - sub_group_barrier. - - If *sub_group_barrier* is inside a loop, all work items within the sub-group - must execute the sub_group_barrier for each iteration of the loop before - any are allowed to continue execution beyond the sub_group_barrier. - - The *sub_group_barrier* function also queues a memory fence (reads and - writes) to ensure correct ordering of memory operations to local or global - memory. - - The flags argument specifies the memory address space and can be set to a - combination of the following values: - - CLK_LOCAL_MEM_FENCE - The *sub_group_barrier* function will either flush - any variables stored in local memory or queue a memory fence to ensure - correct ordering of memory operations to local memory. - - CLK_GLOBAL_MEM_FENCE -- The *sub_group_barrier* function will queue a - memory fence to ensure correct ordering of memory operations to global - memory. - This can be useful when work items, for example, write to buffer objects - and then want to read the updated data from these buffer objects. - - CLK_IMAGE_MEM_FENCE -- The *sub_group_barrier* function will queue a memory - fence to ensure correct ordering of memory operations to image objects. - This can be useful when work items, for example, write to image objects - and then want to read the updated data from these image objects. - -|==== - -[[cl_khr_subgroups-additions-to-section-6.13.11-atomic-functions]] -==== Additions to section 6.13.11 -- Atomic Functions - -Add the following new value to the enumerated type `memory_scope` defined in -_section 6.13.11.4_. - ----- -memory_scope_sub_group ----- - -The `memory_scope_sub_group` specifies that the memory ordering constraints -given by `memory_order` apply to work items in a sub-group. 
-This memory scope can be used when performing atomic operations to global or -local memory. - -[[cl_khr_subgroups-add-a-new-section-6.13.X-sub-group-functions]] -==== Add a new section 6.13.X -- Sub-group Functions - -The table below describes OpenCL C programming language built-in functions that operate on a sub-group level. -These built-in functions must be encountered by all work items in the sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| int *sub_group_all* (int _predicate_) -| Evaluates _predicate_ for all work items in the sub-group and returns a - non-zero value if _predicate_ evaluates to non-zero for all work items in - the sub-group. - -| int *sub_group_any* (int _predicate_) -| Evaluates _predicate_ for all work items in the sub-group and returns a - non-zero value if _predicate_ evaluates to non-zero for any work items in - the sub-group. - -| gentype *sub_group_broadcast* ( + - gentype _x_, uint _sub_group_local_id_) -| Broadcast the value of _x_ for work item identified by - _sub_group_local_id_ (value returned by *get_sub_group_local_id*) to all - work items in the sub-group. - - _sub_group_local_id_ must be the same value for all work items in the - sub-group. - -| gentype *sub_group_reduce_<op>* ( + - gentype _x_) -| Return result of reduction operation specified by *<op>* for all values of - _x_ specified by work items in a sub-group. - -| gentype *sub_group_scan_exclusive_<op>* ( + - gentype _x_) -| Do an exclusive scan operation specified by *<op>* of all values specified - by work items in a sub-group. - The scan results are returned for each work item. - - The scan order is defined by increasing sub-group local ID within the - sub-group.
- -| gentype *sub_group_scan_inclusive_<op>* ( + - gentype _x_) -| Do an inclusive scan operation specified by *<op>* of all values specified - by work items in a sub-group. - The scan results are returned for each work item. - - The scan order is defined by increasing sub-group local ID within the - sub-group. - -|==== - -The *<op>* in *sub_group_reduce_<op>*, *sub_group_scan_inclusive_<op>* and *sub_group_scan_exclusive_<op>* defines the operator and can be *add*, *min* or *max*. - -The exclusive scan operation takes a binary operator *op* with an identity I and _n_ (where _n_ is the size of the sub-group) elements [a~0~, a~1~, ... a~n-1~] and returns [I, a~0~, (a~0~ *op* a~1~), ... (a~0~ *op* a~1~ *op* ... *op* a~n-2~)]. - -The inclusive scan operation takes a binary operator *op* with _n_ (where _n_ is the size of the sub-group) elements [a~0~, a~1~, ... a~n-1~] and returns [a~0~, (a~0~ *op* a~1~), ... (a~0~ *op* a~1~ *op* ... *op* a~n-1~)]. - -If *op* = *add*, the identity I is 0. -If *op* = *min*, the identity I is `INT_MAX`, `UINT_MAX`, `LONG_MAX`, `ULONG_MAX`, for `int`, `uint`, `long`, `ulong` types and is `+INF` for -floating-point types. -Similarly if *op* = *max*, the identity I is `INT_MIN`, 0, `LONG_MIN`, 0 and `-INF`. - -[NOTE] -==== -The order of floating-point operations is not guaranteed for the *sub_group_reduce_<op>*, *sub_group_scan_inclusive_<op>* and *sub_group_scan_exclusive_<op>* built-in functions that operate on `half`, `float` and `double` data types. -The order of these floating-point operations is also non-deterministic for a given sub-group. -==== - -[[cl_khr_subgroups-additions-to-section-6.13.16-pipe-functions]] -==== Additions to section 6.13.16 -- Pipe Functions - -The OpenCL C programming language implements the following built-in pipe -functions that operate at a sub-group level. -These built-in functions must be encountered by all work items in a sub-group -executing the kernel with the same argument values; otherwise the behavior -is undefined.
-We use the generic type name `gentype` to indicate the built-in OpenCL C -scalar or vector integer or floating-point data types or any user defined -type built from these scalar and vector data types can be used as the type -for the arguments to the pipe functions listed in _table 6.29_. - -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| reserve_id_t *sub_group_reserve_read_pipe* ( + - read_only pipe gentype _pipe_, + - uint _num_packets_) - - reserve_id_t *sub_group_reserve_write_pipe* ( + - write_only pipe gentype _pipe_, + - uint _num_packets_) -| Reserve _num_packets_ entries for reading from or writing to _pipe_. - Returns a valid non-zero reservation ID if the reservation is successful - and 0 otherwise. - - The reserved pipe entries are referred to by indices that go from 0 ... - _num_packets_ - 1. - -| void *sub_group_commit_read_pipe* ( + - read_only pipe gentype _pipe_, + - reserve_id_t _reserve_id_) - - void *sub_group_commit_write_pipe* ( + - write_only pipe gentype _pipe_, + - reserve_id_t _reserve_id_) -| Indicates that all reads and writes to _num_packets_ associated with - reservation _reserve_id_ are completed. - -|==== - -Note: Reservations made by a sub-group are ordered in the pipe as they are -ordered in the program. -Reservations made by different sub-groups that belong to the same work-group -can be ordered using sub-group synchronization. -The order of sub-group based reservations that belong to different work -groups is implementation-defined. 
- -[[cl_khr_subgroups-additions-to-section-6.13.17.6-enqueuing-kernels-kernel-query-functions]] -==== Additions to section 6.13.17.6 -- Enqueuing Kernels (Kernel Query Functions) - -[cols="5,4",options="header",] -|==== -| *Built-in Function* -| *Description* - -| uint *get_kernel_sub_group_count_for_ndrange* ( + - const ndrange_t _ndrange_, + - void (^block)(void)); - - uint *get_kernel_sub_group_count_for_ndrange* ( + - const ndrange_t _ndrange_, + - void (^block)(local void *, ...)); -| Returns the number of sub-groups in each work-group of the dispatch (except - for the last in cases where the global size does not divide cleanly into - work-groups) given the combination of the passed ndrange and block. - - _block_ specifies the block to be enqueued. - -| uint *get_kernel_max_sub_group_size_for_ndrange* ( + - const ndrange_t _ndrange_, + - void (^block)(void)); + - - uint *get_kernel_max_sub_group_size_for_ndrange* ( + - const ndrange_t _ndrange_, + - void (^block)(local void *, ...)); -| Returns the maximum sub-group size for a block. - -|==== diff --git a/ext/cl_khr_suggested_local_work_size.asciidoc b/ext/cl_khr_suggested_local_work_size.asciidoc deleted file mode 100644 index 97bef879a..000000000 --- a/ext/cl_khr_suggested_local_work_size.asciidoc +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_suggested_local_work_size]] -== Suggested Local Work Size Query - -This extension adds the ability to query a suggested local work-group size for a kernel running on a device for a specified global work size and global work offset. -The suggested local work-group size will match the work-group size that would be chosen if the kernel were enqueued with the specified global work size and global work offset and a `NULL` local work size. 
- -By using the suggested local work-group size query an application has greater insight into the local work-group size chosen by the OpenCL implementation, and the OpenCL implementation need not re-compute the local work-group size if the same kernel is enqueued multiple times with the same parameters. - -=== General Information - -==== Name Strings - -`cl_khr_suggested_local_work_size` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-04-22 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL API Specification Version V3.0.6. - -This extension requires OpenCL 1.0. - -=== New API Functions - -[source,opencl] ----- -cl_int clGetKernelSuggestedLocalWorkSizeKHR( - cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - size_t *suggested_local_work_size); ----- - -=== Modifications to the OpenCL API Specification - -==== Section 5.9 - Kernel Objects: - -===== New Section 5.9.4.X - Suggested Local Work Size Query - -To query a suggested local work size for a kernel object, call the function - -include::{generated}/api/protos/clGetKernelSuggestedLocalWorkSizeKHR.txt[] - -The returned suggested local work size is expected to match the local work size that would be chosen if the specified kernel object, with the same kernel arguments, were enqueued into the specified command-queue with the specified global work size, specified global work offset, and with a `NULL` local work size. - -* _command_queue_ specifies the command-queue and device for the query. -* _kernel_ specifies the kernel object and kernel arguments for the query. -The OpenCL context associated with _kernel_ and _command_queue_ must be the same. -* _work_dim_ specifies the number of work dimensions in the input global work offset and global work size, and the output suggested local work size.
-* _global_work_offset_ can be used to specify an array of at least _work_dim_ global ID offset values for the query. -This is optional and may be `NULL` to indicate there is no global ID offset. -* _global_work_size_ is an array of at least _work_dim_ values describing the global work size for the query. -* _suggested_local_work_size_ is an output array of at least _work_dim_ values that will contain the result of the query. - -{clGetKernelSuggestedLocalWorkSizeKHR} returns {CL_SUCCESS} if the query executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host command-queue. -* {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. -* {CL_INVALID_CONTEXT} if the context associated with _kernel_ is not the same as the context associated with _command_queue_. -* {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program executable available for _kernel_ for the device associated with _command_queue_. -* {CL_INVALID_KERNEL_ARGS} if all argument values for _kernel_ have not been set. -* {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is set as an argument to _kernel_ and the offset specified when the sub-buffer object was created is not aligned to {CL_DEVICE_MEM_BASE_ADDR_ALIGN} for the device associated with _command_queue_. -* {CL_INVALID_IMAGE_SIZE} if an image object is set as an argument to _kernel_ and the image dimensions are not supported by device associated with _command_queue_. -* {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is set as an argument to _kernel_ and the image format is not supported by the device associated with _command_queue_. -* {CL_INVALID_OPERATION} if an SVM pointer is set as an argument to _kernel_ and the device associated with _command_queue_ does not support SVM or the required SVM capabilities for the SVM pointer. -* {CL_INVALID_WORK_DIMENSION} if _work_dim_ is not a valid value (i.e. 
a value between 1 and {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}). -* {CL_INVALID_GLOBAL_WORK_SIZE} if _global_work_size_ is NULL or if any of the values specified in _global_work_size_ are 0. -* {CL_INVALID_GLOBAL_WORK_SIZE} if any of the values specified in _global_work_size_ exceed the maximum value representable by `size_t` on the device associated with _command_queue_. -* {CL_INVALID_GLOBAL_OFFSET} if the value specified in _global_work_size_ plus the corresponding value in _global_work_offset_ for dimension exceeds the maximum value representable by `size_t` on the device associated with _command_queue_. -* {CL_INVALID_VALUE} if _suggested_local_work_size_ is NULL. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -NOTE: These error conditions are consistent with error conditions for {clEnqueueNDRangeKernel}. diff --git a/ext/cl_khr_terminate_context.asciidoc b/ext/cl_khr_terminate_context.asciidoc deleted file mode 100644 index 9a7717883..000000000 --- a/ext/cl_khr_terminate_context.asciidoc +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_terminate_context]] -== Terminating OpenCL Contexts - -Today, OpenCL provides an API to release a context. -This operation is done only after all queues, memory object, programs and -kernels are released, which in turn might wait for all ongoing operations to -complete. -However, there are cases in which a fast release is required, or release -operation cannot be done, as commands are stuck in mid execution. -An example of the first case can be program termination due to exception, or -quick shutdown due to low power. 
-Examples of the second case are when a kernel is running too long, or gets -stuck, or it may result from user action which makes the results of the -computation unnecessary. - -In many cases, the driver or the device is capable of speeding up the -closure of ongoing operations when the results are no longer required in a -much more expedient manner than waiting for all previously enqueued -operations to finish. - -This extension implements a new query to check whether a device can -terminate an OpenCL context and adds an API to terminate a context. - -The extension name is *cl_khr_terminate_context*. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_terminate_context-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -Add a new device property to _table 4.3_ in _section 4.2_. - -.List of supported param_names by {clGetDeviceInfo} -[cols="3,2,4",options="header",] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_TERMINATE_CAPABILITY_KHR} -| {cl_device_terminate_capability_khr_TYPE} -| Describes the termination capability of the OpenCL device. - This is a bit-field, where the following values are currently supported: - - {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR} - Indicates that context - termination is supported. - -|==== - -Add a new context property to _table 4.5_ in _section 4.4_. - -.List of supported context creation properties by {clCreateContext} -[cols="3,2,4",options="header",] -|==== -| Context Property -| Property value -| Description - -| {CL_CONTEXT_TERMINATE_KHR} -| {cl_bool_TYPE} -| Specifies whether the context can be terminated. - The default value is {CL_FALSE}. 
- -|==== - -{CL_CONTEXT_TERMINATE_KHR} can be specified in the context properties only if -all devices associated with the context support -context termination (i.e. {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR} is set -for {CL_DEVICE_TERMINATE_CAPABILITY_KHR}). -Otherwise, context creation fails with error code of {CL_INVALID_PROPERTY}. - -The new function - -include::{generated}/api/protos/clTerminateContextKHR.txt[] - -terminates all pending work associated with the context and renders all data -owned by the context invalid. -It is the responsibility of the application to release all objects -associated with the context being terminated. - -When a context is terminated: - - * The execution status of enqueued commands will be {CL_CONTEXT_TERMINATED_KHR}. - Event objects can be queried using {clGetEventInfo}. - Event callbacks can be registered and registered event callbacks will be - called with _event_command_status_ set to {CL_CONTEXT_TERMINATED_KHR}. - {clWaitForEvents} will return immediately for commands associated - with event objects specified in event_list. - The status of user events can be set. - Event objects can be retained and released. - {clGetEventProfilingInfo} returns {CL_PROFILING_INFO_NOT_AVAILABLE}. - * The context is considered to be terminated. - A callback function registered when the context was created will be - called. - Only queries, retain and release operations can be performed on the - context. - All other APIs that use a context as an argument will return - {CL_CONTEXT_TERMINATED_KHR}. - * The contents of the memory regions of the memory objects are undefined. - Queries, registering a destructor callback, retain and release - operations can be performed on the memory objects. - * Once a context has been terminated, all OpenCL API calls that create - objects or enqueue commands will return {CL_CONTEXT_TERMINATED_KHR}.
- APIs that release OpenCL objects will continue to operate as though - {clTerminateContextKHR} was not called. - * The behavior of callbacks will remain unchanged, and will report - appropriate error, if executing after termination of context. - This behavior is similar to enqueued commands, after the command-queue - has become invalid. - -{clTerminateContextKHR} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid OpenCL context. - * {CL_CONTEXT_TERMINATED_KHR} if _context_ has already been terminated. - * {CL_INVALID_OPERATION} if _context_ was not created with - {CL_CONTEXT_TERMINATE_KHR} set to {CL_TRUE}. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -An implementation that supports this extension must be able to terminate -commands currently executing on devices or queued across all command-queues -associated with the context that is being terminated. -The implementation cannot implement this extension by waiting for currently -executing (or queued) commands to finish execution on devices associated -with this context (i.e. doing a {clFinish}). diff --git a/ext/cl_khr_throttle_hints.asciidoc b/ext/cl_khr_throttle_hints.asciidoc deleted file mode 100644 index 8b19ce69c..000000000 --- a/ext/cl_khr_throttle_hints.asciidoc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_throttle_hints]] -== Throttle Hints - -This section describes the *cl_khr_throttle_hints* extension. 
-This extension adds throttle hints for OpenCL, but does not specify the -throttling behavior or minimum guarantees. -It is expected that the user guide associated with each implementation which -supports this extension will describe the throttling behavior guarantees. - -Note that the throttle hint is orthogonal to functionality defined in -*cl_khr_priority_hints* extension. -For example, a task may have high priority ({CL_QUEUE_PRIORITY_HIGH_KHR}) -but should at the same time be executed at an optimized throttle setting -({CL_QUEUE_THROTTLE_LOW_KHR}). - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_throttle_hints-host-side-api-modifications]] -=== Host-side API modifications - -The function {clCreateCommandQueueWithProperties} (Section 5.1) is -extended to support a new {CL_QUEUE_THROTTLE_KHR} value as part of the -_properties_ argument. - -The properties field accepts the following values: - - * {CL_QUEUE_THROTTLE_HIGH_KHR} (full throttle, i.e., OK to consume more - energy) - * {CL_QUEUE_THROTTLE_MED_KHR} (normal throttle) - * {CL_QUEUE_THROTTLE_LOW_KHR} (optimized/lowest energy consumption) - -If {CL_QUEUE_THROTTLE_KHR} is not specified then the default priority is -{CL_QUEUE_THROTTLE_MED_KHR}. - -To the error section for {clCreateCommandQueueWithProperties}, the -following is added: - - * {CL_INVALID_QUEUE_PROPERTIES} if the {CL_QUEUE_THROTTLE_KHR} property is - specified and the queue is a {CL_QUEUE_ON_DEVICE}. diff --git a/ext/cl_khr_work_group_uniform_arithmetic.asciidoc b/ext/cl_khr_work_group_uniform_arithmetic.asciidoc deleted file mode 100644 index 097f0aed2..000000000 --- a/ext/cl_khr_work_group_uniform_arithmetic.asciidoc +++ /dev/null @@ -1,239 +0,0 @@ -// Copyright 2022-2024 The Khronos Group. 
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_work_group_uniform_arithmetic]] -== Work-group Uniform Arithmetic - -This extension adds additional work-group collective functions to OpenCL C. -Specifically, this extension adds support for work-group scans and reductions for the following operators: - -* Logical operations (`and`, `or`, and `xor`). -* Bitwise operations (`and`, `or`, and `xor`). -* Integer multiplication (`mul`). -* Floating-point multiplication (`mul`). - -=== General Information - -==== Name Strings - -`cl_khr_work_group_uniform_arithmetic` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2022-04-29 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specification -Version 3.0.10. - -This extension requires OpenCL 2.0. - -==== Contributors - -Kevin Petit, Arm Ltd. + -Ben Ashbaugh, Intel + - -=== New OpenCL C Functions - -The following functions are added to OpenCL C. - -[source,opencl_c] ----- -int work_group_reduce_logical_and(int predicate); -int work_group_reduce_logical_or(int predicate); -int work_group_reduce_logical_xor(int predicate); - -int work_group_scan_inclusive_logical_and(int predicate); -int work_group_scan_inclusive_logical_or(int predicate); -int work_group_scan_inclusive_logical_xor(int predicate); - -int work_group_scan_exclusive_logical_and(int predicate); -int work_group_scan_exclusive_logical_or(int predicate); -int work_group_scan_exclusive_logical_xor(int predicate); ----- - -For the following functions, the generic type name `gentype` may be one of the supported built-in scalar data types `int`, `uint`, `long`, or `ulong`. 
- -[source,opencl_c] ----- -gentype work_group_reduce_and(gentype value); -gentype work_group_reduce_or(gentype value); -gentype work_group_reduce_xor(gentype value); - -gentype work_group_scan_inclusive_and(gentype value); -gentype work_group_scan_inclusive_or(gentype value); -gentype work_group_scan_inclusive_xor(gentype value); - -gentype work_group_scan_exclusive_and(gentype value); -gentype work_group_scan_exclusive_or(gentype value); -gentype work_group_scan_exclusive_xor(gentype value); ----- - -For the following functions, the generic type name `gentype` may be one of the supported built-in scalar data types `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[source,opencl_c] ----- -gentype work_group_reduce_mul(gentype value); -gentype work_group_scan_inclusive_mul(gentype value); -gentype work_group_scan_exclusive_mul(gentype value); ----- - -=== Modifications to the OpenCL C Specification - -(Add to Section 6.15.16, *Work-group Collective Functions*) :: -+ --- -The table below describes the OpenCL C programming language built-in functions that perform -logical arithmetic operations across work items in a work-group. These functions must be -encountered by all work items in a work-group executing the kernel, otherwise the behavior is -undefined. For these functions, a non-zero _predicate_ argument or return value is logically -`true` and a zero _predicate_ argument or return value is logically `false`. - -[cols="2a,1",options="header"] -|==== -| Function -| Description -|[source,opencl_c] ----- -int work_group_reduce_logical_and(int predicate); -int work_group_reduce_logical_or(int predicate); -int work_group_reduce_logical_xor(int predicate); ----- -| Returns the logical *and*, *or*, or *xor* of _predicate_ for all work items in the work-group. 
- -|[source,opencl_c] ----- -int work_group_scan_inclusive_logical_and(int predicate); -int work_group_scan_inclusive_logical_or(int predicate); -int work_group_scan_inclusive_logical_xor(int predicate); ----- -| Returns the result of an inclusive scan operation, which is the logical - *and*, *or*, or *xor* of _predicate_ for all work items in the work-group with - a work-group linear local ID less than or equal to this work item’s work-group - linear local ID. - -|[source,c] ----- -int work_group_scan_exclusive_logical_and(int predicate); -int work_group_scan_exclusive_logical_or(int predicate); -int work_group_scan_exclusive_logical_xor(int predicate); ----- -| Returns the result of an exclusive scan operation, which is the logical - *and*, *or*, or *xor* of _predicate_ for all work items in the work-group with - a work-group linear local ID less than this work item’s work-group linear - local ID. - - If there is no work item in the work-group with a work-group linear local ID - less than this work item’s work-group linear local ID then an identity value - `I` is returned. For *and*, the identity value is `true` (non-zero). For *or* - and *xor*, the identity value is `false` (zero). - -|==== - -The table below describes the OpenCL C programming language built-in functions -that perform bitwise integer operations across work items in a work-group. These -functions must be encountered by all work items in a work-group executing the -kernel, otherwise the behavior is undefined. For the functions below, the -generic type name `gentype` may be one of the supported built-in scalar data -types `int`, `uint`, `long`, and `ulong`. - -[cols="2a,1",options="header"] -|==== -| Function -| Description - -|[source,opencl_c] ----- -gentype work_group_reduce_and(gentype value); -gentype work_group_reduce_or(gentype value); -gentype work_group_reduce_xor(gentype value); ----- -| Returns the bitwise *and*, *or*, or *xor* of _value_ for all work items in the work-group. 
- -|[source,opencl_c] ----- -gentype work_group_scan_inclusive_and(gentype value); -gentype work_group_scan_inclusive_or(gentype value); -gentype work_group_scan_inclusive_xor(gentype value); ----- -| Returns the result of an inclusive scan operation, which is the bitwise *and*, - *or*, or *xor* of _value_ for all work items in the work-group with a - work-group linear local ID less than or equal to this work item’s work-group - linear local ID. - -|[source,opencl_c] ----- -gentype work_group_scan_exclusive_and(gentype value); -gentype work_group_scan_exclusive_or(gentype value); -gentype work_group_scan_exclusive_xor(gentype value); ----- -| Returns the result of an exclusive scan operation, which is the bitwise *and*, - *or*, or *xor* of _value_ for all work items in the work-group with a - work-group linear local ID less than this work item’s work-group linear local - ID. - - If there is no work item in the work-group with a work-group linear local ID less than - this work item’s work-group linear local ID then an identity value `I` is returned. - For *and*, the identity value is `~0` (all bits set). For *or* and *xor*, the identity - value is `0`. - -|==== - -The table below describes the OpenCL C programming language built-in functions -that perform multiplicative operations across work items in a work-group. These -functions must be encountered by all work items in a work-group executing the -kernel, otherwise the behavior is undefined. For the functions below, the -generic type name `gentype` may be one of the supported built-in scalar data -types `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is -supported), or `half` (if half precision is supported). - -[cols="2a,1",options="header"] -|==== -| Function -| Description - -|[source,opencl_c] ----- -gentype work_group_reduce_mul(gentype value); ----- -| Returns the multiplication of _value_ for all work items in the work-group. 
- -|[source,opencl_c] ----- -gentype work_group_scan_inclusive_mul(gentype value); ----- -| Returns the result of an inclusive scan operation which is the multiplication - of _value_ for all work items in the work-group with a work-group linear local - ID less than or equal to this work item’s work-group linear local ID. - -|[source,opencl_c] ----- -gentype work_group_scan_exclusive_mul(gentype value); ----- -| Returns the result of an exclusive scan operation which is the multiplication - of _value_ for all work items in the work-group with a work-group linear local - ID less than this work item’s work-group linear local ID. - - If there is no work item in the work-group with a work-group linear local ID - less than this work item’s work-group linear local ID then the identity value - `1` is returned. - -|==== --- - -=== Issues - -. For these built-in functions, do we only want to support the types supported by the existing work-group collective functions, or do we want to support the types supported by the sub-group collective functions? -+ --- -`RESOLVED`: The extension will require the same types as the existing work-group collective functions. - -The difference are the 8-bit and 16-bit types: `char`, `uchar`, `short`, and `ushort`. Note that `half` is already supported, if half-precision is supported. --- - diff --git a/ext/introduction.asciidoc b/ext/introduction.asciidoc index 1580441f5..311549b02 100644 --- a/ext/introduction.asciidoc +++ b/ext/introduction.asciidoc @@ -5,24 +5,36 @@ [[extensions-overview]] == Extensions Overview -This document describes the list of optional features supported by OpenCL. -Optional extensions are not required to be supported by a conformant OpenCL -implementation, but are expected to be widely available, and in some cases may define -functionality that is likely to be required in a future revision of the -OpenCL specification. - -This document describes all extensions that have been approved by the OpenCL -working group. 
-It is a _unified_ specification, meaning that the extensions described in this -document are not specific to a specific core OpenCL specification version. - -OpenCL extensions approved by the OpenCL working group may be _promoted_ to -core features in later revisions of OpenCL. -When this occurs, the feature described by the extension specification -is merged into the core OpenCL specification. -The extension will continue to be documented in this specification, both for -backwards compatibility and for devices that wish to support the feature -but that are unable to support the newer core OpenCL version. +_Extensions_ are optional features which may be supported by OpenCL +implementations. +Extensions are not required to be supported by a conformant OpenCL +implementation, but are expected to be widely available, and in some cases +may define functionality that is likely to be required in a future revision +of the OpenCL specification. + +In the past, this document contained full specification language for +Khronos-approved `khr` extensions, described in terms of changes to the core +OpenCL Specification. +This extension language has now been integrated into the OpenCL 3.0 +Specification, and can be read in context there. + +The remaining parts of this document describe general issues in _using_ +extensions, such as API <<naming-convention-for-optional-extensions, naming conventions for optional extensions>>; OpenCL C +<<compiler-directives-for-optional-extensions, compiler directives for optional extensions>>; and <<getting-opencl-api-extension-function-pointers, getting OpenCL API extension function pointers>>. + +In addition, there is a section on <>. + +Finally, the <<quick-reference, Quick Reference>> appendix summarizes khr +extensions and links to them in the OpenCL API Specification. +In some cases, extensions are mostly or entirely to the OpenCL C language +rather than to the OpenCL API. +Such extensions can be reached by following the links in the API +Specification extension appendices.
+ [[naming-convention-for-optional-extensions]] === Naming Convention for Optional Extensions @@ -60,6 +72,7 @@ convention: * All enumerants defined by the vendor extension will have names of the form *CL_<__enum_name__>_<__vendor_name__>.* + [[compiler-directives-for-optional-extensions]] === Compiler Directives for Optional Extensions @@ -144,6 +157,7 @@ A kernel can now use this preprocessor `#define` to do something like: #endif ---- + [[getting-opencl-api-extension-function-pointers]] === Getting OpenCL API Extension Function Pointers diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index 00757cdbf..3b47e039f 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -2,286 +2,293 @@ // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ +// The API and C specifications are published in the same directory as the +// extension specification, so only the relative URL is required. + +:APISpecURL: OpenCL_API.html + [appendix] +[[quick-reference]] == Quick Reference +Each extension in this table includes a link to the corresponding appendix +in the OpenCL 3.0 API Specification, which provides a fuller description and +references to the actual extension specification language in the API and C +Language Specifications. + // Editors note: Please keep this table in alphabetical order! 
[cols="5,4,2",options="header",] |==== -| *Extension Name* -| *Brief Description* -| *Status* +| Extension Name and Link +| Brief Description +| Status -| <> +| [[cl_khr_3d_image_writes]] link:{APISpecURL}#cl_khr_3d_image_writes[`cl_khr_3d_image_writes`] | Write to 3D images | Core Feature in OpenCL 2.0 -| <> +| [[cl_khr_async_work_group_copy_fence]] link:{APISpecURL}#cl_khr_async_work_group_copy_fence[`cl_khr_async_work_group_copy_fence`] | Asynchronous Copy Fences | Extension -| <> +| [[cl_khr_byte_addressable_store]] link:{APISpecURL}#cl_khr_byte_addressable_store[`cl_khr_byte_addressable_store`] | Read and write from 8-bit and 16-bit pointers | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_command_buffer]] link:{APISpecURL}#cl_khr_command_buffer[`cl_khr_command_buffer`] | Record and Replay Commands | Provisional Extension -| <> +| [[cl_khr_command_buffer_multi_device]] link:{APISpecURL}#cl_khr_command_buffer_multi_device[`cl_khr_command_buffer_multi_device`] | Allow a command-buffer to contain commands targeting different devices | Provisional Extension -| <> +| [[cl_khr_command_buffer_mutable_dispatch]] link:{APISpecURL}#cl_khr_command_buffer_mutable_dispatch[`cl_khr_command_buffer_mutable_dispatch`] | Modify kernel execution commands between enqueues of a command-buffer | Provisional Extension -| <> +| [[cl_khr_create_command_queue]] link:{APISpecURL}#cl_khr_create_command_queue[`cl_khr_create_command_queue`] | API to Create Command-Queues with Properties | Core Feature in OpenCL 2.0 -| <> +| [[cl_khr_d3d10_sharing]] link:{APISpecURL}#cl_khr_d3d10_sharing[`cl_khr_d3d10_sharing`] | Share Direct3D 10 Buffers and Textures with OpenCL | Extension -| <> +| [[cl_khr_d3d11_sharing]] link:{APISpecURL}#cl_khr_d3d11_sharing[`cl_khr_d3d11_sharing`] | Share Direct3D 11 Buffers and Textures with OpenCL | Extension -| <> +| [[cl_khr_depth_images]] link:{APISpecURL}#cl_khr_depth_images[`cl_khr_depth_images`] | Single Channel Depth Images | Core Feature in OpenCL 2.0 -| <> +| 
[[cl_khr_device_enqueue_local_arg_types]] link:{APISpecURL}#cl_khr_device_enqueue_local_arg_types[`cl_khr_device_enqueue_local_arg_types`] | Pass Non-Void Local Pointers to Child Kernels | Extension -| <> +| [[cl_khr_device_uuid]] link:{APISpecURL}#cl_khr_device_uuid[`cl_khr_device_uuid`] | Unique Device and Driver Identifier Queries | Extension -| <> +| [[cl_khr_dx9_media_sharing]] link:{APISpecURL}#cl_khr_dx9_media_sharing[`cl_khr_dx9_media_sharing`] | Share DirectX 9 Media Surfaces with OpenCL | Extension -| <> +| [[cl_khr_egl_event]] link:{APISpecURL}#cl_khr_egl_event[`cl_khr_egl_event`] | Share EGL Sync Objects with OpenCL | Extension -| <> +| [[cl_khr_egl_image]] link:{APISpecURL}#cl_khr_egl_image[`cl_khr_egl_image`] | Share EGL Images with OpenCL | Extension -| <> +| [[cl_khr_extended_async_copies]] link:{APISpecURL}#cl_khr_extended_async_copies[`cl_khr_extended_async_copies`] | 2D and 3D Async Copies | Extension -| <> +| [[cl_khr_extended_bit_ops]] link:{APISpecURL}#cl_khr_extended_bit_ops[`cl_khr_extended_bit_ops`] | Bit Insert, Extract, and Reverse Operations | Extension -| <> +| [[cl_khr_extended_versioning]] link:{APISpecURL}#cl_khr_extended_versioning[`cl_khr_extended_versioning`] | Extend versioning of platform, devices, extensions, etc. 
| Core Feature in OpenCL 3.0 (with minor changes) -| <> +| [[cl_khr_external_memory]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory`] | Common Functionality for External Memory Sharing | Provisional Extension -| <> +| [[cl_khr_external_memory_dma_buf]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_dma_buf`] | dma_buf External Memory Handles | Provisional Extension -| <> +| [[cl_khr_external_memory_dx]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_dx`] | Direct3D 11 and 12 External Memory Handles | Provisional Extension -| <> +| [[cl_khr_external_memory_opaque_fd]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_opaque_fd`] | Opaque File Descriptor External Memory Handles | Provisional Extension -| <> +| [[cl_khr_external_memory_win32]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_win32`] | NT Handle External Memory Handles | Provisional Extension -| <> +| [[cl_khr_expect_assume]] link:{APISpecURL}#cl_khr_expect_assume[`cl_khr_expect_assume`] | Kernel Optimization Hints | Extension -| <> +| [[cl_khr_external_semaphore]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore`] | Common Functionality for External Semaphore Sharing | Provisional Extension -| <> +| [[cl_khr_external_semaphore_dx_fence]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_dx_fence`] | Direct3D 12 External Semaphore Handles | Provisional Extension -| <> +| [[cl_khr_external_semaphore_opaque_fd]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_opaque_fd`] | Opaque File Descriptor External Semaphore Handles | Provisional Extension -| <> +| [[cl_khr_external_semaphore_sync_fd]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_sync_fd`] | Sync FD External Semaphore Handles | Provisional Extension -| <> +| [[cl_khr_external_semaphore_win32]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_win32`] | 
NT Handle External Semaphore Handles | Provisional Extension -| <> +| [[cl_khr_fp16]] link:{APISpecURL}#cl_khr_fp16[`cl_khr_fp16`] | Operations on 16-bit Floating-Point Values | Extension -| <> +| [[cl_khr_fp64]] link:{APISpecURL}#cl_khr_fp64[`cl_khr_fp64`] | Operations on 64-bit Floating-Point Values | Optional Core Feature in OpenCL 1.2 -| <> +| [[cl_khr_gl_depth_images]] link:{APISpecURL}#cl_khr_gl_depth_images[`cl_khr_gl_depth_images`] | Share OpenGL Depth Images with OpenCL | Extension -| <> +| [[cl_khr_gl_event]] link:{APISpecURL}#cl_khr_gl_event[`cl_khr_gl_event`] | Share OpenGL Fence Sync Objects with OpenCL | Extension -| <> +| [[cl_khr_gl_msaa_sharing]] link:{APISpecURL}#cl_khr_gl_msaa_sharing[`cl_khr_gl_msaa_sharing`] | Share OpenGL MSAA Textures with OpenCL | Extension -| <> +| [[cl_khr_gl_sharing]] link:{APISpecURL}#cl_khr_gl_sharing[`cl_khr_gl_sharing`] | Sharing OpenGL Buffers and Textures with OpenCL | Extension -| <> +| [[cl_khr_global_int32_base_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_global_int32_base_atomics`] | Basic Atomic Operations on 32-bit Integers in Global Memory | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_global_int32_extended_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_global_int32_extended_atomics`] | Extended Atomic Operations on 32-bit Integers in Global Memory | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_icd]] link:{APISpecURL}#cl_khr_icd[`cl_khr_icd`] | Installable Client Drivers | Extension -| <> +| [[cl_khr_il_program]] link:{APISpecURL}#cl_khr_il_program[`cl_khr_il_program`] | Support for Intermediate Language (IL) Programs (SPIR-V) | Core Feature in OpenCL 2.1 -| <> +| [[cl_khr_image2d_from_buffer]] link:{APISpecURL}#cl_khr_image2d_from_buffer[`cl_khr_image2d_from_buffer`] | Create 2D Images from Buffers | Core Feature in OpenCL 2.0 -| <> +| [[cl_khr_initialize_memory]] link:{APISpecURL}#cl_khr_initialize_memory[`cl_khr_initialize_memory`] | Initialize Local and Private Memory on Allocation | 
Extension -| <> +| [[cl_khr_int64_base_atomics]] link:{APISpecURL}#cl_khr_int64_atomics[`cl_khr_int64_base_atomics`] | Basic Atomic Operations on 64-bit Integers in Global and Local Memory | Extension -| <> +| [[cl_khr_int64_extended_atomics]] link:{APISpecURL}#cl_khr_int64_atomics[`cl_khr_int64_extended_atomics`] | Extended Atomic Operations on 64-bit Integers in Global and Local Memory | Extension -| <> +| [[cl_khr_local_int32_base_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_local_int32_base_atomics`] | Basic Atomic Operations on 32-bit Integers in Local Memory | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_local_int32_extended_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_local_int32_extended_atomics`] | Extended Atomic Operations on 32-bit Integers in Local Memory | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_integer_dot_product]] link:{APISpecURL}#cl_khr_integer_dot_product[`cl_khr_integer_dot_product`] | Integer dot product operations | Extension -| <> +| [[cl_khr_mipmap_image]] link:{APISpecURL}#cl_khr_mipmap_image[`cl_khr_mipmap_image`] | Create and Use Images with Mipmaps | Extension -| <> -| Write to Images with Mipmaps -| Extension - -| <> +| [[cl_khr_pci_bus_info]] link:{APISpecURL}#cl_khr_pci_bus_info[`cl_khr_pci_bus_info`] | Query PCI Bus Information for an OpenCL Device | Extension -| <> +| [[cl_khr_priority_hints]] link:{APISpecURL}#cl_khr_priority_hints[`cl_khr_priority_hints`] | Create Command-Queues with Different Priorities | Extension -| <> +| [[cl_khr_select_fprounding_mode]] link:{APISpecURL}#cl_khr_select_fprounding_mode[`cl_khr_select_fprounding_mode`] | Set the Current Kernel Rounding Mode | DEPRECATED -| <> +| [[cl_khr_semaphore]] link:{APISpecURL}#cl_khr_semaphore[`cl_khr_semaphore`] | Semaphore Synchronization Primitives | Provisional Extension -| <> +| [[cl_khr_spir]] link:{APISpecURL}#cl_khr_spir[`cl_khr_spir`] | Standard Portable Intermediate Representation Programs | Extension, Superseded by IL Programs / 
SPIR-V -| <> +| [[cl_khr_srgb_image_writes]] link:{APISpecURL}#cl_khr_srgb_image_writes[`cl_khr_srgb_image_writes`] | Write to sRGB Images | Extension -| <> +| [[cl_khr_subgroups]] link:{APISpecURL}#cl_khr_subgroups[`cl_khr_subgroups`] | Sub-Groupings of Work Items | Core Feature in OpenCL 2.1 (with minor changes) -| <> +| [[cl_khr_subgroup_ballot]] link:{APISpecURL}#cl_khr_subgroup_ballot[`cl_khr_subgroup_ballot`] | Exchange Ballots Among Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_clustered_reduce]] link:{APISpecURL}#cl_khr_subgroup_clustered_reduce[`cl_khr_subgroup_clustered_reduce`] | Clustered Reductions for Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_extended_types]] link:{APISpecURL}#cl_khr_subgroup_extended_types[`cl_khr_subgroup_extended_types`] | Additional Type Support for Sub-group Functions | Extension -| <> +| [[cl_khr_subgroup_named_barrier]] link:{APISpecURL}#cl_khr_subgroup_named_barrier[`cl_khr_subgroup_named_barrier`] | Barriers for Subsets of a Work-group | Extension -| <> +| [[cl_khr_subgroup_non_uniform_arithmetic]] link:{APISpecURL}#cl_khr_subgroup_non_uniform_arithmetic[`cl_khr_subgroup_non_uniform_arithmetic`] | Sub-group Arithmetic Functions in Non-Uniform Control Flow | Extension -| <> +| [[cl_khr_subgroup_non_uniform_vote]] link:{APISpecURL}#cl_khr_subgroup_non_uniform_vote[`cl_khr_subgroup_non_uniform_vote`] | Hold Votes Among Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_rotate]] link:{APISpecURL}#cl_khr_subgroup_rotate[`cl_khr_subgroup_rotate`] | Rotation Among Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_shuffle]] link:{APISpecURL}#cl_khr_subgroup_shuffle[`cl_khr_subgroup_shuffle`] | General-Purpose Shuffles Among Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_shuffle_relative]] link:{APISpecURL}#cl_khr_subgroup_shuffle_relative[`cl_khr_subgroup_shuffle_relative`] | Relative Shuffles Among Sub-Groupings of Work Items | 
Extension -| <> +| [[cl_khr_suggested_local_work_size]] link:{APISpecURL}#cl_khr_suggested_local_work_size[`cl_khr_suggested_local_work_size`] | Query a Suggested Local Work Size | Extension -| <> +| [[cl_khr_terminate_context]] link:{APISpecURL}#cl_khr_terminate_context[`cl_khr_terminate_context`] | Terminate an OpenCL Context | Extension -| <> +| [[cl_khr_throttle_hints]] link:{APISpecURL}#cl_khr_throttle_hints[`cl_khr_throttle_hints`] | Create Command-Queues with Different Throttle Policies | Extension -| <> +| [[cl_khr_work_group_uniform_arithmetic]] link:{APISpecURL}#cl_khr_work_group_uniform_arithmetic[`cl_khr_work_group_uniform_arithmetic`] | Work-group Uniform Arithmetic | Extension diff --git a/extensions/cl_ext_image_requirements_info.asciidoc b/extensions/cl_ext_image_requirements_info.asciidoc index 200116a51..aedc71bc5 100644 --- a/extensions/cl_ext_image_requirements_info.asciidoc +++ b/extensions/cl_ext_image_requirements_info.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2018-2021 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 :data-uri: :icons: font @@ -108,7 +107,7 @@ is replaced with: -- For a 2D image created from a buffer, the pitch specified (or computed if -pitch specified is 0) must be a multiple of the +pitch specified is 0) must be a multiple of the {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} value returned for parameters compatible with those used to create the image. -- @@ -214,7 +213,7 @@ include::{generated}/api/protos/clGetImageRequirementsInfoEXT.txt[] + Both _image_format_ and _image_desc_ must be non-`NULL`, otherwise {CL_INVALID_VALUE} is returned. 
- + | {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} | `cl_uint` | Returns the max width supported for creating images with the parameters passed @@ -343,7 +342,7 @@ When `cl_khr_image2d_from_buffer` is supported: * For all image formats, image types and a selection of values for other members in _image_desc_ (that MUST include `0`) ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} query can be performed successfully ** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE_MAX_BUFFER_SIZE} for images of {CL_MEM_OBJECT_IMAGE1D_BUFFER} type or smaller than or equal to the value returned for {CL_DEVICE_IMAGE3D_MAX_WIDTH} for images of {CL_MEM_OBJECT_IMAGE3D} type or smaller than or equal to the value returned for {CL_DEVICE_IMAGE2D_MAX_WIDTH} for all other image types. - + . Negative tests for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query on all image types for which it is not valid * Check that {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. diff --git a/makeSpec b/makeSpec new file mode 100755 index 000000000..4c3decf3c --- /dev/null +++ b/makeSpec @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +# +# Copyright 2020-2024 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +# Build OpenCL specification with requested extension sets and options. 
+#
+# Usage: makeSpec script-options make-options
+# Script options are parsed by this script before invoking 'make':
+#   -genpath path - directory for generated files and outputs
+#   -spec core - make a spec with no extensions (default)
+#   -spec khr - make a spec with all KHR extensions
+#   -spec all - make a spec with all available registered extensions
+#   -extension name - add specified extension and its dependencies (may be repeated)
+#   -clean - clean generated files before building
+#   -registry path - API XML to use instead of default
+#   -apiname name - API name to use instead of default
+#   -test - Build the test spec instead
+#   -v - verbose, print actions before executing them
+#   -n - dry-run, print actions instead of executing them
+# make-options - all other options are passed to 'make', including
+#   requested build targets
+
+import argparse, copy, io, os, re, string, subprocess, sys
+
+def execute(args, results):  # echo the command when -v or -n is set; run it unless -n (dry run)
+    if results.verbose or results.dryrun:
+        print("'" + "' '".join(args) + "'")
+    if not results.dryrun:
+        subprocess.check_call(args)  # raises CalledProcessError on a non-zero exit status
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('-clean', action='store_true',
+                        help='Clean generated files before building')
+    parser.add_argument('-extension', action='append',
+                        default=[],
+                        help='Specify a required extension or extensions to add to targets')
+    parser.add_argument('-genpath', action='store',
+                        default='gen',
+                        help='Path to directory containing generated files')
+    parser.add_argument('-spec', action='store',
+                        choices=[ 'core', 'khr', 'all' ],
+                        default='core',
+                        help='Type of spec to generate')
+    parser.add_argument('-registry', action='store',
+                        default=None,
+                        help='Path to API XML registry file specifying version and extension dependencies')
+    parser.add_argument('-apiname', action='store',
+                        default=None,
+                        help='API name to generate')
+    parser.add_argument('-test', action='store_true',
+                        help='Build the test spec instead of the OpenCL spec')
+    parser.add_argument('-n', action='store_true', dest='dryrun',
+                        help='Only prints actions, do not execute them')
+    parser.add_argument('-v', action='store_true', dest='verbose',
+                        help='Print actions before executing them')
+
+    (results, options) = parser.parse_known_args()  # unrecognized options are kept for 'make'
+
+    # Ensure genpath is an absolute path, not relative (an empty path resolves to the cwd)
+    if not os.path.isabs(results.genpath):
+        results.genpath = os.path.join(os.getcwd(), results.genpath)
+
+    # Look for scripts/extdependency.py
+    # This requires makeSpec to be invoked from the repository root, but we
+    # could derive that path.
+    sys.path.insert(0, 'scripts')
+    from extdependency import ApiDependencies
+    deps = ApiDependencies(results.registry, results.apiname)
+
+    # List of extensions to build with from the requested -spec
+    # Also construct a spec title
+    # This should respect version dependencies as well
+
+    # Temporary workaround, as the spec markup does not include non-khr
+    # extension appendices yet.
+
+    if results.spec == 'all':
+        results.spec = 'khr'
+        print("WARNING: 'all' argument to -spec interpreted as 'khr' at present", file=sys.stderr)
+
+    if results.spec == 'core':
+        title = ''
+        exts = set()
+    elif results.spec == 'khr':
+        title = 'with all KHR extensions'
+        exts = set(deps.khrExtensions())
+    elif results.spec == 'all':  # not reachable while the workaround above remaps 'all' to 'khr'
+        title = 'with all registered extensions'
+        exts = set(deps.allExtensions())
+
+    # List of explicitly requested extension and all its supported dependencies
+    extraexts = set()
+    for name in results.extension:
+        if name in deps.allExtensions():
+            extraexts.add(name)
+            for dep in deps.children(name):
+                if dep in deps.allExtensions():
+                    extraexts.add(dep)
+        else:
+            raise Exception(f'ERROR: unknown extension {name}')
+
+    # See if any explicitly requested extensions are not implicitly requested
+    # Add any such extensions to the spec title
+    extraexts -= exts
+    if len(extraexts) > 0:
+        exts.update(extraexts)
+        if title != '':
+            title += ' and ' + ', '.join(sorted(extraexts))
+        else:
+            title += 'with ' + ', '.join(sorted(extraexts))
+
+    if title != '':
+        title = '(' + title + ')'
+
+    # Finally, actually invoke make as needed for the targets
+    args = [ 'make', 'GENERATED=' + results.genpath ]
+
+    if results.clean:
+        # If OUTDIR is set on the command line, pass it to the 'clean'
+        # target so it is cleaned as well.
+        cleanopts = ['clean']
+        for opt in options:
+            if opt[:7] == 'OUTDIR=':
+                cleanopts.append(opt)
+        try:
+            execute(args + cleanopts, results)
+        except (subprocess.CalledProcessError, OSError):  # 'make clean' failed or could not be started
+            sys.exit(1)
+
+    # Use the test spec if specified. This is used solely by self tests.
+    rootdir = os.path.dirname(os.path.abspath(__file__))
+    if results.test:
+        # Set the spec source to the test spec
+        args.append(f'SPECSRC={rootdir}/build_tests/testspec.adoc')
+        args.append(f'SPECDIR={rootdir}/build_tests/')
+        # Make sure the build is invariant
+        args.append('SPECREVISION=1.2.3')
+        args.append('SPECDATE=\\"2100-11-22 00:33:44Z\\"')
+        args.append('SPECREMARK=\\"test build\\"')
+
+    # The actual target
+    if len(exts) > 0:
+        args.append(f'EXTENSIONS={" ".join(sorted(exts))}')
+    args.append(f'APITITLE={title}')
+    args += options
+
+    try:
+        execute(args, results)
+    except (subprocess.CalledProcessError, OSError):  # 'make' failed or could not be started
+        sys.exit(1)
diff --git a/man/static/clCreateEventFromEGLSyncKHR.txt b/man/static/clCreateEventFromEGLSyncKHR.txt deleted file mode 100644 index f067fc3d2..000000000 --- a/man/static/clCreateEventFromEGLSyncKHR.txt +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateEventFromEGLSyncKHR(3) - -== Name - -clCreateEventFromEGLSyncKHR - Creates a linked event object. - -[source,c] ----- -cl_event clCreateEventFromEGLSyncKHR(cl_context context, - CLeglSyncKHR sync, - CLeglDisplayKHR display, - cl_int *errcode_ret) ----- - - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context or - share group, using the reflink:cl_khr_gl_sharing extension.
- * _sync_ - The name of a sync object of type `EGL_SYNC_FENCE_KHR` created - with respect to `EGLDisplay` _display_. - * _display_ - An `EGLDisplay` handle. - -== Description - -An event object may be created by linking to an EGL sync object. -Completion of such an event object is equivalent to waiting for completion -of the fence command associated with the linked EGL sync object. - - -== Notes - -The parameters of an event object linked to an EGL sync object will return -the following values when queried with flink:clGetEventInfo: - - * The `CL_EVENT_COMMAND_QUEUE` of a linked event is NULL, because the - event is not associated with any OpenCL command-queue. - * The `CL_EVENT_COMMAND_TYPE` of a linked event is - `CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR`, indicating that the event is - associated with a EGL sync object, rather than an OpenCL command. - * The `CL_EVENT_COMMAND_EXECUTION_STATUS` of a linked event is either - `CL_SUBMITTED`, indicating that the fence command associated with the - sync object has not yet completed, or `CL_COMPLETE`, indicating that the - fence command has completed. - -`clCreateEventFromEGLSyncKHR` performs an implicit flink:clRetainEvent on -the returned event object. Creating a linked event object also places a -reference on the linked EGL sync object. When the event object is deleted, -the reference will be removed from the EGL sync object. - -Events returned from `clCreateEventFromEGLSyncKHR` may only be consumed by -`clEnqueueAcquire***` commands. Passing such events to any other OpenCL API -that enqueues commands will generate a `CL_INVALID_EVENT` error." - -Event objects can also be used to reflect the status of an EGL fence sync -object. The sync object in turn refers to a fence command executing in an -EGL client API command stream. This provides another method of coordinating -sharing of EGL / EGL client API objects with OpenCL. 
Completion of EGL / EGL -client API commands may be determined by placing an EGL fence command after -commands using `eglCreateSyncKHR`, creating an event from the resulting EGL -sync object using `clCreateEventFromEGLSyncKHR` and then specifying it in -the _event_wait_list_ of a `clEnqueueAcquire***` command. This method may be -considerably more efficient than calling operations like `glFinish`, and is -referred to as _explicit synchronization_. The application is responsible -for ensuring the command stream associated with the EGL fence is flushed to -ensure the CL queue is submitted to the device. Explicit synchronization is -most useful when an EGL client API context bound to another thread is -accessing the memory objects. - - -== Errors - -Returns a valid OpenCL event object and _errcode_ret_ is set to `CL_SUCCESS` -if the event object is created successfully. -Otherwise, it returns a NULL value with one of the following error values -returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not - created from a GL context. - * `CL_INVALID_EGL_OBJECT_KHR` if _sync_ is not a valid EGLSyncKHR handle - created with respect to `EGLDisplay` _display_. - * `CL_INVALID_EGL_OBJECT_KHR` if _sync_ is not a valid EGLSyncKHR object - of type `EGL_SYNC_FENCE_KHR` created with respect to `EGLDisplay` - _display_. - -== See Also - -reflink:cl_khr_egl_event, -flink:clEnqueueAcquireEGLObjectsKHR, -flink:clEnqueueAcquireGLObjects - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_event - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateEventFromGLsyncKHR.txt b/man/static/clCreateEventFromGLsyncKHR.txt deleted file mode 100644 index 1cbc618ae..000000000 --- a/man/static/clCreateEventFromGLsyncKHR.txt +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateEventFromGLsyncKHR(3) - -== Name - -clCreateEventFromGLsyncKHR - Creates an event object linked to an OpenGL sync object. - -== C Specification - -[source,c] ----- -cl_event clCreateEventFromGLsyncKHR(cl_context context, - GLsync sync, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context or - share group, using the reflink:cl_khr_gl_sharing extension. - * _sync_ - The name of a sync object in the GL share group associated with - _context_. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Description - -An event object may be created by linking to an OpenGL sync object. -Completion of such an event object is equivalent to waiting for completion of the fence command associated with the linked GL sync object. - -== Notes - -The parameters of an event object linked to a GL sync object will return the following values when queried with flink:clGetEventInfo: - - * The `CL_EVENT_COMMAND_QUEUE` of a linked event is NULL, because the event is not associated with any OpenCL command-queue. - * The `CL_EVENT_COMMAND_TYPE` of a linked event is `CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR`, indicating that the event is associated with a GL sync object, rather than an OpenCL command. - * The `CL_EVENT_COMMAND_EXECUTION_STATUS` of a linked event is either `CL_SUBMITTED`, indicating that the fence command associated with the sync object has not yet completed, or `CL_COMPLETE`, indicating that the fence command has completed. - -`clCreateEventFromGLsyncKHR` performs an implicit flink:clRetainEvent on the returned event object. -Creating a linked event object also places a reference on the linked GL sync object. -When the event object is deleted, the reference will be removed from the GL sync object. 
- -Events returned from `clCreateEventFromGLsyncKHR` can be used in the -_event_wait_list_ argument to flink:clEnqueueAcquireGLObjects and CL APIs -that take a `cl_event` as an argument but do not enqueue commands. Passing -such events to any other CL API that enqueues commands will generate a -`CL_INVALID_EVENT` error. - -== Errors - -Returns a valid OpenCL event object and _errcode_ret_ is set to `CL_SUCCESS` if the event object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not created from a GL context. - * `CL_INVALID_GL_OBJECT` if _sync_ is not the name of a sync object in the GL share group associated with _context_. - -== See Also - -reflink:cl_khr_gl_sharing, -reflink:cl_khr_gl_event, -flink:clGetEventInfo, -flink:clEnqueueAcquireGLObjects, -flink:clRetainEvent - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromGLBuffer - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D10BufferKHR.txt b/man/static/clCreateFromD3D10BufferKHR.txt deleted file mode 100644 index 445c42aa3..000000000 --- a/man/static/clCreateFromD3D10BufferKHR.txt +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D10BufferKHR(3) - -== Name - -clCreateFromD3D10BufferKHR - Creates an OpenCL buffer object from a Direct3D 10 buffer. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D10BufferKHR(cl_context context, - cl_mem_flags flags, - ID3D10Buffer *resource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 10 device. 
- * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 10 buffer to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The size of the returned OpenCL buffer object is the same as the size of _resource_. -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL buffer object and _errcode_ret_ is set to `CL_SUCCESS` if the buffer object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid. - * `CL_INVALID_D3D10_RESOURCE_KHR` if _resource_ is not a Direct3D 10 buffer resource, if _resource_ was created with the `D3D10_USAGE` flag `D3D10_USAGE_IMMUTABLE`, if a `cl_mem` from _resource_ has already been created using `clCreateFromD3D10BufferKHR`, or if _context_ was not created against the same Direct3D 10 device from which _resource_ was created. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D10BufferKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D10Texture2DKHR.txt b/man/static/clCreateFromD3D10Texture2DKHR.txt deleted file mode 100644 index a8f420d81..000000000 --- a/man/static/clCreateFromD3D10Texture2DKHR.txt +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D10Texture2DKHR(3) - -== Name - -clCreateFromD3D10Texture2DKHR - Creates an OpenCL 2D image object from a subresource of a Direct3D 10 2D texture. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D10Texture2DKHR(cl_context context, - cl_mem_flags flags, - ID3D10Texture2D *resource, - UINT subresource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 10 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 10 2D texture to share. - * _subresource_ - The subresource of _resource_ to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width, height and depth of the returned OpenCL 2D image object are determined by the width, height and depth of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 2D image object is determined by the format of _resource_ as shown in of Direct3D 10 and corresponding OpenCL image formats of _resource_ in flink:clCreateFromD3D10Texture3DKHR. 
- -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _subresource_ is not a valid subresource index for _resource_. - * `CL_INVALID_D3D10_RESOURCE_KHR` if _resource_ is not a Direct3D 10 texture resource, if _resource_ was created with the `D3D10_USAGE` flag `D3D10_USAGE_IMMUTABLE`, if a _resource_ is a multisampled texture, if a `cl_mem` from subresource _subresource_ of _resource_ has already been created using `clCreateFromD3D10Texture2DKHR`, or if _context_ was not created against the same Direct3D 10 device from which _resource_ was created. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the Direct3D 10 texture format of _resource_ is not a value listed in the table of Direct3D 10 and corresponding OpenCL image formats for flink:clCreateFromD3D10Texture3DKHR, or if the Direct3D 10 texture format of _resource_ does not map to a supported OpenCL image format. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D10Texture2DKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D10Texture3DKHR.txt b/man/static/clCreateFromD3D10Texture3DKHR.txt deleted file mode 100644 index d8f0ab1c2..000000000 --- a/man/static/clCreateFromD3D10Texture3DKHR.txt +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D10Texture3DKHR(3) - -== Name - -clCreateFromD3D10Texture3DKHR - Creates an OpenCL 3D image object from a subresource of a Direct3D 10 3D texture. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D10Texture3DKHR(cl_context context, - cl_mem_flags flags, - ID3D10Texture3D *resource, - UINT subresource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 10 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 10 3D texture to share. - * _subresource_ - The subresource of _resource_ to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width, height and depth of the returned OpenCL 3D image object are determined by the width, height and depth of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 3D image object is determined by the format of _resource_ as shown below in the table of Direct3D 10 and corresponding OpenCL image formats. 
- -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -Following is a list of Direct3D 10 and corresponding OpenCL image formats. - -[cols="1a,1a", options="header"] -|==== -| DXGI format | CL image format (channel order, channel data type) -| `DXGI_FORMAT_R32G32B32A32_FLOAT` | CL_RGBA, CL_FLOAT -| `DXGI_FORMAT_R32G32B32A32_UINT` | CL_RGBA, CL_UNSIGNED_INT32 -| `DXGI_FORMAT_R32G32B32A32_SINT` | CL_RGBA, CL_SIGNED_INT32 -| | -| `DXGI_FORMAT_R16G16B16A16_FLOAT` | CL_RGBA, CL_HALF_FLOAT -| `DXGI_FORMAT_R16G16B16A16_UNORM` | CL_RGBA, CL_UNORM_INT16 -| `DXGI_FORMAT_R16G16B16A16_UINT` | CL_RGBA, CL_UNSIGNED_INT16 -| `DXGI_FORMAT_R16G16B16A16_SNORM` | CL_RGBA, CL_SNORM_INT16 -| `DXGI_FORMAT_R16G16B16A16_SINT` | CL_RGBA, CL_SIGNED_INT16 -| | -| `DXGI_FORMAT_R8G8B8A8_UNORM` | CL_BGRA, CL_UNORM_INT8 -| `DXGI_FORMAT_R8G8B8A8_UNORM` | CL_RGBA, CL_UNORM_INT8 -| `DXGI_FORMAT_R8G8B8A8_UINT` | CL_RGBA, CL_UNSIGNED_INT8 -| `DXGI_FORMAT_R8G8B8A8_SNORM` | CL_RGBA, CL_SNORM_INT8 -| `DXGI_FORMAT_R8G8B8A8_SINT` | CL_RGBA, CL_SIGNED_INT8 -| | -| `DXGI_FORMAT_R32G32_FLOAT` | CL_RG, CL_FLOAT -| `DXGI_FORMAT_R32G32_UINT` | CL_RG, CL_UNSIGNED_INT32 -| `DXGI_FORMAT_R32G32_SINT` | CL_RG, CL_SIGNED_INT32 -| | -| `DXGI_FORMAT_R16G16_FLOAT` | CL_RG, CL_HALF_FLOAT -| `DXGI_FORMAT_R16G16_UNORM` | CL_RG, CL_UNORM_INT16 -| `DXGI_FORMAT_R16G16_UINT` | CL_RG, CL_UNSIGNED_INT16 -| `DXGI_FORMAT_R16G16_SNORM` | CL_RG, CL_SNORM_INT16 -| `DXGI_FORMAT_R16G16_SINT` | CL_RG, CL_SIGNED_INT16 -| | -| `DXGI_FORMAT_R8G8_UNORM` | CL_RG, CL_UNORM_INT8 -| `DXGI_FORMAT_R8G8_UINT` | CL_RG, CL_UNSIGNED_INT8 -| `DXGI_FORMAT_R8G8_SNORM` | CL_RG, CL_SNORM_INT8 -| `DXGI_FORMAT_R8G8_SINT` | CL_RG, CL_SIGNED_INT8 -| | -| `DXGI_FORMAT_R32_FLOAT` | CL_R, CL_FLOAT -| `DXGI_FORMAT_R32_UINT` | CL_R, CL_UNSIGNED_INT32 -| `DXGI_FORMAT_R32_SINT` | 
CL_R, CL_SIGNED_INT32 -| | -| `DXGI_FORMAT_R16_FLOAT` | CL_R, CL_HALF_FLOAT -| `DXGI_FORMAT_R16_UNORM` | CL_R, CL_UNORM_INT16 -| `DXGI_FORMAT_R16_UINT` | CL_R, CL_UNSIGNED_INT16 -| `DXGI_FORMAT_R16_SNORM` | CL_R, CL_SNORM_INT16 -| `DXGI_FORMAT_R16_SINT` | CL_R, CL_SIGNED_INT16 -| | -| `DXGI_FORMAT_R8_UNORM` | CL_R, CL_UNORM_INT8 -| `DXGI_FORMAT_R8_UINT` | CL_R, CL_UNSIGNED_INT8 -| `DXGI_FORMAT_R8_SNORM` | CL_R, CL_SNORM_INT8 -| `DXGI_FORMAT_R8_SINT` | CL_R, CL_SIGNED_INT8 -|==== - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL buffer object and _errcode_ret_ is set to `CL_SUCCESS` if the buffer object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _subresource_ is not a valid subresource index for _resource_. - * `CL_INVALID_D3D10_RESOURCE_KHR` if _resource_ is not a Direct3D 10 texture resource, if _resource_ was created with the `D3D10_USAGE` flag `D3D10_USAGE_IMMUTABLE`, if a _resource_ is a multisampled texture, if a `cl_mem` from subresource _subresource_ of _resource_ has already been created using `clCreateFromD3D10Texture3DKHR`, or if _context_ was not created against the same Direct3D 10 device from which _resource_ was created. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the Direct3D 10 texture format of _resource_ is not a value listed in the above table of Direct3D 10 and corresponding OpenCL image formats, or if the Direct3D 10 texture format of _resource_ does not map to a supported OpenCL image format. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D10Texture3DKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D11BufferKHR.txt b/man/static/clCreateFromD3D11BufferKHR.txt deleted file mode 100644 index e3a007aa3..000000000 --- a/man/static/clCreateFromD3D11BufferKHR.txt +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D11BufferKHR(3) - -== Name - -clCreateFromD3D11BufferKHR - Creates an OpenCL buffer object from a Direct3D 11 buffer. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D11BufferKHR(cl_context context, - cl_mem_flags flags, - ID3D11Buffer *resource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 11 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 11 buffer to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The size of the returned OpenCL buffer object is the same as the size of _resource_. -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL buffer object and _errcode_ret_ is set to `CL_SUCCESS` if the buffer object is created successfully. 
-Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid. - * `CL_INVALID_D3D11_RESOURCE_KHR` if _resource_ is not a Direct3D 11 buffer resource, if _resource_ was created with the `D3D11_USAGE` flag `D3D11_USAGE_IMMUTABLE`, if a `cl_mem` from _resource_ has already been created using `clCreateFromD3D11BufferKHR`, or if _context_ was not created against the same Direct3D 11 device from which _resource_ was created. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D11BufferKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D11Texture2DKHR.txt b/man/static/clCreateFromD3D11Texture2DKHR.txt deleted file mode 100644 index 1ff783c0e..000000000 --- a/man/static/clCreateFromD3D11Texture2DKHR.txt +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D11Texture2DKHR(3) - -== Name - -clCreateFromD3D11Texture2DKHR - Creates an OpenCL 2D image object from a subresource of a Direct3D 11 2D texture. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D11Texture2DKHR(cl_context context, - cl_mem_flags flags, - ID3D11Texture2D *resource, - UINT subresource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 11 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. 
(See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 11 2D texture to share. - * _subresource_ - The subresource of _resource_ to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width, height and depth of the returned OpenCL 2D image object are determined by the width, height and depth of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 2D image object is determined by the format of _resource_ as shown in Direct3D 11 and corresponding OpenCL image formats of _resource_ in flink:clCreateFromD3D11Texture3DKHR. - -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _subresource_ is not a valid subresource index for _resource_. - * `CL_INVALID_D3D11_RESOURCE_KHR` if _resource_ is not a Direct3D 11 texture resource, if _resource_ was created with the `D3D11_USAGE` flag `D3D11_USAGE_IMMUTABLE`, if a _resource_ is a multisampled texture, if a `cl_mem` from subresource _subresource_ of _resource_ has already been created using `clCreateFromD3D11Texture2DKHR`, or if _context_ was not created against the same Direct3D 11 device from which _resource_ was created. 
- * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the Direct3D 11 texture format of _resource_ is not a value listed in the table of Direct3D 11 and corresponding OpenCL image formats for flink:clCreateFromD3D11Texture3DKHR, or if the Direct3D 11 texture format of _resource_ does not map to a supported OpenCL image format. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D11Texture2DKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D11Texture3DKHR.txt b/man/static/clCreateFromD3D11Texture3DKHR.txt deleted file mode 100644 index ccbec7667..000000000 --- a/man/static/clCreateFromD3D11Texture3DKHR.txt +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D11Texture3DKHR(3) - -== Name - -clCreateFromD3D11Texture3DKHR - Creates an OpenCL 3D image object from a subresource of a Direct3D 11 3D texture. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D11Texture3DKHR(cl_context context, - cl_mem_flags flags, - ID3D11Texture3D *resource, - UINT subresource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 11 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 11 texture to share. - * _subresource_ - The subresource of _resource_ to share. - * _errcode_ret_ - An appropriate error code. 
If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width, height and depth of the returned OpenCL 3D image object are determined by the width, height and depth of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 3D image object is determined by the format of _resource_ as shown in the table below (Table 9.9.3). - -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -[cols="1a,1a", options="header"] -|==== -| DXGI format | CL image format (channel order, channel data type) -| `DXGI_FORMAT_R32G32B32A32_FLOAT` | `CL_RGBA, CL_FLOAT` -| `DXGI_FORMAT_R32G32B32A32_UINT` | `CL_RGBA, CL_UNSIGNED_INT32` -| `DXGI_FORMAT_R32G32B32A32_SINT` | `CL_RGBA, CL_SIGNED_INT32` -| `DXGI_FORMAT_R16G16B16A16_FLOAT` | `CL_RGBA, CL_HALF_FLOAT` -| `DXGI_FORMAT_R16G16B16A16_UNORM` | `CL_RGBA, CL_UNORM_INT16` -| `DXGI_FORMAT_R16G16B16A16_UINT` | `CL_RGBA, CL_UNSIGNED_INT16` -| `DXGI_FORMAT_R16G16B16A16_SNORM` | `CL_RGBA, CL_SNORM_INT16` -| `DXGI_FORMAT_R16G16B16A16_SINT` | `CL_RGBA, CL_SIGNED_INT16` -| `DXGI_FORMAT_B8G8R8A8_UNORM` | `CL_BGRA, CL_UNORM_INT8` -| `DXGI_FORMAT_R8G8B8A8_UNORM` | `CL_RGBA, CL_UNORM_INT8` -| `DXGI_FORMAT_R8G8B8A8_UINT` | `CL_RGBA, CL_UNSIGNED_INT8` -| `DXGI_FORMAT_R8G8B8A8_SNORM` | `CL_RGBA, CL_SNORM_INT8` -| `DXGI_FORMAT_R8G8B8A8_SINT` | `CL_RGBA, CL_SIGNED_INT8` -| `DXGI_FORMAT_R32G32_FLOAT` | `CL_RG, CL_FLOAT` -| `DXGI_FORMAT_R32G32_UINT` | `CL_RG, CL_UNSIGNED_INT32` -| `DXGI_FORMAT_R32G32_SINT` | `CL_RG, CL_SIGNED_INT32` -| `DXGI_FORMAT_R16G16_FLOAT` | `CL_RG, CL_HALF_FLOAT` -| `DXGI_FORMAT_R16G16_UNORM` | `CL_RG, CL_UNORM_INT16` -| `DXGI_FORMAT_R16G16_UINT` | `CL_RG, CL_UNSIGNED_INT16` -| `DXGI_FORMAT_R16G16_SNORM` | `CL_RG, CL_SNORM_INT16` -| `DXGI_FORMAT_R16G16_SINT` | `CL_RG, CL_SIGNED_INT16` -| 
`DXGI_FORMAT_R8G8_UNORM` | `CL_RG, CL_UNORM_INT8` -| `DXGI_FORMAT_R8G8_UINT` | `CL_RG, CL_UNSIGNED_INT8` -| `DXGI_FORMAT_R8G8_SNORM` | `CL_RG, CL_SNORM_INT8` -| `DXGI_FORMAT_R8G8_SINT` | `CL_RG, CL_SIGNED_INT8` -| `DXGI_FORMAT_R32_FLOAT` | `CL_R, CL_FLOAT` -| `DXGI_FORMAT_R32_UINT` | `CL_R, CL_UNSIGNED_INT32` -| `DXGI_FORMAT_R32_SINT` | `CL_R, CL_SIGNED_INT32` -| `DXGI_FORMAT_R16_FLOAT` | `CL_R, CL_HALF_FLOAT` -| `DXGI_FORMAT_R16_UNORM` | `CL_R, CL_UNORM_INT16` -| `DXGI_FORMAT_R16_UINT` | `CL_R, CL_UNSIGNED_INT16` -| `DXGI_FORMAT_R16_SNORM` | `CL_R, CL_SNORM_INT16` -| `DXGI_FORMAT_R16_SINT` | `CL_R, CL_SIGNED_INT16` -| `DXGI_FORMAT_R8_UNORM` | `CL_R, CL_UNORM_INT8` -| `DXGI_FORMAT_R8_UINT` | `CL_R, CL_UNSIGNED_INT8` -| `DXGI_FORMAT_R8_SNORM` | `CL_R, CL_SNORM_INT8` -| `DXGI_FORMAT_R8_SINT` | `CL_R, CL_SIGNED_INT8` -|==== - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _subresource_ is not a valid subresource index for _resource_. - * `CL_INVALID_D3D11_RESOURCE_KHR` if _resource_ is not a Direct3D 11 texture resource, if _resource_ was created with the `D3D11_USAGE` flag `D3D11_USAGE_IMMUTABLE`, if a _resource_ is a multisampled texture, if a `cl_mem` from subresource _subresource_ of _resource_ has already been created using `clCreateFromD3D11Texture3DKHR`, or if _context_ was not created against the same Direct3D 11 device from which _resource_ was created. 
- * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the Direct3D 11 texture format of _resource_ is not a value listed in the table above of Direct3D 11 and corresponding OpenCL image formats, or if the Direct3D 11 texture format of _resource_ does not map to a supported OpenCL image format. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D11Texture3DKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromDX9MediaSurfaceKHR.txt b/man/static/clCreateFromDX9MediaSurfaceKHR.txt deleted file mode 100644 index af3a7a4bd..000000000 --- a/man/static/clCreateFromDX9MediaSurfaceKHR.txt +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromDX9MediaSurfaceKHR(3) - -== Name - -clCreateFromDX9MediaSurfaceKHR - Creates an OpenCL image object from a media surface. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromDX9MediaSurfaceKHR(cl_context context, - cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, - void *surface_info, - cl_uint plane, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a media adapter. - * _flags_ - A bit-field that is used to specify usage information. Refer - to the table of allowed values for _flags_ for flink:clCreateBuffer. - Only `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, and `CL_MEM_READ_WRITE` - values specified in the table of allowed values for _flags_ for - flink:clCreateBuffer can be used. 
- * _adapter_type_ -+ --- -A value from enumeration of supported adapters described in the table of -`cl_dx9_media_adapter_type_khr` values for -flink:clGetDeviceIDsFromDX9MediaAdapterKHR. -The type of _surface_info_ is determined by the adapter type. -The implementation does not need to support all adapter types. -This approach provides flexibility to support additional adapter types in -the future. -Supported adapter types are `CL_ADAPTER_D3D9_KHR`, `CL_ADAPTER_D3D9EX_KHR`, -and `CL_ADAPTER_DXVA_KHR`. - -If _adapter_type_ is `CL_ADAPTER_D3D9_KHR`, `CL_ADAPTER_D3D9EX_KHR`, or -`CL_ADAPTER_DXVA_KHR`, the _surface_info_ points to the following structure: - -`typedef struct _cl_dx9_surface_info_khr { IDirect3DSurface9 *resource; HANDLE shared_handle; } cl_dx9_surface_info_khr;` - -For D3D9 surfaces, we need both the handle to the resource and the resource -itself to have a sufficient amount of information to eliminate a copy of the -surface for sharing in cases where this is possible. Elimination of the copy -is driver dependent. `shared_handle` may be NULL and this may result in -sub-optimal performance. --- - * _surface_info_ - A pointer to one of the structures defined in the - _adapter_type_ description above passed in as a `void` *. - * _plane_ - The plane of resource to share for planar surface formats. For - planar formats, we use the plane parameter to obtain a handle to this - specific plane (Y, U or V for example). For nonplanar formats used by - media, _plane_ must be 0. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width and height of the returned OpenCL 2D image object are determined by the width and height of the plane of resource. -The channel type and order of the returned image object is determined by the format and plane of resource and are described in tables 9.10.3 and 9.10.4. - -This call will increment the internal media surface count on _resource_.
-The internal media surface reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingDX9Inc.txt[] - -== Errors - -Returns a valid non-zero 2D image object and _errcode_ret_ is set to `CL_SUCCESS` if the 2D image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _plane_ is not a valid plane of _resource_ specified in _surface_info_. - * `CL_INVALID_DX9_MEDIA_SURFACE_KHR` if _resource_ specified in _surface_info_ is not a valid resource or is not associated with _adapter_type_ (e.g., _adapter_type_ is set to `CL_ADAPTER_D3D9_KHR` and _resource_ is not a Direct3D 9 surface created in `D3DPOOL_DEFAULT`). - * `CL_INVALID_DX9_MEDIA_SURFACE_KHR` if `shared_handle` specified in _surface_info_ is not NULL or a valid handle value. - * `CL_INVALID_DX9_MEDIA_SURFACE_KHR` if _adapter_type_ is set to a media adapter and the _surface_info_ does not reference a media surface of the required type, or if _adapter_type_ is set to a media adapter type and _surface_info_ does not contain a valid reference to a media surface on that adapter, by - flink:clGetMemObjectInfo when _param_name_ is a surface or handle when the image was not created from an appropriate media surface, and from - flink:clGetImageInfo when _param_name_ is `CL_IMAGE_DX9_MEDIA_PLANE_KHR` and image was not created from an appropriate media surface. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the texture format of _resource_ is not listed in tables 9.10.3 and 9.10.4. - * `CL_INVALID_OPERATION` if there are no devices in _context_ that support _adapter_type_. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. 
- * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_dx9_media_sharing, -flink:clCreateBuffer, - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromDX9MediaSurfaceKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromEGLImageKHR.txt b/man/static/clCreateFromEGLImageKHR.txt deleted file mode 100644 index 99e85f0dd..000000000 --- a/man/static/clCreateFromEGLImageKHR.txt +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromEGLImageKHR(3) - -== Name - -clCreateFromEGLImageKHR - Creates an EGLImage target from an EGLImage source. - -[source,c] ----- -cl_mem clCreateFromEGLImageKHR(cl_context context, - CLeglDisplayKHR display, - CLeglImageKHR image, - cl_mem_flags flags, - const cl_egl_image_properties_khr * properties, - cl_int * errcode_ret) ----- - - -== Parameters - - * _context_ - A valid OpenCL context. - * _display_ - Should be of type `EGLDisplay`, cast into the type - `CLeglDisplayKHR`. - * _image_ - Should be of type `EGLImageKHR`, cast into the type - `CLeglImageKHR`. Assuming no errors are generated in this function, the - resulting image object will be an EGLImage target of the specified - EGLImage _image_. The resulting `cl_mem` is an image object which may be - used normally by all OpenCL operations. This maps to an `image2d_t` type - in OpenCL kernel code. - * _flags_ - -+ --- -A bit-field that is used to specify usage information about the memory -object being created. The possible values for _flags_ are: -`CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY` and `CL_MEM_READ_WRITE`. 
- -For OpenCL 1.2 _flags_ also accepts: `CL_MEM_HOST_WRITE_ONLY`, -`CL_MEM_HOST_READ_ONLY` or `CL_MEM_HOST_NO_ACCESS`. - -This extension only requires support for `CL_MEM_READ_ONLY`, and for OpenCL -1.2 `CL_MEM_HOST_NO_ACCESS`. For OpenCL 1.1, a `CL_INVALID_OPERATION` will -be returned for images which do not support host mapping. - -If the value passed in _flags_ is not supported by the OpenCL implementation -it will return `CL_INVALID_VALUE`. The accepted _flags_ may be dependent -upon the texture format used. --- - * _properties_ - Specifies a list of property names and their - corresponding values. Each property name is immediately followed by the - corresponding desired value. The list is terminated with 0. No - properties are currently supported with this version of the extension. - _properties_ can be NULL. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Description - -`clCreateFromEGLImageKHR` creates an EGLImage target of type `cl_mem` from -the EGLImage source provided as _image_. - -.Lifetime of Shared Objects - -An OpenCL memory object created from an EGL image remains valid according to -the lifetime behaviour as described in EGL_KHR_image_base. - -"Any EGLImage siblings exist in any client API context" - -For OpenCL this means that while the application retains a reference on the -`cl_mem` (EGL sibling), the image remains valid. - -.Synchronizing OpenCL and EGL Access to Shared Objects - -In order to ensure data integrity, the application is responsible for -synchronizing access to shared CL/EGL objects by their respective APIs. -Failure to provide such synchronization may result in race conditions and -other undefined behavior including non-portability between implementations. - -Prior to calling flink:clEnqueueAcquireEGLObjectsKHR, the application must -ensure that any pending operations which access the objects specified in -_mem_objects_ have completed. 
This may be accomplished in a portable way by -ceasing all client operations on the resource, and issuing and waiting for -completion of a `glFinish` command on all GL contexts with pending -references to these objects. Implementations may offer more efficient -synchronization methods, such as synchronization primitives or fence -operations. - -Similarly, after calling flink:clEnqueueReleaseEGLObjectsKHR, the application -is responsible for ensuring that any pending OpenCL operations which access -the objects specified in _mem_objects_ have completed prior to executing -subsequent commands in other APIs which reference these objects. This may be -accomplished in a portable way by calling flink:clWaitForEvents with -the event object returned by flink:clEnqueueReleaseEGLObjectsKHR, or by -calling flink:clFinish. As above, some implementations may offer more -efficient methods. - -Attempting to access the data store of an EGLImage object after it has been -acquired by OpenCL and before it has been released will result in undefined -behavior. Similarly, attempting to access a shared EGLImage object from -OpenCL before it has been acquired by the OpenCL command-queue or after it -has been released, will result in undefined behavior. - -== Errors - - * `CL_INVALID_CONTEXT` if _context_ is not a valid OpenCL context. - * `CL_INVALID_VALUE` if _properties_ contains invalid values, if _display_ - is not a valid display object or if _flags_ are not in the set defined - above. - * `CL_INVALID_EGL_OBJECT_KHR` if _image_ is not a valid EGLImage object. - * `CL_IMAGE_FORMAT_NOT_SUPPORTED` if the OpenCL implementation is not able - to create a `cl_mem` compatible with the provided `CLeglImageKHR` for an - implementation-dependent reason (this could be caused by, but not limited - to, reasons such as unsupported texture formats, etc). - * `CL_INVALID_OPERATION` if there are no devices in _context_ that support - images (i.e.
`CL_DEVICE_IMAGE_SUPPORT` specified in table 4.3 (see - flink:clGetDeviceInfo) is `CL_FALSE`) or if the flags passed are not - supported for that image type. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources - required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_egl_image, -reflink:cl_khr_egl_event - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromGLBuffer.txt b/man/static/clCreateFromGLBuffer.txt deleted file mode 100644 index 8518310ab..000000000 --- a/man/static/clCreateFromGLBuffer.txt +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromGLBuffer(3) - -== Name - -clCreateFromGLBuffer - Creates an OpenCL buffer object from an OpenGL buffer object. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromGLBuffer(cl_context context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context. - * _flags_ - A bit-field that is used to specify usage information. Refer - to the table for flink:clCreateBuffer for a description of _flags_. Only - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY` and `CL_MEM_READ_WRITE` values - specified in the table at flink:clCreateBuffer can be used. - * _bufobj_ - The name of a GL buffer object. The data store of the GL - buffer object must have been previously created by calling OpenGL - function `glBufferData`, although its contents need not be initialized.
- The size of the data store will be used to determine the size of the CL - buffer object. - * _errcode_ret_ - Returns an appropriate error code as described below. If - _errcode_ret_ is NULL, no error code is returned. - -== Description - -The size of the GL buffer object data store at the time `clCreateFromGLBuffer` is called will be used as the size of buffer object returned by `clCreateFromGLBuffer`. -If the state of a GL buffer object is modified through the GL API (e.g. -`glBufferData`) while there exists a corresponding CL buffer object, subsequent use of the CL buffer object will result in undefined behavior. - -The flink:clRetainMemObject and -flink:clReleaseMemObject functions can be used to retain and release the buffer object. - -The CL buffer object created using `clCreateFromGLBuffer` can also be used to create a CL 1D image buffer object. - -== Notes - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -Returns a valid non-zero OpenCL buffer object and _errcode_ret_ is set to `CL_SUCCESS` if the buffer object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not created from a GL context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid. - * `CL_INVALID_GL_OBJECT` if _bufobj_ is not a GL buffer object or is a GL buffer object but does not have an existing data store or the size of the buffer is 0. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_gl_sharing, -reflink:cl_khr_gl_event, -flink:clCreateBuffer, -flink:clCreateFromGLTexture - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromGLBuffer - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromGLRenderbuffer.txt b/man/static/clCreateFromGLRenderbuffer.txt deleted file mode 100644 index a79e8e4dd..000000000 --- a/man/static/clCreateFromGLRenderbuffer.txt +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromGLRenderbuffer(3) - -== Name - -clCreateFromGLRenderbuffer - Creates an OpenCL 2D image object from an OpenGL renderbuffer object. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromGLRenderbuffer(cl_context context, - cl_mem_flags flags, - GLuint renderbuffer, - cl_int * errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context. - * _flags_ - A bit-field that is used to specify usage information. Refer - to the table at flink:clCreateBuffer for a description of _flags_. Only - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, and `CL_MEM_READ_WRITE` values - specified in the table at flink:clCreateBuffer can be used. - * _renderbuffer_ - The name of a GL renderbuffer object. The renderbuffer - storage must be specified before the image object can be created. The - _renderbuffer_ format and dimensions defined by OpenGL will be used to - create the 2D image object. Only GL renderbuffers with internal formats - that map to appropriate image channel order and data type specified in - tables 5.5 and 5.6 (see reflink:cl_image_format) can be used to create - the 2D image object. - * _errcode_ret_ - Returns an appropriate error code as described below. 
If - _errcode_ret_ is NULL, no error code is returned. - -== Description - -If the state of a GL renderbuffer object is modified through the GL API (i.e. -changes to the dimensions or format used to represent pixels of the GL renderbuffer using appropriate GL API calls such as `glRenderbufferStorage`) while there exists a corresponding CL image object, subsequent use of the CL image object will result in undefined behavior. - -The flink:clRetainMemObject and -flink:clReleaseMemObject functions can be used to retain and release the image objects. - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not created from a GL context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid. - * `CL_INVALID_GL_OBJECT` if _renderbuffer_ is not a GL renderbuffer object or if the width or height of _renderbuffer_ is zero. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the OpenGL renderbuffer internal format does not map to a supported OpenCL image format. - * `CL_INVALID_OPERATION` if _renderbuffer_ is a multi-sample GL renderbuffer object. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_gl_sharing, -flink:clCreateBuffer, -reflink:cl_image_format, -flink:clRetainMemObject, -flink:clReleaseMemObject - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromGLRenderbuffer - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromGLTexture.txt b/man/static/clCreateFromGLTexture.txt deleted file mode 100644 index 6cffe9407..000000000 --- a/man/static/clCreateFromGLTexture.txt +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromGLTexture(3) - -== Name - -clCreateFromGLTexture - Creates an OpenCL image object, image array object, or image buffer object from an OpenGL texture object, texture array object, texture buffer object, or a single face of an OpenGL cubemap texture object. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromGLTexture(cl_context context, - cl_mem_flags flags, - GLenum texture_target, - GLint miplevel, - GLuint texture, - cl_int * errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context. - * _flags_ - A bit-field that is used to specify usage information. Refer - to the table for flink:clCreateBuffer for a description of _flags_. Only - the values `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY` and - `CL_MEM_READ_WRITE` can be used. - * _texture_target_ - -+ --- -This value must be one of `GL_TEXTURE_1D`, `GL_TEXTURE_1D_ARRAY`, -`GL_TEXTURE_BUFFER`, `GL_TEXTURE_2D`, `GL_TEXTURE_2D_ARRAY`, -`GL_TEXTURE_3D`, `GL_TEXTURE_CUBE_MAP_POSITIVE_X`, -`GL_TEXTURE_CUBE_MAP_POSITIVE_Y`, `GL_TEXTURE_CUBE_MAP_POSITIVE_Z`, -`GL_TEXTURE_CUBE_MAP_NEGATIVE_X`, `GL_TEXTURE_CUBE_MAP_NEGATIVE_Y`, -`GL_TEXTURE_CUBE_MAP_NEGATIVE_Z`, or `GL_TEXTURE_RECTANGLE`. 
-(`GL_TEXTURE_RECTANGLE` requires OpenGL 3.1. Alternatively, -`GL_TEXTURE_RECTANGLE_ARB` may be specified if the OpenGL extension -`GL_ARB_texture_rectangle` is supported.) _texture_target_ is used only to -define the image type of _texture_. No reference to a bound GL texture -object is made or implied by this parameter. - -If the reflink:cl_khr_gl_msaa_sharing extension is enabled, _texture_target_ -may be `GL_TEXTURE_2D_MULTISAMPLE` or `GL_TEXTURE_2D_MULTISAMPLE_ARRAY`. - -If _texture_target_ is `GL_TEXTURE_2D_MULTISAMPLE`, `clCreateFromGLTexture` -creates an OpenCL 2D multi-sample image object from an OpenGL 2D -multi-sample texture. - -If _texture_target_ is `GL_TEXTURE_2D_MULTISAMPLE_ARRAY`, -`clCreateFromGLTexture` creates an OpenCL 2D multi-sample array image object -from an OpenGL 2D multi-sample texture. --- - * _miplevel_ - The mipmap level to be used. If _texture_target_ is - `GL_TEXTURE_BUFFER`, _miplevel_ must be 0. Implementations may return - `CL_INVALID_OPERATION` for _miplevel_ values > 0. - * _texture_ - The name of a GL 1D, 2D, 3D, 1D array, 2D array, cubemap, - rectangle or buffer texture object. The texture object must be a - complete texture as per OpenGL rules on texture completeness. The - _texture_ format and dimensions defined by OpenGL for the specified - _miplevel_ of the texture will be used to create the OpenCL image memory - object. Only GL texture objects with an internal format that maps to - appropriate image channel order and data type specified in tables 5.5 - and 5.6 (see reflink:cl_image_format) may be used to create the OpenCL - image memory object. - * _errcode_ret_ - Returns an appropriate error code as described below. If - _errcode_ret_ is NULL, no error code is returned. - -== Notes - -If the state of a GL texture object is modified through the GL API (e.g.
-`glTexImage2D`, `glTexImage3D` or the values of the texture parameters `GL_TEXTURE_BASE_LEVEL` or `GL_TEXTURE_MAX_LEVEL` are modified) while there exists a corresponding CL image object, subsequent use of the CL image object will result in undefined behavior. - -The flink:clRetainMemObject and -flink:clReleaseMemObject functions can be used to retain and release the image objects. - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not created from a GL context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if value specified in _texture_target_ is not one of the values specified in the description of _texture_target_. - * `CL_INVALID_MIP_LEVEL` if _miplevel_ is less than the value of _level~base~_ (for OpenGL implementations) or zero (for OpenGL ES implementations); or greater than the value of _q_ (for both OpenGL and OpenGL ES). - _level~base~_ and _q_ are defined for the texture in section 3.8.10 (Texture Completeness) of the OpenGL 2.1 specification and section 3.7.10 of the OpenGL ES 2.0. - * `CL_INVALID_MIP_LEVEL` if _miplevel_ is greater than zero and the OpenGL implementation does not support creating from non-zero mipmap levels. - * `CL_INVALID_GL_OBJECT` if _texture_ is not a GL texture object whose type matches _texture_target_, if the specified _miplevel_ of _texture_ is not defined, or if the width or height of the specified _miplevel_ is zero or if the GL texture object is incomplete. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the OpenGL texture internal format does not map to a supported OpenCL image format. 
- * `CL_INVALID_OPERATION` if _texture_ is a GL texture object created with a border width value greater than zero. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_gl_sharing, -flink:clCreateBuffer, -flink:clCreateFromGLBuffer - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromGLTexture - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt b/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt deleted file mode 100644 index 946322777..000000000 --- a/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireD3D10ObjectsKHR(3) - -== Name - -clEnqueueAcquireD3D10ObjectsKHR - Acquire OpenCL memory objects that have been created from Direct3D 10 resources - -== C Specification - -[source,c] ----- -cl_int clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from Direct3D 10 resources. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. 
If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this particular - command and can be used to query or queue a wait for this particular - command to complete. _event_ can be NULL, in which case it will not be - possible for the application to query the status of this command or - queue a wait for this command to complete. If _event_wait_list_ and - _event_ are not NULL, _event_ should not refer to an element of the - _event_wait_list_ array. - -== Notes - -The Direct3D 10 objects are acquired by the OpenCL context associated with `command-queue` and can therefore be used by all command-queues associated with the OpenCL context. - -OpenCL memory objects created from Direct3D 10 resources must be acquired before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a Direct3D 10 resource is used while it is not currently acquired by OpenCL, the call attempting to use that OpenCL memory object will return `CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR`. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueAcquireD3D10ObjectsKHR` provides the synchronization guarantee that any Direct3D 10 calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireD3D10ObjectsKHR` is called will complete executing before _event_ reports completion and before the execution of any subsequent OpenCL work issued in _command_queue_ begins. 
-If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any Direct3D 10 calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireD3D10ObjectsKHR` is called have completed before calling `clEnqueueAcquireD3D10ObjectsKHR`. - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL then the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from Direct3D 10 resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a Direct3D 10 context. - * `CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR` when any of _mem_objects_ are currently acquired using `clEnqueueAcquireD3D10ObjectsKHR` but have not been released using flink:clEnqueueReleaseD3D10ObjectsKHR. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host.
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueAcquireD3D10ObjectsKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt b/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt deleted file mode 100644 index 34822411e..000000000 --- a/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireD3D11ObjectsKHR(3) - -== Name - -clEnqueueAcquireD3D11ObjectsKHR - Acquire OpenCL memory objects that have been created from Direct3D 11 resources - -== C Specification - -[source,c] ----- -cl_int clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from Direct3D 11 resources. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. 
- * _event_ - Returns an event object that identifies this particular - command and can be used to query or queue a wait for this particular - command to complete. _event_ can be NULL in which case it will not be - possible for the application to query the status of this command or - queue a wait for this command to complete. If _event_wait_list_ and - _event_ are not NULL, _event_ should not refer to an element of the - _event_wait_list_ array. - -== Notes - -The Direct3D 11 objects are acquired by the OpenCL context associated with `command-queue` and can therefore be used by all command-queues associated with the OpenCL context. - -OpenCL memory objects created from Direct3D 11 resources must be acquired before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a Direct3D 11 resource is used while it is not currently acquired by OpenCL, the call attempting to use that OpenCL memory object will return `CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR`. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueAcquireD3D11ObjectsKHR` provides the synchronization guarantee that any Direct3D 11 calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireD3D11ObjectsKHR` is called will complete executing before _event_ reports completion and before the execution of any subsequent OpenCL work issued in _command_queue_ begins. -If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any Direct3D 11 calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireD3D11ObjectsKHR` is called have completed before calling `clEnqueueAcquireD3D11ObjectsKHR`. - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. 
-If _num_objects_ is 0 and _mem_objects_ is NULL then the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from Direct3D 11 resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a Direct3D 11 context. - * `CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR` if memory objects in _mem_objects_ have previously been acquired using `clEnqueueAcquireD3D11ObjectsKHR` but have not been released using flink:clEnqueueReleaseD3D11ObjectsKHR. - * `CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR` when any of _mem_objects_ are currently acquired by OpenCL. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueAcquireD3D11ObjectsKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt b/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt deleted file mode 100644 index e2750b64c..000000000 --- a/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireDX9MediaSurfacesKHR(3) - -== Name - -clEnqueueAcquireDX9MediaSurfacesKHR - Acquire OpenCL memory objects that have been created from a media surface. - -== C Specification - -[source,c] ----- -cl_int clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from media surfaces. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this particular - command and can be used to query or queue a wait for this particular - command to complete. _event_ can be NULL in which case it will not be - possible for the application to query the status of this command or - queue a wait for this command to complete. If _event_wait_list_ and - _event_ are not NULL, _event_ should not refer to an element of the - _event_wait_list_ array. - -== Notes - -Used to acquire OpenCL memory objects that have been created from a media surface. 
-The media surfaces are acquired by the OpenCL context associated with _command_queue_ and can therefore be used by all command-queues associated with the OpenCL context. - -OpenCL memory objects created from media surfaces must be acquired before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a media surface is used while it is not currently acquired by OpenCL, the call attempting to use that OpenCL memory object will return `CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR`. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueAcquireDX9MediaSurfacesKHR` provides the synchronization guarantee that any media adapter API calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireDX9MediaSurfacesKHR` is called will complete executing before _event_ reports completion and before the execution of any subsequent OpenCL work issued in _command_queue_ begins. -If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any media adapter API calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireDX9MediaSurfacesKHR` is called have completed before calling `clEnqueueAcquireDX9MediaSurfacesKHR`. - -include::sharingDX9Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL then the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from media surfaces. 
- * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a device that can share the media surface referenced by _mem_objects_. - * `CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR` if memory objects in _mem_objects_ have previously been acquired using `clEnqueueAcquireDX9MediaSurfacesKHR` but have not been released using flink:clEnqueueReleaseDX9MediaSurfacesKHR. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_dx9_media_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueAcquireDX9MediaSurfacesKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireEGLObjectsKHR.txt b/man/static/clEnqueueAcquireEGLObjectsKHR.txt deleted file mode 100644 index a2f84a60c..000000000 --- a/man/static/clEnqueueAcquireEGLObjectsKHR.txt +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireEGLObjectsKHR(3) - -== Name - -clEnqueueAcquireEGLObjectsKHR - Acquire OpenCL memory objects that have been created from EGL resources. - -[source,c] ----- -cl_int clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. 
- * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from EGL resources, within the context associate with - _command_queue_. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this command and can - be used to query or queue a wait for the command to complete. _event_ - can be NULL in which case it will not be possible for the application to - query the status of this command or queue a wait for this command to - complete. - -== Description - -This function is used to acquire OpenCL memory objects that have been created from EGL resources. -The EGL objects are acquired by the OpenCL context associated with _command_queue_ and can therefore be used by all command-queues associated with the OpenCL context. - -OpenCL memory objects created from EGL resources must be acquired before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a EGL resource is used while it is not currently acquired by OpenCL, the call attempting to use that OpenCL memory object will return `CL_EGL_RESOURCE_NOT_ACQUIRED_KHR`. - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. 
-Otherwise, it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects in the context associated with _command_queue_. - * `CL_INVALID_EGL_OBJECT_KHR` if memory objects in _mem_objects_ have not been created from EGL resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_egl_image, -reflink:cl_khr_egl_event, -flink:clEnqueueReleaseEGLObjectsKHR, -flink:clCreateFromEGLImageKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireGLObjects.txt b/man/static/clEnqueueAcquireGLObjects.txt deleted file mode 100644 index d989fd3de..000000000 --- a/man/static/clEnqueueAcquireGLObjects.txt +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireGLObjects(3) - -== Name - -clEnqueueAcquireGLObjects - Acquire OpenCL memory objects that have been created from OpenGL objects. 
- -== C Specification - -[source,c] ----- -cl_int clEnqueueAcquireGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. All devices used to create the - OpenCL context associated with _command_queue_ must support acquiring - shared CL/GL objects. This constraint is enforced at context creation - time. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of CL memory objects that correspond - to GL objects. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this command and can - be used to query or queue a wait for the command to complete. _event_ - can be NULL in which case it will not be possible for the application to - query the status of this command or queue a wait for this command to - complete. If _event_wait_list_ and _event_ are not NULL, _event_ should - not refer to an element of the _event_wait_list_ array. 
-+ --- -If the reflink:cl_khr_gl_event extension is supported, if an OpenGL context -is bound to the current thread, then any OpenGL commands which - - * affect or access the contents of a memory object listed in the - _mem_objects_ list, and - * were issued on that OpenGL context prior to the call to - `clEnqueueAcquireGLObjects` - -will complete before execution of any OpenCL commands following the -`clEnqueueAcquireGLObjects` which affect or access any of those memory -objects. If a non-NULL event object is returned, it will report completion -only after completion of such OpenGL commands. - -If the reflink:cl_khr_egl_event extension is supported, prior to calling -`clEnqueueAcquireGLObjects`, the application must ensure that any pending -EGL or EGL client API operations which access the objects specified in -_mem_objects_ have completed. --- - -== Description - -These objects need to be acquired before they can be used by any OpenCL commands queued to a command-queue. -The OpenGL objects are acquired by the OpenCL context associated with _command_queue_ and can therefore be used by all command-queues associated with the OpenCL context. - -== Notes - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from an OpenGL context. 
- * `CL_INVALID_GL_OBJECT` if memory objects in _mem_objects_ have not been created from a GL object(s). - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_gl_sharing, -reflink:cl_khr_gl_event, -flink:clEnqueueReleaseGLObjects, -flink:clCreateBuffer - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueAcquireGLObjects - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt b/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt deleted file mode 100644 index 7be974afd..000000000 --- a/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseD3D10ObjectsKHR(3) - -== Name - -clEnqueueReleaseD3D10ObjectsKHR - Release OpenCL memory objects that have been created from Direct3D 10 resources. - -== C Specification - -[source,c] ----- -cl_int clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _num_objects_ - The number of memory objects to be released in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from Direct3D 10 resources. 
- * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. - -== Notes - -The Direct3D 10 objects are released by the OpenCL context associated with _command_queue_. - -OpenCL memory objects created from Direct3D 10 resources which have been acquired by OpenCL must be released by OpenCL before they may be accessed by Direct3D 10. -Accessing a Direct3D 10 resource while its corresponding OpenCL memory object is acquired is in error and will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueReleaseD3D10ObjectsKHR` provides the synchronization guarantee that any calls to Direct3D 10 calls involving the interop device(s) used in the OpenCL context made after the call to `clEnqueueReleaseD3D10ObjectsKHR` will not start executing until after all events in _event_wait_list_ are complete and all work already submitted to _command_queue_ completes execution. 
-If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any Direct3D 10 calls involving the interop device(s) used in the OpenCL context made after `clEnqueueReleaseD3D10ObjectsKHR` will not start executing until after event returned by `clEnqueueReleaseD3D10ObjectsKHR` reports completion. - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from Direct3D 10 resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from an Direct3D 10 device. - * `CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR` if memory objects in _mem_objects_ have not previously been acquired using `clEnqueueAcquireD3D10ObjectsKHR`, or have been released using `clEnqueueReleaseD3D10ObjectsKHR` since the last time that they were acquired. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueReleaseD3D10ObjectsKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt b/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt deleted file mode 100644 index fedb562e3..000000000 --- a/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseD3D11ObjectsKHR(3) - -== Name - -clEnqueueReleaseD3D11ObjectsKHR - Release OpenCL memory objects that have been created from Direct3D 11 resources. - -== C Specification - -[source,c] ----- -cl_int clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _num_objects_ - The number of memory objects to be released in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from Direct3D 11 resources. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. 
- If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. - -== Notes - -The Direct3D 11 objects are released by the OpenCL context associated with _command_queue_. - -OpenCL memory objects created from Direct3D 11 resources which have been acquired by OpenCL must be released by OpenCL before they may be accessed by Direct3D 11. -Accessing a Direct3D 11 resource while its corresponding OpenCL memory object is acquired is in error and will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueReleaseD3D11ObjectsKHR` provides the synchronization guarantee that any calls to Direct3D 11 calls involving the interop device(s) used in the OpenCL context made after the call to `clEnqueueReleaseD3D11ObjectsKHR` will not start executing until after all events in _event_wait_list_ are complete and all work already submitted to _command_queue_ completes execution. -If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any Direct3D 11 calls involving the interop device(s) used in the OpenCL context made after `clEnqueueReleaseD3D11ObjectsKHR` will not start executing until after event returned by `clEnqueueReleaseD3D11ObjectsKHR` reports completion. - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. 
-Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from Direct3D 11 resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a Direct3D 11 device. - * `CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR` if memory objects in _mem_objects_ have not previously been acquired using flink:clEnqueueAcquireD3D11ObjectsKHR, or have been released using `clEnqueueReleaseD3D11ObjectsKHR` since the last time that they were acquired. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueReleaseD3D11ObjectsKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt b/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt deleted file mode 100644 index c5648dada..000000000 --- a/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseDX9MediaSurfacesKHR(3) - -== Name - -clEnqueueReleaseDX9MediaSurfacesKHR - Release OpenCL memory objects that have been created from media surfaces. - -== C Specification - -[source,c] ----- -cl_int clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be released in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from media surfaces. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. - -== Notes - -Used to release OpenCL memory objects that have been created from media surfaces. -The media surfaces are released by the OpenCL context associated with _command_queue_. 
- -OpenCL memory objects created from media surfaces which have been acquired by OpenCL must be released by OpenCL before they may be accessed by the media adapter API. -Accessing a media surface while its corresponding OpenCL memory object is acquired is in error and will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueReleaseDX9MediaSurfacesKHR` provides the synchronization guarantee that any calls to media adapter APIs involving the interop device(s) used in the OpenCL context made after the call to `clEnqueueReleaseDX9MediaSurfacesKHR` will not start executing until after all events in _event_wait_list_ are complete and all work already submitted to _command_queue_ completes execution. -If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any media adapter API calls involving the interop device(s) used in the OpenCL context made after `clEnqueueReleaseDX9MediaSurfacesKHR` will not start executing until after event returned by `clEnqueueReleaseDX9MediaSurfacesKHR` reports completion. - -include::sharingDX9Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and <_mem_objects_> is NULL then the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from valid media surfaces. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. 
- * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a media object. - * `CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR` if memory objects in _mem_objects_ have not been previously been acquired using flink:clEnqueueAcquireDX9MediaSurfacesKHR or have been released using `clEnqueueReleaseDX9MediaSurfacesKHR` since the last time that they were acquired. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_dx9_media_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueReleaseDX9MediaSurfacesKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseEGLObjectsKHR.txt b/man/static/clEnqueueReleaseEGLObjectsKHR.txt deleted file mode 100644 index cddd6dcfa..000000000 --- a/man/static/clEnqueueReleaseEGLObjectsKHR.txt +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseEGLObjectsKHR(3) - -== Name - -clEnqueueReleaseEGLObjectsKHR - Release OpenCL memory objects that have been created from EGL resources. - -[source,c] ----- -cl_int clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. 
- * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from EGL resources, within the context associated with - _command_queue_. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this command and can - be used to query or queue a wait for the command to complete. _event_ - can be NULL in which case it will not be possible for the application to - query the status of this command or queue a wait for this command to - complete. - -== Description - -This function is used to release OpenCL memory objects that have been -created from EGL resources. The EGL objects are released by the OpenCL -context associated with _command_queue_. - -OpenCL memory objects created from EGL resources which have been acquired by -OpenCL must be released by OpenCL before they may be accessed by EGL or by -EGL client APIs. - -Accessing a EGL resource while its corresponding OpenCL memory object is -acquired is in error and will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. If -_num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and -returns `CL_SUCCESS`. 
Otherwise, it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects in the context associated with _command_queue_. - * `CL_INVALID_EGL_OBJECT_KHR` if memory objects in _mem_objects_ have not been created from EGL resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_egl_image, -reflink:cl_khr_egl_event, -flink:clEnqueueAcquireEGLObjectsKHR, -flink:clCreateFromEGLImageKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseGLObjects.txt b/man/static/clEnqueueReleaseGLObjects.txt deleted file mode 100644 index 8f6ee5dbd..000000000 --- a/man/static/clEnqueueReleaseGLObjects.txt +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseGLObjects(3) - -== Name - -clEnqueueReleaseGLObjects - Release OpenCL memory objects that have been created from OpenGL objects. 
- -== C Specification - -[source,c] ----- -cl_int clEnqueueReleaseGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be released in - _mem_objects_. - * _mem_objects_ - A pointer to a list of CL memory objects that correspond - to GL objects. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - These parameters specify events that need to - complete before this command can be executed. If _event_wait_list_ is - NULL, then this particular command does not wait on any event to - complete. If _event_wait_list_ is NULL, _num_events_in_wait_list_ must - be 0. If _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this particular - read/write command and can be used to query or queue a wait for the - command to complete. _event_ can be NULL in which case it will not be - possible for the application to query the status of this command or - queue a wait for this command to complete. If _event_wait_list_ and - _event_ are not NULL, _event_ should not refer to an element of the - _event_wait_list_ array. 
-+ --- -If the reflink:cl_khr_gl_event extension is supported, if an OpenGL context -is bound to the current thread, then then any OpenGL commands which - - * affect or access the contents of the memory objects listed in the - _mem_objects_ list, and - * are issued on that context after the call to `clEnqueueReleaseGLObjects` - -will not execute until after execution of any OpenCL commands preceding the -`clEnqueueReleaseGLObjects` which affect or access any of those memory -objects. If a non-NULL event object is returned, it will report completion -before execution of such OpenGL commands. --- - -== Description - -Release OpenCL memory objects that have been created from OpenGL objects. -These objects need to be released before they can be used by OpenGL. -The OpenGL objects are released by the OpenCL context associated with _command_queue_. - -== Notes - -If the reflink:cl_khr_gl_sharing extension is supported and if an OpenGL context is bound to the current thread, then any OpenGL commands which does: - - * affect or access the contents of a memory object listed in the _mem_objects_ list, and - * are issued on that context after the call to `clEnqueueReleaseGLObjects` - -will not execute until after execution of any OpenCL commands preceding the `clEnqueueReleaseGLObjects` which affect or access any of those memory objects. -If a non-NULL _event_ object is returned, it will report completion before execution of such OpenGL commands. - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -`clEnqueueReleaseGLObjects` returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. 
- * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from an OpenGL context. - * `CL_INVALID_GL_OBJECT` if memory objects in _mem_objects_ have not been created from a GL object(s). - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_gl_sharing, -flink:clEnqueueAcquireGLObjects, -reflink:cl_khr_gl_event - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueReleaseGLObjects - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetDeviceIDsFromD3D10KHR.txt b/man/static/clGetDeviceIDsFromD3D10KHR.txt deleted file mode 100644 index 80941348e..000000000 --- a/man/static/clGetDeviceIDsFromD3D10KHR.txt +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetDeviceIDsFromD3D10KHR(3) - -== Name - -clGetDeviceIDsFromD3D10KHR - Querying OpenCL Devices Corresponding to Direct3D 10 Devices. 
- -== C Specification - -[source,c] ----- -cl_int clGetDeviceIDsFromD3D10KHR(cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices) ----- - -== Parameters - - * _platform_ - Refers to the platform ID returned by - flink:clGetPlatformIDs. - * _d3d_device_source_ - Specifies the type of _d3d_object_ and may be one - of the following:. -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_d3d_device_source_khr | Type of _d3d_object_ -| `CL_D3D10_DEVICE_KHR` | `ID3D10Device` * -| `CL_D3D10_DXGI_ADAPTER_KHR` | `IDXGIAdapter` * -|==== --- - * _d3d_object_ - Specifies the object whose corresponding OpenCL devices - are being queried. The type of _d3d_object_ must be as specified in the - table above. - * d3d_device_set* - Specifies the set of devices to return, and must be - one of the following: -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_d3d_device_set_khr | Devices returned in _devices_ -| `CL_PREFERRED_DEVICES_FOR_D3D10_KHR` - | The OpenCL devices associated with the specified Direct3D object. -| `CL_ALL_DEVICES_FOR_D3D10_KHR` - | All OpenCL devices which may interoperate with the specified Direct3D object. - Performance of sharing data on these devices may be considerably less than on the preferred devices. -|==== --- - * _num_entries_ - The number of `cl_device_id` entries that can be added - to _devices_. If _devices_ is not NULL, the _num_entries_ must be - greater than zero. - * _devices_ - Returns a list of OpenCL devices found. The `cl_device_id` - values returned in _devices_ can be used to identify a specific OpenCL - device. If _devices_ is NULL, this argument is ignored. The number of - OpenCL devices returned is the mininum of the value specified by - _num_entries_ and the number of OpenCL devices corresponding to - _d3d_object_. 
- * _num_devices_ - Returns the number of OpenCL devices available that - correspond to _d3d_object_. If _num_devices_ is NULL, this argument is - ignored. - -== Description - -The OpenCL devices corresponding to a Direct3D 10 device may be queried. -The OpenCL devices corresponding to a DXGI adapter may also be queried. -The OpenCL devices corresponding to a Direct3D 10 device will be a subset of the OpenCL devices corresponding to the DXGI adapter against which the Direct3D 10 device was created. - -== Notes - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -Otherwise it may return: - - * `CL_INVALID_PLATFORM` if _platform_ is not a valid platform. - * `CL_INVALID_VALUE` if _d3d_device_source_ is not a valid value, _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero and _devices_ is not NULL, or if both _num_devices_ and _devices_ are NULL. - * `CL_DEVICE_NOT_FOUND` if no OpenCL devices that correspond to _d3d_object_ were found. - -== See Also - -reflink:cl_khr_d3d10_sharing, -flink:clCreateFromD3D10BufferKHR, -flink:clCreateFromD3D10Texture2DKHR, -flink:clCreateFromD3D10Texture3DKHR, -flink:clEnqueueAcquireD3D10ObjectsKHR, -flink:clEnqueueReleaseD3D10ObjectsKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetDeviceIDsFromD3D10KHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetDeviceIDsFromD3D11KHR.txt b/man/static/clGetDeviceIDsFromD3D11KHR.txt deleted file mode 100644 index cd6e0f60d..000000000 --- a/man/static/clGetDeviceIDsFromD3D11KHR.txt +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetDeviceIDsFromD3D11KHR(3) - -== Name - -clGetDeviceIDsFromD3D11KHR - Querying OpenCL Devices Corresponding to Direct3D 11 Devices. - -== C Specification - -[source,c] ----- -cl_int clGetDeviceIDsFromD3D11KHR(cl_platform_id platform, - cl_d3d11_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices) ----- - -== Parameters - - * _platform_ - Refers to the platform ID returned by - flink:clGetPlatformIDs. - * _d3d_device_source_ - Specifies the type of _d3d_object_ and may be one - of the following (Table 9.11.1): -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_d3d_device_source_khr | Type of _d3d_object_ -| `CL_D3D11_DEVICE_KHR` | ID3D11Device * -| `CL_D3D11_DXGI_ADAPTER_KHR` | IDXGIAdapter * -|==== --- - * _d3d_object_ - Specifies the object whose corresponding OpenCL devices - are being queried. The type of _d3d_object_ must be as specified in the - table above. - * _d3d_device_set_ - Specifies the set of devices to return, and must be - one of the following (Table 9.11.2): -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_d3d_device_set_khr | Devices returned in _devices_ -| `CL_PREFERRED_DEVICES_FOR_D3D11_KHR` - | The OpenCL devices associated with the specified Direct3D object. -| `CL_ALL_DEVICES_FOR_D3D11_KHR` - | All OpenCL devices which may interoperate with the specified Direct3D - object. Performance of sharing data on these devices may be - considerably less than on the preferred devices. -|==== --- - * _num_entries_ - The number of `cl_device_id` entries that can be added - to _devices_. If _devices_ is not NULL, the _num_entries_ must be - greater than zero. - * _devices_ - Returns a list of OpenCL devices found. The `cl_device_id` - values returned in _devices_ can be used to identify a specific OpenCL - device. 
If _devices_ is NULL, this argument is ignored. The number of - OpenCL devices returned is the mininum of the value specified by - _num_entries_ and the number of OpenCL devices corresponding to - _d3d_object_. - * _num_devices_ - Returns the number of OpenCL devices available that - correspond to _d3d_object_. If _num_devices_ is NULL, this argument is - ignored. - -== Description - -The OpenCL devices corresponding to a Direct3D 11 device may be queried. -The OpenCL devices corresponding to a DXGI adapter may also be queried. -The OpenCL devices corresponding to a Direct3D 11 device will be a subset of the OpenCL devices corresponding to the DXGI adapter against which the Direct3D 11 device was created. - -== Notes - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -Otherwise it may return: - - * `CL_INVALID_PLATFORM` if _platform_ is not a valid platform. - * `CL_INVALID_VALUE` if _d3d_device_source_ is not a valid value, _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero and _devices_ is not NULL, or if both _num_devices_ and _devices_ are NULL. - * `CL_DEVICE_NOT_FOUND` if no OpenCL devices that correspond to _d3d_object_ were found. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetDeviceIDsFromD3D10KHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt b/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt deleted file mode 100644 index 58b265d30..000000000 --- a/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetDeviceIDsFromDX9MediaAdapterKHR(3) - -== Name - -clGetDeviceIDsFromDX9MediaAdapterKHR - Query a media adapter for any associated OpenCL devices. - -== C Specification - -[source,c] ----- -cl_int clGetDeviceIDsFromDX9MediaAdapterKHR(cl_platform_id platform, - cl_uint num_media_adapters, - cl_dx9_media_adapter_type_khr *media_adapters_type, - void *media_adapters, - cl_dx9_media_adapter_set_khr media_adapter_set, - cl_uint num_entries, - cl_device_id *devices, - cl_int *num_devices) ----- - -== Parameters - - * _platform_ - Refers to the platform ID returned by - flink:clGetPlatformIDs. - * _num_media_adapters_ - Specifies the number of media adapters. - * _media_adapters_type_ - An array of _num_media_adapters_ entries. Each - entry specifies the type of media adapter and must be one of the values - described in the table (Table 9.10.1) below. -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_dx9_media_adapter_type_khr | Type of media adapters -| `CL_ADAPTER_D3D9_KHR` | IDirect3DDevice9 * -| `CL_ADAPTER_D3D9EX_KHR` | IDirect3DDevice9Ex * -| `CL_ADAPTER_DXVA_KHR` | IDXVAHD_Device * -|==== --- - * _media_adapters_ - An array of _num_media_adapters_ entries. Each entry - specifies the actual adapter whose type is specified by - `media_adapter_type`. The _media_adapters_ must be one of the types - describes in the table above. - * _media_adapter_set_ - Specifies the set of adapters to return and must - be one of the values described in the table (Table 9.10.2) below. -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_dx9_media_adapter_set_khr | Description -| `CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR` - | The preferred OpenCL devices associated with the media adapter. -| `CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR` | -| `CL_ALL_DEVICES_FOR_MEDIA_DX9_ADAPTER_KHR` - | All OpenCL devices that may interoperate with the media adapter. 
-|==== --- - * _num_entries_ - The number of cl_device_id entries that can be added to - _devices_. If _devices_ is not NULL, the _num_entries_ must be greater - than zero. - * _devices_ - Returns a list of OpenCL devices found that support the list - of media adapters specified. The `cl_device_id` values returned in - _devices_ can be used to identify a specific OpenCL device. If _devices_ - is NULL, this argument is ignored. The number of OpenCL devices returned - is the minimum of the value specified by _num_entries_ or the number of - OpenCL devices whose type matches `device_type`. - * _num_devices_ - Returns the number of OpenCL devices. If _num_devices_ - is NULL, this argument is ignored. - -== Description - -Queries a media adapter for any associated OpenCL devices. -Adapters with associated OpenCL devices can enable media surface sharing between the two. - -== Notes - -include::sharingDX9Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_PLATFORM` if _platform_ is not a valid platform. - * `CL_INVALID_VALUE` if _num_media_adapters_ is zero or if _media_adapters_type_ is NULL or if _media_adapters_ is NULL. - * `CL_INVALID_VALUE` if any of the entries in _media_adapters_type_ or _media_adapters_ is not a valid value. - * `CL_INVALID_VALUE` if _media_adapter_set_ is not a valid value. - * `CL_INVALID_VALUE` if _num_entries_ is equal to zero and _devices_ is not NULL or if both _num_devices_ and _devices_ are NULL. - * `CL_DEVICE_NOT_FOUND` if no OpenCL devices that correspond to adapters specified in _media_adapters_ and _media_adapters_type_ were found. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_dx9_media_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetDeviceIDsFromDX9MediaAdapterKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetGLContextInfoKHR.txt b/man/static/clGetGLContextInfoKHR.txt deleted file mode 100644 index df2534e1d..000000000 --- a/man/static/clGetGLContextInfoKHR.txt +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetGLContextInfoKHR(3) - -== Name - -clGetGLContextInfoKHR - Get OpenGL context information. - -[source,c] ----- -cl_int clGetGLContextInfoKHR(const cl_context_properties *properties, - cl_gl_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) ----- - - -== Parameters - - * _properties_ - Points to an attribute list whose format and valid - contents are identical to the _properties_ argument of - flink:clCreateContext. _properties_ must identify a single valid GL - context or GL share group object. - * _param_name_ - A constant that specifies the GL context information to - query, and must be one of the values shown in the table below. - * _param_value_size_ - Specifies the size in bytes of memory pointed to by - _param_value_. This size must be greater than or equal to the size of - the return type described in the table below. - * _param_value_ - A pointer to memory where the result of the query is - returned as described in the table below. If _param_value_ is NULL, it - is ignored. - * _param_value_size_ret_ - Returns the actual size in bytes of data being - queried by _param_value_. If _param_value_size_ret_ is NULL, it is - ignored. - -== Description - -Get OpenGL context information. 
- -[cols="1a,1a,1a", options="header"] -|=== -| _param_name_ | Return Type | Information returned in param_value -| `CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR` | `cl_device_id` - | Return the CL device currently associated with the specified OpenGL context. -| `CL_DEVICES_FOR_GL_CONTEXT_KHR` | `cl_device_id[]` - | List of all CL devices which may be associated with the specified OpenGL context. -|=== - -== Notes - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. If no -device(s) exist corresponding to _param_name_, the call will not fail, but -the value of _param_value_size_ret_ will be zero. Otherwise returns one of -the following: - - * {blank} -+ --- -`CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR` if a context was specified by any -of the following means: - - * A context was specified for an EGL-based OpenGL ES or OpenGL implementation by setting the attributes `CL_GL_CONTEXT_KHR` and `CL_EGL_DISPLAY_KHR` - * A context was specified for a GLX-based OpenGL implementation by setting the attributes `CL_GL_CONTEXT_KHR` and `CL_GLX_DISPLAY_KHR` - * A context was specified for a WGL-based OpenGL implementation by setting the attributes `CL_GL_CONTEXT_KHR` and `CL_WGL_HDC_KHR` - -and any of the following conditions hold: - - * The specified display and context attributes do not identify a valid OpenGL or OpenGL ES context. - * The specified context does not support buffer and renderbuffer objects. - * The specified context is not compatible with the OpenCL context being created (for example, it exists in a physically distinct address space, such as another hardware device; or it does not support sharing data with OpenCL due to implementation restrictions). 
--- - * `CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR` if a share group was specified - for a CGL-based OpenGL implementation by setting the attribute - `CL_CGL_SHAREGROUP_KHR`, and the specified share group does not identify a - valid CGL share group object. - - * {blank} -+ --- -`CL_INVALID_OPERATION` if a context was specified as described above and any of the following conditions hold: - - * A context or share group object was specified for one of CGL, EGL, GLX, or WGL and the OpenGL implementation does not support that window-system binding API. - * More than one of the attributes `CL_CGL_SHAREGROUP_KHR`, `CL_EGL_DISPLAY_KHR`, `CL_GLX_DISPLAY_KHR`, and `CL_WGL_HDC_KHR` is set to a non-default value. - * Both of the attributes `CL_CGL_SHAREGROUP_KHR` and `CL_GL_CONTEXT_KHR` are set to non-default values. - * Any of the devices specified in _devices_ cannot support OpenCL objects which share the data store of an OpenGL object. --- - * `CL_INVALID_VALUE` if an attribute name other than those specified in - the table of supported _properties_ for flink:clCreateContext, - * `CL_INVALID_VALUE` if _param_name_ is not one of the values listed in - the table above; or if the size in bytes specified by _param_value_size_ - is less than the size of the return type shown in the table above, and - _param_value_ is not a NULL value. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources - required by the OpenCL implementation on the device - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources - required by the OpenCL implementation on the host. 
- - -== See Also - -flink:clCreateContext, -reflink:cl_khr_gl_sharing, -reflink:cl_khr_gl_event - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetGLObjectInfo.txt b/man/static/clGetGLObjectInfo.txt deleted file mode 100644 index 1d7b7bbc3..000000000 --- a/man/static/clGetGLObjectInfo.txt +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetGLObjectInfo(3) - -== Name - -clGetGLObjectInfo - Query an OpenGL object used to create an OpenCL memory object. - -== C Specification - -[source,c] ----- -cl_int clGetGLObjectInfo(cl_mem memobj, - cl_gl_object_type *gl_object_type, - GLuint *gl_object_name) ----- - -== Parameters - - * _memobj_ - An OpenCL memory object handle. - * _gl_object_type_ - Returns the type of GL object attached to _memobj_ - and can be `CL_GL_OBJECT_BUFFER`, `CL_GL_OBJECT_TEXTURE2D`, - `CL_GL_OBJECT_TEXTURE3D`, `CL_GL_OBJECT_TEXTURE2D_ARRAY`, - `CL_GL_OBJECT_TEXTURE1D`, `CL_GL_OBJECT_TEXTURE1D_ARRAY`, - `CL_GL_OBJECT_TEXTURE_BUFFER`, or `CL_GL_OBJECT_RENDERBUFFER`. If - _gl_object_type_ is NULL, it is ignored. - * _gl_object_name_ - Returns the GL object name used to create _memobj_. - If _gl_object_name_ is NULL, it is ignored. - -== Description - -The OpenGL object used to create the OpenCL memory object and information about the object type i.e. -whether it is a texture, renderbuffer, or buffer object can be queried using this function. - -== Errors - -Returns `CL_SUCCESS` if the call was executed successfully. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_MEM_OBJECT` if _memobj_ is not a valid OpenCL memory object. 
- * `CL_INVALID_GL_OBJECT` if there is no GL object associated with _memobj_. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_gl_sharing, -flink:clGetGLTextureInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetGLObjectInfo - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetGLTextureInfo.txt b/man/static/clGetGLTextureInfo.txt deleted file mode 100644 index dffef2e65..000000000 --- a/man/static/clGetGLTextureInfo.txt +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetGLTextureInfo(3) - -== Name - -clGetGLTextureInfo - Returns additional information about the GL texture object associated with a memory object. - -== C Specification - -[source,c] ----- -cl_int clGetGLTextureInfo(cl_mem memobj, - cl_gl_texture_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) ----- - -== Parameters - - * _memobj_ - An OpenCL memory object handle. - * _param_name_ - Specifies what additional information about the GL - texture object associated with _memobj_ to query. The list of supported - _param_name_ types and the information returned in _param_value_ by - `clGetGLTextureInfo` is described in the table below (Table 9.5). - * _param_value_ - A pointer to memory where the result being queried is - returned. If _param_value_ is NULL, it is ignored. - * _param_value_size_ - Specifies the size in bytes of memory pointed to by - _param_value_. This size must be {geq} size of return type as described - in the table below. 
- * _param_value_size_ret_ - Returns the actual size in bytes of data copied - to _param_value_. If _param_value_size_ret_ is NULL, it is ignored. -+ --- -Table 9.5: - -[cols="1a,1a,1a", options="header"] -|==== -| cl_gl_texture_info | Return Type | Information returned in _param_value_ -| `CL_GL_TEXTURE_TARGET` | GLenum - | The _texture_target_ argument specified in - flink:clCreateFromGLTexture. -| `CL_GL_MIPMAP_LEVEL` | GLint - | The _miplevel_ argument specified in flink:clCreateFromGLTexture. -| `CL_GL_NUM_SAMPLES` | GLsizei - | If the reflink:cl_khr_gl_msaa_sharing extension is supported, the - _samples_ argument passed to `glTexImage2DMultisample` or - `glTexImage3DMultisample`. If _image_ is not a MSAA texture, 1 is - returned. -|==== --- - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_MEM_OBJECT` if _memobj_ is not a valid OpenCL memory object - * `CL_INVALID_GL_OBJECT` if there is no GL texture object associated with _memobj_. - * `CL_INVALID_VALUE` if _param_name_ is not valid, or if size in bytes specified by _param_value_size_ is < size of return type as described in the table above and _param_value_ is not NULL, or if _param_value_ and _param_value_size_ret_ are NULL. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_gl_sharing, -flink:clGetGLObjectInfo, -flink:clCreateFromGLTexture, -flink:clCreateFromGLTexture - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetGLTextureInfo - -== Copyright - -include::footer.txt[] diff --git a/man/static/clIcdGetPlatformIDsKHR.txt b/man/static/clIcdGetPlatformIDsKHR.txt deleted file mode 100644 index 7c937659b..000000000 --- a/man/static/clIcdGetPlatformIDsKHR.txt +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clIcdGetPlatformIDsKHR(3) - -== Name - -clIcdGetPlatformIDsKHR - Obtain the list of platforms accessible through the Khronos ICD Loader. - -[source,c] ----- -cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, - cl_platform_id *platforms, - cl_uint *num_platforms) ----- - - -== Parameters - - * _num_entries_ - The number of `cl_platform_id` entries that can be added - to _platforms_. If _platforms_ is not NULL, then _num_entries_ must be - greater than zero. - * _platforms_ - Returns a list of OpenCL platforms available for access - through the Khronos ICD Loader. The `cl_platform_id` values returned in - _platforms_ are ICD compatible and can be used to identify a specific - OpenCL platform. If _platforms_ is NULL, then this argument is ignored. - The number of OpenCL platforms returned is the minimum of the value - specified by _num_entries_ or the number of OpenCL platforms available. - * _num_platforms_ - Returns the number of OpenCL platforms available. If - _num_platforms_ is NULL, then this argument is ignored. - -== Notes - -This function is enabled by the -reflink:cl_khr_icd extension. 
- -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully and there are -a non-zero number of platforms available, else it returns one of the errors -below: - - * `CL_PLATFORM_NOT_FOUND_KHR` if zero platforms are available. - * `CL_INVALID_VALUE` if _num_entries_ is equal to zero and _platforms_ is not NULL or if both _num_platforms_ and _platforms_ are NULL. - -== See Also - -flink:clGetPlatformIDs, -reflink:cl_khr_icd - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_icd - -== Copyright - -include::footer.txt[] diff --git a/man/static/clTerminateContextKHR.txt b/man/static/clTerminateContextKHR.txt deleted file mode 100644 index 8b0abcbcd..000000000 --- a/man/static/clTerminateContextKHR.txt +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clTerminateContextKHR(3) - -== Name - -clTerminateContextKHR - Terminates all pending work associated with the context and renders all data owned by the context invalid. - -[source,c] ----- -cl_int clTerminateContextKHR(cl_context context) ----- - -== Parameters - -== Notes - -It is the responsibility of the application to release all objects -associated with the context being terminated. - -When a context is terminated: - - * The execution status of enqueued commands will be `CL_TERMINATED_KHR`. - Event objects can be queried using - flink:clGetEventInfo. Event callbacks can be - registered and registered event callbacks will be called with - `event_command_status` set to `CL_TERMINATED_KHR`. - flink:clWaitForEvents will return immediately - for commands associated with event objects specified in `event_list`. - The status of user events can be set. Event objects can be retained and - released.
flink:clGetEventProfilingInfo - returns `CL_PROFILING_INFO_NOT_AVAILABLE`. - * The context is considered to be terminated. A callback function - registered when the context was created will be called. Only queries, - retain and release operations can be performed on the context. All other - APIs that use a context as an argument will return - `CL_CONTEXT_TERMINATED_KHR`. - * The contents of the memory regions of the memory objects are undefined. - Queries, registering a destructor callback, retain and release - operations can be performed on the memory objects. - * Once a context has been terminated, all OpenCL API calls that create - objects or enqueue commands will return `CL_CONTEXT_TERMINATED_KHR`. - APIs that release OpenCL objects will continue to operate as though - `clTerminateContextKHR` was not called. - * The behavior of callbacks will remain unchanged, and will report - an appropriate error, if executing after termination of context. This - behavior is similar to enqueued commands, after the command-queue has - become invalid. - -An implementation that supports this extension must be able to terminate -commands currently executing on devices or queued across all command-queues -associated with the context that is being terminated. The implementation -cannot implement this extension by waiting for currently executing (or -queued) commands to finish execution on devices associated with this context -(i.e. doing a flink:clFinish). - -In Table 4.5 (see flink:clCreateContext), `CL_CONTEXT_TERMINATE_KHR` -can be specified in the context properties only if all devices associated -with the context support context termination (i.e. -`CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR` is set for -`CL_DEVICE_TERMINATE_CAPABILITY_KHR`). Otherwise, context creation fails -with an error code of `CL_INVALID_PROPERTY`. - -== Errors - -`clTerminateContextKHR` returns `CL_SUCCESS` if the function is executed -successfully.
Otherwise, it returns one of the following errors: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid OpenCL context. - * `CL_CONTEXT_TERMINATED_KHR` if _context_ has already been terminated. - * `CL_INVALID_OPERATION` if _context_ was not created with - `CL_CONTEXT_TERMINATE_KHR` set to `CL_TRUE`. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources - required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -// == See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_terminate_context - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_3d_image_writes.txt b/man/static/cl_khr_3d_image_writes.txt deleted file mode 100644 index 474126f40..000000000 --- a/man/static/cl_khr_3d_image_writes.txt +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_3d_image_writes(3) - -== Name - -cl_khr_3d_image_writes - Extension to enable writes to 3D image memory objects. 
- -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable ----- - -== See Also - -reflink:EXTENSION, -reflink:cl_image_format, -reflink:imageWriteFunctions - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_3d_image_writes - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_byte_addressable_store.txt b/man/static/cl_khr_byte_addressable_store.txt deleted file mode 100644 index 9265f1aa4..000000000 --- a/man/static/cl_khr_byte_addressable_store.txt +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_byte_addressable_store(3) - -== Name - -cl_khr_byte_addressable_store - deprecated extension - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable ----- - -== Description - -This extension was promoted to OpenCL 1.1 core. - -// == See Also - -== Document Notes - -For more information, see the OpenCL API Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#changes_to_opencl - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_d3d10_sharing.txt b/man/static/cl_khr_d3d10_sharing.txt deleted file mode 100644 index 11ee4ced7..000000000 --- a/man/static/cl_khr_d3d10_sharing.txt +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_d3d10_sharing(3) - -== Name - -cl_khr_d3d10_sharing - Provide interoperability between OpenCL and Direct3D 10. 
- -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_d3d10_sharing : enable ----- - -== Description - -If the `cl_khr_d3d10_sharing` extension is supported, then the following functions are enabled: - - * flink:clGetDeviceIDsFromD3D10KHR - * flink:clCreateFromD3D10BufferKHR - * flink:clCreateFromD3D10Texture2DKHR - * flink:clCreateFromD3D10Texture3DKHR - * flink:clEnqueueAcquireD3D10ObjectsKHR - * flink:clEnqueueReleaseD3D10ObjectsKHR - -include::sharingD3D10Inc.txt[] - -== See Also - -reflink:EXTENSION, -flink:clGetPlatformInfo, -flink:clGetDeviceInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_d3d10_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_d3d11_sharing.txt b/man/static/cl_khr_d3d11_sharing.txt deleted file mode 100644 index edf6faede..000000000 --- a/man/static/cl_khr_d3d11_sharing.txt +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_d3d11_sharing(3) - -== Name - -cl_khr_d3d11_sharing - Provide interoperability between OpenCL and Direct3D 11. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_d3d11_sharing : enable ----- - -== Description - -The goal of this extension is to provide interoperability between OpenCL and Direct3D 11. -This is designed to function analogously to the reflink:cl_khr_gl_sharing as defined in sections 9.7 and 9.8. -If this extension is supported by an implementation, the string "cl_khr_d3d11_sharing" will be present in the `CL_PLATFORM_EXTENSIONS` or `CL_DEVICE_EXTENSIONS` string described in the table of allowed values for _param_name_ for flink:clGetDeviceInfo or -flink:clGetPlatformInfo. 
- -As currently proposed, the interfaces for this extension are provided in the header file `cl_d3d11.h`. - -If the `cl_khr_d3d11_sharing` extension is supported, then the following functions are enabled: - - * flink:clGetDeviceIDsFromD3D11KHR - * flink:clCreateFromD3D11BufferKHR - * flink:clCreateFromD3D11Texture2DKHR - * flink:clCreateFromD3D11Texture3DKHR - * flink:clEnqueueAcquireD3D11ObjectsKHR - * flink:clEnqueueReleaseD3D11ObjectsKHR - -The OpenCL functions enabled by the `cl_khr_d3d11_sharing` extension allow applications to use Direct3D 11 resources as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and Direct3D 11. -The OpenCL API may be used to execute kernels that read and/or write memory objects that are also Direct3D 11 resources. -An OpenCL image object may be created from a Direct3D 11 texture resource. -An OpenCL buffer object may be created from a Direct3D 11 buffer resource. -OpenCL memory objects may be created from Direct3D 11 objects if and only if the OpenCL context has been created from a Direct3D 11 device. - -.Lifetime of Shared [D3D11] Objects - -An OpenCL memory object created from a Direct3D 11 resource remains valid as long as the corresponding Direct3D 11 resource has not been deleted. -If the Direct3D 11 resource is deleted through the Direct3D 11 API, subsequent use of the OpenCL memory object will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -The successful creation of a cl_context against a Direct3D 11 device specified via the context create parameter `CL_CONTEXT_D3D11_DEVICE_KHR` will increment the internal Direct3D reference count on the specified Direct3D 11 device. -The internal Direct3D reference count on that Direct3D 11 device will be decremented when the OpenCL reference count on the returned OpenCL context drops to zero. 
- -The OpenCL context and corresponding command-queues are dependent on the existence of the Direct3D 11 device from which the OpenCL context was created. -If the Direct3D 11 device is deleted through the Direct3D 11 API, subsequent use of the OpenCL context will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -== See Also - -reflink:EXTENSION, -flink:clGetDeviceIDsFromD3D11KHR, -flink:clCreateFromD3D11BufferKHR, -flink:clCreateFromD3D11Texture2DKHR, -flink:clCreateFromD3D11Texture3DKHR, -flink:clEnqueueAcquireD3D11ObjectsKHR, -flink:clEnqueueReleaseD3D11ObjectsKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_d3d11_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_depth_images.txt b/man/static/cl_khr_depth_images.txt deleted file mode 100644 index e333c9600..000000000 --- a/man/static/cl_khr_depth_images.txt +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_depth_images(3) - -== Name - -cl_khr_depth_images - deprecated extension - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_depth_images : enable ----- - -== Description - -This extension was promoted to OpenCL 2.0 core.
- -// == See Also - -== Document Notes - -For more information, see the OpenCL API Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#changes_to_opencl - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_device_enqueue_local_arg_types.txt b/man/static/cl_khr_device_enqueue_local_arg_types.txt deleted file mode 100644 index 88c465fd2..000000000 --- a/man/static/cl_khr_device_enqueue_local_arg_types.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_device_enqueue_local_arg_types(3) - -== Name - -cl_khr_device_enqueue_local_arg_types - Allows arguments to blocks passed to enqueue_kernel functions to be declared as a pointer to any type in local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_device_enqueue_local_arg_types : enable ----- - -== Description - -This extension allows arguments to blocks passed to enqueue_kernel functions to be declared as a pointer to any type (built-in or user-defined) in local memory instead of just `local void *`. - -If this extension is supported by an implementation, the string cl_khr_device_enqueue_local_arg_types will be present in the `CL_DEVICE_EXTENSIONS` string described in table 4.3 (see flink:clGetDeviceInfo). 
- -== See Also - -flink:clGetDeviceInfo, -reflink:enqueue_kernel, -reflink:get_kernel_work_group_size - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_device_enqueue_local_arg_types - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_dx9_media_sharing.txt b/man/static/cl_khr_dx9_media_sharing.txt deleted file mode 100644 index 84f892362..000000000 --- a/man/static/cl_khr_dx9_media_sharing.txt +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_dx9_media_sharing(3) - -== Name - -cl_khr_dx9_media_sharing - Provide sharing of data between OpenCL and DX9. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_dx9_media_sharing : enable ----- - -== Description - -If the `cl_khr_dx9_media_sharing` extension is supported, then the following functions are enabled: - - * flink:clGetDeviceIDsFromDX9MediaAdapterKHR - * flink:clCreateFromDX9MediaSurfaceKHR - * flink:clEnqueueAcquireDX9MediaSurfacesKHR - * flink:clEnqueueReleaseDX9MediaSurfacesKHR - -include::sharingDX9Inc.txt[] - -== See Also - -reflink:EXTENSION, -flink:clGetPlatformInfo, -flink:clGetDeviceInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_dx9_media_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_egl_event.txt b/man/static/cl_khr_egl_event.txt deleted file mode 100644 index e182c857a..000000000 --- a/man/static/cl_khr_egl_event.txt +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_egl_event(3) - -== Name - -cl_khr_egl_event - Create CL event objects linked to EGL fence sync objects. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_egl_event : enable ----- - -== Description - -This extension allows creating OpenCL event objects linked to EGL fence sync objects, potentially improving efficiency of sharing images and buffers between the two APIs. -The companion EGL_KHR_cl_event extension provides the complementary functionality of creating an EGL sync object from an OpenCL event object. - -If this extension is supported by an implementation, the string `cl_khr_egl_event` will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 (see flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` string described in table 4.3 (see -flink:clGetDeviceInfo). - -If the `cl_khr_egl_event` extension is supported, then the following function is enabled: - - * flink:clCreateEventFromEGLSyncKHR - -The `cl_khr_egl_event` extension enables changes to the following: - - * flink:clGetEventInfo - * flink:clWaitForEvents - * flink:clGetEventInfo - * flink:clEnqueueAcquireGLObjects - -== See Also - -reflink:EXTENSION, -reflink:cl_khr_egl_image - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_event - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_egl_image.txt b/man/static/cl_khr_egl_image.txt deleted file mode 100644 index d6609c90d..000000000 --- a/man/static/cl_khr_egl_image.txt +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_egl_image(3) - -== Name - -cl_khr_egl_image - Create derived resources, such as OpenCL image objects, from EGLImages. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_egl_image : enable ----- - -== Description - -If this extension is supported by an implementation, the string `cl_khr_egl_image` will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 (see flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` string described in table 4.3 (see -flink:clGetDeviceInfo). - -If the `cl_khr_egl_image` extension is supported, then the following functions are enabled: - - * flink:clCreateFromEGLImageKHR - * flink:clEnqueueAcquireEGLObjectsKHR - * flink:clEnqueueReleaseEGLObjectsKHR - -== See Also - -reflink:EXTENSION, -reflink:cl_khr_egl_event - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_fp16.txt b/man/static/cl_khr_fp16.txt deleted file mode 100644 index fbc273ce2..000000000 --- a/man/static/cl_khr_fp16.txt +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_fp16(3) - -== Name - -cl_khr_fp16 - Optional half floating-point support. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_fp16 : enable ----- - -== Description - -This extension adds support for half scalar and vector types as built-in types that can be used for arithmetic operations, conversions, etc. -An application that wants to use `half` and `halfn` types will need to include the directive shown above. 
- -The list of built-in scalar and vector data types is extended to include the types in the table below. - -The built-in vector data types for `halfn` are also declared as appropriate types in the OpenCL API (and header files) that can be used by an application. -The following table describes the built-in vector data types for `halfn` as defined in the OpenCL C programming language and the corresponding data type available to the application: - -[cols="1a,1a,1a", options="header"] -|==== -| Type in OpenCL Language | Description | API type for application -| half2 | 2-component half-precision floating-point vector | `cl_half2` -| half3 | 3-component half-precision floating-point vector | `cl_half3` -| half4 | 4-component half-precision floating-point vector | `cl_half4` -| half8 | 8-component half-precision floating-point vector | `cl_half8` -| half16 | 16-component half-precision floating-point vector | `cl_half16` -|==== - -The relational, equality, logical and logical unary reflink:operators can be used with `half` scalar and `halfn` vector types and shall produce a scalar `int` and vector `shortn` result respectively. - -The OpenCL compiler accepts an `h` and `H` suffix on floating-point literals, indicating the literal is typed as a `half`. - -The macro names given in the following list must use the values specified. -These constant expressions are suitable for use in `#if` preprocessing directives. - -The following table also describes the corresponding macro names available to the application.
- -[cols="1a,1a,1a", options="header"] -|==== -| Macro in OpenCL Language | value | Macro for application -| `#define HALF_DIG` | `3` | `HALF_DIG` -| `#define HALF_MANT_DIG` | `11` | `HALF_MANT_DIG` -| `#define HALF_MAX_10_EXP` | `+4` | `HALF_MAX_10_EXP` -| `#define HALF_MAX_EXP` | `+16` | `HALF_MAX_EXP` -| `#define HALF_MIN_10_EXP` | `-4` | `HALF_MIN_10_EXP` -| `#define HALF_MIN_EXP` | `-13` | `HALF_MIN_EXP` -| `#define HALF_RADIX` | `2` | `HALF_RADIX` -| `#define HALF_MAX` | `0x1.ffcp15h` | `HALF_MAX` -| `#define HALF_MIN` | `0x1.0p-14h` | `HALF_MIN` -| `#define HALF_EPSILON` | `0x1.0p-10h` | `HALF_EPSILON` -|==== - -The following constants are also available. -They are of type `half` and are accurate within the precision of the `half` type. - -[cols="1a,1a", options="header"] -|==== -| Constant | Description -| `M_E_H` | Value of e -| `M_LOG2E_H` | Value of log~2~ e -| `M_LOG10E_H` | Value of log~10~ e -| `M_LN2_H` | Value of ln 2 -| `M_LN10_H` | Value of ln 10 -| `M_PI_H` | Value of {pi} -| `M_PI_2_H` | Value of {pi} / 2 -| `M_PI_4_H` | Value of {pi} / 4 -| `M_1_PI_H` | Value of 1 / {pi} -| `M_2_PI_H` | Value of 2 / {pi} -| `M_2_SQRTPI_H` | Value of 2 / {sqrt}{pi} -| `M_SQRT2_H` | Value of {sqrt}2 -| `M_SQRT1_2_H` | Value of 1 / {sqrt}2 -|==== - -An application may query the configuration information using the op-code `CL_DEVICE_HALF_FP_CONFIG` with flink:clGetDeviceInfo for an OpenCL device that supports half precision floating-point. - -.Conversions - -The implicit conversion rules specified in section 6.2.1 now include the `half` scalar and `halfn` vector data types. - -The explicit casts described in section 6.2.2 are extended to take a `half` scalar data type and a `halfn` vector data type. - -The explicit conversion functions described in section 6.2.3 are extended to take a `half` scalar data type and a `halfn` vector data type.
- -The *as_typen*() function for re-interpreting types as described in section -6.2.4.2 is extended to allow conversion-free casts between `shortn`, -`ushortn` and `halfn` scalar and vector data types. - -== See Also - -reflink:EXTENSION, -flink:clGetDeviceInfo, -reflink:mathFunctions.txt, -reflink:commonFunctions.txt, -reflink:geometricFunctions.txt, -reflink:relationalFunctions.txt, -reflink:vectorDataLoadandStoreFunctions.txt, -reflink:asyncCopyFunctions.txt, -reflink:imageReadFunctions.txt, -reflink:imageWriteFunctions.txt - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_fp16 - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_fp64.txt b/man/static/cl_khr_fp64.txt deleted file mode 100644 index 3d567e34e..000000000 --- a/man/static/cl_khr_fp64.txt +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_fp64(3) - -== Name - -cl_khr_fp64 - Provided for backward compatibility if `double` floating-point precision is supported. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_fp64 : enable ----- - -== Description - -This extension was promoted to an optional core feature in OpenCL 1.2. -The extension string exists for backward compatibility if double precision -is supported.
- -== See Also - -flink:clGetDeviceInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_fp64 - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_gl_depth_images.txt b/man/static/cl_khr_gl_depth_images.txt deleted file mode 100644 index 6b8486f09..000000000 --- a/man/static/cl_khr_gl_depth_images.txt +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_gl_depth_images(3) - -== Name - -cl_khr_gl_depth_images - Extends CL/GL sharing to allow a CL image to be created from a GL depth or depth-stencil texture. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_gl_depth_images : enable ----- - -== Description - -The `cl_khr_gl_depth_images` extension extends CL / GL sharing (i.e. -the reflink:cl_khr_gl_sharing extension) defined in section 9.7 to allow a CL depth image to be created from a GL depth or depth-stencil texture. -If this extension is supported by an implementation, the string cl_khr_gl_depth_images will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 or `CL_DEVICE_EXTENSIONS` string described in table 4.3. - -Depth images with an image channel order of `CL_DEPTH_STENCIL` can only be created using the flink:clCreateFromGLTexture API. - -For the image format given by channel order of `CL_DEPTH_STENCIL` and channel data type of `CL_UNORM_INT24`, the depth is stored as an unsigned normalized 24-bit value. - -For the image format given by channel order of `CL_DEPTH_STENCIL` and channel data type of `CL_FLOAT`, each pixel is two 32-bit values. -The depth is stored as a single precision floating-point value followed by the stencil which is stored as an 8-bit integer value.
- -The stencil value cannot be read or written using the read_imagef and write_imagef built-in functions in an OpenCL kernel. - -Depth image objects with an image channel order = `CL_DEPTH_STENCIL` cannot be used as arguments to flink:clEnqueueReadImage, -flink:clEnqueueWriteImage, -flink:clEnqueueCopyImage, -flink:clEnqueueCopyImageToBuffer, -flink:clEnqueueCopyBufferToImage, -flink:clEnqueueMapImage and -flink:clEnqueueFillImage and will return a `CL_INVALID_OPERATION` error. - -The following new image formats are added to table 9.4 in section 9.7.3.1 of the OpenCL 2.1 extension specification. -If a GL texture object with an internal format from table 9.4 is successfully created by OpenGL, then there is guaranteed to be a mapping to one of the corresponding CL image format(s) in that table. - -[cols="1a,1a", options="header"] -|==== -| GL internal format | CL image format (channel order, channel data type) -| `GL_DEPTH_COMPONENT32F` | `CL_DEPTH, CL_FLOAT` -| `GL_DEPTH_COMPONENT16` | `CL_DEPTH, CL_UNORM_INT16` -| `GL_DEPTH24_STENCIL8` | `CL_DEPTH_STENCIL, CL_UNORM_INT24` -| `GL_DEPTH32F_STENCIL8` | `CL_DEPTH_STENCIL, CL_FLOAT` -|==== - -== See Also - -No cross-references are available - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_depth_images - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_gl_event.txt b/man/static/cl_khr_gl_event.txt deleted file mode 100644 index 8e5e37a80..000000000 --- a/man/static/cl_khr_gl_event.txt +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_gl_event(3) - -== Name - -cl_khr_gl_event - Create an OpenCL event object from a GL sync object.
- -== C Specification - -[source,c] ----- -cl_khr_gl_event ----- - -== Description - -This extension enables the function flink:clCreateEventFromGLsyncKHR. - -This extension allows creating OpenCL event objects linked to OpenGL fence sync objects, potentially improving efficiency of sharing images and buffers between the two APIs. -The companion `GL_ARB_cl_event` OpenGL extension provides the complementary functionality of creating an OpenGL sync object from an OpenCL event object. - -In addition, this extension modifies the behavior of flink:clEnqueueAcquireGLObjects and -flink:clEnqueueReleaseGLObjects to implicitly guarantee synchronization with an OpenGL context bound in the same thread as the OpenCL context. - -If this extension is supported by an implementation, the string `cl_khr_gl_event` will be present in the `CL_PLATFORM_EXTENSIONS` (see flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` string (see -flink:clGetDeviceInfo). - -== Notes - -Event objects can also be used to reflect the status of an OpenGL sync object. -The sync object in turn refers to a fence command executing in an OpenGL command stream. -This provides another method of coordinating sharing of buffers and images between OpenGL and OpenCL (see section 9.7.6.1). - -If the `cl_khr_gl_event` extension is supported, then the OpenCL implementation will ensure that any such pending OpenGL operations are complete for an OpenGL context bound to the same thread as the OpenCL context. -This is referred to as implicit synchronization. - -If the `cl_khr_gl_event` extension is supported and the OpenGL context in question supports fence sync objects, completion of OpenGL commands may also be determined by placing a GL fence command after those commands using `glFenceSync`, creating an event from the resulting GL sync object using flink:clCreateEventFromGLsyncKHR, and determining completion of that event object via -flink:clEnqueueAcquireGLObjects. 
-This method may be considerably more efficient than calling `glFinish`, and is referred to as `explicit synchronization`. -Explicit synchronization is most useful when an OpenGL context bound to another thread is accessing the memory objects. - -If the `cl_khr_gl_event` extension is not supported, completion of OpenGL commands may be determined by issuing and waiting for completion of a `glFinish` command on all OpenGL contexts with pending references to these objects. -Some implementations may offer other efficient synchronization methods. -If such methods exist they will be described in platform-specific documentation. - -Note that no synchronization method other than `glFinish` is portable between all OpenGL implementations and all OpenCL implementations. -While this is the only way to ensure completion that is portable to all platforms, `glFinish` is an expensive operation and its use should be avoided if the `cl_khr_gl_event` extension is supported on a platform. - -== See Also - -reflink:EXTENSION, -flink:clCreateEventFromGLsyncKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_event - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_gl_msaa_sharing.txt b/man/static/cl_khr_gl_msaa_sharing.txt deleted file mode 100644 index e87bb2f87..000000000 --- a/man/static/cl_khr_gl_msaa_sharing.txt +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_gl_msaa_sharing(3) - -== Name - -cl_khr_gl_msaa_sharing - Extends the CL/GL sharing to support GL multi-sampled texture (color or depth). - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable ----- - -== Description - -This extension name is `cl_khr_gl_msaa_sharing`. 
-This extension requires reflink:cl_khr_gl_depth_images. - -This extension adds read_image and write_image functions to the built-in -link:imageReadFunctions.html[Image Read Functions] and -link:imageWriteFunctions.html[Image Write Functions], respectively. - -Multi-sample CL image objects (MSAA) can only be read from a kernel. -Multi-sample CL image objects cannot be used as arguments to flink:clEnqueueReadImage , -flink:clEnqueueWriteImage, -flink:clEnqueueCopyImage, -flink:clEnqueueCopyImageToBuffer, -flink:clEnqueueCopyBufferToImage, -flink:clEnqueueMapImage and -flink:clEnqueueFillImage and will return a `CL_INVALID_OPERATION` error. - -Add the following new data types to table 6.3 in section 6.1.3 of the OpenCL 2.1 specification (see reflink:otherDataTypes): - -[cols="1a,1a", options="header"] -|==== -| Type | Description -| `image2d_msaa_t` - | A 2D multi-sample color image. - Refer to section 6.13.14 for a detailed description of the built-in functions that use this type. -| `image2d_array_msaa_t` - | A 2D multi-sample color image array. - Refer to section 6.13.14 for a detailed description of the built-in functions that use this type. -| `image2d_msaa_depth_t` - | A 2D multi-sample depth image. - Refer to section 6.13.14 for a detailed description of the built-in functions that use this type. -| `image2d_array_msaa_depth_t` - | A 2D multi-sample depth image array. - Refer to section 6.13.14 for a detailed description of the built-in functions that use this type. -|==== - -NOTE: When a multisample image is accessed in a kernel, the access takes one -vector of integers describing which pixel to fetch and an integer -corresponding to the sample numbers describing which sample within the pixel -to fetch. _sample_ identifies the sample position in the multi-sample image. 
- -For best performance, we recommend that _sample_ be a literal value so it is -known at compile time and the OpenCL compiler can perform appropriate -optimizations for multisample reads on the device. - -No standard sampling instructions are allowed on the multisample image. -Accessing a coordinate outside the image and/or a sample that is outside the number of samples associated with each pixel in the image is undefined - -== See Also - -link:imageReadFunctions.html[Image Read Functions], -link:imageWriteFunctions.html[Image Write Functions], -flink:clGetGLTextureInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_msaa_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_gl_sharing.txt b/man/static/cl_khr_gl_sharing.txt deleted file mode 100644 index 3525731ff..000000000 --- a/man/static/cl_khr_gl_sharing.txt +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_gl_sharing(3) - -== Name - -cl_khr_gl_sharing - Allow applications to use OpenGL buffer, texture and renderbuffer objects as OpenCL image objects. - -== C Specification - -[source,c] ----- -cl_khr_gl_sharing ----- - -== Description - -include::gl_sharingInc.txt[] - -== Notes - -If the reflink:cl_khr_mipmap_image, extension is supported by the OpenCL device, the `cl_khr_gl_sharing` extension adds support for creating a mip-mapped CL image from a mip-mapped GL texture. - -To create a mip-mapped CL image from a mip-mapped GL texture, the _miplevel_ argument to flink:clCreateFromGLTexture, should be a negative value. -If _miplevel_ is a negative value then a CL mipmapped image object is created from a mipmapped GL texture object instead of a CL image object for a specific miplevel of a GL texture. 
- -NOTE: For a detailed description of how the level of detail is computed, please refer to section 3.9.7 of the OpenGL 3.0 specification. - -== See Also - -reflink:EXTENSION, -flink:clGetGLContextInfoKHR, -flink:clCreateFromGLBuffer, -flink:clCreateFromGLTexture, -flink:clCreateFromGLRenderbuffer, -flink:clGetGLObjectInfo, -flink:clGetGLTextureInfo, -flink:clEnqueueAcquireGLObjects, -flink:clEnqueueReleaseGLObjects, -reflink:cl_khr_mipmap_image - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_global_int32_base_atomics.txt b/man/static/cl_khr_global_int32_base_atomics.txt deleted file mode 100644 index 81a45a93c..000000000 --- a/man/static/cl_khr_global_int32_base_atomics.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_global_int32_base_atomics(3) - -== Name - -cl_khr_global_int32_base_atomics - Extension enabling base 32-bit atomic functions. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 1.1. -The built-in atomic function names were changed to use the `atomic_` prefix -instead of `atom_`. 
- -== See Also - -reflink:cl_khr_global_int32_extended_atomics, -reflink:cl_khr_local_int32_base_atomics, -reflink:cl_khr_local_int32_extended_atomics - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int32_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_global_int32_extended_atomics.txt b/man/static/cl_khr_global_int32_extended_atomics.txt deleted file mode 100644 index e4ce63f1a..000000000 --- a/man/static/cl_khr_global_int32_extended_atomics.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_global_int32_extended_atomics(3) - -== Name - -cl_khr_global_int32_extended_atomics - Extension enabling extended 32-bit atomic functions - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 1.1. -The built-in atomic function names were changed to use the `atomic_` prefix -instead of `atom_`. - -== See Also - -reflink:cl_khr_global_int32_base_atomics, -reflink:cl_khr_local_int32_base_atomics, -reflink:cl_khr_local_int32_extended_atomics - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int32_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_icd.txt b/man/static/cl_khr_icd.txt deleted file mode 100644 index df5349f2d..000000000 --- a/man/static/cl_khr_icd.txt +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_icd(3) - -== Name - -cl_khr_icd - Extension through which the Khronos OpenCL installable client driver loader (ICD Loader) may expose multiple separate vendor installable client drivers (Vendor ICDs) for OpenCL. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_icd : enable ----- - -== Description - -This is a platform extension which defines a simple mechanism through which the Khronos OpenCL installable client driver loader (ICD Loader) may expose multiple separate vendor installable client drivers (Vendor ICDs) for OpenCL. -An application written against the ICD Loader will be able to access all `cl_platform_ids` exposed by all vendor implementations with the ICD Loader acting as a demultiplexor. -If this extension is supported by an implementation, the string `cl_khr_icd` will be present in the `CL_PLATFORM_EXTENSIONS` string described in the table of allowed values for _param_name_ for flink:clGetDeviceInfo. - -If the `cl_khr_icd` extension is supported, then the flink:clIcdGetPlatformIDsKHR function is enabled. - -The official source for the ICD loader is available at the Khronos website. -The complete `_cl_icd_dispatch` structure is defined in the header `icd_dispatch.h` which is available as a part of the source code. - -.Inferring Vendors from Function Call Arguments - -At every OpenCL function call, the ICD Loader infers the vendor ICD function to call from the arguments to the function. -An object is said to be ICD compatible if it is of the following structure: - -[source,c] ----- -struct _cl_ -{ - struct _cl_icd_dispatch *dispatch; - // ... remainder of internal data -}; ----- - -`` is one of `platform_id`, `device_id`, `context`, `command_queue`, `mem`, `program`, `kernel`, `event`, or `sampler`. 
- -The structure `_cl_icd_dispatch` is a function pointer dispatch table which is used to direct calls to a particular vendor implementation. -All objects created from ICD compatible objects must be ICD compatible. - -A link to source code which defines the entries in the function table structure `_cl_icd_dispatch` is available in the Sample Code section of the OpenCL specification. -The order of the functions in `_cl_icd_dispatch` is determined by the ICD Loader's source. -The ICD Loader's source's `_cl_icd_dispatch` table is to be appended to only. - -Functions which do not have an argument from which the vendor implementation may be inferred are ignored, with the exception of `clGetExtensionFunctionAddress`. -which is described below. - -.ICD Data - -A Vendor ICD is defined by two pieces of data: - - * The Vendor ICD library specifies a library which contains the OpenCL entry points for the vendor's OpenCL implementation. - The vendor ICD's library file name should include the vendor name, or a vendor-specific implementation identifier. - * The Vendor ICD extension suffix is a short string which specifies the default suffix for extensions implemented only by that vendor. - See Additions to Chapter 9 for details on the mechanism through which this is accomplished. - The vendor suffix string is optional. - -.ICD Loader Vendor Enumeration on Windows - -To enumerate Vendor ICDs on Windows, the ICD Loader scans the values in the registry key `HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors`. -For each value in this key which has `DWORD` data set to 0, the ICD Loader opens the dynamic link library specified by the name of the value using `LoadLibraryA`. - -For example, if the registry contains the following value - ----- -[HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors] -"c:\\vendor a\\vndra_ocl.dll"=dword:00000000 ----- - -then the ICD will open the library "`c:\vendor a\vndra_ocl.dll`". 
- -.ICD Loader Vendor Enumeration on Linux - -To enumerate vendor ICDs on Linux, the ICD Loader scans the files in the path `/etc/OpenCL/vendors`. -For each file in this path, the ICD Loader opens the file as a text file. -The expected format for the file is a single line of text which specifies the Vendor ICD's library. -The ICD Loader will attempt to open that file as a shared object using `dlopen()`. -Note that the library specified may be an absolute path or just a file name. - -For example, if the following file exists `/etc/OpenCL/vendors/VendorA.icd` and contains the text `libVendorAOpenCL.so` then the ICD Loader will load the library "`libVendorAOpenCL.so`". - -.ICD Loader Vendor Enumeration on Android - -To enumerate vendor ICDs on Android, the ICD Loader scans the files in the path `/system/vendor/Khronos/OpenCL/vendors`. -For each file in this path, the ICD Loader opens the file as a text file. -The expected format for the file is a single line of text which specifies the Vendor ICD's library. -The ICD Loader will attempt to open that file as a shared object using `dlopen()`. -Note that the library specified may be an absolute path or just a file name. - -For example, if the following file exists `/system/vendor/Khronos/OpenCL/vendors/VendorA.icd` and contains the text `libVendorAOpenCL.so` then the ICD Loader will load the library "`libVendorAOpenCL.so`". - -.Adding a Vendor Library - -Upon successfully loading a Vendor ICD's library, the ICD Loader queries the following functions from the library: flink:clIcdGetPlatformIDsKHR, -flink:clGetPlatformInfo, and `clGetExtensionFunctionAddress`. -If any of these functions are not present then the ICD Loader will close and ignore the library. - -Next the ICD Loader queries available ICD-enabled platforms in the library using flink:clIcdGetPlatformIDsKHR. 
-For each of these platforms, the ICD Loader queries the platform's extension string to verify that `cl_khr_icd` is supported, then queries the platform's Vendor ICD extension suffix using flink:clGetPlatformInfo with the value `CL_PLATFORM_ICD_SUFFIX_KHR`. - -If any of these steps fail, the ICD Loader will ignore the Vendor ICD and continue on to the next. - -== See Also - -reflink:EXTENSION, -flink:clIcdGetPlatformIDsKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_icd - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_il_program.txt b/man/static/cl_khr_il_program.txt deleted file mode 100644 index 96c03dfe1..000000000 --- a/man/static/cl_khr_il_program.txt +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_il_program(3) - -== Name - -cl_khr_il_program - Enable loading SPIR IL programs - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_il_program : enable ----- - -== Description - -The OpenCL KHR extension reflink:cl_khr_il_program has been deprecated. -This feature is now core. - -== See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_il_program - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_image2d_from_buffer.txt b/man/static/cl_khr_image2d_from_buffer.txt deleted file mode 100644 index 2c15ac8f5..000000000 --- a/man/static/cl_khr_image2d_from_buffer.txt +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_image2d_from_buffer(3) - -== Name - -cl_khr_image2d_from_buffer - Extension enabling creating 2D image from buffer data - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_image2d_from_buffer : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 2.0. - -// == See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_image2d_from_buffer - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_initialize_memory.txt b/man/static/cl_khr_initialize_memory.txt deleted file mode 100644 index 3b9a900cf..000000000 --- a/man/static/cl_khr_initialize_memory.txt +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_initialize_memory(3) - -== Name - -cl_khr_initialize_memory - Extension adding support for initializing local and private memory before a kernel begins execution. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_initialize_memory : enable ----- - -== Description - -Memory is allocated in various forms in OpenCL both explicitly (global memory) or implicitly (local, private memory). -This allocation so far does not provide a straightforward mechanism to initialize the memory on allocation. -In other words what is lacking is the equivalent of calloc for the currently supported malloc like capability. -This functionality is useful for a variety of reasons including ease of debugging, application controlled limiting of visibility to previous contents of memory and in some cases, optimization. - -This extension adds support for initializing local and private memory before a kernel begins execution. 
-This extension name is `cl_khr_initialize_memory`. - -Add a new context property to table 4.5 in section 4.4 (see flink:clCreateContext): - -[cols="1a,1a,1a", options="header"] -|==== -| cl_context_properties enum | Property value | Description -| `CL_CONTEXT_MEMORY_INITIALIZE_KHR` | `cl_context_memory_initialize_khr` - | Describes which memory types for the context must be initialized. - This is a bit-field, where the following values are currently supported: -+ - * `CL_CONTEXT_MEMORY_INITIALIZE_LOCAL_KHR` - Initialize local memory to zeros. - * `CL_CONTEXT_MEMORY_INITIALIZE_PRIVATE_KHR` - Initialize private memory to zeros. -|==== - -Updates to section 6.9 - Restrictions: - -If the context is created with `CL_CONTEXT_MEMORY_INITIALIZE_KHR`, appropriate memory locations as specified by the bit-field is initialized with zeroes, prior to the start of execution of any kernel. -The driver chooses when, prior to kernel execution, the initialization of local and/or private memory is performed. -The only requirement is there should be no values set from outside the context, which can be read during a kernel execution. - -== See Also - -No cross-references are available - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_initialize_memory - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_int64_base_atomics.txt b/man/static/cl_khr_int64_base_atomics.txt deleted file mode 100644 index 8c448ce9b..000000000 --- a/man/static/cl_khr_int64_base_atomics.txt +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_int64_base_atomics(3) - -== Name - -cl_khr_int64_base_atomics - Optional extensions that implement base atomic operations on 64-bit signed and unsigned integers to locations in __global and __local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable ----- - -== Description - -The behavior of these extensions is defined in the SPIR-V environment and appropriate kernel language specifications. - -== See Also - -reflink:EXTENSION, -reflink:cl_khr_int64_extended_atomics, -link:atomicFunctions.html[Atomic Functions] - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int64_base_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_int64_extended_atomics.txt b/man/static/cl_khr_int64_extended_atomics.txt deleted file mode 100644 index e3dccdde7..000000000 --- a/man/static/cl_khr_int64_extended_atomics.txt +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_int64_extended_atomics(3) - -== Name - -cl_khr_int64_extended_atomics - Optional extensions that implement extended atomic operations on 64-bit signed and unsigned integers to locations in __global and __local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable ----- - -== Description - -The behavior of these extensions is defined in the SPIR-V environment and appropriate kernel language specifications. 
- -== See Also - -reflink:EXTENSION, -reflink:cl_khr_int64_base_atomics, -link:atomicFunctions.html[Atomic Functions] - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int64_extended_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_local_int32_base_atomics.txt b/man/static/cl_khr_local_int32_base_atomics.txt deleted file mode 100644 index 7a18190bf..000000000 --- a/man/static/cl_khr_local_int32_base_atomics.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_local_int32_base_atomics(3) - -== Name - -cl_khr_local_int32_base_atomics - Extension enabling base atomic operations on 32-bit integers to locations in __local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 1.1. -The built-in atomic function names were changed to use the `atomic_` prefix -instead of `atom_`. - -== See Also - -reflink:cl_khr_global_int32_base_atomics, -reflink:cl_khr_global_int32_extended_atomics, -reflink:cl_khr_local_int32_extended_atomics - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int32_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_local_int32_extended_atomics.txt b/man/static/cl_khr_local_int32_extended_atomics.txt deleted file mode 100644 index dec50fa86..000000000 --- a/man/static/cl_khr_local_int32_extended_atomics.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_local_int32_extended_atomics(3) - -== Name - -cl_khr_local_int32_extended_atomics - Extension enabling extended atomic operations on 32-bit integers to locations in __local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 1.1. -The built-in atomic function names were changed to use the `atomic_` prefix -instead of `atom_`. - -== See Also - -reflink:cl_khr_global_int32_base_atomics, -reflink:cl_khr_global_int32_extended_atomics, -reflink:cl_khr_local_int32_base_atomics - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int32_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_mipmap_image.txt b/man/static/cl_khr_mipmap_image.txt deleted file mode 100644 index 67058baa4..000000000 --- a/man/static/cl_khr_mipmap_image.txt +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_mipmap_image(3) - -== Name - -cl_khr_mipmap_image - Extension adding support for mipmaps. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_mipmap_image : enable - -#pragma OPENCL EXTENSION cl_khr_mipmap_image_writes : enable ----- - -== Description - -This extension adds support for mipmaps. -This proposal is implemented as two optional extensions. -The `cl_khr_mipmap_image` extension implements support to create a mipmapped image, enqueue commands to read/write/copy/map a region of a mipmapped image and built-in functions that can be used to read a mip-mapped image in an OpenCL C program. 
-The `cl_khr_mipmap_image_writes` extension adds built-in functions that can be used to write a mip-mapped image in an OpenCL C program. -If the `cl_khr_mipmap_image_writes` extension is supported by the OpenCL device, the `cl_khr_mipmap_image` extension must also be supported. - -This extension enables the following functions: - - * reflink:get_image_num_mip_levels - -This extension enables changes to the following: - - * flink:clCreateImage - * flink:clEnqueueReadImage - * flink:clEnqueueWriteImage - * flink:clEnqueueMapImage - * flink:clEnqueueCopyImage - * flink:clEnqueueCopyImageToBuffer - * flink:clEnqueueCopyBufferToImage - * flink:clCreateSamplerWithProperties - * flink:imageReadFunctions - * flink:imageWriteFunctions - * reflink:cl_khr_gl_sharing - * flink:clCreateFromGLTexture - -== See Also - -reflink:EXTENSION, -reflink:get_image_num_mip_levels.txt - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_mipmap_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_priority_hints.txt b/man/static/cl_khr_priority_hints.txt deleted file mode 100644 index 776ad08f0..000000000 --- a/man/static/cl_khr_priority_hints.txt +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_priority_hints(3) - -== Name - -cl_khr_priority_hints - Extension adding priority hints for OpenCL - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_priority_hints : enable ----- - -== Description - -This extension adds priority hints for OpenCL, but does not specify the scheduling behavior or minimum guarantees. -It is expected that the the user guides associated with each implementation which supports this extension describe the scheduling behavior guaranteed. 
- -If this extension is supported by an implementation, the string cl_khr_priority_hints will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 (see flink:clGetPlatformInfo). - -== See Also - -flink:clGetPlatformInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_priority_hints - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_spir.txt b/man/static/cl_khr_spir.txt deleted file mode 100644 index eb4d7f840..000000000 --- a/man/static/cl_khr_spir.txt +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_spir(3) - -== Name - -cl_khr_spir - Extension adding support to create an OpenCL program object from a Standard Portable Intermediate Representation (SPIR) instance. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_spir : enable ----- - -== Description - -This extension adds support to create an OpenCL program object from a Standard Portable Intermediate Representation (SPIR) instance. -SPIR is a vendor neutral non-source representation for OpenCL C programs that has since been superceded by the SPIR-V standard. - -flink:clCreateProgramWithBinary can be used to load a SPIR binary. -Once a program object has been created from a SPIR binary, -flink:clBuildProgram can be called to build a program executable or -flink:clCompileProgram can be called to compile the SPIR binary. 
- -This extension adds changes to the following: - - * flink:clGetDeviceInfo - * flink:clGetProgramBuildInfo - * flink:clGetKernelArgInfo - -== See Also - -reflink:EXTENSION, -flink:clCreateProgramWithBinary, -flink:clBuildProgram, -flink:clCompileProgram, -flink:clGetDeviceInfo, -flink:clGetProgramBuildInfo, -flink:clGetKernelArgInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_spir - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_srgb_image_writes.txt b/man/static/cl_khr_srgb_image_writes.txt deleted file mode 100644 index e567116dc..000000000 --- a/man/static/cl_khr_srgb_image_writes.txt +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_srgb_image_writes(3) - -== Name - -cl_khr_srgb_image_writes - Extension allowing writes to sRGB images from a kernel. - -== Description - -This extensions adds changes to the following: - - * flink:imageWriteFunctions - * flink:clGetSupportedImageFormats - -== See Also - -reflink:EXTENSION, -flink:clCreateProgramWithBinary, -flink:clBuildProgram, -flink:clCompileProgram, - -== Description - -This extension enables kernels to write to sRGB images using the -reflink:imageWriteFunctions[write_imagef] built-in function. -The sRGB image formats that may be written to will be returned by -flink:clGetSupportedImageFormats. - -When the image is an sRGB image, the reflink:imageWriteFunctions[write_imagef] -built-in function will perform the linear to sRGB conversion. Only the R, G, -and B components are converted from linear to sRGB; the A component is -written as-is. 
- -// == See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_srgb_image_writes - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_subgroups.txt b/man/static/cl_khr_subgroups.txt deleted file mode 100644 index 64a9075f6..000000000 --- a/man/static/cl_khr_subgroups.txt +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_subgroups(3) - -== Name - -cl_khr_subgroups - Deprecated extension - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_subgroups : enable ----- - -== Description - -The OpenCL 2.0 KHR extension `cl_khr_subgroups` has been deprecated. -The feature is now core. - -== See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_subgroups - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_terminate_context.txt b/man/static/cl_khr_terminate_context.txt deleted file mode 100644 index d20eec864..000000000 --- a/man/static/cl_khr_terminate_context.txt +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_terminate_context(3) - -== Name - -cl_khr_terminate_context - Extension allowing an API to release a context. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_terminate_context : enable ----- - -== Description - -Today, OpenCL provides an API to release a context. -This operation is done only after all queues, memory object, programs and kernels are released, which in turn might wait for all ongoing operations to complete. 
-However, there are cases in which a fast release is required, or release operation cannot be done, as commands are stuck in mid execution. -An example of the first case can be program termination due to exception, or quick shutdown due to low power. -Examples of the second case are when a kernel is running too long, or gets stuck, or it may result from user action which makes the results of the computation unnecessary. - -In many cases, the driver or the device is capable of speeding up the closure of ongoing operations when the results are no longer required in a much more expedient manner than waiting for all previously enqueued operations to finish. - -This extension implements a new query to check whether a device can terminate an OpenCL context and adds an API to terminate a context. - -This extensions enables the following function: - - * flink:clTerminateContextKHR - -This extensions adds changes to the following: - - * flink:clGetDeviceInfo - * flink:clCreateContext - -== See Also - -flink:clTerminateContextKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_terminate_context - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_throttle_hints.txt b/man/static/cl_khr_throttle_hints.txt deleted file mode 100644 index 8463c503a..000000000 --- a/man/static/cl_khr_throttle_hints.txt +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_throttle_hints(3) - -== Name - -cl_khr_throttle_hints - Extension adding throttle hints for OpenCL - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_throttle_hints : enable ----- - -== Description - -This extension adds throttle hints for OpenCL, but does not specify the throttling behaviour or minimum guarantees. 
-It is expected that the user guide associated with each implementation which supports this extension describe the throttling behaviour guaranteed. - -If this extension is supported by an implementation, the string cl_khr_throttle_hints will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 (see flink:clGetPlatformInfo). - -Note that the throttle hint is orthogonal to functionality defined in reflink:cl_khr_priority_hints) extension. -For example, a task may have high priority (`CL_QUEUE_PRIORITY_HIGH_KHR`) but should at the same time be executed at an optimized throttle setting (`CL_QUEUE_THROTTLE_LOW`). - -== See Also - -flink:clGetPlatformInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_throttle_hints - -== Copyright - -include::footer.txt[] diff --git a/man/static/gl_formatsInc.txt b/man/static/gl_formatsInc.txt deleted file mode 100644 index 8264d6022..000000000 --- a/man/static/gl_formatsInc.txt +++ /dev/null @@ -1,74 +0,0 @@ -.OpenGL and Corresponding OpenCL Image Formats - -The table below (Table 9.4) describes the list of GL texture internal -formats and the corresponding CL image formats. -If a GL texture object with an internal format from the table below is -successfully created by OpenGL, then there is guaranteed to be a mapping to -one of the corresponding CL image format(s) in that table. -Texture objects created with other OpenGL internal formats may (but are not -guaranteed to) have a mapping to a CL image format; if such mappings exist, -they are guaranteed to preserve all color components, data types, and at -least the number of bits/component actually allocated by OpenGL for that -format. 
- -[cols="1a,1a", options="header"] -|==== -|GL internal format | CL image format (channel order, channel data type) -| `GL_RGBA8` | `CL_RGBA, CL_UNORM_INT8 or CL_BGRA, CL_UNORM_INT8` -| `GL_SRGBA8_ALPHA8` | `CL_sRGBA, CL_UNORM_INT8` -| `GL_RGBA`, - `GL_UNSIGNED_INT_8_8_8_8_REV` | `CL_RGBA, CL_UNORM_INT8` -| `GL_BGRA`, - `GL_UNSIGNED_INT_8_8_8_8_REV` | `CL_BGRA, CL_UNORM_INT8` -| `GL_RGBA8I, GL_RGBA8I_EXT` | `CL_RGBA, CL_SIGNED_INT8` -| `GL_RGBA16I, GL_RGBA16I_EXT` | `CL_RGBA, CL_SIGNED_INT16` -| `GL_RGBA32I, GL_RGBA32I_EXT` | `CL_RGBA, CL_SIGNED_INT32` -| `GL_RGBA8UI, GL_RGBA8UI_EXT` | `CL_RGBA, CL_UNSIGNED_INT8` -| `GL_RGBA16UI, GL_RGBA16UI_EXT` | `CL_RGBA, CL_UNSIGNED_INT16` -| `GL_RGBA32UI, GL_RGBA32UI_EXT` | `CL_RGBA, CL_UNSIGNED_INT32` -| `GL_RGBA8_SNORM` | `CL_RGBA, CL_SNORM_INT8` -| `GL_RGBA16` | `CL_RGBA, CL_UNORM_INT16` -| `GL_RGBA16_SNORM` | `CL_RGBA, CL_SNORM_INT166` -| `GL_RGBA16F, GL_RGBA16F_ARB` | `CL_RGBA, CL_HALF_FLOAT` -| `GL_RGBA32F, GL_RGBA32F_ARB` | `CL_RGBA, CL_FLOAT` -| `GL_R8` | `CL_R, CL_UNORM_INT8` -| `GL_R8_SNORM` | `CL_R, CL_SNORM_INT8` -| `GL_R16` | `CL_R, CL_UNORM_INT16` -| `GL_R16_SNORM` | `CL_R, CL_SNORM_INT16` -| `GL_R16F` | `CL_R, CL_HALF_FLOAT` -| `GL_R32F` | `CL_R, CL_FLOAT` -| `GL_R8I` | `CL_R, CL_SIGNED_INT8` -| `GL_R16I` | `CL_R, CL_SIGNED_INT16` -| `GL_R32I` | `CL_R, CL_SIGNED_INT32` -| `GL_R8UI` | `CL_R, CL_UNSIGNED_INT8` -| `GL_R16UI` | `CL_R, CL_UNSIGNED_INT16` -| `GL_R32UI` | `CL_R, CL_UNSIGNED_INT32` -| `GL_RG8` | `CL_RG, CL_UNORM_INT8` -| `GL_RG8_SNORM` | `CL_RG, CL_SNORM_INT8` -| `GL_RG16` | `CL_RG, CL_UNORM_INT16` -| `GL_RG16_SNORM` | `CL_RG, CL_SNORM_INT16` -| `GL_RG16F` | `CL_RG, CL_HALF_FLOAT` -| `GL_RG32F` | `CL_RG, CL_FLOAT` -| `GL_RG8I` | `CL_RG, CL_SIGNED_INT8` -| `GL_RG16I` | `CL_RG, CL_SIGNED_INT16` -| `GL_RG32I` | `CL_RG, CL_SIGNED_INT32` -| `GL_RG8UI` | `CL_RG, CL_UNSIGNED_INT8` -| `GL_RG16UI` | `CL_RG, CL_UNSIGNED_INT16` -| `GL_RG32UI` | `CL_RG, CL_UNSIGNED_INT32` -|==== - -If the 
reflink:cl_khr_gl_depth_images extension is enabled, the following -new image formats are added to table 9.4 in section 9.6.3.1 of the OpenCL -2.0 extension specification. If a GL texture object with an internal format -from table 9.4 is successfully created by OpenGL, then there is guaranteed -to be a mapping to one of the corresponding CL image format(s) in that -table. - -[cols="1a,1a", options="header"] -|==== -| GL internal format | CL image format (channel order, channel data type) -| `GL_DEPTH_COMPONENT32F` | `CL_DEPTH, CL_FLOAT` -| `GL_DEPTH_COMPONENT16` | `CL_DEPTH, CL_UNORM_INT16` -| `GL_DEPTH24_STENCIL8` | `CL_DEPTH_STENCIL, CL_UNORM_INT24` -| `GL_DEPTH32F_STENCIL8` | `CL_DEPTH_STENCIL, CL_FLOAT` -|==== diff --git a/man/static/gl_lifetimeInc.txt b/man/static/gl_lifetimeInc.txt deleted file mode 100644 index 7704c2c85..000000000 --- a/man/static/gl_lifetimeInc.txt +++ /dev/null @@ -1,18 +0,0 @@ -.Lifetime of [GL] Shared Objects - -An OpenCL memory object created from an OpenGL object (hereinafter refered -to as a "shared CL/GL object") remains valid as long as the corresponding GL -object has not been deleted. If the GL object is deleted through the GL API -(e.g. `glDeleteBuffers`, `glDeleteTextures`, or `glDeleteRenderbuffers`), -subsequent use of the CL buffer or image object will result in undefined -behavior, including but not limited to possible CL errors and data -corruption, but may not result in program termination. - -The CL context and corresponding command-queues are dependent on the -existence of the GL share group object, or the share group associated with -the GL context from which the CL context is created. If the GL share group -object or all GL contexts in the share group are destroyed, any use of the -CL context or command-queue(s) will result in undefined behavior, which may -include program termination. Applications should destroy the CL -command-queue(s) and CL context before destroying the corresponding GL share -group or contexts. 
diff --git a/man/static/gl_sharingInc.txt b/man/static/gl_sharingInc.txt deleted file mode 100644 index d389e6c42..000000000 --- a/man/static/gl_sharingInc.txt +++ /dev/null @@ -1,27 +0,0 @@ -General information about GL sharing follows. - -The OpenCL specification in section 9.7 defines how to share data with -texture and buffer objects in a parallel OpenGL implementation, but does not -define how the association between an OpenCL context and an OpenGL context -or share group is established. This extension defines optional attributes to -OpenCL context creation routines which associate a GL context or share group -object with a newly created OpenCL context. If this extension is supported -by an implementation, the string "cl_khr_gl_sharing" will be present in the -`CL_DEVICE_EXTENSIONS` string described in the table of allowed values for -_param_name_ for flink:clGetDeviceInfo or in the `CL_PLATFORM_EXTENSIONS` -string described in the table of allowed values for _param_name_ for -flink:clGetPlatformInfo. - -This section discusses OpenCL functions that allow applications to use -OpenGL buffer, texture, and renderbuffer objects as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and OpenGL. The OpenCL -API may be used to execute kernels that read and/or write memory objects -that are also OpenGL objects. - -An OpenCL image object may be created from an OpenGL texture or renderbuffer -object. An OpenCL buffer object may be created from an OpenGL buffer object. - -Any supported OpenGL object defined within the GL share group object, or the -share group associated with the GL context from which the CL context is -created, may be shared, with the exception of the default OpenGL objects -(i.e. objects named zero), which may not be shared. 
diff --git a/man/static/gl_syncInc.txt b/man/static/gl_syncInc.txt deleted file mode 100644 index 735f0d9a9..000000000 --- a/man/static/gl_syncInc.txt +++ /dev/null @@ -1,66 +0,0 @@ -.Synchronizing OpenCL and OpenGL Access - -In order to ensure data integrity, the application is responsible for -synchronizing access to shared CL/GL objects by their respective APIs. -Failure to provide such synchronization may result in race conditions and -other undefined behavior including non-portability between implementations. - -Prior to calling flink:clEnqueueAcquireGLObjects, the application must -ensure that any pending GL operations which access the objects specified in -_mem_objects_ have completed. This may be accomplished portably by issuing -and waiting for completion of a `glFinish` command on all GL contexts with -pending references to these objects. Implementations may offer more -efficient synchronization methods; for example on some platforms calling -`glFlush` may be sufficient, or synchronization may be implicit within a -thread, or there may be vendor-specific extensions that enable placing a -fence in the GL command stream and waiting for completion of that fence in -the CL command-queue. Note that no synchronization methods other than -`glFinish` are portable between OpenGL implementations at this time. - -When the extension reflink:cl_khr_egl_event is supported: Prior to calling -flink:clEnqueueAcquireGLObjects, the application must ensure that any -pending EGL or EGL client API operations which access the objects specified -in _mem_objects_ have completed. If the reflink:cl_khr_egl_event extension -is supported and the EGL context in question supports fence sync objects, -_explicit synchronisation_ can be achieved as set out in section 5.7.1. 
If -the reflink:cl_khr_egl_event extension is not supported, completion of EGL -client API commands may be determined by issuing and waiting for completion -of commands such as `glFinish` or `vgFinish` on all client API contexts with -pending references to these objects. Some implementations may offer other -efficient synchronization methods. If such methods exist they will be -described in platform-specific documentation. Note that no synchronization -methods other than `glFinish` and `vgFinish` are portable between all EGL -client API implementations and all OpenCL implementations. While this is the -only way to ensure completion that is portable to all platforms, these are -expensive operation and their use should be avoided if the -reflink:cl_khr_egl_event extension is supported on a platform. - -Similarly, after calling flink:clEnqueueReleaseGLObjects, the application is -responsible for ensuring that any pending OpenCL operations which access the -objects specified in _mem_objects_ have completed prior to executing -subsequent GL commands which reference these objects. This may be -accomplished portably by calling flink:clWaitForEvents with the event object -returned by flink:clEnqueueReleaseGLObjects, or by calling flink:clFinish. -As above, some implementations may offer more efficient methods. - -The application is responsible for maintaining the proper order of -operations if the CL and GL contexts are in separate threads. - -If a GL context is bound to a thread other than the one in which -flink:clEnqueueReleaseGLObjects is called, changes to any of the objects in -_mem_objects_ may not be visible to that context without additional steps -being taken by the application. For an OpenGL 3.1 (or later) context, the -requirements are described in Appendix D ("Shared Objects and Multiple -Contexts") of the OpenGL 3.1 Specification. For prior versions of OpenGL, -the requirements are implementation-dependent. 
- -Attempting to access the data store of an OpenGL object after it has been -acquired by OpenCL and before it has been released will result in undefined -behavior. Similarly, attempting to access a shared CL/GL object from OpenCL -before it has been acquired by the OpenCL command-queue, or after it has -been released, will result in undefined behavior. - -If the reflink:cl_khr_gl_event extension is supported, -then the OpenCL implementation will ensure that any such pending OpenGL -operations are complete for an OpenGL context bound to the same thread as -the OpenCL context. This is referred to as implicit synchronization. diff --git a/man/static/sharingD3D10Inc.txt b/man/static/sharingD3D10Inc.txt deleted file mode 100644 index f14213b26..000000000 --- a/man/static/sharingD3D10Inc.txt +++ /dev/null @@ -1,45 +0,0 @@ -General information about sharing memory objects with Direct3D 10 follows. - -The goal of this extension is to provide interoperability between OpenCL and -Direct3D 10. This is designed to function analogously to the -reflink:cl_khr_gl_sharing as defined in sections 9.7 and 9.8. If this -extension is supported by an implementation, the string -"cl_khr_d3d10_sharing" will be present in the `CL_PLATFORM_EXTENSIONS` (see -flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` (see -flink:clGetDeviceInfo). - -As currently proposed the interfaces for this extension would be provided in -`cl_d3d10.h`. - -The OpenCL functions enabled by the reflink:cl_khr_d3d10_sharing extension -allow applications to use Direct3D 10 resources as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and Direct3D 10. The -OpenCL API may be used to execute kernels that read and/or write memory -objects that are also Direct3D 10 resources. An OpenCL image object may be -created from a Direct3D 10 texture resource. An OpenCL buffer object may be -created from a Direct3D 10 buffer resource. 
OpenCL memory objects may be -created from Direct3D 10 objects if and only if the OpenCL context has been -created from a Direct3D 10 device. - -.Lifetime of Shared [D3D10] Objects - -An OpenCL memory object created from a Direct3D 10 resource remains valid as -long as the corresponding Direct3D 10 resource has not been deleted. If the -Direct3D 10 resource is deleted through the Direct3D 10 API, subsequent use -of the OpenCL memory object will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -The successful creation of a `cl_context` against a Direct3D 10 device -specified via the context create parameter `CL_CONTEXT_D3D10_DEVICE_KHR` -will increment the internal Direct3D reference count on the specified -Direct3D 10 device. The internal Direct3D reference count on that Direct3D -10 device will be decremented when the OpenCL reference count on the -returned OpenCL context drops to zero. - -The OpenCL context and corresponding command-queues are dependent on the -existence of the Direct3D 10 device from which the OpenCL context was -created. If the Direct3D 10 device is deleted through the Direct3D 10 API, -subsequent use of the OpenCL context will result in undefined behavior, -including but not limited to possible OpenCL errors, data corruption, and -program termination. diff --git a/man/static/sharingD3D11Inc.txt b/man/static/sharingD3D11Inc.txt deleted file mode 100644 index 8cc2a9149..000000000 --- a/man/static/sharingD3D11Inc.txt +++ /dev/null @@ -1,49 +0,0 @@ -.Sharing Memory Objects with Direct3D 11 Resources - -The goal of this extension is to provide interoperability between OpenCL and -Direct3D 11. This is designed to function analogously to the -reflink:cl_khr_gl_sharing as defined in sections 9.7 and 9.8. 
If this -extension is supported by an implementation, the string -"cl_khr_d3d11_sharing" will be present in the `CL_PLATFORM_EXTENSIONS` or -`CL_DEVICE_EXTENSIONS` string described in the table of allowed values for -_param_name_ for flink:clGetDeviceInfo or flink:clGetPlatformInfo. - -As currently proposed the interfaces for this extension would be provided in -`cl_d3d11.h`. - -This section discusses OpenCL functions that allow applications to use -Direct3D 11 resources as OpenCL memory objects. This allows efficient -sharing of data between OpenCL and Direct3D 11. The OpenCL API may be used -to execute kernels that read and/or write memory objects that are also -Direct3D 11 resources. An OpenCL image object may be created from a Direct3D -11 texture resource. An OpenCL buffer object may be created from a Direct3D -11 buffer resource. OpenCL memory objects may be created from Direct3D 11 -objects if and only if the OpenCL context has been created from a Direct3D -11 device. - -.Lifetime of Shared Objects - -An OpenCL memory object created from a Direct3D 11 resource remains valid as -long as the corresponding Direct3D 11 resource has not been deleted. If the -Direct3D 11 resource is deleted through the Direct3D 11 API, subsequent use -of the OpenCL memory object will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -The successful creation of a `cl_context` against a Direct3D 11 device -specified via the context create parameter `CL_CONTEXT_D3D11_DEVICE_KHR` -will increment the internal Direct3D reference count on the specified -Direct3D 11 device. The internal Direct3D reference count on that Direct3D -11 device will be decremented when the OpenCL reference count on the -returned OpenCL context drops to zero. - -The OpenCL context and corresponding command-queues are dependent on the -existence of the Direct3D 11 device from which the OpenCL context was -created. 
If the Direct3D 11 device is deleted through the Direct3D 11 API, -subsequent use of the OpenCL context will result in undefined behavior, -including but not limited to possible OpenCL errors, data corruption, and -program termination. - -Properties of Direct3D 11 objects may be queried using -flink:clGetMemObjectInfo and flink:clGetImageInfo with _param_name_ -`CL_MEM_D3D11_RESOURCE_KHR` and `CL_IMAGE_D3D11_SUBRESOURCE_KHR`. diff --git a/man/static/sharingDX9Inc.txt b/man/static/sharingDX9Inc.txt deleted file mode 100644 index e05578a79..000000000 --- a/man/static/sharingDX9Inc.txt +++ /dev/null @@ -1,77 +0,0 @@ -General information about sharing memory objects with DX9 follows. - -The goal of this extension is to allow applications to use media surfaces as -OpenCL memory objects. This allows efficient sharing of data between OpenCL -and selected adapter APIs (only DX9 for now). If this extension is -supported, an OpenCL image object can be created from a media surface and -the OpenCL API can be used to execute kernels that read and/or write memory -objects that are media surfaces. Note that OpenCL memory objects may be -created from the adapter media surface if and only if the OpenCL context has -been created from that adapter. - -If this extension is supported by an implementation, the string -"cl_khr_dx9_media_sharing" will be present in the `CL_PLATFORM_EXTENSIONS` -(see flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` (see -flink:clGetDeviceInfo). - -As currently proposed the interfaces for this extension would be provided in -`cl_dx9_media_sharing.h`. - -.Surface formats for Media Surface Sharing - -This section includes the D3D surface formats that are supported when the -adapter type is one of the Direct 3D lineage. Using a D3D surface format not -listed here is an error. 
To extend the use of this extension to support -media adapters beyond DirectX9 tables similar to the ones in this section -will need to be defined for the surface formats supported by the new media -adapter. All implementations that support this extension are required to -support the NV12 surface format, the other surface formats supported are the -same surface formats that the adapter you are sharing with supports as long -as they are listed in the tables below (Tables 9.10.3 and 9.10.4). - -[cols="1a,1a", options="header"] -|==== -| FOUR CC code | CL image format (channel order, channel data type) -| FOURCC('N', 'V', '1', '2'), Plane 0 | `CL_R, CL_UNORM_INT8` -| FOURCC('N', 'V', '1', '2'), Plane 1 | `CL_RG, CL_UNORM_INT8` -| FOURCC('Y', 'V', '1', '2'), Plane 0 | `CL_R, CL_UNORM_INT8` -| FOURCC('Y', 'V', '1', '2'), Plane 1 | `CL_R, CL_UNORM_INT8` -| FOURCC('Y', 'V', '1', '2'), Plane 2 | `CL_R, CL_UNORM_INT8` -|==== - -In the table above, NV12 Plane 0 corresponds to the luminance (Y) channel -and Plane 1 corresponds to the UV channels. The YV12 Plane 0 corresponds to -the Y channel, Plane 1 to the U channel and Plane 2 to the V channel. Note -that the YUV formats map to `CL_R` and `CL_RG` but do not perform any YUV to -RGB conversion and vice-versa. 
- -[cols="1a,1a", options="header"] -|==== -| D3D format | CL image format (channel order, channel data type) -| D3DFMT_R32F | `CL_R, CL_FLOAT` -| D3DFMT_R16F | `CL_R, CL_HALF_FLOAT` -| D3DFMT_L16 | `CL_R, CL_UNORM_INT16` -| D3DFMT_A8 | `CL_A, CL_UNORM_INT8` -| D3DFMT_L8 | `CL_R, CL_UNORM_INT8` -| D3DFMT_G32R32F | `CL_RG, CL_FLOAT` -| D3DFMT_G16R16F | `CL_RG, CL_HALF_FLOAT` -| D3DFMT_G16R16 | `CL_RG, CL_UNORM_INT16` -| D3DFMT_A8L8 | `CL_RG, CL_UNORM_INT8` -| D3DFMT_A32B32G32R32F | `CL_RGBA, CL_FLOAT` -| D3DFMT_A16B16G16R16F | `CL_RGBA, CL_HALF_FLOAT` -| D3DFMT_A16B16G16R16 | `CL_RGBA, CL_UNORM_INT16` -| D3DFMT_A8B8G8R8 | `CL_RGBA, CL_UNORM_INT8` -| D3DFMT_X8B8G8R8 | `CL_RGBA, CL_UNORM_INT8` -| D3DFMT_A8R8G8B8 | `CL_BGRA, CL_UNORM_INT8` -| D3DFMT_X8R8G8B8 | `CL_BGRA, CL_UNORM_INT8` -|==== - -Note that D3D9 format names seem to imply that the order of the color -channels are switched relative to OpenCL but this is not the case. For -example, layout of channels for each pixel for `D3DFMT_A32FB32FG32FR32F` is -the same as `CL_RGBA`, `CL_FLOAT`. - -Properties of media surface objects may be queried using -flink:clGetMemObjectInfo and flink:clGetImageInfo with _param_name_ -`CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR`, `CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR` and -`CL_IMAGE_DX9_MEDIA_SURFACE_PLANE_KHR`. diff --git a/scripts/apiconventions.py b/scripts/apiconventions.py new file mode 100644 index 000000000..4d27d04f9 --- /dev/null +++ b/scripts/apiconventions.py @@ -0,0 +1,13 @@ +#!/usr/bin/python3 -i +# +# Copyright 2021-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +# Generic alias for working group-specific API conventions interface. + +# This import should be changed at the repository / working group level to +# specify the correct API's conventions. 
+ +defaultAPI = 'opencl' + +from clconventions import OpenCLConventions as APIConventions diff --git a/scripts/cgenerator.py b/scripts/cgenerator.py index d4cab2b0b..f86658ee0 100644 --- a/scripts/cgenerator.py +++ b/scripts/cgenerator.py @@ -6,9 +6,11 @@ import os import re -from generator import (GeneratorOptions, OutputGenerator, noneStr, - regSortFeatures, write) +from generator import (GeneratorOptions, + MissingGeneratorOptionsConventionsError, + MissingGeneratorOptionsError, MissingRegistryError, + OutputGenerator, noneStr, regSortFeatures, write) class CGeneratorOptions(GeneratorOptions): """CGeneratorOptions - subclass of GeneratorOptions. @@ -17,12 +19,14 @@ class CGeneratorOptions(GeneratorOptions): generation.""" def __init__(self, - prefixText="", + prefixText='', genFuncPointers=True, protectFile=True, protectFeature=True, protectProto=None, protectProtoStr=None, + protectExtensionProto=None, + protectExtensionProtoStr=None, apicall='', apientry='', apientryp='', @@ -31,6 +35,7 @@ def __init__(self, alignFuncParam=0, genEnumBeginEndRange=False, genAliasMacro=False, + genStructExtendsComment=False, aliasMacro='', misracstyle=False, misracppstyle=False, @@ -40,11 +45,11 @@ def __init__(self, Additional parameters beyond parent class: - prefixText - list of strings to prefix generated header with - (usually a copyright statement + calling convention macros). + (usually a copyright statement + calling convention macros) - protectFile - True if multiple inclusion protection should be - generated (based on the filename) around the entire header. + generated (based on the filename) around the entire header - protectFeature - True if #ifndef..#endif protection should be - generated around a feature interface in the header file. 
+ generated around a feature interface in the header file - genFuncPointers - True if function pointer typedefs should be generated - protectProto - If conditional protection should be generated @@ -54,12 +59,19 @@ def __init__(self, set to None. - protectProtoStr - #ifdef/#ifndef symbol to use around prototype declarations, if protectProto is set + - protectExtensionProto - If conditional protection should be generated + around extension prototype declarations, set to either '#ifdef' + to require opt-in (#ifdef protectExtensionProtoStr) or '#ifndef' + to require opt-out (#ifndef protectExtensionProtoStr). Otherwise + set to None + - protectExtensionProtoStr - #ifdef/#ifndef symbol to use around + extension prototype declarations, if protectExtensionProto is set - apicall - string to use for the function declaration prefix, - such as APICALL on Windows. + such as APICALL on Windows - apientry - string to use for the calling convention macro, - in typedefs, such as APIENTRY. + in typedefs, such as APIENTRY - apientryp - string to use for the calling convention macro - in function pointer typedefs, such as APIENTRYP. 
+ in function pointer typedefs, such as APIENTRYP - indentFuncProto - True if prototype declarations should put each parameter on a separate line - indentFuncPointer - True if typedefed function pointers should put each @@ -70,6 +82,9 @@ def __init__(self, be generated for enumerated types - genAliasMacro - True if the OpenXR alias macro should be generated for aliased types (unclear what other circumstances this is useful) + - genStructExtendsComment - True if comments showing the structures + whose pNext chain a structure extends are included before its + definition - aliasMacro - alias macro to inject when genAliasMacro is True - misracstyle - generate MISRA C-friendly headers - misracppstyle - generate MISRA C++-friendly headers""" @@ -94,6 +109,12 @@ def __init__(self, self.protectProtoStr = protectProtoStr """#ifdef/#ifndef symbol to use around prototype declarations, if protectProto is set""" + self.protectExtensionProto = protectExtensionProto + """If conditional protection should be generated around extension prototype declarations, set to either '#ifdef' to require opt-in (#ifdef protectExtensionProtoStr) or '#ifndef' to require opt-out (#ifndef protectExtensionProtoStr). 
Otherwise set to None.""" + + self.protectExtensionProtoStr = protectExtensionProtoStr + """#ifdef/#ifndef symbol to use around extension prototype declarations, if protectExtensionProto is set""" + self.apicall = apicall """string to use for the function declaration prefix, such as APICALL on Windows.""" @@ -118,6 +139,9 @@ def __init__(self, self.genAliasMacro = genAliasMacro """True if the OpenXR alias macro should be generated for aliased types (unclear what other circumstances this is useful)""" + self.genStructExtendsComment = genStructExtendsComment + """True if comments showing the structures whose pNext chain a structure extends are included before its definition""" + self.aliasMacro = aliasMacro """alias macro to inject when genAliasMacro is True""" @@ -148,10 +172,12 @@ def __init__(self, *args, **kwargs): def beginFile(self, genOpts): OutputGenerator.beginFile(self, genOpts) + if self.genOpts is None: + raise MissingGeneratorOptionsError() # C-specific # # Multiple inclusion protection & C++ wrappers. - if genOpts.protectFile and self.genOpts.filename: + if self.genOpts.protectFile and self.genOpts.filename: headerSym = re.sub(r'\.h', '_h_', os.path.basename(self.genOpts.filename)).upper() write('#ifndef', headerSym, file=self.outFile) @@ -173,6 +199,8 @@ def beginFile(self, genOpts): def endFile(self): # C-specific # Finish C++ wrapper and multiple inclusion protection + if self.genOpts is None: + raise MissingGeneratorOptionsError() self.newline() write('#ifdef __cplusplus', file=self.outFile) write('}', file=self.outFile) @@ -188,53 +216,97 @@ def beginFeature(self, interface, emit): OutputGenerator.beginFeature(self, interface, emit) # C-specific # Accumulate includes, defines, types, enums, function pointer typedefs, - # end function prototypes separately for this feature. They're only + # end function prototypes separately for this feature. They are only # printed in endFeature(). 
self.sections = {section: [] for section in self.ALL_SECTIONS} self.feature_not_empty = False + def _endProtectComment(self, protect_str, protect_directive='#ifdef'): + if protect_directive is None or protect_str is None: + raise RuntimeError('Should not call in here without something to protect') + + # Do not put comments after #endif closing blocks if this is not set + if not self.genOpts.conventions.protectProtoComment: + return '' + elif 'ifdef' in protect_directive: + return f' /* {protect_str} */' + else: + return f' /* !{protect_str} */' + def endFeature(self): "Actually write the interface to the output file." # C-specific if self.emit: if self.feature_not_empty: + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() + is_core = self.featureName and self.featureName.startswith(self.conventions.api_prefix + 'VERSION_') if self.genOpts.conventions.writeFeature(self.featureExtraProtect, self.genOpts.filename): self.newline() if self.genOpts.protectFeature: write('#ifndef', self.featureName, file=self.outFile) + # If type declarations are needed by other features based on # this one, it may be necessary to suppress the ExtraProtect, # or move it below the 'for section...' loop. if self.featureExtraProtect is not None: write('#ifdef', self.featureExtraProtect, file=self.outFile) self.newline() + + # Generate warning of possible use in IDEs + write(f'// {self.featureName} is a preprocessor guard. 
Do not pass it to API calls.', file=self.outFile) write('#define', self.featureName, '1', file=self.outFile) for section in self.TYPE_SECTIONS: contents = self.sections[section] if contents: write('\n'.join(contents), file=self.outFile) + if self.genOpts.genFuncPointers and self.sections['commandPointer']: write('\n'.join(self.sections['commandPointer']), file=self.outFile) self.newline() + if self.sections['command']: if self.genOpts.protectProto: write(self.genOpts.protectProto, self.genOpts.protectProtoStr, file=self.outFile) + if self.genOpts.protectExtensionProto and not is_core: + write(self.genOpts.protectExtensionProto, + self.genOpts.protectExtensionProtoStr, file=self.outFile) write('\n'.join(self.sections['command']), end='', file=self.outFile) + if self.genOpts.protectExtensionProto and not is_core: + write('#endif' + + self._endProtectComment(protect_directive=self.genOpts.protectExtensionProto, + protect_str=self.genOpts.protectExtensionProtoStr), + file=self.outFile) if self.genOpts.protectProto: - write('#endif', file=self.outFile) + write('#endif' + + self._endProtectComment(protect_directive=self.genOpts.protectProto, + protect_str=self.genOpts.protectProtoStr), + file=self.outFile) else: self.newline() + if self.featureExtraProtect is not None: - write('#endif /*', self.featureExtraProtect, '*/', file=self.outFile) + write('#endif' + + self._endProtectComment(protect_str=self.featureExtraProtect), + file=self.outFile) + if self.genOpts.protectFeature: - write('#endif /*', self.featureName, '*/', file=self.outFile) + write('#endif' + + self._endProtectComment(protect_str=self.featureName), + file=self.outFile) # Finish processing in superclass OutputGenerator.endFeature(self) def appendSection(self, section, text): "Append a definition to the specified section" - # self.sections[section].append('SECTION: ' + section + '\n') + + if section is None: + self.logMsg('error', 'Missing section in appendSection (probably a <type> element missing its \'category\' 
attribute. Text:', text) + exit(1) + self.sections[section].append(text) self.feature_not_empty = True @@ -260,6 +332,8 @@ def genType(self, typeinfo, name, alias): # special-purpose generator. self.genStruct(typeinfo, name, alias) else: + if self.genOpts is None: + raise MissingGeneratorOptionsError() # OpenXR: this section was not under 'else:' previously, just fell through if alias: # If the type is an alias, just emit a typedef declaration @@ -267,13 +341,15 @@ def genType(self, typeinfo, name, alias): else: # Replace <apientry /> tags with an APIENTRY-style string # (from self.genOpts). Copy other text through unchanged. - # If the resulting text is an empty string, don't emit it. + # If the resulting text is an empty string, do not emit it. body = noneStr(typeElem.text) for elem in typeElem: if elem.tag == 'apientry': body += self.genOpts.apientry + noneStr(elem.tail) else: body += noneStr(elem.text) + noneStr(elem.tail) + if category == 'define' and self.misracppstyle(): + body = body.replace("(uint32_t)", "static_cast<uint32_t>") if body: # Add extra newline after multi-line entries. if '\n' in body[0:-1]: @@ -284,7 +360,7 @@ def genProtectString(self, protect_str): """Generate protection string. Protection strings are the strings defining the OS/Platform/Graphics - requirements for a given OpenXR command. When generating the + requirements for a given API command. 
When generating the language header files, we need to make sure the items specific to a graphics API or OS platform are properly wrapped in #ifs.""" protect_if_str = '' @@ -293,7 +369,7 @@ def genProtectString(self, protect_str): return (protect_if_str, protect_end_str) if ',' in protect_str: - protect_list = protect_str.split(",") + protect_list = protect_str.split(',') protect_defs = ('defined(%s)' % d for d in protect_list) protect_def_str = ' && '.join(protect_defs) protect_if_str = '#if %s\n' % protect_def_str @@ -306,8 +382,10 @@ def genProtectString(self, protect_str): def typeMayAlias(self, typeName): if not self.may_alias: - # First time we've asked if a type may alias. - # So, let's populate the set of all names of types that may. + if self.registry is None: + raise MissingRegistryError() + # First time we have asked if a type may alias. + # So, populate the set of all names of types that may. # Everyone with an explicit mayalias="true" self.may_alias = set(typeName @@ -315,9 +393,9 @@ def typeMayAlias(self, typeName): if data.elem.get('mayalias') == 'true') # Every type mentioned in some other type's parentstruct attribute. 
- parent_structs = (otherType.elem.get('parentstruct') - for otherType in self.registry.typedict.values()) - self.may_alias.update(set(x for x in parent_structs + polymorphic_bases = (otherType.elem.get('parentstruct') + for otherType in self.registry.typedict.values()) + self.may_alias.update(set(x for x in polymorphic_bases if x is not None)) return typeName in self.may_alias @@ -335,6 +413,9 @@ def genStruct(self, typeinfo, typeName, alias): generate a typedef of that alias.""" OutputGenerator.genStruct(self, typeinfo, typeName, alias) + if self.genOpts is None: + raise MissingGeneratorOptionsError() + typeElem = typeinfo.elem if alias: @@ -344,6 +425,11 @@ def genStruct(self, typeinfo, typeName, alias): (protect_begin, protect_end) = self.genProtectString(typeElem.get('protect')) if protect_begin: body += protect_begin + + if self.genOpts.genStructExtendsComment: + structextends = typeElem.get('structextends') + body += '// ' + typeName + ' extends ' + structextends + '\n' if structextends else '' + body += 'typedef ' + typeElem.get('category') # This is an OpenXR-specific alternative where aliasing refers @@ -387,11 +473,16 @@ def genGroup(self, groupinfo, groupName, alias=None): body = 'typedef ' + alias + ' ' + groupName + ';\n' self.appendSection(section, body) else: + if self.genOpts is None: + raise MissingGeneratorOptionsError() (section, body) = self.buildEnumCDecl(self.genOpts.genEnumBeginEndRange, groupinfo, groupName) - self.appendSection(section, "\n" + body) + self.appendSection(section, '\n' + body) def genEnum(self, enuminfo, name, alias): - """Generate the C declaration for a constant (a single value).""" + """Generate the C declaration for a constant (a single value). 
+ + tags may specify their values in several ways, but are usually + just integers.""" OutputGenerator.genEnum(self, enuminfo, name, alias) @@ -406,6 +497,8 @@ def genCmd(self, cmdinfo, name, alias): # prefix = '// ' + name + ' is an alias of command ' + alias + '\n' # else: # prefix = '' + if self.genOpts is None: + raise MissingGeneratorOptionsError() prefix = '' decls = self.makeCDecls(cmdinfo.elem) diff --git a/scripts/clconventions.py b/scripts/clconventions.py index dc4d95fe8..2e601519b 100644 --- a/scripts/clconventions.py +++ b/scripts/clconventions.py @@ -6,43 +6,44 @@ # Working-group-specific style conventions, # used in generation. +import os import re -from conventions import ConventionsBase +from spec_tools.conventions import ConventionsBase class OpenCLConventions(ConventionsBase): - def formatExtension(self, name): - """Mark up a name as an extension for the spec.""" - return '`<<{}>>`'.format(name) - @property def null(self): """Preferred spelling of NULL.""" return '`NULL`' - @property - def constFlagBits(self): - """Returns True if static const flag bits should be generated, False if an enumerated type should be generated.""" - return False + def formatVersion(self, name, apivariant, major, minor): + """Mark up an API version name as a link in the spec.""" + return f'`<>`' + + def formatExtension(self, name): + """Mark up a name as an extension for the spec.""" + return '`<<{}>>`'.format(name) @property def struct_macro(self): - return 'sname:' + return '' @property - def external_macro(self): - return 'code:' + def constFlagBits(self): + """Returns True if static const flag bits should be generated, False if an enumerated type should be generated.""" + return False @property def structtype_member_name(self): """Return name of the structure type member""" - return 'sType' + return 'type' @property def nextpointer_member_name(self): """Return name of the structure pointer chain member""" - return 'pNext' + return 'next' @property def 
valid_pointer_prefix(self): @@ -88,57 +89,55 @@ def api_name(self, spectype='api'): else: return None - @property - def xml_supported_name_of_api(self): - """Return the supported= attribute used in API XML""" - return 'opencl' - @property def api_prefix(self): """Return API token prefix""" return 'CL_' @property - def api_version_prefix(self): - """Return API core version token prefix""" - return 'CL_VERSION_' + def extension_name_prefix(self): + """Return extension name prefix""" + return 'cl_' - @property - def KHR_prefix(self): - """Return extension name prefix for KHR extensions""" - return 'cl_khr_' + def extension_short_description(self, elem): + """Return a short description of an extension for use in refpages.""" - @property - def EXT_prefix(self): - """Return extension name prefix for EXT extensions""" - return 'cl_ext_' + return 'OpenCL extension' @property def write_contacts(self): """Return whether contact list should be written to extension appendices""" - return True + return False @property - def write_refpage_include(self): - """Return whether refpage include should be written to extension appendices""" + def write_extension_type(self): + """Return whether extension type should be written to extension appendices""" return False - def writeFeature(self, featureExtraProtect, filename): - """Returns True if OutputGenerator.endFeature should write this feature. - Used in COutputGenerator - """ - return True + @property + def write_extension_number(self): + """Return whether extension number should be written to extension appendices""" + return False - def requires_error_validation(self, return_type): - """Returns True if the return_type element is an API result code - requiring error validation. 
- """ + @property + def write_extension_revision(self): + """Return whether extension revision number should be written to extension appendices""" return False @property - def required_errors(self): - """Return a list of required error codes for validation.""" - return [] + def write_refpage_include(self): + """Return whether refpage include should be written to extension appendices""" + return True + + @property + def KHR_prefix(self): + """Return extension name prefix for KHR extensions""" + return 'cl_khr_' + + @property + def EXT_prefix(self): + """Return extension name prefix for EXT extensions""" + return 'cl_ext_' def is_externsync_command(self, protoname): """Returns True if the protoname element is an API command requiring @@ -152,18 +151,6 @@ def is_api_name(self, name): """ return name[0:2].lower() == 'cl' - def is_voidpointer_alias(self, tag, text, tail): - """Return True if the declaration components (tag,text,tail) of an - element represents a void * type - """ - return tag == 'type' and text == 'void' and tail.startswith('*') - - def make_voidpointer_alias(self, tail): - """Reformat a void * declaration to include the API alias macro. - Vulkan doesn't have an API alias macro, so do nothing. - """ - return tail - def specURL(self, spectype = 'api'): """Return public registry URL which ref pages should link to for full Specification, so xrefs in the asciidoc source that aren't @@ -196,6 +183,25 @@ def specification_path(self): """Return relpath to the Asciidoctor specification sources in this project.""" return '../appendices/meta' + def extension_file_path(self, name): + """Return file path to an extension appendix relative to a directory + containing all such appendices. + - name - extension name + + Must implement.""" + + # Normally this would use self.file_suffix, but the OpenCL spec + # currently uses a variety of suffixing conventions. 
+ return f'{name}.asciidoc' + + def extension_include_string(self, name): + """Return format string for include:: line for an extension appendix + file. + - name - extension name""" + + return 'include::{{apispec}}/{}[]'.format( + self.extension_file_path(name)) + @property def extra_refpage_headers(self): """Return any extra text to add to refpage headers.""" @@ -222,7 +228,7 @@ def unified_flag_refpages(self): @property def spec_reflow_path(self): """Return the relative path to the spec source folder to reflow""" - return '.' + return os.getcwd() @property def spec_no_reflow_dirs(self): @@ -240,3 +246,16 @@ def should_skip_checking_codes(self): Vulkan, so these checks are not appropriate.""" return True + + @property + def extra_refpage_body(self): + """Return any extra text (following the title) for generated + reference pages.""" + return 'include::{generated}/specattribs.adoc[]' + + @property + def docgen_language(self): + """Return the language to be used in docgenerator [source] + blocks.""" + + return 'opencl' diff --git a/scripts/docgenerator.py b/scripts/docgenerator.py index 4ebabd870..3692768cf 100644 --- a/scripts/docgenerator.py +++ b/scripts/docgenerator.py @@ -7,18 +7,42 @@ from pathlib import Path from generator import GeneratorOptions, OutputGenerator, noneStr, write +from parse_dependency import dependencyLanguageComment -ENUM_TABLE_PREFIX = """ +_ENUM_TABLE_PREFIX = """ [cols=",",options="header",] -|======================================================================= +|==== |Enum |Description""" -ENUM_TABLE_SUFFIX = """|=======================================================================""" +_TABLE_SUFFIX = """|====""" -FLAG_BLOCK_PREFIX = """.Flag Descriptions +_ENUM_BLOCK_PREFIX = """.Enumerant Descriptions ****""" -FLAG_BLOCK_SUFFIX = """****""" +_FLAG_BLOCK_PREFIX = """.Flag Descriptions +****""" + +_BLOCK_SUFFIX = """****""" + +def orgLevelKey(name): + # Sort key for organization levels of features / extensions + # From highest to 
lowest, core versions, KHR extensions, EXT extensions, + # and vendor extensions + + prefixes = ( + 'VK_VERSION_', + 'VKSC_VERSION_', + 'VK_KHR_', + 'VK_EXT_') + + i = 0 + for prefix in prefixes: + if name.startswith(prefix): + return i + i += 1 + + # Everything else (e.g. vendor extensions) is least important + return i class DocGeneratorOptions(GeneratorOptions): @@ -124,8 +148,6 @@ class DocOutputGenerator(OutputGenerator): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - # Keep track of all extension numbers - self.extension_numbers = set() def beginFile(self, genOpts): OutputGenerator.beginFile(self, genOpts) @@ -141,28 +163,16 @@ def beginFeature(self, interface, emit): # Start processing in superclass OutputGenerator.beginFeature(self, interface, emit) - # Decide if we're in a core or an + # Decide if we are in a core or an self.in_core = (interface.tag == 'feature') - # Verify that each has a unique number during doc - # generation - # TODO move this to consistency_tools - if not self.in_core: - extension_number = interface.get('number') - if extension_number is not None and extension_number != "0": - if extension_number in self.extension_numbers: - self.logMsg('error', 'Duplicate extension number ', extension_number, ' detected in feature ', interface.get('name'), '\n') - exit(1) - else: - self.extension_numbers.add(extension_number) - def endFeature(self): # Finish processing in superclass OutputGenerator.endFeature(self) def genRequirements(self, name, mustBeFound = True): """Generate text showing what core versions and extensions introduce - an API. This relies on the map in api.py, which may be loaded at + an API. This relies on the map in apimap.py, which may be loaded at runtime into self.apidict. If not present, no message is generated. 
@@ -173,13 +183,40 @@ def genRequirements(self, name, mustBeFound = True): if self.apidict: if name in self.apidict.requiredBy: - features = [] + # It is possible to get both 'A with B' and 'B with A' for + # the same API. + # To simplify this, sort the (base,dependency) requirements + # and put them in a set to ensure they are unique. + features = set() + # 'dependency' may be a boolean expression of extension names for (base,dependency) in self.apidict.requiredBy[name]: if dependency is not None: - features.append('{} with {}'.format(base, dependency)) + # 'dependency' may be a boolean expression of extension + # names, in which case the sorting will not work well. + + # First, convert it from asciidoctor markup to language. + depLanguage = dependencyLanguageComment(dependency) + + # If they are the same, the dependency is only a + # single extension, and sorting them works. + # Otherwise, skip it. + if depLanguage == dependency: + deps = sorted( + sorted((base, dependency)), + key=orgLevelKey) + depString = ' with '.join(deps) + else: + # An expression with multiple extensions + depString = f'{base} with {depLanguage}' + + features.add(depString) else: - features.append(base) - return '// Provided by {}\n'.format(', '.join(features)) + features.add(base) + # Sort the overall dependencies so core versions are first + provider = ', '.join(sorted( + sorted(features), + key=orgLevelKey)) + return f'// Provided by {provider}\n' else: if mustBeFound: self.logMsg('warn', 'genRequirements: API {} not found'.format(name)) @@ -199,26 +236,26 @@ def writeInclude(self, directory, basename, contents): self.makeDir(directory) # Create file - filename = directory + '/' + basename + '.txt' + filename = directory + '/' + basename + self.file_suffix self.logMsg('diag', '# Generating include file:', filename) fp = open(filename, 'w', encoding='utf-8') # Asciidoc anchor write(self.genOpts.conventions.warning_comment, file=fp) - write('[[{0},{0}]]'.format(basename), file=fp) + 
write('[[{0}]]'.format(basename), file=fp) if self.genOpts.conventions.generate_index_terms: - index_terms = [] if basename.startswith(self.conventions.command_prefix): - index_terms.append(basename[2:] + " (function)") + index_term = basename + " (function)" elif basename.startswith(self.conventions.type_prefix): - index_terms.append(basename[2:] + " (type)") + index_term = basename + " (type)" elif basename.startswith(self.conventions.api_prefix): - index_terms.append(basename[len(self.conventions.api_prefix):] + " (define)") - index_terms.append(basename) - write('indexterm:[{}]'.format(','.join(index_terms)), file=fp) + index_term = basename + " (define)" + else: + index_term = basename + write('indexterm:[{}]'.format(index_term), file=fp) - write('[source%unbreakable,opencl]', file=fp) + write(f'[source%unbreakable,{self.conventions.docgen_language}]', file=fp) write('----', file=fp) write(contents, file=fp) write('----', file=fp) @@ -226,55 +263,69 @@ def writeInclude(self, directory, basename, contents): if self.genOpts.secondaryInclude: # Create secondary no cross-reference include file - filename = directory + '/' + basename + '.no-xref.txt' + filename = f'{directory}/{basename}.no-xref{self.file_suffix}' self.logMsg('diag', '# Generating include file:', filename) fp = open(filename, 'w', encoding='utf-8') # Asciidoc anchor write(self.genOpts.conventions.warning_comment, file=fp) write('// Include this no-xref version without cross reference id for multiple includes of same file', file=fp) - write('[source,%unbreakable,opencl]', file=fp) + write(f'[source,%unbreakable,{self.conventions.docgen_language}]', file=fp) write('----', file=fp) write(contents, file=fp) write('----', file=fp) fp.close() - def writeTable(self, basename, values): + def writeEnumTable(self, basename, values): """Output a table of enumerants.""" directory = Path(self.genOpts.directory) / 'enums' self.makeDir(str(directory)) - filename = str(directory / 
'{}.comments.txt'.format(basename)) + filename = str(directory / f'{basename}.comments{self.file_suffix}') self.logMsg('diag', '# Generating include file:', filename) with open(filename, 'w', encoding='utf-8') as fp: write(self.conventions.warning_comment, file=fp) - write(ENUM_TABLE_PREFIX, file=fp) + write(_ENUM_TABLE_PREFIX, file=fp) for data in values: write("|ename:{}".format(data['name']), file=fp) write("|{}".format(data['comment']), file=fp) - write(ENUM_TABLE_SUFFIX, file=fp) + write(_TABLE_SUFFIX, file=fp) - def writeFlagBox(self, basename, values): - """Output a box of flag bit comments.""" - directory = Path(self.genOpts.directory) / 'enums' - self.makeDir(str(directory)) - - filename = str(directory / '{}.comments.txt'.format(basename)) + def writeBox(self, filename, prefix, items): + """Write a generalized block/box for some values.""" self.logMsg('diag', '# Generating include file:', filename) with open(filename, 'w', encoding='utf-8') as fp: write(self.conventions.warning_comment, file=fp) - write(FLAG_BLOCK_PREFIX, file=fp) + write(prefix, file=fp) - for data in values: - write("* ename:{} -- {}".format(data['name'], - data['comment']), - file=fp) + for item in items: + write("* {}".format(item), file=fp) + + write(_BLOCK_SUFFIX, file=fp) + + def writeEnumBox(self, basename, values): + """Output a box of enumerants.""" + directory = Path(self.genOpts.directory) / 'enums' + self.makeDir(str(directory)) - write(FLAG_BLOCK_SUFFIX, file=fp) + filename = str(directory / f'{basename}.comments-box{self.file_suffix}') + self.writeBox(filename, _ENUM_BLOCK_PREFIX, + ("ename:{} -- {}".format(data['name'], data['comment']) + for data in values)) + + def writeFlagBox(self, basename, values): + """Output a box of flag bit comments.""" + directory = Path(self.genOpts.directory) / 'enums' + self.makeDir(str(directory)) + + filename = str(directory / f'{basename}.comments{self.file_suffix}') + self.writeBox(filename, _FLAG_BLOCK_PREFIX, + ("ename:{} -- 
{}".format(data['name'], data['comment']) + for data in values)) def genType(self, typeinfo, name, alias): """Generate type.""" @@ -288,6 +339,10 @@ def genType(self, typeinfo, name, alias): # If the type is a struct type, generate it using the # special-purpose generator. self.genStruct(typeinfo, name, alias) + elif category not in OutputGenerator.categoryToPath: + # If there is no path, do not write output + self.logMsg('diag', 'NOT writing include for {} category {}'.format( + name, category)) else: body = self.genRequirements(name) if alias: @@ -298,7 +353,7 @@ def genType(self, typeinfo, name, alias): else: # Replace tags with an APIENTRY-style string # (from self.genOpts). Copy other text through unchanged. - # If the resulting text is an empty string, don't emit it. + # If the resulting text is an empty string, do not emit it. body += noneStr(typeElem.text) for elem in typeElem: if elem.tag == 'apientry': @@ -307,32 +362,42 @@ def genType(self, typeinfo, name, alias): body += noneStr(elem.text) + noneStr(elem.tail) if body: - if category in OutputGenerator.categoryToPath: - self.writeInclude(OutputGenerator.categoryToPath[category], - name, body + '\n') - else: - self.logMsg('diag', '# NOT writing include file for type:', - name, '- bad category: ', category) + self.writeInclude(OutputGenerator.categoryToPath[category], + name, body + '\n') else: - self.logMsg('diag', '# NOT writing empty include file for type', name) + self.logMsg('diag', 'NOT writing empty include file for type', name) + + def genStructBody(self, typeinfo, typeName): + """ + Returns the body generated for a struct. + + Factored out to allow aliased types to also generate the original type. 
+ """ + typeElem = typeinfo.elem + body = 'typedef ' + typeElem.get('category') + ' ' + typeName + ' {\n' + + targetLen = self.getMaxCParamTypeLength(typeinfo) + for member in typeElem.findall('.//member'): + body += self.makeCParamDecl(member, targetLen + 4) + body += ';\n' + body += '} ' + typeName + ';' + return body def genStruct(self, typeinfo, typeName, alias): """Generate struct.""" OutputGenerator.genStruct(self, typeinfo, typeName, alias) - typeElem = typeinfo.elem - body = self.genRequirements(typeName) if alias: + if self.conventions.duplicate_aliased_structs: + # TODO maybe move this outside the conditional? This would be a visual change. + body += '// {} is an alias for {}\n'.format(typeName, alias) + alias_info = self.registry.typedict[alias] + body += self.genStructBody(alias_info, alias) + body += '\n\n' body += 'typedef ' + alias + ' ' + typeName + ';\n' else: - body += 'typedef ' + typeElem.get('category') + ' ' + typeName + ' {\n' - - targetLen = self.getMaxCParamTypeLength(typeinfo) - for member in typeElem.findall('.//member'): - body += self.makeCParamDecl(member, targetLen + 4) - body += ';\n' - body += '} ' + typeName + ';' + body += self.genStructBody(typeinfo, typeName) self.writeInclude('structs', typeName, body) @@ -352,14 +417,14 @@ def genEnumTable(self, groupinfo, groupName): 'name': name, } - (numVal, strVal) = self.enumToValue(elem, True) + (numVal, _) = self.enumToValue(elem, True) data['value'] = numVal extname = elem.get('extname') added_by_extension_to_core = (extname is not None and self.in_core) if added_by_extension_to_core and not self.genOpts.extEnumerantAdditions: - # We're skipping such values + # We are skipping such values continue comment = elem.get('comment') @@ -370,7 +435,8 @@ def genEnumTable(self, groupinfo, groupName): # Just skip this silently continue else: - # Skip but record this in case it's an odd-one-out missing a comment. + # Skip but record this in case it is an odd-one-out missing + # a comment. 
missing_comments.append(name) continue @@ -393,16 +459,17 @@ def genEnumTable(self, groupinfo, groupName): group_type = groupinfo.elem.get('type') if groupName == self.result_type: # Split this into success and failure - self.writeTable(groupName + '.success', + self.writeEnumTable(groupName + '.success', (data for data in values if data['value'] >= 0)) - self.writeTable(groupName + '.error', + self.writeEnumTable(groupName + '.error', (data for data in values if data['value'] < 0)) elif group_type == 'bitmask': self.writeFlagBox(groupName, values) elif group_type == 'enum': - self.writeTable(groupName, values) + self.writeEnumTable(groupName, values) + self.writeEnumBox(groupName, values) else: raise RuntimeError("Unrecognized enums type: " + str(group_type)) @@ -437,17 +504,6 @@ def genCmd(self, cmdinfo, name, alias): "Generate command." OutputGenerator.genCmd(self, cmdinfo, name, alias) - return_type = cmdinfo.elem.find('proto/type') - if self.genOpts.conventions.requires_error_validation(return_type): - # This command returns an API result code, so check that it - # returns at least the required errors. - # TODO move this to consistency_tools - required_errors = set(self.genOpts.conventions.required_errors) - errorcodes = cmdinfo.elem.get('errorcodes').split(',') - if not required_errors.issubset(set(errorcodes)): - self.logMsg('error', 'Missing required error code for command: ', name, '\n') - exit(1) - body = self.genRequirements(name) decls = self.makeCDecls(cmdinfo.elem) body += decls[0] diff --git a/scripts/extdependency.py b/scripts/extdependency.py new file mode 100755 index 000000000..69dbec3cc --- /dev/null +++ b/scripts/extdependency.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +# +# Copyright 2017-2024 The Khronos Group Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Generate a mapping of extension name -> all required extension names for + that extension, from dependencies in the API XML.""" + +import argparse +import errno +import xml.etree.ElementTree as etree +from pathlib import Path + +from apiconventions import APIConventions +from parse_dependency import dependencyNames + +class DiGraph: + """A directed graph. + + The implementation and API mimic that of networkx.DiGraph in networkx-1.11. + networkx implements graphs as nested dicts; it uses dicts all the way + down, no lists. + + Some major differences between this implementation and that of + networkx-1.11 are: + + * This omits edge and node attribute data, because we never use them + yet they add additional code complexity. + + * This returns iterator objects when possible instead of collection + objects, because it simplifies the implementation and should provide + better performance. + """ + + def __init__(self): + self.__nodes = {} + + def add_node(self, node): + if node not in self.__nodes: + self.__nodes[node] = DiGraphNode() + + def add_edge(self, src, dest): + self.add_node(src) + self.add_node(dest) + self.__nodes[src].adj.add(dest) + + def nodes(self): + """Iterate over the nodes in the graph.""" + return self.__nodes.keys() + + def descendants(self, node): + """ + Iterate over the nodes reachable from the given start node, excluding + the start node itself. Each node in the graph is yielded at most once. + """ + + # Implementation detail: Do a breadth-first traversal because it is + # easier than depth-first. + + # All nodes seen during traversal. + seen = set() + + # The stack of nodes that need visiting. + visit_me = [] + + # Bootstrap the traversal. 
+ seen.add(node) + for x in self.__nodes[node].adj: + if x not in seen: + seen.add(x) + visit_me.append(x) + + while visit_me: + x = visit_me.pop() + assert x in seen + yield x + + for y in self.__nodes[x].adj: + if y not in seen: + seen.add(y) + visit_me.append(y) + +class DiGraphNode: + def __init__(self): + # Set of adjacent of nodes. + self.adj = set() + +class ApiDependencies: + def __init__(self, + registry_path = None, + api_name = None): + """Load an API registry and generate extension dependencies + + registry_path - relative filename of XML registry. If not specified, + uses the API default. + + api_name - API name for which to generate dependencies. Only + extensions supported for that API are considered. + """ + + conventions = APIConventions() + if registry_path is None: + registry_path = conventions.registry_path + if api_name is None: + api_name = conventions.xml_api_name + + self.allExts = set() + self.khrExts = set() + self.ratifiedExts = set() + self.graph = DiGraph() + self.extensions = {} + self.tree = etree.parse(registry_path) + + # Loop over all supported extensions, creating a digraph of the + # extension dependencies in the 'depends' attribute, which is a + # boolean expression of core version and extension names. + # A static dependency tree can be constructed only by treating all + # extension names in the expression as dependencies, even though + # that may not be true if it is of form (ext OR ext). + # For the purpose these dependencies are used for - generating + # specifications with required dependencies included automatically - + # this will suffice. + # Separately tracks lists of all extensions and all KHR extensions, + # which are common specification targets. 
+ for elem in self.tree.findall('extensions/extension'): + name = elem.get('name') + supported = elem.get('supported') + ratified = elem.get('ratified', '') + + if api_name in supported.split(','): + self.allExts.add(name) + + if conventions.KHR_prefix in name: + self.khrExts.add(name) + + if api_name in ratified.split(','): + self.ratifiedExts.add(name) + + self.graph.add_node(name) + + depends = elem.get('depends') + if depends: + # Walk a list of the leaf nodes (version and extension + # names) in the boolean expression. + for dep in dependencyNames(depends): + # Filter out version names, which are explicitly + # specified when building a specification. + if not conventions.is_api_version_name(dep): + self.graph.add_edge(name, dep) + else: + # Skip unsupported extensions + pass + + def allExtensions(self): + """Returns a set of all extensions in the graph""" + return self.allExts + + def khrExtensions(self): + """Returns a set of all KHR extensions in the graph""" + return self.khrExts + + def ratifiedExtensions(self): + """Returns a set of all ratified extensions in the graph""" + return self.ratifiedExts + + def children(self, extension): + """Returns a set of the dependencies of an extension. 
+ Throws an exception if the extension is not in the graph.""" + + if extension not in self.allExts: + raise Exception(f'Extension {extension} not found in XML!') + + return set(self.graph.descendants(extension)) + + +# Test script +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('-registry', action='store', + default=APIConventions().registry_path, + help='Use specified registry file instead of ' + APIConventions().registry_path) + parser.add_argument('-loops', action='store', + default=10, type=int, + help='Number of timing loops to run') + parser.add_argument('-test', action='store', + default=None, + help='Specify extension to find dependencies of') + + args = parser.parse_args() + + deps = ApiDependencies(args.registry) + print('KHR exts =', sorted(deps.khrExtensions())) + print('Ratified exts =', sorted(deps.ratifiedExtensions())) + + import time + startTime = time.process_time() + + for loop in range(args.loops): + deps = ApiDependencies(args.registry) + + endTime = time.process_time() + + deltaT = endTime - startTime + print('Total time = {} time/loop = {}'.format(deltaT, deltaT / args.loops)) diff --git a/scripts/extensionmetadocgenerator.py b/scripts/extensionmetadocgenerator.py index 957cb5a49..bc38084ea 100644 --- a/scripts/extensionmetadocgenerator.py +++ b/scripts/extensionmetadocgenerator.py @@ -9,6 +9,7 @@ import sys from functools import total_ordering from generator import GeneratorOptions, OutputGenerator, regSortFeatures, write +from parse_dependency import dependencyMarkup class ExtensionMetaDocGeneratorOptions(GeneratorOptions): """ExtensionMetaDocGeneratorOptions - subclass of GeneratorOptions. 
@@ -17,10 +18,6 @@ class ExtensionMetaDocGeneratorOptions(GeneratorOptions): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - -EXT_NAME_DECOMPOSE_RE = re.compile(r'[A-Z]+_(?P[A-Z]+)_(?P[\w_]+)') - - @total_ordering class Extension: def __init__(self, @@ -29,23 +26,23 @@ def __init__(self, name, number, ext_type, - requires, - requiresCore, + depends, contact, promotedTo, deprecatedBy, obsoletedBy, provisional, revision, - specialuse ): + specialuse, + ratified + ): self.generator = generator self.conventions = generator.genOpts.conventions self.filename = filename self.name = name self.number = number self.ext_type = ext_type - self.requires = requires - self.requiresCore = requiresCore + self.depends = depends self.contact = contact self.promotedTo = promotedTo self.deprecatedBy = deprecatedBy @@ -53,10 +50,16 @@ def __init__(self, self.provisional = provisional self.revision = revision self.specialuse = specialuse + self.ratified = ratified self.deprecationType = None self.supercedingAPIVersion = None self.supercedingExtension = None + # This is a set containing names of extensions (if any) promoted + # *to* this extension. + # It is filled in after all the Extension objects are created, + # since it requires a reverse mapping step. + self.promotedFrom = set() if self.promotedTo is not None and self.deprecatedBy is not None and self.obsoletedBy is not None: self.generator.logMsg('warn', 'All \'promotedto\', \'deprecatedby\' and \'obsoletedby\' attributes used on extension ' + self.name + '! 
Ignoring \'promotedto\' and \'deprecatedby\'.') @@ -83,15 +86,11 @@ def __init__(self, pass # supercedingAPIVersion, supercedingExtension is None elif supercededBy.startswith(self.conventions.api_version_prefix): self.supercedingAPIVersion = supercededBy - elif supercededBy.startswith(self.conventions.api_prefix): + elif supercededBy.startswith(self.conventions.extension_name_prefix): self.supercedingExtension = supercededBy else: self.generator.logMsg('error', 'Unrecognized ' + self.deprecationType + ' attribute value \'' + supercededBy + '\'!') - match = EXT_NAME_DECOMPOSE_RE.match(self.name) - self.vendor = match.group('tag') - self.bare_name = match.group('name') - def __str__(self): return self.name def __eq__(self, other): @@ -174,8 +173,13 @@ def conditionalLinkExt(self, extName, indent = ' '): return doc - def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file): - ext = next(x for x in extensionsList if x.name == succeededBy) + def resolveDeprecationChain(self, extensions, succeededBy, isRefpage, file): + if succeededBy not in extensions: + write(f' ** *NOTE* The extension `{succeededBy}` is not supported for the API specification being generated', file=file) + self.generator.logMsg('warn', f'resolveDeprecationChain: {self.name} defines a superceding interface {succeededBy} which is not in the supported extensions list') + return + + ext = extensions[succeededBy] if ext.deprecationType: if ext.deprecationType == 'promotion': @@ -183,13 +187,13 @@ def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file): write(' ** Which in turn was _promoted_ to\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-promotions', isRefpage), file=file) else: # ext.supercedingExtension write(' ** Which in turn was _promoted_ to extension\n' + ext.conditionalLinkExt(ext.supercedingExtension), file=file) - ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, file) + ext.resolveDeprecationChain(extensions, 
ext.supercedingExtension, file) elif ext.deprecationType == 'deprecation': if ext.supercedingAPIVersion: write(' ** Which in turn was _deprecated_ by\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-new-feature', isRefpage), file=file) elif ext.supercedingExtension: write(' ** Which in turn was _deprecated_ by\n' + ext.conditionalLinkExt(ext.supercedingExtension) + ' extension', file=file) - ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, file) + ext.resolveDeprecationChain(extensions, ext.supercedingExtension, file) else: write(' ** Which in turn was _deprecated_ without replacement', file=file) elif ext.deprecationType == 'obsoletion': @@ -197,7 +201,7 @@ def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file): write(' ** Which in turn was _obsoleted_ by\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-new-feature', isRefpage), file=file) elif ext.supercedingExtension: write(' ** Which in turn was _obsoleted_ by\n' + ext.conditionalLinkExt(ext.supercedingExtension) + ' extension', file=file) - ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, file) + ext.resolveDeprecationChain(extensions, ext.supercedingExtension, file) else: write(' ** Which in turn was _obsoleted_ without replacement', file=file) else: # should be unreachable @@ -207,6 +211,11 @@ def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file): def writeTag(self, tag, value, isRefpage, fp): """Write a tag and (if non-None) a tag value to a file. + If the value is None, just write the tag. + + If the tag is None, just write the value (used for adding a value + to a just-written tag). 
+ - tag - string tag name - value - tag value, or None - isRefpage - controls style in which the tag is marked up @@ -221,18 +230,20 @@ def writeTag(self, tag, value, isRefpage, fp): tagPrefix = '*' tagSuffix = '*::' - write(tagPrefix + tag + tagSuffix, file=fp) + if tag is not None: + write(tagPrefix + tag + tagSuffix, file=fp) if value is not None: write(value, file=fp) if isRefpage: write('', file=fp) - def makeMetafile(self, extensionsList, isRefpage = False): + def makeMetafile(self, extensions, isRefpage = False): """Generate a file containing extension metainformation in asciidoctor markup form. - - extensionsList - list of extensions spec is being generated against + - extensions - dictionary of Extension objects for extensions spec + is being generated against - isRefpage - True if generating a refpage include, False if generating a specification extension appendix include""" @@ -249,20 +260,39 @@ def makeMetafile(self, extensionsList, isRefpage = False): write('', file=fp) self.writeTag('Name String', '`' + self.name + '`', isRefpage, fp) - self.writeTag('Extension Type', self.typeToStr(), isRefpage, fp) + if self.conventions.write_extension_type: + self.writeTag('Extension Type', self.typeToStr(), isRefpage, fp) + + if self.conventions.write_extension_number: + self.writeTag('Registered Extension Number', self.number, isRefpage, fp) + if self.conventions.write_extension_revision: + self.writeTag('Revision', self.revision, isRefpage, fp) - self.writeTag('Registered Extension Number', self.number, isRefpage, fp) - self.writeTag('Revision', self.revision, isRefpage, fp) + if self.conventions.xml_api_name in self.ratified.split(','): + ratstatus = 'Ratified' + else: + ratstatus = 'Not ratified' + self.writeTag('Ratification Status', ratstatus, isRefpage, fp) # Only API extension dependencies are coded in XML, others are explicit self.writeTag('Extension and Version Dependencies', None, isRefpage, fp) - write(' * Requires ' + self.conventions.api_name() + ' ' 
+ self.requiresCore, file=fp) - if self.requires: - for dep in self.requires.split(','): - write(' * Requires', self.conventions.formatExtension(dep), - file=fp) - if self.provisional == 'true': + # Transform the boolean 'depends' expression into equivalent + # human-readable asciidoc markup. + if self.depends is not None: + if isRefpage: + separator = '' + else: + separator = '+' + write(separator + '\n--\n' + + dependencyMarkup(self.depends) + + '--', file=fp) + else: + # Do not specify the base API redundantly, but put something + # here to avoid formatting trouble. + self.writeTag(None, 'None', isRefpage, fp) + + if self.provisional == 'true' and self.conventions.provisional_extension_warning: write(' * *This is a _provisional_ extension and must: be used with caution.', file=fp) write(' See the ' + self.specLink(xrefName = 'boilerplate-provisional-header', @@ -272,20 +302,20 @@ def makeMetafile(self, extensionsList, isRefpage = False): write('', file=fp) if self.deprecationType: - self.writeTag('Deprecation state', None, isRefpage, fp) + self.writeTag('Deprecation State', None, isRefpage, fp) if self.deprecationType == 'promotion': if self.supercedingAPIVersion: write(' * _Promoted_ to\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-promotions', isRefpage), file=fp) else: # ext.supercedingExtension write(' * _Promoted_ to\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension', file=fp) - self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) + self.resolveDeprecationChain(extensions, self.supercedingExtension, isRefpage, fp) elif self.deprecationType == 'deprecation': if self.supercedingAPIVersion: write(' * _Deprecated_ by\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-new-features', isRefpage), file=fp) elif self.supercedingExtension: write(' * _Deprecated_ by\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension' , file=fp) - 
self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) + self.resolveDeprecationChain(extensions, self.supercedingExtension, isRefpage, fp) else: write(' * _Deprecated_ without replacement' , file=fp) elif self.deprecationType == 'obsoletion': @@ -293,7 +323,7 @@ def makeMetafile(self, extensionsList, isRefpage = False): write(' * _Obsoleted_ by\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-new-features', isRefpage), file=fp) elif self.supercedingExtension: write(' * _Obsoleted_ by\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension' , file=fp) - self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) + self.resolveDeprecationChain(extensions, self.supercedingExtension, isRefpage, fp) else: # TODO: Does not make sense to retroactively ban use of extensions from 1.0. # Needs some tweaks to the semantics and this message, when such extension(s) occur. @@ -332,10 +362,10 @@ def makeMetafile(self, extensionsList, isRefpage = False): if handle.startswith('gitlab:'): prettyHandle = 'icon:gitlab[alt=GitLab, role="red"]' + handle.replace('gitlab:@', '') elif handle.startswith('@'): - issuePlaceholderText = '[' + self.name + '] ' + handle - issuePlaceholderText += '%0A<>' - trackerLink = 'link:++https://github.com/KhronosGroup/Vulkan-Docs/issues/new?body=' + issuePlaceholderText + '++' - prettyHandle = trackerLink + '[icon:github[alt=GitHub,role="black"]' + handle[1:] + ', window=_blank]' + issuePlaceholderText = f'[{self.name}]{handle}' + issuePlaceholderText += f'%0A*Here describe the issue or question you have about the {self.name} extension*' + trackerLink = f'link:++https://github.com/KhronosGroup/Vulkan-Docs/issues/new?body={issuePlaceholderText}++' + prettyHandle = f'{trackerLink}[icon:github[alt=GitHub,role="black"]{handle[1:]},window=_blank,opts=nofollow]' else: prettyHandle = handle @@ -346,13 +376,49 @@ def makeMetafile(self, extensionsList, isRefpage = False): # 
current repository, and link to the same document (parameterized # by a URL prefix attribute) if it does. # The assumption is that a proposal document for an extension - # VK_name will be located in 'proposals/VK_name.asciidoc' relative + # VK_name will be located in 'proposals/VK_name.adoc' relative # to the repository root, and that this script will be invoked from # the repository root. - path = 'proposals/{}.asciidoc'.format(self.name) - if os.path.exists(path) and os.access(path, os.R_OK): - self.writeTag('Extension Proposal', - 'link:{{specRepositoryURL}}/{}[{}]'.format(path, self.name), isRefpage, fp) + # If a proposal for this extension does not exist, look for + # proposals for the extensions it is promoted from. + + def checkProposal(extname): + """Check if a proposal document for an extension exists, + returning the path to that proposal or None otherwise.""" + + path = 'proposals/{}.adoc'.format(extname) + if os.path.exists(path) and os.access(path, os.R_OK): + return path + else: + return None + + # List of [ extname, proposal link ] + proposals = [] + + path = checkProposal(self.name) + if path is not None: + proposals.append([self.name, path]) + else: + for name in self.promotedFrom: + path = checkProposal(name) + if path is not None: + proposals.append([name, path]) + + if len(proposals) > 0: + tag = 'Extension Proposal' + for (name, path) in sorted(proposals): + self.writeTag(tag, + f'link:{{specRepositoryURL}}/{path}[{name}]', + isRefpage, fp) + # Setting tag = None so additional values will not get + # additional tag headers. + tag = None + + # If this is metadata to be included in a refpage, adjust the + # leveloffset to account for the relative structure of the extension + # appendices vs. refpages. 
+ if isRefpage and self.conventions.include_extension_appendix_in_refpage: + write(':leveloffset: -1', file=fp) fp.close() @@ -366,8 +432,7 @@ class ExtensionMetaDocOutputGenerator(OutputGenerator): - number extension number (optional) - contact name and GitHub login or email address (optional) - type 'instance' | 'device' (optional) - - requires list of comma-separated required API extensions (optional) - - requiresCore required core version of API (optional) + - depends boolean expression of core version and extension names this depends on (optional) - promotedTo extension or API version it was promoted to - deprecatedBy extension or API version which deprecated this extension, or empty string if deprecated without replacement @@ -377,7 +442,7 @@ class ExtensionMetaDocOutputGenerator(OutputGenerator): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.extensions = [] + self.extensions = {} # List of strings containing all vendor tags self.vendor_tags = [] self.file_suffix = '' @@ -432,46 +497,54 @@ def conditionalExt(self, extName, content, ifdef = None, condition = None): return doc - def makeExtensionInclude(self, ext): - return self.conventions.extension_include_string(ext) + def makeExtensionInclude(self, extname): + return self.conventions.extension_include_string(extname) def endFile(self): - self.extensions.sort() + # Determine the extension an extension is promoted from, if any. + # This is used when attempting to locate a proposal document in + # makeMetafile() below. 
+ for (extname, ext) in self.extensions.items(): + promotedTo = ext.promotedTo + if promotedTo is not None: + if promotedTo in self.extensions: + #print(f'{promotedTo} is promoted from {extname}') + self.extensions[promotedTo].promotedFrom.add(extname) + #print(f'setting self.extensions[{promotedTo}].promotedFrom = {self.extensions[promotedTo].promotedFrom}') + elif not self.conventions.is_api_version_name(promotedTo): + self.logMsg('warn', f'{extname} is promoted to {promotedTo} which is not in the extension map') # Generate metadoc extension files, in refpage and non-refpage form - for ext in self.extensions: + for ext in self.extensions.values(): ext.makeMetafile(self.extensions, isRefpage = False) if self.conventions.write_refpage_include: ext.makeMetafile(self.extensions, isRefpage = True) + # Key to sort extensions alphabetically within 'KHR', 'EXT', vendor + # extension prefixes. + def makeSortKey(extname): + name = extname.lower() + prefixes = self.conventions.extension_index_prefixes + for i, prefix in enumerate(prefixes): + if extname.startswith(prefix): + return (i, name) + return (len(prefixes), name) + # Generate list of promoted extensions promotedExtensions = {} - for ext in self.extensions: + for ext in self.extensions.values(): if ext.deprecationType == 'promotion' and ext.supercedingAPIVersion: - promotedExtensions.setdefault(ext.supercedingAPIVersion, []).append(ext) + promotedExtensions.setdefault(ext.supercedingAPIVersion, []).append(ext.name) for coreVersion, extensions in promotedExtensions.items(): promoted_extensions_fp = self.newFile(self.directory + '/promoted_extensions_' + coreVersion + self.file_suffix) - for ext in extensions: + for extname in sorted(extensions, key=makeSortKey): indent = '' - write(' * {blank}\n+\n' + ext.conditionalLinkExt(ext.name, indent), file=promoted_extensions_fp) + write(' * {blank}\n+\n' + ext.conditionalLinkExt(extname, indent), file=promoted_extensions_fp) promoted_extensions_fp.close() - # Re-sort to 
match earlier behavior - # TODO: Remove this extra sort when re-arranging section order OK. - - def makeSortKey(ext): - name = ext.name.lower() - prefixes = self.conventions.extension_index_prefixes - for i, prefix in enumerate(prefixes): - if ext.name.startswith(prefix): - return (i, name) - return (len(prefixes), name) - - self.extensions.sort(key=makeSortKey) - # Generate include directives for the extensions appendix, grouping # extensions by status (current, deprecated, provisional, etc.) with self.newFile(self.directory + '/current_extensions_appendix' + self.file_suffix) as current_extensions_appendix_fp, \ @@ -486,8 +559,14 @@ def makeSortKey(ext): self.newFile(self.directory + '/provisional_extension_appendices_toc' + self.file_suffix) as provisional_extension_appendices_toc_fp, \ self.newFile(self.directory + '/provisional_extensions_guard_macro' + self.file_suffix) as provisional_extensions_guard_macro_fp: + # Note: there is a hardwired assumption in creating the + # include:: directives below that all of these files are located + # in the 'meta/' subdirectory of the generated files directory. + # This is difficult to change, and it is very unlikely changing + # it will be needed. 
+ write('', file=current_extensions_appendix_fp) - write('include::deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + write('include::{generated}/meta/deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=current_extensions_appendix_fp) write('', file=current_extensions_appendix_fp) write('ifndef::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) write('[[extension-appendices-list]]', file=current_extensions_appendix_fp) @@ -498,19 +577,19 @@ def makeSortKey(ext): write('== List of Current Extensions', file=current_extensions_appendix_fp) write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) write('', file=current_extensions_appendix_fp) - write('include::current_extension_appendices_toc' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + write('include::{generated}/meta/current_extension_appendices_toc' + self.file_suffix + '[]', file=current_extensions_appendix_fp) write('\n<<<\n', file=current_extensions_appendix_fp) - write('include::current_extension_appendices' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + write('include::{generated}/meta/current_extension_appendices' + self.file_suffix + '[]', file=current_extensions_appendix_fp) write('', file=deprecated_extensions_appendix_fp) - write('include::deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) + write('include::{generated}/meta/deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) write('', file=deprecated_extensions_appendix_fp) write('ifdef::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) write('[[deprecated-extension-appendices-list]]', file=deprecated_extensions_appendix_fp) write('== List of Deprecated Extensions', file=deprecated_extensions_appendix_fp) - write('include::deprecated_extension_appendices_toc' + self.file_suffix + '[]', 
file=deprecated_extensions_appendix_fp) + write('include::{generated}/meta/deprecated_extension_appendices_toc' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) write('\n<<<\n', file=deprecated_extensions_appendix_fp) - write('include::deprecated_extension_appendices' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) + write('include::{generated}/meta/deprecated_extension_appendices' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) # add include guards to allow multiple includes @@ -520,18 +599,22 @@ def makeSortKey(ext): write(':PROVISIONAL_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD:\n', file=provisional_extensions_guard_macro_fp) write('', file=provisional_extensions_appendix_fp) - write('include::provisional_extensions_guard_macro' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('include::{generated}/meta/provisional_extensions_guard_macro' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) write('', file=provisional_extensions_appendix_fp) write('ifdef::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) write('[[provisional-extension-appendices-list]]', file=provisional_extensions_appendix_fp) write('== List of Provisional Extensions', file=provisional_extensions_appendix_fp) - write('include::provisional_extension_appendices_toc' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('include::{generated}/meta/provisional_extension_appendices_toc' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) write('\n<<<\n', file=provisional_extensions_appendix_fp) - write('include::provisional_extension_appendices' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('include::{generated}/meta/provisional_extension_appendices' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) 
write('endif::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) - for ext in self.extensions: - include = self.makeExtensionInclude(ext) + # Emit extensions in author ID order + sorted_keys = sorted(self.extensions.keys(), key=makeSortKey) + for name in sorted_keys: + ext = self.extensions[name] + + include = self.makeExtensionInclude(ext.name) link = ' * ' + self.conventions.formatExtension(ext.name) if ext.provisional == 'true': write(self.conditionalExt(ext.name, include), file=provisional_extension_appendices_fp) @@ -564,22 +647,28 @@ def beginFeature(self, interface, emit): self.logMsg('diag', 'beginFeature: ignoring non-extension feature', self.featureName) return - # These attributes must exist name = self.featureName - number = self.getAttrib(interface, 'number') - ext_type = self.getAttrib(interface, 'type') - revision = self.getSpecVersion(interface, name) + + # These attributes may be required to exist, depending on the API + number = self.getAttrib(interface, 'number', + self.conventions.write_extension_number) + ext_type = self.getAttrib(interface, 'type', + self.conventions.write_extension_type) + if self.conventions.write_extension_revision: + revision = self.getSpecVersion(interface, name) + else: + revision = None # These attributes are optional OPTIONAL = False - requires = self.getAttrib(interface, 'requires', OPTIONAL) - requiresCore = self.getAttrib(interface, 'requiresCore', OPTIONAL, '1.0') # TODO update this line with update_version.py + depends = self.getAttrib(interface, 'depends', OPTIONAL) # TODO should default to base API version 1.0? 
contact = self.getAttrib(interface, 'contact', OPTIONAL) promotedTo = self.getAttrib(interface, 'promotedto', OPTIONAL) deprecatedBy = self.getAttrib(interface, 'deprecatedby', OPTIONAL) obsoletedBy = self.getAttrib(interface, 'obsoletedby', OPTIONAL) provisional = self.getAttrib(interface, 'provisional', OPTIONAL, 'false') specialuse = self.getAttrib(interface, 'specialuse', OPTIONAL) + ratified = self.getAttrib(interface, 'ratified', OPTIONAL, '') filename = self.directory + '/' + name + self.file_suffix @@ -589,17 +678,16 @@ def beginFeature(self, interface, emit): name = name, number = number, ext_type = ext_type, - requires = requires, - requiresCore = requiresCore, + depends = depends, contact = contact, promotedTo = promotedTo, deprecatedBy = deprecatedBy, obsoletedBy = obsoletedBy, provisional = provisional, revision = revision, - specialuse = specialuse) - self.extensions.append(extdata) - + specialuse = specialuse, + ratified = ratified) + self.extensions[name] = extdata def endFeature(self): # Finish processing in superclass @@ -636,6 +724,7 @@ def numbersToWords(self, name): def getSpecVersion(self, elem, extname, default=None): """Determine the extension revision from the EXTENSION_NAME_SPEC_VERSION enumerant. + This only makes sense for Vulkan. - elem - element to query - extname - extension name from the 'name' attribute diff --git a/scripts/find_adoc_deps b/scripts/find_adoc_deps new file mode 100755 index 000000000..d20d04976 --- /dev/null +++ b/scripts/find_adoc_deps @@ -0,0 +1,17 @@ +#!/bin/bash + +# Copyright 2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +# find_adoc_deps - find include:: dependencies of an asciidoc file +# Usage: find_adoc_deps file GENERATED +# Prints a space-separated list of file dependencies determined from the +# include:: constructs in 'file'. +# The literal text '{generated}' in an include:: path is substituted +# with the value of the second argument. 
+ +if test -f $1 ; then + echo `grep '^include::' $1 | sed -e 's/^include:://' -e 's/\[\]/ /' -e "s#{generated}#$2#"` +else + exit 1 +fi diff --git a/scripts/genRef.py b/scripts/genRef.py index ed7f2580c..2b103761a 100755 --- a/scripts/genRef.py +++ b/scripts/genRef.py @@ -14,27 +14,55 @@ import re import sys from collections import OrderedDict -from reflib import (findRefs, fixupRefs, loadFile, logDiag, logWarn, +from reflib import (findRefs, fixupRefs, loadFile, logDiag, logWarn, logErr, printPageInfo, setLogFile) from reg import Registry -from clconventions import OpenCLConventions as APIConventions +from generator import GeneratorOptions +from parse_dependency import dependencyNames +from apiconventions import APIConventions + + +# refpage 'type' attributes which are API entities and contain structured +# content such as API includes, valid usage blocks, etc. +refpage_api_types = ( + 'basetypes', + 'consts', + 'defines', + 'enums', + 'flags', + 'funcpointers', + 'handles', + 'protos', + 'structs', +) + +# Other refpage types - SPIR-V builtins, API feature blocks, etc. - which do +# not have structured content. +refpage_other_types = ( + 'builtins', + 'feature', + 'freeform', + 'spirv' +) def makeExtensionInclude(name): - """Return an include command, given an extension name.""" - return 'include::{}/refpage.{}{}[]'.format( - conventions.specification_path, - name, - conventions.file_suffix) + """Return an include command for a generated extension interface. + - name - extension name""" + + return 'include::{}/meta/refpage.{}{}[]'.format( + conventions.generated_include_path, + name, + conventions.file_suffix) def makeAPIInclude(type, name): """Return an include command for a generated API interface - - type - type of the API, e.g. 'flags', 'handles', etc - - name - name of the API""" + - type - type of the API, e.g. 
'flags', 'handles', etc + - name - name of the API""" return 'include::{}/api/{}/{}{}\n'.format( - conventions.refpage_generated_include_path, + conventions.generated_include_path, type, name, conventions.file_suffix) @@ -51,23 +79,30 @@ def printCopyrightSourceComments(fp): Writes an asciidoc comment block, which copyrights the source file.""" - print('// Copyright 2014-2024 The Khronos Group, Inc.', file=fp) + print('// Copyright 2014-2024 The Khronos Group Inc.', file=fp) print('//', file=fp) # This works around constraints of the 'reuse' tool print('// SPDX' + '-License-Identifier: CC-BY-4.0', file=fp) print('', file=fp) -def printFooter(fp): +def printFooter(fp, leveloffset=0): """Print footer material at the end of each refpage on open file fp. If generating separate refpages, adds the copyright. - If generating the single combined refpage, just add a separator.""" + If generating the single combined refpage, just add a separator. + + - leveloffset - number of levels to bias section titles up or down.""" + + # Generate the section header. + # Default depth is 2. + depth = max(0, leveloffset + 2) + prefix = '=' * depth print('ifdef::doctype-manpage[]', - '== Copyright', + f'{prefix} Copyright', '', - 'include::{config}/copyright-ccby.txt[]', + 'include::{config}/copyright-ccby' + conventions.file_suffix + '[]', 'endif::doctype-manpage[]', '', 'ifndef::doctype-manpage[]', @@ -89,7 +124,7 @@ def macroPrefix(name): if name in api.enums: return 'elink:' + name if name in api.flags: - return 'elink:' + name + return 'tlink:' + name if name in api.funcpointers: return 'tlink:' + name if name in api.handles: @@ -137,7 +172,11 @@ def seeAlsoList(apiName, explicitRefs=None, apiAliases=[]): for (base,dependency) in api.requiredBy[name]: refs.add(base) if dependency is not None: - refs.add(dependency) + # 'dependency' may be a boolean expression of extension + # names. + # Extract them for use in cross-references. 
+ for extname in dependencyNames(dependency): + refs.add(extname) if len(refs) == 0: return None @@ -184,7 +223,7 @@ def refPageShell(pageName, pageDesc, fp, head_content = None, sections=None, tai """Generate body of a reference page. - pageName - string name of the page - - pageDesc - string short description of the page + - pageDesc - string short description of the page, or empty string - fp - file to write to - head_content - text to include before the sections - sections - iterable returning (title,body) for each section. @@ -195,14 +234,18 @@ def refPageShell(pageName, pageDesc, fp, head_content = None, sections=None, tai print(':data-uri:', ':icons: font', + ':attribute-missing: warn', conventions.extra_refpage_headers, '', sep='\n', file=fp) s = '{}({})'.format(pageName, man_section) print('= ' + s, + '', + conventions.extra_refpage_body, '', sep='\n', file=fp) + if pageDesc.strip() == '': pageDesc = 'NO SHORT DESCRIPTION PROVIDED' logWarn('refPageHead: no short description provided for', pageName) @@ -265,7 +308,8 @@ def refPageTail(pageName, specAnchor=None, seeAlso=None, fp=None, - auto=False): + auto=False, + leveloffset=0): """Generate end boilerplate of a reference page. - pageName - name of the page @@ -273,7 +317,12 @@ def refPageTail(pageName, identifying the specification name and URL this refpage links to. - specAnchor - None or the 'anchor' attribute from the refpage block, identifying the anchor in the specification this refpage links to. If - None, the pageName is assumed to be a valid anchor.""" + None, the pageName is assumed to be a valid anchor. + - seeAlso - text of the "See Also" section + - fp - file to write the page to + - auto - True if this is an entirely generated refpage, False if it is + handwritten content from the spec. 
+ - leveloffset - number of levels to bias section titles up or down.""" specName = conventions.api_name(specType) specURL = conventions.specURL(specType) @@ -302,19 +351,24 @@ def refPageTail(pageName, 'not directly.', )) - print('== See Also', + # Generate the section header. + # Default depth is 2. + depth = max(0, leveloffset + 2) + prefix = '=' * depth + + print(f'{prefix} See Also', '', seeAlso, '', sep='\n', file=fp) - print('== Document Notes', + print(f'{prefix} Document Notes', '', '\n'.join(notes), '', sep='\n', file=fp) - printFooter(fp) + printFooter(fp, leveloffset) def xrefRewriteInitialize(): @@ -324,26 +378,26 @@ def xrefRewriteInitialize(): global refLinkTextPattern, refLinkTextSubstitute global specLinkPattern, specLinkSubstitute - # These are xrefs to Vulkan API entities, rewritten to link to refpages + # These are xrefs to API entities, rewritten to link to refpages # The refLink variants are for xrefs with only an anchor and no text. # The refLinkText variants are for xrefs with both anchor and text - refLinkPattern = re.compile(r'<<([Vv][Kk][^>,]+)>>') + refLinkPattern = re.compile(r'<<([Vv][Kk][A-Za-z0-9_]+)>>') refLinkSubstitute = r'link:\1.html[\1^]' - refLinkTextPattern = re.compile(r'<<([Vv][Kk][^>,]+)[,]?[ \t\n]*([^>,]*)>>') + refLinkTextPattern = re.compile(r'<<([Vv][Kk][A-Za-z0-9_]+)[,]?[ \t\n]*([^>,]*)>>') refLinkTextSubstitute = r'link:\1.html[\2^]' # These are xrefs to other anchors, rewritten to link to the spec - specLinkPattern = re.compile(r'<<([^>,]+)[,]?[ \t\n]*([^>,]*)>>') + specLinkPattern = re.compile(r'<<([-A-Za-z0-9_.(){}:]+)[,]?[ \t\n]*([^>,]*)>>') # Unfortunately, specLinkSubstitute depends on the link target, - # so can't be constructed in advance. + # so cannot be constructed in advance. specLinkSubstitute = None def xrefRewrite(text, specURL): """Rewrite asciidoctor xrefs in text to resolve properly in refpages. 
- Xrefs which are to Vulkan refpages are rewritten to link to those + Xrefs which are to refpages are rewritten to link to those refpages. The remainder are rewritten to generate external links into the supplied specification document URL. @@ -372,7 +426,7 @@ def emitPage(baseDir, specDir, pi, file): - specDir - directory extracted page source came from - pi - pageInfo for this page relative to file - file - list of strings making up the file, indexed by pi""" - pageName = baseDir + '/' + pi.name + '.txt' + pageName = f'{baseDir}/{pi.name}{conventions.file_suffix}' # Add a dictionary entry for this page global genDict @@ -387,9 +441,9 @@ def emitPage(baseDir, specDir, pi, file): field = None fieldText = None - if pi.type != 'freeform' and pi.type != 'spirv': + # Only do structural checks on API pages + if pi.type in refpage_api_types: if pi.include is None: - # Not sure how this happens yet logWarn('emitPage:', pageName, 'INCLUDE is None, no page generated') return @@ -421,9 +475,12 @@ def emitPage(baseDir, specDir, pi, file): logWarn('emitPage: INCLUDE == BODY, so description will be empty for', pi.name) if pi.begin != pi.include: logWarn('emitPage: Note: BEGIN != INCLUDE, so the description might be incorrectly located before the API include!') - else: + elif pi.type in refpage_other_types: specText = None descText = ''.join(file[pi.begin:pi.end + 1]) + else: + # This should be caught in the spec markup checking tests + logErr(f"emitPage: refpage type='{pi.type}' is unrecognized") # Rewrite asciidoctor xrefs to resolve properly in refpages specURL = conventions.specURL(pi.spec) @@ -456,7 +513,7 @@ def autoGenEnumsPage(baseDir, pi, file): - baseDir - base directory to emit page into - pi - pageInfo for this page relative to file - file - list of strings making up the file, indexed by pi""" - pageName = baseDir + '/' + pi.name + '.txt' + pageName = f'{baseDir}/{pi.name}{conventions.file_suffix}' fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry 
for this page @@ -509,7 +566,7 @@ def autoGenFlagsPage(baseDir, flagName): - baseDir - base directory to emit page into - flagName - API *Flags name""" - pageName = baseDir + '/' + flagName + '.txt' + pageName = f'{baseDir}/{flagName}{conventions.file_suffix}' fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry for this page @@ -564,7 +621,7 @@ def autoGenHandlePage(baseDir, handleName): - handleName - API handle name""" # @@ Need to determine creation function & add handles/ include for the # @@ interface in generator.py. - pageName = baseDir + '/' + handleName + '.txt' + pageName = f'{baseDir}/{handleName}{conventions.file_suffix}' fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry for this page @@ -602,7 +659,8 @@ def genRef(specFile, baseDir): - specFile - filename to extract from - baseDir - output directory to generate page in""" - file = loadFile(specFile) + # We do not care the newline format used here. + file, _ = loadFile(specFile) if file is None: return @@ -623,6 +681,28 @@ def genRef(specFile, baseDir): for name in sorted(pageMap): pi = pageMap[name] + # Only generate the page if it is in the requested build + # 'freeform' pages are always generated + # 'feature' pages (core versions & extensions) are generated if they are in + # the requested feature list + # All other pages (APIs) are generated if they are in the API map for + # the build. + if pi.type in refpage_api_types: + if name not in api.typeCategory: + # Also check aliases of name - api.nonexistent is the same + # mapping used to rewrite *link: macros in this build. 
+ if name not in api.nonexistent: + logWarn(f'genRef: NOT generating feature page {name} - API not in this build') + continue + else: + logWarn(f'genRef: generating feature page {name} because its alias {api.nonexistent[name]} exists') + elif pi.type in refpage_other_types: + # The only non-API type which can be checked is a feature refpage + if pi.type == 'feature': + if name not in api.features: + logWarn(f'genRef: NOT generating feature page {name} - feature not in this build') + continue + printPageInfo(pi, file) if pi.Warning: @@ -635,7 +715,7 @@ def genRef(specFile, baseDir): elif pi.type == 'flags': autoGenFlagsPage(baseDir, pi.name) else: - # Don't extract this page + # Do not extract this page logWarn('genRef: Cannot extract or autogenerate:', pi.name) pages[pi.name] = pi @@ -646,9 +726,9 @@ def genRef(specFile, baseDir): def genSinglePageRef(baseDir): - """Generate baseDir/apispec.txt, the single-page version of the ref pages. + """Generate the single-page version of the ref pages. - This assumes there's a page for everything in the api module dictionaries. + This assumes there is a page for everything in the api module dictionaries. Extensions (KHR, EXT, etc.) are currently skipped""" # Accumulate head of page head = io.StringIO() @@ -664,13 +744,15 @@ def genSinglePageRef(baseDir): ':data-uri:', ':toc2:', ':toclevels: 2', + ':attribute-missing: warn', '', sep='\n', file=head) print('== Copyright', file=head) print('', file=head) - print('include::{config}/copyright-ccby.txt[]', file=head) + print('include::{config}/copyright-ccby' + conventions.file_suffix + '[]', file=head) print('', file=head) + # Inject the table of contents. Asciidoc really ought to be generating # this for us. 
@@ -706,7 +788,7 @@ def genSinglePageRef(baseDir): keys = sorted(apiDict.keys()) for refPage in keys: - # Don't generate links for aliases, which are included with the + # Do not generate links for aliases, which are included with the # aliased page if refPage not in api.alias: # Add page to body @@ -720,7 +802,7 @@ def genSinglePageRef(baseDir): # Now, all are emitted. continue else: - print('include::' + refPage + '.txt[]', file=body) + print(f'include::{refPage}{conventions.file_suffix}[]', file=body) else: # Alternatively, we could (probably should) link to the # aliased refpage @@ -731,7 +813,7 @@ def genSinglePageRef(baseDir): print('\n' + ':leveloffset: 0' + '\n', file=body) # Write head and body to the output file - pageName = baseDir + '/apispec.txt' + pageName = f'{baseDir}/apispec{conventions.file_suffix}' fp = open(pageName, 'w', encoding='utf-8') print(head.getvalue(), file=fp, end='') @@ -757,17 +839,14 @@ def genExtension(baseDir, extpath, name, info): declares = [] elem = info.elem - # Type of extension (instance, device, etc.) - ext_type = elem.get('type') - # Autogenerate interfaces from entry - for required in elem.find('require'): + for required in elem.findall('require'): req_name = required.get('name') if not req_name: - # This isn't what we're looking for + # This is not what we are looking for continue if req_name.endswith('_SPEC_VERSION') or req_name.endswith('_EXTENSION_NAME'): - # Don't link to spec version or extension name - those ref pages aren't created. + # Do not link to spec version or extension name - those ref pages are not created. 
continue if required.get('extends'): @@ -777,54 +856,70 @@ def genExtension(baseDir, extpath, name, info): continue if req_name not in genDict: - logWarn('ERROR: {} (in extension {}) does not have a ref page.'.format(req_name, name)) + if req_name in api.alias: + logWarn(f'WARN: {req_name} (in extension {name}) is an alias, so does not have a ref page') + else: + logWarn(f'ERROR: {req_name} (in extension {name}) does not have a ref page.') declares.append(req_name) - # import pdb - # pdb.set_trace() - appbody = None + tail_content = None if extpath is not None: - appfp = open('{}/{}.txt'.format(extpath, name), 'r', encoding='utf-8') - if appfp is not None: + try: + appPath = extpath + '/' + conventions.extension_file_path(name) + appfp = open(appPath, 'r', encoding='utf-8') appbody = appfp.read() + appfp.close() # Transform internal links to crosslinks specURL = conventions.specURL() appbody = xrefRewrite(appbody, specURL) - else: + except FileNotFoundError: + print('Cannot find extension appendix for', name) logWarn('Cannot find extension appendix for', name) # Fall through to autogenerated page extpath = None appbody = None - appfp.close() - # Include the extension appendix without an extra title - # head_content = 'include::{{appendices}}/{}.txt[]'.format(name) + appbody = f'Cannot find extension appendix {appPath} for {name}\n' + else: + tail_content = makeExtensionInclude(name) # Write the extension refpage - pageName = baseDir + '/' + name + '.txt' + pageName = f'{baseDir}/{name}{conventions.file_suffix}' logDiag('genExtension:', pageName) fp = open(pageName, 'w', encoding='utf-8') # There are no generated titled sections sections = None - # 'See link:{html_spec_relative}#%s[ %s] in the main specification for complete information.' 
% ( - # name, name) refPageShell(name, - "{} extension".format(ext_type), + conventions.extension_short_description(elem), fp, appbody, - sections=sections) + sections=sections, + tail_content=tail_content) + + # Restore leveloffset for boilerplate in refPageTail + if conventions.include_extension_appendix_in_refpage: + # The generated metadata include (refpage.extensionname.adoc) moved + # the leveloffset attribute by -1 to account for the relative + # structuring of the spec extension appendix section structure vs. + # the refpages. + # This restores leveloffset for the boilerplate in refPageTail. + leveloffset = 1 + else: + leveloffset = 0 + refPageTail(pageName=name, specType=None, specAnchor=name, seeAlso=seeAlsoList(name, declares), fp=fp, - auto=True) + auto=True, + leveloffset=leveloffset) fp.close() @@ -872,10 +967,9 @@ def genExtension(baseDir, extpath, name, info): results = parser.parse_args() - # Look for api.py in the specified directory - if results.genpath is not None: - sys.path.insert(0, results.genpath) - import api + # Load the generated apimap module + sys.path.insert(0, results.genpath) + import apimap as api setLogFile(True, True, results.logFile) setLogFile(True, False, results.diagFile) @@ -896,19 +990,21 @@ def genExtension(baseDir, extpath, name, info): d = genRef(file, baseDir) pages.update(d) - # Now figure out which pages *weren't* generated from the spec. + # Now figure out which pages were not generated from the spec. # This relies on the dictionaries of API constructs in the api module. if not results.noauto: - registry = Registry() + # Must have an apiname selected to avoid complaints from + # registry.loadFile, even though it is irrelevant to our uses. + genOpts = GeneratorOptions(apiname = conventions.xml_api_name) + registry = Registry(genOpts = genOpts) registry.loadFile(results.registry) if conventions.write_refpage_include: # Only extensions with a supported="..." 
attribute in this set # will be considered for extraction/generation. - supported_strings = set((conventions.xml_api_name,)) ext_names = set(k for k, v in registry.extdict.items() - if v.supported in supported_strings) + if conventions.xml_api_name in v.supported.split(',')) desired_extensions = ext_names.intersection(set(results.extension)) for prefix in conventions.extension_index_prefixes: @@ -943,19 +1039,19 @@ def genExtension(baseDir, extpath, name, info): (extensions, apiName + ' Extensions'), ] - # Summarize pages that weren't generated, for good or bad reasons + # Summarize pages that were not generated, for good or bad reasons for (apiDict, title) in sections: # OpenXR was keeping a 'flagged' state which only printed out a # warning for the first non-generated page, but was otherwise - # unused. This doesn't seem helpful. + # unused. This does not seem helpful. for page in apiDict: if page not in genDict: # Page was not generated - why not? if page in api.alias: - logWarn('(Benign, is an alias) Ref page for', title, page, 'is aliased into', api.alias[page]) + logDiag('(Benign, is an alias) Ref page for', title, page, 'is aliased into', api.alias[page]) elif page in api.flags and api.flags[page] is None: - logWarn('(Benign, no FlagBits defined) No ref page generated for ', title, + logDiag('(Benign, no FlagBits defined) No ref page generated for ', title, page) else: # Could introduce additional logic to detect diff --git a/scripts/gen_dictionaries.py b/scripts/gen_dictionaries.py index 4d750876a..85d6ffcff 100755 --- a/scripts/gen_dictionaries.py +++ b/scripts/gen_dictionaries.py @@ -19,9 +19,8 @@ def parse_xml(path): # File Header: def GetHeader(): - return """// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ + return """// Copyright 2017-2024 The Khronos Group. 
+// SPDX-License-Identifier: CC-BY-4.0 """ diff --git a/scripts/gen_version_notes.py b/scripts/gen_version_notes.py index 5843fe1da..b16faa42d 100755 --- a/scripts/gen_version_notes.py +++ b/scripts/gen_version_notes.py @@ -23,9 +23,8 @@ def parse_xml(path): # File Header: def GetHeader(): - return """// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ + return """// Copyright 2017-2024 The Khronos Group. +// SPDX-License-Identifier: CC-BY-4.0 """ @@ -58,40 +57,58 @@ def ShortNote(name, added_in, deprecated_by): if added_in != "1.0" and deprecated_by != None: return "{missing_before} version %s and {deprecated_by} version %s." % (added_in, deprecated_by) -# Find feature groups that are parents of a feature/require/${entry_type} -# hierarchy, and then find all the ${entry_type} within each hierarchy: +# Find feature or extension groups that are parents of a or +# <${entry_type}> tag, and then find all the +# ${entry_type} within each hierarchy: def process_xml(spec, entry_type, note_printer): numberOfEntries = 0 numberOfNewEntries = 0 numberOfDeprecatedEntries = 0 - for feature in spec.findall('.//feature/require/%s/../..' % entry_type): - for entry in feature.findall('.//%s' % entry_type): - name = entry.get('name') - - numberOfEntries += 1 - added_in = feature.get('number') - deprecated_by = None - - # All the groups that this specific API ${entry_type} belongs. - categories = spec.findall( - './/require[@comment]/%s[@name="%s"]/..' % (entry_type, name)) - for category in categories: - comment = category.get('comment') - if "deprecated in OpenCL" in comment: - words = comment.split(" ") - assert " ".join(words[-4:-1]) == "deprecated in OpenCL" - assert deprecated_by == None # Can't deprecate something twice. 
- deprecated_by = words[-1] - - versionFileName = os.path.join(args.directory, name + ".asciidoc") - with open(versionFileName, 'w') as versionFile: - versionFile.write(GetHeader()) - versionFile.write(note_printer(name, added_in, deprecated_by)) - versionFile.write(GetFooter()) - - numberOfNewEntries += 0 if added_in == "1.0" else 1 - numberOfDeprecatedEntries += 0 if deprecated_by == None else 1 + # Track the APIs which have already had a version file written, to avoid + # a couple of cases like CL_DEPTH, which is required by both a core + # version and an extension. + seen_apis = set() + + for feature_type in [ 'feature', 'extension' ]: + for feature in spec.findall(f'.//{feature_type}/require/{entry_type}/../..'): + for entry in feature.findall(f'.//{entry_type}'): + name = entry.get('name') + deprecated_by = None + + numberOfEntries += 1 + if feature_type == 'feature': + added_in = feature.get('number') + + # All the groups that this specific API ${entry_type} belongs. + categories = spec.findall( + './/require[@comment]/%s[@name="%s"]/..' % (entry_type, name)) + for category in categories: + comment = category.get('comment') + if "deprecated in OpenCL" in comment: + words = comment.split(" ") + assert " ".join(words[-4:-1]) == "deprecated in OpenCL" + assert deprecated_by == None # Can't deprecate something twice. 
+ deprecated_by = words[-1] + else: + if name in seen_apis: + print(f'WARNING: {name} exists as both a core version and extension API in the XML') + print('This is not currently handled correctly - only the core version dependency is noted') + continue + + # Extensions do not allow for deprecation + added_in = feature.get('name') + + seen_apis.add(name) + + versionFileName = os.path.join(args.directory, name + ".asciidoc") + with open(versionFileName, 'w') as versionFile: + versionFile.write(GetHeader()) + versionFile.write(note_printer(name, added_in, deprecated_by)) + versionFile.write(GetFooter()) + + numberOfNewEntries += 0 if added_in == "1.0" else 1 + numberOfDeprecatedEntries += 0 if deprecated_by == None else 1 print('Found ' + str(numberOfEntries) + ' API ' + entry_type + 's, ' + str(numberOfNewEntries) + " newer than 1.0, " diff --git a/scripts/gencl.py b/scripts/gencl.py index df7c74765..04f821ced 100755 --- a/scripts/gencl.py +++ b/scripts/gencl.py @@ -20,11 +20,9 @@ from pygenerator import PyOutputGenerator -from reflib import logDiag, logWarn, setLogFile +from reflib import logDiag, logWarn, logErr, setLogFile from reg import Registry - -from clconventions import OpenCLConventions as APIConventions - +from apiconventions import APIConventions # Simple timer functions startTime = None @@ -131,6 +129,11 @@ def makeGenOpts(args): # An API style conventions object conventions = APIConventions() + if args.apiname is not None: + defaultAPIName = args.apiname + else: + defaultAPIName = conventions.xml_api_name + # API include files for spec and ref pages # Overwrites include subdirectories in spec source tree # The generated include files do not include the calling convention @@ -145,7 +148,7 @@ def makeGenOpts(args): filename = 'timeMarker', directory = directory, genpath = genpath, - apiname = 'opencl', + apiname = defaultAPIName, profile = None, versions = featuresPat, emitversions = featuresPat, @@ -163,14 +166,14 @@ def makeGenOpts(args): # Python 
representation of API information, used by scripts that # don't need to load the full XML. - genOpts['api.py'] = [ + genOpts['apimap.py'] = [ PyOutputGenerator, DocGeneratorOptions( conventions = conventions, - filename = 'api.py', + filename = 'apimap.py', directory = directory, - genpath = genpath, - apiname = 'opencl', + genpath = None, + apiname = defaultAPIName, profile = None, versions = featuresPat, emitversions = featuresPat, @@ -181,6 +184,7 @@ def makeGenOpts(args): reparentEnums = False) ] + # Extension metainformation for spec extension appendices # Includes all extensions by default, but only so that the generated # 'promoted_extensions_*' files refer to all extensions that were @@ -192,7 +196,7 @@ def makeGenOpts(args): filename = 'timeMarker', directory = directory, genpath = None, - apiname = 'opencl', + apiname = defaultAPIName, profile = None, versions = featuresPat, emitversions = None, @@ -202,70 +206,6 @@ def makeGenOpts(args): emitExtensions = emitExtensionsPat) ] - # Platform extensions, in their own header files - # Each element of the platforms[] array defines information for - # generating a single platform: - # [0] is the generated header file name - # [1] is the set of platform extensions to generate - # [2] is additional extensions whose interfaces should be considered, - # but suppressed in the output, to avoid duplicate definitions of - # dependent types like VkDisplayKHR and VkSurfaceKHR which come from - # non-platform extensions. - - # Track all platform extensions, for exclusion from vulkan_core.h - allPlatformExtensions = [] - - # # Extensions suppressed for all platforms. - # # Covers common WSI extension types. 
- # commonSuppressExtensions = [ 'VK_KHR_display', 'VK_KHR_swapchain' ] - # - # platforms = [ - # [ 'vulkan_android.h', [ 'VK_KHR_android_surface', - # 'VK_ANDROID_external_memory_android_hardware_buffer' - # ], commonSuppressExtensions ], - # [ 'vulkan_fuchsia.h', [ 'VK_FUCHSIA_imagepipe_surface'], commonSuppressExtensions ], - # [ 'vulkan_ios.h', [ 'VK_MVK_ios_surface' ], commonSuppressExtensions ], - # [ 'vulkan_macos.h', [ 'VK_MVK_macos_surface' ], commonSuppressExtensions ], - # [ 'vulkan_vi.h', [ 'VK_NN_vi_surface' ], commonSuppressExtensions ], - # [ 'vulkan_wayland.h', [ 'VK_KHR_wayland_surface' ], commonSuppressExtensions ], - # [ 'vulkan_win32.h', [ 'VK_.*_win32(|_.*)' ], commonSuppressExtensions + [ 'VK_KHR_external_semaphore', 'VK_KHR_external_memory_capabilities', 'VK_KHR_external_fence', 'VK_KHR_external_fence_capabilities', 'VK_NV_external_memory_capabilities' ] ], - # [ 'vulkan_xcb.h', [ 'VK_KHR_xcb_surface' ], commonSuppressExtensions ], - # [ 'vulkan_xlib.h', [ 'VK_KHR_xlib_surface' ], commonSuppressExtensions ], - # [ 'vulkan_xlib_xrandr.h', [ 'VK_EXT_acquire_xlib_display' ], commonSuppressExtensions ], - # ] - # - # for platform in platforms: - # headername = platform[0] - # - # allPlatformExtensions += platform[1] - # - # addPlatformExtensionsRE = makeREstring(platform[1] + platform[2]) - # emitPlatformExtensionsRE = makeREstring(platform[1]) - # - # opts = CGeneratorOptions( - # filename = headername, - # directory = directory, - # apiname = 'vulkan', - # profile = None, - # versions = featuresPat, - # emitversions = None, - # defaultExtensions = None, - # addExtensions = addPlatformExtensionsRE, - # removeExtensions = None, - # emitExtensions = emitPlatformExtensionsRE, - # prefixText = prefixStrings + clPrefixStrings, - # genFuncPointers = True, - # protectFile = protectFile, - # protectFeature = False, - # protectProto = '#ifndef', - # protectProtoStr = 'VK_NO_PROTOTYPES', - # apicall = 'VKAPI_ATTR ', - # apientry = 'VKAPI_CALL ', - # 
apientryp = 'VKAPI_PTR *', - # alignFuncParam = 0) - # - # genOpts[headername] = [ COutputGenerator, opts ] - # Header for core API + extensions. # To generate just the core API, # change to 'defaultExtensions = None' below. @@ -274,8 +214,8 @@ def makeGenOpts(args): # It removes all platform extensions (from the platform headers options # constructed above) as well as any explicitly specified removals. - removeExtensionsPat = makeREstring( - allPlatformExtensions + removeExtensions, None, strings_are_regex=True) + removeExtensionsPat = makeREstring(removeExtensions, None, + strings_are_regex=True) genOpts['cl.h'] = [ COutputGenerator, @@ -284,7 +224,7 @@ def makeGenOpts(args): filename = 'cl.h', directory = directory, genpath = None, - apiname = 'opencl', + apiname = defaultAPIName, profile = None, versions = featuresPat, emitversions = featuresPat, @@ -353,8 +293,11 @@ def genTarget(args): if __name__ == '__main__': parser = argparse.ArgumentParser() + parser.add_argument('-apiname', action='store', + default=None, + help='Specify API to generate (defaults to repository-specific conventions object value)') parser.add_argument('-defaultExtensions', action='store', - default='opencl', + default=APIConventions().xml_api_name, help='Specify a single class of extensions to add to targets') parser.add_argument('-extension', action='append', default=[], @@ -425,10 +368,12 @@ def genTarget(args): else: diag = None - (gen, options) = (None, None) - if not args.validate: - # Create the API generator & generator options - (gen, options) = genTarget(args) + if args.time: + # Log diagnostics and warnings + setLogFile(setDiag = True, setWarn = True, filename = '-') + + # Create the API generator & generator options + (gen, options) = genTarget(args) # Create the registry object with the specified generator and generator # options. The options are set before XML loading as they may affect it. 
@@ -444,10 +389,6 @@ def genTarget(args): reg.loadElementTree(tree) endTimer(args.time, '* Time to parse ElementTree =') - if args.validate: - success = reg.validateRegistry() - sys.exit(0 if success else 1) - if args.dump: logDiag('* Dumping registry to regdump.txt') reg.dumpReg(filehandle=open('regdump.txt', 'w', encoding='utf-8')) diff --git a/scripts/generator.py b/scripts/generator.py index a5b648b98..dea2ffa37 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -17,7 +17,7 @@ try: from pathlib import Path except ImportError: - from pathlib2 import Path + from pathlib2 import Path # type: ignore from spec_tools.util import getElemName, getElemType @@ -43,7 +43,10 @@ def enquote(s): """Return string argument with surrounding quotes, for serialization into Python code.""" if s: - return "'{}'".format(s) + if isinstance(s, str): + return f"'{s}'" + else: + return s return None @@ -52,14 +55,17 @@ def regSortCategoryKey(feature): Sorts by category of the feature name string: - Core API features (those defined with a `` tag) + - (sort VKSC after VK - this is Vulkan-specific) - ARB/KHR/OES (Khronos extensions) - other (EXT/vendor extensions)""" if feature.elem.tag == 'feature': - return 0 - if (feature.category == 'ARB' - or feature.category == 'KHR' - or feature.category == 'OES'): + if feature.name.startswith('VKSC'): + return 0.5 + else: + return 0 + + if feature.category.upper() in ['ARB', 'KHR', 'OES']: return 1 return 2 @@ -68,10 +74,15 @@ def regSortCategoryKey(feature): def regSortOrderKey(feature): """Sort key for regSortFeatures - key is the sortorder attribute.""" - # print("regSortOrderKey {} -> {}".format(feature.name, feature.sortorder)) return feature.sortorder +def regSortNameKey(feature): + """Sort key for regSortFeatures - key is the extension name.""" + + return feature.name + + def regSortFeatureVersionKey(feature): """Sort key for regSortFeatures - key is the feature version. 
`` elements all have version number 0.""" @@ -99,6 +110,36 @@ def regSortFeatures(featureList): featureList.sort(key=regSortOrderKey) +class MissingGeneratorOptionsError(RuntimeError): + """Error raised when a Generator tries to do something that requires GeneratorOptions but it is None.""" + + def __init__(self, msg=None): + full_msg = 'Missing generator options object self.genOpts' + if msg: + full_msg += ': ' + msg + super().__init__(full_msg) + + +class MissingRegistryError(RuntimeError): + """Error raised when a Generator tries to do something that requires a Registry object but it is None.""" + + def __init__(self, msg=None): + full_msg = 'Missing Registry object self.registry' + if msg: + full_msg += ': ' + msg + super().__init__(full_msg) + + +class MissingGeneratorOptionsConventionsError(RuntimeError): + """Error raised when a Generator tries to do something that requires a Conventions object but it is None.""" + + def __init__(self, msg=None): + full_msg = 'Missing Conventions object self.genOpts.conventions' + if msg: + full_msg += ': ' + msg + super().__init__(full_msg) + + class GeneratorOptions: """Base class for options used during header/documentation production. @@ -111,6 +152,7 @@ def __init__(self, directory='.', genpath=None, apiname=None, + mergeApiNames=None, profile=None, versions='.*', emitversions='.*', @@ -119,8 +161,12 @@ def __init__(self, removeExtensions=None, emitExtensions=None, emitSpirv=None, + emitFormats=None, reparentEnums=True, - sortProcedure=regSortFeatures): + sortProcedure=regSortFeatures, + requireCommandAliases=False, + requireDepends=True, + ): """Constructor. Arguments: @@ -128,9 +174,11 @@ def __init__(self, - conventions - may be mandatory for some generators: an object that implements ConventionsBase - filename - basename of file to generate, or None to write to stdout. 
- - directory - directory in which to generate files - - genpath - path to previously generated files, such as api.py + - directory - directory in which to generate filename + - genpath - path to previously generated files, such as apimap.py - apiname - string matching `` 'apiname' attribute, e.g. 'gl'. + - mergeApiNames - If not None, a comma separated list of API names + to merge into the API specified by 'apiname' - profile - string specifying API profile , e.g. 'core', or None. - versions - regex matching API versions to process interfaces for. Normally `'.*'` or `'[0-9][.][0-9]'` to match all defined versions. @@ -148,10 +196,11 @@ def __init__(self, to None. - emitExtensions - regex matching names of extensions to actually emit interfaces for (though all requested versions are considered when - deciding which interfaces to generate). - to None. + deciding which interfaces to generate). Defaults to None. - emitSpirv - regex matching names of extensions and capabilities to actually emit interfaces for. + - emitFormats - regex matching names of formats to actually emit + interfaces for. - reparentEnums - move elements which extend an enumerated type from or elements to the target element. This is required for almost all purposes, but the @@ -159,9 +208,16 @@ def __init__(self, or being complete. Defaults to True. - sortProcedure - takes a list of FeatureInfo objects and sorts them in place to a preferred order in the generated output. - Default is core API versions, ARB/KHR/OES extensions, all other - extensions, by core API version number or extension number in each - group. + - requireCommandAliases - if True, treat command aliases + as required dependencies. + - requireDepends - whether to follow API dependencies when emitting + APIs. + + Default is + - core API versions + - Khronos (ARB/KHR/OES) extensions + - All other extensions + - By core API version number or extension number in each group. 
The regex patterns can be None or empty, in which case they match nothing.""" @@ -173,7 +229,7 @@ def __init__(self, "basename of file to generate, or None to write to stdout." self.genpath = genpath - """path to previously generated files, such as api.py""" + """path to previously generated files, such as apimap.py""" self.directory = directory "directory in which to generate filename" @@ -181,6 +237,9 @@ def __init__(self, self.apiname = apiname "string matching `` 'apiname' attribute, e.g. 'gl'." + self.mergeApiNames = mergeApiNames + "comma separated list of API names to merge into the API specified by 'apiname'" + self.profile = profile "string specifying API profile , e.g. 'core', or None." @@ -217,6 +276,10 @@ def __init__(self, """regex matching names of extensions and capabilities to actually emit interfaces for.""" + self.emitFormats = self.emptyRegex(emitFormats) + """regex matching names of formats + to actually emit interfaces for.""" + self.reparentEnums = reparentEnums """boolean specifying whether to remove elements from or when extending an type.""" @@ -230,6 +293,16 @@ def __init__(self, self.codeGenerator = False """True if this generator makes compilable code""" + self.registry = None + """Populated later with the registry object.""" + + self.requireCommandAliases = requireCommandAliases + """True if alias= attributes of tags are transitively + required.""" + + self.requireDepends = requireDepends + """True if dependencies of API tags are transitively required.""" + def emptyRegex(self, pat): """Substitute a regular expression which matches no version or extension names for None or the empty string.""" @@ -257,6 +330,17 @@ class OutputGenerator: 'basetype': 'basetypes', } + def breakName(self, name, msg): + """Break into debugger if this is a special name""" + + # List of string names to break on + bad = ( + ) + + if name in bad and True: + print('breakName {}: {}'.format(name, msg)) + pdb.set_trace() + def __init__(self, errFile=sys.stderr, 
warnFile=sys.stderr, diagFile=sys.stdout): """Constructor @@ -268,9 +352,17 @@ def __init__(self, errFile=sys.stderr, warnFile=sys.stderr, diagFile=sys.stdout) self.diagFile = diagFile # Internal state self.featureName = None + """The current feature name being generated.""" + self.genOpts = None + """The GeneratorOptions subclass instance.""" + self.registry = None + """The specification registry object.""" + self.featureDictionary = {} + """The dictionary of dictionaries of API features.""" + # Used for extension enum value generation self.extBase = 1000000000 self.extBlockSize = 1000 @@ -280,6 +372,9 @@ def __init__(self, errFile=sys.stderr, warnFile=sys.stderr, diagFile=sys.stdout) # derived generators. self.apidict = None + # File suffix for generated files, set in beginFile below. + self.file_suffix = '' + def logMsg(self, level, *args): """Write a message of different categories to different destinations. @@ -306,9 +401,17 @@ def logMsg(self, level, *args): raise UserWarning( '*** FATAL ERROR in Generator.logMsg: unknown level:' + level) - def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): + def enumToValue(self, elem, needsNum, bitwidth = 32, + forceSuffix = False, parent_for_alias_dereference=None): """Parse and convert an `` tag into a value. + - elem - Element + - needsNum - generate a numeric representation of the element value + - bitwidth - size of the numeric representation in bits (32 or 64) + - forceSuffix - if True, always use a 'U' / 'ULL' suffix on integers + - parent_for_alias_dereference - if not None, an Element containing + the parent of elem, used to look for elements this is an alias of + Returns a list: - first element - integer representation of the value, or None @@ -330,6 +433,11 @@ def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): - An 'alias' attribute contains the name of another enum which this is an alias of. 
The other enum must be declared first when emitting this enum.""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() + name = elem.get('name') numVal = None if 'value' in elem.keys(): @@ -337,7 +445,7 @@ def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): # print('About to translate value =', value, 'type =', type(value)) if needsNum: numVal = int(value, 0) - # If there's a non-integer, numeric 'type' attribute (e.g. 'u' or + # If there is a non-integer, numeric 'type' attribute (e.g. 'u' or # 'ull'), append it to the string value. # t = enuminfo.elem.get('type') # if t is not None and t != '' and t != 'i' and t != 's': @@ -354,7 +462,7 @@ def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): bitpos = int(value, 0) numVal = 1 << bitpos value = '0x%08x' % numVal - if bitwidth == 64: + if bitwidth == 64 or bitpos >= 32: value = value + 'ULL' elif forceSuffix: value = value + 'U' @@ -381,7 +489,15 @@ def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): self.logMsg('diag', 'Enum', name, '-> offset [', numVal, ',', value, ']') return [numVal, value] if 'alias' in elem.keys(): - return [None, elem.get('alias')] + alias_of = elem.get('alias') + if parent_for_alias_dereference is None: + return (None, alias_of) + siblings = parent_for_alias_dereference.findall('enum') + for sib in siblings: + sib_name = sib.get('name') + if sib_name == alias_of: + return self.enumToValue(sib, needsNum) + raise RuntimeError("Could not find the aliased enum value") return [None, None] def checkDuplicateEnums(self, enums): @@ -418,7 +534,7 @@ def checkDuplicateEnums(self, enums): + ') found with different values:' + strVal + ' and ' + strVal2) - # Don't add the duplicate to the returned list + # Do not add the duplicate to the returned list continue elif numVal in valueMap: # Duplicate value found (such as an alias); report 
it, but @@ -448,6 +564,11 @@ def misracppstyle(self): def buildEnumCDecl(self, expand, groupinfo, groupName): """Generate the C declaration for an enum""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() + groupElem = groupinfo.elem # Determine the required bit width for the enum group. @@ -519,7 +640,7 @@ def buildEnumCDecl_BitmaskOrDefine(self, groupinfo, groupName, bitwidth, usedefi # Accumulate non-numeric enumerant values separately and append # them following the numeric values, to allow for aliases. - # NOTE: this doesn't do a topological sort yet, so aliases of + # NOTE: this does not do a topological sort yet, so aliases of # aliases can still get in the wrong order. aliasText = '' @@ -553,7 +674,10 @@ def buildEnumCDecl_BitmaskOrDefine(self, groupinfo, groupName, bitwidth, usedefi # Work around this by chasing the aliases to get the actual value. while numVal is None: alias = self.registry.tree.find("enums/enum[@name='" + strVal + "']") - (numVal, strVal) = self.enumToValue(alias, True, bitwidth, True) + if alias is not None: + (numVal, strVal) = self.enumToValue(alias, True, bitwidth, True) + else: + self.logMsg('error', 'No such alias {} for enum {}'.format(strVal, name)) decl += "static const {} {} = {};\n".format(flagTypeName, name, strVal) if numVal is not None: @@ -596,7 +720,6 @@ def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): maxValidValue = 2**(32 - 1) - 1 minValidValue = (maxValidValue * -1) - 1 - # Get a list of nested 'enum' tags. enums = groupElem.findall('enum') @@ -612,10 +735,13 @@ def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): # Accumulate non-numeric enumerant values separately and append # them following the numeric values, to allow for aliases. 
- # NOTE: this doesn't do a topological sort yet, so aliases of + # NOTE: this does not do a topological sort yet, so aliases of # aliases can still get in the wrong order. aliasText = [] + maxName = None + minValue = None + maxValue = None for elem in enums: # Convert the value to an integer and use that to track min/max. # Values of form -(number) are accepted but nothing more complex. @@ -651,15 +777,15 @@ def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): self.logMsg('error', 'Allowable range for C enum types is [', minValidValue, ',', maxValidValue, '], but', name, 'has a value outside of this (', strVal, ')\n') exit(1) - # Don't track min/max for non-numbers (numVal is None) + # Do not track min/max for non-numbers (numVal is None) if isEnum and numVal is not None and elem.get('extends') is None: if minName is None: minName = maxName = name minValue = maxValue = numVal - elif numVal < minValue: + elif minValue is None or numVal < minValue: minName = name minValue = numVal - elif numVal > maxValue: + elif maxValue is None or numVal > maxValue: maxName = name maxValue = numVal @@ -668,17 +794,15 @@ def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): # Generate min/max value tokens - legacy use case. 
if isEnum and expand: - body.extend((" {}_BEGIN_RANGE{} = {},".format(expandPrefix, expandSuffix, minName), - " {}_END_RANGE{} = {},".format( - expandPrefix, expandSuffix, maxName), - " {}_RANGE_SIZE{} = ({} - {} + 1),".format(expandPrefix, expandSuffix, maxName, minName))) + body.extend((f' {expandPrefix}_BEGIN_RANGE{expandSuffix} = {minName},', + f' {expandPrefix}_END_RANGE{expandSuffix} = {maxName},', + f' {expandPrefix}_RANGE_SIZE{expandSuffix} = ({maxName} - {minName} + 1),')) # Generate a range-padding value to ensure the enum is 32 bits, but - # only in code generators, so it doesn't appear in documentation + # only in code generators, so it does not appear in documentation if (self.genOpts.codeGenerator or self.conventions.generate_max_enum_in_docs): - body.append(" {}_MAX_ENUM{} = 0x7FFFFFFF".format( - expandPrefix, expandSuffix)) + body.append(f' {expandPrefix}_MAX_ENUM{expandSuffix} = 0x7FFFFFFF') # Postfix body.append("} %s;" % groupName) @@ -747,19 +871,25 @@ def makeDir(self, path): def beginFile(self, genOpts): """Start a new interface file - - genOpts - GeneratorOptions controlling what's generated and how""" + - genOpts - GeneratorOptions controlling what is generated and how""" + self.genOpts = genOpts + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() self.should_insert_may_alias_macro = \ self.genOpts.conventions.should_insert_may_alias_macro(self.genOpts) + self.file_suffix = self.genOpts.conventions.file_suffix - # Try to import the API dictionary, api.py, if it exists. Nothing in - # api.py cannot be extracted directly from the XML, and in the + # Try to import the API dictionary, apimap.py, if it exists. Nothing + # in apimap.py cannot be extracted directly from the XML, and in the # future we should do that. 
if self.genOpts.genpath is not None: try: sys.path.insert(0, self.genOpts.genpath) - import api - self.apidict = api + import apimap + self.apidict = apimap except ImportError: self.apidict = None @@ -778,19 +908,23 @@ def endFile(self): self.warnFile.flush() if self.diagFile: self.diagFile.flush() - self.outFile.flush() - if self.outFile != sys.stdout and self.outFile != sys.stderr: - self.outFile.close() - - # On successfully generating output, move the temporary file to the - # target file. - if self.genOpts.filename is not None: - if sys.platform == 'win32': - directory = Path(self.genOpts.directory) - if not Path.exists(directory): - os.makedirs(directory) - shutil.copy(self.outFile.name, self.genOpts.directory + '/' + self.genOpts.filename) - os.remove(self.outFile.name) + if self.outFile: + self.outFile.flush() + if self.outFile != sys.stdout and self.outFile != sys.stderr: + self.outFile.close() + + if self.genOpts is None: + raise MissingGeneratorOptionsError() + + # On successfully generating output, move the temporary file to the + # target file. + if self.genOpts.filename is not None: + if sys.platform == 'win32': + directory = Path(self.genOpts.directory) + if not Path.exists(directory): + os.makedirs(directory) + shutil.copy(self.outFile.name, self.genOpts.directory + '/' + self.genOpts.filename) + os.remove(self.outFile.name) self.genOpts = None def beginFeature(self, interface, emit): @@ -800,7 +934,7 @@ def beginFeature(self, interface, emit): - emit - actually write to the header only when True""" self.emit = emit self.featureName = interface.get('name') - # If there's an additional 'protect' attribute in the feature, save it + # If there is an additional 'protect' attribute in the feature, save it self.featureExtraProtect = interface.get('protect') def endFeature(self): @@ -812,7 +946,7 @@ def endFeature(self): def genRequirements(self, name, mustBeFound = True): """Generate text showing what core versions and extensions introduce - an API. 
This exists in the base Generator class because it's used by + an API. This exists in the base Generator class because it is used by the shared enumerant-generating interfaces (buildEnumCDecl, etc.). Here it returns an empty string for most generators, but can be overridden by e.g. DocGenerator. @@ -825,7 +959,7 @@ def genRequirements(self, name, mustBeFound = True): return '' def validateFeature(self, featureType, featureName): - """Validate we're generating something only inside a `` tag""" + """Validate we are generating something only inside a `` tag""" if self.featureName is None: raise UserWarning('Attempt to generate', featureType, featureName, 'when not in feature') @@ -887,16 +1021,52 @@ def genSpirv(self, spirv, spirvinfo, alias): Extend to generate as desired in your derived class.""" return + def genFormat(self, format, formatinfo, alias): + """Generate interface for a format element. + + - formatinfo - FormatInfo + + Extend to generate as desired in your derived class.""" + return + + def genSyncStage(self, stageinfo): + """Generate interface for a sync stage element. + + - stageinfo - SyncStageInfo + + Extend to generate as desired in your derived class.""" + return + + def genSyncAccess(self, accessinfo): + """Generate interface for a sync access element. + + - accessinfo - AccessInfo + + Extend to generate as desired in your derived class.""" + return + + def genSyncPipeline(self, pipelineinfo): + """Generate interface for a sync pipeline element. + + - pipelineinfo - SyncPipelineInfo + + Extend to generate as desired in your derived class.""" + return + def makeProtoName(self, name, tail): """Turn a `` `` into C-language prototype and typedef declarations for that name. 
- name - contents of `` tag - tail - whatever text follows that tag in the Element""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() return self.genOpts.apientry + name + tail def makeTypedefName(self, name, tail): """Make the function-pointer typedef name for a command.""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() return '(' + self.genOpts.apientryp + 'PFN_' + name + tail + ')' def makeCParamDecl(self, param, aligncol): @@ -907,6 +1077,10 @@ def makeCParamDecl(self, param, aligncol): - param - Element (`` or ``) to format - aligncol - if non-zero, attempt to align the nested `` element at this column""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() indent = ' ' paramdecl = indent prefix = noneStr(param.text) @@ -940,8 +1114,6 @@ def makeCParamDecl(self, param, aligncol): # Clear prefix for subsequent iterations prefix = '' - # If prefix was originally non-empty and the param has no elements - # (e.g. is nothing but text), preserve it. paramdecl = paramdecl + prefix if aligncol == 0: @@ -955,6 +1127,10 @@ def getCParamTypeLength(self, param): or structure/union member). 
- param - Element (`` or ``) to identify""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() # Allow for missing tag newLen = 0 @@ -985,6 +1161,9 @@ def getMaxCParamTypeLength(self, info): def getHandleParent(self, typename): """Get the parent of a handle object.""" + if self.registry is None: + raise MissingRegistryError() + info = self.registry.typedict.get(typename) if info is None: return None @@ -1008,6 +1187,9 @@ def getHandleAncestors(self, typename): def getTypeCategory(self, typename): """Get the category of a type.""" + if self.registry is None: + raise MissingRegistryError() + info = self.registry.typedict.get(typename) if info is None: return None @@ -1018,10 +1200,12 @@ def getTypeCategory(self, typename): return None def isStructAlwaysValid(self, structname): - """Try to do check if a structure is always considered valid (i.e. there's no rules to its acceptance).""" + """Try to do check if a structure is always considered valid (i.e. there is no rules to its acceptance).""" # A conventions object is required for this call. if not self.conventions: raise RuntimeError("To use isStructAlwaysValid, be sure your options include a Conventions object.") + if self.registry is None: + raise MissingRegistryError() if self.conventions.type_always_valid(structname): return True @@ -1031,7 +1215,8 @@ def isStructAlwaysValid(self, structname): return False info = self.registry.typedict.get(structname) - assert(info is not None) + if info is None: + self.logMsg('error', f'isStructAlwaysValid({structname}) - structure not found in typedict') members = info.getMembers() @@ -1063,6 +1248,21 @@ def isStructAlwaysValid(self, structname): return True + def paramIsArray(self, param): + """Check if the parameter passed in is a pointer to an array. 
+ + param the XML information for the param + """ + return param.get('len') is not None + + def paramIsPointer(self, param): + """Check if the parameter passed in is a pointer. + + param the XML information for the param + """ + tail = param.find('type').tail + return tail is not None and '*' in tail + def isEnumRequired(self, elem): """Return True if this `` element is required, False otherwise @@ -1098,6 +1298,8 @@ def makeCDecls(self, cmd): `` Element, as a two-element list of strings. - cmd - Element containing a `` tag""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() proto = cmd.find('proto') params = cmd.findall('param') # Begin accumulating prototype and typedef strings @@ -1114,7 +1316,7 @@ def makeCDecls(self, cmd): # Leading text pdecl += noneStr(proto.text) tdecl += noneStr(proto.text) - # For each child element, if it's a wrap in appropriate + # For each child element, if it is a wrap in appropriate # declaration. Otherwise append its contents and tail contents. for elem in proto: text = noneStr(elem.text) diff --git a/scripts/parse_dependency.py b/scripts/parse_dependency.py new file mode 100755 index 000000000..5d204959c --- /dev/null +++ b/scripts/parse_dependency.py @@ -0,0 +1,403 @@ +#!/usr/bin/python3 + +# Copyright 2022-2024 The Khronos Group Inc. +# Copyright 2003-2019 Paul McGuire +# SPDX-License-Identifier: MIT + +# parse_dependency.py - parse 'depends' expressions in API XML +# Supported methods: +# dependency - the expression string +# +# evaluateDependency(dependency, isSupported) evaluates the expression, +# returning a boolean result. isSupported takes an extension or version name +# string and returns a boolean. +# +# dependencyLanguage(dependency) returns an English string equivalent +# to the expression, suitable for header file comments. +# +# dependencyNames(dependency) returns a set of the extension and +# version names in the expression. 
+# +# dependencyMarkup(dependency) returns a string containing asciidoctor +# markup for English equivalent to the expression, suitable for extension +# appendices. +# +# All may throw a ParseException if the expression cannot be parsed or is +# not completely consumed by parsing. + +# Supported expressions at present: +# - extension names +# - '+' as AND connector +# - ',' as OR connector +# - parenthesization for grouping + +# Based on https://github.com/pyparsing/pyparsing/blob/master/examples/fourFn.py + +from pyparsing import ( + Literal, + Word, + Group, + Forward, + alphas, + alphanums, + Regex, + ParseException, + CaselessKeyword, + Suppress, + delimitedList, + infixNotation, +) +import math +import operator +import pyparsing as pp +import re + +from apiconventions import APIConventions as APIConventions +conventions = APIConventions() + +def markupPassthrough(name): + """Pass a name (leaf or operator) through without applying markup""" + return name + +def leafMarkupAsciidoc(name): + """Markup a leaf name as an asciidoc link to an API version or extension + anchor. 
+ + - name - version or extension name""" + + return conventions.formatVersionOrExtension(name) + +def leafMarkupC(name): + """Markup a leaf name as a C expression, using conventions of the + Vulkan Validation Layers + + - name - version or extension name""" + + (apivariant, major, minor) = apiVersionNameMatch(name) + + if apivariant is not None: + return name + else: + return f'ext.{name}' + +opMarkupAsciidocMap = { '+' : 'and', ',' : 'or' } + +def opMarkupAsciidoc(op): + """Markup an operator as an asciidoc spec markup equivalent + + - op - operator ('+' or ',')""" + + return opMarkupAsciidocMap[op] + +opMarkupCMap = { '+' : '&&', ',' : '||' } + +def opMarkupC(op): + """Markup an operator as a C language equivalent + + - op - operator ('+' or ',')""" + + return opMarkupCMap[op] + + +# Unfortunately global to be used in pyparsing +exprStack = [] + +def push_first(toks): + """Push a token on the global stack + + - toks - first element is the token to push""" + + exprStack.append(toks[0]) + +# An identifier (version or extension name) +dependencyIdent = Word(alphanums + '_') + +# Infix expression for depends expressions +dependencyExpr = pp.infixNotation(dependencyIdent, + [ (pp.oneOf(', +'), 2, pp.opAssoc.LEFT), ]) + +# BNF grammar for depends expressions +_bnf = None +def dependencyBNF(): + """ + boolop :: '+' | ',' + extname :: Char(alphas) + atom :: extname | '(' expr ')' + expr :: atom [ boolop atom ]* + """ + global _bnf + if _bnf is None: + and_, or_ = map(Literal, '+,') + lpar, rpar = map(Suppress, '()') + boolop = and_ | or_ + + expr = Forward() + expr_list = delimitedList(Group(expr)) + atom = ( + boolop[...] + + ( + (dependencyIdent).setParseAction(push_first) + | Group(lpar + expr + rpar) + ) + ) + + expr <<= atom + (boolop + atom).setParseAction(push_first)[...] 
+ _bnf = expr + return _bnf + + +# map operator symbols to corresponding arithmetic operations +_opn = { + '+': operator.and_, + ',': operator.or_, +} + +def evaluateStack(stack, isSupported): + """Evaluate an expression stack, returning a boolean result. + + - stack - the stack + - isSupported - function taking a version or extension name string and + returning True or False if that name is supported or not.""" + + op, num_args = stack.pop(), 0 + if isinstance(op, tuple): + op, num_args = op + + if op in '+,': + # Note: operands are pushed onto the stack in reverse order + op2 = evaluateStack(stack, isSupported) + op1 = evaluateStack(stack, isSupported) + return _opn[op](op1, op2) + elif op[0].isalpha(): + return isSupported(op) + else: + raise Exception(f'invalid op: {op}') + +def evaluateDependency(dependency, isSupported): + """Evaluate a dependency expression, returning a boolean result. + + - dependency - the expression + - isSupported - function taking a version or extension name string and + returning True or False if that name is supported or not.""" + + global exprStack + exprStack = [] + results = dependencyBNF().parseString(dependency, parseAll=True) + val = evaluateStack(exprStack[:], isSupported) + return val + +def evalDependencyLanguage(stack, leafMarkup, opMarkup, parenthesize, root): + """Evaluate an expression stack, returning an English equivalent + + - stack - the stack + - leafMarkup, opMarkup, parenthesize - same as dependencyLanguage + - root - True only if this is the outer (root) expression level""" + + op, num_args = stack.pop(), 0 + if isinstance(op, tuple): + op, num_args = op + if op in '+,': + # Could parenthesize, not needed yet + rhs = evalDependencyLanguage(stack, leafMarkup, opMarkup, parenthesize, root = False) + opname = opMarkup(op) + lhs = evalDependencyLanguage(stack, leafMarkup, opMarkup, parenthesize, root = False) + if parenthesize and not root: + return f'({lhs} {opname} {rhs})' + else: + return f'{lhs} {opname} {rhs}' + 
elif op[0].isalpha(): + # This is an extension or feature name + return leafMarkup(op) + else: + raise Exception(f'invalid op: {op}') + +def dependencyLanguage(dependency, leafMarkup, opMarkup, parenthesize): + """Return an API dependency expression translated to a form suitable for + asciidoctor conditionals or header file comments. + + - dependency - the expression + - leafMarkup - function taking an extension / version name and + returning an equivalent marked up version + - opMarkup - function taking an operator ('+' / ',') name and + returning an equivalent marked up version + - parenthesize - True if parentheses should be used in the resulting + expression, False otherwise""" + + global exprStack + exprStack = [] + results = dependencyBNF().parseString(dependency, parseAll=True) + return evalDependencyLanguage(exprStack, leafMarkup, opMarkup, parenthesize, root = True) + +# aka specmacros = False +def dependencyLanguageComment(dependency): + """Return dependency expression translated to a form suitable for + comments in headers of emitted C code, as used by the + docgenerator.""" + return dependencyLanguage(dependency, leafMarkup = markupPassthrough, opMarkup = opMarkupAsciidoc, parenthesize = True) + +# aka specmacros = True +def dependencyLanguageSpecMacros(dependency): + """Return dependency expression translated to a form suitable for + comments in headers of emitted C code, as used by the + interfacegenerator.""" + return dependencyLanguage(dependency, leafMarkup = leafMarkupAsciidoc, opMarkup = opMarkupAsciidoc, parenthesize = False) + +def dependencyLanguageC(dependency): + """Return dependency expression translated to a form suitable for + use in C expressions""" + return dependencyLanguage(dependency, leafMarkup = leafMarkupC, opMarkup = opMarkupC, parenthesize = True) + +def evalDependencyNames(stack): + """Evaluate an expression stack, returning the set of extension and + feature names used in the expression. 
+ + - stack - the stack""" + + op, num_args = stack.pop(), 0 + if isinstance(op, tuple): + op, num_args = op + if op in '+,': + # Do not evaluate the operation. We only care about the names. + return evalDependencyNames(stack) | evalDependencyNames(stack) + elif op[0].isalpha(): + return { op } + else: + raise Exception(f'invalid op: {op}') + +def dependencyNames(dependency): + """Return a set of the extension and version names in an API dependency + expression. Used when determining transitive dependencies for spec + generation with specific extensions included. + + - dependency - the expression""" + + global exprStack + exprStack = [] + results = dependencyBNF().parseString(dependency, parseAll=True) + # print(f'names(): stack = {exprStack}') + return evalDependencyNames(exprStack) + +def markupTraverse(expr, level = 0, root = True): + """Recursively process a dependency in infix form, transforming it into + asciidoctor markup with expression nesting indicated by indentation + level. + + - expr - expression to process + - level - indentation level to render expression at + - root - True only on initial call""" + + if level > 0: + prefix = '{nbsp}{nbsp}' * level * 2 + ' ' + else: + prefix = '' + str = '' + + for elem in expr: + if isinstance(elem, pp.ParseResults): + if not root: + nextlevel = level + 1 + else: + # Do not indent the outer expression + nextlevel = level + + str = str + markupTraverse(elem, level = nextlevel, root = False) + elif elem in ('+', ','): + str = str + f'{prefix}{opMarkupAsciidoc(elem)} +\n' + else: + str = str + f'{prefix}{leafMarkupAsciidoc(elem)} +\n' + + return str + +def dependencyMarkup(dependency): + """Return asciidoctor markup for a human-readable equivalent of an API + dependency expression, suitable for use in extension appendix + metadata. 
+ + - dependency - the expression""" + + parsed = dependencyExpr.parseString(dependency) + return markupTraverse(parsed) + +if __name__ == "__main__": + for str in [ 'VK_VERSION_1_0', 'cl_khr_extension_name', 'XR_VERSION_3_2', 'CL_VERSION_1_0' ]: + print(f'{str} -> {conventions.formatVersionOrExtension(str)}') + import sys + sys.exit(0) + + termdict = { + 'VK_VERSION_1_1' : True, + 'false' : False, + 'true' : True, + } + termSupported = lambda name: name in termdict and termdict[name] + + def test(dependency, expected): + val = False + try: + val = evaluateDependency(dependency, termSupported) + except ParseException as pe: + print(dependency, f'failed parse: {dependency}') + except Exception as e: + print(dependency, f'failed eval: {dependency}') + + if val == expected: + True + # print(f'{dependency} = {val} (as expected)') + else: + print(f'{dependency} ERROR: {val} != {expected}') + + # Verify expressions are evaluated left-to-right + + test('false,false+false', False) + test('false,false+true', False) + test('false,true+false', False) + test('false,true+true', True) + test('true,false+false', False) + test('true,false+true', True) + test('true,true+false', False) + test('true,true+true', True) + + test('false,(false+false)', False) + test('false,(false+true)', False) + test('false,(true+false)', False) + test('false,(true+true)', True) + test('true,(false+false)', True) + test('true,(false+true)', True) + test('true,(true+false)', True) + test('true,(true+true)', True) + + + test('false+false,false', False) + test('false+false,true', True) + test('false+true,false', False) + test('false+true,true', True) + test('true+false,false', False) + test('true+false,true', True) + test('true+true,false', True) + test('true+true,true', True) + + test('false+(false,false)', False) + test('false+(false,true)', False) + test('false+(true,false)', False) + test('false+(true,true)', False) + test('true+(false,false)', False) + test('true+(false,true)', True) + 
test('true+(true,false)', True) + test('true+(true,true)', True) + + # Check formatting + for dependency in [ + #'true', + #'true+true+false', + 'true+false', + 'true+(true+false),(false,true)', + #'true+((true+false),(false,true))', + 'VK_VERSION_1_0+VK_KHR_display', + #'VK_VERSION_1_1+(true,false)', + ]: + print(f'expr = {dependency}\n{dependencyMarkup(dependency)}') + print(f' spec language = {dependencyLanguageSpecMacros(dependency)}') + print(f' comment language = {dependencyLanguageComment(dependency)}') + print(f' C language = {dependencyLanguageC(dependency)}') + print(f' names = {dependencyNames(dependency)}') + print(f' value = {evaluateDependency(dependency, termSupported)}') diff --git a/scripts/pygenerator.py b/scripts/pygenerator.py index da8cd7d69..6656b4605 100644 --- a/scripts/pygenerator.py +++ b/scripts/pygenerator.py @@ -4,118 +4,50 @@ # # SPDX-License-Identifier: Apache-2.0 -import sys -from generator import OutputGenerator, enquote, noneStr, write +from generator import OutputGenerator, enquote, write +from scriptgenerator import ScriptOutputGenerator import pprint -class PyOutputGenerator(OutputGenerator): - """PyOutputGenerator - subclass of OutputGenerator. - Generates Python data structures describing API names and relationships. - Similar to DocOutputGenerator, but writes a single file.""" - - def apiName(self, name): - """Return True if name is in the reserved API namespace. - - Delegates to the conventions object. """ - return self.genOpts.conventions.is_api_name(name) +class PyOutputGenerator(ScriptOutputGenerator): + """PyOutputGenerator - subclass of ScriptOutputGenerator. 
+ Generates Python data structures describing API names and + relationships.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - # Track features being generated - self.features = [] - - # Reverse map from interface names to features requiring them - self.apimap = {} - - def beginFile(self, genOpts): - OutputGenerator.beginFile(self, genOpts) - # - # Dictionaries are keyed by the name of the entity (e.g. - # self.structs is keyed by structure names). Values are - # the names of related entities (e.g. structs contain - # a list of type names of members, enums contain a list - # of enumerants belong to the enumerated type, etc.), or - # just None if there are no directly related entities. - # - # Collect the mappings, then emit the Python script in endFile - self.basetypes = {} - self.consts = {} - self.enums = {} - self.flags = {} - self.funcpointers = {} - self.protos = {} - self.structs = {} - self.handles = {} - self.defines = {} - self.alias = {} - # Dictionary containing the type of a type name - # (e.g. the string name of the dictionary with its contents). - self.typeCategory = {} - self.mapDict = {} - - def addInterfaceMapping(self, api, feature, required): - """Add a reverse mapping in self.apimap from an API to a feature - requiring that API. - - - api - name of the API - - feature - name of the feature requiring it - - required - None, or an additional feature dependency within - 'feature' """ - - # Each entry in self.apimap contains one or more - # ( feature, required ) tuples. - deps = ( feature, required ) + def beginDict(self, name): + """String starting definition of a named dictionary""" + return f'{name} = {{' - if api in self.apimap: - self.apimap[api].append(deps) - else: - self.apimap[api] = [ deps ] + def endDict(self): + """ String ending definition of a named dictionary""" + return '}' - def mapInterfaceKeys(self, feature, key): - """Construct reverse mapping of APIs to features requiring them in - self.apimap. 
+ def writeDict(self, dict, name, printValues = True): + """Write dictionary as a Python dictionary with the given name. + If printValues is False, just output keys with None values.""" - - feature - name of the feature being generated - - key - API category - 'define', 'basetype', etc.""" - - dict = self.featureDictionary[feature][key] - - if dict: - # Not clear why handling of command vs. type APIs is different - - # see interfacedocgenerator.py, which this was based on. - if key == 'command': - for required in dict: - for api in dict[required]: - self.addInterfaceMapping(api, feature, required) + write(self.beginDict(name), file=self.outFile) + for key in sorted(dict): + if printValues: + value = enquote(dict[key]) else: - for required in dict: - for parent in dict[required]: - for api in dict[required][parent]: - self.addInterfaceMapping(api, feature, required) - - def mapInterfaces(self, feature): - """Construct reverse mapping of APIs to features requiring them in - self.apimap. + value = 'None' + write(f'{enquote(key)} : {value},', file=self.outFile) + write(self.endDict(), file=self.outFile) - - feature - name of the feature being generated""" + def writeList(self, l, name): + """Write list l as a Python dictionary with the given name""" - # Map each category of interface - self.mapInterfaceKeys(feature, 'basetype') - self.mapInterfaceKeys(feature, 'bitmask') - self.mapInterfaceKeys(feature, 'command') - self.mapInterfaceKeys(feature, 'define') - self.mapInterfaceKeys(feature, 'enum') - self.mapInterfaceKeys(feature, 'enumconstant') - self.mapInterfaceKeys(feature, 'funcpointer') - self.mapInterfaceKeys(feature, 'handle') - self.mapInterfaceKeys(feature, 'include') - self.mapInterfaceKeys(feature, 'struct') - self.mapInterfaceKeys(feature, 'union') + self.writeDict(l, name, printValues = False) def endFile(self): + # Creates the inverse mapping of nonexistent APIs to their aliases. + super().createInverseMap() + # Print out all the dictionaries as Python strings. 
- # Could just print(dict) but that's not human-readable + # Could just print(dict) but that is not human-readable dicts = ( [ self.basetypes, 'basetypes' ], [ self.consts, 'consts' ], [ self.enums, 'enums' ], @@ -126,240 +58,37 @@ def endFile(self): [ self.handles, 'handles' ], [ self.defines, 'defines' ], [ self.typeCategory, 'typeCategory' ], - [ self.alias, 'alias' ] ) - for (entry_dict, name) in dicts: - write(name + ' = {}', file=self.outFile) - for key in sorted(entry_dict.keys()): - write(name + '[' + enquote(key) + '] = ', entry_dict[key], - file=self.outFile) + [ self.alias, 'alias' ], + [ self.nonexistent, 'nonexistent' ], + ) + + for (dict, name) in dicts: + self.writeDict(dict, name) # Dictionary containing the relationships of a type # (e.g. a dictionary with each related type as keys). - write('mapDict = {}', file=self.outFile) - # Could just print(self.mapDict), but prefer something # human-readable and stable-ordered + write(self.beginDict('mapDict'), file=self.outFile) for baseType in sorted(self.mapDict.keys()): - write('mapDict[' + enquote(baseType) + '] = ', file=self.outFile, end='') - pprint.pprint(self.mapDict[baseType], self.outFile) + write('{} : {},'.format(enquote(baseType), + pprint.pformat(self.mapDict[baseType])), file=self.outFile) + write(self.endDict(), file=self.outFile) + + # List of included feature names + self.writeList(sorted(self.features), 'features') # Generate feature <-> interface mappings for feature in self.features: self.mapInterfaces(feature) # Write out the reverse map from APIs to requiring features - write('requiredBy = {}', file=self.outFile) - + write(self.beginDict('requiredBy'), file=self.outFile) for api in sorted(self.apimap): - # Construct list of requirements as Python list arguments - ##reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in self.apimap[api]) - ##write('requiredBy[{}] = ( {} )'.format(enquote(api), reqs), file=self.outFile) - - # Ideally these would be sorted by 
dep[0] as well - reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in self.apimap[api]) - write('requiredBy[{}] = {}'.format(enquote(api), pprint.saferepr(self.apimap[api])), file=self.outFile) - - OutputGenerator.endFile(self) - - def beginFeature(self, interface, emit): - # Start processing in superclass - OutputGenerator.beginFeature(self, interface, emit) - - # Add this feature to the list being tracked - self.features.append( self.featureName ) - - def endFeature(self): - # Finish processing in superclass - OutputGenerator.endFeature(self) - - def addName(self, entry_dict, name, value): - """Add a string entry to the dictionary, quoting it so it gets printed - out correctly in self.endFile().""" - entry_dict[name] = enquote(value) - - def addMapping(self, baseType, refType): - """Add a mapping between types to mapDict. - - Only include API types, so we don't end up with a lot of useless uint32_t and void types.""" - if not self.apiName(baseType) or not self.apiName(refType): - self.logMsg('diag', 'PyOutputGenerator::addMapping: IGNORE map from', baseType, '<->', refType) - return - - self.logMsg('diag', 'PyOutputGenerator::addMapping: map from', - baseType, '<->', refType) - - if baseType not in self.mapDict: - baseDict = {} - self.mapDict[baseType] = baseDict - else: - baseDict = self.mapDict[baseType] - if refType not in self.mapDict: - refDict = {} - self.mapDict[refType] = refDict - else: - refDict = self.mapDict[refType] - - baseDict[refType] = None - refDict[baseType] = None - - def genType(self, typeinfo, name, alias): - """Generate type. - - - For 'struct' or 'union' types, defer to genStruct() to - add to the dictionary. - - For 'bitmask' types, add the type name to the 'flags' dictionary, - with the value being the corresponding 'enums' name defining - the acceptable flag bits. - - For 'enum' types, add the type name to the 'enums' dictionary, - with the value being '@STOPHERE@' (because this case seems - never to happen). 
- - For 'funcpointer' types, add the type name to the 'funcpointers' - dictionary. - - For 'handle' and 'define' types, add the handle or #define name - to the 'struct' dictionary, because that's how the spec sources - tag these types even though they aren't structs.""" - OutputGenerator.genType(self, typeinfo, name, alias) - typeElem = typeinfo.elem - # If the type is a struct type, traverse the embedded tags - # generating a structure. Otherwise, emit the tag text. - category = typeElem.get('category') - - # Add a typeCategory{} entry for the category of this type. - self.addName(self.typeCategory, name, category) - - if category in ('struct', 'union'): - self.genStruct(typeinfo, name, alias) - else: - if alias: - # Add name -> alias mapping - self.addName(self.alias, name, alias) - - # Always emit an alias (?!) - count = 1 - - # May want to only emit full type definition when not an alias? - else: - # Extract the type name - # (from self.genOpts). Copy other text through unchanged. - # If the resulting text is an empty string, don't emit it. - count = len(noneStr(typeElem.text)) - for elem in typeElem: - count += len(noneStr(elem.text)) + len(noneStr(elem.tail)) - - if count > 0: - if category == 'bitmask': - requiredEnum = typeElem.get('requires') - self.addName(self.flags, name, requiredEnum) - - # This happens when the Flags type is defined, but no - # FlagBits are defined yet. - if requiredEnum is not None: - self.addMapping(name, requiredEnum) - elif category == 'enum': - # This case does not seem to come up. It nominally would - # result from - # , - # but the output generator doesn't emit them directly. 
- self.logMsg('warn', 'PyOutputGenerator::genType: invalid \'enum\' category for name:', name) - elif category == 'funcpointer': - self.funcpointers[name] = None - elif category == 'handle': - self.handles[name] = None - elif category == 'define': - self.defines[name] = None - elif category == 'basetype': - # Don't add an entry for base types that are not API types - # e.g. an API Bool type gets an entry, uint32_t does not - if self.apiName(name): - self.basetypes[name] = None - self.addName(self.typeCategory, name, 'basetype') - else: - self.logMsg('diag', 'PyOutputGenerator::genType: unprocessed type:', name, 'category:', category) - else: - self.logMsg('diag', 'PyOutputGenerator::genType: unprocessed type:', name) - - def genStruct(self, typeinfo, typeName, alias): - """Generate struct (e.g. C "struct" type). - - Add the struct name to the 'structs' dictionary, with the - value being an ordered list of the struct member names.""" - OutputGenerator.genStruct(self, typeinfo, typeName, alias) - - if alias: - # Add name -> alias mapping - self.addName(self.alias, typeName, alias) - else: - # May want to only emit definition on this branch - True - - members = [member.text for member in typeinfo.elem.findall('.//member/name')] - self.structs[typeName] = members - memberTypes = [member.text for member in typeinfo.elem.findall('.//member/type')] - for member_type in memberTypes: - self.addMapping(typeName, member_type) - - def genGroup(self, groupinfo, groupName, alias): - """Generate group (e.g. C "enum" type). - - These are concatenated together with other types. - - - Add the enum type name to the 'enums' dictionary, with - the value being an ordered list of the enumerant names. 
- - Add each enumerant name to the 'consts' dictionary, with - the value being the enum type the enumerant is part of.""" - OutputGenerator.genGroup(self, groupinfo, groupName, alias) - groupElem = groupinfo.elem - - if alias: - # Add name -> alias mapping - self.addName(self.alias, groupName, alias) - else: - # May want to only emit definition on this branch - True - - # Loop over the nested 'enum' tags. - enumerants = [elem.get('name') for elem in groupElem.findall('enum')] - for name in enumerants: - self.addName(self.consts, name, groupName) - self.enums[groupName] = enumerants - - def genEnum(self, enuminfo, name, alias): - """Generate enumerant (compile-time constants). - - - Add the constant name to the 'consts' dictionary, with the - value being None to indicate that the constant isn't - an enumeration value.""" - OutputGenerator.genEnum(self, enuminfo, name, alias) - - if name not in self.consts: - # Add a typeCategory{} entry for the category of this type. - self.addName(self.typeCategory, name, 'consts') - self.consts[name] = None - # Otherwise, don't add it to the consts dictionary because it's - # already present. This happens due to the generator 'reparentEnums' - # parameter being False, so each extension enum appears in both the - # type and in the or it originally - # came from. - - def genCmd(self, cmdinfo, name, alias): - """Generate command. - - - Add the command name to the 'protos' dictionary, with the - value being an ordered list of the parameter names.""" - OutputGenerator.genCmd(self, cmdinfo, name, alias) - - if alias: - # Add name -> alias mapping - self.addName(self.alias, name, alias) - else: - # May want to only emit definition on this branch - True - - # Add a typeCategory{} entry for the category of this type. 
- self.addName(self.typeCategory, name, 'protos') + # Sort requirements by first feature in each one + deps = sorted(self.apimap[api], key = lambda dep: dep[0]) + reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in deps) + write('{} : [{}],'.format(enquote(api), reqs), file=self.outFile) + write(self.endDict(), file=self.outFile) - params = [param.text for param in cmdinfo.elem.findall('param/name')] - self.protos[name] = params - paramTypes = [param.text for param in cmdinfo.elem.findall('param/type')] - for param_type in paramTypes: - self.addMapping(name, param_type) + super().endFile() diff --git a/scripts/reflib.py b/scripts/reflib.py index 535683ae8..36db7590d 100644 --- a/scripts/reflib.py +++ b/scripts/reflib.py @@ -100,26 +100,26 @@ def logErr(*args, **kwargs): if file is not None: file.write(strfile.getvalue()) - sys.exit(1) + raise UserWarning(strfile.getvalue()) def isempty(s): """Return True if s is nothing but white space, False otherwise""" return len(''.join(s.split())) == 0 class pageInfo: - """Information about a ref page relative to the file it's extracted from.""" + """Information about a ref page relative to the file it is extracted from.""" def __init__(self): self.extractPage = True """True if page should be extracted""" self.Warning = None - """string warning if page is suboptimal or can't be generated""" + """string warning if page is suboptimal or cannot be generated""" self.embed = False """False or the name of the ref page this include is embedded within""" self.type = None - """'structs', 'protos', 'funcpointers', 'flags', 'enums'""" + """refpage type attribute - 'structs', 'protos', 'freeform', etc.""" self.name = None """struct/proto/enumerant/etc. name""" @@ -236,23 +236,27 @@ def lookupPage(pageMap, name): return pi def loadFile(filename): - """Load a file into a list of strings. Return the list or None on failure""" + """Load a file into a list of strings. 
Return the (list, newline_string) or (None, None) on failure""" + newline_string = "\n" try: - fp = open(filename, 'r', encoding='utf-8') + with open(filename, 'rb') as fp: + contents = fp.read() + if contents.count(b"\r\n") > 1: + newline_string = "\r\n" + + with open(filename, 'r', encoding='utf-8') as fp: + lines = fp.readlines() except: logWarn('Cannot open file', filename, ':', sys.exc_info()[0]) - return None - - file = fp.readlines() - fp.close() + return None, None - return file + return lines, newline_string def clampToBlock(line, minline, maxline): """Clamp a line number to be in the range [minline,maxline]. If the line number is None, just return it. - If minline is None, don't clamp to that value.""" + If minline is None, do not clamp to that value.""" if line is None: return line if minline and line < minline: @@ -280,8 +284,8 @@ def fixupRefs(pageMap, specFile, file): # # line to the include line, so autogeneration can at least # # pull the include out, but mark it not to be extracted. # # Examples include the host sync table includes in - # # chapters/fundamentals.txt and the table of Vk*Flag types in - # # appendices/boilerplate.txt. + # # chapters/fundamentals.adoc and the table of Vk*Flag types in + # # appendices/boilerplate.adoc. # if pi.begin is None and pi.validity is None and pi.end is None: # pi.begin = pi.include # pi.extractPage = False @@ -289,7 +293,7 @@ def fixupRefs(pageMap, specFile, file): # continue # Using open block delimiters, ref pages must *always* have a - # defined begin and end. If either is undefined, that's fatal. + # defined begin and end. If either is undefined, that is fatal. 
if pi.begin is None: pi.extractPage = False pi.Warning = 'Can\'t identify begin of ref page open block' @@ -300,7 +304,7 @@ def fixupRefs(pageMap, specFile, file): pi.Warning = 'Can\'t identify end of ref page open block' continue - # If there's no description of the page, infer one from the type + # If there is no description of the page, infer one from the type if pi.desc is None: if pi.type is not None: # pi.desc = pi.type[0:len(pi.type)-1] + ' (no short description available)' @@ -314,6 +318,9 @@ def fixupRefs(pageMap, specFile, file): # begin. funcpointer, proto, and struct pages infer the location of # the parameter and body sections. Other pages infer the location of # the body, but have no parameter sections. + # + # Probably some other types infer this as well - refer to list of + # all page types in genRef.py:emitPage() if pi.include is not None: if pi.type in ['funcpointers', 'protos', 'structs']: pi.param = nextPara(file, pi.include) @@ -325,13 +332,13 @@ def fixupRefs(pageMap, specFile, file): else: pi.Warning = 'Page does not have an API definition include::' - # It's possible for the inferred param and body lines to run past + # It is possible for the inferred param and body lines to run past # the end of block, if, for example, there is no parameter section. pi.param = clampToBlock(pi.param, pi.include, pi.end) pi.body = clampToBlock(pi.body, pi.param, pi.end) # We can get to this point with .include, .param, and .validity - # all being None, indicating those sections weren't found. + # all being None, indicating those sections were not found. logDiag('fixupRefs: after processing,', pi.name, 'looks like:') printPageInfo(pi, file) @@ -340,7 +347,7 @@ def fixupRefs(pageMap, specFile, file): # inferences about invalid pages. 
# # If a reference without a .end is entirely inside a valid reference, - # then it's intentionally embedded - may want to create an indirect + # then it is intentionally embedded - may want to create an indirect # page that links into the embedding page. This is done by a very # inefficient double loop, but the loop depth is small. for name in sorted(pageMap.keys()): @@ -350,7 +357,7 @@ def fixupRefs(pageMap, specFile, file): for embedName in sorted(pageMap.keys()): logDiag('fixupRefs: comparing', pi.name, 'to', embedName) embed = pageMap[embedName] - # Don't check embeddings which are themselves invalid + # Do not check embeddings which are themselves invalid if not embed.extractPage: logDiag('Skipping check for embedding in:', embed.name) continue @@ -375,9 +382,20 @@ def fixupRefs(pageMap, specFile, file): 'at line', pi.include) +def compatiblePageTypes(refpage_type, pagemap_type): + """Returns whether two refpage 'types' (categories) are compatible - + this is only true for 'consts' and 'enums' types.""" + + constsEnums = [ 'consts', 'enums' ] + + if refpage_type == pagemap_type: + return True + if refpage_type in constsEnums and pagemap_type in constsEnums: + return True + return False + # Patterns used to recognize interesting lines in an asciidoc source file. # These patterns are only compiled once. -INCSVAR_DEF = re.compile(r':INCS-VAR: (?P.*)') endifPat = re.compile(r'^endif::(?P[\w_+,]+)\[\]') beginPat = re.compile(r'^\[open,(?Prefpage=.*)\]') # attribute key/value pairs of an open block @@ -387,13 +405,13 @@ def fixupRefs(pageMap, specFile, file): errorPat = re.compile(r'^// *refError') # This regex transplanted from check_spec_links -# It looks for either OpenXR or Vulkan generated file conventions, and for -# the api/validity include (generated_type), protos/struct/etc path -# (category), and API name (entity_name). It could be put into the API -# conventions object. 
+# It looks for various generated file conventions, and for the api/validity +# include (generated_type), protos/struct/etc path (category), and API name +# (entity_name). +# It could be put into the API conventions object, instead of being +# generalized for all the different specs. INCLUDE = re.compile( - r'include::(?P((../){1,4}|\{INCS-VAR\}/|\{generated\}/)(generated/)?)(?P[\w]+)/(?P\w+)/(?P[^./]+).txt[\[][\]]') - + r'include::(?P((../){1,4}|\{generated\}/)(generated/)?)(?P[\w]+)/(?P\w+)/(?P[^./]+)\.(adoc|txt)[\[][\]]') def findRefs(file, filename): """Identify reference pages in a list of strings, returning a dictionary of @@ -405,7 +423,7 @@ def findRefs(file, filename): # first detect the '[open,refpage=...]' markup delimiting the block; # skip past the '--' block delimiter on the next line; and identify the # '--' block delimiter closing the page. - # This can't be done solely with pattern matching, and requires state to + # This cannot be done solely with pattern matching, and requires state to # track 'inside/outside block'. # When looking for open blocks, possible states are: # 'outside' - outside a block @@ -422,26 +440,10 @@ def findRefs(file, filename): # Track the pageInfo object corresponding to the current open block pi = None - incsvar = None while (line < numLines): setLogLine(line) - # Look for a file-wide definition - matches = INCSVAR_DEF.match(file[line]) - if matches: - incsvar = matches.group('value') - logDiag('Matched INCS-VAR definition:', incsvar) - - line = line + 1 - continue - - # Perform INCS-VAR substitution immediately. - if incsvar and '{INCS-VAR}' in file[line]: - newLine = file[line].replace('{INCS-VAR}', incsvar) - logDiag('PERFORMING SUBSTITUTION', file[line], '->', newLine) - file[line] = newLine - # Only one of the patterns can possibly match. Add it to # the dictionary for that name. 
@@ -451,7 +453,7 @@ def findRefs(file, filename): logDiag('Matched open block pattern') attribs = matches.group('attribs') - # If the previous open block wasn't closed, raise an error + # If the previous open block was not closed, raise an error if openBlockState != 'outside': logErr('Nested open block starting at line', line, 'of', filename) @@ -553,7 +555,7 @@ def findRefs(file, filename): if gen_type == 'validity': logDiag('Matched validity pattern') if pi is not None: - if pi.type and refpage_type != pi.type: + if pi.type and not compatiblePageTypes(refpage_type, pi.type): logWarn('ERROR: pageMap[' + name + '] type:', pi.type, 'does not match type:', refpage_type) pi.type = refpage_type @@ -570,7 +572,7 @@ def findRefs(file, filename): if pi is not None: if pi.include is not None: logDiag('found multiple includes for this block') - if pi.type and refpage_type != pi.type: + if pi.type and not compatiblePageTypes(refpage_type, pi.type): logWarn('ERROR: pageMap[' + name + '] type:', pi.type, 'does not match type:', refpage_type) pi.type = refpage_type @@ -643,7 +645,7 @@ def getBranch(): """Determine current git branch Returns (branch name, ''), or (None, stderr output) if the branch name - can't be determined""" + cannot be determined""" command = [ 'git', 'symbolic-ref', '--short', 'HEAD' ] results = subprocess.run(command, diff --git a/scripts/reg.py b/scripts/reg.py index 1b1173e78..b8f8af7ce 100755 --- a/scripts/reg.py +++ b/scripts/reg.py @@ -10,19 +10,25 @@ import re import sys import xml.etree.ElementTree as etree -from collections import defaultdict, namedtuple -from generator import OutputGenerator, GeneratorOptions, write -import pdb +from collections import defaultdict, deque, namedtuple + +from generator import GeneratorOptions, OutputGenerator, noneStr, write +from apiconventions import APIConventions def apiNameMatch(str, supported): """Return whether a required api name matches a pattern specified for an XML 'api' attribute or 'supported' 
attribute. - - str - api name such as 'vulkan' or 'openxr' - - supported - comma-separated list of XML API names""" + - str - API name such as 'vulkan' or 'openxr'. May be None, in which + case it never matches (this should not happen). + - supported - comma-separated list of XML API names. May be None, in + which case str always matches (this is the usual case).""" - return (str is not None and str in supported.split(',')) + if str is not None: + return supported is None or str in supported.split(',') + # Fallthrough case - either str is None or the test failed + return False def matchAPIProfile(api, profile, elem): """Return whether an API and profile @@ -52,7 +58,7 @@ def matchAPIProfile(api, profile, elem): --------- -------- None None Always matches 'string' None Always matches - None 'string' Does not match. Can't generate multiple APIs + None 'string' Does not match. Cannot generate multiple APIs or profiles, so if an API/profile constraint is present, it must be asked for explicitly. 'string' 'string' Strings must match @@ -60,7 +66,7 @@ def matchAPIProfile(api, profile, elem): ** In the future, we will allow regexes for the attributes, not just strings, so that `api="^(gl|gles2)"` will match. 
Even - this isn't really quite enough, we might prefer something + this is not really quite enough, we might prefer something like `"gl(core)|gles1(common-lite)"`.""" # Match 'api', if present elem_api = elem.get('api') @@ -69,7 +75,7 @@ def matchAPIProfile(api, profile, elem): raise UserWarning("No API requested, but 'api' attribute is present with value '" + elem_api + "'") elif api != elem_api: - # Requested API doesn't match attribute + # Requested API does not match attribute return False elem_profile = elem.get('profile') if elem_profile: @@ -77,11 +83,114 @@ def matchAPIProfile(api, profile, elem): raise UserWarning("No profile requested, but 'profile' attribute is present with value '" + elem_profile + "'") elif profile != elem_profile: - # Requested profile doesn't match attribute + # Requested profile does not match attribute return False return True +def mergeAPIs(tree, fromApiNames, toApiName): + """Merge multiple APIs using the precedence order specified in apiNames. + Also deletes elements. + + tree - Element at the root of the hierarchy to merge. 
+ apiNames - list of strings of API names.""" + + stack = deque() + stack.append(tree) + + while len(stack) > 0: + parent = stack.pop() + + for child in parent.findall('*'): + if child.tag == 'remove': + # Remove elements + parent.remove(child) + else: + stack.append(child) + + supportedList = child.get('supported') + if supportedList: + supportedList = supportedList.split(',') + for apiName in [toApiName] + fromApiNames: + if apiName in supportedList: + child.set('supported', toApiName) + + if child.get('api'): + definitionName = None + definitionVariants = [] + + # Keep only one definition with the same name if there are multiple definitions + if child.tag in ['type']: + if child.get('name') is not None: + definitionName = child.get('name') + definitionVariants = parent.findall(f"{child.tag}[@name='{definitionName}']") + else: + definitionName = child.find('name').text + definitionVariants = parent.findall(f"{child.tag}/name[.='{definitionName}']/..") + elif child.tag in ['member', 'param']: + definitionName = child.find('name').text + definitionVariants = parent.findall(f"{child.tag}/name[.='{definitionName}']/..") + elif child.tag in ['enum', 'feature']: + definitionName = child.get('name') + definitionVariants = parent.findall(f"{child.tag}[@name='{definitionName}']") + elif child.tag in ['require']: + definitionName = child.get('feature') + definitionVariants = parent.findall(f"{child.tag}[@feature='{definitionName}']") + elif child.tag in ['command']: + definitionName = child.find('proto/name').text + definitionVariants = parent.findall(f"{child.tag}/proto/name[.='{definitionName}']/../..") + + if definitionName: + bestMatchApi = None + requires = None + for apiName in [toApiName] + fromApiNames: + for variant in definitionVariants: + # Keep any requires attributes from the target API + if variant.get('requires') and variant.get('api') == apiName: + requires = variant.get('requires') + # Find the best matching definition + if apiName in 
variant.get('api').split(',') and bestMatchApi is None: + bestMatchApi = variant.get('api') + + if bestMatchApi: + for variant in definitionVariants: + if variant.get('api') != bestMatchApi: + # Only keep best matching definition + parent.remove(variant) + else: + # Add requires attribute from the target API if it is not overridden + if requires is not None and variant.get('requires') is None: + variant.set('requires', requires) + variant.set('api', toApiName) + + +def stripNonmatchingAPIs(tree, apiName, actuallyDelete = True): + """Remove tree Elements with 'api' attributes not matching apiName. + + tree - Element at the root of the hierarchy to strip. Only its + children can actually be removed, not the tree itself. + apiName - string which must match a comma-separated component of + the 'api' attribute. + actuallyDelete - only delete non-matching elements if True.""" + + stack = deque() + stack.append(tree) + + while len(stack) > 0: + parent = stack.pop() + + for child in parent.findall('*'): + api = child.get('api') + + if apiNameMatch(apiName, api): + # Add child to the queue + stack.append(child) + elif not apiNameMatch(apiName, api): + # Child does not match requested api. Remove it. + if actuallyDelete: + parent.remove(child) + + class BaseInfo: """Base class for information about a registry feature (type/group/enum/command/API/extension).
@@ -128,12 +237,12 @@ def compareElem(self, info, infoName): if (self.compareKeys(info, 'value', required = True) or self.compareKeys(info, 'bitpos', required = True)): # If both specify the same value or bit position, - # they're equal + # they are equal return True elif (self.compareKeys(info, 'extnumber') and self.compareKeys(info, 'offset') and self.compareKeys(info, 'dir')): - # If both specify the same relative offset, they're equal + # If both specify the same relative offset, they are equal return True elif (self.compareKeys(info, 'alias')): # If both are aliases of the same value @@ -141,7 +250,7 @@ def compareElem(self, info, infoName): else: return False else: - # The same enum can't extend two different types + # The same enum cannot extend two different types return False else: # Non-s should never be redefined @@ -236,22 +345,21 @@ def __init__(self, elem): attribute of . Extensions do not have API version numbers and are assigned number 0.""" - self.number = "0" + self.number = 0 self.supported = None else: # Extract vendor portion of __ self.category = self.name.split('_', 2)[1] self.version = "0" self.versionNumber = "0" - self.number = elem.get('number') + + self.number = int(elem.get('number','0')) """extension number, used for ordering and for assigning enumerant offsets. 
features do not have extension - numbers and are assigned number 0.""" + numbers and are assigned number 0, as are extensions without + numbers, so sorting works.""" - # If there's no 'number' attribute, use 0, so sorting works - if self.number is None: - self.number = 0 - self.supported = elem.get('supported') + self.supported = elem.get('supported', 'disabled') class SpirvInfo(BaseInfo): """Registry information about an API @@ -260,6 +368,36 @@ class SpirvInfo(BaseInfo): def __init__(self, elem): BaseInfo.__init__(self, elem) +class FormatInfo(BaseInfo): + """Registry information about an API .""" + + def __init__(self, elem, condition): + BaseInfo.__init__(self, elem) + # Need to save the condition here when it is known + self.condition = condition + +class SyncStageInfo(BaseInfo): + """Registry information about .""" + + def __init__(self, elem, condition): + BaseInfo.__init__(self, elem) + # Need to save the condition here when it is known + self.condition = condition + +class SyncAccessInfo(BaseInfo): + """Registry information about .""" + + def __init__(self, elem, condition): + BaseInfo.__init__(self, elem) + # Need to save the condition here when it is known + self.condition = condition + +class SyncPipelineInfo(BaseInfo): + """Registry information about .""" + + def __init__(self, elem): + BaseInfo.__init__(self, elem) + class Registry: """Object representing an API registry, loaded from an XML file.""" @@ -272,7 +410,9 @@ def __init__(self, gen=None, genOpts=None): "Output generator used to write headers / messages" if genOpts is None: - self.genOpts = GeneratorOptions() + # If no generator is provided, we may still need the XML API name + # (for example, in genRef.py). 
+ self.genOpts = GeneratorOptions(apiname = APIConventions().xml_api_name) else: self.genOpts = genOpts "Options controlling features to write and how to format them" @@ -311,6 +451,18 @@ def __init__(self, gen=None, genOpts=None): self.spirvcapdict = {} "dictionary of FeatureInfo objects for `` elements keyed by spirv capability name" + self.formatsdict = {} + "dictionary of FeatureInfo objects for `` elements keyed by VkFormat name" + + self.syncstagedict = {} + "dictionary of Sync*Info objects for `` elements keyed by VkPipelineStageFlagBits2 name" + + self.syncaccessdict = {} + "dictionary of Sync*Info objects for `` elements keyed by VkAccessFlagBits2 name" + + self.syncpipelinedict = {} + "dictionary of Sync*Info objects for `` elements keyed by pipeline type name" + self.emitFeatures = False """True to actually emit features for a version / extension, or False to just treat them as emitted""" @@ -356,27 +508,20 @@ def addElementInfo(self, elem, info, infoName, dictionary): Intended for internal use only. - - elem - ``/``/``/``/``/``/``/`` Element - - info - corresponding {Type|Group|Enum|Cmd|Feature|Spirv}Info object - - infoName - 'type' / 'group' / 'enum' / 'command' / 'feature' / 'extension' / 'spirvextension' / 'spirvcapability' - - dictionary - self.{type|group|enum|cmd|api|ext|spirvext|spirvcap}dict + - elem - ``/``/``/``/``/``/``/``/``/``/``/`` Element + - info - corresponding {Type|Group|Enum|Cmd|Feature|Spirv|Format|SyncStage|SyncAccess|SyncPipeline}Info object + - infoName - 'type' / 'group' / 'enum' / 'command' / 'feature' / 'extension' / 'spirvextension' / 'spirvcapability' / 'format' / 'syncstage' / 'syncaccess' / 'syncpipeline' + - dictionary - self.{type|group|enum|cmd|api|ext|format|spirvext|spirvcap|sync}dict + + The dictionary key is the element 'name' attribute.""" - If the Element has an 'api' attribute, the dictionary key is the - tuple (name,api). If not, the key is the name. 
'name' is an - attribute of the Element""" # self.gen.logMsg('diag', 'Adding ElementInfo.required =', # info.required, 'name =', elem.get('name')) - api = elem.get('api') - if api: - key = (elem.get('name'), api) - else: - key = elem.get('name') + key = elem.get('name') if key in dictionary: if not dictionary[key].compareElem(info, infoName): self.gen.logMsg('warn', 'Attempt to redefine', key, '(this should not happen)') - else: - True else: dictionary[key] = info @@ -406,27 +551,48 @@ def breakOnName(self, regexp): def parseTree(self): """Parse the registry Element, once created""" # This must be the Element for the root + if self.tree is None: + raise RuntimeError("Tree not initialized!") self.reg = self.tree.getroot() + # Preprocess the tree in one of the following ways: + # - either merge a set of APIs to another API based on their 'api' attributes + # - or remove all elements with non-matching 'api' attributes + # The preprocessing happens through a stack-based (depth-first) tree traversal. + # This is a blunt hammer, but eliminates the need to track and test + # the apis deeper in processing to select the correct elements and + # avoid duplicates. + # Schema validation should prevent duplicate elements with + # overlapping api attributes, or where one element has an api + # attribute and the other does not. + + if self.genOpts.mergeApiNames: + mergeAPIs(self.reg, self.genOpts.mergeApiNames.split(','), self.genOpts.apiname) + else: + stripNonmatchingAPIs(self.reg, self.genOpts.apiname, actuallyDelete = True) + # Create dictionary of registry types from toplevel tags # and add 'name' attribute to each tag (where missing) # based on its element.
# - # There's usually one block; more are OK + # There is usually one block; more are OK # Required attributes: 'name' or nested tag contents self.typedict = {} for type_elem in self.reg.findall('types/type'): - # If the doesn't already have a 'name' attribute, set + # If the does not already have a 'name' attribute, set # it from contents of its tag. if type_elem.get('name') is None: - type_elem.set('name', type_elem.find('name').text) + name_elem = type_elem.find('name') + if name_elem is None or not name_elem.text: + raise RuntimeError("Type without a name!") + type_elem.set('name', name_elem.text) self.addElementInfo(type_elem, TypeInfo(type_elem), 'type', self.typedict) # Create dictionary of registry enum groups from tags. # # Required attributes: 'name'. If no name is given, one is - # generated, but that group can't be identified and turned into an - # enum type definition - it's just a container for tags. + # generated, but that group cannot be identified and turned into an + # enum type definition - it is just a container for tags. self.groupdict = {} for group in self.reg.findall('enums'): self.addElementInfo(group, GroupInfo(group), 'group', self.groupdict) @@ -452,7 +618,7 @@ def parseTree(self): # and add 'name' attribute to each tag (where missing) # based on its element. # - # There's usually only one block; more are OK. + # There is usually only one block; more are OK. # Required attributes: 'name' or tag contents self.cmddict = {} # List of commands which alias others. Contains @@ -460,11 +626,14 @@ def parseTree(self): # for each alias cmdAlias = [] for cmd in self.reg.findall('commands/command'): - # If the doesn't already have a 'name' attribute, set + # If the does not already have a 'name' attribute, set # it from contents of its tag. 
name = cmd.get('name') if name is None: - name = cmd.set('name', cmd.find('proto/name').text) + name_elem = cmd.find('proto/name') + if name_elem is None or not name_elem.text: + raise RuntimeError("Command without a name!") + name = cmd.set('name', name_elem.text) ci = CmdInfo(cmd) self.addElementInfo(cmd, ci, 'command', self.cmddict) alias = cmd.get('alias') @@ -494,6 +663,7 @@ def parseTree(self): # Create dictionaries of API and extension interfaces # from toplevel and tags. self.apidict = {} + format_condition = dict() for feature in self.reg.findall('feature'): featureInfo = FeatureInfo(feature) self.addElementInfo(feature, featureInfo, 'feature', self.apidict) @@ -507,11 +677,11 @@ def parseTree(self): # Instead, generateRequiredInterface ignores elements # that extend enumerated types. # - # For tags which are actually just constants, if there's + # For tags which are actually just constants, if there is # no 'extends' tag but there is a 'value' or 'bitpos' tag, just # add an EnumInfo record to the dictionary. That works because # output generation of constants is purely dependency-based, and - # doesn't need to iterate through the XML tags. + # does not need to iterate through the XML tags. 
for elem in feature.findall('require'): for enum in elem.findall('enum'): addEnumInfo = False @@ -530,6 +700,11 @@ def parseTree(self): else: self.gen.logMsg('warn', 'NO matching group', groupName, 'for enum', enum.get('name'), 'found.') + if groupName == "VkFormat": + format_name = enum.get('name') + if enum.get('alias'): + format_name = enum.get('alias') + format_condition[format_name] = featureInfo.name addEnumInfo = True elif enum.get('value') or enum.get('bitpos') or enum.get('alias'): # self.gen.logMsg('diag', 'Adding extension constant "enum"', @@ -539,6 +714,9 @@ def parseTree(self): enumInfo = EnumInfo(enum) self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) + sync_pipeline_stage_condition = dict() + sync_access_condition = dict() + self.extensions = self.reg.findall('extensions/extension') self.extdict = {} for feature in self.extensions: @@ -564,10 +742,10 @@ def parseTree(self): # as when redefining an enum in another extension. extnumber = enum.get('extnumber') if not extnumber: - enum.set('extnumber', featureInfo.number) + enum.set('extnumber', str(featureInfo.number)) enum.set('extname', featureInfo.name) - enum.set('supported', featureInfo.supported) + enum.set('supported', noneStr(featureInfo.supported)) # Look up the GroupInfo with matching groupName if groupName in self.groupdict: # self.gen.logMsg('diag', 'Matching group', @@ -577,6 +755,34 @@ def parseTree(self): else: self.gen.logMsg('warn', 'NO matching group', groupName, 'for enum', enum.get('name'), 'found.') + # This is Vulkan-specific + if groupName == "VkFormat": + format_name = enum.get('name') + if enum.get('alias'): + format_name = enum.get('alias') + if format_name in format_condition: + format_condition[format_name] += "," + featureInfo.name + else: + format_condition[format_name] = featureInfo.name + elif groupName == "VkPipelineStageFlagBits2": + stage_flag = enum.get('name') + if enum.get('alias'): + stage_flag = enum.get('alias') + featureName = elem.get('depends') if 
elem.get('depends') is not None else featureInfo.name + if stage_flag in sync_pipeline_stage_condition: + sync_pipeline_stage_condition[stage_flag] += "," + featureName + else: + sync_pipeline_stage_condition[stage_flag] = featureName + elif groupName == "VkAccessFlagBits2": + access_flag = enum.get('name') + if enum.get('alias'): + access_flag = enum.get('alias') + featureName = elem.get('depends') if elem.get('depends') is not None else featureInfo.name + if access_flag in sync_access_condition: + sync_access_condition[access_flag] += "," + featureName + else: + sync_access_condition[access_flag] = featureName + addEnumInfo = True elif enum.get('value') or enum.get('bitpos') or enum.get('alias'): # self.gen.logMsg('diag', 'Adding extension constant "enum"', @@ -586,23 +792,6 @@ def parseTree(self): enumInfo = EnumInfo(enum) self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) - # Construct a "validextensionstructs" list for parent structures - # based on "structextends" tags in child structures - disabled_types = [] - for disabled_ext in self.reg.findall('extensions/extension[@supported="disabled"]'): - for type_elem in disabled_ext.findall("*/type"): - disabled_types.append(type_elem.get('name')) - for type_elem in self.reg.findall('types/type'): - if type_elem.get('name') not in disabled_types: - parentStructs = type_elem.get('structextends') - if parentStructs is not None: - for parent in parentStructs.split(','): - # self.gen.logMsg('diag', type.get('name'), 'extends', parent) - self.validextensionstructs[parent].append(type_elem.get('name')) - # Sort the lists so they don't depend on the XML order - for parent in self.validextensionstructs: - self.validextensionstructs[parent].sort() - # Parse out all spirv tags in dictionaries # Use addElementInfo to catch duplicates for spirv in self.reg.findall('spirvextensions/spirvextension'): @@ -612,6 +801,34 @@ def parseTree(self): spirvInfo = SpirvInfo(spirv) self.addElementInfo(spirv, spirvInfo, 
'spirvcapability', self.spirvcapdict) + for format in self.reg.findall('formats/format'): + condition = None + format_name = format.get('name') + if format_name in format_condition: + condition = format_condition[format_name] + formatInfo = FormatInfo(format, condition) + self.addElementInfo(format, formatInfo, 'format', self.formatsdict) + + for stage in self.reg.findall('sync/syncstage'): + condition = None + stage_flag = stage.get('name') + if stage_flag in sync_pipeline_stage_condition: + condition = sync_pipeline_stage_condition[stage_flag] + syncInfo = SyncStageInfo(stage, condition) + self.addElementInfo(stage, syncInfo, 'syncstage', self.syncstagedict) + + for access in self.reg.findall('sync/syncaccess'): + condition = None + access_flag = access.get('name') + if access_flag in sync_access_condition: + condition = sync_access_condition[access_flag] + syncInfo = SyncAccessInfo(access, condition) + self.addElementInfo(access, syncInfo, 'syncaccess', self.syncaccessdict) + + for pipeline in self.reg.findall('sync/syncpipeline'): + syncInfo = SyncPipelineInfo(pipeline) + self.addElementInfo(pipeline, syncInfo, 'syncpipeline', self.syncpipelinedict) + def dumpReg(self, maxlen=120, filehandle=sys.stdout): """Dump all the dictionaries constructed from the Registry object. @@ -651,6 +868,10 @@ def dumpReg(self, maxlen=120, filehandle=sys.stdout): for key in self.spirvcapdict: write(' SPIR-V Capability', key, '->', etree.tostring(self.spirvcapdict[key].elem)[0:maxlen], file=filehandle) + write('// VkFormat', file=filehandle) + for key in self.formatsdict: + write(' VkFormat', key, '->', + etree.tostring(self.formatsdict[key].elem)[0:maxlen], file=filehandle) def markTypeRequired(self, typename, required): """Require (along with its dependencies) or remove (but not its dependencies) a type. 
@@ -659,6 +880,7 @@ def markTypeRequired(self, typename, required): - required - boolean (to tag features as required or not) """ self.gen.logMsg('diag', 'tagging type:', typename, '-> required =', required) + # Get TypeInfo object for tag corresponding to typename typeinfo = self.lookupElementInfo(typename, self.typedict) if typeinfo is not None: @@ -671,7 +893,7 @@ def markTypeRequired(self, typename, required): if depname: self.gen.logMsg('diag', 'Generating dependent type', depname, 'for', attrib_name, 'type', typename) - # Don't recurse on self-referential structures. + # Do not recurse on self-referential structures. if typename != depname: self.markTypeRequired(depname, required) else: @@ -713,20 +935,18 @@ def markEnumRequired(self, enumname, required): - enumname - name of enum - required - boolean (to tag features as required or not)""" - self.gen.logMsg('diag', 'tagging enum:', enumname, '-> required =', required) + self.gen.logMsg('diag', 'markEnumRequired: tagging enum:', enumname, '-> required =', required) enum = self.lookupElementInfo(enumname, self.enumdict) if enum is not None: # If the enum is part of a group, and is being removed, then - # look it up in that tag and remove it there, so that it - # isn't visible to generators (which traverse the tag - # elements themselves). - # This isn't the most robust way of doing this, since a removed - # enum that's later required again will no longer have a group - # element, but it makes the change non-intrusive on generator - # code. - if required is False: + # look it up in that tag and remove the Element there, + # so that it is not visible to generators (which traverse the + # tag elements rather than using the dictionaries). 
+ if not required: groupName = enum.elem.get('extends') if groupName is not None: + self.gen.logMsg('diag', f'markEnumRequired: Removing extending enum {enum.elem.get("name")}') + # Look up the Info with matching groupName if groupName in self.groupdict: gi = self.groupdict[groupName] @@ -735,23 +955,42 @@ def markEnumRequired(self, enumname, required): # Remove copy of this enum from the group gi.elem.remove(gienum) else: - self.gen.logMsg('warn', 'Cannot remove enum', + self.gen.logMsg('warn', 'markEnumRequired: Cannot remove enum', enumname, 'not found in group', groupName) else: - self.gen.logMsg('warn', 'Cannot remove enum', + self.gen.logMsg('warn', 'markEnumRequired: Cannot remove enum', enumname, 'from nonexistent group', groupName) + else: + # This enum is not an extending enum. + # The XML tree must be searched for all that + # might have it, so we know the parent to delete from. + + enumName = enum.elem.get('name') + + self.gen.logMsg('diag', f'markEnumRequired: Removing non-extending enum {enumName}') + + count = 0 + for enums in self.reg.findall('enums'): + for thisEnum in enums.findall('enum'): + if thisEnum.get('name') == enumName: + # Actually remove it + count = count + 1 + enums.remove(thisEnum) + + if count == 0: + self.gen.logMsg('warn', f'markEnumRequired: {enumName}) not found in any tag') enum.required = required # Tag enum dependencies in 'alias' attribute as required depname = enum.elem.get('alias') if depname: - self.gen.logMsg('diag', 'Generating dependent enum', + self.gen.logMsg('diag', 'markEnumRequired: Generating dependent enum', depname, 'for alias', enumname, 'required =', enum.required) self.markEnumRequired(depname, required) else: - self.gen.logMsg('warn', 'enum:', enumname, 'IS NOT DEFINED') + self.gen.logMsg('warn', f'markEnumRequired: {enumname} IS NOT DEFINED') def markCmdRequired(self, cmdname, required): """Mark a command as required or not. 
@@ -762,14 +1001,25 @@ def markCmdRequired(self, cmdname, required): cmd = self.lookupElementInfo(cmdname, self.cmddict) if cmd is not None: cmd.required = required + # Tag command dependencies in 'alias' attribute as required - depname = cmd.elem.get('alias') - if depname: - self.gen.logMsg('diag', 'Generating dependent command', - depname, 'for alias', cmdname) - self.markCmdRequired(depname, required) + # + # This is usually not done, because command 'aliases' are not + # actual C language aliases like type and enum aliases. Instead + # they are just duplicates of the function signature of the + # alias. This means that there is no dependency of a command + # alias on what it aliases. One exception is validity includes, + # where the spec markup needs the promoted-to validity include + # even if only the promoted-from command is being built. + if self.genOpts.requireCommandAliases: + depname = cmd.elem.get('alias') + if depname: + self.gen.logMsg('diag', 'Generating dependent command', + depname, 'for alias', cmdname) + self.markCmdRequired(depname, required) + # Tag all parameter types of this command as required. - # This DOES NOT remove types of commands in a + # This does not remove types of commands in a # tag, because many other commands may use the same type. # We could be more clever and reference count types, # instead of using a boolean. @@ -792,11 +1042,12 @@ def markRequired(self, featurename, feature, required): # Loop over types, enums, and commands in the tag # @@ It would be possible to respect 'api' and 'profile' attributes - # in individual features, but that's not done yet. + # in individual features, but that is not done yet. 
for typeElem in feature.findall('type'): self.markTypeRequired(typeElem.get('name'), required) for enumElem in feature.findall('enum'): self.markEnumRequired(enumElem.get('name'), required) + for cmdElem in feature.findall('command'): self.markCmdRequired(cmdElem.get('name'), required) @@ -831,6 +1082,8 @@ def getAlias(self, elem, dict): if alias is None: name = elem.get('name') typeinfo = self.lookupElementInfo(name, dict) + if not typeinfo: + self.gen.logMsg('error', name, 'is not a known name') alias = typeinfo.elem.get('alias') return alias @@ -842,8 +1095,13 @@ def checkForCorrectionAliases(self, alias, require, tag): - require - `` block from the registry - tag - tag to look for in the require block""" - if alias and require.findall(tag + "[@name='" + alias + "']"): - return True + # For the time being, the code below is bypassed. It has the effect + # of excluding "spelling aliases" created to comply with the style + # guide, but this leaves references out of the specification and + # causes broken internal links. + # + # if alias and require.findall(tag + "[@name='" + alias + "']"): + # return True return False @@ -877,9 +1135,12 @@ def fillFeatureDictionary(self, interface, featurename, api, profile): # Determine the required extension or version needed for a require block # Assumes that only one of these is specified - required_key = require.get('feature') - if required_key is None: - required_key = require.get('extension') + # 'extension', and therefore 'required_key', may be a boolean + # expression of extension names. + # 'required_key' is used only as a dictionary key at + # present, and passed through to the script generators, so + # they must be prepared to parse that boolean expression. 
+ required_key = require.get('depends') # Loop over types, enums, and commands in the tag for typeElem in require.findall('type'): @@ -887,7 +1148,7 @@ def fillFeatureDictionary(self, interface, featurename, api, profile): typeinfo = self.lookupElementInfo(typename, self.typedict) if typeinfo: - # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. + # Remove aliases in the same extension/feature; these are always added as a correction. Do not need the original to be visible. alias = self.getAlias(typeElem, self.typedict) if not self.checkForCorrectionAliases(alias, require, 'type'): # Resolve the type info to the actual type, so we get an accurate read for 'structextends' @@ -902,12 +1163,15 @@ def fillFeatureDictionary(self, interface, featurename, api, profile): if not typeextends in self.gen.featureDictionary[featurename][typecat][required_key]: self.gen.featureDictionary[featurename][typecat][required_key][typeextends] = [] self.gen.featureDictionary[featurename][typecat][required_key][typeextends].append(typename) + else: + self.gen.logMsg('warn', 'fillFeatureDictionary: NOT filling for {}'.format(typename)) + for enumElem in require.findall('enum'): enumname = enumElem.get('name') typeinfo = self.lookupElementInfo(enumname, self.enumdict) - # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. + # Remove aliases in the same extension/feature; these are always added as a correction. Do not need the original to be visible. 
alias = self.getAlias(enumElem, self.enumdict) if not self.checkForCorrectionAliases(alias, require, 'enum'): enumextends = enumElem.get('extends') @@ -916,29 +1180,42 @@ def fillFeatureDictionary(self, interface, featurename, api, profile): if not enumextends in self.gen.featureDictionary[featurename]['enumconstant'][required_key]: self.gen.featureDictionary[featurename]['enumconstant'][required_key][enumextends] = [] self.gen.featureDictionary[featurename]['enumconstant'][required_key][enumextends].append(enumname) + else: + self.gen.logMsg('warn', 'fillFeatureDictionary: NOT filling for {}'.format(typename)) for cmdElem in require.findall('command'): - - # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. + # Remove aliases in the same extension/feature; these are always added as a correction. Do not need the original to be visible. alias = self.getAlias(cmdElem, self.cmddict) if not self.checkForCorrectionAliases(alias, require, 'command'): if not required_key in self.gen.featureDictionary[featurename]['command']: self.gen.featureDictionary[featurename]['command'][required_key] = [] self.gen.featureDictionary[featurename]['command'][required_key].append(cmdElem.get('name')) + else: + self.gen.logMsg('warn', 'fillFeatureDictionary: NOT filling for {}'.format(typename)) - - def requireAndRemoveFeatures(self, interface, featurename, api, profile): - """Process `` and `` tags for a `` or ``. + def requireFeatures(self, interface, featurename, api, profile): + """Process `` tags for a `` or ``. 
- interface - Element for `` or ``, containing - `` and `` tags + `` tags - featurename - name of the feature - api - string specifying API name being generated - profile - string specifying API profile being generated""" + # marks things that are required by this version/profile for feature in interface.findall('require'): if matchAPIProfile(api, profile, feature): self.markRequired(featurename, feature, True) + + def removeFeatures(self, interface, featurename, api, profile): + """Process `` tags for a `` or ``. + + - interface - Element for `` or ``, containing + `` tags + - featurename - name of the feature + - api - string specifying API name being generated + - profile - string specifying API profile being generated""" + # marks things that are removed by this version/profile for feature in interface.findall('remove'): if matchAPIProfile(api, profile, feature): @@ -954,6 +1231,7 @@ def assignAdditionalValidity(self, interface, api, profile): if v.get('struct'): self.typedict[v.get('struct')].additionalValidity.append(copy.deepcopy(v)) + def removeAdditionalValidity(self, interface, api, profile): # Loop over all usage inside all tags. for feature in interface.findall('remove'): if matchAPIProfile(api, profile, feature): @@ -963,15 +1241,23 @@ def assignAdditionalValidity(self, interface, api, profile): if v.get('struct'): self.typedict[v.get('struct')].removedValidity.append(copy.deepcopy(v)) - def generateFeature(self, fname, ftype, dictionary): + def generateFeature(self, fname, ftype, dictionary, explicit=False): """Generate a single type / enum group / enum / command, and all its dependencies as needed. 
- fname - name of feature (``/``/``) - ftype - type of feature, 'type' | 'enum' | 'command' - - dictionary - of *Info objects - self.{type|enum|cmd}dict""" + - dictionary - of *Info objects - self.{type|enum|cmd}dict + - explicit - True if this is explicitly required by the top-level + XML tag, False if it is a dependency of an explicit + requirement.""" self.gen.logMsg('diag', 'generateFeature: generating', ftype, fname) + + if not (explicit or self.genOpts.requireDepends): + self.gen.logMsg('diag', 'generateFeature: NOT generating', ftype, fname, 'because generator does not require dependencies') + return + f = self.lookupElementInfo(fname, dictionary) if f is None: # No such feature. This is an error, but reported earlier @@ -979,7 +1265,7 @@ def generateFeature(self, fname, ftype, dictionary): 'returning!') return - # If feature isn't required, or has already been declared, return + # If feature is not required, or has already been declared, return if not f.required: self.gen.logMsg('diag', 'Skipping', ftype, fname, '(not required)') return @@ -1056,7 +1342,7 @@ def generateFeature(self, fname, ftype, dictionary): # @ The enum group is not ready for generation. At this # @ point, it contains all tags injected by # @ tags without any verification of whether - # @ they're required or not. It may also contain + # @ they are required or not. It may also contain # @ duplicates injected by multiple consistent # @ definitions of an . 
@@ -1081,7 +1367,8 @@ def generateFeature(self, fname, ftype, dictionary): if extname is not None: # 'supported' attribute was injected when the element was # moved into the group in Registry.parseTree() - if self.genOpts.defaultExtensions == elem.get('supported'): + supported_list = elem.get('supported').split(",") + if self.genOpts.defaultExtensions in supported_list: required = True elif re.match(self.genOpts.addExtensions, extname) is not None: required = True @@ -1094,7 +1381,7 @@ def generateFeature(self, fname, ftype, dictionary): if required: # Mark this element as required (in the element, not the EnumInfo) elem.set('required', 'true') - # If it's an alias, track that for later use + # If it is an alias, track that for later use enumAlias = elem.get('alias') if enumAlias: enumAliases.append(enumAlias) @@ -1103,6 +1390,8 @@ def generateFeature(self, fname, ftype, dictionary): if name in enumAliases: elem.set('required', 'true') self.gen.logMsg('diag', '* also need to require alias', name) + if f is None: + raise RuntimeError("Should not get here") if f.elem.get('category') == 'bitmask': followupFeature = f.elem.get('bitvalues') elif ftype == 'command': @@ -1125,6 +1414,8 @@ def generateFeature(self, fname, ftype, dictionary): # Actually generate the type only if emitting declarations if self.emitFeatures: self.gen.logMsg('diag', 'Emitting', ftype, 'decl for', fname) + if genProc is None: + raise RuntimeError("genProc is None when we should be emitting") genProc(f, fname, alias) else: self.gen.logMsg('diag', 'Skipping', ftype, fname, @@ -1143,16 +1434,16 @@ def generateRequiredInterface(self, interface): # Loop over all features inside all tags. 
for features in interface.findall('require'): for t in features.findall('type'): - self.generateFeature(t.get('name'), 'type', self.typedict) + self.generateFeature(t.get('name'), 'type', self.typedict, explicit=True) for e in features.findall('enum'): - # If this is an enum extending an enumerated type, don't + # If this is an enum extending an enumerated type, do not # generate it - this has already been done in reg.parseTree, # by copying this element into the enumerated type. enumextends = e.get('extends') if not enumextends: - self.generateFeature(e.get('name'), 'enum', self.enumdict) + self.generateFeature(e.get('name'), 'enum', self.enumdict, explicit=True) for c in features.findall('command'): - self.generateFeature(c.get('name'), 'command', self.cmddict) + self.generateFeature(c.get('name'), 'command', self.cmddict, explicit=True) def generateSpirv(self, spirv, dictionary): if spirv is None: @@ -1167,6 +1458,99 @@ def generateSpirv(self, spirv, dictionary): genProc = self.gen.genSpirv genProc(spirv, name, alias) + def stripUnsupportedAPIs(self, dictionary, attribute, supportedDictionary): + """Strip unsupported APIs from attributes of APIs. + dictionary - *Info dictionary of APIs to be updated + attribute - attribute name to look for in each API + supportedDictionary - dictionary in which to look for supported + API elements in the attribute""" + + for key in dictionary: + eleminfo = dictionary[key] + attribstring = eleminfo.elem.get(attribute) + if attribstring is not None: + apis = [] + stripped = False + for api in attribstring.split(','): + ##print('Checking API {} referenced by {}'.format(api, key)) + if api in supportedDictionary and supportedDictionary[api].required: + apis.append(api) + else: + stripped = True + ##print('\t**STRIPPING API {} from {}'.format(api, key)) + + # Update the attribute after stripping stuff. 
+ # Could sort apis before joining, but it is not a clear win + if stripped: + eleminfo.elem.set(attribute, ','.join(apis)) + + def stripUnsupportedAPIsFromList(self, dictionary, supportedDictionary): + """Strip unsupported APIs from attributes of APIs. + dictionary - dictionary of list of structure name strings + supportedDictionary - dictionary in which to look for supported + API elements in the attribute""" + + for key in dictionary: + attribstring = dictionary[key] + if attribstring is not None: + apis = [] + stripped = False + for api in attribstring: + ##print('Checking API {} referenced by {}'.format(api, key)) + if supportedDictionary[api].required: + apis.append(api) + else: + stripped = True + ##print('\t**STRIPPING API {} from {}'.format(api, key)) + + # Update the attribute after stripping stuff. + # Could sort apis before joining, but it is not a clear win + if stripped: + dictionary[key] = apis + + def generateFormat(self, format, dictionary): + if format is None: + self.gen.logMsg('diag', 'No entry found for format element', + 'returning!') + return + + name = format.elem.get('name') + # No known alias for VkFormat elements + alias = None + if format.emit: + genProc = self.gen.genFormat + genProc(format, name, alias) + + def generateSyncStage(self, sync): + genProc = self.gen.genSyncStage + genProc(sync) + + def generateSyncAccess(self, sync): + genProc = self.gen.genSyncAccess + genProc(sync) + + def generateSyncPipeline(self, sync): + genProc = self.gen.genSyncPipeline + genProc(sync) + + def tagValidExtensionStructs(self): + """Construct a "validextensionstructs" list for parent structures + based on "structextends" tags in child structures. 
+ Only do this for structures tagged as required.""" + + for typeinfo in self.typedict.values(): + type_elem = typeinfo.elem + if typeinfo.required and type_elem.get('category') == 'struct': + struct_extends = type_elem.get('structextends') + if struct_extends is not None: + for parent in struct_extends.split(','): + # self.gen.logMsg('diag', type_elem.get('name'), 'extends', parent) + self.validextensionstructs[parent].append(type_elem.get('name')) + + # Sort the lists so they do not depend on the XML order + for parent in self.validextensionstructs: + self.validextensionstructs[parent].sort() + def apiGen(self): """Generate interface for specified versions using the current generator and generator options""" @@ -1177,8 +1561,13 @@ def apiGen(self): 'profile:', self.genOpts.profile) self.gen.logMsg('diag', '*******************************************') - # Reset required/declared flags for all features - self.apiReset() + # Could reset required/declared flags for all features here. + # This has been removed as never used. The initial motivation was + # the idea of calling apiGen() repeatedly for different targets, but + # this has never been done. The 20% or so build-time speedup that + # might result is not worth the effort to make it actually work. + # + # self.apiReset() # Compile regexps used to select versions & extensions regVersions = re.compile(self.genOpts.versions) @@ -1187,6 +1576,7 @@ def apiGen(self): regRemoveExtensions = re.compile(self.genOpts.removeExtensions) regEmitExtensions = re.compile(self.genOpts.emitExtensions) regEmitSpirv = re.compile(self.genOpts.emitSpirv) + regEmitFormats = re.compile(self.genOpts.emitFormats) # Get all matching API feature names & add to list of FeatureInfo # Note we used to select on feature version attributes, not names. @@ -1224,7 +1614,7 @@ def apiGen(self): # Get all matching extensions, in order by their extension number, # and add to the list of features. 
- # Start with extensions tagged with 'api' pattern matching the API + # Start with extensions whose 'supported' attributes match the API # being generated. Add extensions matching the pattern specified in # regExtensions, then remove extensions matching the pattern # specified in regRemoveExtensions @@ -1242,9 +1632,9 @@ def apiGen(self): # Include additional extensions if the extension name matches # the regexp specified in the generator options. This allows - # forcing extensions into an interface even if they're not + # forcing extensions into an interface even if they are not # tagged appropriately in the registry. - # However we still respect the 'supported' attribute. + # However, we still respect the 'supported' attribute. if regAddExtensions.match(extName) is not None: if not apiNameMatch(self.genOpts.apiname, ei.elem.get('supported')): self.gen.logMsg('diag', 'NOT including extension', @@ -1256,7 +1646,7 @@ def apiGen(self): include = True # Remove extensions if the name matches the regexp specified # in generator options. This allows forcing removal of - # extensions from an interface even if they're tagged that + # extensions from an interface even if they are tagged that # way in the registry. if regRemoveExtensions.match(extName) is not None: self.gen.logMsg('diag', 'Removing extension', @@ -1274,8 +1664,8 @@ def apiGen(self): 'for emission (does not match emitextensions pattern)') # Hack - can be removed when validity generator goes away - # (Jon) I'm not sure what this does, or if it should respect - # the ei.emit flag above. + # (Jon) I am not sure what this does, or if it should + # respect the ei.emit flag above. 
self.requiredextensions.append(extName) else: self.gen.logMsg('diag', 'NOT including extension', @@ -1295,41 +1685,68 @@ def apiGen(self): si.emit = (regEmitSpirv.match(key) is not None) spirvcaps.append(si) + formats = [] + for key in self.formatsdict: + si = self.formatsdict[key] + si.emit = (regEmitFormats.match(key) is not None) + formats.append(si) + # Sort the features list, if a sort procedure is defined if self.genOpts.sortProcedure: self.genOpts.sortProcedure(features) - # print('sortProcedure ->', [f.name for f in features]) - # Pass 1: loop over requested API versions and extensions tagging + # Passes 1+2: loop over requested API versions and extensions tagging # types/commands/features as required (in an block) or no - # longer required (in an block). It is possible to remove - # a feature in one version and restore it later by requiring it in - # a later version. + # longer required (in an block). s are processed + # after all s, so removals win. # If a profile other than 'None' is being generated, it must # match the profile attribute (if any) of the and # tags. 
self.gen.logMsg('diag', 'PASS 1: TAG FEATURES') for f in features: - self.gen.logMsg('diag', 'PASS 1: Tagging required and removed features for', - f.name) + self.gen.logMsg('diag', 'PASS 1: Tagging required and features for', f.name) self.fillFeatureDictionary(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) - self.requireAndRemoveFeatures(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) + self.requireFeatures(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) self.assignAdditionalValidity(f.elem, self.genOpts.apiname, self.genOpts.profile) - # Pass 2: loop over specified API versions and extensions printing - # declarations for required things which haven't already been + for f in features: + self.gen.logMsg('diag', 'PASS 2: Tagging removed features for', f.name) + self.removeFeatures(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) + self.removeAdditionalValidity(f.elem, self.genOpts.apiname, self.genOpts.profile) + + # Now, strip references to APIs that are not required. + # At present such references may occur in: + # Structs in 'structextends' attributes + # Enums in 'successcodes' and 'errorcodes' attributes + self.stripUnsupportedAPIs(self.typedict, 'structextends', self.typedict) + self.stripUnsupportedAPIs(self.cmddict, 'successcodes', self.enumdict) + self.stripUnsupportedAPIs(self.cmddict, 'errorcodes', self.enumdict) + self.stripUnsupportedAPIsFromList(self.validextensionstructs, self.typedict) + + # Construct lists of valid extension structures + self.tagValidExtensionStructs() + + # @@May need to strip / + # tags of these forms: + # + # + # + # + + # Pass 3: loop over specified API versions and extensions printing + # declarations for required things which have not already been # generated. 
- self.gen.logMsg('diag', 'PASS 2: GENERATE INTERFACES FOR FEATURES') + self.gen.logMsg('diag', 'PASS 3: GENERATE INTERFACES FOR FEATURES') self.gen.beginFile(self.genOpts) for f in features: - self.gen.logMsg('diag', 'PASS 2: Generating interface for', + self.gen.logMsg('diag', 'PASS 3: Generating interface for', f.name) emit = self.emitFeatures = f.emit if not emit: - self.gen.logMsg('diag', 'PASS 2: NOT declaring feature', + self.gen.logMsg('diag', 'PASS 3: NOT declaring feature', f.elem.get('name'), 'because it is not tagged for emission') # Generate the interface (or just tag its elements as having been - # emitted, if they haven't been). + # emitted, if they have not been). self.gen.beginFeature(f.elem, emit) self.generateRequiredInterface(f.elem) self.gen.endFeature() @@ -1338,6 +1755,14 @@ def apiGen(self): self.generateSpirv(s, self.spirvextdict) for s in spirvcaps: self.generateSpirv(s, self.spirvcapdict) + for s in formats: + self.generateFormat(s, self.formatsdict) + for s in self.syncstagedict: + self.generateSyncStage(self.syncstagedict[s]) + for s in self.syncaccessdict: + self.generateSyncAccess(self.syncaccessdict[s]) + for s in self.syncpipelinedict: + self.generateSyncPipeline(self.syncpipelinedict[s]) self.gen.endFile() def apiReset(self): @@ -1352,46 +1777,3 @@ def apiReset(self): self.cmddict[cmd].resetState() for cmd in self.apidict: self.apidict[cmd].resetState() - - def __validateStructLimittypes(self, struct): - """Validate 'limittype' attributes for a single struct.""" - limittypeDiags = namedtuple('limittypeDiags', ['missing', 'invalid']) - badFields = defaultdict(lambda : limittypeDiags(missing=[], invalid=[])) - validLimittypes = { 'min', 'max', 'bitmask', 'range', 'struct', 'noauto' } - for member in struct.getMembers(): - memberName = member.findtext('name') - if memberName in ['sType', 'pNext']: - continue - limittype = member.get('limittype') - if not limittype: - badFields[struct.elem.get('name')].missing.append(memberName) - elif 
limittype == 'struct': - typeName = member.findtext('type') - memberType = self.typedict[typeName] - badFields.update(self.__validateStructLimittypes(memberType)) - elif limittype not in validLimittypes: - badFields[struct.elem.get('name')].invalid.append(memberName) - return badFields - - def __validateLimittype(self): - """Validate 'limittype' attributes.""" - self.gen.logMsg('diag', 'VALIDATING LIMITTYPE ATTRIBUTES') - badFields = self.__validateStructLimittypes(self.typedict['VkPhysicalDeviceProperties2']) - for featStructName in self.validextensionstructs['VkPhysicalDeviceProperties2']: - featStruct = self.typedict[featStructName] - badFields.update(self.__validateStructLimittypes(featStruct)) - - if badFields: - self.gen.logMsg('diag', 'SUMMARY OF FIELDS WITH INCORRECT LIMITTYPES') - for key in sorted(badFields.keys()): - diags = badFields[key] - if diags.missing: - self.gen.logMsg('diag', ' ', key, 'missing limittype:', ', '.join(badFields[key].missing)) - if diags.invalid: - self.gen.logMsg('diag', ' ', key, 'invalid limittype:', ', '.join(badFields[key].invalid)) - return False - return True - - def validateRegistry(self): - """Validate properties of the registry.""" - return self.__validateLimittype() diff --git a/scripts/scriptgenerator.py b/scripts/scriptgenerator.py new file mode 100644 index 000000000..f5ed14d00 --- /dev/null +++ b/scripts/scriptgenerator.py @@ -0,0 +1,390 @@ +#!/usr/bin/python3 -i +# +# Copyright 2013-2024 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +from generator import OutputGenerator, enquote, noneStr + +def mostOfficial(api, newapi): + """Return the 'most official' of two related names, api and newapi. + KHR is more official than EXT is more official than everything else. + If there is ambiguity, return api. 
+ Accommodate APIs using lower-case vendor suffixes.""" + + apicat = api[-3:].upper() + newapicat = newapi[-3:].upper() + + if apicat == 'KHR': + return api + if newapicat == 'KHR': + return newapi; + if apicat == 'EXT': + return api + if newapicat == 'EXT': + return newapi; + return api + +class ScriptOutputGenerator(OutputGenerator): + """ScriptOutputGenerator - subclass of OutputGenerator. + Base class to Generate script (Python/Ruby/JS/etc.) data structures + describing API names and relationships. + Similar to DocOutputGenerator, but writes a single file.""" + + def apiName(self, name): + """Return True if name is in the reserved API namespace. + + Delegates to the conventions object. """ + return self.genOpts.conventions.is_api_name(name) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # Track features being generated + self.features = [] + + # Reverse map from interface names to features requiring them + self.apimap = {} + + # Reverse map from unsupported APIs in this build to aliases which + # are supported + self.nonexistent = {} + + def beginFile(self, genOpts): + OutputGenerator.beginFile(self, genOpts) + # + # Dictionaries are keyed by the name of the entity (e.g. + # self.structs is keyed by structure names). Values are + # the names of related entities (e.g. structs contain + # a list of type names of members, enums contain a list + # of enumerants belong to the enumerated type, etc.), or + # just None if there are no directly related entities. + # + # Collect the mappings, then emit the Python script in endFile + self.basetypes = {} + self.consts = {} + self.enums = {} + self.flags = {} + self.funcpointers = {} + self.protos = {} + self.structs = {} + self.handles = {} + self.defines = {} + self.alias = {} + # Dictionary containing the type of a type name + # (e.g. the string name of the dictionary with its contents). 
+ self.typeCategory = {} + self.mapDict = {} + + def addInterfaceMapping(self, api, feature, required): + """Add a reverse mapping in self.apimap from an API to a feature + requiring that API. + + - api - name of the API + - feature - name of the feature requiring it + - required - None, or an additional feature dependency within + 'feature'. The additional dependency is a boolean expression of + one or more extension and/or core version names, which is passed + through to the output script intact.""" + + # Each entry in self.apimap contains one or more + # ( feature, required ) tuples. + deps = ( feature, required ) + + if api in self.apimap: + self.apimap[api].append(deps) + else: + self.apimap[api] = [ deps ] + + def mapInterfaceKeys(self, feature, key): + """Construct reverse mapping of APIs to features requiring them in + self.apimap. + + - feature - name of the feature being generated + - key - API category - 'define', 'basetype', etc.""" + + dict = self.featureDictionary[feature][key] + + if dict: + # Not clear why handling of command vs. type APIs is different - + # see interfacedocgenerator.py, which this was based on. + if key == 'command': + for required in dict: + for api in dict[required]: + self.addInterfaceMapping(api, feature, required) + else: + for required in dict: + for parent in dict[required]: + for api in dict[required][parent]: + self.addInterfaceMapping(api, feature, required) + + def mapInterfaces(self, feature): + """Construct reverse mapping of APIs to features requiring them in + self.apimap. 
+ + - feature - name of the feature being generated""" + + # Map each category of interface + self.mapInterfaceKeys(feature, 'basetype') + self.mapInterfaceKeys(feature, 'bitmask') + self.mapInterfaceKeys(feature, 'command') + self.mapInterfaceKeys(feature, 'define') + self.mapInterfaceKeys(feature, 'enum') + self.mapInterfaceKeys(feature, 'enumconstant') + self.mapInterfaceKeys(feature, 'funcpointer') + self.mapInterfaceKeys(feature, 'handle') + self.mapInterfaceKeys(feature, 'include') + self.mapInterfaceKeys(feature, 'struct') + self.mapInterfaceKeys(feature, 'union') + + def endFile(self): + super().endFile() + + def beginFeature(self, interface, emit): + # Start processing in superclass + OutputGenerator.beginFeature(self, interface, emit) + + # Add this feature to the list being tracked + self.features.append( self.featureName ) + + def endFeature(self): + # Finish processing in superclass + OutputGenerator.endFeature(self) + + def addName(self, dict, name, value): + """Add a string entry to the dictionary, quoting it so it gets + printed out correctly in self.endFile().""" + dict[name] = value + + def addMapping(self, baseType, refType): + """Add a mapping between types to mapDict. 
+ + Only include API types, so we do not end up with a lot of useless + uint32_t and void types.""" + if not self.apiName(baseType) or not self.apiName(refType): + self.logMsg('diag', 'ScriptOutputGenerator::addMapping: IGNORE map from', baseType, '<->', refType) + return + + self.logMsg('diag', 'ScriptOutputGenerator::addMapping: map from', + baseType, '<->', refType) + + if baseType not in self.mapDict: + baseDict = {} + self.mapDict[baseType] = baseDict + else: + baseDict = self.mapDict[baseType] + if refType not in self.mapDict: + refDict = {} + self.mapDict[refType] = refDict + else: + refDict = self.mapDict[refType] + + baseDict[refType] = None + refDict[baseType] = None + + def breakCheck(self, procname, name): + """Debugging aid - call from procname to break on API 'name' if it + matches logic in this call.""" + + pat = 'VkExternalFenceFeatureFlagBits' + if name[0:len(pat)] == pat: + print('{}(name = {}) matches {}'.format(procname, name, pat)) + import pdb + pdb.set_trace() + + def genType(self, typeinfo, name, alias): + """Generate type. + + - For 'struct' or 'union' types, defer to genStruct() to + add to the dictionary. + - For 'bitmask' types, add the type name to the 'flags' dictionary, + with the value being the corresponding 'enums' name defining + the acceptable flag bits. + - For 'enum' types, add the type name to the 'enums' dictionary, + with the value being '@STOPHERE@' (because this case seems + never to happen). + - For 'funcpointer' types, add the type name to the 'funcpointers' + dictionary. + - For 'handle' and 'define' types, add the handle or #define name + to the 'struct' dictionary, because that is how the spec sources + tag these types even though they are not structs.""" + OutputGenerator.genType(self, typeinfo, name, alias) + + typeElem = typeinfo.elem + # If the type is a struct type, traverse the embedded tags + # generating a structure. Otherwise, emit the tag text. 
+ category = typeElem.get('category') + + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, name, category) + + if category in ('struct', 'union'): + self.genStruct(typeinfo, name, alias) + else: + if alias: + # Add name -> alias mapping + self.addName(self.alias, name, alias) + + # Always emit an alias (?!) + count = 1 + + # May want to only emit full type definition when not an alias? + else: + # Extract the type name + # (from self.genOpts). Copy other text through unchanged. + # If the resulting text is an empty string, do not emit it. + count = len(noneStr(typeElem.text)) + for elem in typeElem: + count += len(noneStr(elem.text)) + len(noneStr(elem.tail)) + + if count > 0: + if category == 'bitmask': + requiredEnum = typeElem.get('requires') + self.addName(self.flags, name, requiredEnum) + + # This happens when the Flags type is defined, but no + # FlagBits are defined yet. + if requiredEnum is not None: + self.addMapping(name, requiredEnum) + elif category == 'enum': + # This case does not seem to come up. It nominally would + # result from + # , + # but the output generator does not emit them directly. + self.logMsg('warn', 'ScriptOutputGenerator::genType: invalid \'enum\' category for name:', name) + elif category == 'funcpointer': + self.funcpointers[name] = None + elif category == 'handle': + self.handles[name] = None + elif category == 'define': + self.defines[name] = None + elif category == 'basetype': + self.basetypes[name] = None + self.addName(self.typeCategory, name, 'basetype') + else: + self.logMsg('diag', 'ScriptOutputGenerator::genType: unprocessed type:', name) + + def genStruct(self, typeinfo, typeName, alias): + """Generate struct (e.g. C "struct" type). 
+ + Add the struct name to the 'structs' dictionary, with the + value being an ordered list of the struct member names.""" + OutputGenerator.genStruct(self, typeinfo, typeName, alias) + + if alias: + # Add name -> alias mapping + self.addName(self.alias, typeName, alias) + else: + # May want to only emit definition on this branch + True + + members = [member.text for member in typeinfo.elem.findall('.//member/name')] + self.structs[typeName] = members + memberTypes = [member.text for member in typeinfo.elem.findall('.//member/type')] + for member_type in memberTypes: + self.addMapping(typeName, member_type) + + def genGroup(self, groupinfo, groupName, alias): + """Generate group (e.g. C "enum" type). + + These are concatenated together with other types. + + - Add the enum type name to the 'enums' dictionary, with + the value being an ordered list of the enumerant names. + - Add each enumerant name to the 'consts' dictionary, with + the value being the enum type the enumerant is part of.""" + OutputGenerator.genGroup(self, groupinfo, groupName, alias) + groupElem = groupinfo.elem + + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, groupName, 'group') + + if alias: + # Add name -> alias mapping + self.addName(self.alias, groupName, alias) + else: + # May want to only emit definition on this branch + True + + # Add each nested 'enum' tag + enumerants = [elem.get('name') for elem in groupElem.findall('enum')] + for name in enumerants: + self.addName(self.consts, name, groupName) + + # Sort enums for output stability, since their order is irrelevant + self.enums[groupName] = sorted(enumerants) + + def genEnum(self, enuminfo, name, alias): + """Generate enumerant (compile time constant). 
+ + - Add the constant name to the 'consts' dictionary, with the + value being None to indicate that the constant is not + an enumeration value.""" + OutputGenerator.genEnum(self, enuminfo, name, alias) + + if name not in self.consts: + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, name, 'consts') + self.consts[name] = None + + if alias: + # Add name -> alias mapping + self.addName(self.alias, name, alias) + else: + # May want to only emit definition on this branch + True + + # Otherwise, do not add it to the consts dictionary because it is + # already present. This happens due to the generator 'reparentEnums' + # parameter being False, so each extension enum appears in both the + # type and in the or it originally + # came from. + + def genCmd(self, cmdinfo, name, alias): + """Generate command. + + - Add the command name to the 'protos' dictionary, with the + value being an ordered list of the parameter names.""" + OutputGenerator.genCmd(self, cmdinfo, name, alias) + + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, name, 'protos') + + if alias: + # Add name -> alias mapping + self.addName(self.alias, name, alias) + else: + # May want to only emit definition on this branch + True + + params = [param.text for param in cmdinfo.elem.findall('param/name')] + self.protos[name] = params + paramTypes = [param.text for param in cmdinfo.elem.findall('param/type')] + for param_type in paramTypes: + self.addMapping(name, param_type) + + def createInverseMap(self): + """This creates the inverse mapping of nonexistent APIs in this + build to their aliases which are supported. Must be called by + language-specific subclasses before emitting that mapping.""" + + # Map from APIs not supported in this build to aliases that are. + # When there are multiple valid choices for remapping, choose the + # most-official suffixed one (KHR > EXT > vendor). 
+ for key in self.alias: + # If the API key is aliased to something which does not exist, + # then add the thing that does not exist to the nonexistent map. + # This is used in spec macros to make promoted extension links + # in specs built without the promoted interface refer to the + # older interface instead. + + invkey = self.alias[key] + + if invkey not in self.typeCategory: + if invkey in self.nonexistent: + # Potentially remap existing mapping to a more official + # alias. + self.nonexistent[invkey] = mostOfficial(self.nonexistent[invkey], key) + else: + # Create remapping to an alias + self.nonexistent[invkey] = key diff --git a/scripts/conventions.py b/scripts/spec_tools/conventions.py similarity index 62% rename from scripts/conventions.py rename to scripts/spec_tools/conventions.py index 34fa2ea52..5b9f6dd40 100644 --- a/scripts/conventions.py +++ b/scripts/spec_tools/conventions.py @@ -8,6 +8,8 @@ # used in generation. from enum import Enum +import abc +import re # Type categories that respond "False" to isStructAlwaysValid # basetype is home to typedefs like ..Bool32 @@ -21,13 +23,21 @@ TYPES_KNOWN_ALWAYS_VALID = set(('char', 'float', 'int8_t', 'uint8_t', + 'int16_t', 'uint16_t', 'int32_t', 'uint32_t', 'int64_t', 'uint64_t', 'size_t', - 'uintptr_t', + 'intptr_t', 'uintptr_t', 'int', )) +# Split an extension name into vendor ID and name portions +EXT_NAME_DECOMPOSE_RE = re.compile(r'(?P[A-Za-z]+)_(?P[A-Za-z]+)_(?P[\w_]+)') + +# Match an API version name. +# Match object includes API prefix, major, and minor version numbers. +# This could be refined further for specific APIs. 
+API_VERSION_NAME_RE = re.compile(r'(?P[A-Za-z]+)_VERSION_(?P[0-9]+)_(?P[0-9]+)') class ProseListFormats(Enum): """A connective, possibly with a quantifier.""" @@ -42,7 +52,7 @@ def from_string(cls, s): return cls.OR if s == 'and': return cls.AND - return None + raise RuntimeError("Unrecognized string connective: " + s) @property def connective(self): @@ -63,18 +73,37 @@ def quantifier(self, n): return '' -class ConventionsBase: +class ConventionsBase(abc.ABC): """WG-specific conventions.""" def __init__(self): self._command_prefix = None self._type_prefix = None + def formatVersionOrExtension(self, name): + """Mark up an API version or extension name as a link in the spec.""" + + # Is this a version name? + match = API_VERSION_NAME_RE.match(name) + if match is not None: + return self.formatVersion(name, + match.group('apivariant'), + match.group('major'), + match.group('minor')) + else: + # If not, assumed to be an extension name. Might be worth checking. + return self.formatExtension(name) + + def formatVersion(self, name, apivariant, major, minor): + """Mark up an API version name as a link in the spec.""" + return '`<<{}>>`'.format(name) + def formatExtension(self, name): - """Mark up an extension name as a link the spec.""" - return '`apiext:{}`'.format(name) + """Mark up an extension name as a link in the spec.""" + return '`<<{}>>`'.format(name) @property + @abc.abstractmethod def null(self): """Preferred spelling of NULL.""" raise NotImplementedError @@ -112,6 +141,38 @@ def external_macro(self): """ return 'code:' + @property + @abc.abstractmethod + def structtype_member_name(self): + """Return name of the structure type member. + + Must implement. + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def nextpointer_member_name(self): + """Return name of the structure pointer chain member. + + Must implement. 
+ """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def xml_api_name(self): + """Return the name used in the default API XML registry for the default API""" + raise NotImplementedError() + + @abc.abstractmethod + def generate_structure_type_from_name(self, structname): + """Generate a structure type name, like XR_TYPE_CREATE_INSTANCE_INFO. + + Must implement. + """ + raise NotImplementedError() + def makeStructName(self, name): """Prepend the appropriate format macro for a structure to a structure type name. @@ -139,9 +200,9 @@ def _implMakeProseList(self, elements, fmt, with_verb, comma_for_two_elts=False, Optionally adds a quantifier (like 'any') before a list of 2 or more, if specified by fmt. - Don't edit these defaults, override self.makeProseList(). + Do not edit these defaults, override self.makeProseList(). """ - assert(serial_comma) # didn't implement what we didn't need + assert(serial_comma) # did not implement what we did not need if isinstance(fmt, str): fmt = ProseListFormats.from_string(fmt) @@ -166,10 +227,12 @@ def _implMakeProseList(self, elements, fmt, with_verb, comma_for_two_elts=False, return ''.join(parts) @property + @abc.abstractmethod def file_suffix(self): """Return suffix of generated Asciidoctor files""" raise NotImplementedError + @abc.abstractmethod def api_name(self, spectype=None): """Return API or specification name for citations in ref pages. @@ -206,6 +269,7 @@ def type_prefix(self): return self._type_prefix @property + @abc.abstractmethod def api_prefix(self): """Return API token prefix. @@ -214,6 +278,56 @@ def api_prefix(self): Must implement.""" raise NotImplementedError + @property + def extension_name_prefix(self): + """Return extension name prefix. + + Typically two uppercase letters followed by an underscore. 
+ + Assumed to be the same as api_prefix, but some APIs use different + case conventions.""" + + return self.api_prefix + + def extension_short_description(self, elem): + """Return a short description of an extension for use in refpages. + + elem is an ElementTree for the <extension> tag in the XML. + The default behavior is to use the 'type' field of this tag, but not + all APIs support this field.""" + + ext_type = elem.get('type') + + if ext_type is not None: + return f'{ext_type} extension' + else: + return '' + + @property + def write_contacts(self): + """Return whether contact list should be written to extension appendices""" + return False + + @property + def write_extension_type(self): + """Return whether extension type should be written to extension appendices""" + return True + + @property + def write_extension_number(self): + """Return whether extension number should be written to extension appendices""" + return True + + @property + def write_extension_revision(self): + """Return whether extension revision number should be written to extension appendices""" + return True + + @property + def write_refpage_include(self): + """Return whether refpage include should be written to extension appendices""" + return True + @property def api_version_prefix(self): """Return API core version token prefix. @@ -329,24 +443,43 @@ def generate_max_enum_in_docs(self): documentation includes.""" return False + @abc.abstractmethod + def extension_file_path(self, name): + """Return file path to an extension appendix relative to a directory + containing all such appendices. + - name - extension name + + Must implement.""" + raise NotImplementedError - def extension_include_string(self, ext): + def extension_include_string(self, name): """Return format string for include:: line for an extension appendix - file. ext is an object with the following members: - - name - extension string string - - vendor - vendor portion of name - - barename - remainder of name + file. 
+ - name - extension name""" - Must implement.""" - raise NotImplementedError + return 'include::{{appendices}}/{}[]'.format( + self.extension_file_path(name)) @property - def refpage_generated_include_path(self): + def provisional_extension_warning(self): + """Return True if a warning should be included in extension + appendices for provisional extensions.""" + return True + + @property + def generated_include_path(self): """Return path relative to the generated reference pages, to the - generated API include files. + generated API include files.""" - Must implement.""" - raise NotImplementedError + return '{generated}' + + @property + def include_extension_appendix_in_refpage(self): + """Return True if generating extension refpages by embedding + extension appendix content (default), False otherwise + (OpenXR).""" + + return True def valid_flag_bit(self, bitpos): """Return True if bitpos is an allowed numeric bit position for @@ -356,3 +489,41 @@ def valid_flag_bit(self, bitpos): or 64 bits), and may depend on assumptions about compiler handling of sign bits in enumerated types, as well.""" return True + + @property + def duplicate_aliased_structs(self): + """ + Should aliased structs have the original struct definition listed in the + generated docs snippet? 
+ """ + return False + + @property + def protectProtoComment(self): + """Return True if generated #endif should have a comment matching + the protection symbol used in the opening #ifdef/#ifndef.""" + return False + + @property + def extra_refpage_headers(self): + """Return any extra headers (preceding the title) for generated + reference pages.""" + return '' + + @property + def extra_refpage_body(self): + """Return any extra text (following the title) for generated + reference pages.""" + return '' + + def is_api_version_name(self, name): + """Return True if name is an API version name.""" + + return API_VERSION_NAME_RE.match(name) is not None + + @property + def docgen_language(self): + """Return the language to be used in docgenerator [source] + blocks.""" + + return 'c++' diff --git a/scripts/spec_tools/util.py b/scripts/spec_tools/util.py index 3dde0bd42..e67038a5a 100644 --- a/scripts/spec_tools/util.py +++ b/scripts/spec_tools/util.py @@ -1,18 +1,7 @@ """Utility functions not closely tied to other spec_tools types.""" # Copyright (c) 2018-2019 Collabora, Ltd. -# Copyright (c) 2013-2024 The Khronos Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright 2013-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 def getElemName(elem, default=None): diff --git a/xml/cl.xml b/xml/cl.xml index 44aac6105..fa6b29974 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -305,11 +305,11 @@ server's OpenCL/api-docs repository. 
cl_version version - charname[CL_NAME_VERSION_MAX_NAME_SIZE] + char name[CL_NAME_VERSION_MAX_NAME_SIZE] cl_version_khr version - charname[CL_NAME_VERSION_MAX_NAME_SIZE_KHR] + char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR] cl_uint pci_domain @@ -321,7 +321,7 @@ server's OpenCL/api-docs repository. cl_command_queue_properties properties cl_command_queue_capabilities_intel capabilities cl_uint count - charname[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL] + char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL] #define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) #define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) @@ -336,12 +336,12 @@ server's OpenCL/api-docs repository. (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ ((patch) & CL_VERSION_PATCH_MASK_KHR)) - cl_boolsigned_accelerated - cl_boolunsigned_accelerated - cl_boolmixed_signedness_accelerated - cl_boolaccumulating_saturating_signed_accelerated - cl_boolaccumulating_saturating_unsigned_accelerated - cl_boolaccumulating_saturating_mixed_signedness_accelerated + cl_bool signed_accelerated + cl_bool unsigned_accelerated + cl_bool mixed_signedness_accelerated + cl_bool accumulating_saturating_signed_accelerated + cl_bool accumulating_saturating_unsigned_accelerated + cl_bool accumulating_saturating_mixed_signedness_accelerated cl_uint arg_index @@ -2098,7 +2098,7 @@ server's OpenCL/api-docs repository. - + @@ -5359,7 +5359,7 @@ server's OpenCL/api-docs repository. - + @@ -5406,7 +5406,7 @@ server's OpenCL/api-docs repository. - + @@ -5453,7 +5453,7 @@ server's OpenCL/api-docs repository. - + @@ -5502,7 +5502,7 @@ server's OpenCL/api-docs repository. - + @@ -5530,7 +5530,7 @@ server's OpenCL/api-docs repository. - + @@ -5544,7 +5544,7 @@ server's OpenCL/api-docs repository. - + @@ -5552,7 +5552,7 @@ server's OpenCL/api-docs repository. - + @@ -5578,7 +5578,7 @@ server's OpenCL/api-docs repository. - + @@ -5629,7 +5629,7 @@ server's OpenCL/api-docs repository. 
- + @@ -5643,13 +5643,13 @@ server's OpenCL/api-docs repository. - + - + @@ -5664,7 +5664,7 @@ server's OpenCL/api-docs repository. - + @@ -5687,7 +5687,7 @@ server's OpenCL/api-docs repository. - + @@ -5698,7 +5698,7 @@ server's OpenCL/api-docs repository. - + @@ -5900,7 +5900,7 @@ server's OpenCL/api-docs repository. - + @@ -5930,7 +5930,7 @@ server's OpenCL/api-docs repository. - + @@ -5945,7 +5945,7 @@ server's OpenCL/api-docs repository. - + @@ -5961,7 +5961,7 @@ server's OpenCL/api-docs repository. - + @@ -5977,7 +5977,7 @@ server's OpenCL/api-docs repository. - + @@ -6413,7 +6413,7 @@ server's OpenCL/api-docs repository. - + @@ -6511,7 +6511,7 @@ server's OpenCL/api-docs repository. - + @@ -6522,7 +6522,7 @@ server's OpenCL/api-docs repository. - + @@ -6532,7 +6532,7 @@ server's OpenCL/api-docs repository. - + @@ -6666,7 +6666,7 @@ server's OpenCL/api-docs repository. - + @@ -6747,7 +6747,7 @@ server's OpenCL/api-docs repository. - + @@ -6755,7 +6755,7 @@ server's OpenCL/api-docs repository. - + @@ -6821,7 +6821,7 @@ server's OpenCL/api-docs repository. - + @@ -6892,7 +6892,7 @@ server's OpenCL/api-docs repository. - + @@ -6903,7 +6903,7 @@ server's OpenCL/api-docs repository. - + @@ -6932,7 +6932,7 @@ server's OpenCL/api-docs repository. - + @@ -6950,7 +6950,7 @@ server's OpenCL/api-docs repository. - + @@ -6997,7 +6997,7 @@ server's OpenCL/api-docs repository. - + @@ -7024,7 +7024,7 @@ server's OpenCL/api-docs repository. - + @@ -7032,7 +7032,7 @@ server's OpenCL/api-docs repository. - + @@ -7040,7 +7040,7 @@ server's OpenCL/api-docs repository. - + @@ -7054,7 +7054,7 @@ server's OpenCL/api-docs repository. - + @@ -7063,7 +7063,7 @@ server's OpenCL/api-docs repository. - + @@ -7090,7 +7090,7 @@ server's OpenCL/api-docs repository. - + @@ -7098,7 +7098,7 @@ server's OpenCL/api-docs repository. - + @@ -7109,7 +7109,7 @@ server's OpenCL/api-docs repository. - + @@ -7117,7 +7117,7 @@ server's OpenCL/api-docs repository. 
- + @@ -7161,7 +7161,7 @@ server's OpenCL/api-docs repository. - + @@ -7228,9 +7228,11 @@ server's OpenCL/api-docs repository. + + + - @@ -7306,7 +7308,7 @@ server's OpenCL/api-docs repository. - + @@ -7380,7 +7382,7 @@ server's OpenCL/api-docs repository. - + @@ -7415,6 +7417,31 @@ server's OpenCL/api-docs repository. + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xml/registry.rnc b/xml/registry.rnc index f29ba8d5b..210073c24 100644 --- a/xml/registry.rnc +++ b/xml/registry.rnc @@ -1,20 +1,7 @@ -# Copyright (c) 2013-2024 The Khronos Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright 2013-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 -# Relax NG schema for Khronos Vulkan API Registry XML -# -# See https://www.khronos.org/vulkan/ +# Relax NG schema for Khronos API Registry XML # # This definition is subject to change (mostly in the form of additions) @@ -25,13 +12,13 @@ namespace xsd = "http://www.w3.org/2001/XMLSchema-datatypes" start = element registry { ( element comment { text } ? | - Platforms * | - Tags * | - Types * | - Enums * | - Commands * | - Feature * | - Extensions * + Platforms * | + Tags * | + Types * | + Enums * | + Commands * | + Feature * | + Extensions * ) * } @@ -80,15 +67,19 @@ Types = element types { # may contain arbitrary C code. 
# name - name of this type, if not present in the tag # api - matches a api attribute, if present +# alias - name of a type this type aliases # requires - name of another type definition required by this one +# bitvalues - for a *Flags type, name of an enum definition that +# defines the valid values for parameters of that type # category - if present, 'enum' indicates a matching # block to generate an enumerated type for, and 'struct' # causes special interpretation of the contents of the type # tag including ... TBD ... # Other allowed values are 'include', 'define', 'handle' and 'bitmask', -# which don't change syntactic interpretation but allow organization in -# the generated header. -# comment - unused +# which do not change syntactic interpretation but allow organization +# in the generated header. +# deprecated - denotes that this type is deprecated, and why. +# Valid values: 'aliased', 'true'. # parent - only applicable if category is 'handle'. Notes another type with # the 'handle' category that acts as a parent object for this type. # returnedonly - only applicable if category is 'struct'. Notes that this @@ -100,6 +91,11 @@ Types = element types { # When present it suppresses generation of automatic validity for the # pNext member of that structure, and instead the structure is added # to pNext chain validity for the parent structures it extends. +# allowduplicate - only applicable if category is 'struct'. pNext can include +# multiple structures of this type. +# objtypeenum - name of VK_OBJECT_TYPE_* API enumerant which corresponds +# to this type. Currently only specified for category="handle" types. 
+# comment - descriptive text with no semantic meaning # For types without a category, contents include # - substitutes for an APIENTRY-style macro on output # - contains name of the type being defined @@ -109,21 +105,26 @@ Types = element types { # For types with category 'struct', contents should be one or more # - like for a struct or union member # len - if the member is an array, len may be one or more of the following -# things, separated by commas (one for each array indirection): another -# member of that struct, 'null-terminated' for a string, '1' to indicate it's -# just a pointer (used for nested pointers), or a latex equation (prefixed with -# 'latexmath:') +# things, separated by commas (one for each array indirection): +# another member of that struct, 'null-terminated' for a string, +# '1' to indicate it is just a pointer (used for nested pointers), +# or a latex equation (prefixed with 'latexmath:') # altlen - if len has latexmath equations, this contains equivalent C99 # expressions separated by commas. +# deprecated - denotes that this member is deprecated, and why. +# Valid values: 'ignored', 'true'. 
# externsync - denotes that the member should be externally synchronized # when accessed by Vulkan # optional - whether this value can be omitted by providing NULL (for # pointers), VK_NULL_HANDLE (for handles) or 0 (for bitmasks/values) +# selector - for a union member, identifies a separate enum member that +# selects which of the union's members are valid +# selection - for a member of a union, identifies an enum value indicating the member is valid # noautovalidity - tag stating that no automatic validity language should be generated # values - comma-separated list of legal values, usually used only for sType enums # - containing arbitrary text (unused) # -# *** There's a problem here: I'm not sure how to represent the +# *** There is a problem here: I am not sure how to represent the # syntax where it may contain arbitrarily interleaved text, , and # child tags. This allows only the syntax # text name text name text @@ -133,11 +134,15 @@ Type = element type { attribute api { text } ? , attribute alias { text } ? , attribute requires { text } ? , + attribute bitvalues { text } ? , attribute name { TypeName } ? , attribute category { text } ? , + attribute deprecated { text } ? , attribute parent { TypeName } ? , attribute returnedonly { text } ? , attribute structextends { text } ? , + attribute allowduplicate { text } ? , + attribute objtypeenum { text } ? , Comment ? , ( ( @@ -155,18 +160,22 @@ Type = element type { ) | ( element member { + attribute api { text } ? , attribute len { text } ? , attribute altlen { text } ? , attribute externsync { text } ? , attribute optional { text } ? , + attribute selector { text } ? , + attribute selection { EnumName } ? , attribute noautovalidity { text } ? , attribute values { text } ? , + attribute deprecated { text } ? , mixed { element type { TypeName } ? , element name { text } ? , element enum { EnumName } ? , element comment { text } ? 
- } + } + } | element comment { text } ) * @@ -179,10 +188,12 @@ Type = element type { # start, end - beginning and end of a numeric range # vendor - owner of the numeric range # type - 'enum' or 'bitmask', if present +# bitwidth - bit width of the enum value type. # comment - unused Enums = element enums { attribute name { text } ? , attribute type { text } ? , + attribute bitwidth { Integer } ? , attribute start { Integer } ? , attribute end { Integer } ? , Vendor ? , @@ -216,10 +227,16 @@ Enums = element enums { # # Other attributes: # api - matches a api attribute, if present -# type - 'u' (unsigned), 'ull' (uint64), or integer if not present +# type - 'uint32_t', 'uint64_t', or 'float', if present. There are +# certain conditions under which the tag must be present, or absent, +# but they are context-dependent and difficult to express in the +# RNC syntax. # name - enumerant name # alias - another enumerant this is semantically identical to -# comment - unused +# protect - additional #ifdef symbol to place around the enum +# comment - descriptive text with no semantic meaning +# deprecated - denotes that this enum is deprecated, and why. +# Valid values: 'aliased', 'ignored', 'true'. Enum = element enum { ( ( @@ -242,9 +259,11 @@ Enum = element enum { attribute alias { TypeName } ) ) ? & + attribute protect { text } ? & attribute api { text } ? & attribute type { TypeSuffix } ? & attribute name { text } & + attribute deprecated { text } ? & Comment ? ) } @@ -252,7 +271,7 @@ Enum = element enum { # defines a range of enumerants not currently being used # start, end - beginning and end of an unused numeric range # vendor - unused -# comment - unused +# comment - descriptive text with no semantic meaning Unused = element unused { attribute start { Integer } , attribute end { Integer } ? , @@ -270,28 +289,46 @@ Commands = element commands { # # There are two forms of the tag. # -# The first only has 'name' and 'alias' attributes, and no contents. 
+# Either form may have an 'api' attribute +# api - matches a api attribute, if present +# +# The first form only has 'name' and 'alias' attributes, and no contents. # It defines a command alias. # -# The second fully defines a command, and has the following structure: +# The second form fully defines a command, and has the following structure: # The possible attributes are not described in this comment block yet, but -# are in readme.pdf. The "prefix" and "suffix" attributes are currently +# are in registry.html. The "prefix" and "suffix" attributes are currently # present only in the OpenCL XML registry, where they are currently unused. # # is the C function prototype, including the return type # are function parameters, in order # len - if the member is an array, len may be one or more of the following -# things, separated by commas (one for each array indirection): another -# member of that struct, 'null-terminated' for a string, '1' to indicate it's -# just a pointer (used for nested pointers), or a latex equation (prefixed with -# 'latexmath:') +# things, separated by commas (one for each array indirection): +# another member of that struct, 'null-terminated' for a string, +# '1' to indicate it is just a pointer (used for nested pointers), +# or a latex equation (prefixed with 'latexmath:') # altlen - if len has latexmath equations, this contains equivalent C99 # expressions separated by commas. 
# externsync - denotes that the member should be externally synchronized # when accessed by Vulkan # optional - whether this value can be omitted by providing NULL (for # pointers), VK_NULL_HANDLE (for handles) or 0 (for bitmasks/values) -# noautovalidity - tag stating that no automatic validity language should be generated +# selector - for a union parameter, identifies a separate enum parameter that +# selects which of the union's members are valid +# noautovalidity - tag stating that no automatic validity language should be +# generated +# objecttype - only applicable for parameters representing a handle as +# a uint64_t value. Specifies the name of another parameter which is +# a VkObjectType or VkDebugReportObjectTypeEXT value specifying +# the type of object the handle references. +# validstructs - only applicable for parameters which are pointers to +# VkBaseInStructure or VkBaseOutStructure types, used as abstract +# placeholders. Specifies a comma-separated list of structures which +# may be passed in place of the parameter, or anywhere in the pNext +# chain of the parameter. +# stride - if the member is an array, stride specifies the name of +# another member containing the byte stride between consecutive +# elements in the array. Is assumed tightly packed if omitted. # is a name, if present # is the function / parameter name, if present (normally should # be, except for void parameters). @@ -305,16 +342,20 @@ Commands = element commands { # are related to them and also require external synchronization. Command = element command { ( attribute name { text } , - attribute alias { text } ) | + attribute alias { text } , + attribute api { text } ? + ) | ( + attribute tasks { text } ? , attribute queues { text } ? , attribute successcodes { text } ? , attribute errorcodes { text } ? , attribute renderpass { text } ? , + attribute videocoding { text } ? , attribute cmdbufferlevel { text } ? , - attribute pipeline { text } ? , attribute prefix { text } ? 
, attribute suffix { text } ? , + attribute api { text } ? , Comment ? , element proto { mixed { @@ -323,11 +364,16 @@ Command = element command { } } , element param { + attribute api { text } ? , attribute len { text } ? , attribute altlen { text } ? , attribute externsync { text } ? , attribute optional { text } ? , + attribute selector { text } ? , attribute noautovalidity { text } ? , + attribute objecttype { text } ? , + attribute validstructs { text } ? , + attribute stride { text } ? , mixed { element type { TypeName } ? , element name { text } ? @@ -357,7 +403,7 @@ Command = element command { # / contains features to require or remove in # this version # profile - only require/remove when generated profile matches -# comment - unused +# comment - descriptive text with no semantic meaning Feature = element feature { attribute api { text } , Name , @@ -368,7 +414,7 @@ Feature = element feature { ( element require { ProfileName ? , - ExtensionName ? , + Depends ? , Comment ? , ( InterfaceElement | @@ -391,14 +437,15 @@ Extensions = element extensions { Extension * } -# Defines the interface of an API . Like a -# tag, but with slightly different attributes: +# Each defines the interface of an API . +# Like a tag, but with slightly different attributes: # api - regexp pattern matching one or more API tags, indicating # which APIs the extension is known to work with. The only # syntax supported is {|}* and each name must # exactly match an API being generated (implicit ^$ surrounding). 
# name - extension name string # number - extension number (positive integer, should be unique) +# revision - extension spec revision (text, usually numeric major.minor.patch) # sortorder - order relative to other extensions, default 0 # protect - C preprocessor symbol to conditionally define the interface # platform - should be one of the platform names defined in the @@ -406,50 +453,56 @@ Extensions = element extensions { # author - name of the author (usually a company or project name) # contact - contact responsible for the tag (name and contact information) # type - 'device' or 'instance', if present -# requires - commas-separated list of extension names required by this -# extension -# requiresCore - core version of Vulkan required by the extension, e.g. -# "1.1". Defaults to "1.0". -# supported - profile name(s) supporting this extension, e.g. 'vulkan' -# or 'disabled' to never generate output. -# promotedto - Vulkan version or a name of an extension that this -# extension was promoted to; e.g. 'VK_VERSION_1_1', or -# 'VK_KHR_draw_indirect_county' -# deprecatedby - Vulkan version or a name of an extension that deprecates -# this extension. It may be empty string. -# e.g. 'VK_VERSION_1_1', or 'VK_EXT_debug_utils', or '' -# obsoletedby - Vulkan version or a name of an extension that obsoletes -# this extension. It may be empty string. -# e.g. 'VK_VERSION_1_1', or 'VK_EXT_debug_utils', or '' +# condition - C preprocessor expression (**TBD**) +# depends - boolean expression of API and/or extension names +# upon which this extension depends. +# supported - comma-separated list of API name(s) supporting this extension, +# e.g. 'opencl', or 'disabled' to never generate output. +# ratified - comma-separated list of API name(s) for which this extension +# has been ratified by Khronos. Defaults to "" if not specified. +# promotedto - API version or name of an extension that this +# extension was promoted to; e.g. 
'CL_VERSION_1_1', or +# 'cl_khr_semaphore' +# deprecatedby - API version or name of an extension that deprecates +# this extension. It may be an empty string. +# e.g. 'CL_VERSION_1_1', or 'cl_khr_semaphore', or '' +# obsoletedby - API version or a name of an extension that obsoletes +# this extension. It may be an empty string. +# e.g. 'CL_VERSION_1_1', or 'cl_khr_semaphore', or '' # provisional - 'true' if this extension is released provisionally +# specialuse - contains one or more tokens separated by commas, indicating +# a special purpose of the extension. Tokens may include 'cadsupport', +# 'd3demulation', 'devtools', 'debugging', and 'glemulation'. Others +# may be added in the future. # In addition, / tags also support an api attribute: # api - only require/remove these features for the matching API. # Not a regular expression. Extension = element extension { Name , attribute number { Integer } ? , + attribute revision { text } ? , attribute sortorder { xsd:integer } ?, attribute protect { text } ? , attribute platform { text } ? , attribute author { text } ? , attribute contact { text } ? , attribute type { text } ? , - attribute requires { text } ? , - attribute requiresCore { text } ? , attribute condition { text } ? , + attribute depends { text } ?, attribute supported { StringGroup } ? , + attribute ratified { text } ? , attribute promotedto { text } ? , attribute deprecatedby { text } ? , attribute obsoletedby { text } ? , attribute provisional { text } ? , + attribute specialuse { text } ? , Comment ? , ( element require { attribute api { text } ? , attribute condition { text } ? , ProfileName ? , - ExtensionName ? , - FeatureName ? , + Depends ? , Comment ? , ( InterfaceElement | @@ -485,7 +538,7 @@ InterfaceElement = } # Integers are allowed to be either decimal or C-hex (0x[0-9A-F]+), but -# XML Schema types don't seem to support hex notation, so we use this +# XML Schema types do not seem to support hex notation, so we use this # as a placeholder. 
Integer = text @@ -505,7 +558,8 @@ StringGroup = text # Repeatedly used attributes ProfileName = attribute profile { text } ExtensionName = attribute extension { text } -FeatureName = attribute feature { text } +# Boolean expression of core version and extension names using (),+ operators +Depends = attribute depends { text } Vendor = attribute vendor { text } Comment = attribute comment { text } Name = attribute name { text } From 550195082c53b974be3e3fe5cc20bab28eefcd79 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 26 Mar 2024 09:06:56 -0700 Subject: [PATCH 072/190] Switch to GitHub Actions for CI (#1085) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * WIP First stab at Github Actions * sudo * remove travis config * change travis builds to only build on tags * sync GitHub actions with travis script * configure dependabot github actions version updates * update gem versions * add libwebp-dev dependency for asciidoctor-mathematical * minor updates * update for the new spec toolchain --------- Co-authored-by: Kévin Petit --- .github/dependabot.yml | 10 +++++++ .github/workflows/presubmit.yml | 49 +++++++++++++++++++++++++++++++++ .travis.yml | 4 +++ 3 files changed, 63 insertions(+) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/presubmit.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..2390d8c80 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + groups: + github-actions: + patterns: + - "*" diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml new file mode 100644 index 000000000..605571254 --- /dev/null +++ b/.github/workflows/presubmit.yml @@ -0,0 +1,49 @@ +name: Presubmit + +permissions: + contents: read + +on: [push, pull_request] + +jobs: + build: + name: Build all specs + 
runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + + - name: Install required packages + run: | + sudo apt-get install -y libpango1.0-dev libwebp-dev ghostscript fonts-lyx jing libavalon-framework-java libbatik-java python3-pyparsing + sudo gem install asciidoctor -v 2.0.16 + sudo gem install coderay -v 1.1.1 + sudo gem install rouge -v 3.19.0 + sudo gem install ttfunk -v 1.7.0 + sudo gem install hexapdf -v 0.27.0 + sudo gem install asciidoctor-pdf -v 2.3.4 + sudo gem install asciidoctor-mathematical -v 0.3.5 + sudo pip install pyparsing + + - name: List git tag + run: | + git describe --tags --dirty + + - name: Generate core specs (HTML and PDF) + run: | + python3 makeSpec -clean -spec core OUTDIR=out.core -j 5 api c env ext cxx4opencl + + - name: Generate core + extension specs (HTML) + run: | + python3 makeSpec -clean -spec khr OUTDIR=out.khr -j 12 html + + - name: Generate reference pages + run: | + python3 makeSpec -spec khr OUTDIR=out.refpages -j 12 manhtmlpages + + - name: Validate XML + run: | + make -C xml validate diff --git a/.travis.yml b/.travis.yml index f8e77eddd..bc8f9117f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,9 @@ dist: focal +# Only build (and deploy) on travis for tags. +# Use GitHub actions for other CI. +if: tag IS present + language: ruby git: From 4efa7517ecd57697e8977774d2d6b04011880f98 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 26 Mar 2024 16:12:42 +0000 Subject: [PATCH 073/190] Reference layered extensions in command-buffer intro (#1090) The "Interaction With Other Extensions" introductory section of `cl_khr_command_buffer` references future layered extensions as part of the design rationale. Two of these layered extensions currently exist, so we can update this section to reference them directly as opposed to the existing speculative language. 
--- api/cl_khr_command_buffer.asciidoc | 39 ++++++++++++++++++------------ 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index 6099d7729..d866b92a0 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -123,9 +123,12 @@ the capability is optional to enable optimizations on command-buffer recording. The introduction of the command-buffer abstraction enables functionality beyond what the `cl_khr_command_buffer` extension currently provides, i.e. the recording of immutable commands to a single queue which can then be -executed without commands synchronizing outside the command-buffer. It is -intended that extra functionality expanding on this will be provided as layered -extensions on top of `cl_khr_command_buffer`. +executed without commands synchronizing outside the command-buffer. Extra +functionality expanding on this is provided as layered extensions on top of +`cl_khr_command_buffer`. The layered extensions that currently exist are: + +* `<<cl_khr_command_buffer_multi_device>>` +* `<<cl_khr_command_buffer_mutable_dispatch>>` Having `cl_khr_command_buffer` as a minimal base specification means that the API defines mechanisms for functionality that is not enabled by this extension, @@ -138,29 +141,33 @@ support their intended use cases. The {clCommandNDRangeKernelKHR} entry-point defines a `properties` parameter of new type {cl_ndrange_kernel_command_properties_khr_TYPE}. No properties are defined -in `cl_khr_command_buffer`, but the parameter is intended to enable future -functionality that would change the characteristics of the kernel command. +in `cl_khr_command_buffer`, but the parameter enables layered extensions like +`<<cl_khr_command_buffer_mutable_dispatch>>` to define properties that inform +the characteristics of the kernel command. ==== Command Handles All command recording entry-points define a {cl_mutable_command_khr_TYPE} output parameter which provides a handle to the specific command being recorded.
Use of these output handles is not enabled by the `cl_khr_command_buffer` extension, -but the handles will allow individual commands in a command-buffer to be -referenced by the user. In particular, the capability for an application to use -these handles to modify commands between enqueues of a command-buffer is -envisaged. +but the handles allow individual commands in a command-buffer to be +referenced by the user. + +Use of these handles is enabled in `<<cl_khr_command_buffer_mutable_dispatch>>` +to give the capability for an application to use the handles to modify commands +between enqueues of a command-buffer. ==== List of Queues Only a single command-queue can be associated with a command-buffer in the -`cl_khr_command_buffer` extension, but the API is designed with the intention -that a future extension will allow commands to be recorded across multiple -queues in the same command-buffer, providing replay of heterogeneous task -graphs. - -Using multiple queue functionality will result in an error without any layered -extensions to relax usage of the following API features: +`cl_khr_command_buffer` extension, but the API is designed so that the layered +`<<cl_khr_command_buffer_multi_device>>` extension can relax this constraint +to allow commands to be recorded across multiple queues in the same +command-buffer, providing replay of heterogeneous task graphs. + +Using multiple queue functionality will result in an error without +`<<cl_khr_command_buffer_multi_device>>` to relax usage of the following API +features: * When a command-buffer is created the API enables passing a list of queues that the command-buffer will record commands to.
Only a single queue is From f01c7d686905e2084a87eb713aaddbc6c2e82d2a Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 26 Mar 2024 09:14:03 -0700 Subject: [PATCH 074/190] fixes a few incorrect asciidoctor attributes (#1091) --- OpenCL_API.txt | 3 +++ api/cl_khr_command_buffer.asciidoc | 16 ++++++++-------- api/cl_khr_device_uuid.asciidoc | 2 +- api/cl_khr_egl_event.asciidoc | 2 +- api/cl_khr_gl_sharing.asciidoc | 4 ++-- api/cl_khr_semaphore.asciidoc | 2 +- api/cl_khr_subgroups.asciidoc | 2 +- api/opencl_architecture.asciidoc | 8 ++++---- api/opencl_runtime_layer.asciidoc | 28 +++++++++++++--------------- 9 files changed, 34 insertions(+), 33 deletions(-) diff --git a/OpenCL_API.txt b/OpenCL_API.txt index 2be31d8e1..2be2268ca 100644 --- a/OpenCL_API.txt +++ b/OpenCL_API.txt @@ -39,6 +39,9 @@ include::config/version-local-links.asciidoc[] // Formatting and links for API functions and enums. include::api/dictionary.asciidoc[] +// Feature Dictionary - used by some extensions. +include::c/feature-dictionary.asciidoc[] + // External Footnotes include::api/footnotes.asciidoc[] diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index d866b92a0..a4ade6c6e 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -207,8 +207,8 @@ features: === New Structures - * {cl_command_buffer_khr} - * {cl_mutable_command_khr} + * {cl_command_buffer_khr_TYPE} + * {cl_mutable_command_khr_TYPE} === New Types @@ -227,19 +227,15 @@ features: * {cl_device_info_TYPE} ** {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} ** {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR} - * {cl_device_command_buffer_capabilities_khr - bitfield_TYPE} + * {cl_device_command_buffer_capabilities_khr_TYPE} ** {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} ** {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} ** {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR} ** {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} * 
{cl_command_buffer_properties_khr_TYPE} ** {CL_COMMAND_BUFFER_FLAGS_KHR} - * {cl_command_buffer_flags_khr - bitfield_TYPE} + * {cl_command_buffer_flags_khr_TYPE} ** {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} - * {Error codes_TYPE} - ** {CL_INVALID_COMMAND_BUFFER_KHR} - ** {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} - ** {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} * {cl_command_buffer_info_khr_TYPE} ** {CL_COMMAND_BUFFER_QUEUES_KHR} ** {CL_COMMAND_BUFFER_NUM_QUEUES_KHR} @@ -253,6 +249,10 @@ features: ** {CL_COMMAND_BUFFER_STATE_PENDING_KHR} * {cl_command_type_TYPE} ** {CL_COMMAND_COMMAND_BUFFER_KHR} + * New Error Codes + ** {CL_INVALID_COMMAND_BUFFER_KHR} + ** {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} + ** {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} === Sample Code diff --git a/api/cl_khr_device_uuid.asciidoc b/api/cl_khr_device_uuid.asciidoc index 023b34dfd..87f803daf 100644 --- a/api/cl_khr_device_uuid.asciidoc +++ b/api/cl_khr_device_uuid.asciidoc @@ -23,7 +23,7 @@ across processes or APIs. Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: - * {cl_device_info} + * {cl_device_info_TYPE} ** {CL_DEVICE_UUID_KHR} ** {CL_DRIVER_UUID_KHR} ** {CL_DEVICE_LUID_VALID_KHR} diff --git a/api/cl_khr_egl_event.asciidoc b/api/cl_khr_egl_event.asciidoc index 86b5fbb32..f9663530f 100644 --- a/api/cl_khr_egl_event.asciidoc +++ b/api/cl_khr_egl_event.asciidoc @@ -54,7 +54,7 @@ context, and to reach into each such context. . Should we restrict which CL APIs can be used with this cl_event? + -- -*RESOLVED* Use is limited to {clEnqueueAcquire}*** calls only. +*RESOLVED* Use is limited to calls to acquire and release memory objects only. -- . What is the desired behaviour for this extension when EGLSyncKHR is of a diff --git a/api/cl_khr_gl_sharing.asciidoc b/api/cl_khr_gl_sharing.asciidoc index b6c55df65..7d1bcdaa8 100644 --- a/api/cl_khr_gl_sharing.asciidoc +++ b/api/cl_khr_gl_sharing.asciidoc @@ -23,7 +23,7 @@ object, using additional attributes described for {clCreateContext}. 
An OpenCL image object may be created from an OpenGL texture or renderbuffer object as described for {clCreateFromGLTexture} and -{clCreateFromGLRenderuffer}, respectively. +{clCreateFromGLRenderbuffer}, respectively. An OpenCL buffer object may be created from an OpenGL buffer object using {clCreateFromGLBuffer}. @@ -59,7 +59,7 @@ and buffer object images with OpenCL is required by this extension. * {cl_gl_context_info_TYPE} * {cl_gl_object_type_TYPE} * {cl_gl_texture_info_TYPE} - * {cl_gl_platform_info} + * {cl_gl_platform_info_TYPE} === New Tokens diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc index 43c2b1ae9..25c8cdd48 100644 --- a/api/cl_khr_semaphore.asciidoc +++ b/api/cl_khr_semaphore.asciidoc @@ -93,7 +93,7 @@ In particular, this extension defines: * New return values from {clGetEventInfo} ** {CL_COMMAND_SEMAPHORE_WAIT_KHR} ** {CL_COMMAND_SEMAPHORE_SIGNAL_KHR} - * New error codes + * New Error Codes ** {CL_INVALID_SEMAPHORE_KHR} diff --git a/api/cl_khr_subgroups.asciidoc b/api/cl_khr_subgroups.asciidoc index 4acff29aa..bc5e2df20 100644 --- a/api/cl_khr_subgroups.asciidoc +++ b/api/cl_khr_subgroups.asciidoc @@ -35,7 +35,7 @@ OpenCL C specification for more information. === New Types - * {cl_kernel_sub_group_info} + * {cl_kernel_sub_group_info_TYPE} === New Commands diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index 3ad82c904..a342edbf3 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -2232,7 +2232,7 @@ working with version numbers easier. {CL_VERSION_PATCH_BITS_anchor} are the number of bits in the corresponding field. * `CL_VERSION_MAJOR_MASK`, `CL_VERSION_MINOR_MASK`, and - {CL_VERSION_PATCH_MASK` are bitmasks used to extract the + `CL_VERSION_PATCH_MASK` are bitmasks used to extract the corresponding packed fields from the version number. [source,opencl] @@ -2274,7 +2274,7 @@ corresponding entity (e.g. 
extension or built-in kernel) name: include::{generated}/api/structs/cl_name_version.txt[] * _version_ is a <>. - * _name_ is an array of {CL_NAME_VERSION_MAX_NAME_SIZE_anchor} `char` - containing a null-terminated string whose maximum length is therefore - {CL_NAME_VERSION_MAX_NAME_SIZE} - 1`. + * _name_ is an array of {CL_NAME_VERSION_MAX_NAME_SIZE_anchor} characters + containing a null-terminated string, whose maximum length is therefore + {CL_NAME_VERSION_MAX_NAME_SIZE} minus one. -- diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 298620641..bb1146285 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -4727,19 +4727,19 @@ ifdef::cl_khr_egl_image[] [open,refpage='clCreateFromEGLImageKHR',desc='Create cl_mem target from EGLImage source',type='protos'] -- -To create an `EGLImage` target of type {cl_mem} from the `EGLImage` source +To create an `EGLImage` target of type {cl_mem_TYPE} from the `EGLImage` source provided as _image_, call the function include::{generated}/api/protos/clCreateFromEGLImageKHR.txt[] include::{generated}/api/version-notes/clCreateFromEGLImageKHR.asciidoc[] * _display_ should be of type `EGLDisplay`, cast into the type - {CLeglDisplayKHR}. + {CLeglDisplayKHR_TYPE}. * _image_ should be of type `EGLImageKHR`, cast into the type {CLeglImageKHR_TYPE}. Assuming no errors are generated in this function, the resulting image object will be an `EGLImage` target of the specified `EGLImage` _image_. - The resulting {cl_mem} is an image object which may be used normally by + The resulting {cl_mem_TYPE} is an image object which may be used normally by all OpenCL operations. This maps to an `image2d_t` type in OpenCL kernel code. * _flags_ is a bit-field that is used to specify usage information about @@ -4786,7 +4786,7 @@ returned in _errcode_ret_: above. * {CL_INVALID_EGL_OBJECT_KHR} if _image_ is not a valid `EGLImage` object. 
* {CL_IMAGE_FORMAT_NOT_SUPPORTED} if the OpenCL implementation is not able - to create a {cl_mem} compatible with the provided {CLeglImageKHR_TYPE} + to create a {cl_mem_TYPE} compatible with the provided {CLeglImageKHR_TYPE} for an implementation-dependent reason (this could be caused by, but not limited to, reasons such as unsupported texture formats, etc). * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -6264,7 +6264,7 @@ include::{generated}/api/version-notes/clGetGLTextureInfo.asciidoc[] .OpenGL texture info that may be queried with {clGetGLTextureInfo} [cols=",,",options="header",] |==== -| {cl_gl_texture_info} | Return Type | Info. Returned in _param_value_ +| {cl_gl_texture_info_TYPE} | Return Type | Info. Returned in _param_value_ | {CL_GL_TEXTURE_TARGET_anchor} include::{generated}/api/version-notes/CL_GL_TEXTURE_TARGET.asciidoc[] @@ -7153,10 +7153,9 @@ memory objects through the other API with which such objects are shared. Failure to provide such synchronization may result in race conditions and other undefined behavior including non-portability between implementations. -Prior to acquiring objects shared with the other API via an appropriate -{clEnqueueAcquire}*** call, the application must ensure that any pending -operations in that API which accesses the objects specified in _mem_objects_ -have completed. +Prior to acquiring objects shared with the other API, the application must +ensure that any pending operations in that API which accesses the objects +specified in _mem_objects_ have completed. Depending on the application and the implementation, there are two extensions which may be used to synchronize with other APIs: @@ -7216,10 +7215,9 @@ are supported on a platform. 
===== Synchronizing OpenCL Operations With Other APIs -After releasing a shared memory object via an appropriate -{clEnqueueRelease}*** call, the application is responsible for ensuring that -any pending OpenCL operations which access the objects specified in -_mem_objects_ have completed prior to executing subsequent commands in the +After releasing a shared memory object, the application is responsible for +ensuring that any pending OpenCL operations which access the objects specified +in _mem_objects_ have completed prior to executing subsequent commands in the other API which reference these objects. This may be accomplished portably by calling {clWaitForEvents} with the @@ -12348,7 +12346,7 @@ When the event object is deleted, the reference will be removed from the EGL sync object. Events returned from {clCreateEventFromEGLSyncKHR} may only be consumed by -{clEnqueueAcquire}*** commands. +commands to acquire and release memory objects. Passing such events to any other CL API that enqueues commands will generate a {CL_INVALID_EVENT} error. @@ -12452,7 +12450,7 @@ sync object. Events returned from {clCreateEventFromGLsyncKHR} can be used in the _event_wait_list_ argument to {clEnqueueAcquireGLObjects} and CL APIs that -take a {cl_event} as an argument but do not enqueue commands. +take a {cl_event_TYPE} as an argument but do not enqueue commands. Passing such events to any other CL API that enqueues commands will generate a {CL_INVALID_EVENT} error. 
-- From 19b3aae642429caaf931e300f4718dd467b46067 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 26 Mar 2024 09:19:17 -0700 Subject: [PATCH 075/190] strengthen requirements for CL_DEVICE_TYPE query (#1069) * strengthen requirements for CL_DEVICE_TYPE query * require that an OpenCL device only reports a single device type * clarify device types that may be used with clCreateContextFromType --- api/opencl_platform_layer.asciidoc | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index b27079192..c09ff4304 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -383,12 +383,18 @@ include::{generated}/api/version-notes/CL_DEVICE_TYPE_ACCELERATOR.asciidoc[] include::{generated}/api/version-notes/CL_DEVICE_TYPE_CUSTOM.asciidoc[] | Specialized devices that implement some of the OpenCL runtime APIs but - do not support all required OpenCL functionality. + do not support all of the required OpenCL functionality. | {CL_DEVICE_TYPE_DEFAULT_anchor} include::{generated}/api/version-notes/CL_DEVICE_TYPE_DEFAULT.asciidoc[] | The default OpenCL device in the platform. + One device in the platform must be returned as the {CL_DEVICE_TYPE_DEFAULT} + device when passed as the _device_type_ to {clGetDeviceIDs}. + {CL_DEVICE_TYPE_DEFAULT} is only used to query OpenCL devices using + {clGetDeviceIDs} or to create OpenCL contexts using + {clCreateContextFromType}, and will never be returned in {CL_DEVICE_TYPE} + for any OpenCL device. The default OpenCL device must not be a {CL_DEVICE_TYPE_CUSTOM} device. | {CL_DEVICE_TYPE_ALL_anchor} @@ -396,22 +402,13 @@ include::{generated}/api/version-notes/CL_DEVICE_TYPE_DEFAULT.asciidoc[] include::{generated}/api/version-notes/CL_DEVICE_TYPE_ALL.asciidoc[] | All OpenCL devices available in the platform, except for {CL_DEVICE_TYPE_CUSTOM} devices. 
+ {CL_DEVICE_TYPE_ALL} is only used to query OpenCL devices using + {clGetDeviceIDs} or to create OpenCL contexts using + {clCreateContextFromType}, and will never be returned in {CL_DEVICE_TYPE} + for any OpenCL device. |==== -The device type is purely informational and has no semantic meaning. - -Some devices may be more than one type. -For example, a {CL_DEVICE_TYPE_CPU} device may also be a -{CL_DEVICE_TYPE_GPU} device, or a {CL_DEVICE_TYPE_ACCELERATOR} device -may also be some other, more descriptive device type. -{CL_DEVICE_TYPE_CUSTOM} devices must not be combined with any other -device types. - -One device in the platform should be a {CL_DEVICE_TYPE_DEFAULT} device. -The default device should also be a more specific device type, such -as {CL_DEVICE_TYPE_CPU} or {CL_DEVICE_TYPE_GPU}. - // refError {clGetDeviceIDs} returns {CL_SUCCESS} if the function is executed @@ -483,10 +480,13 @@ device except for the following queries: include::{generated}/api/version-notes/CL_DEVICE_TYPE.asciidoc[] | {cl_device_type_TYPE} - | The type or types of the OpenCL device. + | The type of the OpenCL device. + The device type is purely informational and has no semantic meaning. + The device must report a single device type, which must not be + {CL_DEVICE_TYPE_DEFAULT} or {CL_DEVICE_TYPE_ALL}. Please see the <> table - for supported device types and device type combinations. + for supported device types and device type descriptions. 
| {CL_DEVICE_VENDOR_ID_anchor} footnote:[{fn-vendor-id}] From 02e7f0d9d4d5836cc6200bb8860071007440e245 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 26 Mar 2024 09:20:02 -0700 Subject: [PATCH 076/190] fix a markup issue regarding cl_khr_subgroup_extended_types (#1104) --- OpenCL_C.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 5e7776c9b..eab333330 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -13888,8 +13888,8 @@ footnote:[{fn-half-supported}], `float`, and `double` footnote:[{fn-double-supported}]. ifdef::cl_khr_subgroup_extended_types[] -NOTE: If the `<<cl_khr_subgroup_extended_types>>` extension is supported, -the supported `gentype`s also include `char`, `uchar`, `short`, and +NOTE: If the `<<cl_khr_subgroup_extended_types>>` extension is supported, the +generic type name `gentype` may additionally be `char`, `uchar`, `short`, and `ushort`. For the `sub_group_broadcast` function, `gentype` may additionally be one of the supported built-in vector data types `char__n__`, `uchar__n__`, From 1f4fa641afc349445b00fefdee8f6a245ac4bb6e Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 26 Mar 2024 09:20:11 -0700 Subject: [PATCH 077/190] fix footnotes (#1097) Moves text for CL_KERNEL_ARG_TYPE_QUALIFIER out of a footnote and into the main spec text. Removes link to the cl_khr_fp16 extension for the footnote. This is not ideal, but is a reasonable short-term solution. --- api/footnotes.asciidoc | 15 +-------------- api/opencl_runtime_layer.asciidoc | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index e21cb8719..26e1454f3 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -68,19 +68,6 @@ This value for memory_scope can only be used with atomic_work_item_fence with fl Note that the performance of 64-bit integer arithmetic can vary significantly between embedded devices.
\ ] -:fn-kernel-arg-type-const-addr-space: pass:n[ \ -{CL_KERNEL_ARG_TYPE_CONST} is returned for {CL_KERNEL_ARG_TYPE_QUALIFIER} if the argument is declared with the `constant` address space qualifier. \ -] - -:fn-kernel-arg-type-qualifier: pass:n[ \ -{CL_KERNEL_ARG_TYPE_CONST} is returned if the argument is a pointer and the referenced type is declared with the const qualifier. \ -For example, a kernel argument declared as `global int const *x` returns {CL_KERNEL_ARG_TYPE_CONST} but a kernel argument declared as `global int * const x` does not. + \ -Similarly, {CL_KERNEL_ARG_TYPE_RESTRICT} will be returned if the pointer type is marked `restrict`. \ -For example, `global int * restrict x` returns {CL_KERNEL_ARG_TYPE_RESTRICT}. + \ -{CL_KERNEL_ARG_TYPE_VOLATILE} is returned for {CL_KERNEL_ARG_TYPE_QUALIFIER} if the argument is a pointer and the referenced type is declared with the volatile qualifier. \ -For example, a kernel argument declared as `global int volatile *x` returns {CL_KERNEL_ARG_TYPE_VOLATILE} but a kernel argument declared as `global int * volatile x` does not. \ -] - :fn-map-count-usage: pass:n[ \ The map count returned should be considered immediately stale. \ It is unsuitable for general use in applications. \ @@ -120,7 +107,7 @@ Rather than attempt to share {cl_kernel_TYPE} objects among multiple host thread ] :fn-readimageh: pass:n[ \ -And *read_imageh*, if the `<<cl_khr_fp16>>` extension is supported. \ +And *read_imageh*, if the `cl_khr_fp16` extension is supported. \ ] :fn-reference-count-usage: pass:n[ \ diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index bb1146285..de138f557 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -11078,15 +11078,33 @@ include::{generated}/api/version-notes/CL_KERNEL_ARG_TYPE_QUALIFIER.asciidoc[] for the argument given by _arg_index_.
The returned values can be: - {CL_KERNEL_ARG_TYPE_CONST_anchor} - footnote:[{fn-kernel-arg-type-qualifier}] - footnote:[{fn-kernel-arg-type-const-addr-space}] + + {CL_KERNEL_ARG_TYPE_CONST_anchor} + {CL_KERNEL_ARG_TYPE_RESTRICT_anchor} + {CL_KERNEL_ARG_TYPE_VOLATILE_anchor} + {CL_KERNEL_ARG_TYPE_PIPE_anchor}, or + {CL_KERNEL_ARG_TYPE_NONE_anchor} - {CL_KERNEL_ARG_TYPE_NONE} is returned for all parameters passed by + {CL_KERNEL_ARG_TYPE_CONST} is returned if the kernel argument is a + pointer and the referenced type is declared with the `const` qualifier. + For example, a kernel argument declared as `global int const*` returns + {CL_KERNEL_ARG_TYPE_CONST} but a kernel argument declared as `global + int* const` does not. + Additionally, {CL_KERNEL_ARG_TYPE_CONST} is returned if the kernel + argument is declared with the `constant` address space qualifier. + + {CL_KERNEL_ARG_TYPE_RESTRICT} is returned if the pointer type is marked + `restrict`. + For example, `global int* restrict` returns + {CL_KERNEL_ARG_TYPE_RESTRICT}. + + {CL_KERNEL_ARG_TYPE_VOLATILE} is returned for + {CL_KERNEL_ARG_TYPE_QUALIFIER} if the kernel argument is a pointer and + the referenced type is declared with the `volatile` qualifier. + For example, a kernel argument declared as `global int volatile*` + returns {CL_KERNEL_ARG_TYPE_VOLATILE} but a kernel argument declared as + `global int* volatile` does not. + + {CL_KERNEL_ARG_TYPE_NONE} is returned for all kernel arguments passed by value. | {CL_KERNEL_ARG_NAME_anchor} From f13b0c4540d4fbeb7fbc0f90e79399a7938e83fe Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 26 Mar 2024 18:47:04 +0000 Subject: [PATCH 078/190] Fixup cl_khr_command_buffer after spec unification (#1089) After PR #950 merged the cl_khr_command_buffer spec needs updated in a couple of places: * Error around _num_queues_ to `clCreateCommandBufferKHR` should be in terms of `cl_khr_command_buffer_multi_device`. 
* "New Structure" heading can be deleted as these are listed under "New Types" * Typos in rendering of some types --- api/cl_khr_command_buffer.asciidoc | 5 ----- api/opencl_runtime_layer.asciidoc | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index a4ade6c6e..a71d0ec99 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -205,11 +205,6 @@ features: ** {clCommandSVMMemcpyKHR} ** {clCommandSVMMemFillKHR} -=== New Structures - - * {cl_command_buffer_khr_TYPE} - * {cl_mutable_command_khr_TYPE} - === New Types * {cl_device_command_buffer_capabilities_khr_TYPE} diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index de138f557..cb7908951 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -14145,7 +14145,10 @@ returned in _errcode_ret_: specified by {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR}. * {CL_INVALID_CONTEXT} if all the command-queues in _queues_ do not have the same OpenCL context. - * {CL_INVALID_VALUE} if _num_queues_ is zero. + * {CL_INVALID_VALUE} if the `<<cl_khr_command_buffer_multi_device>>` + extension is supported and _num_queues_ is zero, or if the + `<<cl_khr_command_buffer_multi_device>>` extension is not supported + and _num_queues_ is not one. * {CL_INVALID_VALUE} if _queues_ is `NULL`. * {CL_INVALID_VALUE} if values specified in _properties_ are not valid, or if the same property name is specified more than once. From ad320b71d5a6f70c6c36c12e17cda79548ecdb3f Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Sun, 31 Mar 2024 19:00:21 +0100 Subject: [PATCH 079/190] Set anchor on `CL_COMMAND_BUFFER_MUTABLE_KHR` (#1115) The link in the "New Enums" section of cl_khr_command_buffer_mutable_dispatch doesn't lead anywhere otherwise.
--- api/opencl_runtime_layer.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index cb7908951..b8af9a812 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -14051,7 +14051,7 @@ include::{generated}/api/version-notes/CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR.as endif::cl_khr_command_buffer_multi_device[] ifdef::cl_khr_command_buffer_mutable_dispatch[] - {CL_COMMAND_BUFFER_MUTABLE_KHR} - Enables modification of the + {CL_COMMAND_BUFFER_MUTABLE_KHR_anchor} - Enables modification of the command-buffer, by default command-buffers are immutable. If set, commands in the command-buffer may be updated via {clUpdateMutableCommandsKHR}. From 70579b8084c2293577a018a695604d0def5d4670 Mon Sep 17 00:00:00 2001 From: Jon Leech <4693344+oddhack@users.noreply.github.com> Date: Sun, 31 Mar 2024 11:05:00 -0700 Subject: [PATCH 080/190] Fix provisional extension notes in appendices (#1113) * Remove provisional notice include for recently ratified extensions * Remove provisional_notice include for each provisional extensions. Add a new subsection of the extensions appendix capturing the same information. The autogenerated link in the extension metadata for each provisional extension links to this subsection. 
--- api/appendix_extensions.asciidoc | 16 ++++++++++++++++ api/cl_khr_command_buffer.asciidoc | 2 -- api/cl_khr_command_buffer_multi_device.asciidoc | 2 -- ..._khr_command_buffer_mutable_dispatch.asciidoc | 2 -- api/cl_khr_external_memory.asciidoc | 2 -- api/cl_khr_external_memory_dma_buf.asciidoc | 4 ---- api/cl_khr_external_memory_dx.asciidoc | 4 ---- api/cl_khr_external_memory_opaque_fd.asciidoc | 4 ---- api/cl_khr_external_memory_win32.asciidoc | 4 ---- api/cl_khr_external_semaphore.asciidoc | 4 ---- api/cl_khr_external_semaphore_dx_fence.asciidoc | 4 ---- api/cl_khr_external_semaphore_opaque_fd.asciidoc | 4 ---- api/cl_khr_external_semaphore_sync_fd.asciidoc | 4 ---- api/cl_khr_external_semaphore_win32.asciidoc | 4 ---- api/cl_khr_semaphore.asciidoc | 4 ---- 15 files changed, 16 insertions(+), 48 deletions(-) diff --git a/api/appendix_extensions.asciidoc b/api/appendix_extensions.asciidoc index 2e5ceccbb..0c2d4dc4e 100644 --- a/api/appendix_extensions.asciidoc +++ b/api/appendix_extensions.asciidoc @@ -25,6 +25,22 @@ alphabetically by author ID. Within each group, extensions are listed in alphabetical order by their names. + +[[boilerplate-provisional-header]] +== Provisional Extensions + +_Provisional_ OpenCL extensions described in this appendix have been +Ratified under the Khronos Intellectual Property Framework. +They are being made publicly available as provisional extensions to enable +review and feedback from the community. +While an extension is provisional, features may be added, removed, or +changed in non-backward compatible ways. + +If you have feedback on a provisional extension, please create an issue on +the link:https://github.com/KhronosGroup/OpenCL-Docs/[OpenCL-Docs +repository]. 
+ + == Extension Dependencies Extensions which have dependencies on specific core versions or on other diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index a71d0ec99..e5da09a4f 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -419,8 +419,6 @@ features: *UNRESOLVED* -- -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-11-10 diff --git a/api/cl_khr_command_buffer_multi_device.asciidoc b/api/cl_khr_command_buffer_multi_device.asciidoc index 9d3d87c34..60ea8c530 100644 --- a/api/cl_khr_command_buffer_multi_device.asciidoc +++ b/api/cl_khr_command_buffer_multi_device.asciidoc @@ -305,8 +305,6 @@ require it. *RESOLVED*: Added as an optional feature. -- -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2023-04-14 diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc index beda74ae0..ea37650c2 100644 --- a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -367,8 +367,6 @@ specification it is omitted, and if its functionality has demand later, it may be a introduced as a stand alone extension. 
-- -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2022-08-31 diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index 0685d72c3..c9f3567ba 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -298,8 +298,6 @@ while (true) { *UNRESOLVED* -- -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_external_memory_dma_buf.asciidoc b/api/cl_khr_external_memory_dma_buf.asciidoc index 19d54ffa4..63c86b339 100644 --- a/api/cl_khr_external_memory_dma_buf.asciidoc +++ b/api/cl_khr_external_memory_dma_buf.asciidoc @@ -72,10 +72,6 @@ TODO * {cl_external_memory_handle_type_khr_TYPE} ** {CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR} -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_external_memory_dx.asciidoc b/api/cl_khr_external_memory_dx.asciidoc index c2fbf6184..5675324ec 100644 --- a/api/cl_khr_external_memory_dx.asciidoc +++ b/api/cl_khr_external_memory_dx.asciidoc @@ -75,10 +75,6 @@ TODO ** {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR} ** {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR} -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_external_memory_opaque_fd.asciidoc b/api/cl_khr_external_memory_opaque_fd.asciidoc index 990582b40..1b1d7c44a 100644 --- a/api/cl_khr_external_memory_opaque_fd.asciidoc +++ b/api/cl_khr_external_memory_opaque_fd.asciidoc @@ -72,10 +72,6 @@ TODO * {cl_external_memory_handle_type_khr_TYPE} ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR} -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_external_memory_win32.asciidoc b/api/cl_khr_external_memory_win32.asciidoc index fdbb7e75d..8b07606bc 100644 --- a/api/cl_khr_external_memory_win32.asciidoc +++ 
b/api/cl_khr_external_memory_win32.asciidoc @@ -73,10 +73,6 @@ TODO ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR} ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR} -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc index 211b42ff2..8e44962ac 100644 --- a/api/cl_khr_external_semaphore.asciidoc +++ b/api/cl_khr_external_semaphore.asciidoc @@ -274,10 +274,6 @@ while (true) { } ---- -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_external_semaphore_dx_fence.asciidoc b/api/cl_khr_external_semaphore_dx_fence.asciidoc index 6f9c2ee71..1fd274587 100644 --- a/api/cl_khr_external_semaphore_dx_fence.asciidoc +++ b/api/cl_khr_external_semaphore_dx_fence.asciidoc @@ -39,10 +39,6 @@ D3D12 fence as an external semaphore using the APIs introduced by * {cl_external_semaphore_handle_type_khr_TYPE} ** {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_external_semaphore_opaque_fd.asciidoc b/api/cl_khr_external_semaphore_opaque_fd.asciidoc index d1119242f..b74a93884 100644 --- a/api/cl_khr_external_semaphore_opaque_fd.asciidoc +++ b/api/cl_khr_external_semaphore_opaque_fd.asciidoc @@ -39,10 +39,6 @@ introduced by `<>`. 
* {cl_external_semaphore_handle_type_khr_TYPE} ** {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR} -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc index a8175fe28..507588848 100644 --- a/api/cl_khr_external_semaphore_sync_fd.asciidoc +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -48,10 +48,6 @@ external semaphore using the APIs introduced by * {cl_external_semaphore_handle_type_khr_TYPE} ** {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_external_semaphore_win32.asciidoc b/api/cl_khr_external_semaphore_win32.asciidoc index 224302f2d..e8b1a772a 100644 --- a/api/cl_khr_external_semaphore_win32.asciidoc +++ b/api/cl_khr_external_semaphore_win32.asciidoc @@ -40,10 +40,6 @@ introduced by `<>`. ** {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} ** {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc index 25c8cdd48..4d442fec2 100644 --- a/api/cl_khr_semaphore.asciidoc +++ b/api/cl_khr_semaphore.asciidoc @@ -247,10 +247,6 @@ while (true) { } ---- -=== Issues - -include::provisional_notice.asciidoc[] - === Version History * Revision 0.9.0, 2021-09-10 From 1062e64e4b3b23147d11ad1c4d79cfb317736053 Mon Sep 17 00:00:00 2001 From: Jon Leech <4693344+oddhack@users.noreply.github.com> Date: Sun, 31 Mar 2024 11:13:19 -0700 Subject: [PATCH 081/190] Synchronize scripts with Vulkan and correct extension appendix section nesting (#1116) The observable effects of this are - Pushes the subsection titles in the extension appendices down one level, similar to #1087 but keeping the scripts in sync - Adds an autogenerated 'API Interactions' section with 
currently only affects the cl_khr_command_buffer extension, since that's the only one with some APIs tagged in the XML as dependent on a particular core version --- scripts/extensionmetadocgenerator.py | 100 ++++++++++++++++++++++++--- scripts/genRef.py | 3 +- scripts/reflib.py | 17 +++++ scripts/spec_tools/conventions.py | 23 +++++- 4 files changed, 129 insertions(+), 14 deletions(-) diff --git a/scripts/extensionmetadocgenerator.py b/scripts/extensionmetadocgenerator.py index bc38084ea..a200bab95 100644 --- a/scripts/extensionmetadocgenerator.py +++ b/scripts/extensionmetadocgenerator.py @@ -9,7 +9,7 @@ import sys from functools import total_ordering from generator import GeneratorOptions, OutputGenerator, regSortFeatures, write -from parse_dependency import dependencyMarkup +from parse_dependency import dependencyMarkup, dependencyNames class ExtensionMetaDocGeneratorOptions(GeneratorOptions): """ExtensionMetaDocGeneratorOptions - subclass of GeneratorOptions. @@ -23,6 +23,7 @@ class Extension: def __init__(self, generator, # needed for logging and API conventions filename, + interface, name, number, ext_type, @@ -36,9 +37,14 @@ def __init__(self, specialuse, ratified ): + """Object encapsulating information from an XML tag. + Most of the parameters / members are XML tag values. 
+ 'interface' is the actual XML element.""" + self.generator = generator self.conventions = generator.genOpts.conventions self.filename = filename + self.interface = interface self.name = name self.number = number self.ext_type = ext_type @@ -176,7 +182,7 @@ def conditionalLinkExt(self, extName, indent = ' '): def resolveDeprecationChain(self, extensions, succeededBy, isRefpage, file): if succeededBy not in extensions: write(f' ** *NOTE* The extension `{succeededBy}` is not supported for the API specification being generated', file=file) - self.generator.logMsg('warn', f'resolveDeprecationChain: {self.name} defines a superceding interface {succeededBy} which is not in the supported extensions list') + self.generator.logMsg('warn', f'resolveDeprecationChain: {self.name} defines a superseding interface {succeededBy} which is not in the supported extensions list') return ext = extensions[succeededBy] @@ -223,10 +229,11 @@ def writeTag(self, tag, value, isRefpage, fp): if isRefpage: # Use subsection headers for the tag name - tagPrefix = '== ' + # Because we do not know what preceded this, add whitespace + tagPrefix = '\n== ' tagSuffix = '' else: - # Use an bolded item list for the tag name + # Use a bolded item list for the tag name tagPrefix = '*' tagSuffix = '*::' @@ -238,12 +245,14 @@ def writeTag(self, tag, value, isRefpage, fp): if isRefpage: write('', file=fp) - def makeMetafile(self, extensions, isRefpage = False): + def makeMetafile(self, extensions, SPV_deps, isRefpage = False): """Generate a file containing extension metainformation in asciidoctor markup form. 
- extensions - dictionary of Extension objects for extensions spec is being generated against + - SPV_deps - dictionary of SPIR-V extension names required for each + extension and version name - isRefpage - True if generating a refpage include, False if generating a specification extension appendix include""" @@ -256,7 +265,7 @@ def makeMetafile(self, extensions, isRefpage = False): if not isRefpage: write('[[' + self.name + ']]', file=fp) - write('=== ' + self.name, file=fp) + write('== ' + self.name, file=fp) write('', file=fp) self.writeTag('Name String', '`' + self.name + '`', isRefpage, fp) @@ -301,6 +310,34 @@ def makeMetafile(self, extensions, isRefpage = False): ' of provisional header files for enablement and stability details.*', file=fp) write('', file=fp) + # Determine version and extension interactions from 'depends' + # attributes of <require> tags. + interacts = set() + for elem in self.interface.findall('require[@depends]'): + names = dependencyNames(elem.get('depends')) + interacts |= names + + if len(interacts) > 0: + self.writeTag('API Interactions', None, isRefpage, fp) + + def versionKey(name): + """Sort _VERSION_ names before extension names""" + return '_VERSION_' not in name + + names = sorted(sorted(interacts), key=versionKey) + for name in names: + write(f'* Interacts with {name}', file=fp) + + write('', file=fp) + + if self.name in SPV_deps: + self.writeTag('SPIR-V Dependencies', None, isRefpage, fp) + + for spvname in sorted(SPV_deps[self.name]): + write(f' * {self.conventions.formatSPIRVlink(spvname)}', file=fp) + + write('', file=fp) + if self.deprecationType: self.writeTag('Deprecation State', None, isRefpage, fp) @@ -362,7 +399,7 @@ def makeMetafile(self, extensions, isRefpage = False): if handle.startswith('gitlab:'): prettyHandle = 'icon:gitlab[alt=GitLab, role="red"]' + handle.replace('gitlab:@', '') elif handle.startswith('@'): - issuePlaceholderText = f'[{self.name}]{handle}' + issuePlaceholderText = f'[{self.name}] {handle}'
issuePlaceholderText += f'%0A*Here describe the issue or question you have about the {self.name} extension*' trackerLink = f'link:++https://github.com/KhronosGroup/Vulkan-Docs/issues/new?body={issuePlaceholderText}++' prettyHandle = f'{trackerLink}[icon:github[alt=GitHub,role="black"]{handle[1:]},window=_blank,opts=nofollow]' @@ -408,7 +445,7 @@ def checkProposal(extname): tag = 'Extension Proposal' for (name, path) in sorted(proposals): self.writeTag(tag, - f'link:{{specRepositoryURL}}/{path}[{name}]', + f'{{proposalRefPath}}{path}[{name}]', isRefpage, fp) # Setting tag = None so additional values will not get # additional tag headers. @@ -446,6 +483,8 @@ def __init__(self, *args, **kwargs): # List of strings containing all vendor tags self.vendor_tags = [] self.file_suffix = '' + # SPIR-V dependencies, generated in beginFile() + self.SPV_deps = {} def newFile(self, filename): self.logMsg('diag', '# Generating include file:', filename) @@ -465,6 +504,28 @@ def beginFile(self, genOpts): for tag in root.findall('tags/tag'): self.vendor_tags.append(tag.get('name')) + # If there are <spirvextension> elements in the XML, generate a + # reverse map from API version and extension names to the SPV + # extensions they depend on. + + def add_dep(SPV_deps, name, spvname): + """Add spvname as a dependency of name.
+ name may be an API or extension name.""" + + if name not in SPV_deps: + SPV_deps[name] = set() + SPV_deps[name].add(spvname) + + for spvext in root.findall('spirvextensions/spirvextension'): + spvname = spvext.get('name') + for elem in spvext.findall('enable'): + if elem.get('version'): + version_name = elem.get('version') + add_dep(self.SPV_deps, version_name, spvname) + elif elem.get('extension'): + ext_name = elem.get('extension') + add_dep(self.SPV_deps, ext_name, spvname) + # Create subdirectory, if needed self.makeDir(self.directory) @@ -516,9 +577,9 @@ def endFile(self): # Generate metadoc extension files, in refpage and non-refpage form for ext in self.extensions.values(): - ext.makeMetafile(self.extensions, isRefpage = False) + ext.makeMetafile(self.extensions, self.SPV_deps, isRefpage = False) if self.conventions.write_refpage_include: - ext.makeMetafile(self.extensions, isRefpage = True) + ext.makeMetafile(self.extensions, self.SPV_deps, isRefpage = True) # Key to sort extensions alphabetically within 'KHR', 'EXT', vendor # extension prefixes. @@ -565,6 +626,10 @@ def makeSortKey(extname): # This is difficult to change, and it is very unlikely changing # it will be needed. + # Do not include the lengthy '*extension_appendices_toc' indices + # in the Antora site build, since all the extensions are already + # indexed on the right navigation sidebar. 
+ write('', file=current_extensions_appendix_fp) write('include::{generated}/meta/deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=current_extensions_appendix_fp) write('', file=current_extensions_appendix_fp) @@ -577,7 +642,9 @@ def makeSortKey(extname): write('== List of Current Extensions', file=current_extensions_appendix_fp) write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) write('', file=current_extensions_appendix_fp) + write('ifndef::site-gen-antora[]', file=current_extensions_appendix_fp) write('include::{generated}/meta/current_extension_appendices_toc' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + write('endif::site-gen-antora[]', file=current_extensions_appendix_fp) write('\n<<<\n', file=current_extensions_appendix_fp) write('include::{generated}/meta/current_extension_appendices' + self.file_suffix + '[]', file=current_extensions_appendix_fp) @@ -587,7 +654,9 @@ def makeSortKey(extname): write('ifdef::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) write('[[deprecated-extension-appendices-list]]', file=deprecated_extensions_appendix_fp) write('== List of Deprecated Extensions', file=deprecated_extensions_appendix_fp) + write('ifndef::site-gen-antora[]', file=deprecated_extensions_appendix_fp) write('include::{generated}/meta/deprecated_extension_appendices_toc' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) + write('endif::site-gen-antora[]', file=deprecated_extensions_appendix_fp) write('\n<<<\n', file=deprecated_extensions_appendix_fp) write('include::{generated}/meta/deprecated_extension_appendices' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) @@ -604,7 +673,9 @@ def makeSortKey(extname): write('ifdef::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) write('[[provisional-extension-appendices-list]]', 
file=provisional_extensions_appendix_fp) write('== List of Provisional Extensions', file=provisional_extensions_appendix_fp) + write('ifndef::site-gen-antora[]', file=provisional_extensions_appendix_fp) write('include::{generated}/meta/provisional_extension_appendices_toc' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('endif::site-gen-antora[]', file=provisional_extensions_appendix_fp) write('\n<<<\n', file=provisional_extensions_appendix_fp) write('include::{generated}/meta/provisional_extension_appendices' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) write('endif::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) @@ -614,8 +685,14 @@ def makeSortKey(extname): for name in sorted_keys: ext = self.extensions[name] - include = self.makeExtensionInclude(ext.name) + # Increase the leveloffset of the extension include so it is + # lower than the subsection (extension name) it belongs to + include = ':leveloffset: +1\n' + include += '\n' + self.makeExtensionInclude(ext.name) + '\n\n' + include += ':leveloffset: -1\n' + link = ' * ' + self.conventions.formatExtension(ext.name) + if ext.provisional == 'true': write(self.conditionalExt(ext.name, include), file=provisional_extension_appendices_fp) write(self.conditionalExt(ext.name, link), file=provisional_extension_appendices_toc_fp) @@ -675,6 +752,7 @@ def beginFeature(self, interface, emit): extdata = Extension( generator = self, filename = filename, + interface = interface, name = name, number = number, ext_type = ext_type, diff --git a/scripts/genRef.py b/scripts/genRef.py index 2b103761a..9b78fd0dc 100755 --- a/scripts/genRef.py +++ b/scripts/genRef.py @@ -223,7 +223,7 @@ def refPageShell(pageName, pageDesc, fp, head_content = None, sections=None, tai """Generate body of a reference page. 
- pageName - string name of the page - - pageDesc - string short description of the page, or empty string + - pageDesc - string short description of the page - fp - file to write to - head_content - text to include before the sections - sections - iterable returning (title,body) for each section. @@ -245,7 +245,6 @@ def refPageShell(pageName, pageDesc, fp, head_content = None, sections=None, tai conventions.extra_refpage_body, '', sep='\n', file=fp) - if pageDesc.strip() == '': pageDesc = 'NO SHORT DESCRIPTION PROVIDED' logWarn('refPageHead: no short description provided for', pageName) diff --git a/scripts/reflib.py b/scripts/reflib.py index 36db7590d..41fec4928 100644 --- a/scripts/reflib.py +++ b/scripts/reflib.py @@ -326,6 +326,13 @@ def fixupRefs(pageMap, specFile, file): pi.param = nextPara(file, pi.include) if pi.body is None: pi.body = nextPara(file, pi.param) + + # Vulkan Feature struct refpages may have interstitial + # text between the include block and the actual + # parameter descriptions. + # If so, advance the body one more paragraph. + if 'This structure describes the following feature' in file[pi.param]: + pi.body = nextPara(file, pi.body) else: if pi.body is None: pi.body = nextPara(file, pi.include) @@ -337,6 +344,16 @@ def fixupRefs(pageMap, specFile, file): pi.param = clampToBlock(pi.param, pi.include, pi.end) pi.body = clampToBlock(pi.body, pi.param, pi.end) + if pi.type in ['funcpointers', 'protos']: + # It is possible for the inferred parameter section to be invalid, + # such as for the type PFN_vkVoidFunction, which has no parameters. + # Since the parameter section is always a bullet-point list, we know + # the section is invalid if its text does not start with a list item. + # Note: This also deletes parameter sections that are simply empty. 
+ if pi.param is not None and not file[pi.param].startswith(' * '): + pi.body = pi.param + pi.param = None + # We can get to this point with .include, .param, and .validity # all being None, indicating those sections were not found. diff --git a/scripts/spec_tools/conventions.py b/scripts/spec_tools/conventions.py index 5b9f6dd40..edfa906cf 100644 --- a/scripts/spec_tools/conventions.py +++ b/scripts/spec_tools/conventions.py @@ -102,6 +102,17 @@ def formatExtension(self, name): """Mark up an extension name as a link in the spec.""" return '`<<{}>>`'.format(name) + def formatSPIRVlink(self, name): + """Mark up a SPIR-V extension name as an external link in the spec. + Since these are external links, the formatting probably will be + the same for all APIs creating such links, so long as they use + the asciidoctor {spirv} attribute for the base path to the SPIR-V + extensions.""" + + (vendor, _) = self.extension_name_split(name) + + return f'{{spirv}}/{vendor}/{name}.html[{name}]' + @property @abc.abstractmethod def null(self): @@ -285,7 +296,7 @@ def extension_name_prefix(self): Typically two uppercase letters followed by an underscore. Assumed to be the same as api_prefix, but some APIs use different - case convntions.""" + case conventions.""" return self.api_prefix @@ -443,6 +454,16 @@ def generate_max_enum_in_docs(self): documentation includes.""" return False + def extension_name_split(self, name): + """Split an extension name, returning (vendor, rest of name). 
+ The API prefix of the name is ignored.""" + + match = EXT_NAME_DECOMPOSE_RE.match(name) + vendor = match.group('vendor') + bare_name = match.group('name') + + return (vendor, bare_name) + @abc.abstractmethod def extension_file_path(self, name): """Return file path to an extension appendix relative to a directory From 0cf59f1ba6f1d9722ad047d3d9c264cba93be4b6 Mon Sep 17 00:00:00 2001 From: Jon Leech <4693344+oddhack@users.noreply.github.com> Date: Sun, 31 Mar 2024 11:15:22 -0700 Subject: [PATCH 082/190] Fix markup for a few extension names. (#1112) --- OpenCL_C.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index eab333330..aecf83270 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -218,13 +218,13 @@ operations across a work-group. ifdef::cl_khr_integer_dot_product[] | {opencl_c_integer_dot_product_input_4x8bit_packed} + - (when the {cl_khr_integer_dot_product} extension macro is defined) + (when the `<<cl_khr_integer_dot_product>>` extension macro is defined) | The OpenCL C compiler supports built-in functions that perform dot products on 4x8 bit packed integer vectors | {opencl_c_integer_dot_product_input_4x8bit} + - (when the {cl_khr_integer_dot_product} extension macro is defined) + (when the `<<cl_khr_integer_dot_product>>` extension macro is defined) | The OpenCL C compiler supports built-in functions that perform dot products on 4x8 bit integer vectors endif::cl_khr_integer_dot_product[] @@ -354,7 +354,7 @@ ifdef::cl_khr_fp16[] ==== Half-Precision Floating-Point The `cl_khr_fp16` extension was promoted to OpenCL C 1.2 as an optional -feature, and to OpenCL 3.0 as the optional {cl_khr_fp16} feature. +feature, and to OpenCL 3.0 as the optional `<<cl_khr_fp16>>` feature. The extension provides 16-bit precision scalar and vector floating-point data types and extends many functions to accept these types.
endif::cl_khr_fp16[] @@ -365,7 +365,7 @@ ifdef::cl_khr_fp64[] ==== Double-Precision Floating-Point The `cl_khr_fp64` extension was promoted to OpenCL C 1.2 as an optional -feature, and to OpenCL 3.0 as the optional {cl_khr_fp64} feature. +feature, and to OpenCL 3.0 as the optional `<<cl_khr_fp64>>` feature. The extension provides double-precision scalar and vector floating-point data types and extends many functions to accept these types. endif::cl_khr_fp64[] From 14263c9892a9e432bed81714c2d0a566366e4880 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 31 Mar 2024 14:13:29 -0700 Subject: [PATCH 083/190] use the feature macro dictionary attributes in more places (#1107) --- api/appendix_h.asciidoc | 36 +++++++++++++++--------------- api/opencl_architecture.asciidoc | 2 +- api/opencl_platform_layer.asciidoc | 4 ++-- c/footnotes.asciidoc | 18 +++++++-------- ext/to_core_features.asciidoc | 2 +- 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/api/appendix_h.asciidoc b/api/appendix_h.asciidoc index 9e9ec6ea8..1bbd6f330 100644 --- a/api/appendix_h.asciidoc +++ b/api/appendix_h.asciidoc @@ -121,10 +121,10 @@ OpenCL C compilers supporting atomics orders or scopes beyond the mandated minimum will define some or all of following feature macros as appropriate: [none] -* `+__opencl_c_atomic_order_acq_rel+` -- Indicating atomic operations support acquire-release orderings. -* `+__opencl_c_atomic_order_seq_cst+` -- Indicating atomic operations and fences support acquire sequentially consistent orderings. -* `+__opencl_c_atomic_scope_device+` -- Indicating atomic operations and fences support device-wide memory ordering constraints. -* `+__opencl_c_atomic_scope_all_devices+` -- Indicating atomic operations and fences support all-device memory ordering constraints, across any host threads and all devices that can share SVM memory with each other and the host process. +* {opencl_c_atomic_order_acq_rel} -- Indicating atomic operations support acquire-release orderings.
+* {opencl_c_atomic_order_seq_cst} -- Indicating atomic operations and fences support acquire sequentially consistent orderings. +* {opencl_c_atomic_scope_device} -- Indicating atomic operations and fences support device-wide memory ordering constraints. +* {opencl_c_atomic_scope_all_devices} -- Indicating atomic operations and fences support all-device memory ordering constraints, across any host threads and all devices that can share SVM memory with each other and the host process. == Device-Side Enqueue @@ -184,9 +184,9 @@ When device-side enqueue is supported but a replaceable default on-device queue |==== -OpenCL C compilers supporting device-side enqueue and on-device queues will define the feature macro `+__opencl_c_device_enqueue+`. -OpenCL C compilers that define the feature macro `+__opencl_c_device_enqueue+` must also define the feature macro `+__opencl_c_generic_address_space+` because some OpenCL C functions for device-side enqueue accept pointers to the generic address space. -OpenCL C compilers that define the feature macro `+__opencl_c_device_enqueue+` must also define the feature macro `+__opencl_c_program_scope_global_variables+` because an implementation of blocks may interact with program scope variables in global address space as part of ABI. +OpenCL C compilers supporting device-side enqueue and on-device queues will define the feature macro {opencl_c_device_enqueue}. +OpenCL C compilers that define the feature macro {opencl_c_device_enqueue} must also define the feature macro {opencl_c_generic_address_space} because some OpenCL C functions for device-side enqueue accept pointers to the generic address space. +OpenCL C compilers that define the feature macro {opencl_c_device_enqueue} must also define the feature macro {opencl_c_program_scope_global_variables} because an implementation of blocks may interact with program scope variables in global address space as part of ABI. 
== Pipes @@ -216,8 +216,8 @@ When pipes are not supported: |==== -OpenCL C compilers supporting pipes will define the feature macro `+__opencl_c_pipes+`. -OpenCL C compilers that define the feature macro `+__opencl_c_pipes+` must also define the feature macro `+__opencl_c_generic_address_space+` because some OpenCL C functions for pipes accept pointers to the generic address space. +OpenCL C compilers supporting pipes will define the feature macro {opencl_c_pipes}. +OpenCL C compilers that define the feature macro {opencl_c_pipes} must also define the feature macro {opencl_c_generic_address_space} because some OpenCL C functions for pipes accept pointers to the generic address space. == Program Scope Global Variables @@ -243,7 +243,7 @@ When program scope global variables are not supported: |==== -OpenCL C compilers supporting program scope global variables will define the feature macro `+__opencl_c_program_scope_global_variables+`. +OpenCL C compilers supporting program scope global variables will define the feature macro {opencl_c_program_scope_global_variables}. // TODO: There is no SPIR-V capability specific to program scope global variables. // May need to update the validation rules to disallow program scope global variables @@ -294,7 +294,7 @@ When read-write images are not supported: |==== -OpenCL C compilers supporting read-write images will define the feature macro `+__opencl_c_read_write_images+`. +OpenCL C compilers supporting read-write images will define the feature macro {opencl_c_read_write_images}. == Creating 2D Images From Buffers @@ -434,7 +434,7 @@ When sub-groups are not supported: |==== -OpenCL C compilers supporting sub-groups will define the feature macro `+__opencl_c_subgroups+`. +OpenCL C compilers supporting sub-groups will define the feature macro {opencl_c_subgroups}. 
== Program Initialization and Clean-Up Kernels @@ -479,7 +479,7 @@ When writing to 3D image objects is not supported: |==== -OpenCL C compilers supporting writing to 3D image objects will define the feature macro `+__opencl_c_3d_image_writes+`. +OpenCL C compilers supporting writing to 3D image objects will define the feature macro {opencl_c_3d_image_writes}. == Work-group Collective Functions @@ -497,7 +497,7 @@ When work-group collective functions are not supported: |==== -OpenCL C compilers supporting work-group collective functions will define the feature macro `+__opencl_c_work_group_collective_functions+`. +OpenCL C compilers supporting work-group collective functions will define the feature macro {opencl_c_work_group_collective_functions}. == Generic Address Space @@ -515,7 +515,7 @@ When the generic address space is not supported: |==== -OpenCL C compilers supporting the generic address space will define the feature macro `+__opencl_c_generic_address_space+`. +OpenCL C compilers supporting the generic address space will define the feature macro {opencl_c_generic_address_space}. //== Required APIs // @@ -549,6 +549,6 @@ OpenCL C compilers supporting the generic address space will define the feature Some OpenCL C language features were already optional before OpenCL 3.0, the API mechanisms for querying these have not changed. New feature macros for these optional features have been added to OpenCL C to provide a consistent mechanism for using optional features in OpenCL C 3.0. -OpenCL C compilers supporting images will define the feature macro `+__opencl_c_images+`. -OpenCL C compilers supporting the `double` type will define the feature macro `+__opencl_c_fp64+`. -OpenCL C compilers supporting the `long`, `unsigned long` and `ulong` types will define the feature macro `+__opencl_c_int64+`, note that compilers for FULL_PROFILE devices must support these types and define the macro unconditionally. 
+OpenCL C compilers supporting images will define the feature macro {opencl_c_images}. +OpenCL C compilers supporting the `double` type will define the feature macro {opencl_c_fp64}. +OpenCL C compilers supporting the `long`, `unsigned long` and `ulong` types will define the feature macro {opencl_c_int64}, note that compilers for FULL_PROFILE devices must support these types and define the macro unconditionally. diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index a342edbf3..4ae5b44ad 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -2168,7 +2168,7 @@ OpenCL 3.0 also adds a new version of the OpenCL C programming language, which m The new version of OpenCL C is backwards compatible with OpenCL C 1.2, but is not backwards compatible with OpenCL C 2.0. The new version of OpenCL C must be explicitly requested via the `-cl-std=` build option, otherwise a program will continue to be compiled using the highest OpenCL C 1.x language version supported for the device. + -Whenever an OpenCL C feature is optional in the new version of the OpenCL C programming language, it will be paired with a feature macro, such as `+__opencl_c_feature_name+`, and a corresponding API query. +Whenever an OpenCL C feature is optional in the new version of the OpenCL C programming language, it will be paired with a feature macro, such as {opencl_c_feature_name}, and a corresponding API query. If a feature macro is defined then the feature is supported by the OpenCL C compiler, otherwise the optional feature is not supported. 
In order to allow future versions of OpenCL to support new types of diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index c09ff4304..c5f591ac9 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -2068,9 +2068,9 @@ returned for {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR}: |==== | Feature Bit | Feature Macro | {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} - | `__opencl_c_integer_dot_product_input_4x8bit_packed` + | {opencl_c_integer_dot_product_input_4x8bit_packed} | {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} - | `__opencl_c_integer_dot_product_input_4x8bit` + | {opencl_c_integer_dot_product_input_4x8bit} |==== endif::cl_khr_integer_dot_product[] diff --git a/c/footnotes.asciidoc b/c/footnotes.asciidoc index 4045e8e60..6abc922aa 100644 --- a/c/footnotes.asciidoc +++ b/c/footnotes.asciidoc @@ -22,12 +22,12 @@ The <> consume operation is not supported. \ :fn-atomic-double-supported: pass:n[ \ The `atomic_double` type is only supported if double precision is supported and the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions are supported and have been enabled. \ -If this is the case then an OpenCL C 3.0 compiler must also define the `+__opencl_c_fp64+` feature. \ +If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_fp64} feature. \ ] :fn-atomic-int64-supported: pass:n[ \ The atomic_long and atomic_ulong types are supported if the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions are supported and have been enabled. \ -If this is the case then an OpenCL C 3.0 compiler must also define the `+__opencl_c_int64+` feature. \ +If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_int64} feature. 
\ ] :fn-atomic-size_t-supported: pass:n[ \ @@ -77,17 +77,17 @@ Although `CL_UNORM_INT_101010_2` was added in OpenCL 2.1, because there was no O :fn-double: pass:n[ \ The `double` scalar type is an optional type that is supported if the value of the `CL_DEVICE_DOUBLE_FP_CONFIG` device query is not zero. \ -If this is the case then an OpenCL C 3.0 compiler must also define the `+__opencl_c_fp64+` feature macro. \ +If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_fp64} feature macro. \ ] :fn-double-supported: pass:n[ \ Only if double precision is supported. \ -In OpenCL C 3.0 this will be indicated by the presence of the `+__opencl_c_fp64+` feature macro. \ +In OpenCL C 3.0 this will be indicated by the presence of the {opencl_c_fp64} feature macro. \ ] :fn-double-vec: pass:n[ \ The `double__n__` vector type is an optional type that is supported if the value of the `CL_DEVICE_DOUBLE_FP_CONFIG` device query is not zero. \ -If this is the case then an OpenCL C 3.0 compiler must also define the `+__opencl_c_fp64+` feature macro. \ +If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_fp64} feature macro. \ ] :fn-dse-CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP: pass:n[ \ @@ -132,7 +132,7 @@ If an implementation extends this specification to support IEEE-754 flags or exc :fn-float-types-supported: pass:n[ \ The `half` scalar and vector types can only be used if the *cl_khr_fp16* extension is supported and has been enabled. \ -The `double` scalar and vector types can only be used if `double` precision is supported, e.g. for OpenCL C 3.0 the `+__opencl_c_fp64+` feature macro is present. \ +The `double` scalar and vector types can only be used if `double` precision is supported, e.g. for OpenCL C 3.0 the {opencl_c_fp64} feature macro is present. 
\ ] :fn-fmin-fmax-nan: pass:n[ \ @@ -155,7 +155,7 @@ Refer to the detailed description of the built-in < Date: Sun, 31 Mar 2024 14:14:49 -0700 Subject: [PATCH 084/190] add initial draft of Valid Usage and Undefined Behavior section (#1105) --- api/opencl_architecture.asciidoc | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index 4ae5b44ad..af80cd9ea 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -2278,3 +2278,33 @@ include::{generated}/api/structs/cl_name_version.txt[] containing a null-terminated string, whose maximum length is therefore {CL_NAME_VERSION_MAX_NAME_SIZE} minus one. -- + +[[valid-usage]] +=== Valid Usage and Undefined Behavior + +The OpenCL specification describes valid usage and how to use the API correctly. +For some conditions where an API is used incorrectly, behavior is well-defined, +such as returning an error code. +For other conditions, behavior is undefined, and may include program +termination. +However, OpenCL implementations must always ensure that incorrect usage by an +application does not affect the integrity of the operating system, the OpenCL +implementation, or other OpenCL client applications in the system. +In particular, any guarantees made by an operating system about whether memory +from one process can be visible to another process or not must not be violated +by an OpenCL implementation for any memory allocation. +OpenCL implementations are not required to make additional security or integrity +guarantees beyond those provided by the operating system unless explicitly +directed by the application’s use of a particular feature or extension. + +[NOTE] +-- +For instance, if an operating system guarantees that data in all its memory +allocations are set to zero when newly allocated, the OpenCL implementation must +make the same guarantees for any allocations it controls. 
+ +Similarly, if an operating system guarantees that use-after-free of host +allocations will not result in values written by another process becoming +visible, the same guarantees must be made by the OpenCL implementation for +memory accessible to an OpenCL device. +-- From a5fd8e78353972192ad365fd5107e8ad8876f9b9 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 31 Mar 2024 14:15:26 -0700 Subject: [PATCH 085/190] update generated version text for extension APIs and enums (#1108) --- scripts/gen_version_notes.py | 57 +++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/scripts/gen_version_notes.py b/scripts/gen_version_notes.py index b16faa42d..21271643d 100755 --- a/scripts/gen_version_notes.py +++ b/scripts/gen_version_notes.py @@ -33,29 +33,37 @@ def GetFooter(): return """ """ -def FullNote(name, added_in, deprecated_by): - # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in - # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. - if added_in == "1.0" and deprecated_by == None: - return "\n// Intentionally empty, %s has always been present." % name - if added_in != "1.0" and deprecated_by == None: - return "\nIMPORTANT: {%s} is {missing_before} version %s." % (name, added_in) - if added_in == "1.0" and deprecated_by != None: - return "\nIMPORTANT: {%s} is {deprecated_by} version %s." % (name, deprecated_by) - if added_in != "1.0" and deprecated_by != None: - return "\nIMPORTANT: {%s} is {missing_before} version %s and {deprecated_by} version %s." % (name, added_in, deprecated_by) - -def ShortNote(name, added_in, deprecated_by): - # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in - # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. - if added_in == "1.0" and deprecated_by == None: - return "// Intentionally empty, %s has always been present." 
% name - if added_in != "1.0" and deprecated_by == None: - return "{missing_before} version %s." % added_in - if added_in == "1.0" and deprecated_by != None: - return "{deprecated_by} version %s." % deprecated_by - if added_in != "1.0" and deprecated_by != None: - return "{missing_before} version %s and {deprecated_by} version %s." % (added_in, deprecated_by) +def FullNote(name, is_extension, added_in, deprecated_by): + if is_extension: + assert deprecated_by == None + return "\nIMPORTANT: {%s} is provided by the `%s` extension." % (name, added_in) + else: + # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in + # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. + if added_in == "1.0" and deprecated_by == None: + return "\n// Intentionally empty, %s has always been present." % name + if added_in != "1.0" and deprecated_by == None: + return "\nIMPORTANT: {%s} is {missing_before} version %s." % (name, added_in) + if added_in == "1.0" and deprecated_by != None: + return "\nIMPORTANT: {%s} is {deprecated_by} version %s." % (name, deprecated_by) + if added_in != "1.0" and deprecated_by != None: + return "\nIMPORTANT: {%s} is {missing_before} version %s and {deprecated_by} version %s." % (name, added_in, deprecated_by) + +def ShortNote(name, is_extension, added_in, deprecated_by): + if is_extension: + assert deprecated_by == None + return "provided by the `%s` extension." % added_in + else: + # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in + # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. + if added_in == "1.0" and deprecated_by == None: + return "// Intentionally empty, %s has always been present." % name + if added_in != "1.0" and deprecated_by == None: + return "{missing_before} version %s." % added_in + if added_in == "1.0" and deprecated_by != None: + return "{deprecated_by} version %s." 
% deprecated_by + if added_in != "1.0" and deprecated_by != None: + return "{missing_before} version %s and {deprecated_by} version %s." % (added_in, deprecated_by) # Find feature or extension groups that are parents of a or # <${entry_type}> tag, and then find all the @@ -74,6 +82,7 @@ def process_xml(spec, entry_type, note_printer): for feature in spec.findall(f'.//{feature_type}/require/{entry_type}/../..'): for entry in feature.findall(f'.//{entry_type}'): name = entry.get('name') + is_extension = feature_type != 'feature' deprecated_by = None numberOfEntries += 1 @@ -104,7 +113,7 @@ def process_xml(spec, entry_type, note_printer): versionFileName = os.path.join(args.directory, name + ".asciidoc") with open(versionFileName, 'w') as versionFile: versionFile.write(GetHeader()) - versionFile.write(note_printer(name, added_in, deprecated_by)) + versionFile.write(note_printer(name, is_extension, added_in, deprecated_by)) versionFile.write(GetFooter()) numberOfNewEntries += 0 if added_in == "1.0" else 1 From cdbd9e4c6a1a2726af376959bebf77c2b586b878 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 31 Mar 2024 14:33:12 -0700 Subject: [PATCH 086/190] add missing anchors (#1120) * add missing anchors for external memory and semaphore commands * add missing anchors for external memory handles also a few missing anchors for various extensions * fix a few more missing anchors and incorrect attributes --- OpenCL_C.txt | 2 +- api/cl_khr_extended_versioning.asciidoc | 6 +- api/cl_khr_external_semaphore.asciidoc | 2 +- ...cl_khr_external_semaphore_sync_fd.asciidoc | 2 +- api/cl_khr_gl_event.asciidoc | 2 +- api/cl_khr_semaphore.asciidoc | 2 +- api/cl_khr_spir.asciidoc | 2 +- api/opencl_architecture.asciidoc | 2 +- api/opencl_platform_layer.asciidoc | 12 +++- api/opencl_runtime_layer.asciidoc | 69 +++++++++++++------ scripts/checklinks.py | 6 +- 11 files changed, 71 insertions(+), 36 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index aecf83270..9dfc156b2 100644 --- 
a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -9735,7 +9735,7 @@ The *F* conversion specifier produces `INF`, `INFINITY`, or `NAN` instead of **e,E** A `double`, `half__n__`, `float__n__` or `double__n__` argument representing a floating-point number is converted in the style -__[__**-**__]d__**.**__ddd __**e{plusmn}}**_dd_, where there is one digit +__[__**-**__]d__**.**__ddd __**e{plusmn}**_dd_, where there is one digit (which is nonzero if the argument is nonzero) before the decimal-point character and the number of digits after it is equal to the precision; if the precision is missing, it is taken as 6; if the precision is zero and the diff --git a/api/cl_khr_extended_versioning.asciidoc b/api/cl_khr_extended_versioning.asciidoc index 4cf053be1..cf2ea54ee 100644 --- a/api/cl_khr_extended_versioning.asciidoc +++ b/api/cl_khr_extended_versioning.asciidoc @@ -47,9 +47,9 @@ Versioning>> section. === New Macro Names - * {CL_VERSION_MAJOR_BITS_KHR_anchor} - * {CL_VERSION_MINOR_BITS_KHR_anchor} - * {CL_VERSION_PATCH_BITS_KHR_anchor} + * {CL_VERSION_MAJOR_BITS_KHR} + * {CL_VERSION_MINOR_BITS_KHR} + * {CL_VERSION_PATCH_BITS_KHR} * `CL_VERSION_MAJOR_MASK_KHR` * `CL_VERSION_MINOR_MASK_KHR` * `CL_VERSION_PATCH_MASK_KHR` diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc index 8e44962ac..0671f544a 100644 --- a/api/cl_khr_external_semaphore.asciidoc +++ b/api/cl_khr_external_semaphore.asciidoc @@ -279,7 +279,7 @@ while (true) { * Revision 0.9.0, 2021-09-10 ** Initial version (provisional). * Revision 0.9.1, 2023-11-16 - ** Added {CL_SEMAPHORE_EXPORTABLE_KHR_anchor}. + ** Added {CL_SEMAPHORE_EXPORTABLE_KHR}. 
* Revision 0.9.2, 2023-11-21 ** Added re-import function call to `<>` diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc index 507588848..0368bdee3 100644 --- a/api/cl_khr_external_semaphore_sync_fd.asciidoc +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -53,6 +53,6 @@ external semaphore using the APIs introduced by * Revision 0.9.0, 2021-09-10 ** Initial version (provisional). * Revision 0.9.1, 2023-11-16 - ** Added {CL_SEMAPHORE_EXPORTABLE_KHR_anchor}. + ** Added {CL_SEMAPHORE_EXPORTABLE_KHR}. * Revision 0.9.2, 2023-11-21 ** Added re-import function call to `<>` diff --git a/api/cl_khr_gl_event.asciidoc b/api/cl_khr_gl_event.asciidoc index b7d10c5c4..2118d7e2b 100644 --- a/api/cl_khr_gl_event.asciidoc +++ b/api/cl_khr_gl_event.asciidoc @@ -96,7 +96,7 @@ runtime. . Where can events generated from GL syncs be usable? + -- -*PROPOSED*: Only with clEnqueueAcquireGLObjects, and attempting to use such +*PROPOSED*: Only with {clEnqueueAcquireGLObjects}, and attempting to use such an event elsewhere will generate an error. There is no apparent use case for using such events elsewhere, and possibly some cost to supporting it, balanced by the cost of checking the source of diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc index 4d442fec2..ec578bc37 100644 --- a/api/cl_khr_semaphore.asciidoc +++ b/api/cl_khr_semaphore.asciidoc @@ -79,7 +79,7 @@ In particular, this extension defines: * {cl_device_info_TYPE} ** {CL_DEVICE_SEMAPHORE_TYPES_KHR} * {cl_semaphore_type_khr_TYPE} - ** {CL_SEMAPHORE_TYPE_BINARY_KHR} 1 + ** {CL_SEMAPHORE_TYPE_BINARY_KHR} * {cl_semaphore_info_khr_TYPE} ** {CL_SEMAPHORE_CONTEXT_KHR} ** {CL_SEMAPHORE_REFERENCE_COUNT_KHR} diff --git a/api/cl_khr_spir.asciidoc b/api/cl_khr_spir.asciidoc index 33ec23e43..a79966984 100644 --- a/api/cl_khr_spir.asciidoc +++ b/api/cl_khr_spir.asciidoc @@ -31,7 +31,7 @@ feature in OpenCL 2.1. 
* {cl_device_info_TYPE} ** {CL_DEVICE_SPIR_VERSIONS} * {cl_program_binary_type_TYPE} - ** CL_PROGRAM_BINARY_TYPE_INTERMEDIATE} + ** {CL_PROGRAM_BINARY_TYPE_INTERMEDIATE} === Version History diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index af80cd9ea..0a70e6278 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -2226,7 +2226,7 @@ working with version numbers easier. {cl_version_TYPE}. * `CL_VERSION_PATCH` extracts the _patch_ version from a packed {cl_version_TYPE}. - * `CL_MAKE_VERSION` returns a packed `cl_version_TYPE} from a + * `CL_MAKE_VERSION` returns a packed {cl_version_TYPE} from a _major_, _minor_ and _patch_ version. * {CL_VERSION_MAJOR_BITS_anchor}, {CL_VERSION_MINOR_BITS_anchor}, and {CL_VERSION_PATCH_BITS_anchor} are the number of bits in the diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index c5f591ac9..6211b1384 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -786,7 +786,7 @@ endif::cl_khr_image2d_from_buffer[] include::{generated}/api/version-notes/CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT.asciidoc[] ifdef::cl_khr_image2d_from_buffer[] -The equivalent {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_anchor} may be used +The equivalent {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR_anchor} may be used if the `<>` extension is supported. endif::cl_khr_image2d_from_buffer[] | {cl_uint_TYPE} @@ -1813,6 +1813,8 @@ include::{generated}/api/version-notes/CL_DEVICE_UUID_KHR.asciidoc[] Device UUIDs must be immutable for a given device across processes, driver APIs, driver versions, and system reboots. + + {CL_UUID_SIZE_KHR_anchor} is the size of the UUID, in bytes. 
| {CL_DRIVER_UUID_KHR_anchor} include::{generated}/api/version-notes/CL_DRIVER_UUID_KHR.asciidoc[] @@ -1820,6 +1822,8 @@ include::{generated}/api/version-notes/CL_DRIVER_UUID_KHR.asciidoc[] | {cl_uchar_TYPE}[{CL_UUID_SIZE_KHR}] | Returns a universally unique identifier (UUID) for the software driver for the device. + + {CL_UUID_SIZE_KHR} is the size of the UUID, in bytes. | {CL_DEVICE_LUID_VALID_KHR_anchor} include::{generated}/api/version-notes/CL_DEVICE_LUID_VALID_KHR.asciidoc[] @@ -1843,6 +1847,8 @@ include::{generated}/api/version-notes/CL_DEVICE_LUID_KHR.asciidoc[] value can be cast to an `LUID` object and must be equal to the locally unique identifier of an `IDXGIAdapter1` object that corresponds to the OpenCL device. + + {CL_LUID_SIZE_KHR_anchor} is the size of the LUID, in bytes. | {CL_DEVICE_NODE_MASK_KHR_anchor} include::{generated}/api/version-notes/CL_DEVICE_NODE_MASK_KHR.asciidoc[] @@ -1936,14 +1942,14 @@ include::{generated}/api/version-notes/CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIE | Returns the integer dot product capabilities supported by the device. - {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} is always + {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR_anchor} is always set, indicating that all implementations that support `<>` must support dot product built-in functions and, when SPIR-V is supported, SPIR-V instructions that take four-component vectors of 8-bit integers packed into 32-bit integers as input. - {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} is set when dot + {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR_anchor} is set when dot product built-in functions and, when SPIR-V is supported, SPIR-V instructions that take four-component of 8-bit elements as input are supported. 
diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index b8af9a812..7e98ce853 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -5568,7 +5568,7 @@ The `<>` extension extends of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR} specifies a POSIX file + * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR_anchor} specifies a POSIX file descriptor handle that has only limited valid usage outside of OpenCL and other compatible APIs. It must be compatible with the POSIX system calls `dup`, `dup2`, @@ -5585,7 +5585,7 @@ The `<>` extension extends handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - * {CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR} is a file descriptor for a Linux + * {CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR_anchor} is a file descriptor for a Linux dma_buf. It owns a reference to the underlying memory resource represented by its memory object. @@ -5607,21 +5607,21 @@ The `<>` extension extends of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR} specifies an NT handle + * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR_anchor} specifies an NT handle returned by `IDXGIResource1::CreateSharedHandle` referring to a Direct3D 10 or 11 texture resource. It owns a reference to the memory used by the Direct3D resource. - * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR} specifies a global + * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR_anchor} specifies a global share handle returned by `IDXGIResource::GetSharedHandle` referring to a Direct3D 10 or 11 texture resource. 
It does not own a reference to the underlying Direct3D resource, and will therefore become invalid when all memory objects and Direct3D resources associated with it are destroyed. - * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR} specifies an NT handle + * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR_anchor} specifies an NT handle returned by `ID3D12Device::CreateSharedHandle` referring to a Direct3D 12 heap resource. It owns a reference to the resources used by the Direct3D heap. - * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR} specifies an NT handle + * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR_anchor} specifies an NT handle returned by `ID3D12Device::CreateSharedHandle` referring to a Direct3D 12 committed resource. It owns a reference to the memory used by the Direct3D resource. @@ -5633,7 +5633,7 @@ The `<>` extension extends of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR} specifies an NT handle that + * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR_anchor} specifies an NT handle that has only limited valid usage outside of OpenCL and other compatible APIs. It must be compatible with the functions `DuplicateHandle`, @@ -5641,7 +5641,7 @@ buffer or an image memory object from an external handle: `SetHandleInformation`. It owns a reference to the underlying memory resource represented by its memory object. - * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR} specifies a global + * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR_anchor} specifies a global share handle that has only limited valid usage outside of OpenCL and other compatible APIs. It is not compatible with any native APIs. @@ -6213,10 +6213,10 @@ include::{generated}/api/version-notes/clGetGLObjectInfo.asciidoc[] * _memobj_ is the memory object to query. 
* _gl_object_type_ returns the type of OpenGL object attached to _memobj_ - and can be {CL_GL_OBJECT_BUFFER}, {CL_GL_OBJECT_TEXTURE2D}, - {CL_GL_OBJECT_TEXTURE3D}, {CL_GL_OBJECT_TEXTURE2D_ARRAY}, - {CL_GL_OBJECT_TEXTURE1D}, {CL_GL_OBJECT_TEXTURE1D_ARRAY}, - {CL_GL_OBJECT_TEXTURE_BUFFER}, or {CL_GL_OBJECT_RENDERBUFFER}. + and can be {CL_GL_OBJECT_BUFFER_anchor}, {CL_GL_OBJECT_TEXTURE2D_anchor}, + {CL_GL_OBJECT_TEXTURE3D_anchor}, {CL_GL_OBJECT_TEXTURE2D_ARRAY_anchor}, + {CL_GL_OBJECT_TEXTURE1D_anchor}, {CL_GL_OBJECT_TEXTURE1D_ARRAY_anchor}, + {CL_GL_OBJECT_TEXTURE_BUFFER_anchor}, or {CL_GL_OBJECT_RENDERBUFFER_anchor}. If _gl_object_type_ is `NULL`, it is ignored * _gl_object_name_ returns the OpenGL object name used to create _memobj_. If _gl_object_name_ is `NULL`, it is ignored. @@ -10875,7 +10875,10 @@ Also see `<>`. include::{generated}/api/version-notes/CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE.asciidoc[] -Also see `<>`. +ifdef::cl_khr_subgroups[] +The equivalent {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR_anchor} may be used if +the `<>` extension is supported. +endif::cl_khr_subgroups[] | {size_t_TYPE}* | {size_t_TYPE} | Returns the maximum sub-group size for this kernel. @@ -10892,7 +10895,10 @@ Also see `<>`. include::{generated}/api/version-notes/CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE.asciidoc[] -Also see `<>`. +ifdef::cl_khr_subgroups[] +The equivalent {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR_anchor} may be used if +the `<>` extension is supported. 
+endif::cl_khr_subgroups[] | {size_t_TYPE}* | {size_t_TYPE} | Returns the number of sub-groups that will be present in each @@ -12112,6 +12118,17 @@ ifdef::cl_khr_egl_event[] include::{generated}/api/version-notes/CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR.asciidoc[] endif::cl_khr_egl_event[] +ifdef::cl_khr_external_memory[] +| {clEnqueueAcquireExternalMemObjectsKHR} + | {CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR.asciidoc[] +| {clEnqueueReleaseExternalMemObjectsKHR} + | {CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR.asciidoc[] +endif::cl_khr_external_memory[] + ifdef::cl_khr_gl_sharing[] | {clEnqueueAcquireGLObjects} | {CL_COMMAND_ACQUIRE_GL_OBJECTS_anchor} @@ -12130,6 +12147,17 @@ ifdef::cl_khr_gl_event[] include::{generated}/api/version-notes/CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR.asciidoc[] endif::cl_khr_gl_event[] +ifdef::cl_khr_semaphore[] +| {clEnqueueSignalSemaphoresKHR} + | {CL_COMMAND_SEMAPHORE_SIGNAL_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SEMAPHORE_SIGNAL_KHR.asciidoc[] +| {clEnqueueWaitSemaphoresKHR} + | {CL_COMMAND_SEMAPHORE_WAIT_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SEMAPHORE_WAIT_KHR.asciidoc[] +endif::cl_khr_semaphore[] + |==== Using {clGetEventInfo} to determine if a command identified by _event_ has @@ -12747,14 +12775,15 @@ This section describes the semaphore types and functions defined by the * {cl_semaphore_type_khr_TYPE} represent the different types of semaphores. - ** It is mandatory to support {CL_SEMAPHORE_TYPE_BINARY_KHR}. +// TODO: This isn't a very good anchor for {CL_SEMAPHORE_TYPE_BINARY_KHR}... + ** It is mandatory to support {CL_SEMAPHORE_TYPE_BINARY_KHR_anchor}. * {cl_semaphore_properties_khr_TYPE} represents properties associated with semaphores. ** {CL_SEMAPHORE_TYPE_KHR} must be supported. 
* {cl_semaphore_info_khr_TYPE} represents queries for additional information about semaphores. ** All enums described in the "`New API Enums`" section of the - `<>` extension for cl_semaphore_info_khr_TYPE} must + `<>` extension for {cl_semaphore_info_khr_TYPE} must be supported. * {cl_semaphore_payload_khr_TYPE} represents payload values of semaphores. * {cl_semaphore_khr_TYPE} represent semaphore objects. @@ -12791,7 +12820,7 @@ properties by {cl_semaphore_properties_khr_TYPE} that can be passed to | {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR_anchor} | {cl_device_id_TYPE}[] | Specifies the list of OpenCL devices (terminated with - {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR}) to associate with the + {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR_anchor}) to associate with the semaphore. Only a single device is permitted in the list. @@ -12801,7 +12830,7 @@ ifdef::cl_khr_external_semaphore[] include::{generated}/api/version-notes/CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR.asciidoc[] | {cl_external_semaphore_handle_type_khr_TYPE}[] | Specifies the list of semaphore handle type properties (terminated - with {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR}) that can be + with {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR_anchor}) that can be used to export the semaphore being created. endif::cl_khr_external_semaphore[] |==== @@ -15803,7 +15832,7 @@ description here and is defined as: include::{generated}/api/structs/cl_mutable_base_config_khr.txt[] * _type_ is the type of this structure, and must be - {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR} + {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR_anchor} * _next_ is `NULL` or a pointer to an extending structure. * _num_mutable_dispatch_ is the number of mutable-dispatch objects to configure in this enqueue of the command-buffer. 
@@ -15823,7 +15852,7 @@ The {cl_mutable_dispatch_arg_khr_TYPE} structure is passed to include::{generated}/api/structs/cl_mutable_dispatch_config_khr.txt[] * _type_ is the type of this structure, and must be - {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. + {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR_anchor}. * _next_ is `NULL` or a pointer to an extending structure. * _command_ is a mutable-command object returned by {clCommandNDRangeKernelKHR} representing a kernel execution as part of a diff --git a/scripts/checklinks.py b/scripts/checklinks.py index 3b8ee644c..816f601a7 100755 --- a/scripts/checklinks.py +++ b/scripts/checklinks.py @@ -28,9 +28,9 @@ sourcefile.close() # We're not going to check API links. - #filelinks = re.findall(r"{((cl\w+)|(CL\w+))}", sourcetext) - filelinks = re.findall(r"{((CL\w+))}", sourcetext) - fileanchors = re.findall(r"{((cl\w+)|(CL\w+))_anchor}", sourcetext) + #filelinks = re.findall(r"{((cl\w+)|(CL_\w+))}", sourcetext) + filelinks = re.findall(r"{((CL_\w+))}", sourcetext) + fileanchors = re.findall(r"{((cl\w+)|(CL_\w+))_anchor}", sourcetext) filelinks = [re.sub(r"_anchor\b", "", link[0]) for link in filelinks] fileanchors = [anchor[0] for anchor in fileanchors] From e553da1814fc4b286b3d36b3474bf234ed0f19c4 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 31 Mar 2024 15:34:35 -0700 Subject: [PATCH 087/190] cleans up extension metadata (#1122) use a consistent order for new commands, types, enums, etc. 
add a few missing extension types do not use the new SPIR-V capabilities section use consistent terminology --- api/cl_khr_command_buffer_multi_device.asciidoc | 8 ++++---- api/cl_khr_command_buffer_mutable_dispatch.asciidoc | 10 +++++----- api/cl_khr_d3d10_sharing.asciidoc | 12 ++++++------ api/cl_khr_d3d11_sharing.asciidoc | 7 ++++++- api/cl_khr_dx9_media_sharing.asciidoc | 7 ++++++- api/cl_khr_egl_event.asciidoc | 2 +- api/cl_khr_egl_image.asciidoc | 3 +++ api/cl_khr_external_memory.asciidoc | 4 ---- api/cl_khr_external_memory_dma_buf.asciidoc | 12 ------------ api/cl_khr_external_memory_dx.asciidoc | 12 ------------ api/cl_khr_external_memory_opaque_fd.asciidoc | 12 ------------ api/cl_khr_external_memory_win32.asciidoc | 12 ------------ api/cl_khr_fp16.asciidoc | 2 +- api/cl_khr_fp64.asciidoc | 2 +- api/cl_khr_gl_event.asciidoc | 2 +- api/cl_khr_gl_sharing.asciidoc | 2 +- api/cl_khr_icd.asciidoc | 2 +- api/cl_khr_il_program.asciidoc | 2 +- api/cl_khr_image2d_from_buffer.asciidoc | 2 +- api/cl_khr_integer_dot_product.asciidoc | 4 ---- api/cl_khr_spir.asciidoc | 2 +- api/cl_khr_subgroups.asciidoc | 8 ++++---- api/cl_khr_terminate_context.asciidoc | 8 ++++---- 23 files changed, 47 insertions(+), 90 deletions(-) diff --git a/api/cl_khr_command_buffer_multi_device.asciidoc b/api/cl_khr_command_buffer_multi_device.asciidoc index 60ea8c530..26d2d72ea 100644 --- a/api/cl_khr_command_buffer_multi_device.asciidoc +++ b/api/cl_khr_command_buffer_multi_device.asciidoc @@ -46,6 +46,10 @@ Depending on platform support the mapping of commands to the new target device can be done either explicitly by the user, or automatically by the OpenCL runtime. 
+=== New Commands + + * {clRemapCommandBufferKHR} + === New Types Bitfield for querying command-buffer capabilities of an OpenCL Platform with @@ -54,10 +58,6 @@ queries table>>: * {cl_platform_command_buffer_capabilities_khr_TYPE} -=== New Commands - - * {clRemapCommandBufferKHR} - === New Enums Enums for querying device command-buffer capabilities with diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc index ea37650c2..6f35f3006 100644 --- a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -70,6 +70,11 @@ As all command recording entry-points return a {cl_mutable_command_khr_TYPE} handle, and aspects like which {cl_mem_TYPE} object a command uses could also be updated between enqueues of the command-buffer. +=== New Commands + + * {clUpdateMutableCommandsKHR} + * {clGetMutableCommandInfoKHR} + === New Types * {cl_mutable_dispatch_fields_khr_TYPE} @@ -81,11 +86,6 @@ also be updated between enqueues of the command-buffer. * {cl_mutable_dispatch_exec_info_khr_TYPE} * {cl_mutable_dispatch_arg_khr_TYPE} -=== New Commands - - * {clUpdateMutableCommandsKHR} - * {clGetMutableCommandInfoKHR} - === New Enums * {cl_device_info_TYPE} diff --git a/api/cl_khr_d3d10_sharing.asciidoc b/api/cl_khr_d3d10_sharing.asciidoc index ace32dbe9..1f8fe5242 100644 --- a/api/cl_khr_d3d10_sharing.asciidoc +++ b/api/cl_khr_d3d10_sharing.asciidoc @@ -16,11 +16,6 @@ include::{generated}/meta/{refprefix}cl_khr_d3d10_sharing.txt[] `cl_khr_d3d10_sharing` provides interoperability between OpenCL and Direct3D 10. 
-=== New Types - - * {cl_d3d10_device_source_khr_TYPE} - * {cl_d3d10_device_set_khr_TYPE} - === New Commands * {clGetDeviceIDsFromD3D10KHR} @@ -30,7 +25,12 @@ include::{generated}/meta/{refprefix}cl_khr_d3d10_sharing.txt[] * {clEnqueueAcquireD3D10ObjectsKHR} * {clEnqueueReleaseD3D10ObjectsKHR} -=== New Tokens +=== New Types + + * {cl_d3d10_device_source_khr_TYPE} + * {cl_d3d10_device_set_khr_TYPE} + +=== New Enums * {cl_d3d10_device_source_khr_TYPE} ** {CL_D3D10_DEVICE_KHR} diff --git a/api/cl_khr_d3d11_sharing.asciidoc b/api/cl_khr_d3d11_sharing.asciidoc index 884044eda..2ee08b859 100644 --- a/api/cl_khr_d3d11_sharing.asciidoc +++ b/api/cl_khr_d3d11_sharing.asciidoc @@ -25,7 +25,12 @@ include::{generated}/meta/{refprefix}cl_khr_d3d11_sharing.txt[] * {clEnqueueAcquireD3D11ObjectsKHR} * {clEnqueueReleaseD3D11ObjectsKHR} -=== New Tokens +=== New Types + + * {cl_d3d11_device_source_khr_TYPE} + * {cl_d3d11_device_set_khr_TYPE} + +=== New Enums * {cl_d3d11_device_source_khr_TYPE} ** {CL_D3D11_DEVICE_KHR} diff --git a/api/cl_khr_dx9_media_sharing.asciidoc b/api/cl_khr_dx9_media_sharing.asciidoc index 92e8ed517..60465c6d5 100644 --- a/api/cl_khr_dx9_media_sharing.asciidoc +++ b/api/cl_khr_dx9_media_sharing.asciidoc @@ -32,7 +32,12 @@ adapter. * {clEnqueueAcquireDX9MediaSurfacesKHR} * {clEnqueueReleaseDX9MediaSurfacesKHR} -=== New Tokens +=== New Types + + * {cl_dx9_media_adapter_type_khr_TYPE} + * {cl_dx9_media_adapter_set_khr_TYPE} + +=== New Enums * {cl_dx9_media_adapter_type_khr_TYPE} ** {CL_ADAPTER_D3D9_KHR} diff --git a/api/cl_khr_egl_event.asciidoc b/api/cl_khr_egl_event.asciidoc index f9663530f..97293a7cc 100644 --- a/api/cl_khr_egl_event.asciidoc +++ b/api/cl_khr_egl_event.asciidoc @@ -24,7 +24,7 @@ functionality of creating an EGL sync object from an OpenCL event object. 
* {clCreateEventFromEGLSyncKHR} -=== New Tokens +=== New Enums * New Error Codes ** {CL_INVALID_EGL_OBJECT_KHR} diff --git a/api/cl_khr_egl_image.asciidoc b/api/cl_khr_egl_image.asciidoc index d324637ad..d4e7f5fee 100644 --- a/api/cl_khr_egl_image.asciidoc +++ b/api/cl_khr_egl_image.asciidoc @@ -22,6 +22,9 @@ from EGLImages. * {clCreateFromEGLImageKHR} * {clEnqueueAcquireEGLObjectsKHR} * {clEnqueueReleaseEGLObjectsKHR} + +=== New Enums + * {cl_event_info_TYPE} ** {CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR} ** {CL_COMMAND_RELEASE_EGL_OBJECTS_KHR} diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index c9f3567ba..867d9a5f2 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -66,10 +66,6 @@ TODO * {clEnqueueAcquireExternalMemObjectsKHR} * {clEnqueueReleaseExternalMemObjectsKHR} -=== New Structures - - * None - === New Types * {cl_external_memory_handle_type_khr_TYPE} diff --git a/api/cl_khr_external_memory_dma_buf.asciidoc b/api/cl_khr_external_memory_dma_buf.asciidoc index 63c86b339..be578969e 100644 --- a/api/cl_khr_external_memory_dma_buf.asciidoc +++ b/api/cl_khr_external_memory_dma_buf.asciidoc @@ -55,18 +55,6 @@ TODO // The 'New ...' section can be auto-generated -=== New Commands - - None - -=== New Structures - - * None - -=== New Types - - * None - === New Enums * {cl_external_memory_handle_type_khr_TYPE} diff --git a/api/cl_khr_external_memory_dx.asciidoc b/api/cl_khr_external_memory_dx.asciidoc index 5675324ec..a0be41b1b 100644 --- a/api/cl_khr_external_memory_dx.asciidoc +++ b/api/cl_khr_external_memory_dx.asciidoc @@ -55,18 +55,6 @@ TODO // The 'New ...' 
section can be auto-generated -=== New Commands - - None - -=== New Structures - - * None - -=== New Types - - * None - === New Enums * {cl_external_memory_handle_type_khr_TYPE} diff --git a/api/cl_khr_external_memory_opaque_fd.asciidoc b/api/cl_khr_external_memory_opaque_fd.asciidoc index 1b1d7c44a..208b2391b 100644 --- a/api/cl_khr_external_memory_opaque_fd.asciidoc +++ b/api/cl_khr_external_memory_opaque_fd.asciidoc @@ -55,18 +55,6 @@ TODO // The 'New ...' section can be auto-generated -=== New Commands - - None - -=== New Structures - - * None - -=== New Types - - * None - === New Enums * {cl_external_memory_handle_type_khr_TYPE} diff --git a/api/cl_khr_external_memory_win32.asciidoc b/api/cl_khr_external_memory_win32.asciidoc index 8b07606bc..65f3dc337 100644 --- a/api/cl_khr_external_memory_win32.asciidoc +++ b/api/cl_khr_external_memory_win32.asciidoc @@ -55,18 +55,6 @@ TODO // The 'New ...' section can be auto-generated -=== New Commands - - None - -=== New Structures - - * None - -=== New Types - - * None - === New Enums * {cl_external_memory_handle_type_khr_TYPE} diff --git a/api/cl_khr_fp16.asciidoc b/api/cl_khr_fp16.asciidoc index 7732cc29e..d2ebcd284 100644 --- a/api/cl_khr_fp16.asciidoc +++ b/api/cl_khr_fp16.asciidoc @@ -23,7 +23,7 @@ built-in types that can be used for arithmetic operations, conversions, etc. See the link:{OpenCLCSpecURL}#cl_khr_fp16[Half-Precision Floating-Point] section of the OpenCL C specification for more information. -=== New Tokens +=== New Enums * {cl_device_info_TYPE} ** {CL_DEVICE_HALF_FP_CONFIG} diff --git a/api/cl_khr_fp64.asciidoc b/api/cl_khr_fp64.asciidoc index ad1e8f763..9bb28c3a5 100644 --- a/api/cl_khr_fp64.asciidoc +++ b/api/cl_khr_fp64.asciidoc @@ -24,7 +24,7 @@ conversions, etc. See the link:{OpenCLCSpecURL}#cl_khr_fp64[Double-Precision Floating-Point] section of the OpenCL C specification for more information. 
-=== New Tokens +=== New Enums * {cl_device_info_TYPE} ** {CL_DEVICE_DOUBLE_FP_CONFIG} diff --git a/api/cl_khr_gl_event.asciidoc b/api/cl_khr_gl_event.asciidoc index 2118d7e2b..2ee22f486 100644 --- a/api/cl_khr_gl_event.asciidoc +++ b/api/cl_khr_gl_event.asciidoc @@ -30,7 +30,7 @@ as the OpenCL context. * {clCreateEventFromGLsyncKHR} -=== New Tokens +=== New Enums * {cl_command_type_TYPE} ** {CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR} diff --git a/api/cl_khr_gl_sharing.asciidoc b/api/cl_khr_gl_sharing.asciidoc index 7d1bcdaa8..94a1a0004 100644 --- a/api/cl_khr_gl_sharing.asciidoc +++ b/api/cl_khr_gl_sharing.asciidoc @@ -61,7 +61,7 @@ and buffer object images with OpenCL is required by this extension. * {cl_gl_texture_info_TYPE} * {cl_gl_platform_info_TYPE} -=== New Tokens +=== New Enums * New Error Codes ** {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} diff --git a/api/cl_khr_icd.asciidoc b/api/cl_khr_icd.asciidoc index a28baa504..fb99fb74f 100644 --- a/api/cl_khr_icd.asciidoc +++ b/api/cl_khr_icd.asciidoc @@ -236,7 +236,7 @@ continue on to the next. * {clIcdGetPlatformIDsKHR} -=== New Tokens +=== New Enums Accepted as _param_name_ to the function {clGetPlatformInfo}: diff --git a/api/cl_khr_il_program.asciidoc b/api/cl_khr_il_program.asciidoc index fc4a3d7e9..472823546 100644 --- a/api/cl_khr_il_program.asciidoc +++ b/api/cl_khr_il_program.asciidoc @@ -28,7 +28,7 @@ The functionality described by this extension is a core feature in OpenCL * {clCreateProgramWithILKHR} -=== New Tokens +=== New Enums * {cl_device_info_TYPE} ** {CL_DEVICE_IL_VERSION_KHR} diff --git a/api/cl_khr_image2d_from_buffer.asciidoc b/api/cl_khr_image2d_from_buffer.asciidoc index 17432c1ea..cb3f29eb0 100644 --- a/api/cl_khr_image2d_from_buffer.asciidoc +++ b/api/cl_khr_image2d_from_buffer.asciidoc @@ -22,7 +22,7 @@ This extension became a core feature in OpenCL 2.0. Refer to the discussion of 2D images created from buffers in the <> section for additional details. 
-=== New Tokens +=== New Enums * {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR} * {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR} diff --git a/api/cl_khr_integer_dot_product.asciidoc b/api/cl_khr_integer_dot_product.asciidoc index ef47c2a6c..38377238d 100644 --- a/api/cl_khr_integer_dot_product.asciidoc +++ b/api/cl_khr_integer_dot_product.asciidoc @@ -52,10 +52,6 @@ Product] section of the OpenCL C specification for more information. ** {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR} ** {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR} -=== New SPIR-V Capabilities - - * TBD - === Version History * Revision 1.0.0, 2021-06-17 diff --git a/api/cl_khr_spir.asciidoc b/api/cl_khr_spir.asciidoc index a79966984..6776a9cc6 100644 --- a/api/cl_khr_spir.asciidoc +++ b/api/cl_khr_spir.asciidoc @@ -26,7 +26,7 @@ information on compiling SPIR binaries. which is supported by the `<>` extension, and is a core feature in OpenCL 2.1. -=== New Tokens +=== New Enums * {cl_device_info_TYPE} ** {CL_DEVICE_SPIR_VERSIONS} diff --git a/api/cl_khr_subgroups.asciidoc b/api/cl_khr_subgroups.asciidoc index bc5e2df20..44edb39c7 100644 --- a/api/cl_khr_subgroups.asciidoc +++ b/api/cl_khr_subgroups.asciidoc @@ -33,14 +33,14 @@ However, note that: See the link:{OpenCLCSpecURL}#cl_khr_subgroups[Sub-Groups] section of the OpenCL C specification for more information. -=== New Types - - * {cl_kernel_sub_group_info_TYPE} - === New Commands * {clGetKernelSubGroupInfoKHR} +=== New Types + + * {cl_kernel_sub_group_info_TYPE} + === New Enums * {cl_kernel_sub_group_info_TYPE} diff --git a/api/cl_khr_terminate_context.asciidoc b/api/cl_khr_terminate_context.asciidoc index fac86afd8..103f5f7ea 100644 --- a/api/cl_khr_terminate_context.asciidoc +++ b/api/cl_khr_terminate_context.asciidoc @@ -35,14 +35,14 @@ closure of ongoing operations when the results are no longer required in a much more expedient manner than waiting for all previously enqueued operations to finish. 
-=== New Types - - * {cl_device_terminate_capability_khr_TYPE} - === New Commands * {clTerminateContextKHR} +=== New Types + + * {cl_device_terminate_capability_khr_TYPE} + === New Enums * {cl_device_info_TYPE} From 6005ac7d10572e080a26929b60bdeeb904aa7648 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 31 Mar 2024 21:44:00 -0700 Subject: [PATCH 088/190] fix section titles for sub-group extensions (#1124) also fix a few other section titles for consistency --- OpenCL_C.txt | 64 ++++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 9dfc156b2..7a3b8379d 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -570,7 +570,7 @@ endif::cl_khr_subgroup_ballot[] ifdef::cl_khr_subgroup_clustered_reduce[] [[cl_khr_subgroup_clustered_reduce,cl_khr_subgroup_clustered_reduce]] -==== Clustered Reductions +==== Sub-Group Clustered Reductions The `cl_khr_subgroup_clustered_reduce` extension adds support for clustered reductions that operate on a subset of work items in the sub-group. 
@@ -600,7 +600,7 @@ endif::cl_khr_subgroup_extended_types[] ifdef::cl_khr_subgroup_non_uniform_arithmetic[] [[cl_khr_subgroup_non_uniform_arithmetic,cl_khr_subgroup_non_uniform_arithmetic]] -==== Built-in Non-Uniform Arithmetic Functions for Sub-Groups +==== Sub-Group Non-Uniform Arithmetic The `cl_khr_subgroup_non_uniform_arithmetic` extension adds the ability to use some sub-group functions within non-uniform flow control, including @@ -620,7 +620,7 @@ endif::cl_khr_subgroup_non_uniform_arithmetic[] ifdef::cl_khr_subgroup_non_uniform_vote[] [[cl_khr_subgroup_non_uniform_vote,cl_khr_subgroup_non_uniform_vote]] -==== Built-in Non-Uniform Vote and Election Functions for Sub-Groups +==== Sub-Group Non-Uniform Vote and Election Functions The `cl_khr_subgroup_non_uniform_vote` extension adds the ability to elect a single work item from a sub-group to perform a task and to hold votes among @@ -651,7 +651,7 @@ endif::cl_khr_subgroup_rotate[] ifdef::cl_khr_subgroup_shuffle[] [[cl_khr_subgroup_shuffle,cl_khr_subgroup_shuffle]] -==== General Purpose Shuffles +==== Sub-Group General Purpose Shuffles The `cl_khr_subgroup_shuffle` extension adds additional ways to exchange data among work items in a sub-group. @@ -665,7 +665,7 @@ endif::cl_khr_subgroup_shuffle[] ifdef::cl_khr_subgroup_shuffle_relative[] [[cl_khr_subgroup_shuffle_relative,cl_khr_subgroup_shuffle_relative]] -==== Relative Shuffles +==== Sub-Group Relative Shuffles The `cl_khr_subgroup_shuffle_relative` extension adds specialized ways to exchange data among work items in a sub-group that may perform better on @@ -8141,7 +8141,7 @@ This section specifies each general kind. 
[[atomic_store]] -===== *The atomic_store Functions* +===== The atomic_store Functions [open,refpage='atomic_store',desc='The atomic_store Functions',type='freeform',spec='clang',anchor='atomic_store',xrefs='atomicFunctions atomicTypes atomic_compare_exchange atomic_exchange atomic_fetch_key atomic_flag atomic_flag_clear atomic_flag_test_and_set atomic_flag_test_and_set_explicit atomic_init atomic_load atomic_store atomic_work_item_fence'] -- @@ -8212,7 +8212,7 @@ feature. [[atomic_load]] -===== *The atomic_load Functions* +===== The atomic_load Functions [open,refpage='atomic_load',desc='The atomic_load Functions',type='freeform',spec='clang',anchor='atomic_load',xrefs='atomicFunctions atomicTypes atomic_compare_exchange atomic_exchange atomic_fetch_key atomic_flag atomic_flag_clear atomic_flag_test_and_set atomic_flag_test_and_set_explicit atomic_init atomic_load atomic_store atomic_work_item_fence'] -- @@ -8276,7 +8276,7 @@ feature. [[atomic_exchange]] -===== *The atomic_exchange Functions* +===== The atomic_exchange Functions [open,refpage='atomic_exchange',desc='The atomic_exchange Functions',type='freeform',spec='clang',anchor='atomic_exchange',xrefs='atomicFunctions atomicTypes atomic_compare_exchange atomic_exchange atomic_fetch_key atomic_flag atomic_flag_clear atomic_flag_test_and_set atomic_flag_test_and_set_explicit atomic_init atomic_load atomic_store atomic_work_item_fence'] -- @@ -8348,7 +8348,7 @@ feature. [[atomic_compare_exchange]] -===== *The atomic_compare_exchange Functions* +===== The atomic_compare_exchange Functions [open,refpage='atomic_compare_exchange',desc='The atomic_compare_exchange Functions',type='freeform',spec='clang',anchor='atomic_compare_exchange',xrefs='atomicFunctions atomicTypes atomic_compare_exchange atomic_exchange atomic_fetch_key atomic_flag atomic_flag_clear atomic_flag_test_and_set atomic_flag_test_and_set_explicit atomic_init atomic_load atomic_store atomic_work_item_fence'] -- @@ -8669,7 +8669,7 @@ feature. 
[[atomic_fetch_key]] -===== *The atomic_fetch and modify Functions* +===== The atomic_fetch and modify Functions [open,refpage='atomic_fetch_key',desc='The atomic_fetch and modify Functions',type='freeform',spec='clang',anchor='atomic_fetch_key',xrefs='atomicFunctions atomicTypes atomic_compare_exchange atomic_exchange atomic_fetch_key atomic_flag atomic_flag_clear atomic_flag_test_and_set atomic_flag_test_and_set_explicit atomic_init atomic_load atomic_store atomic_work_item_fence'] -- @@ -8772,7 +8772,7 @@ feature. [[atomic_flag]] -===== *Atomic Flag Type and Operations* +===== Atomic Flag Type and Operations [open,refpage='atomic_flag',desc='Atomic Flag Type and Operations',type='freeform',spec='clang',anchor='atomic_flag',xrefs='atomicFunctions atomicTypes atomic_compare_exchange atomic_exchange atomic_fetch_key atomic_flag atomic_flag_clear atomic_flag_test_and_set atomic_flag_test_and_set_explicit atomic_init atomic_load atomic_store atomic_work_item_fence'] -- @@ -8800,7 +8800,7 @@ global atomic_flag guard = ATOMIC_FLAG_INIT; [[atomic_flag_test_and_set]] -===== *The atomic_flag_test_and_set Functions* +===== The atomic_flag_test_and_set Functions [open,refpage='atomicFlagTestAndSet',desc='The atomic_flag_test_and_set Functions',type='freeform',spec='clang',anchor='atomic_flag_test_and_set',xrefs='atomicFunctions atomicTypes atomic_compare_exchange atomic_exchange atomic_fetch_key atomic_flag atomic_flag_clear atomic_init atomic_load atomic_store atomic_work_item_fence',alias='atomic_flag_test_and_set atomic_flag_test_and_set_explicit'] -- @@ -8874,7 +8874,7 @@ feature. 
[[atomic_flag_clear]] -===== *The atomic_flag_clear Functions* +===== The atomic_flag_clear Functions [open,refpage='atomic_flag_clear',desc='The atomic_flag_clear Functions',type='freeform',spec='clang',anchor='atomic_flag_clear',xrefs='atomicFunctions atomicTypes atomic_compare_exchange atomic_exchange atomic_fetch_key atomic_flag atomic_flag_clear atomic_flag_test_and_set atomic_flag_test_and_set_explicit atomic_init atomic_load atomic_store atomic_work_item_fence'] -- @@ -10087,7 +10087,7 @@ queried using the `CL_DEVICE_MAX_SAMPLERS` token in *clGetDeviceInfo*. [[determining-the-border-color-or-value]] -===== *Determining the Border Color or Value* +===== Determining the Border Color or Value If `` in sampler is `CLK_ADDRESS_CLAMP`, then out-of-range image coordinates return the border color. @@ -10104,7 +10104,7 @@ of the following values: [[srgb-images]] -===== *sRGB Images* +===== sRGB Images The built-in image read functions will perform sRGB to linear RGB conversions if the image is an sRGB image. @@ -14071,7 +14071,7 @@ ifdef::cl_khr_subgroup_ballot[] NOTE: The functionality described in this section <> support for the `<>` extension. -The <> describes OpenCL C +The <> describes OpenCL C programming language built-in functions to allow work items in a sub-group to collect and operate on ballots from work items in the sub-group. These functions need not be encountered by all work items in a sub-group @@ -14266,7 +14266,7 @@ endif::cl_khr_subgroup_ballot[] ifdef::cl_khr_subgroup_clustered_reduce[] [[sub-group-clustered-reduction-functions]] -==== Built-in Clustered Reduction Functions for Sub-Groups +==== Built-in Sub-Group Clustered Reduction Functions NOTE: The functionality described in this section <> support for the `<>` extension. @@ -14298,7 +14298,7 @@ footnote:[{fn-half-supported}], and `double` footnote:[{fn-double-supported}]. 
[[table-clustered-reduce-math-functions]] -.Built-in Arithmetic Functions for Sub-Groups +.Built-in Sub-Group Clustered Reduction Arithmetic Functions [cols="1a,1",options="header",] |==== | Function | Description @@ -14336,7 +14336,7 @@ the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, or `ulong`. [[table-clustered-reduce-bitwise-functions]] -.Built-in Bitwise Functions for Sub-Groups +.Built-in Sub-Group Clustered Reduction Bitwise Functions [cols="1a,1",options="header",] |==== | Function | Description @@ -14366,7 +14366,7 @@ logically `true` and a zero _predicate_ argument or return value is logically `false`. [[table-clustered-reduce-logical-functions]] -.Built-in Logical Functions for Sub-Groups +.Built-in Sub-Group Clustered Reduction Logical Functions [cols="3a,2",options="header",] |==== | Function | Description @@ -14388,7 +14388,7 @@ endif::cl_khr_subgroup_clustered_reduce[] ifdef::cl_khr_subgroup_non_uniform_arithmetic[] -==== Built-in Non-Uniform Scan and Reduction Functions for Sub-Groups +==== Built-in Sub-Group Non-Uniform Scan and Reduction Functions NOTE: The functionality described in this section <> support for the `<>` extension. @@ -14407,7 +14407,7 @@ footnote:[{fn-half-supported}], and `double` footnote:[{fn-double-supported}]. [[table-non-uniform-math-functions]] -.Built-in Non-Uniform Arithmetic Functions for Sub-Groups +.Built-in Sub-Group Non-Uniform Arithmetic Functions [cols="3a,2",options="header",] |==== | Function | Description @@ -14499,7 +14499,7 @@ supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, and `ulong`. [[table-non-uniform-bitwise-functions]] -.Built-in Non-Uniform Bitwise Functions for Sub-Groups +.Built-in Sub-Group Non-Uniform Bitwise Functions [cols="3a,2",options="header",] |==== | Function | Description @@ -14561,7 +14561,7 @@ logically `true` and a zero _predicate_ argument or return value is logically `false`. 
[[table-non-uniform-logical-functions]] -.Built-in Non-Uniform Logical Functions for Sub-Groups +.Built-in Sub-Group Non-Uniform Logical Functions [cols="2a,1",options="header",] |==== | Function | Description @@ -14614,7 +14614,7 @@ endif::cl_khr_subgroup_non_uniform_arithmetic[] ifdef::cl_khr_subgroup_non_uniform_vote[] -==== Built-in Non-Uniform Vote Functions for Sub-Groups +==== Built-in Sub-Group Non-Uniform Vote Functions NOTE: The functionality described in this section <> support for the `<>` extension. @@ -14632,7 +14632,7 @@ footnote:[{fn-half-supported}], and `double` footnote:[{fn-double-supported}]. [[table-non-uniform-vote-functions]] -.Built-in Non-Uniform Vote Functions for Sub-Groups +.Built-in Sub-Group Non-Uniform Vote Functions [cols="1a,1",options="header",] |==== | Function | Description @@ -14704,7 +14704,7 @@ footnote:[{fn-half-supported}], and `double` footnote:[{fn-double-supported}]. [[table-rotate-functions]] -.Built-in Rotation Functions for Sub-Groups +.Built-in Sub-Group Rotation Functions [cols="1a,1",options="header",] |==== | Function | Description @@ -14748,7 +14748,7 @@ endif::cl_khr_subgroup_rotate[] ifdef::cl_khr_subgroup_shuffle[] -==== Built-in Shuffle Functions for Sub-Groups +==== Built-in Sub-Group General Purpose Shuffle Functions NOTE: The functionality described in this section <> support for the `<>` extension. @@ -14765,7 +14765,7 @@ footnote:[{fn-half-supported}], and `double` footnote:[{fn-double-supported}]. 
[[table-shuffle-functions]] -.Built-in Shuffle Functions for Sub-Groups +.Built-in Sub-Group General Purpose Shuffle Functions [cols="1a,1",options="header",] |==== | Function | Description @@ -14804,7 +14804,7 @@ endif::cl_khr_subgroup_shuffle[] ifdef::cl_khr_subgroup_shuffle_relative[] -==== Add a new Section 6.15.X - Sub-Group Relative Shuffle Built-in Functions +==== Built-in Sub-Group Relative Shuffle Functions The table below describes specialized OpenCL C programming language built-in functions that allow work items in a sub-group to exchange data. @@ -14817,7 +14817,7 @@ footnote:[{fn-half-supported}], and `double` footnote:[{fn-double-supported}]. [[table-shuffle-relative-functions]] -.Built-in Relative Shuffle Functions for Sub-Groups +.Built-in Sub-Group Relative Shuffle Functions [cols="1a,1",options="header",] |==== | Function | Description @@ -14859,7 +14859,7 @@ endif::cl_khr_subgroup_shuffle_relative[] [[extended-sub-groups-mapping]] -=== Sub-Groups Function Mapping and Capabilities +==== Sub-Groups Function Mapping and Capabilities This section describes a possible mapping between OpenCL built-in sub-group functions and SPIR-V instructions and required SPIR-V capabilities. From d339665e40ba24e77ba9b18ab1d9a83a7b03e394 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 31 Mar 2024 22:49:21 -0700 Subject: [PATCH 089/190] adjustments to fix long table cells (#1125) We really need a better long-term solution for this... 
--- OpenCL_C.txt | 925 ++++++++++++---------- api/opencl_runtime_layer.asciidoc | 28 +- config/rouge/lib/rouge/lexers/opencl.rb | 23 +- config/rouge/lib/rouge/lexers/opencl_c.rb | 1 + 4 files changed, 540 insertions(+), 437 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 7a3b8379d..bcab4229f 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -3528,18 +3528,18 @@ Examples: kernel void f() { -int *ptr; -local int *lptr; -global int *gptr; -local int val = 55; - -ptr = &val; // legal: implicit cast to generic, then assign -lptr = ptr; // illegal: no implicit cast from - // generic to local -lptr = gptr; // illegal: no implicit cast from - // global to local -ptr = gptr; // legal: implicit cast from global to generic, - // then assign + int *ptr; + local int *lptr; + global int *gptr; + local int val = 55; + + ptr = &val; // legal: implicit cast to generic, then assign + lptr = ptr; // illegal: no implicit cast from + // generic to local + lptr = gptr; // illegal: no implicit cast from + // global to local + ptr = gptr; // legal: implicit cast from global to generic, + // then assign } ---------- @@ -6225,8 +6225,8 @@ a| [source,opencl_c] ---- gentype bitfield_insert( - gentype base, gentype insert, - uint offset, uint count) + gentype base, gentype insert, + uint offset, uint count) ---- | Returns a copy of _base_, with a modified bitfield that comes from _insert_. @@ -6252,8 +6252,8 @@ a| [source,opencl_c] ---- igentype bitfield_extract_signed( - gentype base, - uint offset, uint count) + gentype base, + uint offset, uint count) ---- | Returns an extracted bitfield from _base_ with sign extension. The type of the return value is always a signed type. @@ -6277,8 +6277,8 @@ a| [source,opencl_c] ---- ugentype bitfield_extract_unsigned( - gentype base, - uint offset, uint count) + gentype base, + uint offset, uint count) ---- | Returns an extracted bitfield from _base_ with zero extension. The type of the return value is always an unsigned type. 
@@ -6301,7 +6301,7 @@ a| [source,opencl_c] ---- gentype bit_reverse( - gentype base) + gentype base) ---- | Returns the value of _base_ with reversed bits. That is, the bit numbered _n_ of the result value will be taken from @@ -7581,7 +7581,7 @@ ifdef::cl_khr_async_work_group_copy_fence[] |[source,opencl_c] ---- void async_work_group_copy_fence( - cl_mem_fence_flags flags) + cl_mem_fence_flags flags) ---- | Orders async copies produced by the work-items of a work-group executing a kernel. @@ -7682,28 +7682,28 @@ a| [source,opencl_c] ---- event_t async_work_group_copy_2D2D( - __local void *dst, - size_t dst_offset, - const __global void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t src_total_line_length, - size_t dst_total_line_length, - event_t event) + __local void *dst, + size_t dst_offset, + const __global void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t src_total_line_length, + size_t dst_total_line_length, + event_t event) event_t async_work_group_copy_2D2D( - __global void *dst, - size_t dst_offset, - const __local void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t src_total_line_length, - size_t dst_total_line_length, - event_t event) + __global void *dst, + size_t dst_offset, + const __local void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t src_total_line_length, + size_t dst_total_line_length, + event_t event) ---- | Perform an async copy of (_num_elements_per_line_ * _num_lines_) elements of size _num_bytes_per_element_ from (_src_ + (_src_offset_ * @@ -7730,34 +7730,34 @@ a| [source,opencl_c] ---- event_t async_work_group_copy_3D3D( - __local void *dst, - size_t dst_offset, - const __global void *src, - size_t src_offset, - size_t num_bytes_per_element, - 
size_t num_elements_per_line, - size_t num_lines, - size_t num_planes, - size_t src_total_line_length, - size_t src_total_plane_area, - size_t dst_total_line_length, - size_t dst_total_plane_area, - event_t event) + __local void *dst, + size_t dst_offset, + const __global void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t num_planes, + size_t src_total_line_length, + size_t src_total_plane_area, + size_t dst_total_line_length, + size_t dst_total_plane_area, + event_t event) event_t async_work_group_copy_3D3D( - __global void *dst, - size_t dst_offset, - const __local void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t num_planes, - size_t src_total_line_length, - size_t src_total_plane_area, - size_t dst_total_line_length, - size_t dst_total_plane_area, - event_t event) + __global void *dst, + size_t dst_offset, + const __local void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t num_planes, + size_t src_total_line_length, + size_t src_total_plane_area, + size_t dst_total_line_length, + size_t dst_total_plane_area, + event_t event) ---- | Perform an async copy of ((_num_elements_per_line_ * _num_lines_) * _num_planes_) elements of size _num_bytes_per_element_ from (_src_ + @@ -10750,28 +10750,28 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float lod) + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float lod) int4 read_imagei( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float lod) + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float lod) uint4 read_imageui( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float lod) + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float lod) 
float read_imagef( - read_only image2d_depth_t image, - sampler_t sampler, - float2 coord, - float lod) + read_only image2d_depth_t image, + sampler_t sampler, + float2 coord, + float lod) ---- | Use the coordinate _coord.xy_ to do an element lookup in the mip level specified by _lod_ in the 2D image object specified by _image_. @@ -10782,32 +10782,32 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) int4 read_imagei( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) uint4 read_imageui( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) float read_imagef( - read_only image2d_depth_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) + read_only image2d_depth_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) ---- | Use the gradients to compute the lod and coordinate _coord.xy_ to do an element lookup in the mip level specified by the computed lod in @@ -10819,22 +10819,22 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image1d_t image, - sampler_t sampler, - float coord, - float lod) + read_only image1d_t image, + sampler_t sampler, + float coord, + float lod) int4 read_imagei( - read_only image1d_t image, - sampler_t sampler, - float coord, - float lod) + read_only image1d_t image, + sampler_t sampler, + float coord, + float lod) uint4 read_imageui( - read_only image1d_t image, - sampler_t sampler, - float coord, - float lod) + read_only 
image1d_t image, + sampler_t sampler, + float coord, + float lod) ---- | Use the coordinate _coord_ to do an element lookup in the mip level specified by _lod_ in the 1D image object specified by _image_. @@ -10845,25 +10845,25 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image1d_t image, - sampler_t sampler, - float coord, - float gradient_x, - float gradient_y) + read_only image1d_t image, + sampler_t sampler, + float coord, + float gradient_x, + float gradient_y) int4 read_imagei( - read_only image1d_t image, - sampler_t sampler, - float coord, - float gradient_x, - float gradient_y) + read_only image1d_t image, + sampler_t sampler, + float coord, + float gradient_x, + float gradient_y) uint4 read_imageui( - read_only image1d_t image, - sampler_t sampler, - float coord, - float gradient_x, - float gradient_y) + read_only image1d_t image, + sampler_t sampler, + float coord, + float gradient_x, + float gradient_y) ---- | Use the gradients to compute the lod and coordinate _coord_ to do an element lookup in the mip level specified by the computed lod in the @@ -10875,22 +10875,22 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float lod) + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float lod) int4 read_imagei( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float lod) + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float lod) uint4 read_imageui( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float lod) + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float lod) ---- | Use the coordinate _coord.xyz_ to do an element lookup in the mip level specified by _lod_ in the 3D image object specified by _image_. 
@@ -10901,25 +10901,25 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float4 gradient_x, - float4 gradient_y) + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float4 gradient_x, + float4 gradient_y) int4 read_imagei( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float4 gradient_x, - float4 gradient_y) + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float4 gradient_x, + float4 gradient_y) uint4 read_imageui( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float4 gradient_x, - float4 gradient_y) + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float4 gradient_x, + float4 gradient_y) ---- | Use the gradients to compute the lod and coordinate _coord.xyz_ to do an element lookup in the mip level specified by the computed lod in @@ -10931,22 +10931,22 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float lod) + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float lod) int4 read_imagei( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float lod) + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float lod) uint4 read_imageui( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float lod) + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float lod) ---- | Use the coordinate _coord.x_ to do an element lookup in the 1D image identified by _coord.x_ and mip level specified by _lod_ in the 1D @@ -10958,25 +10958,25 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float gradient_x, - float gradient_y) + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float gradient_x, + float gradient_y) int4 read_imagei( - read_only 
image1d_array_t image, - sampler_t sampler, - float2 coord, - float gradient_x, - float gradient_y) + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float gradient_x, + float gradient_y) uint4 read_imageui( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float gradient_x, - float gradient_y) + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float gradient_x, + float gradient_y) ---- | Use the gradients to compute the lod and coordinate _coord.x_ to do an element lookup in the mip level specified by the computed lod in the @@ -10988,28 +10988,28 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float lod) + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float lod) int4 read_imagei( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float lod) + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float lod) uint4 read_imageui( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float lod) + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float lod) float read_imagef( - read_only image2d_array_depth_t image, - sampler_t sampler, - float4 coord, - float lod) + read_only image2d_array_depth_t image, + sampler_t sampler, + float4 coord, + float lod) ---- | Use the coordinate _coord.xy_ to do an element lookup in the 2D image identified by _coord.z_ and mip level specified by _lod_ in the 2D @@ -11021,32 +11021,32 @@ a| [source,opencl_c] ---- float4 read_imagef( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) int4 read_imagei( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) 
+ read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) uint4 read_imageui( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) float read_imagef( - read_only image2d_array_depth_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) + read_only image2d_array_depth_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) ---- | Use the gradients to compute the lod coordinate and _coord.xy_ to do an element lookup in the 2D image identified by _coord.z_ and mip @@ -11520,9 +11520,9 @@ a| [source,opencl_c] ---- float4 read_imagef( - image2d_msaa_t image, - int2 coord, - int sample) + image2d_msaa_t image, + int2 coord, + int sample) ---- | Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element lookup in the 2D image object specified by _image_. @@ -11548,13 +11548,15 @@ float4 read_imagef( a| [source,opencl_c] ---- -int4 read_imagei(image2d_msaa_t image, - int2 coord, - int sample) +int4 read_imagei( + image2d_msaa_t image, + int2 coord, + int sample) -uint4 read_imageui(image2d_msaa_t image, - int2 coord, - int sample) +uint4 read_imageui( + image2d_msaa_t image, + int2 coord, + int sample) ---- | Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element lookup in the 2D image object specified by _image_. @@ -11588,9 +11590,10 @@ uint4 read_imageui(image2d_msaa_t image, a| [source,opencl_c] ---- -float4 read_imagef(image2d_array_msaa_t image, - int4 coord, - int sample) +float4 read_imagef( + image2d_array_msaa_t image, + int4 coord, + int sample) ---- | Use _coord.xy_ and _sample_ to do an element lookup in the 2D image identified by _coord.z_ in the 2D image array specified by _image_. 
@@ -11616,13 +11619,15 @@ float4 read_imagef(image2d_array_msaa_t image, a| [source,opencl_c] ---- -int4 read_imagei(image2d_array_msaa_t image, - int4 coord, - int sample) +int4 read_imagei( + image2d_array_msaa_t image, + int4 coord, + int sample) -uint4 read_imageui(image2d_array_msaa_t image, - int4 coord, - int sample) +uint4 read_imageui( + image2d_array_msaa_t image, + int4 coord, + int sample) ---- | Use _coord.xy_ and _sample_ to do an element lookup in the 2D image identified by _coord.z_ in the 2D image array specified by _image_. @@ -11656,9 +11661,10 @@ uint4 read_imageui(image2d_array_msaa_t image, a| [source,opencl_c] ---- -float read_imagef(image2d_msaa_depth_t image, - int2 coord, - int sample) +float read_imagef( + image2d_msaa_depth_t image, + int2 coord, + int sample) ---- | Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element lookup in the 2D depth image object specified by _image_. @@ -11751,15 +11757,27 @@ endif::cl_khr_srgb_image_writes[] [[table-image-write]] .Built-in Image Write Functions -[cols=",",options="header",] +[cols="3,4",options="header",] |==== | Function | Description -| void *write_imagef*(_aQual_ image2d_t _image_, int2 _coord_, float4 _color_) + +| void *write_imagef*( + + _aQual_ image2d_t _image_, + + int2 _coord_, + + float4 _color_) + ifdef::cl_khr_fp16[] - void *write_imageh*(_aQual_ image2d_t _image_, int2 _coord_, half4 _color_) + + void *write_imageh*( + + _aQual_ image2d_t _image_, + + int2 _coord_, + + half4 _color_) + endif::cl_khr_fp16[] - void *write_imagei*(_aQual_ image2d_t _image_, int2 _coord_, int4 _color_) + - void *write_imageui*(_aQual_ image2d_t _image_, int2 _coord_, uint4 _color_) + void *write_imagei*( + + _aQual_ image2d_t _image_, + + int2 _coord_, + + int4 _color_) + + void *write_imageui*( + + _aQual_ image2d_t _image_, + + int2 _coord_, + + uint4 _color_) | Write _color_ value to location specified by _coord.xy_ in the 2D image object specified by _image_. 
Appropriate data format conversion to the specified image format is @@ -11805,15 +11823,23 @@ ifdef::cl_khr_fp16[] `<>` extension macro. endif::cl_khr_fp16[] | | -| void *write_imagef*(_aQual_ image2d_array_t _image_, int4 _coord_, +| void *write_imagef*( + + _aQual_ image2d_array_t _image_, + + int4 _coord_, + float4 _color_) + ifdef::cl_khr_fp16[] - void *write_imageh*(_aQual_ image2d_array_t _image_, int4 _coord_, + void *write_imageh*( + + _aQual_ image2d_array_t _image_, + + int4 _coord_, + half4 _color_) + endif::cl_khr_fp16[] - void *write_imagei*(_aQual_ image2d_array_t _image_, int4 _coord_, + void *write_imagei*( + + _aQual_ image2d_array_t _image_, + + int4 _coord_, + int4 _color_) + - void *write_imageui*(_aQual_ image2d_array_t _image_, int4 _coord_, + void *write_imageui*( + + _aQual_ image2d_array_t _image_, + + int4 _coord_, + uint4 _color_) | Write _color_ value to location specified by _coord.xy_ in the 2D image identified by _coord.z_ in the 2D image array specified by @@ -11862,25 +11888,41 @@ ifdef::cl_khr_fp16[] `<>` extension macro. 
endif::cl_khr_fp16[] | | -| void *write_imagef*(_aQual_ image1d_t _image_, int _coord_, +| void *write_imagef*( + + _aQual_ image1d_t _image_, + + int _coord_, + float4 _color_) + ifdef::cl_khr_fp16[] - void *write_imageh*(_aQual_ image1d_t _image_, int _coord_, + void *write_imageh*( + + _aQual_ image1d_t _image_, + + int _coord_, + half4 _color_) + endif::cl_khr_fp16[] - void *write_imagei*(_aQual_ image1d_t _image_, int _coord_, + void *write_imagei*( + + _aQual_ image1d_t _image_, + + int _coord_, + int4 _color_) + - void *write_imageui*(_aQual_ image1d_t _image_, int _coord_, + void *write_imageui*( + + _aQual_ image1d_t _image_, + + int _coord_, + uint4 _color_) + - void *write_imagef*(_aQual_ image1d_buffer_t _image_, int _coord_, + void *write_imagef*( + + _aQual_ image1d_buffer_t _image_, + + int _coord_, + float4 _color_) + ifdef::cl_khr_fp16[] - void *write_imageh*(_aQual_ image1d_buffer_t _image_, int _coord_, + void *write_imageh*( + + _aQual_ image1d_buffer_t _image_, + + int _coord_, + half4 _color_) + endif::cl_khr_fp16[] - void *write_imagei*(_aQual_ image1d_buffer_t _image_, int _coord_, + void *write_imagei*( + + _aQual_ image1d_buffer_t _image_, + + int _coord_, + int4 _color_) + - void *write_imageui*(_aQual_ image1d_buffer_t _image_, int _coord_, + void *write_imageui*( + + _aQual_ image1d_buffer_t _image_, + + int _coord_, + uint4 _color_) | Write _color_ value to location specified by _coord_ in the 1D image or 1D image buffer object specified by _image_. @@ -11927,15 +11969,23 @@ ifdef::cl_khr_fp16[] `<>` extension macro. 
endif::cl_khr_fp16[] | | -| void *write_imagef*(_aQual_ image1d_array_t _image_, int2 _coord_, +| void *write_imagef*( + + _aQual_ image1d_array_t _image_, + + int2 _coord_, + float4 _color_) + ifdef::cl_khr_fp16[] - void *write_imageh*(_aQual_ image1d_array_t _image_, int2 _coord_, + void *write_imageh*( + + _aQual_ image1d_array_t _image_, + + int2 _coord_, + half4 _color_) + endif::cl_khr_fp16[] - void *write_imagei*(_aQual_ image1d_array_t _image_, int2 _coord_, + void *write_imagei*( + + _aQual_ image1d_array_t _image_, + + int2 _coord_, + int4 _color_) + - void *write_imageui*(_aQual_ image1d_array_t _image_, int2 _coord_, + void *write_imageui*( + + _aQual_ image1d_array_t _image_, + + int2 _coord_, uint4 _color_) | Write _color_ value to location specified by _coord.x_ in the 1D image identified by _coord.y_ in the 1D image array specified by _image_. @@ -11979,7 +12029,9 @@ ifdef::cl_khr_fp16[*write_imageh*,] <> support for OpenCL C 1.2 or newer. | | -| void *write_imagef*(_aQual_ image2d_depth_t _image_, int2 _coord_, +| void *write_imagef*( + + _aQual_ image2d_depth_t _image_, + + int2 _coord_, + float _depth_) | Write _depth_ value to location specified by _coord.xy_ in the 2D depth image object specified by _image_. @@ -12005,7 +12057,9 @@ ifdef::cl_khr_fp16[*write_imageh*,] <> support for OpenCL C 2.0 or newer, or for the `<>` extension macro. | | -| void *write_imagef*(_aQual_ image2d_array_depth_t _image_, int4 _coord_, +| void *write_imagef*( + + _aQual_ image2d_array_depth_t _image_, + + int4 _coord_, + float _depth_) | Write _depth_ value to location specified by _coord.xy_ in the 2D image identified by _coord.z_ in the 2D depth image array specified by @@ -12032,15 +12086,23 @@ ifdef::cl_khr_fp16[*write_imageh*,] <> support for OpenCL C 2.0 or newer, or for the `<>` extension macro. 
| | -| void *write_imagef*(_aQual_ image3d_t _image_, int4 _coord_, +| void *write_imagef*( + + _aQual_ image3d_t _image_, + + int4 _coord_, + float4 _color_) + ifdef::cl_khr_fp16[] - void *write_imageh*(_aQual_ image3d_t _image_, int4 _coord_, + void *write_imageh*( + + _aQual_ image3d_t _image_, + + int4 _coord_, + half4 _color_) + endif::cl_khr_fp16[] - void *write_imagei*(_aQual_ image3d_t _image_, int4 _coord_, + void *write_imagei*( + + _aQual_ image3d_t _image_, + + int4 _coord_, + int4 _color_) + - void *write_imageui*(_aQual_ image3d_t _image_, int4 _coord_, + void *write_imageui*( + + _aQual_ image3d_t _image_, + + int4 _coord_, + uint4 _color_) | Write _color_ value to the location specified by _coord.xyz_ in the 3D image object specified by _image_. @@ -12096,28 +12158,28 @@ a| [source,opencl_c] ---- void write_imagef( - write_only image2d_t image, - int2 coord, - int lod, - float4 color) + write_only image2d_t image, + int2 coord, + int lod, + float4 color) void write_imagei( - write_only image2d_t image, - int2 coord, - int lod, - int4 color) + write_only image2d_t image, + int2 coord, + int lod, + int4 color) void write_imageui( - write_only image2d_t image, - int2 coord, - int lod, - uint4 color) + write_only image2d_t image, + int2 coord, + int lod, + uint4 color) void write_imagef( - write_only image2d_depth_t image, - int2 coord, - int lod, - float depth) + write_only image2d_depth_t image, + int2 coord, + int lod, + float depth) ---- | Write _color_ value to location specified by _coord.xy_ in the mip level specified by _lod_ in the 2D image object specified by _image_. 
@@ -12137,22 +12199,22 @@ a| [source,opencl_c] ---- void write_imagef( - write_only image1d_t image, - int coord, - int lod, - float4 color) + write_only image1d_t image, + int coord, + int lod, + float4 color) void write_imagei( - write_only image1d_t image, - int coord, - int lod, - int4 color) + write_only image1d_t image, + int coord, + int lod, + int4 color) void write_imageui( - write_only image1d_t image, - int coord, - int lod, - uint4 color) + write_only image1d_t image, + int coord, + int lod, + uint4 color) ---- | Write _color_ value to location specified by _coord_ in the mip level specified by _lod_ in the 1D image object specified by _image_. @@ -12170,22 +12232,22 @@ a| [source,opencl_c] ---- void write_imagef( - write_only image1d_array_t image, - int2 coord, - int lod, - float4 color) + write_only image1d_array_t image, + int2 coord, + int lod, + float4 color) void write_imagei( - write_only image1d_array_t image, - int2 coord, - int lod, - int4 color) + write_only image1d_array_t image, + int2 coord, + int lod, + int4 color) void write_imageui( - write_only image1d_array_t image, - int2 coord, - int lod, - uint4 color) + write_only image1d_array_t image, + int2 coord, + int lod, + uint4 color) ---- | Write _color_ value to location specified by _coord.x_ in the 1D image identified by _coord.y_ and mip level _lod_ in the 1D image array @@ -12205,28 +12267,28 @@ a| [source,opencl_c] ---- void write_imagef( - write_only image2d_array_t image, - int4 coord, - int lod, - float4 color) + write_only image2d_array_t image, + int4 coord, + int lod, + float4 color) void write_imagei( - write_only image2d_array_t image, - int4 coord, - int lod, - int4 color) + write_only image2d_array_t image, + int4 coord, + int lod, + int4 color) void write_imageui( - write_only image2d_array_t image, - int4 coord, - int lod, - uint4 color) + write_only image2d_array_t image, + int4 coord, + int lod, + uint4 color) void write_imagef( - write_only image2d_array_depth_t 
image, - int4 coord, - int lod, - float depth) + write_only image2d_array_depth_t image, + int4 coord, + int lod, + float depth) ---- | Write _color_ value to location specified by _coord.xy_ in the 2D image identified by _coord.z_ and mip level _lod_ in the 2D image array @@ -12247,22 +12309,22 @@ a| [source,opencl_c] ---- void write_imagef( - write_only image3d_t image, - int4 coord, - int lod, - float4 color) + write_only image3d_t image, + int4 coord, + int lod, + float4 color) void write_imagei( - write_only image3d_t image, - int4 coord, - int lod, - int4 color) + write_only image3d_t image, + int4 coord, + int lod, + int4 color) void write_imageui( - write_only image3d_t image, - int4 coord, - int lod, - uint4 color) + write_only image3d_t image, + int4 coord, + int lod, + uint4 color) ---- | Write _color_ value to location specified by _coord.xyz_ and mip level _lod_ in the 3D image object specified by _image_. @@ -12765,22 +12827,28 @@ logically `false`. [[table-builtin-work-group-logical]] .Built-in Work-group Logical Arithmetic Functions -[cols="2a,1",options="header"] +[cols="4a,3",options="header"] |==== | Function | Description |[source,opencl_c] ---- -int work_group_reduce_logical_and(int predicate); -int work_group_reduce_logical_or(int predicate); -int work_group_reduce_logical_xor(int predicate); +int work_group_reduce_logical_and( + int predicate); +int work_group_reduce_logical_or( + int predicate); +int work_group_reduce_logical_xor( + int predicate); ---- | Returns the logical *and*, *or*, or *xor* of _predicate_ for all work items in the work-group. 
|[source,opencl_c] ---- -int work_group_scan_inclusive_logical_and(int predicate); -int work_group_scan_inclusive_logical_or(int predicate); -int work_group_scan_inclusive_logical_xor(int predicate); +int work_group_scan_inclusive_logical_and( + int predicate); +int work_group_scan_inclusive_logical_or( + int predicate); +int work_group_scan_inclusive_logical_xor( + int predicate); ---- | Returns the result of an inclusive scan operation, which is the logical *and*, *or*, or *xor* of _predicate_ for all work items in the @@ -12788,9 +12856,12 @@ int work_group_scan_inclusive_logical_xor(int predicate); work item's work-group linear local ID. |[source,opencl_c] ---- -int work_group_scan_exclusive_logical_and(int predicate); -int work_group_scan_exclusive_logical_or(int predicate); -int work_group_scan_exclusive_logical_xor(int predicate); +int work_group_scan_exclusive_logical_and( + int predicate); +int work_group_scan_exclusive_logical_or( + int predicate); +int work_group_scan_exclusive_logical_xor( + int predicate); ---- | Returns the result of an exclusive scan operation, which is the logical *and*, *or*, or *xor* of _predicate_ for all work items in the @@ -12814,22 +12885,28 @@ supported built-in scalar data types `int`, `uint`, `long`, and `ulong`. [[table-builtin-work-group-bitwise-integer]] .Built-in Work-group Bitwise Integer Functions -[cols="2a,1",options="header"] +[cols="1a,1",options="header"] |==== | Function | Description |[source,opencl_c] ---- -gentype work_group_reduce_and(gentype value); -gentype work_group_reduce_or(gentype value); -gentype work_group_reduce_xor(gentype value); +gentype work_group_reduce_and( + gentype value); +gentype work_group_reduce_or( + gentype value); +gentype work_group_reduce_xor( + gentype value); ---- | Returns the bitwise *and*, *or*, or *xor* of _value_ for all work items in the work-group. 
|[source,opencl_c] ---- -gentype work_group_scan_inclusive_and(gentype value); -gentype work_group_scan_inclusive_or(gentype value); -gentype work_group_scan_inclusive_xor(gentype value); +gentype work_group_scan_inclusive_and( + gentype value); +gentype work_group_scan_inclusive_or( + gentype value); +gentype work_group_scan_inclusive_xor( + gentype value); ---- | Returns the result of an inclusive scan operation, which is the bitwise *and*, *or*, or *xor* of _value_ for all work items in the work-group @@ -12837,9 +12914,12 @@ gentype work_group_scan_inclusive_xor(gentype value); work-group linear local ID. |[source,opencl_c] ---- -gentype work_group_scan_exclusive_and(gentype value); -gentype work_group_scan_exclusive_or(gentype value); -gentype work_group_scan_exclusive_xor(gentype value); +gentype work_group_scan_exclusive_and( + gentype value); +gentype work_group_scan_exclusive_or( + gentype value); +gentype work_group_scan_exclusive_xor( + gentype value); ---- | Returns the result of an exclusive scan operation, which is the bitwise *and*, *or*, or *xor* of _value_ for all work items in the work-group @@ -12865,18 +12945,20 @@ precision is supported). [[table-builtin-work-group-multiplicative]] .Built-in Work-group Multiplicative Functions -[cols="2a,1",options="header"] +[cols="1a,1",options="header"] |==== | Function | Description |[source,opencl_c] ---- -gentype work_group_reduce_mul(gentype value); +gentype work_group_reduce_mul( + gentype value); ---- | Returns the multiplication of _value_ for all work items in the work-group. |[source,opencl_c] ---- -gentype work_group_scan_inclusive_mul(gentype value); +gentype work_group_scan_inclusive_mul( + gentype value); ---- | Returns the result of an inclusive scan operation which is the multiplication of _value_ for all work items in the work-group with a @@ -12884,7 +12966,8 @@ gentype work_group_scan_inclusive_mul(gentype value); work-group linear local ID. 
|[source,opencl_c] ---- -gentype work_group_scan_exclusive_mul(gentype value); +gentype work_group_scan_exclusive_mul( + gentype value); ---- | Returns the result of an exclusive scan operation which is the multiplication of _value_ for all work items in the work-group with a @@ -14098,8 +14181,8 @@ footnote:[{fn-double-supported}]. |[source,opencl_c] ---- gentype sub_group_non_uniform_broadcast( - gentype value, - uint index ) + gentype value, + uint index ) ---- | Returns _value_ for the work item with sub-group local ID equal to _index_. @@ -14113,14 +14196,14 @@ gentype sub_group_non_uniform_broadcast( |[source,opencl_c] ---- gentype sub_group_broadcast_first( - gentype value ) + gentype value ) ---- | Returns _value_ for the work item with the smallest sub-group local ID among active work items in the sub-group. |[source,opencl_c] ---- uint4 sub_group_ballot( - int predicate ) + int predicate ) ---- | Returns a bitfield combining the _predicate_ values from all work items in the sub-group. 
@@ -14132,7 +14215,7 @@ uint4 sub_group_ballot( |[source,opencl_c] ---- int sub_group_inverse_ballot( - uint4 value ) + uint4 value ) ---- | Returns the predicate value for this work item in the sub-group from the bitfield _value_ representing predicate values from all work items in @@ -14148,8 +14231,8 @@ int sub_group_inverse_ballot( |[source,opencl_c] ---- int sub_group_ballot_bit_extract( - uint4 value, - uint index ) + uint4 value, + uint index ) ---- | Returns the predicate value for the work item with sub-group local ID equal to _index_ from the bitfield _value_ representing predicate values @@ -14164,7 +14247,7 @@ int sub_group_ballot_bit_extract( |[source,opencl_c] ---- uint sub_group_ballot_bit_count( - uint4 value ) + uint4 value ) ---- | Returns the number of bits that are set in the bitfield _value_, only considering the bits in _value_ that represent predicate values @@ -14173,7 +14256,7 @@ uint sub_group_ballot_bit_count( |[source,opencl_c] ---- uint sub_group_ballot_inclusive_scan( - uint4 value ) + uint4 value ) ---- | Returns the number of bits that are set in the bitfield _value_, only considering the bits in _value_ representing work items with a sub-group @@ -14181,7 +14264,7 @@ uint sub_group_ballot_inclusive_scan( |[source,opencl_c] ---- uint sub_group_ballot_exclusive_scan( - uint4 value ) + uint4 value ) ---- | Returns the number of bits that are set in the bitfield _value_, only considering the bits in _value_ representing work items with a sub-group @@ -14189,7 +14272,7 @@ uint sub_group_ballot_exclusive_scan( |[source,opencl_c] ---- uint sub_group_ballot_find_lsb( - uint4 value ) + uint4 value ) ---- | Returns the smallest sub-group local ID with a bit set in the bitfield _value_, only considering the bits in _value_ that represent predicate @@ -14202,7 +14285,7 @@ uint sub_group_ballot_find_lsb( |[source,opencl_c] ---- uint sub_group_ballot_find_msb( - uint4 value ) + uint4 value ) ---- | Returns the largest sub-group local ID with a 
bit set in the bitfield _value_, only considering the bits in _value_ that represent predicate @@ -14305,13 +14388,13 @@ footnote:[{fn-double-supported}]. |[source,opencl_c] ---- gentype sub_group_clustered_reduce_add( - gentype value, uint clustersize ) + gentype value, uint clustersize ) gentype sub_group_clustered_reduce_mul( - gentype value, uint clustersize ) + gentype value, uint clustersize ) gentype sub_group_clustered_reduce_min( - gentype value, uint clustersize ) + gentype value, uint clustersize ) gentype sub_group_clustered_reduce_max( - gentype value, uint clustersize ) + gentype value, uint clustersize ) ---- | Returns the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group within a cluster of the @@ -14343,11 +14426,11 @@ the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, |[source,opencl_c] ---- gentype sub_group_clustered_reduce_and( - gentype value, uint clustersize ) + gentype value, uint clustersize ) gentype sub_group_clustered_reduce_or( - gentype value, uint clustersize ) + gentype value, uint clustersize ) gentype sub_group_clustered_reduce_xor( - gentype value, uint clustersize ) + gentype value, uint clustersize ) ---- | Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group within a cluster of the specified _clustersize_. @@ -14373,11 +14456,11 @@ logically `false`. 
|[source,opencl_c] ---- int sub_group_clustered_reduce_logical_and( - int predicate, uint clustersize ) + int predicate, uint clustersize ) int sub_group_clustered_reduce_logical_or( - int predicate, uint clustersize ) + int predicate, uint clustersize ) int sub_group_clustered_reduce_logical_xor( - int predicate, uint clustersize ) + int predicate, uint clustersize ) ---- | Returns the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group within a cluster of the specified @@ -14414,13 +14497,13 @@ footnote:[{fn-double-supported}]. |[source,opencl_c] ---- gentype sub_group_non_uniform_reduce_add( - gentype value ) + gentype value ) gentype sub_group_non_uniform_reduce_min( - gentype value ) + gentype value ) gentype sub_group_non_uniform_reduce_max( - gentype value ) + gentype value ) gentype sub_group_non_uniform_reduce_mul( - gentype value ) + gentype value ) ---- | Returns the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group. 
@@ -14432,13 +14515,13 @@ gentype sub_group_non_uniform_reduce_mul( |[source,opencl_c] ---- gentype sub_group_non_uniform_scan_inclusive_add( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_inclusive_min( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_inclusive_max( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_inclusive_mul( - gentype value ) + gentype value ) ---- | Returns the result of an inclusive scan operation, which is the summation, multiplication, minimum, or maximum of _value_ for all active @@ -14452,13 +14535,13 @@ gentype sub_group_non_uniform_scan_inclusive_mul( |[source,opencl_c] ---- gentype sub_group_non_uniform_scan_exclusive_add( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_exclusive_min( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_exclusive_max( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_exclusive_mul( - gentype value ) + gentype value ) ---- | Returns the result of an exclusive scan operation, which is the summation, multiplication, minimum, or maximum of _value_ for all active @@ -14506,22 +14589,22 @@ supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, |[source,opencl_c] ---- gentype sub_group_non_uniform_reduce_and( - gentype value ) + gentype value ) gentype sub_group_non_uniform_reduce_or( - gentype value ) + gentype value ) gentype sub_group_non_uniform_reduce_xor( - gentype value ) + gentype value ) ---- | Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group. 
|[source,opencl_c] ---- gentype sub_group_non_uniform_scan_inclusive_and( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_inclusive_or( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_inclusive_xor( - gentype value ) + gentype value ) ---- | Returns the result of an inclusive scan operation, which is the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the @@ -14530,11 +14613,11 @@ gentype sub_group_non_uniform_scan_inclusive_xor( |[source,opencl_c] ---- gentype sub_group_non_uniform_scan_exclusive_and( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_exclusive_or( - gentype value ) + gentype value ) gentype sub_group_non_uniform_scan_exclusive_xor( - gentype value ) + gentype value ) ---- | Returns the result of an exclusive scan operation, which is the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the @@ -14568,22 +14651,22 @@ logically `false`. |[source,opencl_c] ---- int sub_group_non_uniform_reduce_logical_and( - int predicate ) + int predicate ) int sub_group_non_uniform_reduce_logical_or( - int predicate ) + int predicate ) int sub_group_non_uniform_reduce_logical_xor( - int predicate ) + int predicate ) ---- | Returns the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group. 
|[source,opencl_c] ---- int sub_group_non_uniform_scan_inclusive_logical_and( - int predicate ) + int predicate ) int sub_group_non_uniform_scan_inclusive_logical_or( - int predicate ) + int predicate ) int sub_group_non_uniform_scan_inclusive_logical_xor( - int predicate ) + int predicate ) ---- | Returns the result of an inclusive scan operation, which is the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the @@ -14592,11 +14675,11 @@ int sub_group_non_uniform_scan_inclusive_logical_xor( |[source,opencl_c] ---- int sub_group_non_uniform_scan_exclusive_logical_and( - int predicate ) + int predicate ) int sub_group_non_uniform_scan_exclusive_logical_or( - int predicate ) + int predicate ) int sub_group_non_uniform_scan_exclusive_logical_xor( - int predicate ) + int predicate ) ---- | Returns the result of an exclusive scan operation, which is the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the @@ -14648,7 +14731,7 @@ int sub_group_elect() |[source,opencl_c] ---- int sub_group_non_uniform_all( - int predicate ) + int predicate ) ---- | Examines _predicate_ for all active work items in the sub-group and returns a non-zero value if _predicate_ is non-zero for all active work @@ -14660,7 +14743,7 @@ int sub_group_non_uniform_all( |[source,opencl_c] ---- int sub_group_non_uniform_any( - int predicate ) + int predicate ) ---- | Examines _predicate_ for all active work items in the sub-group and returns a non-zero value if _predicate_ is non-zero for any active work @@ -14672,7 +14755,7 @@ int sub_group_non_uniform_any( |[source,opencl_c] ---- int sub_group_non_uniform_all_equal( - gentype value ) + gentype value ) ---- | Examines _value_ for all active work items in the sub-group and returns a non-zero value if _value_ is equivalent for all active invocations in @@ -14712,7 +14795,7 @@ footnote:[{fn-double-supported}]. 
|[source,opencl_c] ---- gentype sub_group_rotate( - gentype value, int delta) + gentype value, int delta) ---- | Returns _value_ for the work item with sub-group local ID equal to the remainder of the division of the sum of this work item's sub-group local @@ -14725,8 +14808,8 @@ gentype sub_group_rotate( |[source,opencl_c] ---- gentype sub_group_clustered_rotate( - gentype value, int delta, - uint clustersize) + gentype value, int delta, + uint clustersize) ---- | Returns _value_ for the work item with sub-group local ID equal to the sum of, the remainder of the division of the sum of this work item's ID @@ -14772,7 +14855,7 @@ footnote:[{fn-double-supported}]. |[source,opencl_c] ---- gentype sub_group_shuffle( - gentype value, uint index ) + gentype value, uint index ) ---- | Returns _value_ for the work item with sub-group local ID equal to _index_. @@ -14785,7 +14868,7 @@ gentype sub_group_shuffle( |[source,opencl_c] ---- gentype sub_group_shuffle_xor( - gentype value, uint mask ) + gentype value, uint mask ) ---- | Returns _value_ for the work item with sub-group local ID equal to this work item's sub-group local ID xor'd with _mask_. @@ -14824,7 +14907,7 @@ footnote:[{fn-double-supported}]. |[source,opencl_c] ---- gentype sub_group_shuffle_up( - gentype value, uint delta ) + gentype value, uint delta ) ---- | Returns _value_ for the work item with sub-group local ID equal to this work item's sub-group local ID minus _delta_. @@ -14840,7 +14923,7 @@ gentype sub_group_shuffle_up( |[source,opencl_c] ---- gentype sub_group_shuffle_down( - gentype value, uint delta ) + gentype value, uint delta ) ---- | Returns _value_ for the work item with sub-group local ID equal to this work item's sub-group local ID plus _delta_. 
diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 7e98ce853..b349d587a 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -14049,7 +14049,7 @@ include::{generated}/api/version-notes/clCreateCommandBufferKHR.asciidoc[] + [[commandbuffer-properties]] .{clCreateCommandBufferKHR} properties -[cols=",,",options="header",] +[cols="2,1,3",options="header",] |==== | Recording Properties | Property Value | Description @@ -15187,7 +15187,7 @@ endif::cl_khr_command_buffer_mutable_dispatch[] [[ndrange-kernel-properties-table]] .List of supported properties by {clCommandNDRangeKernelKHR} -[cols=",,",options="header",] +[cols="3,2,10",options="header",] |==== | Recording Properties | Property Value | Description @@ -15208,7 +15208,9 @@ include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ASSERTS_KHR.asciidoc[ _0_ to _work_dim - 1_ of _ceil(global_work_size[i]/local_work_size[i])_. -include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR.asciidoc[] +// Removed for now to reduce table cell size. +// The extension dependency is captured in the property itself. +//include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR.asciidoc[] | {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR_anchor} @@ -15226,7 +15228,9 @@ include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR. parameter. Otherwise, the _global_work_offset_ cannot be modified. -include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR.asciidoc[] +// Removed for now to reduce table cell size. +// The extension dependency is captured in the property itself. 
+//include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR.asciidoc[] {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR_anchor} determines whether the _global_work_size_ of kernel execution can be modified after @@ -15237,7 +15241,9 @@ include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR.asc parameter. Otherwise, the _global_work_size_ cannot be modified. -include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR.asciidoc[] +// Removed for now to reduce table cell size. +// The extension dependency is captured in the property itself. +//include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR.asciidoc[] {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR_anchor} determines whether the _local_work_size_ of kernel execution can be modified after recording. @@ -15247,7 +15253,9 @@ include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR.ascii parameter. Otherwise, the _local_work_size_ cannot be modified. -include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR.asciidoc[] +// Removed for now to reduce table cell size. +// The extension dependency is captured in the property itself. +//include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR.asciidoc[] {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR_anchor} determines whether the kernel arguments set on _kernel_ can be updated between executions. @@ -15258,7 +15266,9 @@ include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR.asciid parameter. Otherwise, the kernel arguments cannot be modified between executions. -include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ARGUMENTS_KHR.asciidoc[] +// Removed for now to reduce table cell size. +// The extension dependency is captured in the property itself. 
+//include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ARGUMENTS_KHR.asciidoc[] {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR_anchor} determines whether the information passed to _kernel_ can be updated between executions. @@ -15268,7 +15278,9 @@ include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ARGUMENTS_KHR.asciido parameter. Otherwise, the kernel execution information cannot be modified. -include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_EXEC_INFO_KHR.asciidoc[] +// Removed for now to reduce table cell size. +// The extension dependency is captured in the property itself. +//include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_EXEC_INFO_KHR.asciidoc[] If {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR_anchor} is not specified then it defaults to the value returned by the diff --git a/config/rouge/lib/rouge/lexers/opencl.rb b/config/rouge/lib/rouge/lexers/opencl.rb index 69d22503e..1e1457c80 100644 --- a/config/rouge/lib/rouge/lexers/opencl.rb +++ b/config/rouge/lib/rouge/lexers/opencl.rb @@ -120,6 +120,8 @@ class OpenCL < Cpp cl_profiling_info cl_sampler_properties cl_kernel_exec_info + cl_context_memory_initialize_khr + cl_device_terminate_capability_khr cl_device_unified_shared_memory_capabilities_intel cl_mem_properties_intel cl_mem_alloc_flags_intel @@ -135,6 +137,7 @@ class OpenCL < Cpp cl_mem_alloc_flags_img cl_layer_info cl_layer_api_version + cl_icdl_info cl_icd_dispatch cl_device_scheduling_controls_capabilities_arm cl_device_controlled_termination_capabilities_arm @@ -142,6 +145,7 @@ class OpenCL < Cpp cl_device_feature_capabilities_intel cl_device_integer_dot_product_capabilities_khr cl_semaphore_properties_khr + cl_semaphore_reimport_properties_khr cl_semaphore_info_khr cl_semaphore_type_khr cl_semaphore_payload_khr @@ -153,7 +157,16 @@ class OpenCL < Cpp cl_command_buffer_info_khr cl_command_buffer_state_khr cl_command_buffer_properties_khr + cl_command_buffer_flags_khr cl_ndrange_kernel_command_properties_khr + cl_mutable_command_khr 
+ cl_mutable_dispatch_fields_khr + cl_mutable_command_info_khr + cl_command_buffer_structure_type_khr + cl_device_fp_atomic_capabilities_ext + cl_image_requirements_info_ext + cl_platform_command_buffer_capabilities_khr + cl_mutable_dispatch_asserts_khr cl_dx9_surface_info_khr cl_motion_estimation_desc_intel cl_mem_ext_host_ptr @@ -174,16 +187,10 @@ class OpenCL < Cpp CL_VERSION_PATCH_KHR CL_MAKE_VERSION_KHR cl_device_integer_dot_product_acceleration_properties_khr - cl_command_buffer_khr - cl_mutable_command_khr - cl_mutable_command_info_khr - cl_command_buffer_structure_type_khr - cl_mutable_base_config_khr cl_mutable_dispatch_arg_khr - cl_mutable_dispatch_config_khr cl_mutable_dispatch_exec_info_khr - cl_mutable_dispatch_fields_khr - cl_platform_command_buffer_capabilities_khr + cl_mutable_dispatch_config_khr + cl_mutable_base_config_khr ) # Here are some interesting tokens diff --git a/config/rouge/lib/rouge/lexers/opencl_c.rb b/config/rouge/lib/rouge/lexers/opencl_c.rb index 9a56bf6c2..72412f13c 100644 --- a/config/rouge/lib/rouge/lexers/opencl_c.rb +++ b/config/rouge/lib/rouge/lexers/opencl_c.rb @@ -31,6 +31,7 @@ class OpenCL_C < Cpp image2d_array_t image2d_depth_t image2d_array_depth_t + image2d_msaa_t sampler_t queue_t ndrange_t From ccc786c204c312f20cd1fa7ea4d4621fb0fed8bd Mon Sep 17 00:00:00 2001 From: Jon Leech <4693344+oddhack@users.noreply.github.com> Date: Mon, 1 Apr 2024 08:28:17 -0700 Subject: [PATCH 090/190] Restore static refpages to the build (#1127) These were commented out during debugging and not restored. Also factors out a small difference between Vulkan and OpenCL scripts. 
Closes #1121 --- Makefile | 5 +++-- scripts/docgenerator.py | 8 ++++++-- scripts/spec_tools/conventions.py | 8 ++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 8e171b81e..1540b8343 100644 --- a/Makefile +++ b/Makefile @@ -459,7 +459,7 @@ $(REFPATH)/apispec.txt: $(SPECFILES) $(GENREF) $(SCRIPTS)/reflib.py $(PYAPIMAP) (cat $(MANDIR)/rewritehead ; \ echo ; echo "# Aliases hard-coded in refpage markup" ; \ sort < $(REFPATH)/rewritebody) > $(REFPATH)/.htaccess - echo $(CP) $(MANDIR)/static/*.txt $(REFPATH) + $(CP) $(MANDIR)/static/*.txt $(REFPATH) # These targets are HTML5 ref pages # @@ -493,11 +493,12 @@ $(MANHTMLDIR)/%.html: $(REFPATH)/%.txt $(MANCOPYRIGHT) $(GENDEPENDS) $(KATEXINST $(VERYQUIET)$(ASCIIDOCTOR) -b html5 $(ADOCOPTS) $(ADOCHTMLOPTS) \ $(ADOCREFOPTS) -o $@ $< +# This is not formatted as a refpage, so needs a different build rule $(MANHTMLDIR)/intro.html: $(REFPATH)/intro.txt $(MANCOPYRIGHT) $(VERYQUIET)echo "Building $@ from $< using default options" $(VERYQUIET)$(MKDIR) $(MANHTMLDIR) $(VERYQUIET)$(ASCIIDOCTOR) -b html5 $(ADOCOPTS) $(ADOCHTMLOPTS) \ - $(ADOCREFOPTS) -o $@ $< + -o $@ $< # Targets generated from the XML and registry processing scripts # apimap.py - Python encoding of the registry diff --git a/scripts/docgenerator.py b/scripts/docgenerator.py index 3692768cf..b714ef7c4 100644 --- a/scripts/docgenerator.py +++ b/scripts/docgenerator.py @@ -255,7 +255,11 @@ def writeInclude(self, directory, basename, contents): index_term = basename write('indexterm:[{}]'.format(index_term), file=fp) - write(f'[source%unbreakable,{self.conventions.docgen_language}]', file=fp) + source_options = self.conventions.docgen_source_options + source_language = self.conventions.docgen_language + source_directive = f'[source{source_options},{source_language}]' + + write(source_directive, file=fp) write('----', file=fp) write(contents, file=fp) write('----', file=fp) @@ -270,7 +274,7 @@ def writeInclude(self, directory, 
basename, contents): # Asciidoc anchor write(self.genOpts.conventions.warning_comment, file=fp) write('// Include this no-xref version without cross reference id for multiple includes of same file', file=fp) - write(f'[source,%unbreakable,{self.conventions.docgen_language}]', file=fp) + write(source_directive, file=fp) write('----', file=fp) write(contents, file=fp) write('----', file=fp) diff --git a/scripts/spec_tools/conventions.py b/scripts/spec_tools/conventions.py index edfa906cf..50ca75d41 100644 --- a/scripts/spec_tools/conventions.py +++ b/scripts/spec_tools/conventions.py @@ -548,3 +548,11 @@ def docgen_language(self): blocks.""" return 'c++' + + @property + def docgen_source_options(self): + """Return block options to be used in docgenerator [source] blocks, + which are appended to the 'source' block type. + Can be empty.""" + + return '%unbreakable' From 33e87e04a985863e16bf2c911145397c6abb6604 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 1 Apr 2024 23:24:15 -0700 Subject: [PATCH 091/190] fix an incorrect link to cl_khr_async_work_group_copy_fence (#1132) --- api/appendix_e.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index 2bec7aeb0..c88b80933 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -521,7 +521,7 @@ Changes from *v3.0.9*: * Clarified that {clCompileProgram} is valid for programs created from SPIR. * Documented the possible state of a kernel object after a failed call to {clSetKernelArg}. 
* Added new extensions: - ** `<>` (final) + ** `<<cl_khr_async_work_group_copy_fence>>` (final) ** `<>` (final) ** `<>` ** `<>` (provisional) From 808ff8e26f7f7b0c31538201c1e2024dcfc8cf2f Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 2 Apr 2024 08:23:16 -0700 Subject: [PATCH 092/190] add extension docs for spir-v-related extensions (#1123) * add extension docs for spir-v-related extensions cl_khr_spirv_extended_debug_info cl_khr_spirv_linkonce_odr cl_khr_spirv_no_integer_wrap_decoration * fix extension name --- OpenCL_Ext.txt | 4 --- api/cl_khr_spirv_extended_debug_info.asciidoc | 27 ++++++++++++++++++ api/cl_khr_spirv_linkonce_odr.asciidoc | 27 ++++++++++++++++++ ..._spirv_no_integer_wrap_decoration.asciidoc | 28 +++++++++++++++++++ config/opencl.asciidoc | 1 + ext/quick_reference.asciidoc | 12 ++++++++ ext/spirv_extensions.asciidoc | 14 ---------- xml/cl.xml | 3 ++ 8 files changed, 98 insertions(+), 18 deletions(-) create mode 100644 api/cl_khr_spirv_extended_debug_info.asciidoc create mode 100644 api/cl_khr_spirv_linkonce_odr.asciidoc create mode 100644 api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc delete mode 100644 ext/spirv_extensions.asciidoc diff --git a/OpenCL_Ext.txt b/OpenCL_Ext.txt index 68e4295b7..b84dddf87 100644 --- a/OpenCL_Ext.txt +++ b/OpenCL_Ext.txt @@ -51,10 +51,6 @@ include::ext/introduction.asciidoc[] // included markup in the OpenCL API and C Language Specifications, rather // than being included here as separate documents. -// These are SPIR-V Extensions: - -include::ext/spirv_extensions.asciidoc[] - // Index and Appendices: ifdef::backend-pdf[] diff --git a/api/cl_khr_spirv_extended_debug_info.asciidoc b/api/cl_khr_spirv_extended_debug_info.asciidoc new file mode 100644 index 000000000..5e280fb1a --- /dev/null +++ b/api/cl_khr_spirv_extended_debug_info.asciidoc @@ -0,0 +1,27 @@ +// Copyright 2017-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_spirv_extended_debug_info.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_spirv_extended_debug_info` allows use of the SPIR-V +`OpenCL.DebugInfo.100` extended instruction set. + +See the +link:{OpenCLEnvSpecURL}#_cl_khr_spirv_extended_debug_info[cl_khr_spirv_extended_debug_info] +section of the OpenCL SPIR-V Environment specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_spirv_linkonce_odr.asciidoc b/api/cl_khr_spirv_linkonce_odr.asciidoc new file mode 100644 index 000000000..f35df832f --- /dev/null +++ b/api/cl_khr_spirv_linkonce_odr.asciidoc @@ -0,0 +1,27 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_spirv_linkonce_odr.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_spirv_linkonce_odr` allows use of the SPIR-V extension +`SPV_KHR_linkonce_odr`. + +See the +link:{OpenCLEnvSpecURL}#_cl_khr_spirv_linkonce_odr[cl_khr_spirv_linkonce_odr] +section of the OpenCL SPIR-V Environment specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc b/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc new file mode 100644 index 000000000..d8456b6ab --- /dev/null +++ b/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc @@ -0,0 +1,28 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_spirv_no_integer_wrap_decoration.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_spirv_no_integer_wrap_decoration` allows use of the SPIR-V extension +`SPV_KHR_no_integer_wrap_decoration`, which adds new decorations to indicate +that a given instruction does not cause integer wrapping to occur. + +See the +link:{OpenCLEnvSpecURL}#_cl_khr_spirv_no_integer_wrap_decoration[cl_khr_spirv_no_integer_wrap_decoration] +section of the OpenCL SPIR-V Environment specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/config/opencl.asciidoc b/config/opencl.asciidoc index 62f166b4f..7839cc441 100644 --- a/config/opencl.asciidoc +++ b/config/opencl.asciidoc @@ -7,6 +7,7 @@ :khronos-opencl-repo: https://github.com/KhronosGroup/OpenCL-Docs :khronos-opencl-pr: {khronos-opencl-repo}/pull :OpenCLCSpecURL: OpenCL_C.html +:OpenCLEnvSpecURL: OpenCL_Env.html :blank: pass:[ +] :pp: ++ diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index 3b47e039f..6fddf7120 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -232,6 +232,18 @@ Language Specifications. 
| Standard Portable Intermediate Representation Programs | Extension, Superseded by IL Programs / SPIR-V +| [[cl_khr_spirv_extended_debug_info]] link:{APISpecURL}#cl_khr_spirv_extended_debug_info[`cl_khr_spirv_extended_debug_info`] +| Allows Use of the SPIR-V `OpenCL.DebugInfo.100` Extended Instruction Set +| Extension + +| [[cl_khr_spirv_linkonce_odr]] link:{APISpecURL}#cl_khr_spirv_linkonce_odr[`cl_khr_spirv_linkonce_odr`] +| Allows Use of the SPIR-V `SPV_KHR_linkonce_odr` Extension +| Extension + +| [[cl_khr_spirv_no_integer_wrap_decoration]] link:{APISpecURL}#cl_khr_spirv_no_integer_wrap_decoration[`cl_khr_spirv_no_integer_wrap_decoration`] +| Allows Use of the SPIR-V `SPV_KHR_no_integer_wrap_decoration` Extension +| Extension + | [[cl_khr_srgb_image_writes]] link:{APISpecURL}#cl_khr_srgb_image_writes[`cl_khr_srgb_image_writes`] | Write to sRGB Images | Extension diff --git a/ext/spirv_extensions.asciidoc b/ext/spirv_extensions.asciidoc deleted file mode 100644 index bffbf1f8d..000000000 --- a/ext/spirv_extensions.asciidoc +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[spirv_extensions]] -== Extensions to the OpenCL SPIR-V Environment - -An OpenCL SPIR-V environment may be modified by OpenCL extensions. -Please refer to the OpenCL SPIR-V Environment Specification for descriptions how OpenCL extensions modify an OpenCL SPIR-V environment. 
-In addition to the extensions described in this document, the OpenCL SPIR-V Environment Specification also describes how the following OpenCL extensions modify an OpenCL SPIR-V environment: - -* `cl_khr_spirv_no_integer_wrap_decoration` -* `cl_khr_spirv_extended_debug_info` -* `cl_khr_spirv_linkonce_odr` diff --git a/xml/cl.xml b/xml/cl.xml index fa6b29974..b0c5cc2ed 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -7442,6 +7442,9 @@ server's OpenCL/api-docs repository. + + + From d06f06f4b8d105a2c6efe7830777161d7fa2e37c Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 2 Apr 2024 08:27:20 -0700 Subject: [PATCH 093/190] add and link error codes for extensions (#1119) * add error codes for cl_khr_command_buffer * add error codes for cl_khr_d3d10_sharing * add error codes for cl_khr_d3d11_sharing * error codes for cl_khr_dx9_media_sharing * error codes for cl_khr_egl_event * error codes for cl_khr_egl_image * error codes for cl_khr_gl_sharing * error codes for cl_khr_semaphore * error codes for cl_khr_terminate_context * error codes for cl_khr_command_buffer_mutable_dispatch * fix typo Co-authored-by: Alastair Murray --------- Co-authored-by: Alastair Murray --- api/appendix_f.asciidoc | 124 +++++++++++++++++- ...r_command_buffer_mutable_dispatch.asciidoc | 2 +- api/cl_khr_d3d10_sharing.asciidoc | 2 +- api/cl_khr_d3d11_sharing.asciidoc | 2 +- api/cl_khr_dx9_media_sharing.asciidoc | 2 +- api/cl_khr_egl_event.asciidoc | 7 +- api/cl_khr_egl_image.asciidoc | 2 +- api/cl_khr_external_memory.asciidoc | 2 +- api/cl_khr_gl_sharing.asciidoc | 5 +- api/cl_khr_semaphore.asciidoc | 2 +- 10 files changed, 130 insertions(+), 20 deletions(-) diff --git a/api/appendix_f.asciidoc b/api/appendix_f.asciidoc index f2bb39c62..b20d88295 100644 --- a/api/appendix_f.asciidoc +++ b/api/appendix_f.asciidoc @@ -139,12 +139,6 @@ include::{generated}/api/version-notes/CL_INVALID_EVENT.asciidoc[] include::{generated}/api/version-notes/CL_INVALID_EVENT_WAIT_LIST.asciidoc[] | Returned when the 
specified event wait list or number of events in the wait list is not valid. -// This is currently defined in cl.h, but it's not a core API error code. -//| {CL_INVALID_GL_OBJECT_anchor} -// -//include::{generated}/api/version-notes/CL_INVALID_GL_OBJECT.asciidoc[] -//| - | {CL_INVALID_GLOBAL_OFFSET_anchor} include::{generated}/api/version-notes/CL_INVALID_GLOBAL_OFFSET.asciidoc[] @@ -332,6 +326,110 @@ include::{generated}/api/version-notes/CL_MAX_SIZE_RESTRICTION_EXCEEDED.asciidoc include::{generated}/api/version-notes/CL_PROFILING_INFO_NOT_AVAILABLE.asciidoc[] | Returned by {clGetEventProfilingInfo} when the command associated with the specified event was not enqueued into a command-queue with {CL_QUEUE_PROFILING_ENABLE}. +ifdef::cl_khr_command_buffer[] +| {CL_INVALID_COMMAND_BUFFER_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_COMMAND_BUFFER_KHR.asciidoc[] +| Returned when the specified command-buffer is not a <>. + +| {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_SYNC_POINT_WAIT_LIST_KHR.asciidoc[] +| Returned when the specified sync point wait list or number of sync points in the wait list is not valid. + +| {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR_anchor} + +include::{generated}/api/version-notes/CL_INCOMPATIBLE_COMMAND_QUEUE_KHR.asciidoc[] +| Returned when one or more command-queues is incompatible with a command-buffer. +endif::cl_khr_command_buffer[] + +ifdef::cl_khr_command_buffer_mutable_dispatch[] +| {CL_INVALID_MUTABLE_COMMAND_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_MUTABLE_COMMAND_KHR.asciidoc[] +| Returned when a specified command is not a <>. +endif::cl_khr_command_buffer_mutable_dispatch[] + +ifdef::cl_khr_d3d10_sharing[] +| {CL_INVALID_D3D10_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_D3D10_DEVICE_KHR.asciidoc[] +| Returned when a Direct3D 10 device cannot interoperate with OpenCL device IDs. 
+| {CL_INVALID_D3D10_RESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_D3D10_RESOURCE_KHR.asciidoc[] +| Returned when an OpenCL object cannot be created from a Direct3D 10 resource. +| {CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR.asciidoc[] +| Returned when attempting to acquire an OpenCL object created from a Direct3D 10 resource that was already acquired. +| {CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR.asciidoc[] +| Returned when attempting to release an OpenCL object created from a Direct3D 10 resource that has not been acquired. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {CL_INVALID_D3D11_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_D3D11_DEVICE_KHR.asciidoc[] +| Returned when a Direct3D 11 device cannot interoperate with OpenCL device IDs. +| {CL_INVALID_D3D11_RESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_D3D11_RESOURCE_KHR.asciidoc[] +| Returned when an OpenCL object cannot be created from a Direct3D 11 resource. +| {CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR.asciidoc[] +| Returned when attempting to acquire an OpenCL object created from a Direct3D 11 resource that was already acquired. +| {CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR.asciidoc[] +| Returned when attempting to release an OpenCL object created from a Direct3D 11 resource that has not been acquired. +endif::cl_khr_d3d11_sharing[] + +ifdef::cl_khr_dx9_media_sharing[] +| {CL_INVALID_DX9_MEDIA_ADAPTER_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_DX9_MEDIA_ADAPTER_KHR.asciidoc[] +| Returned when a DirectX 9 media adapter cannot interoperate with OpenCL device IDs. 
+| {CL_INVALID_DX9_MEDIA_SURFACE_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_DX9_MEDIA_SURFACE_KHR.asciidoc[] +| Returned when an OpenCL object cannot be created from a DirectX 9 media surface. +| {CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR_anchor} + +include::{generated}/api/version-notes/CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR.asciidoc[] +| Returned when attempting to acquire an OpenCL object created from a DirectX 9 media surface that was already acquired. +| {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR_anchor} + +include::{generated}/api/version-notes/CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR.asciidoc[] +| Returned when attempting to release an OpenCL object created from a DirectX 9 media surface that has not been acquired. +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_egl_image[] +| {CL_EGL_RESOURCE_NOT_ACQUIRED_KHR_anchor} + +include::{generated}/api/version-notes/CL_EGL_RESOURCE_NOT_ACQUIRED_KHR.asciidoc[] +| Possible event status if an EGL resource is used without being acquired. +| {CL_INVALID_EGL_OBJECT_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_EGL_OBJECT_KHR.asciidoc[] +| Returned when the specified EGL object is not valid. +endif::cl_khr_egl_image[] + +ifdef::cl_khr_gl_sharing[] +// Note: This is currently defined in cl.h, but it's not a core API error code. +| {CL_INVALID_GL_OBJECT_anchor} + +include::{generated}/api/version-notes/CL_INVALID_GL_OBJECT.asciidoc[] +| Returned when the specified OpenGL object is not valid, or when there is no associated OpenGL object for an OpenCL object. +| {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR.asciidoc[] +| Returned when the specified OpenGL sharing context creation property is not valid. 
+endif::cl_khr_gl_sharing[] + ifdef::cl_khr_icd[] | {CL_PLATFORM_NOT_FOUND_KHR_anchor} @@ -339,4 +437,18 @@ include::{generated}/api/version-notes/CL_PLATFORM_NOT_FOUND_KHR.asciidoc[] | Returned by {clGetPlatformIDs} when no platforms are available. endif::cl_khr_icd[] +ifdef::cl_khr_semaphore[] +| {CL_INVALID_SEMAPHORE_KHR_anchor} + +include::{generated}/api/version-notes/CL_INVALID_SEMAPHORE_KHR.asciidoc[] +| Returned when the specified semaphore is not a <<valid-object-definition,valid semaphore>>. +endif::cl_khr_semaphore[] + +ifdef::cl_khr_terminate_context[] +| {CL_CONTEXT_TERMINATED_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_TERMINATED_KHR.asciidoc[] +| Returned when the specified context has already been terminated, or as an event status for terminated commands. +endif::cl_khr_terminate_context[] + |==== diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc index 6f35f3006..8883fc837 100644 --- a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -118,7 +118,7 @@ also be updated between enqueues of the command-buffer.
* {cl_command_buffer_structure_type_khr_TYPE} ** {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR} ** {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR} - * New <> + * New Error Codes ** {CL_INVALID_MUTABLE_COMMAND_KHR} === Sample Code diff --git a/api/cl_khr_d3d10_sharing.asciidoc b/api/cl_khr_d3d10_sharing.asciidoc index 1f8fe5242..0a69e5e89 100644 --- a/api/cl_khr_d3d10_sharing.asciidoc +++ b/api/cl_khr_d3d10_sharing.asciidoc @@ -46,7 +46,7 @@ include::{generated}/meta/{refprefix}cl_khr_d3d10_sharing.txt[] ** {CL_MEM_D3D10_RESOURCE_KHR} * {cl_image_info_TYPE} ** {CL_IMAGE_D3D10_SUBRESOURCE_KHR} - * {cl_event_info_TYPE} + * {cl_command_type_TYPE} ** {CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR} ** {CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR} * New Error Codes diff --git a/api/cl_khr_d3d11_sharing.asciidoc b/api/cl_khr_d3d11_sharing.asciidoc index 2ee08b859..e0573b296 100644 --- a/api/cl_khr_d3d11_sharing.asciidoc +++ b/api/cl_khr_d3d11_sharing.asciidoc @@ -46,7 +46,7 @@ include::{generated}/meta/{refprefix}cl_khr_d3d11_sharing.txt[] ** {CL_MEM_D3D11_RESOURCE_KHR} * {cl_image_info_TYPE} ** {CL_IMAGE_D3D11_SUBRESOURCE_KHR} - * {cl_event_info_TYPE} + * {cl_command_type_TYPE} ** {CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR} ** {CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR} * New Error Codes diff --git a/api/cl_khr_dx9_media_sharing.asciidoc b/api/cl_khr_dx9_media_sharing.asciidoc index 60465c6d5..91fd3dad4 100644 --- a/api/cl_khr_dx9_media_sharing.asciidoc +++ b/api/cl_khr_dx9_media_sharing.asciidoc @@ -55,7 +55,7 @@ adapter. 
** {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} * {cl_image_info_TYPE} ** {CL_IMAGE_DX9_MEDIA_PLANE_KHR} - * {cl_event_info_TYPE} + * {cl_command_type_TYPE} ** {CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR} ** {CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR} * New Error Codes diff --git a/api/cl_khr_egl_event.asciidoc b/api/cl_khr_egl_event.asciidoc index 97293a7cc..106bb82ec 100644 --- a/api/cl_khr_egl_event.asciidoc +++ b/api/cl_khr_egl_event.asciidoc @@ -26,8 +26,7 @@ functionality of creating an EGL sync object from an OpenCL event object. === New Enums - * New Error Codes - ** {CL_INVALID_EGL_OBJECT_KHR} + * {cl_command_type_TYPE} ** {CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR} === Issues @@ -57,13 +56,13 @@ context, and to reach into each such context. *RESOLVED* Use is limited to calls to acquire and release memory objects only. -- - . What is the desired behaviour for this extension when EGLSyncKHR is of a + . What is the desired behavior for this extension when EGLSyncKHR is of a type other than `EGL_SYNC_FENCE_KHR`? + -- *RESOLVED* This extension only requires support for `EGL_SYNC_FENCE_KHR`. Support of other types is an implementation choice, and will result in -CL_INVALID_EGL_OBJECT_KHR if unsupported. +{CL_INVALID_EGL_OBJECT_KHR} if unsupported. -- === Version History diff --git a/api/cl_khr_egl_image.asciidoc b/api/cl_khr_egl_image.asciidoc index d4e7f5fee..0d1b0df27 100644 --- a/api/cl_khr_egl_image.asciidoc +++ b/api/cl_khr_egl_image.asciidoc @@ -25,7 +25,7 @@ from from EGLImages. 
=== New Enums - * {cl_event_info_TYPE} + * {cl_command_type_TYPE} ** {CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR} ** {CL_COMMAND_RELEASE_EGL_OBJECTS_KHR} * New Error Codes diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index 867d9a5f2..dbe5e6a95 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -81,7 +81,7 @@ TODO ** {CL_MEM_DEVICE_HANDLE_LIST_KHR} ** {CL_MEM_DEVICE_HANDLE_LIST_END_KHR} * Return values from from {clGetEventInfo} when _param_name_ is - {CL_EVENT_COMMAND_TYPE}: + {CL_EVENT_COMMAND_TYPE} (of type {cl_command_type_TYPE}): ** {CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR} ** {CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR} diff --git a/api/cl_khr_gl_sharing.asciidoc b/api/cl_khr_gl_sharing.asciidoc index 94a1a0004..237d96511 100644 --- a/api/cl_khr_gl_sharing.asciidoc +++ b/api/cl_khr_gl_sharing.asciidoc @@ -63,8 +63,6 @@ and buffer object images with OpenCL is required by this extension. === New Enums - * New Error Codes - ** {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} * {cl_gl_context_info_TYPE} ** {CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR} ** {CL_DEVICES_FOR_GL_CONTEXT_KHR} @@ -86,7 +84,8 @@ and buffer object images with OpenCL is required by this extension.
* {cl_gl_texture_info_TYPE} ** {CL_GL_TEXTURE_TARGET} ** {CL_GL_MIPMAP_LEVEL} - + * New Error Codes + ** {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} === Issues diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc index ec578bc37..1d3af4634 100644 --- a/api/cl_khr_semaphore.asciidoc +++ b/api/cl_khr_semaphore.asciidoc @@ -90,7 +90,7 @@ In particular, this extension defines: ** {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} ** {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR} // TODO these are not described anywhere in the extension spec document - * New return values from {clGetEventInfo} + * {cl_command_type_TYPE} ** {CL_COMMAND_SEMAPHORE_WAIT_KHR} ** {CL_COMMAND_SEMAPHORE_SIGNAL_KHR} * New Error Codes From be61f4cc74134b19d2c6344f2afa69586e2284c7 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 2 Apr 2024 08:53:46 -0700 Subject: [PATCH 094/190] add a few missing enums to the XML file (#1118) * add a few missing enums to the XML file use the feature dictionaries in a lot more places * add versioning macros to OpenCL 3.0 * update dictionary generation for API macros * add missing anchors --- OpenCL_C.txt | 761 +++++++++--------- api/cl_khr_extended_versioning.asciidoc | 20 +- api/opencl_architecture.asciidoc | 12 +- c/dictionary.asciidoc | 5 + c/footnotes.asciidoc | 14 +- env/common_properties.asciidoc | 66 +- env/extensions.asciidoc | 4 +- env/image_addressing_and_filtering.asciidoc | 138 ++-- env/numerical_compliance.asciidoc | 8 +- ext/introduction.asciidoc | 22 +- ...GetExtensionFunctionAddressForPlatform.txt | 4 +- scripts/gen_dictionaries.py | 62 ++ xml/cl.xml | 24 + 13 files changed, 617 insertions(+), 523 deletions(-) create mode 100644 c/dictionary.asciidoc diff --git a/OpenCL_C.txt b/OpenCL_C.txt index bcab4229f..dd372a8bb 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -33,6 +33,9 @@ include::config/attribs.txt[] // Attributes that are shared by OpenCL specifications. 
include::config/opencl.asciidoc[] +// Formatting and links for API functions and enums. +include::c/dictionary.asciidoc[] + // Feature Dictionary include::c/feature-dictionary.asciidoc[] @@ -1095,7 +1098,7 @@ The `image2d_t`, `image3d_t`, `image2d_array_t`, `image1d_t`, `image1d_buffer_t`, `image1d_array_t`, `image2d_depth_t`, `image2d_array_depth_t` and `sampler_t` types are only defined if the device supports images, i.e. the value of the <>) is `CL_TRUE`. +{CL_DEVICE_IMAGE_SUPPORT} device query>>) is {CL_TRUE}. If this is the case then an OpenCL C 3.0 or newer compiler must also define the {opencl_c_images} feature macro. ==== @@ -2751,7 +2754,7 @@ be declared in program scope or in the outermost kernel scope or inside Each argument to a kernel that is a pointer to the `{constant}` address space is counted separately towards the maximum number of such arguments, defined as the value of the <>. +{CL_DEVICE_MAX_CONSTANT_ARGS} device query>>. ==== It is illegal to write to a variable in the constant address space and will @@ -3628,7 +3631,7 @@ qualifier: ==== Kernel functions with variables declared inside the function with the `{local}` or `local` qualifier can be called by the host using appropriate -APIs such as *clEnqueueNDRangeKernel*. +APIs such as {clEnqueueNDRangeKernel}. ==== The `{kernel}` and `kernel` names are reserved for use as functions @@ -3712,20 +3715,20 @@ concerns or some preference for divisibility by 2. The optional `+__attribute__((work_group_size_hint(X, Y, Z)))+` is a hint to the compiler and is intended to specify the work-group size that may be used i.e. value most likely to be specified by the _local_work_size_ argument to -*clEnqueueNDRangeKernel*. +{clEnqueueNDRangeKernel}. For example, the `+__attribute__((work_group_size_hint(1, 1, 1)))+` is a hint to the compiler that the kernel will most likely be executed with a work-group size of 1. 
The optional `+__attribute__((reqd_work_group_size(X, Y, Z)))+` is the work-group size that must be used as the _local_work_size_ argument to -*clEnqueueNDRangeKernel*. +{clEnqueueNDRangeKernel}. This allows the compiler to optimize the generated code appropriately for this kernel. -If `Z` is one, the _work_dim_ argument to *clEnqueueNDRangeKernel* can be 2 +If `Z` is one, the _work_dim_ argument to {clEnqueueNDRangeKernel} can be 2 or 3. -If `Y` and `Z` are one, the _work_dim_ argument to *clEnqueueNDRangeKernel* +If `Y` and `Z` are one, the _work_dim_ argument to {clEnqueueNDRangeKernel} can be 1, 2 or 3. -- @@ -3927,7 +3930,7 @@ supported with `{global}` address space qualifier. ifdef::cl_khr_initialize_memory[] . [[restrictions-initialize-memory]] The following restriction only applies if the `<>` extension is supported: + - If the context is created with `CL_CONTEXT_MEMORY_INITIALIZE_KHR`, + If the context is created with {CL_CONTEXT_MEMORY_INITIALIZE_KHR}, appropriate memory locations as specified by the bit-field are initialized with zeroes, prior to the start of execution of any kernel. The driver chooses when, prior to kernel execution, the initialization of @@ -4014,7 +4017,7 @@ The following predefined macro names are available. `+__OPENCL_C_VERSION__+` :: Substitutes an integer reflecting the OpenCL C version specified by the `-cl-std` build option (see <>) to - *clBuildProgram* or *clCompileProgram*. + {clBuildProgram} or {clCompileProgram}. If the `-cl-std` build option is not specified, the highest OpenCL C 1.x language version supported by each device is used as the version of OpenCL C when compiling the program for each device. @@ -4031,7 +4034,7 @@ The following predefined macro names are available. or a big endian architecture (an integer constant of 1 if device is little endian and is undefined otherwise). Also refer to the value of the <>. + {CL_DEVICE_ENDIAN_LITTLE} device query>>. 
`+__kernel_exec(X, typen)+` (and `kernel_exec(X, typen)`) :: is defined as: @@ -4047,13 +4050,13 @@ __kernel __attribute__((work_group_size_hint(X, 1, 1))) \ This is an integer constant of 1 if images are supported and is undefined otherwise. Also refer to the value of the <> and the {opencl_c_images} + {CL_DEVICE_IMAGE_SUPPORT} device query>> and the {opencl_c_images} feature. `+__FAST_RELAXED_MATH__+` :: Used to determine if the `-cl-fast-relaxed-math` optimization option is - specified in build options given to *clBuildProgram* or - *clCompileProgram*. + specified in build options given to {clBuildProgram} or + {clCompileProgram}. This is an integer constant of 1 if the `-cl-fast-relaxed-math` build option is specified and is undefined otherwise. @@ -4913,7 +4916,7 @@ that operate on mixed scalar and vector types, however. -- The following table describes the list of built-in work-item functions that can be used to query the number of dimensions, the global and local work -size specified to *clEnqueueNDRangeKernel*, and the global and local +size specified to {clEnqueueNDRangeKernel}, and the global and local identifier of each work-item when this kernel is being executed on a device. [[table-work-item-functions]] @@ -4924,12 +4927,12 @@ identifier of each work-item when this kernel is being executed on a device. | uint *get_work_dim*() | Returns the number of dimensions in use. This is the value given to the _work_dim_ argument specified in - *clEnqueueNDRangeKernel*. + {clEnqueueNDRangeKernel}. | size_t *get_global_size*(uint _dimindx_) | Returns the number of global work-items specified for dimension identified by _dimindx_. This value is given by the _global_work_size_ argument to - *clEnqueueNDRangeKernel*. + {clEnqueueNDRangeKernel}. Valid values of _dimindx_ are 0 to *get_work_dim*() - 1. For other values of _dimindx_, *get_global_size*() returns 1. @@ -4945,7 +4948,7 @@ identifier of each work-item when this kernel is being executed on a device. 
| Returns the number of local work-items specified in dimension identified by _dimindx_. This value is at most the value given by the _local_work_size_ - argument to *clEnqueueNDRangeKernel* if _local_work_size_ is not + argument to {clEnqueueNDRangeKernel} if _local_work_size_ is not `NULL`; otherwise the OpenCL implementation chooses an appropriate _local_work_size_ value which is returned by this function. If the kernel is executed with a non-uniform work-group size @@ -4964,7 +4967,7 @@ identifier of each work-item when this kernel is being executed on a device. the number of local work-items in each of the work-groups that make up the uniform region of the global range in the dimension identified by _dimindx_. - If the _local_work_size_ argument to *clEnqueueNDRangeKernel* is not + If the _local_work_size_ argument to {clEnqueueNDRangeKernel} is not `NULL`, this value will match the value specified in _local_work_size_[_dimindx_]. If _local_work_size_ is `NULL`, this value will match the local size @@ -4995,7 +4998,7 @@ identifier of each work-item when this kernel is being executed on a device. For other values, *get_group_id*() returns 0. | size_t *get_global_offset*(uint _dimindx_) | *get_global_offset* returns the offset values specified in - _global_work_offset_ argument to *clEnqueueNDRangeKernel*. + _global_work_offset_ argument to {clEnqueueNDRangeKernel}. Valid values of _dimindx_ are 0 to *get_work_dim*() - 1. For other values, *get_global_offset*() returns 0. @@ -5070,7 +5073,7 @@ sub-group when this kernel is being executed on a device. This number will be constant for the duration of a work-group's execution. If the kernel is executed with a non-uniform work-group size - (i.e. the global_work_size values specified to *clEnqueueNDRangeKernel* + (i.e. 
the global_work_size values specified to {clEnqueueNDRangeKernel} are not evenly divisible by the local_work_size values for any dimension, calls to this built-in from some work-groups may return different values than calls to this built-in from other work-groups. @@ -5087,7 +5090,7 @@ sub-group when this kernel is being executed on a device. | *get_sub_group_id* returns the sub-group ID which is a number from 0 .. *get_num_sub_groups*() - 1. - For *clEnqueueTask*, this returns 0. + For {clEnqueueTask}, this returns 0. | uint *get_sub_group_local_id*() | Returns the unique work-item ID within the current sub-group. @@ -5811,16 +5814,16 @@ the application. [cols=",",options="header",] |==== | Macro in OpenCL Language | Macro for application -| `FLT_DIG` | `CL_FLT_DIG` -| `FLT_MANT_DIG` | `CL_FLT_MANT_DIG` -| `FLT_MAX_10_EXP` | `CL_FLT_MAX_10_EXP` -| `FLT_MAX_EXP` | `CL_FLT_MAX_EXP` -| `FLT_MIN_10_EXP` | `CL_FLT_MIN_10_EXP` -| `FLT_MIN_EXP` | `CL_FLT_MIN_EXP` -| `FLT_RADIX` | `CL_FLT_RADIX` -| `FLT_MAX` | `CL_FLT_MAX` -| `FLT_MIN` | `CL_FLT_MIN` -| `FLT_EPSILSON` | `CL_FLT_EPSILON` +| `FLT_DIG` | {CL_FLT_DIG} +| `FLT_MANT_DIG` | {CL_FLT_MANT_DIG} +| `FLT_MAX_10_EXP` | {CL_FLT_MAX_10_EXP} +| `FLT_MAX_EXP` | {CL_FLT_MAX_EXP} +| `FLT_MIN_10_EXP` | {CL_FLT_MIN_10_EXP} +| `FLT_MIN_EXP` | {CL_FLT_MIN_EXP} +| `FLT_RADIX` | {CL_FLT_RADIX} +| `FLT_MAX` | {CL_FLT_MAX} +| `FLT_MIN` | {CL_FLT_MIN} +| `FLT_EPSILON` | {CL_FLT_EPSILON} |==== The following macros shall expand to integer constant expressions whose @@ -5883,15 +5886,15 @@ the application.
[cols=",",options="header",] |==== | Macro in OpenCL Language | Macro for application -| `DBL_DIG` | `CL_DBL_DIG` -| `DBL_MANT_DIG` | `CL_DBL_MANT_DIG` -| `DBL_MAX_10_EXP` | `CL_DBL_MAX_10_EXP` -| `DBL_MAX_EXP` | `CL_DBL_MAX_EXP` -| `DBL_MIN_10_EXP` | `CL_DBL_MIN_10_EXP` -| `DBL_MIN_EXP` | `CL_DBL_MIN_EXP` -| `DBL_MAX` | `CL_DBL_MAX` -| `DBL_MIN` | `CL_DBL_MIN` -| `DBL_EPSILSON` | `CL_DBL_EPSILON` +| `DBL_DIG` | {CL_DBL_DIG} +| `DBL_MANT_DIG` | {CL_DBL_MANT_DIG} +| `DBL_MAX_10_EXP` | {CL_DBL_MAX_10_EXP} +| `DBL_MAX_EXP` | {CL_DBL_MAX_EXP} +| `DBL_MIN_10_EXP` | {CL_DBL_MIN_10_EXP} +| `DBL_MIN_EXP` | {CL_DBL_MIN_EXP} +| `DBL_MAX` | {CL_DBL_MAX} +| `DBL_MIN` | {CL_DBL_MIN} +| `DBL_EPSILON` | {CL_DBL_EPSILON} |==== The following constants are also available. @@ -5952,16 +5955,16 @@ the application. [cols=",",options="header",] |==== | Macro in OpenCL Language | Macro for application -| `HALF_DIG` | `CL_HALF_DIG` -| `HALF_MANT_DIG` | `CL_HALF_MANT_DIG` -| `HALF_MAX_10_EXP` | `CL_HALF_MAX_10_EXP` -| `HALF_MAX_EXP` | `CL_HALF_MAX_EXP` -| `HALF_MIN_10_EXP` | `CL_HALF_MIN_10_EXP` -| `HALF_MIN_EXP` | `CL_HALF_MIN_EXP` -| `HALF_RADIX` | `CL_HALF_RADIX` -| `HALF_MAX` | `CL_HALF_MAX` -| `HALF_MIN` | `CL_HALF_MIN` -| `HALF_EPSILSON` | `CL_HALF_EPSILON` +| `HALF_DIG` | {CL_HALF_DIG} +| `HALF_MANT_DIG` | {CL_HALF_MANT_DIG} +| `HALF_MAX_10_EXP` | {CL_HALF_MAX_10_EXP} +| `HALF_MAX_EXP` | {CL_HALF_MAX_EXP} +| `HALF_MIN_10_EXP` | {CL_HALF_MIN_10_EXP} +| `HALF_MIN_EXP` | {CL_HALF_MIN_EXP} +| `HALF_RADIX` | {CL_HALF_RADIX} +| `HALF_MAX` | {CL_HALF_MAX} +| `HALF_MIN` | {CL_HALF_MIN} +| `HALF_EPSILON` | {CL_HALF_EPSILON} |==== The following constants are also available. @@ -6352,21 +6355,21 @@ the application.
[cols=",",options="header",] |==== | Macro in OpenCL Language | Macro for application -| `CHAR_BIT` | `CL_CHAR_BIT` -| `CHAR_MAX` | `CL_CHAR_MAX` -| `CHAR_MIN` | `CL_CHAR_MIN` -| `INT_MAX` | `CL_INT_MAX` -| `INT_MIN` | `CL_INT_MIN` -| `LONG_MAX` | `CL_LONG_MAX` -| `LONG_MIN` | `CL_LONG_MIN` -| `SCHAR_MAX` | `CL_SCHAR_MAX` -| `SCHAR_MIN` | `CL_SCHAR_MIN` -| `SHRT_MAX` | `CL_SHRT_MAX` -| `SHRT_MIN` | `CL_SHRT_MIN` -| `UCHAR_MAX` | `CL_UCHAR_MAX` -| `USHRT_MAX` | `CL_USHRT_MAX` -| `UINT_MAX` | `CL_UINT_MAX` -| `ULONG_MAX` | `CL_ULONG_MAX` +| `CHAR_BIT` | {CL_CHAR_BIT} +| `CHAR_MAX` | {CL_CHAR_MAX} +| `CHAR_MIN` | {CL_CHAR_MIN} +| `INT_MAX` | {CL_INT_MAX} +| `INT_MIN` | {CL_INT_MIN} +| `LONG_MAX` | {CL_LONG_MAX} +| `LONG_MIN` | {CL_LONG_MIN} +| `SCHAR_MAX` | {CL_SCHAR_MAX} +| `SCHAR_MIN` | {CL_SCHAR_MIN} +| `SHRT_MAX` | {CL_SHRT_MAX} +| `SHRT_MIN` | {CL_SHRT_MIN} +| `UCHAR_MAX` | {CL_UCHAR_MAX} +| `USHRT_MAX` | {CL_USHRT_MAX} +| `UINT_MAX` | {CL_UINT_MAX} +| `ULONG_MAX` | {CL_ULONG_MAX} |==== -- @@ -9525,7 +9528,7 @@ The OpenCL C programming language implements the *printf* function. When the event that is associated with a particular kernel invocation is completed, the output of all printf() calls executed by this kernel invocation is flushed to the implementation-defined output stream. -Calling *clFinish* on a command-queue flushes all pending output by printf +Calling {clFinish} on a command-queue flushes all pending output by printf in previously enqueued and completed commands to the implementation-defined output stream. In the case that printf is executed from multiple work-items concurrently, @@ -9906,7 +9909,7 @@ kernel void my_kernel(global char *s, ... ) a `float` argument to a `double` only if the `double` data type is supported. Refer to the value of the <>. + {CL_DEVICE_DOUBLE_FP_CONFIG} device query>>. If the `double` data type is not supported, the argument will be a `float` instead of a `double`. 
* For the embedded profile, the *l* length modifier is supported only if @@ -9929,7 +9932,7 @@ from and/or write to specific locations in the image. Support for the image built-in functions is optional. If a device supports images then the value of the <>) is `CL_TRUE` and the OpenCL C +{CL_DEVICE_IMAGE_SUPPORT} device query>>) is {CL_TRUE} and the OpenCL C compiler for that device must define the `+__IMAGE_SUPPORT__+` macro. A compiler for OpenCL C 3.0 or newer for that device must also support the {opencl_c_images} feature. @@ -9963,7 +9966,7 @@ component. The image read functions take a sampler argument. The sampler can be passed as an argument to the kernel using -*clSetKernelArg*, or can be declared in the outermost scope of kernel +{clSetKernelArg}, or can be declared in the outermost scope of kernel functions, or it can be a constant variable of type `sampler_t` declared in the program source. @@ -10005,8 +10008,8 @@ Note that samplers declared using the `constant` qualifier are not counted towards the maximum number of arguments pointing to the constant address space or the maximum size of the `constant` address space allowed per device (i.e. the value of the <> and <> device queries). +{CL_DEVICE_MAX_CONSTANT_ARGS}>> and <> device queries). The sampler fields are described in the following table. @@ -10082,7 +10085,7 @@ const sampler_t samplerA = CLK_NORMALIZED_COORDS_TRUE | addressing mode and a nearest filter. The maximum number of samplers that can be declared in a kernel can be -queried using the `CL_DEVICE_MAX_SAMPLERS` token in *clGetDeviceInfo*. +queried using the {CL_DEVICE_MAX_SAMPLERS} token in {clGetDeviceInfo}. -- @@ -10094,13 +10097,13 @@ image coordinates return the border color. 
The border color selected depends on the image channel order and can be one of the following values: - * If the image channel order is `CL_A`, `CL_INTENSITY`, `CL_Rx`, - `CL_RA`, `CL_RGx`, `CL_RGBx`, `CL_sRGBx`, `CL_ARGB`, `CL_BGRA`, - `CL_ABGR`, `CL_RGBA`, `CL_sRGBA` or `CL_sBGRA`, the border color is + * If the image channel order is {CL_A}, {CL_INTENSITY}, {CL_Rx}, + {CL_RA}, {CL_RGx}, {CL_RGBx}, {CL_sRGBx}, {CL_ARGB}, {CL_BGRA}, + {CL_ABGR}, {CL_RGBA}, {CL_sRGBA} or {CL_sBGRA}, the border color is `(0.0f, 0.0f, 0.0f, 0.0f)`. - * If the image channel order is `CL_R`, `CL_RG`, `CL_RGB`, or - `CL_LUMINANCE`, the border color is `(0.0f, 0.0f, 0.0f, 1.0f)`. - * If the image channel order is `CL_DEPTH`, the border value is `0.0f`. + * If the image channel order is {CL_R}, {CL_RG}, {CL_RGB}, or + {CL_LUMINANCE}, the border color is `(0.0f, 0.0f, 0.0f, 1.0f)`. + * If the image channel order is {CL_DEPTH}, the border value is `0.0f`. [[srgb-images]] @@ -10148,15 +10151,15 @@ endif::cl_khr_mipmap_image[] *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. 
The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10179,14 +10182,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed - formats or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + formats or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imageh* returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. The *read_imageh* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10220,9 +10223,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -10230,9 +10233,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. 
@@ -10257,15 +10260,15 @@ endif::cl_khr_fp16[] *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10289,14 +10292,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. **read_imageh** returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. 
The *read_imageh* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10331,9 +10334,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -10341,9 +10344,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. @@ -10367,15 +10370,15 @@ endif::cl_khr_fp16[] *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. 
The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10398,14 +10401,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imageh* returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. The *read_imageh* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10439,9 +10442,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -10449,9 +10452,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. 
@@ -10475,15 +10478,15 @@ endif::cl_khr_fp16[] *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10508,14 +10511,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imageh* returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. 
The *read_imageh* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10549,9 +10552,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -10559,9 +10562,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. @@ -10587,15 +10590,15 @@ endif::cl_khr_fp16[] *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. 
The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10620,14 +10623,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imageh* returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. The *read_imageh* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10661,9 +10664,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -10671,9 +10674,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. 
@@ -10699,10 +10702,10 @@ endif::cl_khr_fp16[] *read_imagef* returns a floating-point value in the range [0.0, 1.0] for depth image objects created with _image_channel_data_type_ set to - `CL_UNORM_INT16` or `CL_UNORM_INT24`. + {CL_UNORM_INT16} or {CL_UNORM_INT24}. *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to `CL_FLOAT`. + created with _image_channel_data_type_ set to {CL_FLOAT}. The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -10726,10 +10729,10 @@ endif::cl_khr_fp16[] *read_imagef* returns a floating-point value in the range [0.0, 1.0] for depth image objects created with _image_channel_data_type_ set to - `CL_UNORM_INT16` or `CL_UNORM_INT24`. + {CL_UNORM_INT16} or {CL_UNORM_INT24}. *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to `CL_FLOAT`. + created with _image_channel_data_type_ set to {CL_FLOAT}. The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -11062,12 +11065,12 @@ endif::cl_khr_mipmap_image[] ifdef::cl_khr_mipmap_image[] NOTE: If the `<>` extension macro is supported, -`CL_SAMPLER_NORMALIZED_COORDS` must be `CL_TRUE` for built-in functions +{CL_SAMPLER_NORMALIZED_COORDS} must be {CL_TRUE} for built-in functions described in the table above that read from a mipmapped image; otherwise behavior is undefined. The value specified in the _lod_ argument is clamped to the minimum of (actual number of mip levels - 1) in the image or the value specified for -`CL_SAMPLER_LOD_MAX`. +{CL_SAMPLER_LOD_MAX}. endif::cl_khr_mipmap_image[] @@ -11087,7 +11090,7 @@ integer coordinates and a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode to `CLK_ADDRESS_NONE`. 
There is one exception when the _image_channel_data_type_ is a floating-point -type (such as `CL_FLOAT`). +type (such as {CL_FLOAT}). In this exceptional case, when channel data values are denormalized, the sampler-less image read function may return the denormalized data, while the image read function with a sampler argument may flush the denormalized @@ -11107,15 +11110,15 @@ For sampler-less read functions this may be `read_only` or `read_write`. *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11129,14 +11132,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. 
*read_imageh* returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. Values returned by *read_imageh* for image objects with _image_channel_data_type_ values not specified in the description @@ -11158,9 +11161,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -11168,9 +11171,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. @@ -11182,15 +11185,15 @@ endif::cl_khr_fp16[] *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. 
Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description are @@ -11205,14 +11208,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imageh* returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. Values returned by *read_imageh* for image objects with _image_channel_data_type_ values not specified in the description are @@ -11235,9 +11238,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -11245,9 +11248,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. 
@@ -11258,15 +11261,15 @@ endif::cl_khr_fp16[] *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11280,14 +11283,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imageh* returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. 
Values returned by *read_imageh* for image objects with _image_channel_data_type_ values not specified in the description @@ -11309,9 +11312,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -11319,9 +11322,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. @@ -11333,15 +11336,15 @@ endif::cl_khr_fp16[] *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. 
Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11356,14 +11359,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imageh* returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. Values returned by *read_imageh* for image objects with _image_channel_data_type_ values not specified in the description @@ -11387,9 +11390,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -11397,9 +11400,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. 
@@ -11410,15 +11413,15 @@ endif::cl_khr_fp16[] *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11432,14 +11435,14 @@ ifdef::cl_khr_fp16[] *read_imageh* returns half-precision floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + or {CL_UNORM_INT8}, or {CL_UNORM_INT16}. *read_imageh* returns half-precision floating-point values in the range [-1.0, 1.0] for image objects created with - _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + _image_channel_data_type_ set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imageh* returns half-precision floating-point values for image - objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. 
Values returned by *read_imageh* for image objects with _image_channel_data_type_ values not specified in the description @@ -11461,9 +11464,9 @@ endif::cl_khr_fp16[] *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -11471,9 +11474,9 @@ endif::cl_khr_fp16[] *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. @@ -11484,10 +11487,10 @@ endif::cl_khr_fp16[] *read_imagef* returns a floating-point value in the range [0.0, 1.0] for depth image objects created with _image_channel_data_type_ set to - `CL_UNORM_INT16` or `CL_UNORM_INT24`. + {CL_UNORM_INT16} or {CL_UNORM_INT24}. *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to `CL_FLOAT`. + created with _image_channel_data_type_ set to {CL_FLOAT}. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11502,10 +11505,10 @@ endif::cl_khr_fp16[] *read_imagef* returns a floating-point value in the range [0.0, 1.0] for depth image objects created with _image_channel_data_type_ set to - `CL_UNORM_INT16` or `CL_UNORM_INT24`. + {CL_UNORM_INT16} or {CL_UNORM_INT24}. *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to `CL_FLOAT`. 
+ created with _image_channel_data_type_ set to {CL_FLOAT}. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11529,15 +11532,15 @@ float4 read_imagef( *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11568,9 +11571,9 @@ uint4 read_imageui( *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - * `CL_SIGNED_INT8`, - * `CL_SIGNED_INT16`, and - * `CL_SIGNED_INT32`. + * {CL_SIGNED_INT8}, + * {CL_SIGNED_INT16}, and + * {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -11578,9 +11581,9 @@ uint4 read_imageui( *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - * `CL_UNSIGNED_INT8`, - * `CL_UNSIGNED_INT16`, and - * `CL_UNSIGNED_INT32`. + * {CL_UNSIGNED_INT8}, + * {CL_UNSIGNED_INT16}, and + * {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. 
@@ -11600,15 +11603,15 @@ float4 read_imagef( *read_imagef* returns floating-point values in the range [0.0, 1.0] for image objects created with _image_channel_data_type_ set to one of - the pre-defined packed formats or `CL_UNORM_INT8`, or - `CL_UNORM_INT16`. + the pre-defined packed formats or {CL_UNORM_INT8}, or + {CL_UNORM_INT16}. *read_imagef* returns floating-point values in the range [-1.0, 1.0] for image objects created with _image_channel_data_type_ set to - `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + {CL_SNORM_INT8}, or {CL_SNORM_INT16}. *read_imagef* returns floating-point values for image objects created - with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to {CL_HALF_FLOAT} or {CL_FLOAT}. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11639,9 +11642,9 @@ uint4 read_imageui( *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - * `CL_SIGNED_INT8`, - * `CL_SIGNED_INT16`, and - * `CL_SIGNED_INT32`. + * {CL_SIGNED_INT8}, + * {CL_SIGNED_INT16}, and + * {CL_SIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imagei* are undefined. @@ -11649,9 +11652,9 @@ uint4 read_imageui( *read_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - * `CL_UNSIGNED_INT8`, - * `CL_UNSIGNED_INT16`, and - * `CL_UNSIGNED_INT32`. + * {CL_UNSIGNED_INT8}, + * {CL_UNSIGNED_INT16}, and + * {CL_UNSIGNED_INT32}. If the _image_channel_data_type_ is not one of the above values, the values returned by *read_imageui* are undefined. @@ -11671,10 +11674,10 @@ float read_imagef( *read_imagef* returns a floating-point value in the range [0.0, 1.0] for depth image objects created with _image_channel_data_type_ set to - `CL_UNORM_INT16` or `CL_UNORM_INT24`. 
+ {CL_UNORM_INT16} or {CL_UNORM_INT24}. *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to `CL_FLOAT`. + created with _image_channel_data_type_ set to {CL_FLOAT}. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11695,10 +11698,10 @@ float read_imagef(image2d_array_msaaa_depth_t image, *read_imagef* returns a floating-point value in the range [0.0, 1.0] for depth image objects created with _image_channel_data_type_ set to - `CL_UNORM_INT16` or `CL_UNORM_INT24`. + {CL_UNORM_INT16} or {CL_UNORM_INT24}. *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to `CL_FLOAT`. + created with _image_channel_data_type_ set to {CL_FLOAT}. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description @@ -11790,8 +11793,8 @@ endif::cl_khr_fp16[] ifdef::cl_khr_fp16[and *write_imageh*] can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, - `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. + or set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, + {CL_UNORM_INT16}, {CL_HALF_FLOAT} or {CL_FLOAT}. Appropriate data format conversion will be done to convert channel data from a floating-point value to actual data format in which the channels are stored. @@ -11799,16 +11802,16 @@ ifdef::cl_khr_fp16[and *write_imageh*] *write_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. 
*write_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. The behavior of *write_imagef*, ifdef::cl_khr_fp16[*write_imageh*,] @@ -11854,8 +11857,8 @@ endif::cl_khr_fp16[] ifdef::cl_khr_fp16[and *write_imageh*] can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, - `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. + or set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, + {CL_UNORM_INT16}, {CL_HALF_FLOAT} or {CL_FLOAT}. Appropriate data format conversion will be done to convert channel data from a floating-point value to actual data format in which the channels are stored. @@ -11863,16 +11866,16 @@ ifdef::cl_khr_fp16[and *write_imageh*] *write_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. *write_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. The behavior of *write_imagef*, ifdef::cl_khr_fp16[*write_imageh*,] @@ -11935,8 +11938,8 @@ endif::cl_khr_fp16[] ifdef::cl_khr_fp16[and *write_imageh*] can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, - `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. 
+ or set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, + {CL_UNORM_INT16}, {CL_HALF_FLOAT} or {CL_FLOAT}. Appropriate data format conversion will be done to convert channel data from a floating-point value to actual data format in which the channels are stored. @@ -11944,16 +11947,16 @@ ifdef::cl_khr_fp16[and *write_imageh*] *write_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. *write_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. The behavior of *write_imagef*, ifdef::cl_khr_fp16[*write_imageh*,] @@ -11999,8 +12002,8 @@ endif::cl_khr_fp16[] ifdef::cl_khr_fp16[and *write_imageh*] can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, - `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. + or set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, + {CL_UNORM_INT16}, {CL_HALF_FLOAT} or {CL_FLOAT}. Appropriate data format conversion will be done to convert channel data from a floating-point value to actual data format in which the channels are stored. @@ -12008,16 +12011,16 @@ ifdef::cl_khr_fp16[and *write_imageh*] *write_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16} and + + {CL_SIGNED_INT32}. 
*write_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16} and + + {CL_UNSIGNED_INT32}. The behavior of *write_imagef*, ifdef::cl_khr_fp16[*write_imageh*,] @@ -12042,8 +12045,8 @@ ifdef::cl_khr_fp16[*write_imageh*,] respectively. *write_imagef* can only be used with image objects created with - _image_channel_data_type_ set to `CL_UNORM_INT16`, `CL_UNORM_INT24` or - `CL_FLOAT`. + _image_channel_data_type_ set to {CL_UNORM_INT16}, {CL_UNORM_INT24} or + {CL_FLOAT}. Appropriate data format conversion will be done to convert depth value from a floating-point value to actual data format associated with the image. @@ -12071,8 +12074,8 @@ ifdef::cl_khr_fp16[*write_imageh*,] height-1], and [0, image number of layers-1], respectively. *write_imagef* can only be used with image objects created with - _image_channel_data_type_ set to `CL_UNORM_INT16`, `CL_UNORM_INT24` or - `CL_FLOAT`. + _image_channel_data_type_ set to {CL_UNORM_INT16}, {CL_UNORM_INT24} or + {CL_FLOAT}. Appropriate data format conversion will be done to convert depth value from a floating-point value to actual data format associated with the image. @@ -12116,8 +12119,8 @@ endif::cl_khr_fp16[] ifdef::cl_khr_fp16[and *write_imageh*] can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats - or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, - `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. + or set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, + {CL_UNORM_INT16}, {CL_HALF_FLOAT} or {CL_FLOAT}. Appropriate data format conversion will be done to convert channel data from a floating-point value to actual data format in which the channels are stored.
@@ -12125,16 +12128,16 @@ ifdef::cl_khr_fp16[and *write_imageh*] *write_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16`, or + - `CL_SIGNED_INT32`. + {CL_SIGNED_INT8}, + + {CL_SIGNED_INT16}, or + + {CL_SIGNED_INT32}. *write_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: - `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16`, or + - `CL_UNSIGNED_INT32`. + {CL_UNSIGNED_INT8}, + + {CL_UNSIGNED_INT16}, or + + {CL_UNSIGNED_INT32}. The behavior of *write_imagef*, ifdef::cl_khr_fp16[*write_imageh*,] @@ -12145,7 +12148,7 @@ ifdef::cl_khr_fp16[*write_imageh*,] image depth-1], respectively, is undefined. <> support for OpenCL C 2.0, or OpenCL C 3.0 or - newer and the {c_3d_image_writes} feature, or the + newer and the {opencl_c_3d_image_writes} feature, or the `<>` extension. ifdef::cl_khr_fp16[] @@ -12598,7 +12601,7 @@ Query Functions>> with the `CLK_` prefixes correspond to the `CL_` prefixes used to describe the <> and <> in the <>. -For example, both `CL_UNORM_INT8` and `CLK_UNORM_INT8` refer to an image +For example, both {CL_UNORM_INT8} and `CLK_UNORM_INT8` refer to an image channel data type that is an unnormalized unsigned 8-bit integer. -- @@ -12657,26 +12660,26 @@ and will be set to 1.0 for the alpha channel. 
[cols=",",options="header",] |==== | Channel Order | `float4`, `int4` or `uint4` components of channel data -| `CL_R`, `CL_Rx` | (r, 0.0, 0.0, 1.0) -| `CL_A` | (0.0, 0.0, 0.0, a) -| `CL_RG`, `CL_RGx` | (r, g, 0.0, 1.0) -| `CL_RA` | (r, 0.0, 0.0, a) -| `CL_RGB`, `CL_RGBx`, `CL_sRGB`, `CL_sRGBx` +| {CL_R}, {CL_Rx} | (r, 0.0, 0.0, 1.0) +| {CL_A} | (0.0, 0.0, 0.0, a) +| {CL_RG}, {CL_RGx} | (r, g, 0.0, 1.0) +| {CL_RA} | (r, 0.0, 0.0, a) +| {CL_RGB}, {CL_RGBx}, {CL_sRGB}, {CL_sRGBx} | (r, g, b, 1.0) -| `CL_RGBA`, `CL_BGRA`, `CL_ARGB`, `CL_ABGR`, `CL_sRGBA`, `CL_sBGRA` +| {CL_RGBA}, {CL_BGRA}, {CL_ARGB}, {CL_ABGR}, {CL_sRGBA}, {CL_sBGRA} | (r, g, b, a) -| `CL_INTENSITY` | (I, I, I, I) -| `CL_LUMINANCE` | (L, L, L, 1.0) +| {CL_INTENSITY} | (I, I, I, I) +| {CL_LUMINANCE} | (L, L, L, 1.0) |==== -For `CL_DEPTH` images, a scalar value is returned by *read_imagef* or +For {CL_DEPTH} images, a scalar value is returned by *read_imagef* or supplied to *write_imagef*. <> support for OpenCL C 2.0 or newer, or for the `<>` extension macro. [NOTE] ==== -A kernel that uses a sampler with the `CL_ADDRESS_CLAMP` addressing mode +A kernel that uses a sampler with the {CL_ADDRESS_CLAMP} addressing mode with multiple images may result in additional samplers being used internally by an implementation. If the same sampler is used with multiple images called via @@ -12685,9 +12688,9 @@ allocate an additional sampler to handle the different border color values that may be needed depending on the image formats being used. These implementation allocated samplers will count against the maximum sampler values supported by the device and given by -`CL_DEVICE_MAX_SAMPLERS`. +{CL_DEVICE_MAX_SAMPLERS}. Enqueuing a kernel that requires more samplers than the implementation can -support will result in a `CL_OUT_OF_RESOURCES` error being returned. +support will result in a {CL_OUT_OF_RESOURCES} error being returned. ==== @@ -13168,7 +13171,7 @@ packets to the pipe. 
==== There can only be the value of the <> reservations active +{CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS} device query>> reservations active (i.e. reservation IDs that have been reserved but not committed) per work-item or work-group for a pipe in a kernel executing on a device. @@ -13330,7 +13333,7 @@ Work-items can enqueue multiple blocks to a device queue(s). The *enqueue_kernel* built-in function returns `CLK_SUCCESS` if the block is enqueued successfully and returns `CLK_ENQUEUE_FAILURE` otherwise. If the -g compile option is specified in compiler options passed to -*clCompileProgram* or *clBuildProgram* when compiling or building the parent +{clCompileProgram} or {clBuildProgram} when compiling or building the parent program, the following errors may be returned instead of `CLK_ENQUEUE_FAILURE` to indicate why *enqueue_kernel* failed to enqueue the block: @@ -13633,7 +13636,7 @@ child kernels can begin execution. A parent kernel's execution status is considered to be complete when it and all its child kernels have finished execution. -The execution status of a parent kernel will be `CL_COMPLETE` if this kernel +The execution status of a parent kernel will be {CL_COMPLETE} if this kernel and all its child kernels finish execution successfully. The execution status of the kernel will be an error code (given by a negative integer value) if it or any of its child kernels encounter an @@ -13648,7 +13651,7 @@ with kernel `A` in the `event_wait_list` argument, i.e. wait for kernel `A` to finish execution before kernel `B` can begin execution. Let's assume kernel `A` enqueues kernels `X`, `Y` and `Z`. Kernel `A` is considered to have finished execution, i.e. its execution -status is `CL_COMPLETE`, only after `A` and the kernels `A` enqueued (and +status is {CL_COMPLETE}, only after `A` and the kernels `A` enqueued (and any kernels these enqueued kernels enqueue and so on) have finished execution. 
@@ -13718,7 +13721,7 @@ The *enqueue_marker* built-in function returns `CLK_SUCCESS` if the marked command is enqueued successfully and returns `CLK_ENQUEUE_FAILURE` otherwise. If the -g compile option is specified in compiler options passed to -*clCompileProgram* or *clBuildProgram*, the following errors may be returned +{clCompileProgram} or {clBuildProgram}, the following errors may be returned instead of `CLK_ENQUEUE_FAILURE` to indicate why *enqueue_marker* failed to enqueue the marker command: @@ -13764,7 +13767,7 @@ events. | Create a user event. Returns the user event. The execution status of the user event created is set to - `CL_SUBMITTED`. + {CL_SUBMITTED}. | bool *is_valid_event*(clk_event_t _event_) | Returns _true_ if _event_ is a valid event. Otherwise returns _false_. @@ -13772,7 +13775,7 @@ events. | Sets the execution status of a user event. Behavior is undefined if _event_ is not a valid event returned by *create_user_event*. - _status_ can be either `CL_COMPLETE` or a negative integer value + _status_ can be either {CL_COMPLETE} or a negative integer value indicating an error. | | | void *capture_event_profiling_info*(clk_event_t _event_, @@ -13792,12 +13795,12 @@ _name_ identifies which profiling information is to be queried and can be: _value_ is a pointer to two 64-bit values. -The first 64-bit value describes the elapsed time `CL_PROFILING_COMMAND_END` -- `CL_PROFLING_COMMAND_START` for the command identified by _event_ in +The first 64-bit value describes the elapsed time {CL_PROFILING_COMMAND_END} +- {CL_PROFILING_COMMAND_START} for the command identified by _event_ in nanoseconds. The second 64-bit value describes the elapsed time -`CL_PROFILING_COMMAND_COMPLETE` - `CL_PROFILING_COMAMND_START` for the +{CL_PROFILING_COMMAND_COMPLETE} - {CL_PROFILING_COMMAND_START} for the command identified by _event_ in nanoseconds. [NOTE] @@ -13811,21 +13814,21 @@ Events can be used to identify commands enqueued to a command-queue from the host. 
These events created by the OpenCL runtime can only be used on the host, i.e. as events passed in the _event_wait_list_ argument to various -*clEnqueue* APIs or runtime APIs that take events as arguments, such as -*clRetainEvent*, *clReleaseEvent*, and *clGetEventProfilingInfo*. +enqueue APIs or runtime APIs that take events as arguments, such as +{clRetainEvent}, {clReleaseEvent}, and {clGetEventProfilingInfo}. Similarly, events can be used to identify commands enqueued to a device queue (from a kernel). These event objects cannot be passed to the host or used by OpenCL runtime -APIs such as the *clEnqueue* APIs or runtime APIs that take event arguments. +APIs such as the enqueue APIs or runtime APIs that take event arguments. -*clRetainEvent* and *clReleaseEvent* will return `CL_INVALID_OPERATION` if +{clRetainEvent} and {clReleaseEvent} will return {CL_INVALID_OPERATION} if _event_ specified is an event that refers to any kernel enqueued to a device queue using *enqueue_kernel* or *enqueue_marker*, or is a user event created by *create_user_event*. -Similarly, *clSetUserEventStatus* can only be used to set the execution -status of events created using *clCreateUserEvent*. +Similarly, {clSetUserEventStatus} can only be used to set the execution +status of events created using {clCreateUserEvent}. User events created on the device can be set using set_user_event_status built-in function. @@ -15338,7 +15341,7 @@ spec is unsupported. ifdef::cl_khr_fp16[] If the `<>` extension macro is supported, then -if `CL_FP_ROUND_TO_NEAREST` is supported, the default rounding mode for +if {CL_FP_ROUND_TO_NEAREST} is supported, the default rounding mode for half-precision floating-point operations will be round to nearest even; otherwise the default rounding mode will be round to zero. @@ -16681,7 +16684,7 @@ If any of the selected `T_ijk` or `T_ij` in the above equations refers to a location outside the image, the border color is used as the color value for `T_ijk` or `T_ij`. 
-If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT` and any of the +If the image channel type is {CL_FLOAT} or {CL_HALF_FLOAT} and any of the image elements `T_ijk` or `T_ij` is `INF` or NaN, the behavior of the built-in image read function is undefined. @@ -16793,7 +16796,7 @@ T = (1 - a) * (1 - b) * T_i0j0 where `T_ij` is the image element at location (_i_,_j_) in the 2D image. -If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT` and any of the +If the image channel type is {CL_FLOAT} or {CL_HALF_FLOAT} and any of the image elements `T_ijk` or `T_ij` is `INF` or NaN, the behavior of the built-in image read function is undefined. @@ -16921,7 +16924,7 @@ T = (1 - a) * T_i0 where `T_i` is the image element at location (_i_) in the 1D image. -If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT` and any of the +If the image channel type is {CL_FLOAT} or {CL_HALF_FLOAT} and any of the image elements `T_ijk` or `T_ij` is `INF` or NaN, the behavior of the built-in image read function is undefined. @@ -16967,39 +16970,39 @@ to floating-point values and vice-versa. [[converting-normalized-integer-channel-data-types-to-floating-point-values]] ==== Converting Normalized Integer Channel Data Types to Floating-point Values -For images created with image channel data type of `CL_UNORM_INT8` and -`CL_UNORM_INT16`, *read_imagef* will convert the channel values from an +For images created with image channel data type of {CL_UNORM_INT8} and +{CL_UNORM_INT16}, *read_imagef* will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized floating-point values in the range [`0.0f`, `1.0f`]. 
-For images created with image channel data type of `CL_SNORM_INT8` and -`CL_SNORM_INT16`, *read_imagef* will convert the channel values from an +For images created with image channel data type of {CL_SNORM_INT8} and +{CL_SNORM_INT16}, *read_imagef* will convert the channel values from an 8-bit or 16-bit signed integer to normalized floating-point values in the range [`-1.0f`, `1.0f`]. These conversions are performed as follows: -`CL_UNORM_INT8` (8-bit unsigned integer) {rightarrow} `float` +{CL_UNORM_INT8} (8-bit unsigned integer) {rightarrow} `float` [none] * normalized `float` value = `(float)c / 255.0f` -`CL_UNORM_INT_101010` (10-bit unsigned integer) {rightarrow} `float` +{CL_UNORM_INT_101010} (10-bit unsigned integer) {rightarrow} `float` [none] * normalized `float` value = `(float)c / 1023.0f` -`CL_UNORM_INT16` (16-bit unsigned integer) {rightarrow} `float` +{CL_UNORM_INT16} (16-bit unsigned integer) {rightarrow} `float` [none] * normalized `float` value = `(float)c / 65535.0f` -`CL_SNORM_INT8` (8-bit signed integer) {rightarrow} `float` +{CL_SNORM_INT8} (8-bit signed integer) {rightarrow} `float` [none] * normalized `float` value = *max*(`-1.0f`, `(float)c / 127.0f`) -`CL_SNORM_INT16` (16-bit signed integer) {rightarrow} `float` +{CL_SNORM_INT16} (16-bit signed integer) {rightarrow} `float` [none] * normalized `float` value = *max*(`-1.0f`, `(float)c / 32767.0f`) @@ -17007,32 +17010,32 @@ These conversions are performed as follows: The precision of the above conversions is \<= 1.5 ulp except for the following cases: -For `CL_UNORM_INT8` +For {CL_UNORM_INT8} [none] * 0 must convert to `0.0f` and * 255 must convert to `1.0f` -For `CL_UNORM_INT_101010` +For {CL_UNORM_INT_101010} [none] * 0 must convert to `0.0f` and * 1023 must convert to `1.0f` -For `CL_UNORM_INT16` +For {CL_UNORM_INT16} [none] * 0 must convert to `0.0f` and * 65535 must convert to `1.0f` -For `CL_SNORM_INT8` +For {CL_SNORM_INT8} [none] * -128 and -127 must convert to `-1.0f`, * 0 must 
convert to `0.0f` and * 127 must convert to `1.0f` -For `CL_SNORM_INT16` +For {CL_SNORM_INT16} [none] * -32768 and -32767 must convert to `-1.0f`, @@ -17045,39 +17048,39 @@ ifdef::cl_khr_fp16[] ==== Converting Normalized Integer Channel Data Types to Half-Precision Floating-Point Values If the `<>` extension is supported, then -for images created with image channel data type of `CL_UNORM_INT8` and -`CL_UNORM_INT16`, *read_imageh* will convert the channel values from an +for images created with image channel data type of {CL_UNORM_INT8} and +{CL_UNORM_INT16}, *read_imageh* will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized half-precision floating-point values in the range [`0.0h`, `1.0h`]. -For images created with image channel data type of `CL_SNORM_INT8` and -`CL_SNORM_INT16`, *read_imageh* will convert the channel values from an +For images created with image channel data type of {CL_SNORM_INT8} and +{CL_SNORM_INT16}, *read_imageh* will convert the channel values from an 8-bit or 16-bit signed integer to normalized half-precision floating-point values in the range [`-1.0h`, `1.0h`]. 
These conversions are performed as follows: -`CL_UNORM_INT8` (8-bit unsigned integer) {rightarrow} `half` +{CL_UNORM_INT8} (8-bit unsigned integer) {rightarrow} `half` [none] * normalized `half` value = `round_to_half(c / 255)` -`CL_UNORM_INT_101010` (10-bit unsigned integer) {rightarrow} `half` +{CL_UNORM_INT_101010} (10-bit unsigned integer) {rightarrow} `half` [none] * normalized `half` value = `round_to_half(c / 1023)` -`CL_UNORM_INT16` (16-bit unsigned integer) {rightarrow} `half` +{CL_UNORM_INT16} (16-bit unsigned integer) {rightarrow} `half` [none] * normalized `half` value = `round_to_half(c / 65535)` -`CL_SNORM_INT8` (8-bit signed integer) {rightarrow} `half` +{CL_SNORM_INT8} (8-bit signed integer) {rightarrow} `half` [none] * normalized `half` value = *max*(`-1.0h`, `round_to_half(c / 127)`) -`CL_SNORM_INT16` (16-bit signed integer) {rightarrow} `half` +{CL_SNORM_INT16} (16-bit signed integer) {rightarrow} `half` [none] * normalized `half` value = *max*(`-1.0h`, `round_to_half(c / 32767)`) @@ -17085,32 +17088,32 @@ These conversions are performed as follows: The precision of the above conversions is \<= 1.5 ulp except for the following cases: -For `CL_UNORM_INT8` +For {CL_UNORM_INT8} [none] * 0 must convert to `0.0h` and * 255 must convert to `1.0h` -For `CL_UNORM_INT_101010` +For {CL_UNORM_INT_101010} [none] * 0 must convert to `0.0h` and * 1023 must convert to `1.0h` -For `CL_UNORM_INT16` +For {CL_UNORM_INT16} [none] * 0 must convert to `0.0h` and * 65535 must convert to `1.0h` -For `CL_SNORM_INT8` +For {CL_SNORM_INT8} [none] * -128 and -127 must convert to `-1.0h`, * 0 must convert to `0.0h` and * 127 must convert to `1.0h` -For `CL_SNORM_INT16` +For {CL_SNORM_INT16} [none] * -32768 and -32767 must convert to `-1.0h`, @@ -17122,38 +17125,38 @@ endif::cl_khr_fp16[] [[converting-floating-point-values-to-normalized-integer-channel-data-types]] ==== Converting Floating-Point Values to Normalized Integer Channel Data Types -For images created with image 
channel data type of `CL_UNORM_INT8` and -`CL_UNORM_INT16`, *write_imagef* will convert the floating-point color value +For images created with image channel data type of {CL_UNORM_INT8} and +{CL_UNORM_INT16}, *write_imagef* will convert the floating-point color value to an 8-bit or 16-bit unsigned integer. -For images created with image channel data type of `CL_SNORM_INT8` and -`CL_SNORM_INT16`, *write_imagef* will convert the floating-point color value +For images created with image channel data type of {CL_SNORM_INT8} and +{CL_SNORM_INT16}, *write_imagef* will convert the floating-point color value to an 8-bit or 16-bit signed integer. The preferred method for how conversions from floating-point values to normalized integer values are performed is as follows: -`float` {rightarrow} `CL_UNORM_INT8` (8-bit unsigned integer) +`float` {rightarrow} {CL_UNORM_INT8} (8-bit unsigned integer) [none] * *convert_uchar_sat_rte*(`f * 255.0f`) -`float` {rightarrow} `CL_UNORM_INT_101010` (10-bit unsigned integer) +`float` {rightarrow} {CL_UNORM_INT_101010} (10-bit unsigned integer) [none] * *min*(*convert_ushort_sat_rte*(`f * 1023.0f`), `0x3ff`) -`float` {rightarrow} `CL_UNORM_INT16` (16-bit unsigned integer) +`float` {rightarrow} {CL_UNORM_INT16} (16-bit unsigned integer) [none] * *convert_ushort_sat_rte*(`f * 65535.0f`) -`float` {rightarrow} `CL_SNORM_INT8` (8-bit signed integer) +`float` {rightarrow} {CL_SNORM_INT8} (8-bit signed integer) [none] * *convert_char_sat_rte*(`f * 127.0f`) -`float` {rightarrow} `CL_SNORM_INT16` (16-bit signed integer) +`float` {rightarrow} {CL_SNORM_INT16} (16-bit signed integer) [none] * *convert_short_sat_rte*(`f * 32767.0f`) @@ -17168,14 +17171,14 @@ absolute error of the implementation dependant rounding mode vs. the result produced by the round to nearest even rounding mode must be {leq} 0.6. 
-`float` {rightarrow} `CL_UNORM_INT8` (8-bit unsigned integer) +`float` {rightarrow} {CL_UNORM_INT8} (8-bit unsigned integer) [none] * Let f~preferred~ = *convert_uchar_sat_rte*(f * `255.0f`) * Let f~approx~ = *convert_uchar_sat_*(f * `255.0f`) * *fabs*(f~preferred~ - f~approx~) must be \<= 0.6 -`float` {rightarrow} `CL_UNORM_INT_101010` (10-bit unsigned integer) +`float` {rightarrow} {CL_UNORM_INT_101010} (10-bit unsigned integer) [none] * Let f~preferred~ = *convert_ushort_sat_rte*(f * `1023.0f`) @@ -17183,7 +17186,7 @@ the result produced by the round to nearest even rounding mode must be {leq} `1023.0f`) * *fabs*(f~preferred~ - f~approx~) must be \<= 0.6 -`float` {rightarrow} `CL_UNORM_INT16` (16-bit unsigned integer) +`float` {rightarrow} {CL_UNORM_INT16} (16-bit unsigned integer) [none] * Let f~preferred~ = *convert_ushort_sat_rte*(f * `65535.0f`) @@ -17191,14 +17194,14 @@ the result produced by the round to nearest even rounding mode must be {leq} `65535.0f`) * *fabs*(f~preferred~ - f~approx~) must be \<= 0.6 -`float` {rightarrow} `CL_SNORM_INT8` (8-bit signed integer) +`float` {rightarrow} {CL_SNORM_INT8} (8-bit signed integer) [none] * Let f~preferred~ = *convert_char_sat_rte*(f * `127.0f`) * Let f~approx~ = *convert_char_sat_*(f * `127.0f`) * *fabs*(f~preferred~ - f~approx~) must be \<= 0.6 -`float` {rightarrow} `CL_SNORM_INT16` (16-bit signed integer) +`float` {rightarrow} {CL_SNORM_INT16} (16-bit signed integer) [none] * Let f~preferred~ = *convert_short_sat_rte*(f * `32767.0f`) @@ -17212,12 +17215,12 @@ ifdef::cl_khr_fp16[] ==== Converting Half-Precision Floating-point Values to Normalized Integer Channel Data Types If the `<>` extension is supported, then -for images created with image channel data type of `CL_UNORM_INT8` and -`CL_UNORM_INT16`, *write_imageh* will convert the floating-point color value +for images created with image channel data type of {CL_UNORM_INT8} and +{CL_UNORM_INT16}, *write_imageh* will convert the floating-point color value 
to an 8-bit or 16-bit unsigned integer. -For images created with image channel data type of `CL_SNORM_INT8` and -`CL_SNORM_INT16`, *write_imageh* will convert the floating-point color value +For images created with image channel data type of {CL_SNORM_INT8} and +{CL_SNORM_INT16}, *write_imageh* will convert the floating-point color value to an 8-bit or 16-bit signed integer. The preferred conversion uses the round to nearest even (`_rte`) rounding @@ -17226,7 +17229,7 @@ used in the conversions described below. When approximate rounding is used instead of the preferred rounding, the result of the conversion must satisfy the bound given below. -`half` {rightarrow` `CL_UNORM_INT8` (8-bit unsigned integer) +`half` {rightarrow} {CL_UNORM_INT8} (8-bit unsigned integer) [none] * Let f~exact~ = *max*(`0`, *min*(`f * 255`, `255`)) @@ -17234,7 +17237,7 @@ result of the conversion must satisfy the bound given below. * Let f~approx~ = *convert_uchar_sat_*(`f * 255.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` `CL_UNORM_INT_101010` (10-bit unsigned integer) +`half` {rightarrow} {CL_UNORM_INT_101010} (10-bit unsigned integer) [none] * Let f~exact~ = *max*(`0`, *min*(`f * 1023`, `1023`)) @@ -17243,7 +17246,7 @@ result of the conversion must satisfy the bound given below. * Let f~approx~ = *convert_ushort_sat_*(`f * 1023.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` `CL_UNORM_INT16` (16-bit unsigned integer) +`half` {rightarrow} {CL_UNORM_INT16} (16-bit unsigned integer) [none] * Let f~exact~ = *max*(`0`, *min*(`f * 65535`, `65535`)) @@ -17252,7 +17255,7 @@ result of the conversion must satisfy the bound given below.
65535.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` `CL_SNORM_INT8` (8-bit signed integer) +`half` {rightarrow} {CL_SNORM_INT8} (8-bit signed integer) [none] * Let f~exact~ = *max*(`-128`, *min*(`f * 127`, `127`)) @@ -17260,7 +17263,7 @@ result of the conversion must satisfy the bound given below. * Let f~approx~ = *convert_char_sat_*(`f * 127.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` `CL_SNORM_INT16` (16-bit signed integer) +`half` {rightarrow} {CL_SNORM_INT16} (16-bit signed integer) [none] * Let f~exact~ = *max*(`-32768`, *min*(`f * 32767`, `32767`)) @@ -17273,7 +17276,7 @@ endif::cl_khr_fp16[] [[conversion-rules-for-half-precision-floating-point-channel-data-type]] === Conversion Rules for Half-Precision Floating-Point Channel Data Type -For images created with a channel data type of `CL_HALF_FLOAT`, the +For images created with a channel data type of {CL_HALF_FLOAT}, the conversions from `half` to `float` are lossless (as described in <>). Conversions from `float` to `half` round the mantissa using the round to @@ -17289,7 +17292,7 @@ type. === Conversion Rules for Floating-Point Channel Data Type The following rules apply for reading and writing images created with -channel data type of `CL_FLOAT`. +channel data type of {CL_FLOAT}. * NaNs may be converted to a NaN value(s) supported by the device. * Denorms can be flushed to zero. @@ -17299,12 +17302,12 @@ channel data type of `CL_FLOAT`.
[[conversion-rules-for-signed-and-unsigned-8-bit-16-bit-and-32-bit-integer-channel-data-types]] === Conversion Rules for Signed and Unsigned 8-Bit, 16-Bit and 32-Bit Integer Channel Data Types -Calls to *read_imagei* with channel data type values of `CL_SIGNED_INT8`, -`CL_SIGNED_INT16` and `CL_SIGNED_INT32` return the unmodified integer values +Calls to *read_imagei* with channel data type values of {CL_SIGNED_INT8}, +{CL_SIGNED_INT16} and {CL_SIGNED_INT32} return the unmodified integer values stored in the image at specified location. -Calls to *read_imageui* with channel data type values of `CL_UNSIGNED_INT8`, -`CL_UNSIGNED_INT16` and `CL_UNSIGNED_INT32` return the unmodified integer +Calls to *read_imageui* with channel data type values of {CL_UNSIGNED_INT8}, +{CL_UNSIGNED_INT16} and {CL_UNSIGNED_INT32} return the unmodified integer values stored in the image at specified location. Calls to *write_imagei* will perform one of the following conversions: @@ -17474,7 +17477,7 @@ one of the integers 0, 1, ... h~t~ - 1. References are to sections and tables of this specific version, although other versions exists. . [[opencl-device-queries]] "`Device Queries`" are defined in the - <> for *clGetDeviceInfo*, and the + <> for {clGetDeviceInfo}, and the individual queries are defined in the "`OpenCL Device Queries`" table (4.3) of that Specification. . [[opencl-channel-order,image channel order]] "`Image Channel Order`" is diff --git a/api/cl_khr_extended_versioning.asciidoc b/api/cl_khr_extended_versioning.asciidoc index cf2ea54ee..d67e61f0a 100644 --- a/api/cl_khr_extended_versioning.asciidoc +++ b/api/cl_khr_extended_versioning.asciidoc @@ -47,16 +47,16 @@ Versioning>> section. 
=== New Macro Names - * {CL_VERSION_MAJOR_BITS_KHR} - * {CL_VERSION_MINOR_BITS_KHR} - * {CL_VERSION_PATCH_BITS_KHR} - * `CL_VERSION_MAJOR_MASK_KHR` - * `CL_VERSION_MINOR_MASK_KHR` - * `CL_VERSION_PATCH_MASK_KHR` - * `CL_VERSION_MAJOR_KHR` - * `CL_VERSION_MINOR_KHR` - * `CL_VERSION_PATCH_KHR` - * `CL_MAKE_VERSION_KHR` + * {CL_VERSION_MAJOR_BITS_KHR_anchor} + * {CL_VERSION_MINOR_BITS_KHR_anchor} + * {CL_VERSION_PATCH_BITS_KHR_anchor} + * {CL_VERSION_MAJOR_MASK_KHR_anchor} + * {CL_VERSION_MINOR_MASK_KHR_anchor} + * {CL_VERSION_PATCH_MASK_KHR_anchor} + * {CL_VERSION_MAJOR_KHR_anchor} + * {CL_VERSION_MINOR_KHR_anchor} + * {CL_VERSION_PATCH_KHR_anchor} + * {CL_MAKE_VERSION_KHR_anchor} === New Enums diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index 0a70e6278..d45e84c89 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -2220,19 +2220,19 @@ This enables versions to be ordered using standard C/C++ operators. A number of convenience macros are provided by the OpenCL Headers to make working with version numbers easier. - * `CL_VERSION_MAJOR` extracts the _major_ version from a packed + * {CL_VERSION_MAJOR_anchor} extracts the _major_ version from a packed {cl_version_TYPE}. - * `CL_VERSION_MINOR` extracts the _minor_ version from a packed + * {CL_VERSION_MINOR_anchor} extracts the _minor_ version from a packed {cl_version_TYPE}. - * `CL_VERSION_PATCH` extracts the _patch_ version from a packed + * {CL_VERSION_PATCH_anchor} extracts the _patch_ version from a packed {cl_version_TYPE}. - * `CL_MAKE_VERSION` returns a packed {cl_version_TYPE} from a + * {CL_MAKE_VERSION_anchor} returns a packed {cl_version_TYPE} from a _major_, _minor_ and _patch_ version. * {CL_VERSION_MAJOR_BITS_anchor}, {CL_VERSION_MINOR_BITS_anchor}, and {CL_VERSION_PATCH_BITS_anchor} are the number of bits in the corresponding field. 
- * `CL_VERSION_MAJOR_MASK`, `CL_VERSION_MINOR_MASK`, and - `CL_VERSION_PATCH_MASK` are bitmasks used to extract the + * {CL_VERSION_MAJOR_MASK_anchor}, {CL_VERSION_MINOR_MASK_anchor}, and + {CL_VERSION_PATCH_MASK_anchor} are bitmasks used to extract the corresponding packed fields from the version number. [source,opencl] diff --git a/c/dictionary.asciidoc b/c/dictionary.asciidoc new file mode 100644 index 000000000..bdf9c23c3 --- /dev/null +++ b/c/dictionary.asciidoc @@ -0,0 +1,5 @@ +// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Creative Commons Attribution 4.0 International License; see +// http://creativecommons.org/licenses/by/4.0/ + +include::{generated}/api/api-dictionary-no-links.asciidoc[] diff --git a/c/footnotes.asciidoc b/c/footnotes.asciidoc index 6abc922aa..c22db5889 100644 --- a/c/footnotes.asciidoc +++ b/c/footnotes.asciidoc @@ -72,11 +72,11 @@ This is similar to the `GL_ADDRESS_CLAMP_TO_BORDER` addressing mode. \ ] :fn-CLK_UNORM_INT_101010_2: pass:n[ \ -Although `CL_UNORM_INT_101010_2` was added in OpenCL 2.1, because there was no OpenCL C 2.1 this image channel order <> OpenCL 3.0. \ +Although {CL_UNORM_INT_101010_2} was added in OpenCL 2.1, because there was no OpenCL C 2.1 this image channel order <> OpenCL 3.0. \ ] :fn-double: pass:n[ \ -The `double` scalar type is an optional type that is supported if the value of the `CL_DEVICE_DOUBLE_FP_CONFIG` device query is not zero. \ +The `double` scalar type is an optional type that is supported if the value of the {CL_DEVICE_DOUBLE_FP_CONFIG} device query is not zero. \ If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_fp64} feature macro. \ ] @@ -86,7 +86,7 @@ In OpenCL C 3.0 this will be indicated by the presence of the {opencl_c_fp64} fe ] :fn-double-vec: pass:n[ \ -The `double__n__` vector type is an optional type that is supported if the value of the `CL_DEVICE_DOUBLE_FP_CONFIG` device query is not zero. 
\ +The `double__n__` vector type is an optional type that is supported if the value of the {CL_DEVICE_DOUBLE_FP_CONFIG} device query is not zero. \ If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_fp64} feature macro. \ ] @@ -167,12 +167,12 @@ Integer promotion is described in < c) ? c : a) \end{aligned} ++++ -If the addressing mode is `CL_ADDRESS_CLAMP` or `CL_ADDRESS_CLAMP_TO_EDGE`, and the selected texel location `(i,j,k)` refers to a location outside the image, the border color is used as the color value for the texel. +If the addressing mode is {CL_ADDRESS_CLAMP} or {CL_ADDRESS_CLAMP_TO_EDGE}, and the selected texel location `(i,j,k)` refers to a location outside the image, the border color is used as the color value for the texel. -Otherwise, if the addressing mode is `CL_ADDRESS_NONE` and the selected texel location `(i,j,k)` refers to a location outside the image, the color value for the texel is undefined. +Otherwise, if the addressing mode is {CL_ADDRESS_NONE} and the selected texel location `(i,j,k)` refers to a location outside the image, the color value for the texel is undefined. [[clamp-linear-filtering]] ===== Linear Filtering -When the filter mode is `CL_FILTER_LINEAR`, a 2 x 2 square of image elements (for a 2D image) or a 2 x 2 x 2 cube of image elements (for a 3D image is selected). +When the filter mode is {CL_FILTER_LINEAR}, a 2 x 2 square of image elements (for a 2D image) or a 2 x 2 x 2 cube of image elements (for a 3D image) is selected. This 2 x 2 square or 2 x 2 x 2 cube is obtained as follows. Let: @@ -148,21 +148,21 @@ T &=& (1 - a) \times (1 - b) \times T_{i0j0}\\ where `T~ij~` is the image element at location `(i,j)` in the 2D image. -If the addressing mode is `CL_ADDRESS_CLAMP` or `CL_ADDRESS_CLAMP_TO_EDGE`, and any of the selected `T~ijk~` or `T~ij~` refers to a location outside the image, the border color is used as the image element.
+If the addressing mode is {CL_ADDRESS_CLAMP} or {CL_ADDRESS_CLAMP_TO_EDGE}, and any of the selected `T~ijk~` or `T~ij~` refers to a location outside the image, the border color is used as the image element. -Otherwise, if the addressing mode is `CL_ADDRESS_NONE`, and any of the selected `T~ijk~` or `T~ij~` refers to a location outside the image, the color value is undefined. +Otherwise, if the addressing mode is {CL_ADDRESS_NONE}, and any of the selected `T~ijk~` or `T~ij~` refers to a location outside the image, the color value is undefined. -If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT`, and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. +If the image channel type is {CL_FLOAT} or {CL_HALF_FLOAT}, and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. [[repeat-addressing]] ==== Repeat Addressing Mode -We now discuss how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is `CL_ADDRESS_REPEAT`. +We now discuss how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is {CL_ADDRESS_REPEAT}. [[repeat-nearest-filtering]] ===== Nearest Filtering -When filter mode is `CL_FILTER_NEAREST`, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. +When filter mode is {CL_FILTER_NEAREST}, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. The image element location `(i,j,k)` is computed as: [latexmath] @@ -189,7 +189,7 @@ For a 2D image, the image element at location (i, j) becomes the color value. 
[[repeat-linear-filtering]] ===== Linear Filtering -When filter mode is `CL_FILTER_LINEAR`, a 2 x 2 square of image elements for a 2D image or a 2 x 2 x 2 cube of image elements for a 3D image is selected. +When filter mode is {CL_FILTER_LINEAR}, a 2 x 2 square of image elements for a 2D image or a 2 x 2 x 2 cube of image elements for a 3D image is selected. This 2 x 2 square or 2 x 2 x 2 cube is obtained as follows. Let @@ -256,18 +256,18 @@ T &=&(1 - a) \times (1 - b) \times T_{i0j0}\\ where `T~ij~` is the image element at location `(i,j)` in the 2D image. -If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT`, and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. +If the image channel type is {CL_FLOAT} or {CL_HALF_FLOAT}, and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. [[mirrored-repeat-addressing]] ==== Mirrored Repeat Addressing Mode -We now discuss how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is `CL_ADDRESS_MIRRORED_REPEAT`. -The `CL_ADDRESS_MIRRORED_REPEAT` addressing mode causes the image to be read as if it is tiled at every integer seam, with the interpretation of the image data flipped at each integer crossing. +We now discuss how the addressing and filter modes are applied to generate the appropriate sample locations to read from the image if the addressing mode is {CL_ADDRESS_MIRRORED_REPEAT}. +The {CL_ADDRESS_MIRRORED_REPEAT} addressing mode causes the image to be read as if it is tiled at every integer seam, with the interpretation of the image data flipped at each integer crossing. [[mirrored-repeat-nearest-filtering]] ===== Nearest Filtering -When filter mode is `CL_FILTER_NEAREST`, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. 
+When filter mode is {CL_FILTER_NEAREST}, the result of the image read instruction is the image element that is nearest (in Manhattan distance) to the image element location `(i,j,k)`. The image element location `(i,j,k)` is computed as: [latexmath] @@ -297,7 +297,7 @@ For a 2D image, the image element at location (i, j) becomes the color value. [[mirrored-repeat-linear-filtering]] ===== Linear Filtering -When filter mode is `CL_FILTER_LINEAR`, a 2 x 2 square of image elements for a 2D image or a 2 x 2 x 2 cube of image elements for a 3D image is selected. +When filter mode is {CL_FILTER_LINEAR}, a 2 x 2 square of image elements for a 2D image or a 2 x 2 x 2 cube of image elements for a 3D image is selected. This 2 x 2 square or 2 x 2 x 2 cube is obtained as follows. Let @@ -375,15 +375,15 @@ T &=& (1 - a) \times T_i0 + a \times T_i1 where `T~i~` is the image element at location `(i)` in the 1D image. -If the image channel type is `CL_FLOAT` or `CL_HALF_FLOAT` and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. +If the image channel type is {CL_FLOAT} or {CL_HALF_FLOAT} and any of the image elements `T~ijk~` or `T~ij~` is INF or NaN, the color value is undefined. [[precision-of-addressing-and-filter-modes]] === Precision of Addressing and Filter Modes -If the sampler is specified as using unnormalized coordinates (floating-point or integer coordinates), filter mode set to `CL_FILTER_NEAREST` and addressing mode set to one of the following modes - `CL_ADDRESS_CLAMP`, `CL_ADDRESS_CLAMP_TO_EDGE` or `CL_ADDRESS_NONE` - the location of the image element in the image given by `(i,j,k)` will be computed without any loss of precision. 
+If the sampler is specified as using unnormalized coordinates (floating-point or integer coordinates), filter mode set to {CL_FILTER_NEAREST} and addressing mode set to one of the following modes - {CL_ADDRESS_CLAMP}, {CL_ADDRESS_CLAMP_TO_EDGE} or {CL_ADDRESS_NONE} - the location of the image element in the image given by `(i,j,k)` will be computed without any loss of precision. For all other sampler combinations of normalized or unnormalized coordinates, filter modes, and addressing modes, the relative error or precision of the addressing mode calculations and the image filter operation are not defined. -To ensure precision of image addressing and filter calculations across any OpenCL device for these sampler combinations, developers may unnormalize the image coordinate in the kernel, and then implement the linear filter in the kernel with appropriate read image instructions with a sampler that uses unnormalized coordinates, filter mode set to `CL_FILTER_NEAREST`, addressing mode set to `CL_ADDRESS_CLAMP`, `CL_ADDRESS_CLAMP_TO_EDGE` or `CL_ADDRESS_NONE`, and finally performing the interpolation of color values read from the image to generate the filtered color value. +To ensure precision of image addressing and filter calculations across any OpenCL device for these sampler combinations, developers may unnormalize the image coordinate in the kernel, and then implement the linear filter in the kernel with appropriate read image instructions with a sampler that uses unnormalized coordinates, filter mode set to {CL_FILTER_NEAREST}, addressing mode set to {CL_ADDRESS_CLAMP}, {CL_ADDRESS_CLAMP_TO_EDGE} or {CL_ADDRESS_NONE}, and finally performing the interpolation of color values read from the image to generate the filtered color value. 
[[conversion-rules]] === Conversion Rules @@ -398,41 +398,41 @@ In this section we discuss converting normalized integer channel data types to h [[converting-normalized-integer-channel-data-types-to-half-precision-floating-point-values]] ===== Converting Normalized Integer Channel Data Types to Half Precision Floating-point Values -For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized half precision floating-point values in the range [0.0h ... 1.0h]. +For images created with image channel data type of {CL_UNORM_INT8} and {CL_UNORM_INT16}, image read instructions will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized half precision floating-point values in the range [0.0h ... 1.0h]. -For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit signed integer to normalized half precision floating-point values in the range [-1.0h ... 1.0h]. +For images created with image channel data type of {CL_SNORM_INT8} and {CL_SNORM_INT16}, image read instructions will convert the channel values from an 8-bit or 16-bit signed integer to normalized half precision floating-point values in the range [-1.0h ... 1.0h]. 
These conversions are performed as follows: -* `CL_UNORM_INT8` (8-bit unsigned integer) -> `half` +* {CL_UNORM_INT8} (8-bit unsigned integer) -> `half` + [latexmath] ++++ normalized\_half\_value(x)=round\_to\_half(\frac{x}{255}) ++++ -* `CL_UNORM_INT_101010` (10-bit unsigned integer) -> `half` +* {CL_UNORM_INT_101010} (10-bit unsigned integer) -> `half` + [latexmath] ++++ normalized\_half\_value(x)=round\_to\_half(\frac{x}{1023}) ++++ -* `CL_UNORM_INT16` (16-bit unsigned integer) -> `half` +* {CL_UNORM_INT16} (16-bit unsigned integer) -> `half` + [latexmath] ++++ normalized\_half\_value(x)=round\_to\_half(\frac{x}{65535}) ++++ -* `CL_SNORM_INT8` (8-bit signed integer) -> `half` +* {CL_SNORM_INT8} (8-bit signed integer) -> `half` + [latexmath] ++++ normalized\_half\_value(x)=max(-1.0h, round\_to\_half(\frac{x}{127})) ++++ -* `CL_SNORM_INT16` (16-bit signed integer) -> `half` +* {CL_SNORM_INT16} (16-bit signed integer) -> `half` + [latexmath] ++++ @@ -441,28 +441,28 @@ normalized\_half\_value(x)=max(-1.0h, round\_to\_half(\frac{x}{32767})) The precision of the above conversions is \<= 1.5 ulp except for the following cases: -For `CL_UNORM_INT8`: +For {CL_UNORM_INT8}: * 0 must convert to 0.0h, and * 255 must convert to 1.0h -For `CL_UNORM_INT_101010`: +For {CL_UNORM_INT_101010}: * 0 must convert to 0.0h, and * 1023 must convert to 1.0h -For `CL_UNORM_INT16`: +For {CL_UNORM_INT16}: * 0 must convert to 0.0h, and * 65535 must convert to 1.0h -For `CL_SNORM_INT8`: +For {CL_SNORM_INT8}: * -128 and -127 must convert to -1.0h, * 0 must convert to 0.0h, and * 127 must convert to 1.0h -For `CL_SNORM_INT16`: +For {CL_SNORM_INT16}: * -32768 and -32767 must convert to -1.0h, * 0 must convert to 0.0h, and @@ -471,16 +471,16 @@ For `CL_SNORM_INT16`: [[converting-half-precision-floating-point-values-to-normalized-integer-channel-data-types]] ===== Converting Half Precision Floating-point Values to Normalized Integer Channel Data Types -For images created with image channel data type 
of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image write instructions will convert the half precision floating-point color value to an 8-bit or 16-bit unsigned integer. +For images created with image channel data type of {CL_UNORM_INT8} and {CL_UNORM_INT16}, image write instructions will convert the half precision floating-point color value to an 8-bit or 16-bit unsigned integer. -For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image write instructions will convert the half precision floating-point color value to an 8-bit or 16-bit signed integer. +For images created with image channel data type of {CL_SNORM_INT8} and {CL_SNORM_INT16}, image write instructions will convert the half precision floating-point color value to an 8-bit or 16-bit signed integer. OpenCL implementations may choose to approximate the rounding mode used in the conversions described below. When approximate rounding is used instead of the preferred rounding, the result of the conversion must satisfy the bound given below. 
The conversions from half precision floating-point values to normalized integer values are performed is as follows: - * `half` -> `CL_UNORM_INT8` (8-bit unsigned integer) + * `half` -> {CL_UNORM_INT8} (8-bit unsigned integer) + [latexmath] ++++ @@ -502,7 +502,7 @@ The conversions from half precision floating-point values to normalized integer \end{aligned} ++++ - * `half` -> `CL_UNORM_INT16` (16-bit unsigned integer) + * `half` -> {CL_UNORM_INT16} (16-bit unsigned integer) + [latexmath] ++++ @@ -524,7 +524,7 @@ The conversions from half precision floating-point values to normalized integer \end{aligned} ++++ - * `half` -> `CL_SNORM_INT8` (8-bit signed integer) + * `half` -> {CL_SNORM_INT8} (8-bit signed integer) + [latexmath] ++++ @@ -546,7 +546,7 @@ The conversions from half precision floating-point values to normalized integer \end{aligned} ++++ - * `half` -> `CL_SNORM_INT16` (16-bit signed integer) + * `half` -> {CL_SNORM_INT16} (16-bit signed integer) + [latexmath] ++++ @@ -571,41 +571,41 @@ The conversions from half precision floating-point values to normalized integer [[converting-normalized-integer-channel-data-types-to-floating-point-values]] ===== Converting Normalized Integer Channel Data Types to Floating-point Values -For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized floating-point values in the range [0.0f ... 1.0f]. +For images created with image channel data type of {CL_UNORM_INT8} and {CL_UNORM_INT16}, image read instructions will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized floating-point values in the range [0.0f ... 1.0f]. -For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image read instructions will convert the channel values from an 8-bit or 16-bit signed integer to normalized floating-point values in the range [-1.0f ... 
1.0f]. +For images created with image channel data type of {CL_SNORM_INT8} and {CL_SNORM_INT16}, image read instructions will convert the channel values from an 8-bit or 16-bit signed integer to normalized floating-point values in the range [-1.0f ... 1.0f]. These conversions are performed as follows: - * `CL_UNORM_INT8` (8-bit unsigned integer) -> `float` + * {CL_UNORM_INT8} (8-bit unsigned integer) -> `float` + [latexmath] ++++ normalized\_float\_value(x)=round\_to\_float(\frac{x}{255}) ++++ - * `CL_UNORM_INT_101010` (10-bit unsigned integer) -> `float` + * {CL_UNORM_INT_101010} (10-bit unsigned integer) -> `float` + [latexmath] ++++ normalized\_float\_value(x)=round\_to\_float(\frac{x}{1023}) ++++ - * `CL_UNORM_INT16` (16-bit unsigned integer) -> `float` + * {CL_UNORM_INT16} (16-bit unsigned integer) -> `float` + [latexmath] ++++ normalized\_float\_value(x)=round\_to\_float(\frac{x}{65535}) ++++ - * `CL_SNORM_INT8` (8-bit signed integer) -> `float` + * {CL_SNORM_INT8} (8-bit signed integer) -> `float` + [latexmath] ++++ normalized\_float\_value(x)=max(-1.0f, round\_to\_float(\frac{x}{127})) ++++ - * `CL_SNORM_INT16` (16-bit signed integer) -> `float` + * {CL_SNORM_INT16} (16-bit signed integer) -> `float` + [latexmath] ++++ @@ -614,28 +614,28 @@ normalized\_float\_value(x)=max(-1.0f, round\_to\_float(\frac{x}{32767})) The precision of the above conversions is \<= 1.5 ulp except for the following cases. 
-For `CL_UNORM_INT8`: +For {CL_UNORM_INT8}: * 0 must convert to 0.0f, and * 255 must convert to 1.0f -For `CL_UNORM_INT_101010`: +For {CL_UNORM_INT_101010}: * 0 must convert to 0.0f, and * 1023 must convert to 1.0f -For `CL_UNORM_INT16`: +For {CL_UNORM_INT16}: * 0 must convert to 0.0f, and * 65535 must convert to 1.0f -For `CL_SNORM_INT8`: +For {CL_SNORM_INT8}: * -128 and -127 must convert to -1.0f, * 0 must convert to 0.0f, and * 127 must convert to 1.0f -For `CL_SNORM_INT16`: +For {CL_SNORM_INT16}: * -32768 and -32767 must convert to -1.0f, * 0 must convert to 0.0f, and @@ -644,16 +644,16 @@ For `CL_SNORM_INT16`: [[converting-floating-point-values-to-normalized-integer-channel-data-types]] ===== Converting Floating-point Values to Normalized Integer Channel Data Types -For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, image write instructions will convert the floating-point color value to an 8-bit or 16-bit unsigned integer. +For images created with image channel data type of {CL_UNORM_INT8} and {CL_UNORM_INT16}, image write instructions will convert the floating-point color value to an 8-bit or 16-bit unsigned integer. -For images created with image channel data type of `CL_SNORM_INT8` and `CL_SNORM_INT16`, image write instructions will convert the floating-point color value to an 8-bit or 16-bit signed integer. +For images created with image channel data type of {CL_SNORM_INT8} and {CL_SNORM_INT16}, image write instructions will convert the floating-point color value to an 8-bit or 16-bit signed integer. OpenCL implementations may choose to approximate the rounding mode used in the conversions described below. When approximate rounding is used instead of the preferred rounding, the result of the conversion must satisfy the bound given below. 
The conversions from half precision floating-point values to normalized integer values are performed is as follows: - * `float` -> `CL_UNORM_INT8` (8-bit unsigned integer) + * `float` -> {CL_UNORM_INT8} (8-bit unsigned integer) + [latexmath] ++++ @@ -675,7 +675,7 @@ The conversions from half precision floating-point values to normalized integer \end{aligned} ++++ - * `float` -> `CL_UNORM_INT_101010` (10-bit unsigned integer) + * `float` -> {CL_UNORM_INT_101010} (10-bit unsigned integer) + [latexmath] ++++ @@ -697,7 +697,7 @@ The conversions from half precision floating-point values to normalized integer \end{aligned} ++++ - * `float` -> `CL_UNORM_INT16` (16-bit unsigned integer) + * `float` -> {CL_UNORM_INT16} (16-bit unsigned integer) + [latexmath] ++++ @@ -719,7 +719,7 @@ The conversions from half precision floating-point values to normalized integer \end{aligned} ++++ - * `float` -> `CL_SNORM_INT8` (8-bit signed integer) + * `float` -> {CL_SNORM_INT8} (8-bit signed integer) + [latexmath] ++++ @@ -741,7 +741,7 @@ The conversions from half precision floating-point values to normalized integer \end{aligned} ++++ - * `float` -> `CL_SNORM_INT16` (16-bit signed integer) + * `float` -> {CL_SNORM_INT16} (16-bit signed integer) + [latexmath] ++++ @@ -766,7 +766,7 @@ The conversions from half precision floating-point values to normalized integer [[conversion-rules-for-half-precision-floating-point-channel-data-type]] ==== Conversion Rules for Half Precision Floating-point Channel Data Type -For images created with a channel data type of `CL_HALF_FLOAT`, the conversions of half to float and half to half are lossless. +For images created with a channel data type of {CL_HALF_FLOAT}, the conversions of half to float and half to half are lossless. Conversions from float to half round the mantissa using the round to nearest even or round to zero rounding mode. 
Denormalized numbers for the half data type which may be generated when converting a float to a half may be flushed to zero. A float NaN must be converted to an appropriate NaN in the half type. @@ -775,7 +775,7 @@ A float INF must be converted to an appropriate INF in the half type. [[conversion-rules-for-floating-point-channel-data-type]] ==== Conversion Rules for Floating-point Channel Data Type -The following rules apply for reading and writing images created with channel data type of `CL_FLOAT`. +The following rules apply for reading and writing images created with channel data type of {CL_FLOAT}. * NaNs may be converted to a NaN value(s) supported by the device. * Denorms can be flushed to zero. @@ -784,48 +784,48 @@ The following rules apply for reading and writing images created with channel da [[conversion-rules-for-signed-and-unsigned-8-bit-16-bit-and-32-bit-integer-channel-data-types]] ==== Conversion Rules for Signed and Unsigned 8-bit, 16-bit and 32-bit Integer Channel Data Types -For images created with image channel data type of `CL_SIGNED_INT8`, `CL_SIGNED_INT16` and `CL_SIGNED_INT32`, image read instructions will return the unmodified integer values stored in the image at specified location. +For images created with image channel data type of {CL_SIGNED_INT8}, {CL_SIGNED_INT16} and {CL_SIGNED_INT32}, image read instructions will return the unmodified integer values stored in the image at specified location. -Likewise, for images created with image channel data type of `CL_UNSIGNED_INT8`, `CL_UNSIGNED_INT16` and `CL_UNSIGNED_INT32`, image read instructions will return the unmodified unsigned integer values stored in the image at specified location. +Likewise, for images created with image channel data type of {CL_UNSIGNED_INT8}, {CL_UNSIGNED_INT16} and {CL_UNSIGNED_INT32}, image read instructions will return the unmodified unsigned integer values stored in the image at specified location. 
Image write instructions will perform one of the following conversions: -* 32 bit signed integer -> `CL_SIGNED_INT8` (8-bit signed integer): +* 32 bit signed integer -> {CL_SIGNED_INT8} (8-bit signed integer): + [latexmath] ++++ int8\_value(x) = clamp(x, -128, 127) ++++ -* 32 bit signed integer -> `CL_SIGNED_INT16` (16-bit signed integer): +* 32 bit signed integer -> {CL_SIGNED_INT16} (16-bit signed integer): + [latexmath] ++++ int16\_value(x) = clamp(x, -32768, 32767) ++++ -* 32 bit signed integer -> `CL_SIGNED_INT32` (32-bit signed integer): +* 32 bit signed integer -> {CL_SIGNED_INT32} (32-bit signed integer): + [latexmath] ++++ int32\_value(x) = x \quad \text{(no conversion)} ++++ -* 32 bit unsigned integer -> `CL_UNSIGNED_INT8` (8-bit unsigned integer): +* 32 bit unsigned integer -> {CL_UNSIGNED_INT8} (8-bit unsigned integer): + [latexmath] ++++ uint8\_value(x) = clamp(x, 0, 255) ++++ -* 32 bit unsigned integer -> `CL_UNSIGNED_INT16` (16-bit unsigned integer): +* 32 bit unsigned integer -> {CL_UNSIGNED_INT16} (16-bit unsigned integer): + [latexmath] ++++ uint16\_value(x) = clamp(x, 0, 65535) ++++ -* 32 bit unsigned integer -> `CL_UNSIGNED_INT32` (32-bit unsigned integer): +* 32 bit unsigned integer -> {CL_UNSIGNED_INT32} (32-bit unsigned integer): + [latexmath] ++++ diff --git a/env/numerical_compliance.asciidoc b/env/numerical_compliance.asciidoc index ee339a948..bbb895b73 100644 --- a/env/numerical_compliance.asciidoc +++ b/env/numerical_compliance.asciidoc @@ -29,8 +29,8 @@ IEEE 754 defines four possible rounding modes: * _Round toward zero_ The complete set of rounding modes supported by the device are described by -the `CL_DEVICE_SINGLE_FP_CONFIG`, `CL_DEVICE_HALF_FP_CONFIG`, and -`CL_DEVICE_DOUBLE_FP_CONFIG` device queries. +the {CL_DEVICE_SINGLE_FP_CONFIG}, {CL_DEVICE_HALF_FP_CONFIG}, and +{CL_DEVICE_DOUBLE_FP_CONFIG} device queries. 
For double precision operations, _Round to nearest even_ is a required rounding mode, and is therefore the default rounding mode for double @@ -98,8 +98,8 @@ Support for denormalized numbers is required for double precision floating-point. Support for INFs, NaNs, and denormalized numbers is described by the -`CL_FP_DENORM` and `CL_FP_INF_NAN` bits in the `CL_DEVICE_SINGLE_FP_CONFIG`, -`CL_DEVICE_HALF_FP_CONFIG`, and `CL_DEVICE_DOUBLE_FP_CONFIG` device queries. +{CL_FP_DENORM} and {CL_FP_INF_NAN} bits in the {CL_DEVICE_SINGLE_FP_CONFIG}, +{CL_DEVICE_HALF_FP_CONFIG}, and {CL_DEVICE_DOUBLE_FP_CONFIG} device queries. === Floating-Point Exceptions diff --git a/ext/introduction.asciidoc b/ext/introduction.asciidoc index 311549b02..9c22399cc 100644 --- a/ext/introduction.asciidoc +++ b/ext/introduction.asciidoc @@ -45,8 +45,8 @@ naming convention: * A unique _name string_ of the form `"*cl_khr_<__name__>*"` is associated with each extension. If the extension is supported by an implementation, this string will be - present in the implementation's `CL_PLATFORM_EXTENSIONS` string or - `CL_DEVICE_EXTENSIONS` string. + present in the implementation's {CL_PLATFORM_EXTENSIONS} string or + {CL_DEVICE_EXTENSIONS} string. * All API functions defined by the extension will have names of the form *cl<__function_name__>KHR*. * All enumerants defined by the extension will have names of the form @@ -55,8 +55,8 @@ naming convention: Functions and enumerants defined by extensions that are promoted to core features will have their *KHR* affix removed. OpenCL implementations of such later revisions must also export the name -strings of promoted extensions in the `CL_PLATFORM_EXTENSIONS` or -`CL_DEVICE_EXTENSIONS` string, and support the *KHR*-affixed versions of +strings of promoted extensions in the {CL_PLATFORM_EXTENSIONS} or +{CL_DEVICE_EXTENSIONS} string, and support the *KHR*-affixed versions of functions and enumerants as a transition aid. 
Vendor extensions are strongly encouraged to follow a similar naming @@ -65,8 +65,8 @@ convention: * A unique _name string_ of the form `"*cl_<__vendor_name__>_<__name>__*"` is associated with each extension. If the extension is supported by an implementation, this string will be - present in the implementation's `CL_PLATFORM_EXTENSIONS` string or - `CL_DEVICE_EXTENSIONS` string. + present in the implementation's {CL_PLATFORM_EXTENSIONS} string or + {CL_DEVICE_EXTENSIONS} string. * All API functions defined by the vendor extension will have names of the form *cl<__function_name__><__vendor_name__>*. * All enumerants defined by the vendor extension will have names of the @@ -175,12 +175,12 @@ type matching the extension function's definition defined in the appropriate extension specification and header file. A return value of `NULL` indicates that the specified function does not exist for the implementation or _platform_ is not a valid platform. -A non-`NULL` return value for *clGetExtensionFunctionAddressForPlatform* +A non-`NULL` return value for {clGetExtensionFunctionAddressForPlatform} does not guarantee that an extension function is actually supported by the platform. The application must also make a corresponding query using -*clGetPlatformInfo*(platform, CL_PLATFORM_EXTENSIONS, ...) or -*clGetDeviceInfo*(device, CL_DEVICE_EXTENSIONS, ...) to determine if an +{clGetPlatformInfo}(platform, CL_PLATFORM_EXTENSIONS, ...) or +{clGetDeviceInfo}(device, CL_DEVICE_EXTENSIONS, ...) to determine if an extension is supported by the OpenCL implementation. Since there is no way to qualify the query with a @@ -189,10 +189,10 @@ that extension on different devices for a platform. The behavior of calling a device extension function on a device not supporting that extension is undefined. 
-*clGetExtensionFunctionAddressForPlatform* may not be be used to query for core +{clGetExtensionFunctionAddressForPlatform} may not be used to query for core (non-extension) functions in OpenCL. For extension functions that may be queried using -*clGetExtensionFunctionAddressForPlatform*, implementations may also choose to +{clGetExtensionFunctionAddressForPlatform}, implementations may also choose to export those functions statically from the object libraries implementing those functions, however, portable applications cannot rely on this behavior. diff --git a/man/static/clGetExtensionFunctionAddressForPlatform.txt b/man/static/clGetExtensionFunctionAddressForPlatform.txt index 0004c2a2d..7a63323cc 100644 --- a/man/static/clGetExtensionFunctionAddressForPlatform.txt +++ b/man/static/clGetExtensionFunctionAddressForPlatform.txt @@ -32,8 +32,8 @@ The pointer returned should be cast to a function pointer type matching the exte A return value of NULL indicates that the specified function does not exist for the implementation or _platform_ is not a valid platform. A non-NULL return value for `clGetExtensionFunctionAddressForPlatform` does not guarantee that an extension function is actually supported by the platform. -The application must also make a corresponding query using flink:clGetPlatformInfo (platform, `CL_PLATFORM_EXTENSIONS`, ... ) or -flink:clGetDeviceInfo (device,`CL_DEVICE_EXTENSIONS`, ... ) to determine if an extension is supported by the OpenCL implementation. +The application must also make a corresponding query using flink:clGetPlatformInfo (platform, {CL_PLATFORM_EXTENSIONS}, ... ) or +flink:clGetDeviceInfo (device,{CL_DEVICE_EXTENSIONS}, ... ) to determine if an extension is supported by the OpenCL implementation. `clGetExtensionFunctionAddressForPlatform` may not be queried for core (non-extension) functions in OpenCL.
For functions that are queryable with `clGetExtensionFunctionAddressForPlatform`, implementations may choose to also export those functions statically from the object libraries implementing those functions. diff --git a/scripts/gen_dictionaries.py b/scripts/gen_dictionaries.py index 85d6ffcff..045646c4d 100755 --- a/scripts/gen_dictionaries.py +++ b/scripts/gen_dictionaries.py @@ -162,6 +162,66 @@ def GetFooter(): print('Found ' + str(numberOfEnums) + ' API enumerations.') + # Generate the API macro dictionaries: + + numberOfMacros = 0 + + for types in spec.findall('types'): + for type in types.findall('type'): + name = "" + category = type.get('category') + if category == 'define': + if type.text and type.text.startswith("#define"): + name = type.find('name').text + else: + continue + else: + continue + + #print('found macro: ' +name) + + # Create a variant of the name that precedes underscores with + # "zero width" spaces. This causes some long names to be + # broken at more intuitive places. 
+ htmlName = name[:3] + name[3:].replace("_", "_<wbr>") + otherName = name[:3] + name[3:].replace("_", "_&#8203;") + + # Example with link: + # + # // CL_MAKE_VERSION + #:CL_MAKE_VERSION_label: pass:q[`CL_MAKE_VERSION`] + #:CL_MAKE_VERSION: <<CL_MAKE_VERSION,{CL_MAKE_VERSION_label}>> + #:CL_MAKE_VERSION_anchor: [[CL_MAKE_VERSION]]{CL_MAKE_VERSION} + linkFile.write('// ' + name + '\n') + linkFile.write('ifdef::backend-html5[]\n') + linkFile.write(':' + name + '_label: pass:q[`' + htmlName + '`]\n') + linkFile.write('endif::[]\n') + linkFile.write('ifndef::backend-html5[]\n') + linkFile.write(':' + name + '_label: pass:q[`' + otherName + '`]\n') + linkFile.write('endif::[]\n') + linkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') + linkFile.write(':' + name + '_anchor: [[' + name + ']]{' + name + '}\n') + linkFile.write('\n') + + # Example without link: + # + # // CL_MAKE_VERSION + #:CL_MAKE_VERSION: pass:q[`CL_MAKE_VERSION`] + #:CL_MAKE_VERSION_anchor: {CL_MAKE_VERSION} + nolinkFile.write('// ' + name + '\n') + nolinkFile.write('ifdef::backend-html5[]\n') + nolinkFile.write(':' + name + ': pass:q[`' + htmlName + '`]\n') + nolinkFile.write('endif::[]\n') + nolinkFile.write('ifndef::backend-html5[]\n') + nolinkFile.write(':' + name + ': pass:q[`' + otherName + '`]\n') + nolinkFile.write('endif::[]\n') + nolinkFile.write(':' + name + '_anchor: {' + name + '}\n') + nolinkFile.write('\n') + + numberOfMacros = numberOfMacros + 1 + + print('Found ' + str(numberOfMacros) + ' API macros.') + # Generate the API types dictionaries: numberOfTypes = 0 @@ -177,6 +237,8 @@ def GetFooter(): addLink = True name = type.get('name') elif category == 'define': + if type.text and type.text.startswith("#define"): + continue name = type.find('name').text else: continue diff --git a/xml/cl.xml b/xml/cl.xml index b0c5cc2ed..63f3145ce 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -323,6 +323,20 @@ server's OpenCL/api-docs repository.
cl_uint count char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL] + + #define CL_VERSION_MAJOR_MASK ((1 << CL_VERSION_MAJOR_BITS) - 1) + #define CL_VERSION_MINOR_MASK ((1 << CL_VERSION_MINOR_BITS) - 1) + #define CL_VERSION_PATCH_MASK ((1 << CL_VERSION_PATCH_BITS) - 1) + + #define CL_VERSION_MAJOR(version) ((version) >> (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) + #define CL_VERSION_MINOR(version) (((version) >> CL_VERSION_PATCH_BITS) & CL_VERSION_MINOR_MASK) + #define CL_VERSION_PATCH(version) ((version) & CL_VERSION_PATCH_MASK) + + #define CL_MAKE_VERSION(major, minor, patch) \ + ((((major) & CL_VERSION_MAJOR_MASK) << (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) | \ + (((minor) & CL_VERSION_MINOR_MASK) << CL_VERSION_PATCH_BITS) | \ + ((patch) & CL_VERSION_PATCH_MASK)) + #define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) #define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) #define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1) @@ -335,6 +349,7 @@ server's OpenCL/api-docs repository. ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ ((patch) & CL_VERSION_PATCH_MASK_KHR)) + cl_bool signed_accelerated cl_bool unsigned_accelerated @@ -5348,6 +5363,15 @@ server's OpenCL/api-docs repository. 
+ + + + + + + + + From a99730eb8eae012ab75f57df4240fb362d335e22 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 2 Apr 2024 13:03:48 -0700 Subject: [PATCH 095/190] spec source for cl_khr_kernel_clock (#1103) * spec source for cl_khr_kernel_clock * updated after March 26th teleconference Clarified that this is a provisional extension Removed ext from feature names and feature test macros Added undefined behavior description to the SPIR-V environment spec * fix a few more places where the extension should be marked provisional * clarify in a few more places that this extension is provisional * remove provisional_notice.asciidoc, since it should not be used anymore --- OpenCL_API.txt | 2 +- OpenCL_C.txt | 82 +++++++++++++++++++++++++++++- api/appendix_e.asciidoc | 5 ++ api/cl_khr_kernel_clock.asciidoc | 62 ++++++++++++++++++++++ api/opencl_platform_layer.asciidoc | 37 ++++++++++++++ c/feature-dictionary.asciidoc | 24 +++++++++ env/extensions.asciidoc | 16 ++++++ ext/provisional_notice.asciidoc | 12 ----- ext/quick_reference.asciidoc | 4 ++ xml/cl.xml | 27 +++++++++- 10 files changed, 255 insertions(+), 16 deletions(-) create mode 100644 api/cl_khr_kernel_clock.asciidoc delete mode 100644 ext/provisional_notice.asciidoc diff --git a/OpenCL_API.txt b/OpenCL_API.txt index 2be2268ca..e7e67a578 100644 --- a/OpenCL_API.txt +++ b/OpenCL_API.txt @@ -39,7 +39,7 @@ include::config/version-local-links.asciidoc[] // Formatting and links for API functions and enums. include::api/dictionary.asciidoc[] -// Feature Dictionary - used by some extensions. +// Feature Dictionary. 
include::c/feature-dictionary.asciidoc[] // External Footnotes diff --git a/OpenCL_C.txt b/OpenCL_C.txt index dd372a8bb..0935f4fa7 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -224,14 +224,28 @@ ifdef::cl_khr_integer_dot_product[] (when the `<>` extension macro is defined) | The OpenCL C compiler supports built-in functions that perform dot -products on 4x8 bit packed integer vectors +products on 4x8 bit packed integer vectors. | {opencl_c_integer_dot_product_input_4x8bit} + (when the `<>` extension macro is defined) | The OpenCL C compiler supports built-in functions that perform dot -products on 4x8 bit integer vectors +products on 4x8 bit integer vectors. endif::cl_khr_integer_dot_product[] +ifdef::cl_khr_kernel_clock[] +| {opencl_c_kernel_clock_scope_device} +| The OpenCL C compiler supports built-in functions that sample the value from a +clock shared by all work-items executing on the device. + +| {opencl_c_kernel_clock_scope_work_group} +| The OpenCL C compiler supports built-in functions that sample the value from a +clock shared by all work-items executing in the same work-group. + +| {opencl_c_kernel_clock_scope_sub_group} +| The OpenCL C compiler supports built-in functions that sample the value from a +clock shared by all work-items executing in the same sub-group. +endif::cl_khr_kernel_clock[] + |==== In OpenCL C 3.0 or newer, feature macros must expand to the value `1` if the @@ -462,6 +476,16 @@ The extension provides new <> operating on these types. endif::cl_khr_integer_dot_product[] +ifdef::cl_khr_kernel_clock[] +[[cl_khr_kernel_clock,cl_khr_kernel_clock]] +==== Kernel Clock + +The `cl_khr_kernel_clock` extension adds support for SPIR-V instructions and +OpenCL C built-in functions to sample the value from one of three clocks +provided by compute units. 
The extension provides the following functions: + +* <<kernel-clock-functions,Kernel Clock Functions>> +endif::cl_khr_kernel_clock[] ifdef::cl_khr_local_int32_base_atomics[] [[cl_khr_local_int32_base_atomics,cl_khr_local_int32_base_atomics]] @@ -15306,6 +15330,60 @@ endif::cl_khr_subgroup_shuffle_relative[] |==== +ifdef::cl_khr_kernel_clock[] +[[kernel-clock-functions]] +=== Kernel Clock Functions + +NOTE: The functionality described in this section <<unified-spec,requires>> +support for the `<<cl_khr_kernel_clock>>` extension. + +The `clock_read_device` and `clock_read_hilo_device` functions require support +for the {opencl_c_kernel_clock_scope_device} feature. +The `clock_read_work_group` and `clock_read_hilo_work_group` functions require +support for the {opencl_c_kernel_clock_scope_work_group} feature. +The `clock_read_sub_group` and `clock_read_hilo_sub_group` functions require +support for the {opencl_c_kernel_clock_scope_sub_group} feature. + +This section describes OpenCL C built-in functions that sample the value from +one of three clocks provided by compute units. + +[[table-kernel-clock-functions]] +.Built-in Kernel Clock Functions +[cols="1a,1",options="header",] +|==== +| Function | Description + +|[source,opencl_c] +---- +ulong clock_read_device(); +ulong clock_read_work_group(); +ulong clock_read_sub_group(); +---- + | Returns a sampled value of a clock as seen by the compute unit. + + An idealized clock is an unbounded unsigned scalar integer tick count + increasing monotonically over time. A clock’s rate of progress may vary + within the lifetime of a work-item, may vary across different + executions of the program, and may be affected by conditions beyond the + control of the programmer. The sampled value read by this function consists of + the least significant bits of the idealized clock’s tick count at the time the + instruction was executed. In particular, an observer may see sampled values wrap + around zero.
+ +|[source,opencl_c] +---- +uint2 clock_read_hilo_device(); +uint2 clock_read_hilo_work_group(); +uint2 clock_read_hilo_sub_group(); +---- + | Performs the same operation as the corresponding `clock_read_*` function above, but returns the value as a + `uint2` whose `.lo` component contains the 32 least significant bits of the + result and `.hi` component contains the 32 most significant bits of the + result. + +|==== + +endif::cl_khr_kernel_clock[] + [[opencl-numerical-compliance]] = OpenCL Numerical Compliance diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index c88b80933..ec6626c2a 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -598,3 +598,8 @@ Changes from *v3.0.14*: ** Restricted semaphores to a single associated device, see {khronos-opencl-pr}/996[#996]. * `<<cl_khr_subgroup_rotate>>`: ** Clarified that only rotating within a subgroup is supported, see {khronos-opencl-pr}/967[#967]. + +Changes from *v3.0.15*: + + * Added new extensions: + ** `<<cl_khr_kernel_clock>>` (provisional) diff --git a/api/cl_khr_kernel_clock.asciidoc b/api/cl_khr_kernel_clock.asciidoc new file mode 100644 index 000000000..7f4c4a0de --- /dev/null +++ b/api/cl_khr_kernel_clock.asciidoc @@ -0,0 +1,62 @@ +// Copyright 2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_kernel_clock.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2024-03-25 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kevin Petit, Arm Ltd. + + - Paul Fradgley, Imagination Technologies + + - Jeremy Kemp, Imagination Technologies + + - Ben Ashbaugh, Intel + + - Balaji Calidas, Qualcomm Technologies, Inc. + + - Ruihao Zhang, Qualcomm Technologies, Inc. + +=== Description + +`cl_khr_kernel_clock` adds the ability for a kernel to sample the value from one +of three clocks provided by compute units.
+ +OpenCL C compilers supporting this extension will define the extension macro +`cl_khr_kernel_clock`, and may define corresponding feature macros +{opencl_c_kernel_clock_scope_device}, +{opencl_c_kernel_clock_scope_work_group}, and +{opencl_c_kernel_clock_scope_sub_group} depending on the reported +capabilities. + +See the link:{OpenCLCSpecURL}#cl_khr_kernel_clock[Kernel Clock] section of the +OpenCL C specification for more information. + +=== Interactions With Other Extensions + +On devices that implement the `EMBEDDED` profile, the `cles_khr_int64` extension +is required for the `clock_read_device`, `clock_read_work_group` and +`clock_read_sub_group` functions to be present. + +Support for sub-groups is required for the `clock_read_sub_group` and +`clock_read_hilo_sub_group` functions to be present. + +// The 'New ...' section can be auto-generated + +=== New Types + + * {cl_device_kernel_clock_capabilities_khr_TYPE} + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR} + * {cl_device_kernel_clock_capabilities_khr_TYPE} + ** {CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR} + ** {CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR} + ** {CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR} + +=== Version History + + * Revision 0.9.0, 2024-03-25 + ** First assigned version (provisional). diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 6211b1384..7c39cb51e 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -1985,6 +1985,26 @@ include::{generated}/api/version-notes/CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATIO is missing before version 2.0 of the extension. endif::cl_khr_integer_dot_product[] +ifdef::cl_khr_kernel_clock[] +| {CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR.asciidoc[] + | {cl_device_kernel_clock_capabilities_khr_TYPE} + | Returns the kernel clock capabilities of the device. 
+ + + {CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR_anchor} is set when kernels are + allowed to call the `clock_read_device` and `clock_read_hilo_device` + OpenCL-C functions. + + {CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR_anchor} is set when kernels + are allowed to call the `clock_read_work_group` and + `clock_read_hilo_work_group` OpenCL-C functions. + + {CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR_anchor} is set when kernels + are allowed to call the `clock_read_sub_group` and + `clock_read_hilo_sub_group` OpenCL-C functions. +endif::cl_khr_kernel_clock[] + ifdef::cl_khr_pci_bus_info[] | {CL_DEVICE_PCI_BUS_INFO_KHR_anchor} @@ -2080,6 +2100,23 @@ returned for {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR}: |==== endif::cl_khr_integer_dot_product[] +ifdef::cl_khr_kernel_clock[] +OpenCL 3 devices must report the following feature macros via +{CL_DEVICE_OPENCL_C_FEATURES} when the corresponding bit is set in the bitfield +returned for {CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR}: + +[cols="1,1",options="header"] +|==== +| Feature Bit | Feature Macro +| {CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR} + | {opencl_c_kernel_clock_scope_device} +| {CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR} + | {opencl_c_kernel_clock_scope_work_group} +| {CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR} + | {opencl_c_kernel_clock_scope_sub_group} +|==== +endif::cl_khr_kernel_clock[] + ifdef::cl_khr_external_semaphore[] One of the two queries {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} and {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} must return a non-empty list diff --git a/c/feature-dictionary.asciidoc b/c/feature-dictionary.asciidoc index 4943b36b5..e8375eb57 100644 --- a/c/feature-dictionary.asciidoc +++ b/c/feature-dictionary.asciidoc @@ -145,3 +145,27 @@ endif::[] ifndef::backend-html5[] :opencl_c_integer_dot_product_input_4x8bit_packed: pass:q[`\__opencl_c_​integer_​dot_​product_​input_​4x8bit_​packed`] endif::[] + +// opencl_c_kernel_clock_scope_device +ifdef::backend-html5[] 
+:opencl_c_kernel_clock_scope_device: pass:q[`\__opencl_c_kernel_clock_scope_device`] +endif::[] +ifndef::backend-html5[] +:opencl_c_kernel_clock_scope_device: pass:q[`\__opencl_c_​kernel_​clock_​scope_​device`] +endif::[] + +// opencl_c_kernel_clock_scope_work_group +ifdef::backend-html5[] +:opencl_c_kernel_clock_scope_work_group: pass:q[`\__opencl_c_kernel_clock_scope_work_group`] +endif::[] +ifndef::backend-html5[] +:opencl_c_kernel_clock_scope_work_group: pass:q[`\__opencl_c_​kernel_​clock_​scope_​work_​group`] +endif::[] + +// opencl_c_kernel_clock_scope_sub_group +ifdef::backend-html5[] +:opencl_c_kernel_clock_scope_sub_group: pass:q[`\__opencl_c_kernel_clock_scope_sub_group`] +endif::[] +ifndef::backend-html5[] +:opencl_c_kernel_clock_scope_sub_group: pass:q[`\__opencl_c_​kernel_​clock_​scope_​sub_​group`] +endif::[] diff --git a/env/extensions.asciidoc b/env/extensions.asciidoc index 4ef4fd7a6..df0259550 100644 --- a/env/extensions.asciidoc +++ b/env/extensions.asciidoc @@ -379,6 +379,22 @@ Otherwise, for the *GroupUniformArithmeticKHR* scan and reduction instructions, ** *OpTypeInt* with _Width_ equal to `32` or `64` (equivalent to `int`, `uint`, `long`, and `ulong`) ** *OpTypeFloat* (equivalent to `half`, `float`, and `double`) +==== `cl_khr_kernel_clock` + +If the OpenCL environment supports the extension `cl_khr_kernel_clock`, then the environment must accept modules that declare use of the extension `SPV_KHR_shader_clock` via *OpExtension*. 
+ +If the OpenCL environment supports the extension `cl_khr_kernel_clock` and use of the SPIR-V extension `SPV_KHR_shader_clock` is declared in the module via *OpExtension*, then the environment must accept modules that declare the following SPIR-V capability: + +* *ShaderClockKHR* + +For the *OpReadClockKHR* instruction requiring this capability, supported values for _Scope_ are: + +* *Device*, if `CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR` is supported +* *Workgroup*, if `CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR` is supported +* *Subgroup*, if `CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR` is supported + +For unsupported _Scope_ values, the behavior of *OpReadClockKHR* is undefined. + === Embedded Profile Extensions ==== `cles_khr_int64` diff --git a/ext/provisional_notice.asciidoc b/ext/provisional_notice.asciidoc deleted file mode 100644 index 0cc0eb0d0..000000000 --- a/ext/provisional_notice.asciidoc +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright 2023-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[NOTE] -==== -This is a provisional OpenCL extension specification that has been Ratified under the Khronos Intellectual Property Framework. -It is being made publicly available as a provisional extension to enable review and feedback from the community. -While it is a provisional extension features may be added, removed, or changed in non-backward compatible ways. - -If you have feedback please create an issue on: https://github.com/KhronosGroup/OpenCL-Docs/ -==== \ No newline at end of file diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index 6fddf7120..194c6df9d 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -208,6 +208,10 @@ Language Specifications. 
| Integer dot product operations | Extension +| [[cl_khr_kernel_clock]] link:{APISpecURL}#cl_khr_kernel_clock[`cl_khr_kernel_clock`] +| Sample Clock Values Within a Kernel +| Extension + | [[cl_khr_mipmap_image]] link:{APISpecURL}#cl_khr_mipmap_image[`cl_khr_mipmap_image`] | Create and Use Images with Mipmaps | Extension diff --git a/xml/cl.xml b/xml/cl.xml index 63f3145ce..6f1ae87bd 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -254,6 +254,7 @@ server's OpenCL/api-docs repository. typedef cl_uint cl_image_requirements_info_ext; typedef cl_bitfield cl_platform_command_buffer_capabilities_khr; typedef cl_bitfield cl_mutable_dispatch_asserts_khr + typedef cl_bitfield cl_device_kernel_clock_capabilities_khr; Structure types @@ -1386,6 +1387,13 @@ server's OpenCL/api-docs repository. + + + + + + + In order to synchronize vendor IDs across Khronos APIs, Vulkan's vk.xml @@ -1545,7 +1553,8 @@ server's OpenCL/api-docs repository. - + + @@ -7477,5 +7486,21 @@ server's OpenCL/api-docs repository. + + + + + + + + + + + + + + + + From 94756c1a142b978adbe18a4f7e3aad0e167bb3f9 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 2 Apr 2024 17:29:40 -0700 Subject: [PATCH 096/190] generate and use an extension dictionary (#1131) * generate and use an extension dictionary * change cl_khr_kernel_clock to use asciidoctor attributes --- OpenCL_C.txt | 332 +++++++++--------- api/appendix_c.asciidoc | 20 +- api/appendix_e.asciidoc | 118 +++---- api/appendix_h.asciidoc | 6 +- api/cl_khr_3d_image_writes.asciidoc | 2 +- ...l_khr_async_work_group_copy_fence.asciidoc | 2 +- api/cl_khr_byte_addressable_store.asciidoc | 2 +- api/cl_khr_command_buffer.asciidoc | 34 +- ...l_khr_command_buffer_multi_device.asciidoc | 6 +- ...r_command_buffer_mutable_dispatch.asciidoc | 10 +- api/cl_khr_create_command_queue.asciidoc | 2 +- api/cl_khr_d3d10_sharing.asciidoc | 2 +- api/cl_khr_d3d11_sharing.asciidoc | 2 +- api/cl_khr_depth_images.asciidoc | 2 +- ...hr_device_enqueue_local_arg_types.asciidoc | 2 +- 
api/cl_khr_device_uuid.asciidoc | 2 +- api/cl_khr_dx9_media_sharing.asciidoc | 2 +- api/cl_khr_egl_event.asciidoc | 4 +- api/cl_khr_egl_image.asciidoc | 4 +- api/cl_khr_expect_assume.asciidoc | 2 +- api/cl_khr_extended_async_copies.asciidoc | 2 +- api/cl_khr_extended_bit_ops.asciidoc | 2 +- api/cl_khr_extended_versioning.asciidoc | 4 +- api/cl_khr_external_memory.asciidoc | 6 +- api/cl_khr_external_memory_dma_buf.asciidoc | 4 +- api/cl_khr_external_memory_dx.asciidoc | 4 +- api/cl_khr_external_memory_opaque_fd.asciidoc | 4 +- api/cl_khr_external_memory_win32.asciidoc | 4 +- api/cl_khr_external_semaphore.asciidoc | 16 +- ...l_khr_external_semaphore_dx_fence.asciidoc | 4 +- ..._khr_external_semaphore_opaque_fd.asciidoc | 4 +- ...cl_khr_external_semaphore_sync_fd.asciidoc | 6 +- api/cl_khr_external_semaphore_win32.asciidoc | 4 +- api/cl_khr_fp16.asciidoc | 2 +- api/cl_khr_fp64.asciidoc | 2 +- api/cl_khr_gl_depth_images.asciidoc | 4 +- api/cl_khr_gl_event.asciidoc | 2 +- api/cl_khr_gl_msaa_sharing.asciidoc | 4 +- api/cl_khr_gl_sharing.asciidoc | 4 +- api/cl_khr_global_int32_base_atomics.asciidoc | 2 +- ...khr_global_int32_extended_atomics.asciidoc | 2 +- api/cl_khr_icd.asciidoc | 4 +- api/cl_khr_il_program.asciidoc | 2 +- api/cl_khr_image2d_from_buffer.asciidoc | 2 +- api/cl_khr_initialize_memory.asciidoc | 2 +- api/cl_khr_int64_base_atomics.asciidoc | 2 +- api/cl_khr_int64_extended_atomics.asciidoc | 2 +- api/cl_khr_integer_dot_product.asciidoc | 4 +- api/cl_khr_local_int32_base_atomics.asciidoc | 2 +- ..._khr_local_int32_extended_atomics.asciidoc | 2 +- api/cl_khr_mipmap_image.asciidoc | 2 +- api/cl_khr_mipmap_image_writes.asciidoc | 6 +- api/cl_khr_pci_bus_info.asciidoc | 2 +- api/cl_khr_priority_hints.asciidoc | 4 +- api/cl_khr_select_fprounding_mode.asciidoc | 2 +- api/cl_khr_semaphore.asciidoc | 2 +- api/cl_khr_spir.asciidoc | 6 +- api/cl_khr_srgb_image_writes.asciidoc | 2 +- api/cl_khr_subgroup_ballot.asciidoc | 2 +- api/cl_khr_subgroup_clustered_reduce.asciidoc 
| 2 +- api/cl_khr_subgroup_extended_types.asciidoc | 2 +- api/cl_khr_subgroup_named_barrier.asciidoc | 2 +- ...r_subgroup_non_uniform_arithmetic.asciidoc | 2 +- api/cl_khr_subgroup_non_uniform_vote.asciidoc | 2 +- api/cl_khr_subgroup_rotate.asciidoc | 2 +- api/cl_khr_subgroup_shuffle.asciidoc | 2 +- api/cl_khr_subgroup_shuffle_relative.asciidoc | 2 +- api/cl_khr_subgroups.asciidoc | 2 +- api/cl_khr_suggested_local_work_size.asciidoc | 2 +- api/cl_khr_terminate_context.asciidoc | 2 +- api/cl_khr_throttle_hints.asciidoc | 4 +- ...khr_work_group_uniform_arithmetic.asciidoc | 2 +- api/dictionary.asciidoc | 1 + api/footnotes.asciidoc | 2 +- api/opencl_architecture.asciidoc | 6 +- api/opencl_platform_layer.asciidoc | 62 ++-- api/opencl_runtime_layer.asciidoc | 252 ++++++------- c/dictionary.asciidoc | 1 + c/footnotes.asciidoc | 14 +- env/appendix_a.asciidoc | 20 +- env/common_properties.asciidoc | 4 +- env/dictionary.asciidoc | 1 + env/extensions.asciidoc | 130 +++---- ext/deprecated_extensions.asciidoc | 2 +- ext/dictionary.asciidoc | 1 + ext/introduction.asciidoc | 4 +- ext/quick_reference.asciidoc | 154 ++++---- ext/to_core_features.asciidoc | 66 ++-- scripts/clconventions.py | 1 + scripts/gen_dictionaries.py | 272 +++++++++----- 90 files changed, 909 insertions(+), 804 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 0935f4fa7..79a27f8f6 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -221,13 +221,13 @@ operations across a work-group. ifdef::cl_khr_integer_dot_product[] | {opencl_c_integer_dot_product_input_4x8bit_packed} + - (when the `<>` extension macro is defined) + (when the {cl_khr_integer_dot_product_EXT} extension macro is defined) | The OpenCL C compiler supports built-in functions that perform dot products on 4x8 bit packed integer vectors. 
| {opencl_c_integer_dot_product_input_4x8bit} + - (when the `<>` extension macro is defined) + (when the {cl_khr_integer_dot_product_EXT} extension macro is defined) | The OpenCL C compiler supports built-in functions that perform dot products on 4x8 bit integer vectors. endif::cl_khr_integer_dot_product[] @@ -289,7 +289,7 @@ ifdef::cl_khr_3d_image_writes[] [[cl_khr_3d_image_writes,cl_khr_3d_image_writes]] ==== 3D Image Writes -The `cl_khr_3d_image_writes` extension was promoted to OpenCL 2.0, and to +The {cl_khr_3d_image_writes_EXT} extension was promoted to OpenCL 2.0, and to OpenCL 3.0 as the {opencl_c_3d_image_writes} feature. The extension adds <> that allow a kernel to write to 3D image objects in addition to @@ -301,7 +301,7 @@ ifdef::cl_khr_async_work_group_copy_fence[] [[cl_khr_async_work_group_copy_fence,cl_khr_async_work_group_copy_fence]] ==== Async Work-group Copy Fence -The `cl_khr_async_work_group_copy_fence` extension supports establishing a +The {cl_khr_async_work_group_copy_fence_EXT} extension supports establishing a memory synchronization ordering of asynchronous copies. The extension provides the `async_work_group_copy_fence` function, as described in the <> on pointers to `char`, `uchar`, `char2`, `uchar2`, `short`, `ushort` and `half`, allowing applications to read from and write to pointers to these types. @@ -324,7 +324,7 @@ ifdef::cl_khr_depth_images[] [[cl_khr_depth_images,cl_khr_depth_images]] ==== Depth Images -The `cl_khr_depth_images` extension was promoted to OpenCL 2.0. +The {cl_khr_depth_images_EXT} extension was promoted to OpenCL 2.0. 
The extension provides new <>, as well as <>, <>, @@ -337,7 +337,7 @@ ifdef::cl_khr_device_enqueue_local_arg_types[] [[cl_khr_device_enqueue_local_arg_types,cl_khr_device_enqueue_local_arg_types]] ==== Device Enqueue Local Argument Types -The `cl_khr_device_enqueue_local_arg_types` extension allows arguments to +The {cl_khr_device_enqueue_local_arg_types_EXT} extension allows arguments to blocks that are passed to the <> and to the <> to be pointers to any type (built-in or @@ -350,7 +350,7 @@ ifdef::cl_khr_extended_async_copies[] [[cl_khr_extended_async_copies,cl_khr_extended_async_copies]] ==== Extended Async Copy Functions -The `cl_khr_extended_async_copies` extension provides additional +The {cl_khr_extended_async_copies_EXT} extension provides additional <> which interpret the source and destination as 2D or 3D images. endif::cl_khr_extended_async_copies[] @@ -360,7 +360,7 @@ ifdef::cl_khr_extended_bit_ops[] [[cl_khr_extended_bit_ops,cl_khr_extended_bit_ops]] ==== Extended Bit Operations -The `cl_khr_extended_bit_ops` extension provides additional +The {cl_khr_extended_bit_ops_EXT} extension provides additional <> including bitfield insert, bitfield extract, and bit reverse. endif::cl_khr_extended_bit_ops[] @@ -370,8 +370,8 @@ ifdef::cl_khr_fp16[] [[cl_khr_fp16,cl_khr_fp16]] ==== Half-Precision Floating-Point -The `cl_khr_fp16` extension was promoted to OpenCL C 1.2 as an optional -feature, and to OpenCL 3.0 as the optional `<>` feature. +The {cl_khr_fp16_EXT} extension was promoted to OpenCL C 1.2 as an optional +feature, and to OpenCL 3.0 as the optional {cl_khr_fp16_EXT} feature. The extension provides 16-bit precision scalar and vector floating-point data types and extends many functions to accept these types. 
endif::cl_khr_fp16[] @@ -381,8 +381,8 @@ ifdef::cl_khr_fp64[] [[cl_khr_fp64,cl_khr_fp64]] ==== Double-Precision Floating-Point -The `cl_khr_fp64` extension was promoted to OpenCL C 1.2 as an optional -feature, and to OpenCL 3.0 as the optional `<>` feature. +The {cl_khr_fp64_EXT} extension was promoted to OpenCL C 1.2 as an optional +feature, and to OpenCL 3.0 as the optional {cl_khr_fp64_EXT} feature. The extension provides double-precision scalar and vector floating-point data types and extends many functions to accept these types. endif::cl_khr_fp64[] @@ -392,7 +392,7 @@ ifdef::cl_khr_gl_msaa_sharing[] [[cl_khr_gl_msaa_sharing,cl_khr_gl_msaa_sharing]] ==== Multi-Sample Shared OpenCL/OpenGL Images -The `cl_khr_gl_msaa_sharing` extension adds support for multi-sample images +The {cl_khr_gl_msaa_sharing_EXT} extension adds support for multi-sample images shared with OpenGL multi-sample textures. The extension provides new <>, as well as <> table. @@ -458,7 +458,7 @@ ifdef::cl_khr_int64_extended_atomics[] [[cl_khr_int64_extended_atomics,cl_khr_int64_extended_atomics]] ==== 64-Bit Extended Atomics -The `cl_khr_int64_extended_atomics` extension provides extended atomic functions for +The {cl_khr_int64_extended_atomics_EXT} extension provides extended atomic functions for {global} and {local} 64-bit signed and unsigned integer variables, as described in the <> table. @@ -469,7 +469,7 @@ ifdef::cl_khr_integer_dot_product[] [[cl_khr_integer_dot_product,cl_khr_integer_dot_product]] ==== Integer Dot Product -The `cl_khr_integer_dot_product` extension adds support for SPIR-V +The {cl_khr_integer_dot_product_EXT} extension adds support for SPIR-V instructions and OpenCL C built-in functions to compute the dot product of vectors of integers. The extension provides new <> and <> functions operating on these images. 
@@ -530,8 +530,8 @@ ifdef::cl_khr_mipmap_image_writes[] [[cl_khr_mipmap_image_writes,cl_khr_mipmap_image_writes]] ==== Mipmapped Image Writes -The `cl_khr_mipmap_image_writes` extension adds support for writing to -mipmap images, and requires support for the `<>` +The {cl_khr_mipmap_image_writes_EXT} extension adds support for writing to +mipmap images, and requires support for the {cl_khr_mipmap_image_EXT} extension macro. The extension provides built-in <> functions operating on these images. @@ -542,7 +542,7 @@ ifdef::cl_khr_select_fprounding_mode[] [[cl_khr_select_fprounding_mode,cl_khr_select_fprounding_mode]] ==== Select Floating-Point Rounding Mode -The `cl_khr_select_fprounding_mode` extension allows <> for an instruction or group of instructions in the program source by use of a *#pragma*. @@ -554,7 +554,7 @@ ifdef::cl_khr_srgb_image_writes[] [[cl_khr_srgb_image_writes,cl_khr_srgb_image_writes]] ==== sRGB Image Write Functions -The `cl_khr_srgb_image_writes` extension adds support for writing to sRGB +The {cl_khr_srgb_image_writes_EXT} extension adds support for writing to sRGB images using the <> functions. Color space conversion is performed by the function. endif::cl_khr_srgb_image_writes[] @@ -564,7 +564,7 @@ ifdef::cl_khr_subgroups[] [[cl_khr_subgroups,cl_khr_subgroups]] ==== Sub-Groups -The `cl_khr_subgroups` extension was promoted to OpenCL C 2.1 as the +The {cl_khr_subgroups_EXT} extension was promoted to OpenCL C 2.1 as the {opencl_c_subgroups} feature. The extension provides the following functions: @@ -586,7 +586,7 @@ ifdef::cl_khr_subgroup_ballot[] [[cl_khr_subgroup_ballot,cl_khr_subgroup_ballot]] ==== Sub-Group Ballots -The `cl_khr_subgroup_ballot` extension adds the ability to collect and +The {cl_khr_subgroup_ballot_EXT} extension adds the ability to collect and operate on ballots from work items in a sub-group. 
The extension provides the following functions: @@ -599,7 +599,7 @@ ifdef::cl_khr_subgroup_clustered_reduce[] [[cl_khr_subgroup_clustered_reduce,cl_khr_subgroup_clustered_reduce]] ==== Sub-Group Clustered Reductions -The `cl_khr_subgroup_clustered_reduce` extension adds support for clustered +The {cl_khr_subgroup_clustered_reduce_EXT} extension adds support for clustered reductions that operate on a subset of work items in the sub-group. The extension provides the following functions: @@ -617,7 +617,7 @@ ifdef::cl_khr_subgroup_extended_types[] [[cl_khr_subgroup_extended_types,cl_khr_subgroup_extended_types]] ==== Sub-Group Extended Types -The `cl_khr_subgroup_extended_types` extension adds <> to the existing <>. @@ -629,7 +629,7 @@ ifdef::cl_khr_subgroup_non_uniform_arithmetic[] [[cl_khr_subgroup_non_uniform_arithmetic,cl_khr_subgroup_non_uniform_arithmetic]] ==== Sub-Group Non-Uniform Arithmetic -The `cl_khr_subgroup_non_uniform_arithmetic` extension adds the ability to +The {cl_khr_subgroup_non_uniform_arithmetic_EXT} extension adds the ability to use some sub-group functions within non-uniform flow control, including additional scan and reduction operators. @@ -649,7 +649,7 @@ ifdef::cl_khr_subgroup_non_uniform_vote[] [[cl_khr_subgroup_non_uniform_vote,cl_khr_subgroup_non_uniform_vote]] ==== Sub-Group Non-Uniform Vote and Election Functions -The `cl_khr_subgroup_non_uniform_vote` extension adds the ability to elect a +The {cl_khr_subgroup_non_uniform_vote_EXT} extension adds the ability to elect a single work item from a sub-group to perform a task and to hold votes among work items in a sub-group. 
@@ -665,7 +665,7 @@ ifdef::cl_khr_subgroup_rotate[] [[cl_khr_subgroup_rotate,cl_khr_subgroup_rotate]] ==== Sub-Group Rotation -The `cl_khr_subgroup_rotate` extension adds support for a new sub-group data +The {cl_khr_subgroup_rotate_EXT} extension adds support for a new sub-group data exchange operation that makes it possible to rotate values through the work items in a sub-group. @@ -680,7 +680,7 @@ ifdef::cl_khr_subgroup_shuffle[] [[cl_khr_subgroup_shuffle,cl_khr_subgroup_shuffle]] ==== Sub-Group General Purpose Shuffles -The `cl_khr_subgroup_shuffle` extension adds additional ways to exchange +The {cl_khr_subgroup_shuffle_EXT} extension adds additional ways to exchange data among work items in a sub-group. The extension provides the following functions: @@ -694,7 +694,7 @@ ifdef::cl_khr_subgroup_shuffle_relative[] [[cl_khr_subgroup_shuffle_relative,cl_khr_subgroup_shuffle_relative]] ==== Sub-Group Relative Shuffles -The `cl_khr_subgroup_shuffle_relative` extension adds specialized ways to +The {cl_khr_subgroup_shuffle_relative_EXT} extension adds specialized ways to exchange data among work items in a sub-group that may perform better on some implementations. @@ -710,7 +710,7 @@ ifdef::cl_khr_work_group_uniform_arithmetic[] [[cl_khr_work_group_uniform_arithmetic,cl_khr_work_group_uniform_arithmetic]] ==== Work-group Collective Uniform Arithmetic Functions -The `cl_khr_work_group_uniform_arithmetic` extension adds additional +The {cl_khr_work_group_uniform_arithmetic_EXT} extension adds additional work-group collective functions, including work-group scans and reductions for the following operators: @@ -837,7 +837,7 @@ application: ==== Double-Precision Floating-Point Support Double-precision floating-point is supported if -ifdef::cl_khr_fp64[the `<<cl_khr_fp64>>` extension macro is supported, or if] +ifdef::cl_khr_fp64[the {cl_khr_fp64_EXT} extension macro is supported, or if] OpenCL 1.2 or newer is supported.
In OpenCL 3.0, it also requires support for the {opencl_c_fp64} feature, @@ -1033,12 +1033,12 @@ OpenCL. | A 2D depth image. <> support for OpenCL C 2.0 or newer, or for - the `<>` extension macro. + the {cl_khr_depth_images_EXT} extension macro. | `image2d_array_depth_t` footnote:image-functions[] | A 2D depth image array. <> support for OpenCL C 2.0 or newer, or for - the `<>` extension macro. + the {cl_khr_depth_images_EXT} extension macro. | `sampler_t` footnote:image-functions[] | A sampler type. | `queue_t` @@ -1088,7 +1088,7 @@ ifdef::cl_khr_gl_msaa_sharing[] of the built-in functions that use this type. <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. | `image2d_array_msaa_t` | A 2D multi-sample color image array. Refer to the <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. | `image2d_msaa_depth_t` | A 2D multi-sample depth image. Refer to the <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. | `image2d_array_msaa_depth_t` | A 2D multi-sample depth image array. Refer to the <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. endif::cl_khr_gl_msaa_sharing[] |==== @@ -3887,7 +3887,7 @@ address space qualifiers. . The following restriction only applies to ifndef::cl_khr_byte_addressable_store[OpenCL C 1.0: +] ifdef::cl_khr_byte_addressable_store[] - OpenCL C 1.0, and only if the `<>` + OpenCL C 1.0, and only if the {cl_khr_byte_addressable_store_EXT} extension macro is not supported: + endif::cl_khr_byte_addressable_store[] Built-in types that are less than 32-bits in size, i.e. @@ -3953,7 +3953,7 @@ supported with `{global}` address space qualifier. -- ifdef::cl_khr_initialize_memory[] . 
[[restrictions-initialize-memory]] The following restriction only - applies if the `<>` extension is supported: + + applies if the {cl_khr_initialize_memory_EXT} extension is supported: + If the context is created with {CL_CONTEXT_MEMORY_INITIALIZE_KHR}, appropriate memory locations as specified by the bit-field are initialized with zeroes, prior to the start of execution of any kernel. @@ -4051,7 +4051,7 @@ The following predefined macro names are available. Used to determine the current rounding mode and is set to rte. Only affects the rounding mode of conversions to a float type. <> OpenCL C 1.1, along with the - `<>` extension. + {cl_khr_select_fprounding_mode_EXT} extension. `+__ENDIAN_LITTLE__+` :: Used to determine if the OpenCL device is a little endian architecture @@ -5065,7 +5065,7 @@ identifier of each work-item when this kernel is being executed on a device. NOTE: The functionality described in the following table <> support for -ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +ifdef::cl_khr_subgroups[the {cl_khr_subgroups_EXT} extension macro; or for] OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. The following table describes the list of built-in work-item functions that @@ -5181,7 +5181,7 @@ that the function can take any of as the type for the arguments. NOTE: All functions taking or returning `half` types are supported only when -the `<>` extension macro is supported. +the {cl_khr_fp16_EXT} extension macro is supported. endif::cl_khr_fp16[] For any specific use of a function with `gentype*` arguments the actual type @@ -5944,7 +5944,7 @@ double type. 
|==== ifdef::cl_khr_fp16[] -If the `<>` extension macro is supported, then the following +If the {cl_khr_fp16_EXT} extension macro is supported, then the following macros and constants are also available: The `FP_FAST_FMA_HALF` macro indicates whether the *fma*() family of @@ -6224,7 +6224,7 @@ ifdef::cl_khr_extended_bit_ops[] [open,refpage='extendedBitOperations',desc='Extended Bit Operations',type='freeform',spec='clang',anchor='extended-bit-operations',xrefs='commonFunctions',alias='bitfield_insert bitfield_extract_signed bitfield_extract_unsigned bit_reverse'] -- -If the `<>` extension macro is supported, the +If the {cl_khr_extended_bit_ops_EXT} extension macro is supported, the functions described in the <> table can be used with built-in scalar or vector integer types to perform extended bit operations. @@ -6274,7 +6274,7 @@ gentype bitfield_insert( vector types), the result is undefined. <> support for the - `<>` extension macro. + {cl_khr_extended_bit_ops_EXT} extension macro. a| [source,opencl_c] ---- @@ -6299,7 +6299,7 @@ igentype bitfield_extract_signed( `gentype` (for vector types), the result is undefined. <> support for the - `<>` extension macro. + {cl_khr_extended_bit_ops_EXT} extension macro. a| [source,opencl_c] ---- @@ -6323,7 +6323,7 @@ ugentype bitfield_extract_unsigned( `gentype` (for vector types), the result is undefined. <> support for the - `<>` extension macro. + {cl_khr_extended_bit_ops_EXT} extension macro. a| [source,opencl_c] ---- @@ -6338,7 +6338,7 @@ gentype bit_reverse( vector types). <> support for the - `<>` extension macro. + {cl_khr_extended_bit_ops_EXT} extension macro. |=== -- endif::cl_khr_extended_bit_ops[] @@ -6442,7 +6442,7 @@ that the function can take any of as the type for the arguments. NOTE: All functions taking or returning `half` types are supported only when -the `<>` extension macro is supported. +the {cl_khr_fp16_EXT} extension macro is supported. 
endif::cl_khr_fp16[] [[table-builtin-common]] @@ -6569,7 +6569,7 @@ that the function can take any of as the type for the arguments. NOTE: All functions taking or returning `half` types are supported only when -the `<>` extension macro is supported. +the {cl_khr_fp16_EXT} extension macro is supported. endif::cl_khr_fp16[] For any specific use of a function with `gentype*` arguments the actual type @@ -6903,7 +6903,7 @@ ifdef::cl_khr_fp16[] NOTE: All functions taking or returning `half` types are supported only when -the `<>` extension macro is supported. +the {cl_khr_fp16_EXT} extension macro is supported. endif::cl_khr_fp16[] as the type for the arguments. @@ -7317,7 +7317,7 @@ in a work-group. NOTE: The functionality described in the following table <> support for -ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +ifdef::cl_khr_subgroups[the {cl_khr_subgroups_EXT} extension macro; or for] OpenCL 3.0 or newer and the {opencl_c_subgroups} feature. The following table describes built-in functions to synchronize the work-items @@ -7525,7 +7525,7 @@ ifdef::cl_khr_fp16[] * `half` footnote:[{fn-half-supported}] or `half__n__` NOTE: All functions taking or returning `half` types are supported only when -the `<>` extension macro is supported. +the {cl_khr_fp16_EXT} extension macro is supported. endif::cl_khr_fp16[] as the type for the arguments unless otherwise stated. @@ -7635,7 +7635,7 @@ void async_work_group_copy_fence( non-uniform work-groups. <> support for the - `<>` extension macro. + {cl_khr_async_work_group_copy_fence_EXT} extension macro. 
endif::cl_khr_async_work_group_copy_fence[] |==== @@ -7654,7 +7654,7 @@ ifdef::cl_khr_extended_async_copies[] [open,refpage='extendedAsyncCopyFunctions',desc='Extended Async Copy Functions',type='freeform',spec='clang',anchor='extended-async-copies',xrefs='',alias='async_work_group_copy_2D2D async_work_group_copy_3D3D'] -- -If the `<>` extension macro is supported, +If the {cl_khr_extended_async_copies_EXT} extension macro is supported, additional <> are provided which interpret the source and destination as 2D or 3D data. @@ -8053,7 +8053,7 @@ The following table lists the enumeration constants: <> support for OpenCL C 2.0 or newer. | `memory_scope_sub_group` | <> support for -ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +ifdef::cl_khr_subgroups[the {cl_khr_subgroups_EXT} extension macro; or for] OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. | `memory_scope_work_group` | <> support for OpenCL C 2.0 or newer. @@ -9168,7 +9168,7 @@ Extensions>> table below. |==== | Extension Macro | Supported Functions ifdef::cl_khr_global_int32_base_atomics[] -| `<>` +| {cl_khr_global_int32_base_atomics_EXT} | **atom_add** + **atom_sub** + **atom_xchg** + @@ -9178,7 +9178,7 @@ ifdef::cl_khr_global_int32_base_atomics[] (with {global} parameters) endif::cl_khr_global_int32_base_atomics[] ifdef::cl_khr_global_int32_extended_atomics[] -| `<>` +| {cl_khr_global_int32_extended_atomics_EXT} | **atom_min** + **atom_max** + **atom_and** + @@ -9187,7 +9187,7 @@ ifdef::cl_khr_global_int32_extended_atomics[] (with {global} parameters) endif::cl_khr_global_int32_extended_atomics[] ifdef::cl_khr_local_int32_base_atomics[] -| `<>` +| {cl_khr_local_int32_base_atomics_EXT} | **atom_add** + **atom_sub** + **atom_xchg** + @@ -9197,7 +9197,7 @@ ifdef::cl_khr_local_int32_base_atomics[] (with {local} parameters) endif::cl_khr_local_int32_base_atomics[] ifdef::cl_khr_local_int32_extended_atomics[] -| `<>` +| {cl_khr_local_int32_extended_atomics_EXT} | **atom_min** + 
**atom_max** + **atom_and** + @@ -9217,7 +9217,7 @@ Similar to the <>, atomic functions operating on 64-bit integers are provided by extensions. ifdef::cl_khr_int64_base_atomics[] -If the `<>` extension macro is supported, it +If the {cl_khr_int64_base_atomics_EXT} extension macro is supported, it provides the functions described in the <> table below. @@ -9285,7 +9285,7 @@ provides the functions described in the <>` extension macro is supported, it +If the {cl_khr_int64_extended_atomics_EXT} extension macro is supported, it provides the functions described in the <> table below. @@ -9389,7 +9389,7 @@ endif::cl_khr_int64_base_atomics,cl_khr_int64_extended_atomics[] newer and the {opencl_c_atomic_order_seq_cst} feature. * Using `memory_scope_sub_group` with any built-in atomic function <> support for -ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +ifdef::cl_khr_subgroups[the {cl_khr_subgroups_EXT} extension macro; or for] OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. * Using `memory_scope_device` <> support for OpenCL C 2.0, or OpenCL C 3.0 or newer and the @@ -10152,7 +10152,7 @@ The following built-in function calls to read images with a sampler are supported footnote:[{fn-read-image-with-sampler}]. ifdef::cl_khr_mipmap_image[] -If the `<>` extension macro is supported, read +If the {cl_khr_mipmap_image_EXT} extension macro is supported, read functions which do not either * explicitly specify a level of detail _lod_, or @@ -10225,7 +10225,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description above are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | @@ -10335,7 +10335,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. 
endif::cl_khr_fp16[] | | @@ -10444,7 +10444,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description above are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | @@ -10554,7 +10554,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description above are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | @@ -10666,7 +10666,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description above are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | @@ -10742,7 +10742,7 @@ endif::cl_khr_fp16[] above are undefined. <> support for OpenCL C 2.0 or newer, or for - the `<>` extension macro. + the {cl_khr_depth_images_EXT} extension macro. | | | float *read_imagef*(read_only image2d_array_depth_t _image_, sampler_t _sampler_, int4 _coord_) + @@ -10769,7 +10769,7 @@ endif::cl_khr_fp16[] above are undefined. <> support for OpenCL C 2.0 or newer, or for - the `<>` extension macro. + the {cl_khr_depth_images_EXT} extension macro. | | ifdef::cl_khr_mipmap_image[] @@ -10803,7 +10803,7 @@ float read_imagef( | Use the coordinate _coord.xy_ to do an element lookup in the mip level specified by _lod_ in the 2D image object specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. a| [source,opencl_c] @@ -10840,7 +10840,7 @@ float read_imagef( an element lookup in the mip level specified by the computed lod in the 2D image object specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. 
a| [source,opencl_c] @@ -10866,7 +10866,7 @@ uint4 read_imageui( | Use the coordinate _coord_ to do an element lookup in the mip level specified by _lod_ in the 1D image object specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. a| [source,opencl_c] @@ -10896,7 +10896,7 @@ uint4 read_imageui( element lookup in the mip level specified by the computed lod in the 1D image object specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. a| [source,opencl_c] @@ -10922,7 +10922,7 @@ uint4 read_imageui( | Use the coordinate _coord.xyz_ to do an element lookup in the mip level specified by _lod_ in the 3D image object specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. a| [source,opencl_c] @@ -10952,7 +10952,7 @@ uint4 read_imageui( an element lookup in the mip level specified by the computed lod in the 3D image object specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. a| [source,opencl_c] @@ -10979,7 +10979,7 @@ uint4 read_imageui( identified by _coord.x_ and mip level specified by _lod_ in the 1D image array specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. a| [source,opencl_c] @@ -11009,7 +11009,7 @@ uint4 read_imageui( element lookup in the mip level specified by the computed lod in the 1D image array specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. a| [source,opencl_c] @@ -11042,7 +11042,7 @@ float read_imagef( identified by _coord.z_ and mip level specified by _lod_ in the 2D image array specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. 
a| [source,opencl_c] @@ -11080,7 +11080,7 @@ float read_imagef( level specified by the computed lod in the 2D image array specified by _image_. - <> support for the `<>` + <> support for the {cl_khr_mipmap_image_EXT} extension macro. endif::cl_khr_mipmap_image[] @@ -11088,7 +11088,7 @@ endif::cl_khr_mipmap_image[] -- ifdef::cl_khr_mipmap_image[] -NOTE: If the `<>` extension macro is supported, +NOTE: If the {cl_khr_mipmap_image_EXT} extension macro is supported, {CL_SAMPLER_NORMALIZED_COORDS} must be {CL_TRUE} for built-in functions described in the table above that read from a mipmapped image; otherwise behavior is undefined. @@ -11169,7 +11169,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description above are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | @@ -11245,7 +11245,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | @@ -11320,7 +11320,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description above are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | @@ -11396,7 +11396,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description above are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | @@ -11472,7 +11472,7 @@ ifdef::cl_khr_fp16[] _image_channel_data_type_ values not specified in the description above are undefined. - <> support for the `<>` extension + <> support for the {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | @@ -11521,7 +11521,7 @@ endif::cl_khr_fp16[] above are undefined. 
<> support for OpenCL C 2.0 or newer, or for - the `<>` extension macro. + the {cl_khr_depth_images_EXT} extension macro. | | | float *read_imagef*(_aQual_ image2d_array_depth_t _image_, int4 _coord_) | Use _coord.xy_ to do an element lookup in the 2D image identified by @@ -11539,7 +11539,7 @@ endif::cl_khr_fp16[] above are undefined. <> support for OpenCL C 2.0 or newer, or for - the `<>` extension macro. + the {cl_khr_depth_images_EXT} extension macro. | | ifdef::cl_khr_gl_msaa_sharing[] @@ -11571,7 +11571,7 @@ float4 read_imagef( above are undefined. <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. a| [source,opencl_c] ---- @@ -11613,7 +11613,7 @@ uint4 read_imageui( values returned by *read_imageui* are undefined. <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. a| [source,opencl_c] ---- @@ -11642,7 +11642,7 @@ float4 read_imagef( above are undefined. <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. a| [source,opencl_c] ---- @@ -11684,7 +11684,7 @@ uint4 read_imageui( values returned by *read_imageui* are undefined. <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. a| [source,opencl_c] ---- @@ -11708,7 +11708,7 @@ float read_imagef( above are undefined. <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. a| [source,c] ---- @@ -11747,7 +11747,7 @@ float read_imagef(image2d_array_msaaa_depth_t image, image is undefined <> support for the - `<>` extension macro. + {cl_khr_gl_msaa_sharing_EXT} extension macro. endif::cl_khr_gl_msaa_sharing[] |==== -- @@ -11764,7 +11764,7 @@ _aQual_ in the following table refers to one of the access qualifiers. For write functions this may be `write_only` or `read_write`. 
ifdef::cl_khr_mipmap_image_writes[] -If the `<>` extension macro is supported, write +If the {cl_khr_mipmap_image_writes_EXT} extension macro is supported, write functions which do not explicitly specify a level of detail _lod_ write to mip level 0 if _image_ is a mipmapped image. _mipwidth_, _mipheight_, and _mipdepth_ in the table refer to the width, @@ -11774,7 +11774,7 @@ refers to the number of mip levels in _image_. endif::cl_khr_mipmap_image_writes[] ifdef::cl_khr_srgb_image_writes[] -If the `<>` extension macro is supported, the +If the {cl_khr_srgb_image_writes_EXT} extension macro is supported, the *write_imagef* functions described below may write to sRGB images. Linear to sRGB conversion is performed by the function. Only the R, G, and B components are converted from linear to sRGB; the A @@ -11847,7 +11847,7 @@ ifdef::cl_khr_fp16[*write_imageh*,] ifdef::cl_khr_fp16[] *write_imageh* <> support for the - `<>` extension macro. + {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | | void *write_imagef*( + @@ -11912,7 +11912,7 @@ ifdef::cl_khr_fp16[*write_imageh*,] ifdef::cl_khr_fp16[] *write_imageh* <> support for the - `<>` extension macro. + {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | | void *write_imagef*( + @@ -11993,7 +11993,7 @@ ifdef::cl_khr_fp16[*write_imageh*,] ifdef::cl_khr_fp16[] *write_imageh* <> support for the - `<>` extension macro. + {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] | | | void *write_imagef*( + @@ -12082,7 +12082,7 @@ ifdef::cl_khr_fp16[*write_imageh*,] height-1], respectively, is undefined. <> support for OpenCL C 2.0 or newer, or for - the `<>` extension macro. + the {cl_khr_depth_images_EXT} extension macro. | | | void *write_imagef*( + _aQual_ image2d_array_depth_t _image_, + @@ -12111,7 +12111,7 @@ ifdef::cl_khr_fp16[*write_imageh*,] height-1], [0, image number of layers-1], respectively, is undefined. <> support for OpenCL C 2.0 or newer, or for - the `<>` extension macro. 
+ the {cl_khr_depth_images_EXT} extension macro. | | | void *write_imagef*( + _aQual_ image3d_t _image_, + @@ -12173,11 +12173,11 @@ ifdef::cl_khr_fp16[*write_imageh*,] <> support for OpenCL C 2.0, or OpenCL C 3.0 or newer and the {opencl_c_3d_image_writes} feature, or the - `<>` extension. + {cl_khr_3d_image_writes_EXT} extension. ifdef::cl_khr_fp16[] *write_imageh* <> support for the - `<>` extension macro. + {cl_khr_fp16_EXT} extension macro. endif::cl_khr_fp16[] ifdef::cl_khr_mipmap_image_writes[] @@ -12221,7 +12221,7 @@ void write_imagef( range. <> support for the - `<>` extension macro. + {cl_khr_mipmap_image_writes_EXT} extension macro. a| [source,opencl_c] ---- @@ -12254,7 +12254,7 @@ void write_imageui( Behavior is undefined if _lod_ or _coord_ is not in range. <> support for the - `<>` extension macro. + {cl_khr_mipmap_image_writes_EXT} extension macro. a| [source,opencl_c] ---- @@ -12289,7 +12289,7 @@ void write_imageui( Behavior is undefined if _lod_, _coord.x_, or _coord.y_ is not in range. <> support for the - `<>` extension macro. + {cl_khr_mipmap_image_writes_EXT} extension macro. a| [source,opencl_c] ---- @@ -12331,7 +12331,7 @@ void write_imagef( _lod_, _coord.x_, _coord.y_, or _coord.z_ is not in range. <> support for the - `<>` extension macro. + {cl_khr_mipmap_image_writes_EXT} extension macro. a| [source,opencl_c] ---- @@ -12366,7 +12366,7 @@ void write_imageui( not in range. <> support for the - `<>` extension macro. + {cl_khr_mipmap_image_writes_EXT} extension macro. endif::cl_khr_mipmap_image_writes[] |==== @@ -12400,14 +12400,14 @@ For query functions this may be `read_only`, `write_only` or `read_write`. 
int *get_image_width*(_aQual_ image1d_array_t _image_) + int *get_image_width*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, or if the `<>` extension + For OpenCL C 2.0 or newer, or if the {cl_khr_depth_images_EXT} extension macro is supported: int *get_image_width*(_aQual_ image2d_depth_t _image_) + int *get_image_width*(_aQual_ image2d_array_depth_t _image_) ifdef::cl_khr_gl_msaa_sharing[] - If the `<>` extension macro is supported: + If the {cl_khr_gl_msaa_sharing_EXT} extension macro is supported: int *get_image_width*(_aQual_ image2d_msaa_t image) + int *get_image_width*(_aQual_ image2d_array_msaa_t image) + @@ -12423,14 +12423,14 @@ endif::cl_khr_gl_msaa_sharing[] int *get_image_height*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, or if the `<>` extension + For OpenCL C 2.0 or newer, or if the {cl_khr_depth_images_EXT} extension macro is supported: int *get_image_height*(_aQual_ image2d_depth_t _image_) + int *get_image_height*(_aQual_ image2d_array_depth_t _image_) ifdef::cl_khr_gl_msaa_sharing[] - If the `<>` extension macro is supported: + If the {cl_khr_gl_msaa_sharing_EXT} extension macro is supported: int *get_image_height*(_aQual_ image2d_msaa_t image) + int *get_image_height*(_aQual_ image2d_array_msaa_t image) + @@ -12454,14 +12454,14 @@ endif::cl_khr_gl_msaa_sharing[] int *get_image_channel_data_type*(_aQual_ image1d_array_t _image_) + int *get_image_channel_data_type*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, or if the `<>` extension + For OpenCL C 2.0 or newer, or if the {cl_khr_depth_images_EXT} extension macro is supported: int *get_image_channel_data_type*(_aQual_ image2d_depth_t _image_) + int *get_image_channel_data_type*(_aQual_ image2d_array_depth_t _image_) ifdef::cl_khr_gl_msaa_sharing[] - If the `<>` extension macro is supported: + If the {cl_khr_gl_msaa_sharing_EXT} extension macro is supported: int *get_image_channel_data_type*(_aQual_ image2d_msaa_t image) + int 
*get_image_channel_data_type*(_aQual_ image2d_array_msaa_t image) + @@ -12500,14 +12500,14 @@ endif::cl_khr_gl_msaa_sharing[] int *get_image_channel_order*(_aQual_ image1d_array_t _image_) + int *get_image_channel_order*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, or if the `<>` extension + For OpenCL C 2.0 or newer, or if the {cl_khr_depth_images_EXT} extension macro is supported: int *get_image_channel_order*(_aQual_ image2d_depth_t _image_) + int *get_image_channel_order*(_aQual_ image2d_array_depth_t _image_) ifdef::cl_khr_gl_msaa_sharing[] - If the `<>` extension macro is supported: + If the {cl_khr_gl_msaa_sharing_EXT} extension macro is supported: int *get_image_channel_order*(_aQual_ image2d_msaa_t image) + int *get_image_channel_order*(_aQual_ image2d_array_msaa_t image) + @@ -12549,14 +12549,14 @@ endif::cl_khr_gl_msaa_sharing[] int2 *get_image_dim*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, or if the `<>` extension + For OpenCL C 2.0 or newer, or if the {cl_khr_depth_images_EXT} extension macro is supported: int2 *get_image_dim*(_aQual_ image2d_depth_t _image_) + int2 *get_image_dim*(_aQual_ image2d_array_depth_t _image_) ifdef::cl_khr_gl_msaa_sharing[] - If the `<>` extension macro is supported: + If the {cl_khr_gl_msaa_sharing_EXT} extension macro is supported: int2 *get_image_dim*(_aQual_ image2d_msaa_t image) + int2 *get_image_dim*(_aQual_ image2d_array_msaa_t image) + @@ -12576,13 +12576,13 @@ endif::cl_khr_gl_msaa_sharing[] size_t *get_image_array_size*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, or if the `<>` extension + For OpenCL C 2.0 or newer, or if the {cl_khr_depth_images_EXT} extension macro is supported: size_t *get_image_array_size*(_aQual_ image2d_array_depth_t _image_) ifdef::cl_khr_gl_msaa_sharing[] - If the `<>` extension macro is supported: + If the {cl_khr_gl_msaa_sharing_EXT} extension macro is supported: size_t *get_image_array_size*(_aQual_ image2d_array_msaa_depth_t 
_image_) endif::cl_khr_gl_msaa_sharing[] @@ -12594,7 +12594,7 @@ endif::cl_khr_gl_msaa_sharing[] | Return the number of images in the 1D image array. ifdef::cl_khr_gl_msaa_sharing[] -| If the `<>` extension macro is supported: +| If the {cl_khr_gl_msaa_sharing_EXT} extension macro is supported: int *get_image_num_samples*(_aQual_ image2d_msaa_t _image_) + int *get_image_num_samples*(_aQual_ image2d_array_msaa_t _image_) + @@ -12604,7 +12604,7 @@ ifdef::cl_khr_gl_msaa_sharing[] endif::cl_khr_gl_msaa_sharing[] ifdef::cl_khr_mipmap_image[] -| If the `<>` extension macro is supported: +| If the {cl_khr_mipmap_image_EXT} extension macro is supported: int *get_image_num_mip_levels*(_aQual_ image1d_t _image_) + int *get_image_num_mip_levels*(_aQual_ image2d_t _image_) + @@ -12699,7 +12699,7 @@ and will be set to 1.0 for the alpha channel. For {CL_DEPTH} images, a scalar value is returned by *read_imagef* or supplied to *write_imagef*. <> support for OpenCL C 2.0 or newer, or for -the `<>` extension macro. +the {cl_khr_depth_images_EXT} extension macro. [NOTE] ==== @@ -12840,7 +12840,7 @@ ifdef::cl_khr_work_group_uniform_arithmetic[] [open,refpage='workGroupUniformArithmeticFunctions',desc='Work-group Collective Uniform Arithmetic Functions',type='freeform',spec='clang',anchor='work-group-collective-uniform-arithmetic-functions',xrefs='workGroupFunctions',alias='work_group_all work_group_any work_group_broadcast work_group_reduce work_group_scan_exclusive work_group_scan_inclusive'] -- NOTE: The functionality described in this section <> -support for OpenCL C 2.0 and the `<>` +support for OpenCL C 2.0 and the {cl_khr_work_group_uniform_arithmetic_EXT} extension macro. The <> table describes the OpenCL C @@ -13297,7 +13297,7 @@ The following table describes the list of built-in functions that can be used to enqueue a kernel(s). 
ifdef::cl_khr_device_enqueue_local_arg_types[] -When the `<>` extension macro is +When the {cl_khr_device_enqueue_local_arg_types_EXT} extension macro is supported, the <> and <> described in this section can use any of the built-in OpenCL C @@ -13307,7 +13307,7 @@ their arguments. This is indicated by the generic type name `gentype` in those function signatures. -When the `<>` extension macro is +When the {cl_khr_device_enqueue_local_arg_types_EXT} extension macro is not supported, the pointee type of these functions must be `void`. :localArgType: gentype @@ -13984,7 +13984,7 @@ foo(queue_t q, ...) -- NOTE: The functionality described in this section <> support for -ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +ifdef::cl_khr_subgroups[the {cl_khr_subgroups_EXT} extension macro; or for] OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. The <> describes OpenCL C @@ -13998,7 +13998,7 @@ footnote:[{fn-half-supported}], `float`, and `double` footnote:[{fn-double-supported}]. ifdef::cl_khr_subgroup_extended_types[] -NOTE: If the `<>` extension is supported, the +NOTE: If the {cl_khr_subgroup_extended_types_EXT} extension is supported, the generic type name `gentype` may additionally be `char`, `uchar`, `short`, and `ushort`. For the `sub_group_broadcast` function, `gentype` may additionally be one of @@ -14080,7 +14080,7 @@ The order of these floating-point operations is also non-deterministic for a giv NOTE: The functionality described in the following table <> support -ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +ifdef::cl_khr_subgroups[the {cl_khr_subgroups_EXT} extension macro; or for] OpenCL C 3.0 or newer and the {opencl_c_subgroups} and {opencl_c_pipes} features. @@ -14135,7 +14135,7 @@ groups is implementation-defined. 
NOTE: The functionality described in the following table <> support -ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +ifdef::cl_khr_subgroups[the {cl_khr_subgroups_EXT} extension macro; or for] OpenCL C 3.0 or newer and the {opencl_c_subgroups} and {opencl_c_device_enqueue} features. @@ -14179,7 +14179,7 @@ ifdef::cl_khr_subgroup_ballot[] ==== Built-in Sub-Group Ballot Functions NOTE: The functionality described in this section <> -support for the `<>` extension. +support for the {cl_khr_subgroup_ballot_EXT} extension. The <> describes OpenCL C programming language built-in functions to allow work items in a sub-group @@ -14379,7 +14379,7 @@ ifdef::cl_khr_subgroup_clustered_reduce[] ==== Built-in Sub-Group Clustered Reduction Functions NOTE: The functionality described in this section <> -support for the `<>` extension. +support for the {cl_khr_subgroup_clustered_reduce_EXT} extension. This section describes arithmetic operations that are performed on a subset of work items in a sub-group, referred to as a cluster. @@ -14501,7 +14501,7 @@ ifdef::cl_khr_subgroup_non_uniform_arithmetic[] ==== Built-in Sub-Group Non-Uniform Scan and Reduction Functions NOTE: The functionality described in this section <> -support for the `<>` extension. +support for the {cl_khr_subgroup_non_uniform_arithmetic_EXT} extension. ===== Arithmetic Operations @@ -14536,7 +14536,7 @@ gentype sub_group_non_uniform_reduce_mul( for all active work items in the sub-group. Note: This behavior is the same as the *add*, *min*, and *max* reduction - built-in functions from `<>` and OpenCL 2.1, except + built-in functions from {cl_khr_subgroups_EXT} and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the sub-group executing the kernel. |[source,opencl_c] @@ -14556,7 +14556,7 @@ gentype sub_group_non_uniform_scan_inclusive_mul( to this work item's sub-group local ID. 
Note: This behavior is the same as the *add*, *min*, and *max* inclusive - scan built-in functions from `<>` and OpenCL 2.1, + scan built-in functions from {cl_khr_subgroups_EXT} and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the sub-group executing the kernel. |[source,opencl_c] @@ -14586,7 +14586,7 @@ gentype sub_group_non_uniform_scan_exclusive_mul( For *mul*, the identity value is `1`. Note: This behavior is the same as the *add*, *min*, and *max* exclusive - scan built-in functions from `<>` and OpenCL 2.1, + scan built-in functions from {cl_khr_subgroups_EXT} and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the sub-group executing the kernel. |==== @@ -14727,7 +14727,7 @@ ifdef::cl_khr_subgroup_non_uniform_vote[] ==== Built-in Sub-Group Non-Uniform Vote Functions NOTE: The functionality described in this section <> -support for the `<>` extension. +support for the {cl_khr_subgroup_non_uniform_vote_EXT} extension. The <> describes the OpenCL C programming language built-in functions to elect a single work item @@ -14765,7 +14765,7 @@ int sub_group_non_uniform_all( items in the sub-group and zero otherwise. Note: This behavior is the same as `sub_group_all` from - `<>` and OpenCL 2.1, except this function need not be + {cl_khr_subgroups_EXT} and OpenCL 2.1, except this function need not be encountered by all work items in the sub-group executing the kernel. |[source,opencl_c] ---- @@ -14777,7 +14777,7 @@ int sub_group_non_uniform_any( item in the sub-group and zero otherwise. Note: This behavior is the same as `sub_group_any` from - `<>` and OpenCL 2.1, except this function need not be + {cl_khr_subgroups_EXT} and OpenCL 2.1, except this function need not be encountered by all work items in the sub-group executing the kernel. 
|[source,opencl_c] ---- @@ -14800,7 +14800,7 @@ ifdef::cl_khr_subgroup_rotate[] ==== Built-in Sub-Group Rotation Functions NOTE: The functionality described in this section <> -support for the `<>` extension. +support for the {cl_khr_subgroup_rotate_EXT} extension. The <> describes a specialized OpenCL C programming language built-in function that allow work items in a @@ -14861,7 +14861,7 @@ ifdef::cl_khr_subgroup_shuffle[] ==== Built-in Sub-Group General Purpose Shuffle Functions NOTE: The functionality described in this section <> -support for the `<>` extension. +support for the {cl_khr_subgroup_shuffle_EXT} extension. The <> describes the OpenCL C programming language built-in functions that allow work items in a sub-group @@ -14980,7 +14980,7 @@ This section is informational and non-normative. |==== | OpenCL C Function | SPIR-V BuiltIn or Instruction | Enabling SPIR-V Capability -3+| For OpenCL 2.1 or `<>`: +3+| For OpenCL 2.1 or {cl_khr_subgroups_EXT}: | `get_​sub_​group_​size` | *SubgroupSize* @@ -15067,7 +15067,7 @@ This section is informational and non-normative. | *DeviceEnqueue* ifdef::cl_khr_subgroup_ballot[] -3+| For `<>`: +3+| For {cl_khr_subgroup_ballot_EXT}: | `sub_​group_​non_​uniform_​broadcast` | *OpGroupNonUniformBroadcast* @@ -15119,7 +15119,7 @@ ifdef::cl_khr_subgroup_ballot[] endif::cl_khr_subgroup_ballot[] ifdef::cl_khr_subgroup_clustered_reduce[] -3+| For `<>`: +3+| For {cl_khr_subgroup_clustered_reduce_EXT}: | `sub_​group_​clustered_​reduce_​add` | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* @@ -15154,7 +15154,7 @@ ifdef::cl_khr_subgroup_clustered_reduce[] endif::cl_khr_subgroup_clustered_reduce[] ifdef::cl_khr_subgroup_extended_types[] -3+| For `<>`: + +3+| For {cl_khr_subgroup_extended_types_EXT}: + Note: This extension adds new types to uniform sub-group operations. 
| `sub_​group_​broadcast` @@ -15193,7 +15193,7 @@ ifdef::cl_khr_subgroup_extended_types[] endif::cl_khr_subgroup_extended_types[] ifdef::cl_khr_subgroup_non_uniform_arithmetic[] -3+| For `<>`: +3+| For {cl_khr_subgroup_non_uniform_arithmetic_EXT}: | `sub_​group_​non_​uniform_​reduce_​add` | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* @@ -15290,7 +15290,7 @@ ifdef::cl_khr_subgroup_non_uniform_arithmetic[] endif::cl_khr_subgroup_non_uniform_arithmetic[] ifdef::cl_khr_subgroup_non_uniform_vote[] -3+| For `<>`: +3+| For {cl_khr_subgroup_non_uniform_vote_EXT}: | `sub_​group_​elect` | *OpGroupNonUniformElect* @@ -15307,7 +15307,7 @@ ifdef::cl_khr_subgroup_non_uniform_vote[] endif::cl_khr_subgroup_non_uniform_vote[] ifdef::cl_khr_subgroup_shuffle[] -3+| For `<>`: +3+| For {cl_khr_subgroup_shuffle_EXT}: | `sub_​group_​shuffle` | *OpGroupNonUniformShuffle* @@ -15318,7 +15318,7 @@ ifdef::cl_khr_subgroup_shuffle[] endif::cl_khr_subgroup_shuffle[] ifdef::cl_khr_subgroup_shuffle_relative[] -3+| For `<>`: +3+| For {cl_khr_subgroup_shuffle_relative_EXT}: | `sub_​group_​shuffle_​up` | *OpGroupNonUniformShuffleUp* @@ -15335,7 +15335,7 @@ ifdef::cl_khr_kernel_clock[] === Kernel Clock Functions NOTE: The functionality described in this section <> -support for the `<>` extension. + +support for the {cl_khr_kernel_clock_EXT} extension. + The `clock_read_device` and `clock_read_hilo_device` functions require support for the {opencl_c_kernel_clock_scope_device} feature. The `clock_read_work_group` and `clock_read_hilo_work_group` functions require @@ -15418,7 +15418,7 @@ Dynamically reconfiguring the rounding modes as specified by the IEEE 754 spec is unsupported. 
ifdef::cl_khr_fp16[] -If the `<>` extension macro is supported, then +If the {cl_khr_fp16_EXT} extension macro is supported, then if {CL_FP_ROUND_TO_NEAREST} is supported, the default rounding mode for half-precision floating-point operations will be round to nearest even; otherwise the default rounding mode will be round to zero. @@ -15439,7 +15439,7 @@ ifdef::cl_khr_select_fprounding_mode[] -- [[select-rounding-mode]] -If the `<>` extension macro is supported, the +If the {cl_khr_select_fprounding_mode_EXT} extension macro is supported, the floating-point rounding mode may be specified using the following *#pragma* in the OpenCL program source: @@ -15472,7 +15472,7 @@ condition just before the compound statement. Except where otherwise documented, the callee functions do not inherit the rounding mode of the caller function. -If the `<>` extension is enabled, the +If the {cl_khr_select_fprounding_mode_EXT} extension is enabled, the `\\__ROUNDING_MODE__` preprocessor symbol shall be defined to be one of the following according to the current rounding mode: @@ -15502,7 +15502,7 @@ rounding mode. Conversions from floating-point to integer type always use `rtz` mode, except where the user specifically asks for another rounding mode. -NOTE: The `<>` extension was deprecated in +NOTE: The {cl_khr_select_fprounding_mode_EXT} extension was deprecated in OpenCL 1.1, and its use is not recommended. -- endif::cl_khr_select_fprounding_mode[] @@ -15571,7 +15571,7 @@ Conversion between floating-point formats and <> must be correctly rounded. ifdef::cl_khr_fp16[] -If the `<>` extension macro is supported, +If the {cl_khr_fp16_EXT} extension macro is supported, addition, subtraction, multiplication, fused multiply-add operations on half types are required to be correctly rounded using the default rounding mode for half-precision floating-point operations. @@ -16269,7 +16269,7 @@ is the infinitely precise result. 
ifdef::cl_khr_fp16[] -If the `<>` extension macro is supported, +If the {cl_khr_fp16_EXT} extension macro is supported, the following table describes the minimum accuracy of half-precision floating-point arithmetic operations given as ULP values. The reference value used to compute the ULP value of an arithmetic operation @@ -17125,7 +17125,7 @@ ifdef::cl_khr_fp16[] [[converting-normalized-integer-channel-data-types-to-half-precision-floating-point-values]] ==== Converting Normalized Integer Channel Data Types to Half-Precision Floating-Point Values -If the `<>` extension is supported, then +If the {cl_khr_fp16_EXT} extension is supported, then for images created with image channel data type of {CL_UNORM_INT8} and {CL_UNORM_INT16}, *read_imageh* will convert the channel values from an 8-bit or 16-bit unsigned integer to normalized half-precision floating-point @@ -17292,7 +17292,7 @@ ifdef::cl_khr_fp16[] [[converting-half-precision-floating-point-values-to-normalized-integer-channel-data-types]] ==== Converting Half-Precision Floating-point Values to Normalized Integer Channel Data Types -If the `<>` extension is supported, then +If the {cl_khr_fp16_EXT} extension is supported, then for images created with image channel data type of {CL_UNORM_INT8} and {CL_UNORM_INT16}, *write_imageh* will convert the floating-point color value to an 8-bit or 16-bit unsigned integer. diff --git a/api/appendix_c.asciidoc b/api/appendix_c.asciidoc index 2531dbb2d..6583f1c72 100644 --- a/api/appendix_c.asciidoc +++ b/api/appendix_c.asciidoc @@ -389,54 +389,54 @@ include::{generated}/api/version-notes/CL_FLT_EPSILON.asciidoc[] | {CL_DBL_DIG_anchor} include::{generated}/api/version-notes/CL_DBL_DIG.asciidoc[] -Also see `<>`. +Also see {cl_khr_fp64_EXT}. | Number of decimal digits of precision for the type {cl_double_TYPE} | {CL_DBL_MANT_DIG_anchor} include::{generated}/api/version-notes/CL_DBL_MANT_DIG.asciidoc[] -Also see `<>`. +Also see {cl_khr_fp64_EXT}. 
| Number of digits in the mantissa of type {cl_double_TYPE} | {CL_DBL_MAX_10_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MAX_10_EXP.asciidoc[] -Also see `<<cl_khr_fp64>>`. +Also see {cl_khr_fp64_EXT}. | Maximum positive integer such that 10 raised to this power minus one can be represented as a normalized floating-point number of type {cl_double_TYPE} | {CL_DBL_MAX_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MAX_EXP.asciidoc[] -Also see `<<cl_khr_fp64>>`. +Also see {cl_khr_fp64_EXT}. | Maximum exponent value of type {cl_double_TYPE} | {CL_DBL_MIN_10_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MIN_10_EXP.asciidoc[] -Also see `<<cl_khr_fp64>>`. +Also see {cl_khr_fp64_EXT}. | Minimum negative integer such that 10 raised to this power minus one can be represented as a normalized floating-point number of type {cl_double_TYPE} | {CL_DBL_MIN_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MIN_EXP.asciidoc[] -Also see `<<cl_khr_fp64>>`. +Also see {cl_khr_fp64_EXT}. | Minimum exponent value of type {cl_double_TYPE} | {CL_DBL_RADIX_anchor} include::{generated}/api/version-notes/CL_DBL_RADIX.asciidoc[] -Also see `<<cl_khr_fp64>>`. +Also see {cl_khr_fp64_EXT}. | Base value of type {cl_double_TYPE} | {CL_DBL_MAX_anchor} include::{generated}/api/version-notes/CL_DBL_MAX.asciidoc[] -Also see `<<cl_khr_fp64>>`. +Also see {cl_khr_fp64_EXT}. | Maximum value of type {cl_double_TYPE} | {CL_DBL_MIN_anchor} include::{generated}/api/version-notes/CL_DBL_MIN.asciidoc[] -Also see `<<cl_khr_fp64>>`. +Also see {cl_khr_fp64_EXT}. | Minimum value of type {cl_double_TYPE} | {CL_DBL_EPSILON_anchor} include::{generated}/api/version-notes/CL_DBL_EPSILON.asciidoc[] -Also see `<<cl_khr_fp64>>`. +Also see {cl_khr_fp64_EXT}. | Minimum positive floating-point number of type {cl_double_TYPE} such that `1.0 {plus} {CL_DBL_EPSILON} != 1` is true.
| {CL_NAN_anchor} diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index ec6626c2a..eebc7ff2d 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -74,11 +74,11 @@ The following features are added to the OpenCL C programming language _section 6.12.3_. ** *async_work_group_strided_copy* defined in section _6.15.11_. ** *vec_step*, *shuffle* and *shuffle2* defined in section _6.15.13_. - * `<>` extension is a core feature. - * `<>`, - `<>`, - `<>` and - `<>` extensions are core features. + * {cl_khr_byte_addressable_store_EXT} extension is a core feature. + * {cl_khr_global_int32_base_atomics_EXT}, + {cl_khr_global_int32_extended_atomics_EXT}, + {cl_khr_local_int32_base_atomics_EXT} and + {cl_khr_local_int32_extended_atomics_EXT} extensions are core features. The built-in atomic function names are changed to use the *atomic_* prefix instead of *atom_*. * Macros `CL_VERSION_1_0` and `CL_VERSION_1_1`. @@ -97,13 +97,13 @@ The following features in OpenCL 1.0 are deprecated (see glossary) in OpenCL * The `-cl-strict-aliasing` build option has been deprecated. It is no longer required after defining type-based aliasing rules. // Bugzilla 5593 and 6068 - * The `<>` extension is deprecated and its + * The {cl_khr_select_fprounding_mode_EXT} extension is deprecated and its use is no longer recommended. The following new extensions are added to _section 9_ in OpenCL 1.1: - * `<>` for creating a CL event object from a GL sync object. - * `<>` for sharing memory objects with Direct3D 10. + * {cl_khr_gl_event_EXT} for creating a CL event object from a GL sync object. + * {cl_khr_d3d10_sharing_EXT} for sharing memory objects with Direct3D 10. The following modifications are made to the OpenCL ES Profile described in _section 10_ in OpenCL 1.1: @@ -146,7 +146,7 @@ runtime (_sections 4 and 5_): a kernel. * {clEnqueueMarkerWithWaitList} and {clEnqueueBarrierWithWaitList} APIs. 
* {clUnloadPlatformCompiler} to request that a single platform's compiler is - unloaded. This is compatible with the `<<cl_khr_icd>>` extension if that is + unloaded. This is compatible with the {cl_khr_icd_EXT} extension if that is supported, unlike {clUnloadCompiler}. The following features are added to the OpenCL C programming language @@ -183,7 +183,7 @@ The following APIs in OpenCL 1.1 are deprecated (see glossary) in OpenCL // Bugzilla 5391 - cl_khr_icd specification * {clUnloadCompiler} and {clGetExtensionFunctionAddress} APIs are deprecated. The {clUnloadPlatformCompiler} and {clGetExtensionFunctionAddressForPlatform} - APIs provide equivalent functionality are compatible with the `<<cl_khr_icd>>` + APIs provide equivalent functionality and are compatible with the {cl_khr_icd_EXT} extension. The following queries are deprecated (see glossary) in OpenCL 1.2: @@ -465,14 +465,14 @@ Changes from *v3.0.5*: * Fixed the calculation in "mapping work-items onto an ND-range". * Added new extensions: - ** `<<cl_khr_extended_versioning>>` - ** `<<cl_khr_subgroup_extended_types>>` - ** `<<cl_khr_subgroup_non_uniform_vote>>` - ** `<<cl_khr_subgroup_ballot>>` - ** `<<cl_khr_subgroup_non_uniform_arithmetic>>` - ** `<<cl_khr_subgroup_shuffle>>` - ** `<<cl_khr_subgroup_shuffle_relative>>` - ** `<<cl_khr_subgroup_clustered_reduce>>` + ** {cl_khr_extended_versioning_EXT} + ** {cl_khr_subgroup_extended_types_EXT} + ** {cl_khr_subgroup_non_uniform_vote_EXT} + ** {cl_khr_subgroup_ballot_EXT} + ** {cl_khr_subgroup_non_uniform_arithmetic_EXT} + ** {cl_khr_subgroup_shuffle_EXT} + ** {cl_khr_subgroup_shuffle_relative_EXT} + ** {cl_khr_subgroup_clustered_reduce_EXT} Changes from *v3.0.6*: @@ -481,11 +481,11 @@ Changes from *v3.0.6*: * Clarified the table structure in the backwards compatibility appendix. * Clarified that `-cl-unsafe-math-optimizations` also implies `-cl-denorms-are-zero`.
* Added new extensions: - ** `<>` - ** `<>` - ** `<>` - ** `<>` - ** `<>` + ** {cl_khr_extended_bit_ops_EXT} + ** {cl_khr_pci_bus_info_EXT} + ** {cl_khr_spirv_extended_debug_info_EXT} + ** {cl_khr_spirv_linkonce_odr_EXT} + ** {cl_khr_suggested_local_work_size_EXT} Changes from *v3.0.7*: @@ -493,7 +493,7 @@ Changes from *v3.0.7*: * Removed unnecessary phrase from sub-group mask function descriptions. * Added _input_slice_pitch_ error condition for read and write image APIs. * Added new extension: - ** `<>` + ** {cl_khr_integer_dot_product_EXT} Changes from *v3.0.8*: @@ -501,18 +501,18 @@ Changes from *v3.0.8*: * Clarified requirements for {CL_DEVICE_DOUBLE_FP_CONFIG} prior to OpenCL 2.0. * Clarified the behavior of ballot operations for remainder sub-groups. * Added new extensions: - ** `<>` (version 2) - ** `<>` (provisional) - ** `<>` (provisional) - ** `<>` (provisional) - ** `<>` (provisional) - ** `<>` (provisional) - ** `<>` (provisional) - ** `<>` (provisional) - ** `<>` (provisional) - ** `<>` (provisional) - ** `<>` (provisional) - ** `<>` (provisional) + ** {cl_khr_integer_dot_product_EXT} (version 2) + ** {cl_khr_semaphore_EXT} (provisional) + ** {cl_khr_external_semaphore_EXT} (provisional) + ** {cl_khr_external_semaphore_dx_fence_EXT} (provisional) + ** {cl_khr_external_semaphore_opaque_fd_EXT} (provisional) + ** {cl_khr_external_semaphore_sync_fd_EXT} (provisional) + ** {cl_khr_external_semaphore_win32_EXT} (provisional) + ** {cl_khr_external_memory_EXT} (provisional) + ** {cl_khr_external_memory_dma_buf_EXT} (provisional) + ** {cl_khr_external_memory_dx_EXT} (provisional) + ** {cl_khr_external_memory_opaque_fd_EXT} (provisional) + ** {cl_khr_external_memory_win32_EXT} (provisional) Changes from *v3.0.9*: @@ -521,10 +521,10 @@ Changes from *v3.0.9*: * Clarified that {clCompileProgram} is valid for programs created from SPIR. * Documented the possible state of a kernel object after a failed call to {clSetKernelArg}. 
* Added new extensions: - ** `<>` (final) - ** `<>` (final) - ** `<>` - ** `<>` (provisional) + ** {cl_khr_async_work_group_copy_fence_EXT} (final) + ** {cl_khr_extended_async_copies_EXT} (final) + ** {cl_khr_expect_assume_EXT} + ** {cl_khr_command_buffer_EXT} (provisional) Changes from *v3.0.10*: @@ -537,8 +537,8 @@ Changes from *v3.0.10*: * Clarified that the extended versioning extension is a core OpenCL 3.0 feature. * Clarified sub-group clustered reduction behavior when the cluster size is not an integer constant or a power of two. * Added new extensions: - ** `<>` - ** `<>` + ** {cl_khr_subgroup_rotate_EXT} + ** {cl_khr_work_group_uniform_arithmetic_EXT} Changes from *v3.0.11*: @@ -546,29 +546,29 @@ Changes from *v3.0.11*: * Added a maximum limit for the number of arguments supported by a kernel. * Clarified requirements for comparability and uniqueness of object handles. * Clarified behavior for invalid device-side enqueue `clk_event_t` handles. - * Clarified `<>` interactions with other extensions. + * Clarified {cl_khr_command_buffer_EXT} interactions with other extensions. * Specified error behavior when a command buffer is finalized multiple times. * Added new extension: - ** `<>` (provisional) + ** {cl_khr_command_buffer_mutable_dispatch_EXT} (provisional) Changes from *v3.0.12*: * Fixed the accuracy requirements description for half-precision math functions (those prefixed by `half_`). * Clarified that the semaphore type must always be provided when creating a semaphore. * Removed an unnecessary and contradictory error condition when creating a semaphore. - * Added an issue regarding non-linear image import to the `<>` extension. - * Added missing calls to {clBuildProgram} to the `<>` and `<>` sample code. + * Added an issue regarding non-linear image import to the {cl_khr_external_memory_EXT} extension. + * Added missing calls to {clBuildProgram} to the {cl_khr_command_buffer_EXT} and {cl_khr_command_buffer_mutable_dispatch_EXT} sample code. 
* Fixed a copy-paste error in the extensions quick reference appendix. * Fixed typos and improved formatting consistency in the extensions spec. Changes from *v3.0.13*: - * Corrected the precision for `cross` and `dot` to be based on `HALF_EPSILON` in `<>`, see {khronos-opencl-pr}/893[#893]. - * Added a context query for command-buffers to `<>`, see {khronos-opencl-pr}/899[#899]. - * Updated the semaphore wait and signal rules for binary semaphores in `<>`, see {khronos-opencl-pr}/882[#882]. - * Removed redundant error conditions from `<>` and `<>`, see {khronos-opencl-pr}/903[#903] and {khronos-opencl-pr}/904[#904]. + * Corrected the precision for `cross` and `dot` to be based on `HALF_EPSILON` in {cl_khr_fp16_EXT}, see {khronos-opencl-pr}/893[#893]. + * Added a context query for command-buffers to {cl_khr_command_buffer_EXT}, see {khronos-opencl-pr}/899[#899]. + * Updated the semaphore wait and signal rules for binary semaphores in {cl_khr_semaphore_EXT}, see {khronos-opencl-pr}/882[#882]. + * Removed redundant error conditions from {cl_khr_external_semaphore_EXT} and {cl_khr_external_memory_EXT}, see {khronos-opencl-pr}/903[#903] and {khronos-opencl-pr}/904[#904]. * Added new extension: - ** `<>` (provisional) + ** {cl_khr_command_buffer_multi_device_EXT} (provisional) Changes from *v3.0.14*: @@ -578,28 +578,28 @@ Changes from *v3.0.14*: * Clarified that {clSetCommandQueueProperty} is only required for OpenCL 1.0 devices and may return an error otherwise, see {khronos-opencl-pr}/980[#980]. * Clarified that the application must ensure the free function passed to {clEnqueueSVMFree} is thread safe, see {khronos-opencl-pr}/1016[#1016]. * Clarified that the application must ensure the user function passed to {clEnqueueNativeKernel} is thread safe, see {khronos-opencl-pr}/1026[#1026]. - * `<>` (provisional): + * {cl_khr_command_buffer_EXT} (provisional): ** Removed the "invalid" command buffer state, see {khronos-opencl-pr}/885[#885]. 
** Added support for recording SVM memory copies and memory fills in a command buffer, see {khronos-opencl-pr}/915[#915]. - * `<>` (provisional): + * {cl_khr_command_buffer_multi_device_EXT} (provisional): ** Clarified that the sync devices query should only return root devices, see {khronos-opencl-pr}/925[#925]. - * `<>` (provisional): + * {cl_khr_external_memory_EXT} (provisional): ** Disallowed specifying a device handle list without also specifying an external memory handle, see {khronos-opencl-pr}/922[#922]. ** Added a query to determine the handle types an implementation will assume have a linear memory layout, see {khronos-opencl-pr}/940[#940]. ** Added an external memory-specific device handle list enum, see {khronos-opencl-pr}/956[#956]. ** Clarified that implementations may acquire information about an image from an external memory handle when the image is created, see {khronos-opencl-pr}/970[#970]. - * `<>` (provisional): + * {cl_khr_external_semaphore_EXT} (provisional): ** Added the ability to re-import "sync fd" handles into an existing semaphore, see {khronos-opencl-pr}/939[#939]. ** Clarified that a semaphore may only export one handle type, and that a semaphore created from an external handle cannot also export a handle, see {khronos-opencl-pr}/975[#975]. - ** Clarified that `<>` requires support for `<>`, see {khronos-opencl-pr}/976[#976]. + ** Clarified that {cl_khr_external_semaphore_EXT} requires support for {cl_khr_semaphore_EXT}, see {khronos-opencl-pr}/976[#976]. ** Added a query to determine if a semaphore may export an external handle, see {khronos-opencl-pr}/997[#997]. - * `<>` (provisional): + * {cl_khr_semaphore_EXT} (provisional): ** Added an semaphore-specific device handle list enum, see {khronos-opencl-pr}/956[#956]. ** Restricted semaphores to a single associated device, see {khronos-opencl-pr}/996[#996]. 
- * `<>`: + * {cl_khr_subgroup_rotate_EXT}: ** Clarified that only rotating within a subgroup is supported, see {khronos-opencl-pr}/967[#967]. Changes from *v3.0.15*: * Added new extensions: - ** `<>` (provisional) + ** {cl_khr_kernel_clock_EXT} (provisional) diff --git a/api/appendix_h.asciidoc b/api/appendix_h.asciidoc index 1bbd6f330..2a2d37598 100644 --- a/api/appendix_h.asciidoc +++ b/api/appendix_h.asciidoc @@ -313,7 +313,7 @@ When creating a 2D image from a buffer is not supported: | {clGetDeviceInfo}, passing + {CL_DEVICE_EXTENSIONS} -| Will not describe support for the `<>` extension if _device_ does not support creating a 2D image from a buffer. +| Will not describe support for the {cl_khr_image2d_from_buffer_EXT} extension if _device_ does not support creating a 2D image from a buffer. | {clCreateImage} or + {clCreateImageWithProperties}, passing + @@ -424,7 +424,7 @@ When sub-groups are not supported: | {clGetDeviceInfo}, passing + {CL_DEVICE_EXTENSIONS} -| Will not describe support for the `<>` extension if _device_ does not support sub-groups. +| Will not describe support for the {cl_khr_subgroups_EXT} extension if _device_ does not support sub-groups. | {clGetKernelSubGroupInfo} | Returns {CL_INVALID_OPERATION} if _device_ does not support sub-groups. @@ -468,7 +468,7 @@ When writing to 3D image objects is not supported: | {clGetDeviceInfo}, passing + {CL_DEVICE_EXTENSIONS} -| Will not describe support for the `<>` extension if _device_ does not support writing to 3D image objects. +| Will not describe support for the {cl_khr_3d_image_writes_EXT} extension if _device_ does not support writing to 3D image objects. 
| {clGetSupportedImageFormats}, passing + {CL_MEM_OBJECT_IMAGE3D} and one of + diff --git a/api/cl_khr_3d_image_writes.asciidoc b/api/cl_khr_3d_image_writes.asciidoc index 7c980404c..49e388f4b 100644 --- a/api/cl_khr_3d_image_writes.asciidoc +++ b/api/cl_khr_3d_image_writes.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_3d_image_writes.txt[] === Description -`cl_khr_3d_image_writes` adds built-in OpenCL C functions that allow a +{cl_khr_3d_image_writes_EXT} adds built-in OpenCL C functions that allow a kernel to write to 3D image objects in addition to 2D image objects. See the link:{OpenCLCSpecURL}#cl_khr_3d_image_writes[3D Image Writes] diff --git a/api/cl_khr_async_work_group_copy_fence.asciidoc b/api/cl_khr_async_work_group_copy_fence.asciidoc index 3ec9923c7..1c36daccb 100644 --- a/api/cl_khr_async_work_group_copy_fence.asciidoc +++ b/api/cl_khr_async_work_group_copy_fence.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_async_work_group_copy_fence.txt[] === Description -`cl_khr_async_work_group_copy_fence` adds a new built-in OpenCL C function +{cl_khr_async_work_group_copy_fence_EXT} adds a new built-in OpenCL C function to establish a memory synchronization ordering of asynchronous copies. See the link:{OpenCLCSpecURL}#cl_khr_async_work_group_copy_fence[Async diff --git a/api/cl_khr_byte_addressable_store.asciidoc b/api/cl_khr_byte_addressable_store.asciidoc index 56a32e6e2..cdff78462 100644 --- a/api/cl_khr_byte_addressable_store.asciidoc +++ b/api/cl_khr_byte_addressable_store.asciidoc @@ -16,7 +16,7 @@ include::{generated}/meta/{refprefix}cl_khr_byte_addressable_store.txt[] === Description -`cl_khr_byte_addressable_store` relaxes restrictions on pointers to `char`, +{cl_khr_byte_addressable_store_EXT} relaxes restrictions on pointers to `char`, `uchar`, `char2`, `uchar2`, `short`, `ushort` and `half` that were present in _Section 6.8m: Restrictions_ of the OpenCL 1.0 specification. 
With this extension, applications are able to read from and write to diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index e5da09a4f..8234aa43e 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -36,7 +36,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer.txt[] === Description -`cl_khr_command_buffer` adds the ability to record and replay buffers of +{cl_khr_command_buffer_EXT} adds the ability to record and replay buffers of OpenCL commands. Command-buffers enable a reduction in overhead when enqueuing the same @@ -121,18 +121,18 @@ the capability is optional to enable optimizations on command-buffer recording. === Interactions With Other Extensions The introduction of the command-buffer abstraction enables functionality -beyond what the `cl_khr_command_buffer` extension currently provides, i.e. +beyond what the {cl_khr_command_buffer_EXT} extension currently provides, i.e. the recording of immutable commands to a single queue which can then be executed without commands synchronizing outside the command-buffer. Extra functionality expanding on this is provided as layered extensions on top of -`cl_khr_command_buffer`. The layered extensions that currently exist are: +{cl_khr_command_buffer_EXT}. The layered extensions that currently exist are: -* `<>` -* `<>` +* {cl_khr_command_buffer_multi_device_EXT} +* {cl_khr_command_buffer_mutable_dispatch_EXT} -Having `cl_khr_command_buffer` as a minimal base specification means that the +Having {cl_khr_command_buffer_EXT} as a minimal base specification means that the API defines mechanisms for functionality that is not enabled by this extension, -these are described in the following sub-sections. `cl_khr_command_buffer` will +these are described in the following sub-sections. 
{cl_khr_command_buffer_EXT} will retain its provisional extension status until other layered extensions are released, as these may reveal modifications needed to the base specification to support their intended use cases. @@ -141,45 +141,45 @@ support their intended use cases. The {clCommandNDRangeKernelKHR} entry-point defines a `properties` parameter of new type {cl_ndrange_kernel_command_properties_khr_TYPE}. No properties are defined -in `cl_khr_command_buffer`, but the parameter enables layered extensions like -`<>` to define properties that inform +in {cl_khr_command_buffer_EXT}, but the parameter enables layered extensions like +{cl_khr_command_buffer_mutable_dispatch_EXT} to define properties that inform the characteristics of the kernel command. ==== Command Handles All command recording entry-points define a {cl_mutable_command_khr_TYPE} output parameter which provides a handle to the specific command being recorded. Use of -these output handles is not enabled by the `cl_khr_command_buffer` extension, +these output handles is not enabled by the {cl_khr_command_buffer_EXT} extension, but the handles allow individual commands in a command-buffer to be referenced by the user. -Use of these handles is enabled in `<>` +Use of these handles is enabled in {cl_khr_command_buffer_mutable_dispatch_EXT} to give the capability for an application to use the handles to modify commands between enqueues of a command-buffer. ==== List of Queues Only a single command-queue can be associated with a command-buffer in the -`cl_khr_command_buffer` extension, but the API is designed so that the layered -`<>` extension can relax this constraint +{cl_khr_command_buffer_EXT} extension, but the API is designed so that the layered +{cl_khr_command_buffer_multi_device_EXT} extension can relax this constraint to allow commands to be recorded across multiple queues in the same command-buffer, providing replay of heterogeneous task graphs. 
Using multiple queue functionality will result in an error without -`<<cl_khr_command_buffer_multi_device>>` to relax usage of the following API +{cl_khr_command_buffer_multi_device_EXT} to relax usage of the following API features: * When a command-buffer is created the API enables passing a list of queues that the command-buffer will record commands to. Only a single queue is permitted in - `cl_khr_command_buffer`. + permitted in {cl_khr_command_buffer_EXT}. * Individual command recording entry-points define a {cl_command_queue_TYPE} parameter for which of the queues set on command-buffer creation that command - should be record to. This must be passed as NULL in `cl_khr_command_buffer`. + should be recorded to. This must be passed as NULL in {cl_khr_command_buffer_EXT}. * {clEnqueueCommandBufferKHR} takes a list of queues for command-buffer execution, correspond to those set on creation. Only a single queue is permitted in - `cl_khr_command_buffer`. + {cl_khr_command_buffer_EXT}. // The 'New ...' section can be auto-generated diff --git a/api/cl_khr_command_buffer_multi_device.asciidoc b/api/cl_khr_command_buffer_multi_device.asciidoc index 26d2d72ea..8a595a5b3 100644 --- a/api/cl_khr_command_buffer_multi_device.asciidoc +++ b/api/cl_khr_command_buffer_multi_device.asciidoc @@ -28,20 +28,20 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer_multi_device.txt[] === Description -The `cl_khr_command_buffer` extension separates command construction from +The {cl_khr_command_buffer_EXT} extension separates command construction from enqueue by providing a mechanism to record a set of commands which can then be repeatedly enqueued. However, the commands in a command-buffer can only be recorded to a single command-queue specified on command-buffer creation.
-`cl_khr_command_buffer_multi_device` extends the scope of a command-buffer +{cl_khr_command_buffer_multi_device_EXT} extends the scope of a command-buffer to allow commands to be recorded across multiple queues in the same command-buffer, providing execution of heterogeneous task graphs from command-queues associated with different devices. The ability for a user to deep copy an existing command-buffer so that the commands target a different device is also made possible by -`cl_khr_command_buffer_multi_device`. +{cl_khr_command_buffer_multi_device_EXT}. Depending on platform support the mapping of commands to the new target device can be done either explicitly by the user, or automatically by the OpenCL runtime. diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc index 8883fc837..13ada6c22 100644 --- a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -28,13 +28,13 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer_mutable_dispatch.txt[ === Description -The `<>` extension separates command construction +The {cl_khr_command_buffer_EXT} extension separates command construction from enqueue by providing a mechanism to record a set of commands which can then be repeatedly enqueued. However, the commands recorded to the command-buffer are immutable between enqueues. -`cl_khr_command_buffer_mutable_dispatch` removes this restriction. +{cl_khr_command_buffer_mutable_dispatch_EXT} removes this restriction. In particular, this extension allows the configuration of a kernel execution command in a command-buffer, called a _mutable-dispatch_, to be modified. This allows inputs and outputs to the kernel, as well as work-item sizes and @@ -46,10 +46,10 @@ in a new command-buffer. 
The {cl_command_buffer_structure_type_khr_TYPE} type has been added to this extension for the purpose of allowing expansion of mutable functionality in future extensions layered on top of -`cl_khr_command_buffer_mutable_dispatch`. +{cl_khr_command_buffer_mutable_dispatch_EXT}. Any parameter that is a structure containing a `void* next` member *must* have a value of `next` that is either `NULL`, or is a pointer to a valid -structure defined by `cl_khr_command_buffer_mutable_dispatch` or an +structure defined by {cl_khr_command_buffer_mutable_dispatch_EXT} or an extension layered on top. To be a valid structure in the pointer chain the first member of the structure *must* be a {cl_command_buffer_structure_type_khr_TYPE} identifier @@ -64,7 +64,7 @@ Vulkan specification. ==== This is designed so that another extension layered on -`cl_khr_command_buffer_mutable_dispatch` could allow modification of +{cl_khr_command_buffer_mutable_dispatch_EXT} could allow modification of commands recorded to a command-buffer other than kernel execution commands. As all command recording entry-points return a {cl_mutable_command_khr_TYPE} handle, and aspects like which {cl_mem_TYPE} object a command uses could diff --git a/api/cl_khr_create_command_queue.asciidoc b/api/cl_khr_create_command_queue.asciidoc index fe6bf0c08..89e22e1d6 100644 --- a/api/cl_khr_create_command_queue.asciidoc +++ b/api/cl_khr_create_command_queue.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_create_command_queue.txt[] === Description -`cl_khr_create_command_queue` allows OpenCL 1.x devices to support an +{cl_khr_create_command_queue_EXT} allows OpenCL 1.x devices to support an equivalent of the {clCreateCommandQueueWithProperties} API that was added in OpenCL 2.0. 
This allows OpenCL 1.x devices to support other optional extensions or diff --git a/api/cl_khr_d3d10_sharing.asciidoc b/api/cl_khr_d3d10_sharing.asciidoc index 0a69e5e89..96532a430 100644 --- a/api/cl_khr_d3d10_sharing.asciidoc +++ b/api/cl_khr_d3d10_sharing.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_d3d10_sharing.txt[] === Description -`cl_khr_d3d10_sharing` provides interoperability between OpenCL and Direct3D 10. +{cl_khr_d3d10_sharing_EXT} provides interoperability between OpenCL and Direct3D 10. === New Commands diff --git a/api/cl_khr_d3d11_sharing.asciidoc b/api/cl_khr_d3d11_sharing.asciidoc index e0573b296..c3fdd8154 100644 --- a/api/cl_khr_d3d11_sharing.asciidoc +++ b/api/cl_khr_d3d11_sharing.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_d3d11_sharing.txt[] === Description -`cl_khr_d3d11_sharing` provides interoperability between OpenCL and Direct3D 11. +{cl_khr_d3d11_sharing_EXT} provides interoperability between OpenCL and Direct3D 11. === New Commands diff --git a/api/cl_khr_depth_images.asciidoc b/api/cl_khr_depth_images.asciidoc index 73469eecc..61ceb60ae 100644 --- a/api/cl_khr_depth_images.asciidoc +++ b/api/cl_khr_depth_images.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_depth_images.txt[] === Description -`cl_khr_depth_images` adds OpenCL C support for depth images. +{cl_khr_depth_images_EXT} adds OpenCL C support for depth images. See the link:{OpenCLCSpecURL}#cl_khr_depth_images[Depth Images] section of the OpenCL C specification for more information. 
diff --git a/api/cl_khr_device_enqueue_local_arg_types.asciidoc b/api/cl_khr_device_enqueue_local_arg_types.asciidoc index ee3acb41b..1adcbd60c 100644 --- a/api/cl_khr_device_enqueue_local_arg_types.asciidoc +++ b/api/cl_khr_device_enqueue_local_arg_types.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_device_enqueue_local_arg_types.txt[] === Description -`cl_khr_device_enqueue_local_arg_types` allows arguments to blocks that are +{cl_khr_device_enqueue_local_arg_types_EXT} allows arguments to blocks that are passed to the *enqueue_kernel* built-in OpenCL C function to be pointers to any type (built-in or user-defined) in local memory, instead of requiring arguments to blocks to be pointers to void in local memory. diff --git a/api/cl_khr_device_uuid.asciidoc b/api/cl_khr_device_uuid.asciidoc index 87f803daf..74b4f00b8 100644 --- a/api/cl_khr_device_uuid.asciidoc +++ b/api/cl_khr_device_uuid.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_device_uuid.txt[] === Description -`cl_khr_device_uuid` adds the ability to query a universally unique +{cl_khr_device_uuid_EXT} adds the ability to query a universally unique identifier (UUID) for an OpenCL driver and OpenCL device. The UUIDs returned by the query may be used to identify drivers and devices across processes or APIs. diff --git a/api/cl_khr_dx9_media_sharing.asciidoc b/api/cl_khr_dx9_media_sharing.asciidoc index 91fd3dad4..7f887bcf5 100644 --- a/api/cl_khr_dx9_media_sharing.asciidoc +++ b/api/cl_khr_dx9_media_sharing.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_dx9_media_sharing.txt[] === Description -`cl_khr_dx9_media_sharing` allows applications to use media surfaces as +{cl_khr_dx9_media_sharing_EXT} allows applications to use media surfaces as OpenCL memory objects. This allows efficient sharing of data between OpenCL and selected adapter APIs (only DX9 for now). 
diff --git a/api/cl_khr_egl_event.asciidoc b/api/cl_khr_egl_event.asciidoc index 106bb82ec..278dec5a2 100644 --- a/api/cl_khr_egl_event.asciidoc +++ b/api/cl_khr_egl_event.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_egl_event.txt[] === Description -`cl_khr_egl_event` allows creating OpenCL event objects linked to EGL fence +{cl_khr_egl_event_EXT} allows creating OpenCL event objects linked to EGL fence sync objects, potentially improving efficiency of sharing images and buffers between the two APIs. The companion `EGL_KHR_cl_event` extension provides the complementary @@ -31,7 +31,7 @@ functionality of creating an EGL sync object from an OpenCL event object. === Issues -Most issues are shared with `<>` and are resolved as +Most issues are shared with {cl_khr_gl_event_EXT} and are resolved as described in that extension. . Should we support implicit synchronization? diff --git a/api/cl_khr_egl_image.asciidoc b/api/cl_khr_egl_image.asciidoc index 0d1b0df27..d31989430 100644 --- a/api/cl_khr_egl_image.asciidoc +++ b/api/cl_khr_egl_image.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_egl_image.txt[] === Description -`cl_khr_egl_image` provides a mechanism to creating OpenCL memory objects +{cl_khr_egl_image_EXT} provides a mechanism to create OpenCL memory objects from from EGLImages. === New Commands @@ -96,7 +96,7 @@ needed to represent those image types. The basic portable form of synchronization is to use a {clFinish}, as is the case for GL interop. In addition implementations which support the synchronization extensions -`<>` and `EGL_KHR_cl_event` can interoperate more +{cl_khr_egl_event_EXT} and `EGL_KHR_cl_event` can interoperate more efficiently as described in those extensions.
-- diff --git a/api/cl_khr_expect_assume.asciidoc b/api/cl_khr_expect_assume.asciidoc index c5559a134..615a0d559 100644 --- a/api/cl_khr_expect_assume.asciidoc +++ b/api/cl_khr_expect_assume.asciidoc @@ -19,7 +19,7 @@ include::{generated}/meta/{refprefix}cl_khr_expect_assume.txt[] === Description -`cl_khr_expect_assume` adds mechanisms to provide information to the +{cl_khr_expect_assume_EXT} adds mechanisms to provide information to the compiler that may improve the performance of some kernels. Specifically, this extension adds the ability to: diff --git a/api/cl_khr_extended_async_copies.asciidoc b/api/cl_khr_extended_async_copies.asciidoc index 0fac1890f..e04cf7a40 100644 --- a/api/cl_khr_extended_async_copies.asciidoc +++ b/api/cl_khr_extended_async_copies.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_extended_async_copies.txt[] === Description -`cl_khr_extended_async_copies` augments built-in OpenCL C asynchronous copy +{cl_khr_extended_async_copies_EXT} augments built-in OpenCL C asynchronous copy functions to support more patterns: . For async copy between 2D source and 2D destination. diff --git a/api/cl_khr_extended_bit_ops.asciidoc b/api/cl_khr_extended_bit_ops.asciidoc index b516f1a23..766306ff4 100644 --- a/api/cl_khr_extended_bit_ops.asciidoc +++ b/api/cl_khr_extended_bit_ops.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_extended_bit_ops.txt[] === Description -`cl_khr_extended_bit_ops` adds built-in OpenCL C functions for performing +{cl_khr_extended_bit_ops_EXT} adds built-in OpenCL C functions for performing extended bit operations. 
Specifically, the following functions are added: diff --git a/api/cl_khr_extended_versioning.asciidoc b/api/cl_khr_extended_versioning.asciidoc index d67e61f0a..27f651b8b 100644 --- a/api/cl_khr_extended_versioning.asciidoc +++ b/api/cl_khr_extended_versioning.asciidoc @@ -17,7 +17,7 @@ include::{generated}/meta/{refprefix}cl_khr_extended_versioning.txt[] === Description -The `cl_khr_extended_versioning` extension introduces new platform and +The {cl_khr_extended_versioning_EXT} extension introduces new platform and device queries that return detailed version information to applications. It makes it possible to return the exact revision of the specification or intermediate languages supported by an implementation. @@ -131,7 +131,7 @@ there are no elements to return? . Should the queries for which the old-style query doesn't exist in a given OpenCL version be present (e.g. {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR} prior to OpenCL 2.1 or - without support for `<>` or + without support for {cl_khr_il_program_EXT} or {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} on OpenCL 1.0)? + -- diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index dbe5e6a95..6da4455de 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -35,7 +35,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory.txt[] === Description -`cl_khr_external_memory` defines a generic mechanism to share buffer and +{cl_khr_external_memory_EXT} defines a generic mechanism to share buffer and image objects between OpenCL and many other APIs, including: * Optional properties to import external memory exported by other APIs @@ -90,7 +90,7 @@ TODO ==== Example for Creating a CL Buffer From an Exported External Buffer in a Single Device Context -This example also requires use of the `<>` +This example also requires use of the {cl_khr_external_memory_opaque_fd_EXT} extension. 
[source] @@ -124,7 +124,7 @@ cl_mem extMemBuffer = clCreateBufferWithProperties(/*context*/ clContex ==== Example for Creating a CL Image From an Exported External Image for Single Device Usage in a Multi-Device Context -This example also requires use of the `<>` +This example also requires use of the {cl_khr_external_memory_opaque_fd_EXT} extension. [source] diff --git a/api/cl_khr_external_memory_dma_buf.asciidoc b/api/cl_khr_external_memory_dma_buf.asciidoc index be578969e..071aed992 100644 --- a/api/cl_khr_external_memory_dma_buf.asciidoc +++ b/api/cl_khr_external_memory_dma_buf.asciidoc @@ -7,7 +7,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_dma_buf.txt[] //@ 0.9.3 //@ *Extension and Version Dependencies*:: //@ This extension requires OpenCL 3.0. -//@ This extension requires the `<>` extension. +//@ This extension requires the {cl_khr_external_memory_EXT} extension. === Other Extension Metadata @@ -36,7 +36,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_dma_buf.txt[] === Description -`cl_khr_external_memory_dma_buf` extends +{cl_khr_external_memory_dma_buf_EXT} extends {cl_external_memory_handle_type_khr_TYPE} to support Linux `dma_buf` as an external memory handle type that may be specified when creating a buffer or image memory object. diff --git a/api/cl_khr_external_memory_dx.asciidoc b/api/cl_khr_external_memory_dx.asciidoc index a0be41b1b..ab79949f3 100644 --- a/api/cl_khr_external_memory_dx.asciidoc +++ b/api/cl_khr_external_memory_dx.asciidoc @@ -7,7 +7,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_dx.txt[] //@ 0.9.3 //@ *Extension and Version Dependencies*:: //@ This extension requires OpenCL 3.0. -//@ This extension requires the `<>` extension. +//@ This extension requires the {cl_khr_external_memory_EXT} extension. 
=== Other Extension Metadata @@ -36,7 +36,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_dx.txt[] === Description -`cl_khr_external_memory_dx` extends +{cl_khr_external_memory_dx_EXT} extends {cl_external_memory_handle_type_khr_TYPE} to support Windows handles referring to Direct 3D resources as external memory handle types that may be specified when creating a buffer or image memory object. diff --git a/api/cl_khr_external_memory_opaque_fd.asciidoc b/api/cl_khr_external_memory_opaque_fd.asciidoc index 208b2391b..3d7c059e3 100644 --- a/api/cl_khr_external_memory_opaque_fd.asciidoc +++ b/api/cl_khr_external_memory_opaque_fd.asciidoc @@ -7,7 +7,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_opaque_fd.txt[] //@ 0.9.3 //@ *Extension and Version Dependencies*:: //@ This extension requires OpenCL 3.0. -//@ This extension requires the `<>` extension. +//@ This extension requires the {cl_khr_external_memory_EXT} extension. === Other Extension Metadata @@ -36,7 +36,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_opaque_fd.txt[] === Description -`cl_khr_external_memory_opaque_fd` extends +{cl_khr_external_memory_opaque_fd_EXT} extends {cl_external_memory_handle_type_khr_TYPE} to support a POSIX file descriptor handle as an external memory handle type that may be specified when creating a buffer or image memory object. diff --git a/api/cl_khr_external_memory_win32.asciidoc b/api/cl_khr_external_memory_win32.asciidoc index 65f3dc337..bfb03d853 100644 --- a/api/cl_khr_external_memory_win32.asciidoc +++ b/api/cl_khr_external_memory_win32.asciidoc @@ -7,7 +7,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_win32.txt[] //@ 0.9.3 //@ *Extension and Version Dependencies*:: //@ This extension requires OpenCL 3.0. -//@ This extension requires the `<>` extension. +//@ This extension requires the {cl_khr_external_memory_EXT} extension. 
=== Other Extension Metadata @@ -36,7 +36,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_win32.txt[] === Description -`cl_khr_external_memory_win32` extends +{cl_khr_external_memory_win32_EXT} extends {cl_external_memory_handle_type_khr_TYPE} to support Windows handles as external memory handle types that may be specified when creating a buffer or image memory object. diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc index 0671f544a..c5debcc7f 100644 --- a/api/cl_khr_external_semaphore.asciidoc +++ b/api/cl_khr_external_semaphore.asciidoc @@ -9,7 +9,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore.txt[] 2021-09-10 *Interactions and External Dependencies*:: * This extension requires OpenCL 1.2. - * The `<>` extension is required as it defines semaphore + * The {cl_khr_semaphore_EXT} extension is required as it defines semaphore objects as well as for wait and signal operations on semaphores. * For OpenCL to be able to import external semaphores from other APIs using this extension, the other API is required to provide below @@ -44,7 +44,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore.txt[] === Description -`cl_khr_semaphore` introduced semaphores as a new type along with a set of +{cl_khr_semaphore_EXT} introduced semaphores as a new type along with a set of APIs for create, release, retain, wait and signal operations on it. This extension defines APIs and mechanisms to share semaphores created in an external API by importing into and exporting from OpenCL. @@ -65,10 +65,10 @@ This extension defines: semaphores created from different handle types. * API query exportable semaphores handles using specified handle type. 
-The layered extensions `<>`, -`<>`, -`<>`, and -`<>` define specific external semaphores +The layered extensions {cl_khr_external_semaphore_dx_fence_EXT}, +{cl_khr_external_semaphore_opaque_fd_EXT}, +{cl_khr_external_semaphore_sync_fd_EXT}, and +{cl_khr_external_semaphore_win32_EXT} define specific external semaphores that may be imported into or exported from OpenCL. === New Types @@ -91,7 +91,7 @@ that may be imported into or exported from OpenCL. === Sample Code -The following examples use the `<>` +The following examples use the {cl_khr_external_semaphore_opaque_fd_EXT} extension to obtain an external semaphore. Similar code can be written using the other layered extensions. @@ -281,5 +281,5 @@ while (true) { * Revision 0.9.1, 2023-11-16 ** Added {CL_SEMAPHORE_EXPORTABLE_KHR}. * Revision 0.9.2, 2023-11-21 - ** Added re-import function call to `<>` + ** Added re-import function call to {cl_khr_external_semaphore_sync_fd_EXT} diff --git a/api/cl_khr_external_semaphore_dx_fence.asciidoc b/api/cl_khr_external_semaphore_dx_fence.asciidoc index 1fd274587..e3cc259b3 100644 --- a/api/cl_khr_external_semaphore_dx_fence.asciidoc +++ b/api/cl_khr_external_semaphore_dx_fence.asciidoc @@ -30,9 +30,9 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore_dx_fence.txt[] === Description -`cl_khr_external_semaphore_dx_fence` supports importing and exporting a +{cl_khr_external_semaphore_dx_fence_EXT} supports importing and exporting a D3D12 fence as an external semaphore using the APIs introduced by -`<>`. +{cl_khr_external_semaphore_EXT}. 
=== New Enums diff --git a/api/cl_khr_external_semaphore_opaque_fd.asciidoc b/api/cl_khr_external_semaphore_opaque_fd.asciidoc index b74a93884..eb7cc4563 100644 --- a/api/cl_khr_external_semaphore_opaque_fd.asciidoc +++ b/api/cl_khr_external_semaphore_opaque_fd.asciidoc @@ -30,9 +30,9 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore_opaque_fd.txt[] === Description -`cl_khr_external_semaphore_opaque_fd` supports importing and exporting a +{cl_khr_external_semaphore_opaque_fd_EXT} supports importing and exporting a restricted POSIX file descriptor as an external semaphore using the APIs -introduced by `<>`. +introduced by {cl_khr_external_semaphore_EXT}. === New Enums diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc index 0368bdee3..4d7e88922 100644 --- a/api/cl_khr_external_semaphore_sync_fd.asciidoc +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -30,10 +30,10 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore_sync_fd.txt[] === Description -`cl_khr_external_semaphore_sync_fd` supports importing and exporting a POSIX +{cl_khr_external_semaphore_sync_fd_EXT} supports importing and exporting a POSIX file descriptor handle to a Linux Sync File or Android Fence object as an external semaphore using the APIs introduced by -`<>`. +{cl_khr_external_semaphore_EXT}. === New Commands @@ -55,4 +55,4 @@ external semaphore using the APIs introduced by * Revision 0.9.1, 2023-11-16 ** Added {CL_SEMAPHORE_EXPORTABLE_KHR}. 
* Revision 0.9.2, 2023-11-21 - ** Added re-import function call to `<>` + ** Added re-import function call to {cl_khr_external_semaphore_sync_fd_EXT} diff --git a/api/cl_khr_external_semaphore_win32.asciidoc b/api/cl_khr_external_semaphore_win32.asciidoc index e8b1a772a..725a59966 100644 --- a/api/cl_khr_external_semaphore_win32.asciidoc +++ b/api/cl_khr_external_semaphore_win32.asciidoc @@ -30,9 +30,9 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore_win32.txt[] === Description -`cl_khr_external_semaphore_win32` supports importing and exporting an NT +{cl_khr_external_semaphore_win32_EXT} supports importing and exporting an NT handle or global share handle as an external semaphore using the APIs -introduced by `<>`. +introduced by {cl_khr_external_semaphore_EXT}. === New Enums diff --git a/api/cl_khr_fp16.asciidoc b/api/cl_khr_fp16.asciidoc index d2ebcd284..4b7feb139 100644 --- a/api/cl_khr_fp16.asciidoc +++ b/api/cl_khr_fp16.asciidoc @@ -17,7 +17,7 @@ include::{generated}/meta/{refprefix}cl_khr_fp16.txt[] === Description -`cl_khr_fp16` adds support to OpenCL C for half scalar and vector types as +{cl_khr_fp16_EXT} adds support to OpenCL C for half scalar and vector types as built-in types that can be used for arithmetic operations, conversions, etc. See the link:{OpenCLCSpecURL}#cl_khr_fp16[Half-Precision Floating-Point] diff --git a/api/cl_khr_fp64.asciidoc b/api/cl_khr_fp64.asciidoc index 9bb28c3a5..5006d8328 100644 --- a/api/cl_khr_fp64.asciidoc +++ b/api/cl_khr_fp64.asciidoc @@ -17,7 +17,7 @@ include::{generated}/meta/{refprefix}cl_khr_fp64.txt[] === Description -`cl_khr_fp64` adds support to OpenCL C for double-precision scalar and +{cl_khr_fp64_EXT} adds support to OpenCL C for double-precision scalar and vector types as built-in types that can be used for arithmetic operations, conversions, etc. 
diff --git a/api/cl_khr_gl_depth_images.asciidoc b/api/cl_khr_gl_depth_images.asciidoc index 470af9859..05258a612 100644 --- a/api/cl_khr_gl_depth_images.asciidoc +++ b/api/cl_khr_gl_depth_images.asciidoc @@ -14,8 +14,8 @@ include::{generated}/meta/{refprefix}cl_khr_gl_depth_images.txt[] === Description -`cl_khr_gl_depth_images` extends OpenCL / OpenGL sharing defined by the -`<>` extension to allow an OpenCL image to be created +{cl_khr_gl_depth_images_EXT} extends OpenCL / OpenGL sharing defined by the +{cl_khr_gl_sharing_EXT} extension to allow an OpenCL image to be created from an OpenGL depth or depth-stencil texture. Depth images with an image channel order of {CL_DEPTH_STENCIL} can only be diff --git a/api/cl_khr_gl_event.asciidoc b/api/cl_khr_gl_event.asciidoc index 2ee22f486..1239e79ff 100644 --- a/api/cl_khr_gl_event.asciidoc +++ b/api/cl_khr_gl_event.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_gl_event.txt[] === Description -`cl_khr_gl_event` allows creating OpenCL event objects linked to OpenGL +{cl_khr_gl_event_EXT} allows creating OpenCL event objects linked to OpenGL fence sync objects, potentially improving efficiency of sharing images and buffers between the two APIs. The companion `GL_ARB_cl_event` extension provides the complementary diff --git a/api/cl_khr_gl_msaa_sharing.asciidoc b/api/cl_khr_gl_msaa_sharing.asciidoc index eb0fed1ee..64f4557cd 100644 --- a/api/cl_khr_gl_msaa_sharing.asciidoc +++ b/api/cl_khr_gl_msaa_sharing.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_gl_msaa_sharing.txt[] === Description -`cl_khr_gl_msaa_sharing` extends the `<>` extension to +{cl_khr_gl_msaa_sharing_EXT} extends the {cl_khr_gl_sharing_EXT} extension to allow a shared OpenCL/OpenGL image object to be created from an OpenGL multi-sampled ("`MSAA`") color or depth texture. @@ -22,7 +22,7 @@ This extension adds multi-sample support to {clCreateFromGLTexture} and {clGetGLTextureInfo}, and allows <>. 
-This extension requires `<>`. +This extension requires {cl_khr_gl_depth_images_EXT}. See the link:{OpenCLCSpecURL}#cl_khr_gl_msaa_sharing[cl_khr_gl_msaa_sharing] section of the OpenCL C specification for more information. diff --git a/api/cl_khr_gl_sharing.asciidoc b/api/cl_khr_gl_sharing.asciidoc index 237d96511..779b94e37 100644 --- a/api/cl_khr_gl_sharing.asciidoc +++ b/api/cl_khr_gl_sharing.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_gl_sharing.txt[] === Description -The `cl_khr_gl_sharing` extension allows use of OpenGL buffer, texture, and +The {cl_khr_gl_sharing_EXT} extension allows use of OpenGL buffer, texture, and renderbuffer objects as OpenCL memory objects, referred to as "`Shared OpenCL/OpenGL Memory Objects`". @@ -217,7 +217,7 @@ binding layer API in use. + -- *RESOLVED*: These were not actual extensions, but the result of splitting -the `cl_khr_gl_sharing` extension language into two separate sections for +the {cl_khr_gl_sharing_EXT} extension language into two separate sections for publication. All extension language has now been integrated into the unified Specification and this distinction is not useful. diff --git a/api/cl_khr_global_int32_base_atomics.asciidoc b/api/cl_khr_global_int32_base_atomics.asciidoc index 36b331670..079a834c5 100644 --- a/api/cl_khr_global_int32_base_atomics.asciidoc +++ b/api/cl_khr_global_int32_base_atomics.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_global_int32_base_atomics.txt[] === Description -`cl_khr_global_int32_base_atomics` allows OpenCL C atomic operations to be +{cl_khr_global_int32_base_atomics_EXT} allows OpenCL C atomic operations to be performed on 32-bit signed and unsigned integers in global memory. 
This extension became a core feature in OpenCL 1.1, with the built-in atomic diff --git a/api/cl_khr_global_int32_extended_atomics.asciidoc b/api/cl_khr_global_int32_extended_atomics.asciidoc index e4fd74210..59831cb02 100644 --- a/api/cl_khr_global_int32_extended_atomics.asciidoc +++ b/api/cl_khr_global_int32_extended_atomics.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_global_int32_extended_atomics.txt[] === Description -`cl_khr_global_int32_extended_atomics` allows OpenCL C extended atomic +{cl_khr_global_int32_extended_atomics_EXT} allows OpenCL C extended atomic operations to be performed on 32-bit signed and unsigned integers in global memory. diff --git a/api/cl_khr_icd.asciidoc b/api/cl_khr_icd.asciidoc index fb99fb74f..9a4b46cc8 100644 --- a/api/cl_khr_icd.asciidoc +++ b/api/cl_khr_icd.asciidoc @@ -17,7 +17,7 @@ include::{generated}/meta/{refprefix}cl_khr_icd.txt[] === Description -`cl_khr_icd` describes a platform extension which defines a simple mechanism +{cl_khr_icd_EXT} describes a platform extension which defines a simple mechanism through which the Khronos OpenCL installable client driver loader (ICD Loader) may expose multiple separate vendor installable client drivers (Vendor ICDs) for OpenCL. @@ -225,7 +225,7 @@ ignore the library. Next the ICD Loader queries available ICD-enabled platforms in the library using {clIcdGetPlatformIDsKHR}. For each of these platforms, the ICD Loader queries the platform's extension -string to verify that `<>` is supported, then queries the +string to verify that {cl_khr_icd_EXT} is supported, then queries the platform's Vendor ICD extension suffix using {clGetPlatformInfo} with the value {CL_PLATFORM_ICD_SUFFIX_KHR}. 
diff --git a/api/cl_khr_il_program.asciidoc b/api/cl_khr_il_program.asciidoc index 472823546..b8d0c4846 100644 --- a/api/cl_khr_il_program.asciidoc +++ b/api/cl_khr_il_program.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_il_program.txt[] === Description -`cl_khr_il_program` adds the ability to create programs with intermediate +{cl_khr_il_program_EXT} adds the ability to create programs with intermediate language (IL), usually SPIR-V. Further information about the format and contents of SPIR-V may be found in the SPIR-V Specification. diff --git a/api/cl_khr_image2d_from_buffer.asciidoc b/api/cl_khr_image2d_from_buffer.asciidoc index cb3f29eb0..be7783c35 100644 --- a/api/cl_khr_image2d_from_buffer.asciidoc +++ b/api/cl_khr_image2d_from_buffer.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_image2d_from_buffer.txt[] === Description -`cl_khr_image2d_from_buffer` allows a 2D image to be created from an +{cl_khr_image2d_from_buffer_EXT} allows a 2D image to be created from an existing OpenCL buffer memory object. This extension became a core feature in OpenCL 2.0. diff --git a/api/cl_khr_initialize_memory.asciidoc b/api/cl_khr_initialize_memory.asciidoc index 8eddeedff..f2c0ce800 100644 --- a/api/cl_khr_initialize_memory.asciidoc +++ b/api/cl_khr_initialize_memory.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_initialize_memory.txt[] === Description -`cl_khr_initialize_memory` adds OpenCL C support for initializing local and +{cl_khr_initialize_memory_EXT} adds OpenCL C support for initializing local and private memory before a kernel begins execution. This is accomplished by specifying a flag at context creation time affecting all such memory. 
diff --git a/api/cl_khr_int64_base_atomics.asciidoc b/api/cl_khr_int64_base_atomics.asciidoc index 8723cab2c..ac688d915 100644 --- a/api/cl_khr_int64_base_atomics.asciidoc +++ b/api/cl_khr_int64_base_atomics.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_int64_base_atomics.txt[] === Description -`cl_khr_int64_base_atomics` adds built-in OpenCL functions supporting atomic +{cl_khr_int64_base_atomics_EXT} adds built-in OpenCL functions supporting atomic operations to be performed on 64-bit signed and unsigned integers in global and local memory. diff --git a/api/cl_khr_int64_extended_atomics.asciidoc b/api/cl_khr_int64_extended_atomics.asciidoc index f4fab1075..69416081c 100644 --- a/api/cl_khr_int64_extended_atomics.asciidoc +++ b/api/cl_khr_int64_extended_atomics.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_int64_extended_atomics.txt[] === Description -`cl_khr_int64_extended_atomics` adds built-in OpenCL functions supporting +{cl_khr_int64_extended_atomics_EXT} adds built-in OpenCL functions supporting extended atomic operations to be performed on 64-bit signed and unsigned integers in global and local memory. diff --git a/api/cl_khr_integer_dot_product.asciidoc b/api/cl_khr_integer_dot_product.asciidoc index 38377238d..257401f9d 100644 --- a/api/cl_khr_integer_dot_product.asciidoc +++ b/api/cl_khr_integer_dot_product.asciidoc @@ -20,11 +20,11 @@ include::{generated}/meta/{refprefix}cl_khr_integer_dot_product.txt[] === Description -`cl_khr_integer_dot_product` adds support for SPIR-V instructions and OpenCL +{cl_khr_integer_dot_product_EXT} adds support for SPIR-V instructions and OpenCL C built-in functions to compute the dot product of vectors of integers. 
OpenCL C compilers supporting this extension will define the extension macro -`cl_khr_integer_dot_product`, and may define corresponding feature macros +{cl_khr_integer_dot_product_EXT}, and may define corresponding feature macros {opencl_c_integer_dot_product_input_4x8bit} and {opencl_c_integer_dot_product_input_4x8bit_packed} depending on the reported capabilities. diff --git a/api/cl_khr_local_int32_base_atomics.asciidoc b/api/cl_khr_local_int32_base_atomics.asciidoc index 4fba21aa5..51300f0c5 100644 --- a/api/cl_khr_local_int32_base_atomics.asciidoc +++ b/api/cl_khr_local_int32_base_atomics.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_local_int32_base_atomics.txt[] === Description -`cl_khr_local_int32_base_atomics` allows OpenCL C atomic operations to be +{cl_khr_local_int32_base_atomics_EXT} allows OpenCL C atomic operations to be performed on 32-bit signed and unsigned integers in local memory. This extension became a core feature in OpenCL 1.1, with the built-in atomic diff --git a/api/cl_khr_local_int32_extended_atomics.asciidoc b/api/cl_khr_local_int32_extended_atomics.asciidoc index 05b5d0cab..917d2e26a 100644 --- a/api/cl_khr_local_int32_extended_atomics.asciidoc +++ b/api/cl_khr_local_int32_extended_atomics.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_local_int32_extended_atomics.txt[] === Description -`cl_khr_local_int32_extended_atomics` allows OpenCL C extended atomic +{cl_khr_local_int32_extended_atomics_EXT} allows OpenCL C extended atomic operations to be performed on 32-bit signed and unsigned integers in local memory. 
diff --git a/api/cl_khr_mipmap_image.asciidoc b/api/cl_khr_mipmap_image.asciidoc index ab2a7fe0f..a500ed507 100644 --- a/api/cl_khr_mipmap_image.asciidoc +++ b/api/cl_khr_mipmap_image.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_mipmap_image.txt[] === Description -The `cl_khr_mipmap_image` extension adds the ability to create and access +The {cl_khr_mipmap_image_EXT} extension adds the ability to create and access mipmapped images: * {clCreateImage} is extended to create mipmapped images. diff --git a/api/cl_khr_mipmap_image_writes.asciidoc b/api/cl_khr_mipmap_image_writes.asciidoc index 84278ce60..7051ca598 100644 --- a/api/cl_khr_mipmap_image_writes.asciidoc +++ b/api/cl_khr_mipmap_image_writes.asciidoc @@ -14,11 +14,11 @@ include::{generated}/meta/{refprefix}cl_khr_mipmap_image_writes.txt[] === Description -The `cl_khr_mipmap_image_writes` extension adds OpenCL C built-in functions +The {cl_khr_mipmap_image_writes_EXT} extension adds OpenCL C built-in functions to write to a mipmapped image. -If `cl_khr_mipmap_image_writes` is supported by the OpenCL device, the -`<>` extension must also be supported. +If {cl_khr_mipmap_image_writes_EXT} is supported by the OpenCL device, the +{cl_khr_mipmap_image_EXT} extension must also be supported. See the link:{OpenCLCSpecURL}#cl_khr_mipmap_image_writes[Mipmapped Image Writes] section of the OpenCL C specification for more information. diff --git a/api/cl_khr_pci_bus_info.asciidoc b/api/cl_khr_pci_bus_info.asciidoc index 0f5a87ed4..3477565b5 100644 --- a/api/cl_khr_pci_bus_info.asciidoc +++ b/api/cl_khr_pci_bus_info.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_pci_bus_info.txt[] === Description -The `cl_khr_pci_bus_info` extension adds a new query to obtain PCI bus +The {cl_khr_pci_bus_info_EXT} extension adds a new query to obtain PCI bus information about an OpenCL device. 
Not all OpenCL devices have PCI bus information, either due to the device diff --git a/api/cl_khr_priority_hints.asciidoc b/api/cl_khr_priority_hints.asciidoc index d36c20831..1e7981dd3 100644 --- a/api/cl_khr_priority_hints.asciidoc +++ b/api/cl_khr_priority_hints.asciidoc @@ -14,14 +14,14 @@ include::{generated}/meta/{refprefix}cl_khr_priority_hints.txt[] === Description -The `cl_khr_priority_hints` extension adds priority hints for OpenCL, but +The {cl_khr_priority_hints_EXT} extension adds priority hints for OpenCL, but does not specify the scheduling behavior or minimum guarantees. It is expected that the user guides associated with each implementation which supports this extension will describe the scheduling behavior guarantees. Note that the priority hint is orthogonal to functionality defined in the -`<>` extension. +{cl_khr_throttle_hints_EXT} extension. For example, a task may have high priority ({CL_QUEUE_PRIORITY_HIGH_KHR}) but should at the same time be executed at an optimized throttle setting ({CL_QUEUE_THROTTLE_LOW_KHR}). diff --git a/api/cl_khr_select_fprounding_mode.asciidoc b/api/cl_khr_select_fprounding_mode.asciidoc index c97814158..25a1fa4c3 100644 --- a/api/cl_khr_select_fprounding_mode.asciidoc +++ b/api/cl_khr_select_fprounding_mode.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_select_fprounding_mode.txt[] === Description -`cl_khr_select_fprounding_mode` allows an application to specify the +{cl_khr_select_fprounding_mode_EXT} allows an application to specify the rounding mode for an instruction or group of instructions in the OpenCL C program source. diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc index 1d3af4634..5328e51c3 100644 --- a/api/cl_khr_semaphore.asciidoc +++ b/api/cl_khr_semaphore.asciidoc @@ -39,7 +39,7 @@ work-submissions, they suffer from following limitations: * They are immutable. * They are not reusable.
-`cl_khr_semaphore` introduces a new type of synchronization object to +{cl_khr_semaphore_EXT} introduces a new type of synchronization object to represent _semaphores_ that can be reused, waited on, and signaled multiple times by OpenCL work-submissions. diff --git a/api/cl_khr_spir.asciidoc b/api/cl_khr_spir.asciidoc index 6776a9cc6..d794497d6 100644 --- a/api/cl_khr_spir.asciidoc +++ b/api/cl_khr_spir.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_spir.txt[] === Description -`cl_khr_spir` adds the ability to create an OpenCL program object from a +{cl_khr_spir_EXT} adds the ability to create an OpenCL program object from a Standard Portable Intermediate Representation (SPIR) instance. A SPIR instance is a vendor-neutral non-source representation for OpenCL C programs. @@ -22,8 +22,8 @@ programs. See the <> for information on compiling SPIR binaries. -`cl_khr_spir` has been superseded by the SPIR-V intermediate representation, -which is supported by the `<>` extension, and is a core +{cl_khr_spir_EXT} has been superseded by the SPIR-V intermediate representation, +which is supported by the {cl_khr_il_program_EXT} extension, and is a core feature in OpenCL 2.1. === New Enums diff --git a/api/cl_khr_srgb_image_writes.asciidoc b/api/cl_khr_srgb_image_writes.asciidoc index e12ccb362..95ca39656 100644 --- a/api/cl_khr_srgb_image_writes.asciidoc +++ b/api/cl_khr_srgb_image_writes.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_srgb_image_writes.txt[] === Description -`cl_khr_srgb_image_writes` enables OpenCL C kernels to write to sRGB images +{cl_khr_srgb_image_writes_EXT} enables OpenCL C kernels to write to sRGB images using the *write_imagef* built-in function. The sRGB image formats that may be written to will be returned by {clGetSupportedImageFormats}. 
diff --git a/api/cl_khr_subgroup_ballot.asciidoc b/api/cl_khr_subgroup_ballot.asciidoc index 0978f2d66..f0cc70d64 100644 --- a/api/cl_khr_subgroup_ballot.asciidoc +++ b/api/cl_khr_subgroup_ballot.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_ballot.txt[] === Description -`cl_khr_subgroup_ballot` adds built-in OpenCL C functions with the ability +{cl_khr_subgroup_ballot_EXT} adds built-in OpenCL C functions with the ability to collect and operate on ballots from work items in a sub-group. See the link:{OpenCLCSpecURL}#cl_khr_subgroup_ballot[Sub-Group Ballots] diff --git a/api/cl_khr_subgroup_clustered_reduce.asciidoc b/api/cl_khr_subgroup_clustered_reduce.asciidoc index a2d60ca89..a6b8b5c65 100644 --- a/api/cl_khr_subgroup_clustered_reduce.asciidoc +++ b/api/cl_khr_subgroup_clustered_reduce.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_clustered_reduce.txt[] === Description -`cl_khr_subgroup_clustered_reduce` adds built-in OpenCL functions for +{cl_khr_subgroup_clustered_reduce_EXT} adds built-in OpenCL functions for clustered reductions that operate on a subset of work items in the sub-group. diff --git a/api/cl_khr_subgroup_extended_types.asciidoc b/api/cl_khr_subgroup_extended_types.asciidoc index b5cfc7939..222b3db32 100644 --- a/api/cl_khr_subgroup_extended_types.asciidoc +++ b/api/cl_khr_subgroup_extended_types.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_extended_types.txt[] === Description -`cl_khr_subgroup_extended_types` adds additional supported OpenCL C data +{cl_khr_subgroup_extended_types_EXT} adds additional supported OpenCL C data types to the existing sub-group broadcast, scan, and reduction functions. 
See the link:{OpenCLCSpecURL}#cl_khr_subgroup_extended_types[Sub-Group diff --git a/api/cl_khr_subgroup_named_barrier.asciidoc b/api/cl_khr_subgroup_named_barrier.asciidoc index 07df0c863..d9dd17dbc 100644 --- a/api/cl_khr_subgroup_named_barrier.asciidoc +++ b/api/cl_khr_subgroup_named_barrier.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_named_barrier.txt[] === Description -`cl_khr_subgroup_named_barrier` adds barrier operations that cover subsets +{cl_khr_subgroup_named_barrier_EXT} adds barrier operations that cover subsets of an OpenCL work-group. Only the OpenCL API changes are described in this section. Please refer to the SPIR-V specification for information about using diff --git a/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc index 3389abe08..a1bee7e11 100644 --- a/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc +++ b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_arithmetic.txt[ === Description -`cl_khr_subgroup_non_uniform_arithmetic` adds built-in OpenCL C functions +{cl_khr_subgroup_non_uniform_arithmetic_EXT} adds built-in OpenCL C functions providing the ability to use some sub-group functions within non-uniform flow control, including additional scan and reduction operators. 
diff --git a/api/cl_khr_subgroup_non_uniform_vote.asciidoc b/api/cl_khr_subgroup_non_uniform_vote.asciidoc index f884c58bc..2188866ca 100644 --- a/api/cl_khr_subgroup_non_uniform_vote.asciidoc +++ b/api/cl_khr_subgroup_non_uniform_vote.asciidoc @@ -17,7 +17,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_vote.txt[] [[extended-sub-groups]] === Description -`cl_khr_subgroup_non_uniform_vote` adds built-in OpenCL C functions with the +{cl_khr_subgroup_non_uniform_vote_EXT} adds built-in OpenCL C functions with the ability to elect a single work item from a sub-group to perform a task and to hold votes among work items in a sub-group. diff --git a/api/cl_khr_subgroup_rotate.asciidoc b/api/cl_khr_subgroup_rotate.asciidoc index 3f9d04dff..afddda58f 100644 --- a/api/cl_khr_subgroup_rotate.asciidoc +++ b/api/cl_khr_subgroup_rotate.asciidoc @@ -19,7 +19,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_rotate.txt[] === Description -`cl_khr_subgroup_rotate` adds built-in OpenCL C functions with support for a +{cl_khr_subgroup_rotate_EXT} adds built-in OpenCL C functions with support for a new sub-group data exchange operation that makes it possible to rotate values through the work items in a sub-group. diff --git a/api/cl_khr_subgroup_shuffle.asciidoc b/api/cl_khr_subgroup_shuffle.asciidoc index f90bd70bc..44981e98d 100644 --- a/api/cl_khr_subgroup_shuffle.asciidoc +++ b/api/cl_khr_subgroup_shuffle.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle.txt[] === Description -`cl_khr_subgroup_shuffle` adds built-in OpenCL C functions providing +{cl_khr_subgroup_shuffle_EXT} adds built-in OpenCL C functions providing additional ways to exchange data among work items in a sub-group. 
See the link:{OpenCLCSpecURL}#cl_khr_subgroup_shuffle[General Purpose diff --git a/api/cl_khr_subgroup_shuffle_relative.asciidoc b/api/cl_khr_subgroup_shuffle_relative.asciidoc index 1419828e2..48f5f9723 100644 --- a/api/cl_khr_subgroup_shuffle_relative.asciidoc +++ b/api/cl_khr_subgroup_shuffle_relative.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle_relative.txt[] === Description -`cl_khr_subgroup_shuffle_relative` adds built-in OpenCL C functions +{cl_khr_subgroup_shuffle_relative_EXT} adds built-in OpenCL C functions providing specialized ways to exchange data among work items in a sub-group that may perform better on some implementations. diff --git a/api/cl_khr_subgroups.asciidoc b/api/cl_khr_subgroups.asciidoc index 44edb39c7..a4f833747 100644 --- a/api/cl_khr_subgroups.asciidoc +++ b/api/cl_khr_subgroups.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_subgroups.txt[] === Description -`cl_khr_subgroups` adds support for implementation-controlled groups of work +{cl_khr_subgroups_EXT} adds support for implementation-controlled groups of work items, known as sub-groups. Sub-groups behave similarly to work-groups and have their own sets of built-ins and synchronization primitives. diff --git a/api/cl_khr_suggested_local_work_size.asciidoc b/api/cl_khr_suggested_local_work_size.asciidoc index 558979623..d5c3faab4 100644 --- a/api/cl_khr_suggested_local_work_size.asciidoc +++ b/api/cl_khr_suggested_local_work_size.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_suggested_local_work_size.txt[] === Description -`cl_khr_suggested_local_work_size` adds the ability to query a suggested +{cl_khr_suggested_local_work_size_EXT} adds the ability to query a suggested local work-group size for a kernel running on a device for a specified global work size and global work offset. 
The suggested local work-group size will match the work-group size that diff --git a/api/cl_khr_terminate_context.asciidoc b/api/cl_khr_terminate_context.asciidoc index 103f5f7ea..bc89dcc7e 100644 --- a/api/cl_khr_terminate_context.asciidoc +++ b/api/cl_khr_terminate_context.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_terminate_context.txt[] === Description -The `cl_khr_terminate_context` extension provides a new query to check +The {cl_khr_terminate_context_EXT} extension provides a new query to check whether a device can terminate an OpenCL context, and adds an API to terminate a context. diff --git a/api/cl_khr_throttle_hints.asciidoc b/api/cl_khr_throttle_hints.asciidoc index af11a1507..0de50159d 100644 --- a/api/cl_khr_throttle_hints.asciidoc +++ b/api/cl_khr_throttle_hints.asciidoc @@ -14,13 +14,13 @@ include::{generated}/meta/{refprefix}cl_khr_throttle_hints.txt[] === Description -The `cl_khr_throttle_hints` extension adds throttle hints for OpenCL, but +The {cl_khr_throttle_hints_EXT} extension adds throttle hints for OpenCL, but does not specify the throttling behavior or minimum guarantees. It is expected that the user guide associated with each implementation which supports this extension will describe the throttling behavior guarantees. Note that the throttle hint is orthogonal to functionality defined in the -`<>` extension. +{cl_khr_priority_hints_EXT} extension. For example, a task may have high priority ({CL_QUEUE_PRIORITY_HIGH_KHR}) but should at the same time be executed at an optimized throttle setting ({CL_QUEUE_THROTTLE_LOW_KHR}).
diff --git a/api/cl_khr_work_group_uniform_arithmetic.asciidoc b/api/cl_khr_work_group_uniform_arithmetic.asciidoc index 5dfb1281b..6ff2252fd 100644 --- a/api/cl_khr_work_group_uniform_arithmetic.asciidoc +++ b/api/cl_khr_work_group_uniform_arithmetic.asciidoc @@ -15,7 +15,7 @@ include::{generated}/meta/{refprefix}cl_khr_work_group_uniform_arithmetic.txt[] === Description -`cl_khr_work_group_uniform_arithmetic` adds additional built-in work-group +{cl_khr_work_group_uniform_arithmetic_EXT} adds additional built-in work-group collective functions to OpenCL C. Specifically, this extension adds support for work-group scans and reductions for the following operators: diff --git a/api/dictionary.asciidoc b/api/dictionary.asciidoc index a75c7a6df..a714d04b5 100644 --- a/api/dictionary.asciidoc +++ b/api/dictionary.asciidoc @@ -2,3 +2,4 @@ // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/api/api-dictionary.asciidoc[] +include::{generated}/api/ext-dictionary-local-links.asciidoc[] diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index 26e1454f3..cc407d98f 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -107,7 +107,7 @@ Rather than attempt to share {cl_kernel_TYPE} objects among multiple host thread ] :fn-readimageh: pass:n[ \ -And *read_imageh*, if the `cl_khr_fp16` extension is supported. \ +And *read_imageh*, if the {cl_khr_fp16_EXT} extension is supported. \ ] :fn-reference-count-usage: pass:n[ \ diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index d45e84c89..2e559530c 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -998,9 +998,9 @@ concepts such as _resources_, _reference counts_, and _devices_. 
Sharing is accomplished by creating an OpenCL context via the context create parameters {CL_CONTEXT_D3D10_DEVICE_KHR} (for Direct3D 10, if the -`<>` extension is supported) or +{cl_khr_d3d10_sharing_EXT} extension is supported) or {CL_CONTEXT_D3D11_DEVICE_KHR} (for Direct3D 11, if the -`<>` extension is supported. +{cl_khr_d3d11_sharing_EXT} extension is supported). An OpenCL memory object created from a Direct3D resource remains valid as long as the corresponding Direct3D resource has not been deleted. @@ -1869,7 +1869,7 @@ function instance. ==== Sub-Group Functions NOTE: Sub-group functions are <> version 2.1. -Also see `<>`. +Also see {cl_khr_subgroups_EXT}. The OpenCL kernel execution model includes collective operations across the work-items within a single sub-group. diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 7c39cb51e..99a6653cd 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -36,13 +36,13 @@ include::{generated}/api/version-notes/clGetPlatformIDs.asciidoc[] {clGetPlatformIDs} returns {CL_SUCCESS} if the function is executed ifndef::cl_khr_icd[successfully.] ifdef::cl_khr_icd[] -and, if the `<>` extension is supported, there are a non-zero +and, if the {cl_khr_icd_EXT} extension is supported, there are a non-zero number of platforms available. endif::cl_khr_icd[] Otherwise, it returns one of the following errors: ifdef::cl_khr_icd[] - * {CL_PLATFORM_NOT_FOUND_KHR} if the `<>` extension is + * {CL_PLATFORM_NOT_FOUND_KHR} if the {cl_khr_icd_EXT} extension is supported and zero platforms are available. endif::cl_khr_icd[] * {CL_INVALID_VALUE} if _num_entries_ is equal to zero and _platforms_ is @@ -561,7 +561,7 @@ include::{generated}/api/version-notes/CL_DEVICE_MAX_WORK_GROUP_SIZE.asciidoc[] If double precision is not supported, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE} must return 0.
- If the `<>` extension is not supported, + If the {cl_khr_fp16_EXT} extension is not supported, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF} must return 0. | {CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR_anchor} + {CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT_anchor} + @@ -582,7 +582,7 @@ include::{generated}/api/version-notes/CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR.asciid If double precision is not supported, {CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE} must return 0. - If the `<>` extension is not supported, + If the {cl_khr_fp16_EXT} extension is not supported, {CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF} must return 0. | {CL_DEVICE_MAX_CLOCK_FREQUENCY_anchor} @@ -671,7 +671,7 @@ endif::cl_khr_il_program[] value must be `""` (an empty string). ifdef::cl_khr_il_program[] - A device that supports the `<>` extension must + A device that supports the {cl_khr_il_program_EXT} extension must support the `"SPIR-V"` IL prefix. endif::cl_khr_il_program[] | {CL_DEVICE_ILS_WITH_VERSION_anchor} @@ -770,7 +770,7 @@ include::{generated}/api/version-notes/CL_DEVICE_IMAGE_PITCH_ALIGNMENT.asciidoc[ ifdef::cl_khr_image2d_from_buffer[] The equivalent {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR_anchor} may be used if -the `<>` extension is supported. +the {cl_khr_image2d_from_buffer_EXT} extension is supported. endif::cl_khr_image2d_from_buffer[] | {cl_uint_TYPE} | The row pitch alignment size in pixels for 2D images created from a @@ -787,7 +787,7 @@ include::{generated}/api/version-notes/CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT.as ifdef::cl_khr_image2d_from_buffer[] The equivalent {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR_anchor} may be used -if the `<>` extension is supported. +if the {cl_khr_image2d_from_buffer_EXT} extension is supported. 
endif::cl_khr_image2d_from_buffer[] | {cl_uint_TYPE} | This query specifies the minimum alignment in pixels of the host_ptr @@ -888,7 +888,7 @@ include::{generated}/api/version-notes/CL_DEVICE_SINGLE_FP_CONFIG.asciidoc[] include::{generated}/api/version-notes/CL_DEVICE_DOUBLE_FP_CONFIG.asciidoc[] -Also see `<>`. +Also see {cl_khr_fp64_EXT}. | {cl_device_fp_config_TYPE} | Describes double precision floating-point capability of the OpenCL device. @@ -1360,26 +1360,26 @@ include::{generated}/api/version-notes/CL_DEVICE_EXTENSIONS.asciidoc[] The following Khronos extension names must be returned by all devices that support OpenCL 1.1: - `<>` + - `<>` + - `<>` + - `<>` + - `<>` + {cl_khr_fp64_EXT} Additionally, the following Khronos extension names must be returned by all devices that support OpenCL 2.0, OpenCL 2.1, or OpenCL 2.2. For devices that support OpenCL 3.0, these extension names must be returned when and only when the optional feature is supported: - `<>` + - `<>` + - `<>` + {cl_khr_3d_image_writes_EXT} + + {cl_khr_depth_images_EXT} + + {cl_khr_image2d_from_buffer_EXT} Please refer to the OpenCL Extension Specification or vendor provided documentation for a detailed description of these extensions. @@ -1577,7 +1577,7 @@ include::{generated}/api/version-notes/CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_P sub-groups, {CL_FALSE} otherwise. This query must return {CL_TRUE} for devices that support the - `<>` extension, and must return {CL_FALSE} for + {cl_khr_subgroups_EXT} extension, and must return {CL_FALSE} for devices that do not support sub-groups. 
| {CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES_anchor} @@ -1944,7 +1944,7 @@ include::{generated}/api/version-notes/CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIE {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR_anchor} is always set, indicating that all implementations that support - `<>` must support dot product built-in + {cl_khr_integer_dot_product_EXT} must support dot product built-in functions and, when SPIR-V is supported, SPIR-V instructions that take four-component vectors of 8-bit integers packed into 32-bit integers as input. @@ -3117,7 +3117,7 @@ returned in _errcode_ret_: required by the OpenCL implementation on the host. ifdef::cl_khr_dx9_media_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_dx9_media_sharing_EXT} extension is supported: * {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} if any of the values of the @@ -3129,7 +3129,7 @@ extension is supported: endif::cl_khr_dx9_media_sharing[] ifdef::cl_khr_d3d10_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_d3d10_sharing_EXT} extension is supported: * {CL_INVALID_D3D10_DEVICE_KHR} if the value of the property @@ -3142,7 +3142,7 @@ extension is supported: endif::cl_khr_d3d10_sharing[] ifdef::cl_khr_d3d11_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_d3d11_sharing_EXT} extension is supported: * {CL_INVALID_D3D11_DEVICE_KHR} if the value of the property @@ -3155,7 +3155,7 @@ extension is supported: endif::cl_khr_d3d11_sharing[] ifdef::cl_khr_gl_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_gl_sharing_EXT} extension is supported: * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a context was specified for @@ -3186,15 +3186,15 @@ extension is supported: ** Any of the devices specified in the _devices_ argument cannot support OpenCL objects which share the data store 
of an OpenGL object. * {CL_INVALID_PROPERTY} if both {CL_CONTEXT_INTEROP_USER_SYNC}, and any of - the properties defined by the `<>` extension are + the properties defined by the {cl_khr_gl_sharing_EXT} extension are defined in _properties_. endif::cl_khr_gl_sharing[] ifdef::cl_khr_terminate_context[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_terminate_context_EXT} extension is supported: - * {CL_INVALID_PROPERTY} if the `<>` extension is + * {CL_INVALID_PROPERTY} if the {cl_khr_terminate_context_EXT} extension is supported and {CL_CONTEXT_TERMINATE_KHR} is set to {CL_TRUE} in _properties_, but not all of the devices associated with the context support the ability to support context termination (i.e. @@ -3272,7 +3272,7 @@ returned in _errcode_ret_: required by the OpenCL implementation on the host. ifdef::cl_khr_dx9_media_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_dx9_media_sharing_EXT} extension is supported: * {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} if any of the values of the @@ -3284,7 +3284,7 @@ extension is supported: endif::cl_khr_dx9_media_sharing[] ifdef::cl_khr_d3d10_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_d3d10_sharing_EXT} extension is supported: * {CL_INVALID_D3D10_DEVICE_KHR} if the value of the property @@ -3297,7 +3297,7 @@ extension is supported: endif::cl_khr_d3d10_sharing[] ifdef::cl_khr_d3d11_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_d3d11_sharing_EXT} extension is supported: * {CL_INVALID_D3D11_DEVICE_KHR} if the value of the property @@ -3310,7 +3310,7 @@ extension is supported: endif::cl_khr_d3d11_sharing[] ifdef::cl_khr_gl_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_gl_sharing_EXT} extension is supported: * 
{CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a context was specified for @@ -3341,7 +3341,7 @@ extension is supported: ** Any of the devices specified in the _devices_ argument cannot support OpenCL objects which share the data store of an OpenGL object. * {CL_INVALID_PROPERTY} if both {CL_CONTEXT_INTEROP_USER_SYNC}, and any of - the properties defined by the `<>` extension are + the properties defined by the {cl_khr_gl_sharing_EXT} extension are defined in _properties_. endif::cl_khr_gl_sharing[] diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index b349d587a..27788730e 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -70,7 +70,7 @@ include::{generated}/api/version-notes/CL_QUEUE_PROPERTIES.asciidoc[] | {cl_command_queue_properties_TYPE} ifdef::cl_khr_create_command_queue[] -or {cl_bitfield_TYPE} if the `<>` extension is supported +or {cl_bitfield_TYPE} if the {cl_khr_create_command_queue_EXT} extension is supported endif::cl_khr_create_command_queue[] | This is a bitfield and can be set to a combination of the following values: @@ -188,12 +188,12 @@ returned in _errcode_ret_: * {CL_INVALID_QUEUE_PROPERTIES} if values specified in _properties_ are valid but are not supported by the device. ifdef::cl_khr_priority_hints[] - * {CL_INVALID_QUEUE_PROPERTIES} if the `<>` + * {CL_INVALID_QUEUE_PROPERTIES} if the {cl_khr_priority_hints_EXT} extension is supported, the {CL_QUEUE_PRIORITY_KHR} property is specified, and the queue is a {CL_QUEUE_ON_DEVICE}. endif::cl_khr_priority_hints[] ifdef::cl_khr_throttle_hints[] - * {CL_INVALID_QUEUE_PROPERTIES} if the `<>` + * {CL_INVALID_QUEUE_PROPERTIES} if the {cl_khr_throttle_hints_EXT} extension is supported, the {CL_QUEUE_THROTTLE_KHR} property is specified, and the queue is a {CL_QUEUE_ON_DEVICE}. 
endif::cl_khr_throttle_hints[] @@ -2351,7 +2351,7 @@ include::{generated}/api/version-notes/CL_R.asciidoc[] include::{generated}/api/version-notes/CL_DEPTH.asciidoc[] ifdef::cl_khr_depth_images[] -Also supported if the `<>` extension is supported. +Also supported if the {cl_khr_depth_images_EXT} extension is supported. endif::cl_khr_depth_images[] | A single channel image format where the single channel represents a `DEPTH` component. | {CL_LUMINANCE_anchor} @@ -2450,7 +2450,7 @@ include::{generated}/api/version-notes/CL_UNORM_INT8.asciidoc[] include::{generated}/api/version-notes/CL_UNORM_INT16.asciidoc[] ifdef::cl_khr_depth_images[] -Also supported if the `<>` extension is supported. +Also supported if the {cl_khr_depth_images_EXT} extension is supported. endif::cl_khr_depth_images[] | Each channel component is a normalized unsigned 16-bit integer value | {CL_UNORM_SHORT_565_anchor} @@ -2514,7 +2514,7 @@ include::{generated}/api/version-notes/CL_HALF_FLOAT.asciidoc[] include::{generated}/api/version-notes/CL_FLOAT.asciidoc[] ifdef::cl_khr_depth_images[] -Also supported if the `<>` extension is supported. +Also supported if the {cl_khr_depth_images_EXT} extension is supported. endif::cl_khr_depth_images[] | Each channel component is a single precision floating-point value |==== @@ -2656,7 +2656,7 @@ endif::cl_khr_external_memory[] * _num_mip_levels_ must be ifndef::cl_khr_mipmap_image[0.] ifdef::cl_khr_mipmap_image[] - 0 unless the `<>` extension is supported, in which + 0 unless the {cl_khr_mipmap_image_EXT} extension is supported, in which case it must be a value greater than 1 specifying the number of mipmap levels in the image. endif::cl_khr_mipmap_image[] @@ -2871,7 +2871,7 @@ is: | {CL_DEPTH} footnote:[{fn-depth-image-requirements}] ifdef::cl_khr_depth_images[] -Also supported if the `<>` extension is supported. +Also supported if the {cl_khr_depth_images_EXT} extension is supported. 
endif::cl_khr_depth_images[] | {CL_UNORM_INT16} + {CL_FLOAT} @@ -3037,7 +3037,7 @@ image formats in that API and the corresponding OpenCL image format. ifdef::cl_khr_dx9_media_sharing[] ==== Image Formats for DirectX 9 Media Surface Sharing -When the `<>` extension is supported, image +When the {cl_khr_dx9_media_sharing_EXT} extension is supported, image objects sharing storage with Direct3D 9 surfaces can be created. This section describes the Direct3D 9 surface formats that are supported when the adapter type is one of the Direct 3D lineage. @@ -3109,7 +3109,7 @@ endif::cl_khr_dx9_media_sharing[] ifdef::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] ==== Image Formats for Direct3D Texture Sharing -When the `<>` or `<>` extensions +When the {cl_khr_d3d10_sharing_EXT} or {cl_khr_d3d11_sharing_EXT} extensions are supported, image objects sharing storage with Direct3D 10 and Direct3D 11 textures, respectively, can be created. The <>` extension is supported, image objects +When the {cl_khr_gl_sharing_EXT} extension is supported, image objects sharing storage with OpenGL texture and renderbuffer objects can be created. The <> table describes the supported OpenGL image @@ -3412,7 +3412,7 @@ Otherwise, it returns one of the following errors: * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. ifdef::cl_khr_mipmap_image[] - * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + * {CL_INVALID_MIP_LEVEL} if the {cl_khr_mipmap_image_EXT} extension is supported, and the mip level specified in _origin_ is not a valid level for _image_, endif::cl_khr_mipmap_image[] @@ -3585,7 +3585,7 @@ Otherwise, it returns one of the following errors: * {CL_MEM_COPY_OVERLAP} if _src_image_ and _dst_image_ are the same image object and the source and destination regions overlap. 
ifdef::cl_khr_mipmap_image[] - * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + * {CL_INVALID_MIP_LEVEL} if the {cl_khr_mipmap_image_EXT} extension is supported, and the mip level specified in _src_origin_ or _dst_origin_ is not a valid level for the corresponding _src_image_ or _dst_image_, endif::cl_khr_mipmap_image[] @@ -3706,7 +3706,7 @@ Otherwise, it returns one of the following errors: * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. ifdef::cl_khr_mipmap_image[] - * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + * {CL_INVALID_MIP_LEVEL} if the {cl_khr_mipmap_image_EXT} extension is supported, and the mip level specified in _origin_ is not a valid level for _image_, endif::cl_khr_mipmap_image[] @@ -3833,7 +3833,7 @@ Otherwise, it returns one of the following errors: * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. ifdef::cl_khr_mipmap_image[] - * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + * {CL_INVALID_MIP_LEVEL} if the {cl_khr_mipmap_image_EXT} extension is supported, and the mip level specified in _src_origin_ is not a valid level for _src_image_, endif::cl_khr_mipmap_image[] @@ -3960,7 +3960,7 @@ Otherwise, it returns one of the following errors: * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
ifdef::cl_khr_mipmap_image[] - * {CL_INVALID_MIP_LEVEL} if the `<>` extension is + * {CL_INVALID_MIP_LEVEL} if the {cl_khr_mipmap_image_EXT} extension is supported, and the mip level specified in _dst_origin_ is not a valid level for _dst_image_, endif::cl_khr_mipmap_image[] @@ -4136,7 +4136,7 @@ ifdef::cl_khr_mipmap_image[] [[image-mipmap-access]] === Specifying Mipmap Levels to Image Operations -When the `<>` extension is supported, the +When the {cl_khr_mipmap_image_EXT} extension is supported, the {clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueMapImage}, {clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer}, {clEnqueueCopyBufferToImage}, and {clEnqueueFillImage} functions described @@ -4322,7 +4322,7 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host. ifdef::cl_khr_dx9_media_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_dx9_media_sharing_EXT} extension is supported: * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is @@ -4331,7 +4331,7 @@ extension is supported: endif::cl_khr_dx9_media_sharing[] ifdef::cl_khr_d3d10_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_d3d10_sharing_EXT} extension is supported: * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is @@ -4341,7 +4341,7 @@ extension is supported: endif::cl_khr_d3d10_sharing[] ifdef::cl_khr_d3d11_sharing[] -The following errors may be returned if the `<>` +The following errors may be returned if the {cl_khr_d3d11_sharing_EXT} extension is supported: * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is @@ -4763,7 +4763,7 @@ CL_MEM_READ_WRITE. If OpenCL 1.2 is supported, _flags_ also accepts {CL_MEM_HOST_WRITE_ONLY}, {CL_MEM_HOST_READ_ONLY}, and {CL_MEM_HOST_NO_ACCESS}. 
-`<<cl_khr_egl_image>>` only requires support for {CL_MEM_READ_ONLY}, and for +{cl_khr_egl_image_EXT} only requires support for {CL_MEM_READ_ONLY}, and for CL_MEM_HOST_NO_ACCESS if OpenCL 1.2 or later is supported. For OpenCL 1.1, a {CL_INVALID_OPERATION} will be returned for images which do not support host mapping. @@ -4831,7 +4831,7 @@ ifdef::cl_khr_gl_msaa_sharing[] `GL_TEXTURE_2D_MULTISAMPLE` and `GL_TEXTURE_2D_MULTISAMPLE_ARRAY` may be specified if an OpenGL implementation supporting multi-sample two-dimensional textures is supported, and the - `<<cl_khr_gl_msaa_sharing>>` extension is supported. + {cl_khr_gl_msaa_sharing_EXT} extension is supported. Refer to the <> section for more information on multi-sample images. endif::cl_khr_gl_msaa_sharing[] @@ -4871,7 +4871,7 @@ endif::cl_khr_gl_msaa_sharing[] * an OpenCL 3D image object from an OpenGL 3D texture object. ifdef::cl_khr_mipmap_image[] -If both the `<<cl_khr_mipmap_image>>` and `<<cl_khr_gl_sharing>>` extensions +If both the {cl_khr_mipmap_image_EXT} and {cl_khr_gl_sharing_EXT} extensions are supported by the OpenCL device, {clCreateFromGLTexture} may also be used to create a mipmapped OpenCL image from a mipmapped OpenGL texture by specify a negative value for _miplevel_. @@ -4944,7 +4944,7 @@ ifdef::cl_khr_gl_depth_images[] Depth images with an image channel order of {CL_DEPTH_STENCIL} can only be created using the {clCreateFromGLTexture} API, and only when the -`<<cl_khr_gl_depth_images>>` extension is supported. +{cl_khr_gl_depth_images_EXT} extension is supported. For the image format given by channel order of {CL_DEPTH_STENCIL} and channel data type of {CL_UNORM_INT24}, the depth is stored as an unsigned @@ -5563,7 +5563,7 @@ operation must create a distinct memory object.
==== File Descriptor Handle Types ifdef::cl_khr_external_memory_opaque_fd[] -The `<<cl_khr_external_memory_opaque_fd>>` extension extends +The {cl_khr_external_memory_opaque_fd_EXT} extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: @@ -5580,7 +5580,7 @@ buffer or an image memory object from an external handle: endif::cl_khr_external_memory_opaque_fd[] ifdef::cl_khr_external_memory_dma_buf[] -The `<<cl_khr_external_memory_dma_buf>>` extension extends +The {cl_khr_external_memory_dma_buf_EXT} extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: @@ -5602,7 +5602,7 @@ The imported memory object holds a reference to its payload. ==== NT Handle Types ifdef::cl_khr_external_memory_dx[] -The `<<cl_khr_external_memory_dx>>` extension extends +The {cl_khr_external_memory_dx_EXT} extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: @@ -5628,7 +5628,7 @@ buffer or an image memory object from an external handle: endif::cl_khr_external_memory_dx[] ifdef::cl_khr_external_memory_win32[] -The `<<cl_khr_external_memory_win32>>` extension extends +The {cl_khr_external_memory_win32_EXT} extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: @@ -6142,7 +6142,7 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host.
ifdef::cl_khr_dx9_media_sharing[] -The following errors may be returned if the `<<cl_khr_dx9_media_sharing>>` +The following errors may be returned if the {cl_khr_dx9_media_sharing_EXT} extension is supported: * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is @@ -6151,7 +6151,7 @@ extension is supported: endif::cl_khr_dx9_media_sharing[] ifdef::cl_khr_d3d10_sharing[] -The following errors may be returned if the `<<cl_khr_d3d10_sharing>>` +The following errors may be returned if the {cl_khr_d3d10_sharing_EXT} extension is supported: * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is @@ -6161,7 +6161,7 @@ extension is supported: endif::cl_khr_d3d10_sharing[] ifdef::cl_khr_d3d11_sharing[] -The following errors may be returned if the `<<cl_khr_d3d11_sharing>>` +The following errors may be returned if the {cl_khr_d3d11_sharing_EXT} extension is supported: * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is @@ -7144,9 +7144,9 @@ Otherwise, it returns one of the following errors: ifdef::cl_khr_egl_image,cl_khr_gl_sharing[] ==== Synchronizing Access to Memory Objects Shared With EGL or OpenGL -When sharing objects such as EGL images (if the `<<cl_khr_egl_image>>` +When sharing objects such as EGL images (if the {cl_khr_egl_image_EXT} extension is supported) or OpenGL buffers, textures, and renderbuffers (if -the `<<cl_khr_gl_sharing>>` extension is supported), in order to ensure data +the {cl_khr_gl_sharing_EXT} extension is supported), in order to ensure data integrity, the application is responsible for synchronizing access to shared memory objects through the other API with which such objects are shared.
@@ -7164,14 +7164,14 @@ extensions which may be used to synchronize with other APIs: ifdef::cl_khr_egl_image[] ===== Synchronization With EGL and EGL Client APIs -When sharing with an EGL context via the `<<cl_khr_egl_image>>` extension, -if the `<<cl_khr_egl_event>>` extension is supported, and the EGL context in +When sharing with an EGL context via the {cl_khr_egl_image_EXT} extension, +if the {cl_khr_egl_event_EXT} extension is supported, and the EGL context in question supports fence sync objects, _explicit synchronization_ with EGL or EGL client APIs can be achieved as described in the <> section. -If the `<<cl_khr_egl_event>>` extension is not supported, completion of EGL +If the {cl_khr_egl_event_EXT} extension is not supported, completion of EGL client API commands may be determined by issuing and waiting for completion of commands such as `glFinish` or `vgFinish` on all client API contexts with pending references to these objects. @@ -7181,18 +7181,18 @@ endif::cl_khr_egl_image[] ifdef::cl_khr_gl_sharing[] ===== Synchronization With OpenGL -When sharing with an OpenGL context via the `<<cl_khr_gl_sharing>>` +When sharing with an OpenGL context via the {cl_khr_gl_sharing_EXT} extension, the OpenCL implementation will ensure that any such pending OpenGL operations are complete for an OpenGL context bound to the same thread as the OpenCL context. This is referred to as _implicit synchronization_. -If the `<<cl_khr_gl_event>>` extension is supported, and the OpenGL context +If the {cl_khr_gl_event_EXT} extension is supported, and the OpenGL context in question supports fence sync objects, _explicit synchronization_ with OpenGL can be achieved as described in the <> section. -If the `<<cl_khr_gl_event>>` extension is not supported, completion of +If the {cl_khr_gl_event_EXT} extension is not supported, completion of OpenGL commands may be determined by issuing and waiting for completion of a `glFinish` command on all OpenGL contexts with pending references to these objects.
@@ -7209,7 +7209,7 @@ Note that no synchronization method other than `glFinish` is portable between all OpenGL implementations and all OpenCL implementations. While this is the only way to ensure completion that is portable to all platforms, `glFinish` is an expensive operation and its use should be -avoided if the `<<cl_khr_egl_event>>` or `<<cl_khr_gl_event>>` extensions +avoided if the {cl_khr_egl_event_EXT} or {cl_khr_gl_event_EXT} extensions are supported on a platform. @@ -8143,7 +8143,7 @@ endif::cl_khr_mipmap_image[] |==== ifdef::cl_khr_mipmap_image[] -NOTE: When the `<<cl_khr_mipmap_image>>` extension is supported, the sampler +NOTE: When the {cl_khr_mipmap_image_EXT} extension is supported, the sampler properties {CL_SAMPLER_MIP_FILTER_MODE_KHR}, {CL_SAMPLER_LOD_MIN_KHR} and {CL_SAMPLER_LOD_MAX_KHR} cannot be specified with any samplers initialized in the OpenCL program source. @@ -8551,7 +8551,7 @@ The cached executables can be read and loaded by the application, which can help significantly reduce the application initialization time. ifdef::cl_khr_spir[] -If the `<<cl_khr_spir>>` extension is supported, {clCreateProgramWithBinary} +If the {cl_khr_spir_EXT} extension is supported, {clCreateProgramWithBinary} can be used to load a SPIR binary. Once a program object has been created from a SPIR binary, {clBuildProgram} can be called to build a program executable or {clCompileProgram} can be @@ -9085,7 +9085,7 @@ Otherwise, it returns one of the following errors: ifdef::cl_khr_il_program[or {clCreateProgramWithILKHR}] ifdef::cl_khr_spir[] ** {clCreateProgramWithBinary} where `-x spir` is present in _options_, - if the `<<cl_khr_spir>>` extension is supported. + if the {cl_khr_spir_EXT} extension is supported.
endif::cl_khr_spir[] ** {clCreateProgramWithSource} * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required @@ -9571,7 +9571,7 @@ ifdef::cl_khr_spir[] [[spir-compilation-options]] ==== SPIR Compilation Options -If the `<<cl_khr_spir>>` extension is supported, the compile option +If the {cl_khr_spir_EXT} extension is supported, the compile option `-x spir` @@ -10254,7 +10254,7 @@ If the argument is of type _queue_t_, the _arg_value_ entry must be a pointer to the device queue object. ifdef::cl_khr_gl_msaa_sharing[] -If the `<<cl_khr_gl_msaa_sharing>>` extension is supported, then: +If the {cl_khr_gl_msaa_sharing_EXT} extension is supported, then: If the argument is a multi-sample 2D image, the _arg_value_ entry must be a pointer to a multi-sample image object. If the argument is a multi-sample 2D depth image, the _arg_value_ entry must @@ -10836,7 +10836,7 @@ To return information about a kernel object, call the function include::{generated}/api/protos/clGetKernelSubGroupInfo.txt[] include::{generated}/api/version-notes/clGetKernelSubGroupInfo.asciidoc[] -Also see `<<cl_khr_subgroups>>`. +Also see {cl_khr_subgroups_EXT}. * _kernel_ specifies the kernel object being queried. * _device_ identifies a specific device in the list of devices associated with @@ -10877,7 +10877,7 @@ include::{generated}/api/version-notes/CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE. ifdef::cl_khr_subgroups[] The equivalent {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR_anchor} may be used if -the `<<cl_khr_subgroups>>` extension is supported. +the {cl_khr_subgroups_EXT} extension is supported. endif::cl_khr_subgroups[] | {size_t_TYPE}* | {size_t_TYPE} @@ -10897,7 +10897,7 @@ include::{generated}/api/version-notes/CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE.asc ifdef::cl_khr_subgroups[] The equivalent {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR_anchor} may be used if -the `<<cl_khr_subgroups>>` extension is supported. +the {cl_khr_subgroups_EXT} extension is supported.
endif::cl_khr_subgroups[] | {size_t_TYPE}* | {size_t_TYPE} @@ -10916,7 +10916,7 @@ endif::cl_khr_subgroups[] include::{generated}/api/version-notes/CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT.asciidoc[] -Also see `<<cl_khr_subgroups>>`. +Also see {cl_khr_subgroups_EXT}. | {size_t_TYPE} | {size_t_TYPE}[] | Returns the local size that will generate the requested number @@ -10937,7 +10937,7 @@ Also see `<<cl_khr_subgroups>>`. include::{generated}/api/version-notes/CL_KERNEL_MAX_NUM_SUB_GROUPS.asciidoc[] -Also see `<<cl_khr_subgroups>>`. +Also see {cl_khr_subgroups_EXT}. | ignored | {size_t_TYPE} | This provides a mechanism for the application to query the @@ -10953,7 +10953,7 @@ Also see `<<cl_khr_subgroups>>`. include::{generated}/api/version-notes/CL_KERNEL_COMPILE_NUM_SUB_GROUPS.asciidoc[] -Also see `<<cl_khr_subgroups>>`. +Also see {cl_khr_subgroups_EXT}. | ignored | {size_t_TYPE} | Returns the number of sub-groups per work-group specified in the kernel @@ -11020,7 +11020,7 @@ ifdef::cl_khr_spir[] * is created with {clCreateProgramWithBinary} and the program executable is built with the `-cl-kernel-arg-info` and `-x spir` options specified in the _options_ argument to {clBuildProgram} or {clCompileProgram}, if - the `<<cl_khr_spir>>` extension is supported; or, + the {cl_khr_spir_EXT} extension is supported; or, endif::cl_khr_spir[] * is created with {clCreateProgramWithSource} and the program executable is built with the `-cl-kernel-arg-info option` specified in the @@ -11835,7 +11835,7 @@ include::{generated}/api/version-notes/CL_EVENT_COMMAND_QUEUE.asciidoc[] For user event objects, a `NULL` value is returned. ifdef::cl_khr_command_buffer_multi_device[] - If the `<<cl_khr_command_buffer_multi_device>>` extension is + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, for events returned by a command-buffer enqueue operation to multiple command-queues, `NULL` is returned. endif::cl_khr_command_buffer_multi_device[] @@ -11876,7 +11876,7 @@ include::{generated}/api/version-notes/CL_EVENT_COMMAND_EXECUTION_STATUS.asciido *errcode_ret* values.
ifdef::cl_khr_command_buffer_multi_device[] - If the `<<cl_khr_command_buffer_multi_device>>` extension is + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, for events returned by a command-buffer enqueue operation to multiple command-queues the semantics of execution status is as follows: @@ -12361,7 +12361,7 @@ include::{generated}/api/protos/clCreateEventFromEGLSyncKHR.txt[] include::{generated}/api/version-notes/clCreateEventFromEGLSyncKHR.asciidoc[] * _context_ is a valid OpenCL context created from an OpenGL context or - share group, using the `<<cl_khr_gl_sharing>>` extension. + share group, using the {cl_khr_gl_sharing_EXT} extension. * _sync_ is the name of a sync object of type `EGL_SYNC_FENCE_KHR` created with respect to `EGLDisplay` _display_. * _display_ is the `EGLDisplay` handle. @@ -12415,7 +12415,7 @@ returned in _errcode_ret_: [[explicit-sync-using-egl-fences]] ===== Explicit Synchronization Using EGL Fence Sync Objects -If the `<<cl_khr_egl_event>>` extension is supported, event objects created +If the {cl_khr_egl_event_EXT} extension is supported, event objects created with {clCreateEventFromEGLsyncKHR} provide another method of coordinating sharing between EGL / EGL client API objects, and OpenCL. @@ -12452,7 +12452,7 @@ include::{generated}/api/protos/clCreateEventFromGLsyncKHR.txt[] include::{generated}/api/version-notes/clCreateEventFromGLsyncKHR.asciidoc[] * _context_ is a valid OpenCL context created from an OpenGL context or - share group, using the `<<cl_khr_gl_sharing>>` extension. + share group, using the {cl_khr_gl_sharing_EXT} extension. * _sync_ is the name of a sync object in the GL share group associated with _context_. * _errcode_ret_ will return an appropriate error code. @@ -12505,7 +12505,7 @@ a {CL_INVALID_EVENT} error.
[[explicit-sync-using-opengl-fences]] ===== Explicit Synchronization Using OpenGL Fence Sync Objects -If the `<<cl_khr_gl_event>>` extension is supported, event objects created +If the {cl_khr_gl_event_EXT} extension is supported, event objects created with {clCreateEventFromGLsyncKHR} provide another method of coordinating sharing of buffers and images between OpenGL and OpenCL. @@ -12766,7 +12766,7 @@ ifdef::cl_khr_semaphore[] == Semaphores This section describes the semaphore types and functions defined by the -`<<cl_khr_semaphore>>` extension. +{cl_khr_semaphore_EXT} extension. === Semaphore Types @@ -12783,7 +12783,7 @@ This section describes the semaphore types and functions defined by the * {cl_semaphore_info_khr_TYPE} represents queries for additional information about semaphores. ** All enums described in the "`New API Enums`" section of the - `<<cl_khr_semaphore>>` extension for {cl_semaphore_info_khr_TYPE} must + {cl_khr_semaphore_EXT} extension for {cl_semaphore_info_khr_TYPE} must be supported. * {cl_semaphore_payload_khr_TYPE} represents payload values of semaphores. * {cl_semaphore_khr_TYPE} represent semaphore objects.
@@ -13015,7 +13015,7 @@ ifdef::cl_khr_external_semaphore_opaque_fd,cl_khr_external_semaphore_sync_fd[] ifdef::cl_khr_external_semaphore_opaque_fd[] -The `<<cl_khr_external_semaphore_opaque_fd>>` extension extends +The {cl_khr_external_semaphore_opaque_fd_EXT} extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: @@ -13034,7 +13034,7 @@ endif::cl_khr_external_semaphore_opaque_fd[] ifdef::cl_khr_external_semaphore_sync_fd[] -The `<<cl_khr_external_semaphore_sync_fd>>` extension extends +The {cl_khr_external_semaphore_sync_fd_EXT} extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: @@ -13138,7 +13138,7 @@ ifdef::cl_khr_external_semaphore_dx_fence,cl_khr_external_semaphore_win32[] ifdef::cl_khr_external_semaphore_dx_fence[] -The `<<cl_khr_external_semaphore_dx_fence>>` extension extends +The {cl_khr_external_semaphore_dx_fence_EXT} extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: @@ -13170,7 +13170,7 @@ endif::cl_khr_external_semaphore_dx_fence[] ifdef::cl_khr_external_semaphore_win32[] -The `<<cl_khr_external_semaphore_win32>>` extension extends +The {cl_khr_external_semaphore_win32_EXT} extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: @@ -13674,7 +13674,7 @@ include::{generated}/api/version-notes/CL_PROFILING_COMMAND_QUEUED.asciidoc[] command-queue by the host.
ifdef::cl_khr_command_buffer_multi_device[] - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, for events returned by a command-buffer enqueue operation to multiple command-queues, the host time when the command-buffer has been enqueued across the command-queues is used. @@ -13690,7 +13690,7 @@ include::{generated}/api/version-notes/CL_PROFILING_COMMAND_SUBMIT.asciidoc[] command-queue. ifdef::cl_khr_command_buffer_multi_device[] - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, for events returned by a command-buffer enqueue operation to multiple command-queues, the host time is used when command-buffer commands have been submitted to any command-queue. @@ -13705,7 +13705,7 @@ include::{generated}/api/version-notes/CL_PROFILING_COMMAND_START.asciidoc[] the device. ifdef::cl_khr_command_buffer_multi_device[] - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, for events returned by a command-buffer enqueue operation to multiple command-queues, the host time is used when any device starts executing a command-buffer command. @@ -13720,7 +13720,7 @@ include::{generated}/api/version-notes/CL_PROFILING_COMMAND_END.asciidoc[] execution on the device. ifdef::cl_khr_command_buffer_multi_device[] - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, for events returned by a command-buffer enqueue operation to multiple command-queues, the host time is used when the last command-buffer command finishes execution on any device. @@ -13736,7 +13736,7 @@ include::{generated}/api/version-notes/CL_PROFILING_COMMAND_COMPLETE.asciidoc[] execution.
ifdef::cl_khr_command_buffer_multi_device[] - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, for events returned by a command-buffer enqueue operation to multiple command-queues, the host time is used when the command-buffer has completed execution across all command-queues. @@ -13756,7 +13756,7 @@ incremented. ifdef::cl_khr_command_buffer_multi_device[] [NOTE] ==== -If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, and +If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, and if no reliable device timer sources are available to inform the host side, or parallel runtime scheduling makes it impossible to identify a first/last command, then an implementation may fallback to reporting @@ -13780,7 +13780,7 @@ Otherwise, it returns one of the following errors: {clEnqueueSvmFree}. ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, and if _event_ was created from a call to {clEnqueueCommandBufferKHR}, {CL_PROFILING_INFO_NOT_AVAILABLE} is returned if all the queues passed do not have {CL_QUEUE_PROFILING_ENABLE} set. @@ -13932,7 +13932,7 @@ Undefined behavior may result from the failure to follow this usage requirement for all the command-buffers an object is used as a kernel argument in. -If using layered extension `<<cl_khr_command_buffer_mutable_dispatch>>`, +If using layered extension {cl_khr_command_buffer_mutable_dispatch_EXT}, <>. ==== @@ -13941,7 +13941,7 @@ usage>>. ifdef::cl_khr_command_buffer_multi_device[] === Command-Buffers and Multiple Devices -If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, a +If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, a command-buffer can contain commands recorded to the queues of different devices if a vendor provides support for inter-device {cl_sync_point_khr_TYPE} synchronization.
@@ -14030,7 +14030,7 @@ include::{generated}/api/protos/clCreateCommandBufferKHR.txt[] include::{generated}/api/version-notes/clCreateCommandBufferKHR.asciidoc[] * _num_queues_ is the number of command-queues listed in _queues_. - If the `<<cl_khr_command_buffer_multi_device>>` extension is not + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, this **must** be one. * _queues_ is a pointer to a list of command-queues that the command-buffer commands will be recorded to. @@ -14113,7 +14113,7 @@ endif::cl_khr_command_buffer_mutable_dispatch[] If _errcode_ret_ is `NULL`, no error code is returned. ifdef::cl_khr_command_buffer_multi_device[] -.Summary of command-buffer creation configurations, for the `<<cl_khr_command_buffer_multi_device>>` extension +.Summary of command-buffer creation configurations, for the {cl_khr_command_buffer_multi_device_EXT} extension [width="100%",options="header"] |==== | All Devices Associated With `Queues` can Device-side Sync | Platform Supports Universal Sync | Condition | Result @@ -14174,9 +14174,9 @@ returned in _errcode_ret_: specified by {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR}. * {CL_INVALID_CONTEXT} if all the command-queues in _queues_ do not have the same OpenCL context. - * {CL_INVALID_VALUE} if the `<<cl_khr_command_buffer_multi_device>>` + * {CL_INVALID_VALUE} if the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _num_queues_ is zero, or if the - `<<cl_khr_command_buffer_multi_device>>` extension is not supported + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _num_queues_ is not one. * {CL_INVALID_VALUE} if _queues_ is `NULL`. * {CL_INVALID_VALUE} if values specified in _properties_ are not valid, or @@ -14190,7 +14190,7 @@ returned in _errcode_ret_: required by the OpenCL implementation on the host.
ifdef::cl_khr_command_buffer_multi_device[] -If the `<<cl_khr_command_buffer_multi_device>>` extension is supported: +If the {cl_khr_command_buffer_multi_device_EXT} extension is supported: * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if _queues_ includes more than one command-queue associated with a device that does not support capability @@ -14465,12 +14465,12 @@ include::{generated}/api/version-notes/clCommandCopyBufferKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. * _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`. ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] @@ -14512,10 +14512,10 @@ Otherwise, it returns the errors defined by {clEnqueueCopyBuffer} except: {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`. * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -14552,12 +14552,12 @@ include::{generated}/api/version-notes/clCommandCopyBufferRectKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object.
* _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`. ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] @@ -14617,10 +14617,10 @@ except: {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`. * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -14657,12 +14657,12 @@ include::{generated}/api/version-notes/clCommandCopyBufferToImageKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. * _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`.
ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] @@ -14705,10 +14705,10 @@ except: {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`. * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -14744,12 +14744,12 @@ include::{generated}/api/version-notes/clCommandCopyImageKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. * _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`. ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`.
endif::cl_khr_command_buffer_multi_device[] @@ -14799,10 +14799,10 @@ Otherwise, it returns the errors defined by {clEnqueueCopyImage} except: {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`. * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -14839,12 +14839,12 @@ include::{generated}/api/version-notes/clCommandCopyImageToBufferKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. * _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`. ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] @@ -14887,10 +14887,10 @@ except: {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`.
* {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -14935,12 +14935,12 @@ value specified when _buffer_ is created is ignored by * _command_buffer_ refers to a valid command-buffer object. * _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`. ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] @@ -14982,10 +14982,10 @@ Otherwise, it returns the errors defined by {clEnqueueFillBuffer} except: {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`. * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -15029,12 +15029,12 @@ value specified when image is created is ignored by {clCommandFillImageKHR}. * _command_buffer_ refers to a valid command-buffer object.
* _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`. ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] @@ -15075,10 +15075,10 @@ Otherwise, it returns the errors defined by {clEnqueueFillImage} except: {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`. * {CL_INVALID_COMMAND_QUEUE} if the - `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -15114,12 +15114,12 @@ include::{generated}/api/version-notes/clCommandNDRangeKernelKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. * _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`.
ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] @@ -15133,7 +15133,7 @@ ifdef::cl_khr_command_buffer_mutable_dispatch[] default value will be used. _properties_ may be `NULL`, in which case the default values for supported properties will be used. - The `<>` extension does not define any + The {cl_khr_command_buffer_EXT} extension does not define any properties, but supported properties defined by extensions are defined in the <> table. @@ -15173,7 +15173,7 @@ after the function returns. _sync_point_wait_list_ array. * _mutable_handle_ returns a handle to the command. ifdef::cl_khr_command_buffer_mutable_dispatch[] - If the `<>` extension is + If the {cl_khr_command_buffer_mutable_dispatch_EXT} extension is supported, and _mutable_handle_ is not `NULL`, it can be used in the {cl_mutable_dispatch_config_khr_TYPE} struct to update the command configuration between recordings. @@ -15181,7 +15181,7 @@ ifdef::cl_khr_command_buffer_mutable_dispatch[] that freeing the command-buffer will also free this handle. endif::cl_khr_command_buffer_mutable_dispatch[] ifndef::cl_khr_command_buffer_mutable_dispatch[] - If the `<>` extension is not + If the {cl_khr_command_buffer_mutable_dispatch_EXT} extension is not supported, this parameter is unused, and **must** be `NULL`. endif::cl_khr_command_buffer_mutable_dispatch[] @@ -15319,10 +15319,10 @@ Otherwise, it returns the errors defined by {clEnqueueNDRangeKernel} except: {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`. 
* {CL_INVALID_COMMAND_QUEUE} if the - `<>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -15346,7 +15346,7 @@ New errors: command-buffer. * {CL_INVALID_VALUE} if values specified in _properties_ are not valid * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - * {CL_INVALID_VALUE} if the `<>` + * {CL_INVALID_VALUE} if the {cl_khr_command_buffer_mutable_dispatch_EXT} extension is not supported and _mutable_handle_ is not `NULL`. * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} and @@ -15356,7 +15356,7 @@ New errors: and _kernel_ contains a kernel-enqueue call. ifdef::cl_khr_command_buffer_mutable_dispatch[] -If the `<>` extension is supported: +If the {cl_khr_command_buffer_mutable_dispatch_EXT} extension is supported: * {CL_INVALID_OPERATION} if the requested {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} properties are not reported @@ -15385,12 +15385,12 @@ include::{generated}/api/version-notes/clCommandSVMMemcpyKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. * _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`. ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. 
endif::cl_khr_command_buffer_multi_device[] @@ -15435,10 +15435,10 @@ Otherwise, it returns the errors defined by {clEnqueueSVMMemcpy} except: {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`. * {CL_INVALID_COMMAND_QUEUE} if the - `<>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -15475,12 +15475,12 @@ include::{generated}/api/version-notes/clCommandSVMMemFillKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. * _command_queue_ specifies the command-queue the command will be recorded to. {empty} + - If the `<>` extension is not supported, + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, only a single command-queue is supported, and _command_queue_ must be `NULL`. ifdef::cl_khr_command_buffer_multi_device[] {empty} + - If the `<>` extension is supported and + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] @@ -15541,10 +15541,10 @@ successfully. Otherwise, it returns the errors defined by {CL_INVALID_COMMAND_QUEUE} is replaced with: * {CL_INVALID_COMMAND_QUEUE} if the - `<>` extension is not supported and + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and _command_queue_ is not `NULL`. 
* {CL_INVALID_COMMAND_QUEUE} if the - `<>` extension is supported; and + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and either _command_queue_ is `NULL` and _command_buffer_ was created with more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. @@ -15574,7 +15574,7 @@ New errors: ifdef::cl_khr_command_buffer_multi_device[] === Remapping Command-Buffers -If the `<>` extension is supported, +If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, platforms reporting the {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} capability support generating a deep copy of a command-buffer with its commands remapped to a list of command-queues that are potentially @@ -15694,7 +15694,7 @@ ifdef::cl_khr_command_buffer_mutable_dispatch[] A generic {cl_mutable_command_khr_TYPE} handle is called a _mutable-command_ object as it can be returned from any command recording entry-point in the -`<>` family of extensions. +{cl_khr_command_buffer_EXT} family of extensions. The mutable-command handles returned by {clCommandNDRangeKernelKHR} in particular are referred to as _mutable-dispatch_ objects, and can be modified through the fields of {cl_mutable_dispatch_config_khr_TYPE}. @@ -15715,7 +15715,7 @@ kernel object used when the command was recorded, and only influence the [[mutable-dispatch-kernel-argument-safe-usage]] [NOTE] ==== -The base `<>` extension +The base {cl_khr_command_buffer_EXT} extension <> that a command-buffer does not update the reference count of objects set as arguments on kernels recorded into the command-buffer. 
diff --git a/c/dictionary.asciidoc b/c/dictionary.asciidoc index bdf9c23c3..ef7a9401b 100644 --- a/c/dictionary.asciidoc +++ b/c/dictionary.asciidoc @@ -3,3 +3,4 @@ // http://creativecommons.org/licenses/by/4.0/ include::{generated}/api/api-dictionary-no-links.asciidoc[] +include::{generated}/api/ext-dictionary-no-links.asciidoc[] diff --git a/c/footnotes.asciidoc b/c/footnotes.asciidoc index c22db5889..fc85efc76 100644 --- a/c/footnotes.asciidoc +++ b/c/footnotes.asciidoc @@ -21,17 +21,17 @@ The <> consume operation is not supported. \ ] :fn-atomic-double-supported: pass:n[ \ -The `atomic_double` type is only supported if double precision is supported and the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions are supported and have been enabled. \ +The `atomic_double` type is only supported if double precision is supported and the {cl_khr_int64_base_atomics_EXT} and {cl_khr_int64_extended_atomics_EXT} extensions are supported and have been enabled. \ If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_fp64} feature. \ ] :fn-atomic-int64-supported: pass:n[ \ -The atomic_long and atomic_ulong types are supported if the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions are supported and have been enabled. \ +The atomic_long and atomic_ulong types are supported if the {cl_khr_int64_base_atomics_EXT} and {cl_khr_int64_extended_atomics_EXT} extensions are supported and have been enabled. \ If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_int64} feature. \ ] :fn-atomic-size_t-supported: pass:n[ \ -If the device address space is 64-bits, the data types `atomic_intptr_t`, `atomic_uintptr_t`, `atomic_size_t` and `atomic_ptrdiff_t` are supported if the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions are supported and have been enabled. 
\ +If the device address space is 64-bits, the data types `atomic_intptr_t`, `atomic_uintptr_t`, `atomic_size_t` and `atomic_ptrdiff_t` are supported if the {cl_khr_int64_base_atomics_EXT} and {cl_khr_int64_extended_atomics_EXT} extensions are supported and have been enabled. \ ] :fn-atomic-weak-rationale: pass:n[ \ @@ -48,11 +48,11 @@ When any scalar value is converted to `bool`, the result is 0 if the value compa :fn-cl_double: pass:n[ \ <> support for OpenCL C 1.2 or above. \ -Also see extension *cl_khr_fp64*. \ +Also see extension {cl_khr_fp64_EXT}. \ ] :fn-cl_khr_fp16: pass:n[ \ -Unless the *cl_khr_fp16* extension is supported and has been enabled. \ +Unless the {cl_khr_fp16_EXT} extension is supported and has been enabled. \ ] :fn-clang-block-syntax: pass:n[ \ @@ -131,7 +131,7 @@ If an implementation extends this specification to support IEEE-754 flags or exc ] :fn-float-types-supported: pass:n[ \ -The `half` scalar and vector types can only be used if the *cl_khr_fp16* extension is supported and has been enabled. \ +The `half` scalar and vector types can only be used if the {cl_khr_fp16_EXT} extension is supported and has been enabled. \ The `double` scalar and vector types can only be used if `double` precision is supported, e.g. for OpenCL C 3.0 the {opencl_c_fp64} feature macro is present. \ ] @@ -146,7 +146,7 @@ It returns the largest positive floating-point number less than 1.0. \ ] :fn-half-supported: pass:n[ \ -Only if the *cl_khr_fp16* extension is supported and has been enabled. \ +Only if the {cl_khr_fp16_EXT} extension is supported and has been enabled. \ ] :fn-image-functions: pass:n[ \ diff --git a/env/appendix_a.asciidoc b/env/appendix_a.asciidoc index 10ae42123..2c58efbf9 100644 --- a/env/appendix_a.asciidoc +++ b/env/appendix_a.asciidoc @@ -18,13 +18,13 @@ Changes from *v3.0.5*: * Clarified sub-group barrier behavior in non-uniform control flow. * Added required alignment of types. 
* Added new extensions: - ** `cl_khr_subgroup_extended_types` - ** `cl_khr_subgroup_non_uniform_vote` - ** `cl_khr_subgroup_ballot` - ** `cl_khr_subgroup_non_uniform_arithmetic` - ** `cl_khr_subgroup_shuffle` - ** `cl_khr_subgroup_shuffle_relative` - ** `cl_khr_subgroup_clustered_reduce` + ** {cl_khr_subgroup_extended_types_EXT} + ** {cl_khr_subgroup_non_uniform_vote_EXT} + ** {cl_khr_subgroup_ballot_EXT} + ** {cl_khr_subgroup_non_uniform_arithmetic_EXT} + ** {cl_khr_subgroup_shuffle_EXT} + ** {cl_khr_subgroup_shuffle_relative_EXT} + ** {cl_khr_subgroup_clustered_reduce_EXT} Changes from *v3.0.6*: @@ -32,9 +32,9 @@ Changes from *v3.0.6*: * Added the required type for SPIR-V built-in variables. * Fixed several bugs and formatting in the fast math ULP tables. * Added new extensions: - ** `cl_khr_extended_bit_ops` - ** `cl_khr_spirv_extended_debug_info` - ** `cl_khr_spirv_linkonce_odr` + ** {cl_khr_extended_bit_ops_EXT} + ** {cl_khr_spirv_extended_debug_info_EXT} + ** {cl_khr_spirv_linkonce_odr_EXT} Changes from *v3.0.8*: diff --git a/env/common_properties.asciidoc b/env/common_properties.asciidoc index c8ce83dff..8019b751d 100644 --- a/env/common_properties.asciidoc +++ b/env/common_properties.asciidoc @@ -19,9 +19,9 @@ version number, as described in the SPIR-V specification. An OpenCL environment describes the versions of SPIR-V modules that it supports using the {CL_DEVICE_IL_VERSION} query in OpenCL 2.1 or newer, the {CL_DEVICE_ILS_WITH_VERSION} query in OpenCL 3.0 or newer, or the -{CL_DEVICE_IL_VERSION_KHR} query in the `cl_khr_il_program` extension. +{CL_DEVICE_IL_VERSION_KHR} query in the {cl_khr_il_program_EXT} extension. -OpenCL environments that support the `cl_khr_il_program` extension or +OpenCL environments that support the {cl_khr_il_program_EXT} extension or OpenCL 2.1 must support SPIR-V 1.0 modules. OpenCL environments that support OpenCL 2.2 must support SPIR-V 1.0, 1.1, and 1.2 modules. 
Use the {CL_DEVICE_IL_VERSION} or {CL_DEVICE_ILS_WITH_VERSION} query diff --git a/env/dictionary.asciidoc b/env/dictionary.asciidoc index bdf9c23c3..ef7a9401b 100644 --- a/env/dictionary.asciidoc +++ b/env/dictionary.asciidoc @@ -3,3 +3,4 @@ // http://creativecommons.org/licenses/by/4.0/ include::{generated}/api/api-dictionary-no-links.asciidoc[] +include::{generated}/api/ext-dictionary-no-links.asciidoc[] diff --git a/env/extensions.asciidoc b/env/extensions.asciidoc index df0259550..f0ffc5d6b 100644 --- a/env/extensions.asciidoc +++ b/env/extensions.asciidoc @@ -35,15 +35,15 @@ in a SPIR-V module using *OpExtension*. === Full and Embedded Profile Extensions -==== `cl_khr_3d_image_writes` +==== {cl_khr_3d_image_writes_EXT} -If the OpenCL environment supports the extension `cl_khr_3d_image_writes`, +If the OpenCL environment supports the extension {cl_khr_3d_image_writes_EXT}, then the environment must accept _Image_ operands to *OpImageWrite* that are declared with with dimensionality _Dim_ equal to *3D*. -==== `cl_khr_depth_images` +==== {cl_khr_depth_images_EXT} -If the OpenCL environment supports the extension `cl_khr_depth_images`, +If the OpenCL environment supports the extension {cl_khr_depth_images_EXT}, then the environment must accept modules that declare 2D depth image types using *OpTypeImage* with dimensionality _Dim_ equal to *2D* and _Depth_ equal to 1, indicating a depth image. 2D depth images may optionally be @@ -54,32 +54,32 @@ Additionally, the following Image Channel Orders may be returned by * *Depth* -==== `cl_khr_device_enqueue_local_arg_types` +==== {cl_khr_device_enqueue_local_arg_types_EXT} If the OpenCL environment supports the extension -`cl_khr_device_enqueue_local_arg_types`, then then environment will allow +{cl_khr_device_enqueue_local_arg_types_EXT}, then the environment will allow _Invoke_ functions to be passed to *OpEnqueueKernel* with *Workgroup* memory pointer parameters of any type.
-==== `cl_khr_fp16` +==== {cl_khr_fp16_EXT} -If the OpenCL environment supports the extension `cl_khr_fp16`, then the +If the OpenCL environment supports the extension {cl_khr_fp16_EXT}, then the environment must accept modules that declare the following SPIR-V capabilities: * *Float16* -==== `cl_khr_fp64` +==== {cl_khr_fp64_EXT} -If the OpenCL environment supports the extension `cl_khr_fp64`, then the +If the OpenCL environment supports the extension {cl_khr_fp64_EXT}, then the environment must accept modules that declare the following SPIR-V capabilities: * *Float64* -==== `cl_khr_gl_depth_images` +==== {cl_khr_gl_depth_images_EXT} -If the OpenCL environment supports the extension `cl_khr_gl_depth_images`, +If the OpenCL environment supports the extension {cl_khr_gl_depth_images_EXT}, then the following Image Channel Orders may additionally be returned by *OpImageQueryOrder*: @@ -90,11 +90,11 @@ Also, the following Image Channel Data Types may additionally be returned by * *UnormInt24* -==== `cl_khr_gl_msaa_sharing` +==== {cl_khr_gl_msaa_sharing_EXT} // TODO: How does this affect the *ImageMSArray* capability? This is currently a *Shader* capability. -If the OpenCL environment supports the extension `cl_khr_gl_msaa_sharing`, +If the OpenCL environment supports the extension {cl_khr_gl_msaa_sharing_EXT}, then the environment must accept modules that declare 2D multi-sampled image types using *OpTypeImage* with dimensionality _Dim_ equal to *2D* and _MS_ equal to 1, indicating multi-sampled content. 2D multi-sampled images @@ -108,13 +108,13 @@ The 2D multi-sampled images may be used with the following instructions: * *OpImageQueryOrder* * *OpImageQuerySamples* -//==== `cl_khr_initialize_memory` +//==== {cl_khr_initialize_memory_EXT} // Do we need to say anything about this extension in this spec? 
-==== `cl_khr_int64_base_atomics` and `cl_khr_int64_extended_atomics` +==== {cl_khr_int64_base_atomics_EXT} and {cl_khr_int64_extended_atomics_EXT} -If the OpenCL environment supports the extension `cl_khr_int64_base_atomics` -or `cl_khr_int64_extended_atomics`, then the environment must accept modules +If the OpenCL environment supports the extension {cl_khr_int64_base_atomics_EXT} +or {cl_khr_int64_extended_atomics_EXT}, then the environment must accept modules that declare the following SPIR-V capabilities: * *Int64Atomics* @@ -123,12 +123,12 @@ When the *Int64Atomics* capability is declared, 64-bit integer types are valid for the _Result Type_ and type of _Value_ for all *Atomic Instructions*. Note: OpenCL environments that consume SPIR-V must support both -`cl_khr_int64_base_atomics` and `cl_khr_int64_extended_atomics` or neither +{cl_khr_int64_base_atomics_EXT} and {cl_khr_int64_extended_atomics_EXT} or neither of these extensions. -==== `cl_khr_mipmap_image` +==== {cl_khr_mipmap_image_EXT} -If the OpenCL environment supports the extension `cl_khr_mipmap_image`, +If the OpenCL environment supports the extension {cl_khr_mipmap_image_EXT}, then the environment must accept non-zero optional *Lod* _Image Operands_ for the following instructions: @@ -136,25 +136,25 @@ for the following instructions: * *OpImageRead* * *OpImageQuerySizeLod* -Note: Implementations that support `cl_khr_mipmap_image` are not guaranteed +Note: Implementations that support {cl_khr_mipmap_image_EXT} are not guaranteed to support the *ImageMipmap* capability, since this extension does not require non-zero optional *Lod* _Image Operands_ for *OpImageWrite*. 
-==== `cl_khr_mipmap_image_writes` +==== {cl_khr_mipmap_image_writes_EXT} -If the OpenCL environment supports the extension `cl_khr_mipmap_image_writes`, +If the OpenCL environment supports the extension {cl_khr_mipmap_image_writes_EXT}, then the environment must accept non-zero optional *Lod* _Image Operands_ for the following instructions: * *OpImageWrite* -Note: An implementation that supports `cl_khr_mipmap_image_writes` must also -support `cl_khr_mipmap_image`, and support for both extensions does +Note: An implementation that supports {cl_khr_mipmap_image_writes_EXT} must also +support {cl_khr_mipmap_image_EXT}, and support for both extensions does guarantee support for the *ImageMipmap* capability. -==== `cl_khr_subgroups` +==== {cl_khr_subgroups_EXT} -If the OpenCL environment supports the extension `cl_khr_subgroups`, then +If the OpenCL environment supports the extension {cl_khr_subgroups_EXT}, then for all instructions except *OpGroupAsyncCopy* and *OpGroupWaitEvents* the _Scope_ for _Execution_ may be: @@ -165,23 +165,23 @@ OpenCL 1.2 environment, the _Scope_ for _Memory_ may be: * *Subgroup* -==== `cl_khr_subgroup_named_barrier` +==== {cl_khr_subgroup_named_barrier_EXT} If the OpenCL environment supports the extension -`cl_khr_subgroup_named_barrier`, then the environment must accept modules +{cl_khr_subgroup_named_barrier_EXT}, then the environment must accept modules that declare the following SPIR-V capabilities: * *NamedBarrier* -==== `cl_khr_spirv_no_integer_wrap_decoration` +==== {cl_khr_spirv_no_integer_wrap_decoration_EXT} -If the OpenCL environment supports the extension `cl_khr_spirv_no_integer_wrap_decoration`, then the environment must accept modules that declare use of the extension `SPV_KHR_no_integer_wrap_decoration` via *OpExtension*. 
+If the OpenCL environment supports the extension {cl_khr_spirv_no_integer_wrap_decoration_EXT}, then the environment must accept modules that declare use of the extension `SPV_KHR_no_integer_wrap_decoration` via *OpExtension*. -If the OpenCL environment supports the extension `cl_khr_spirv_no_integer_wrap_decoration` and use of the SPIR-V extension `SPV_KHR_no_integer_wrap_decoration` is declared in the module via *OpExtension*, then the environment must accept modules that include the *NoSignedWrap* or *NoUnsignedWrap* decorations. +If the OpenCL environment supports the extension {cl_khr_spirv_no_integer_wrap_decoration_EXT} and use of the SPIR-V extension `SPV_KHR_no_integer_wrap_decoration` is declared in the module via *OpExtension*, then the environment must accept modules that include the *NoSignedWrap* or *NoUnsignedWrap* decorations. -==== `cl_khr_subgroup_extended_types` +==== {cl_khr_subgroup_extended_types_EXT} -If the OpenCL environment supports the extension `cl_khr_subgroup_extended_types`, then additional types are valid for the following for *Groups* instructions with _Scope_ for _Execution_ equal to *Subgroup*: +If the OpenCL environment supports the extension {cl_khr_subgroup_extended_types_EXT}, then additional types are valid for the following *Groups* instructions with _Scope_ for _Execution_ equal to *Subgroup*: * *OpGroupBroadcast* * *OpGroupIAdd*, *OpGroupFAdd* @@ -200,9 +200,9 @@ Additionally, for *OpGroupBroadcast*, valid types for _Value_ are: ** *OpTypeInt* (equivalent to `char__n__`, `uchar__n__`, `short__n__`, `ushort__n__`, `int__n__`, `uint__n__`, `long__n__`, and `ulong__n__`) ** *OpTypeFloat* (equivalent to `half__n__`, `float__n__`, and `double__n__`) -==== `cl_khr_subgroup_non_uniform_vote` +==== {cl_khr_subgroup_non_uniform_vote_EXT} -If the OpenCL environment supports the extension `cl_khr_subgroup_non_uniform_vote`, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: +If the
OpenCL environment supports the extension {cl_khr_subgroup_non_uniform_vote_EXT}, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: * *GroupNonUniform* * *GroupNonUniformVote* @@ -217,9 +217,9 @@ For the instruction *OpGroupNonUniformAllEqual*, valid types for _Value_ are: ** *OpTypeInt* (equivalent to `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, and `ulong`) ** *OpTypeFloat* (equivalent to `half`, `float`, and `double`) -==== `cl_khr_subgroup_ballot` +==== {cl_khr_subgroup_ballot_EXT} -If the OpenCL environment supports the extension `cl_khr_subgroup_ballot`, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: +If the OpenCL environment supports the extension {cl_khr_subgroup_ballot_EXT}, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: * *GroupNonUniformBallot* @@ -248,9 +248,9 @@ For the instructions *OpGroupNonUniformInverseBallot*, *OpGroupNonUniformBallotB For built-in variables decorated with *SubgroupEqMask*, *SubgroupGeMask*, *SubgroupGtMask*, *SubgroupLeMask*, or *SubgroupLtMask*, the supported variable type is an *OpTypeVector* with four _Component Count_ components of *OpTypeInt*, with _Width_ equal to 32 and _Signedness_ equal to 0 (equivalent to `uint4`). 
-==== `cl_khr_subgroup_non_uniform_arithmetic` +==== {cl_khr_subgroup_non_uniform_arithmetic_EXT} -If the OpenCL environment supports the extension `cl_khr_subgroup_non_uniform_arithmetic`, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: +If the OpenCL environment supports the extension {cl_khr_subgroup_non_uniform_arithmetic_EXT}, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: * *GroupNonUniformArithmetic* @@ -268,9 +268,9 @@ Otherwise, for the *GroupNonUniformArithmetic* scan and reduction instructions, For the *GroupNonUniformArithmetic* scan and reduction instructions, the optional _ClusterSize_ operand must not be present. -==== `cl_khr_subgroup_shuffle` +==== {cl_khr_subgroup_shuffle_EXT} -If the OpenCL environment supports the extension `cl_khr_subgroup_shuffle`, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: +If the OpenCL environment supports the extension {cl_khr_subgroup_shuffle_EXT}, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: * *GroupNonUniformShuffle* @@ -284,9 +284,9 @@ For the instructions *OpGroupNonUniformShuffle* and *OpGroupNonUniformShuffleXor ** *OpTypeInt* (equivalent to `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, and `ulong`) ** *OpTypeFloat* (equivalent to `half`, `float`, and `double`) -==== `cl_khr_subgroup_shuffle_relative` +==== {cl_khr_subgroup_shuffle_relative_EXT} -If the OpenCL environment supports the extension `cl_khr_subgroup_shuffle_relative`, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: +If the OpenCL environment supports the extension {cl_khr_subgroup_shuffle_relative_EXT}, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: * *GroupNonUniformShuffleRelative* @@ -300,9 +300,9 @@ For the *GroupNonUniformShuffleRelative* 
instructions, valid types for _Value_ a ** *OpTypeInt* (equivalent to `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, and `ulong`) ** *OpTypeFloat* (equivalent to `half`, `float`, and `double`) -==== `cl_khr_subgroup_clustered_reduce` +==== {cl_khr_subgroup_clustered_reduce_EXT} -If the OpenCL environment supports the extension `cl_khr_subgroup_clustered_reduce`, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: +If the OpenCL environment supports the extension {cl_khr_subgroup_clustered_reduce_EXT}, then the environment must accept SPIR-V modules that declare the following SPIR-V capabilities: * *GroupNonUniformClustered* @@ -312,26 +312,26 @@ For instructions requiring these capabilities, _Scope_ for _Execution_ may be: When the *GroupNonUniformClustered* capability is declared, the *GroupNonUniformArithmetic* scan and reduction instructions may include the optional _ClusterSize_ operand. -==== `cl_khr_spirv_extended_debug_info` +==== {cl_khr_spirv_extended_debug_info_EXT} -If the OpenCL environment supports the extension `cl_khr_spirv_extended_debug_info`, then the environment must accept modules +If the OpenCL environment supports the extension {cl_khr_spirv_extended_debug_info_EXT}, then the environment must accept modules that import the `OpenCL.DebugInfo.100` extended instruction set via *OpExtInstImport*. -==== `cl_khr_spirv_linkonce_odr` +==== {cl_khr_spirv_linkonce_odr_EXT} -If the OpenCL environment supports the extension `cl_khr_spirv_linkonce_odr`, then the environment must accept modules that declare use of the extension `SPV_KHR_linkonce_odr` via *OpExtension*. +If the OpenCL environment supports the extension {cl_khr_spirv_linkonce_odr_EXT}, then the environment must accept modules that declare use of the extension `SPV_KHR_linkonce_odr` via *OpExtension*. 
-If the OpenCL environment supports the extension `cl_khr_spirv_linkonce_odr` and use of the SPIR-V extension `SPV_KHR_linkonce_odr` is declared in the module via *OpExtension*, then the environment must accept modules that include the *LinkOnceODR* linkage type. +If the OpenCL environment supports the extension {cl_khr_spirv_linkonce_odr_EXT} and use of the SPIR-V extension `SPV_KHR_linkonce_odr` is declared in the module via *OpExtension*, then the environment must accept modules that include the *LinkOnceODR* linkage type. -==== `cl_khr_extended_bit_ops` +==== {cl_khr_extended_bit_ops_EXT} -If the OpenCL environment supports the extension `cl_khr_extended_bit_ops`, then the environment must accept modules that declare use of the extension `SPV_KHR_bit_instructions` via *OpExtension*. +If the OpenCL environment supports the extension {cl_khr_extended_bit_ops_EXT}, then the environment must accept modules that declare use of the extension `SPV_KHR_bit_instructions` via *OpExtension*. -If the OpenCL environment supports the extension `cl_khr_extended_bit_ops` and use of the SPIR-V extension `SPV_KHR_bit_instructions` is declared in the module via *OpExtension*, then the environment must accept modules that declare the *BitInstructions* capability. +If the OpenCL environment supports the extension {cl_khr_extended_bit_ops_EXT} and use of the SPIR-V extension `SPV_KHR_bit_instructions` is declared in the module via *OpExtension*, then the environment must accept modules that declare the *BitInstructions* capability. 
-==== `cl_khr_integer_dot_product` +==== {cl_khr_integer_dot_product_EXT} -If the OpenCL environment supports the extension `cl_khr_integer_dot_product`, +If the OpenCL environment supports the extension {cl_khr_integer_dot_product_EXT}, then the environment must accept modules that require `SPV_KHR_integer_dot_product` and declare the following SPIR-V capabilities: @@ -339,17 +339,17 @@ declare the following SPIR-V capabilities: * *DotProductInput4x8BitKHR* if {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} is supported * *DotProductInput4x8BitPackedKHR* -==== `cl_khr_expect_assume` +==== {cl_khr_expect_assume_EXT} -If the OpenCL environment supports the extension `cl_khr_expect_assume`, then the environment must accept modules that declare use of the extension `SPV_KHR_expect_assume` via *OpExtension*. +If the OpenCL environment supports the extension {cl_khr_expect_assume_EXT}, then the environment must accept modules that declare use of the extension `SPV_KHR_expect_assume` via *OpExtension*. 
-If the OpenCL environment supports the extension `cl_khr_expect_assume` and use of the SPIR-V extension `SPV_KHR_expect_assume` is declared in the module via *OpExtension*, then the environment must accept modules that declare the following SPIR-V capabilities: +If the OpenCL environment supports the extension {cl_khr_expect_assume_EXT} and use of the SPIR-V extension `SPV_KHR_expect_assume` is declared in the module via *OpExtension*, then the environment must accept modules that declare the following SPIR-V capabilities: * *ExpectAssumeKHR* -==== `cl_khr_subgroup_rotate` +==== {cl_khr_subgroup_rotate_EXT} -If the OpenCL environment supports the extension `cl_khr_subgroup_rotate`, +If the OpenCL environment supports the extension {cl_khr_subgroup_rotate_EXT}, then the environment accept modules that require `SPV_KHR_subgroup_rotate` and declare the following SPIR-V capabilities: @@ -359,11 +359,11 @@ For instructions requiring these capabilities, _Scope_ for _Execution_ may be: * *Subgroup* -==== `cl_khr_work_group_uniform_arithmetic` +==== {cl_khr_work_group_uniform_arithmetic_EXT} -If the OpenCL environment supports the extension `cl_khr_work_group_uniform_arithmetic`, then the environment must accept modules that declare use of the extension `SPV_KHR_uniform_group_instructions` via *OpExtension*. +If the OpenCL environment supports the extension {cl_khr_work_group_uniform_arithmetic_EXT}, then the environment must accept modules that declare use of the extension `SPV_KHR_uniform_group_instructions` via *OpExtension*. 
-If the OpenCL environment supports the extension `cl_khr_work_group_uniform_arithmetic` and use of the SPIR-V extension `SPV_KHR_uniform_group_instructions` is declared in the module via *OpExtension*, then the environment must accept modules that declare the following SPIR-V capabilities: +If the OpenCL environment supports the extension {cl_khr_work_group_uniform_arithmetic_EXT} and use of the SPIR-V extension `SPV_KHR_uniform_group_instructions` is declared in the module via *OpExtension*, then the environment must accept modules that declare the following SPIR-V capabilities: * *GroupUniformArithmeticKHR* diff --git a/ext/deprecated_extensions.asciidoc b/ext/deprecated_extensions.asciidoc index 200eda14b..7004cd3d6 100644 --- a/ext/deprecated_extensions.asciidoc +++ b/ext/deprecated_extensions.asciidoc @@ -7,5 +7,5 @@ === For OpenCL 1.1: -* The *cl_khr_select_fprounding_mode* extension has been deprecated. +* The {cl_khr_select_fprounding_mode_EXT} extension has been deprecated. Its use is no longer recommended. diff --git a/ext/dictionary.asciidoc b/ext/dictionary.asciidoc index bdf9c23c3..ef7a9401b 100644 --- a/ext/dictionary.asciidoc +++ b/ext/dictionary.asciidoc @@ -3,3 +3,4 @@ // http://creativecommons.org/licenses/by/4.0/ include::{generated}/api/api-dictionary-no-links.asciidoc[] +include::{generated}/api/ext-dictionary-no-links.asciidoc[] diff --git a/ext/introduction.asciidoc b/ext/introduction.asciidoc index 9c22399cc..4c4cf584e 100644 --- a/ext/introduction.asciidoc +++ b/ext/introduction.asciidoc @@ -145,7 +145,7 @@ extension is supported on a given implementation. *Example*: An extension which adds the extension string `"cl_khr_3d_image_writes"` -should also add a preprocessor `#define` called *`cl_khr_3d_image_writes`*. +should also add a preprocessor `#define` called *cl_khr_3d_image_writes*. 
A kernel can now use this preprocessor `#define` to do something like: [source,opencl_c] @@ -226,7 +226,7 @@ typedef return_type where `TAG` can be `KHR`, `EXT` or `vendor-specific`. -Consider, for example, the *cl_khr_gl_sharing* extension. +Consider, for example, the {cl_khr_gl_sharing_EXT} extension. This extension would add the following to cl_gl_ext.h: [source,opencl] diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index 194c6df9d..a040f1333 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -24,287 +24,299 @@ Language Specifications. | Brief Description | Status -| [[cl_khr_3d_image_writes]] link:{APISpecURL}#cl_khr_3d_image_writes[`cl_khr_3d_image_writes`] +| [[cl_khr_3d_image_writes]] link:{APISpecURL}#cl_khr_3d_image_writes[{cl_khr_3d_image_writes_EXT}] | Write to 3D images | Core Feature in OpenCL 2.0 -| [[cl_khr_async_work_group_copy_fence]] link:{APISpecURL}#cl_khr_async_work_group_copy_fence[`cl_khr_async_work_group_copy_fence`] +| [[cl_khr_async_work_group_copy_fence]] link:{APISpecURL}#cl_khr_async_work_group_copy_fence[{cl_khr_async_work_group_copy_fence_EXT}] | Asynchronous Copy Fences | Extension -| [[cl_khr_byte_addressable_store]] link:{APISpecURL}#cl_khr_byte_addressable_store[`cl_khr_byte_addressable_store`] +| [[cl_khr_byte_addressable_store]] link:{APISpecURL}#cl_khr_byte_addressable_store[{cl_khr_byte_addressable_store_EXT}] | Read and write from 8-bit and 16-bit pointers | Core Feature in OpenCL 1.1 -| [[cl_khr_command_buffer]] link:{APISpecURL}#cl_khr_command_buffer[`cl_khr_command_buffer`] +| [[cl_khr_command_buffer]] link:{APISpecURL}#cl_khr_command_buffer[{cl_khr_command_buffer_EXT}] | Record and Replay Commands | Provisional Extension -| [[cl_khr_command_buffer_multi_device]] link:{APISpecURL}#cl_khr_command_buffer_multi_device[`cl_khr_command_buffer_multi_device`] +| [[cl_khr_command_buffer_multi_device]] 
link:{APISpecURL}#cl_khr_command_buffer_multi_device[{cl_khr_command_buffer_multi_device_EXT}] | Allow a command-buffer to contain commands targeting different devices | Provisional Extension -| [[cl_khr_command_buffer_mutable_dispatch]] link:{APISpecURL}#cl_khr_command_buffer_mutable_dispatch[`cl_khr_command_buffer_mutable_dispatch`] +| [[cl_khr_command_buffer_mutable_dispatch]] link:{APISpecURL}#cl_khr_command_buffer_mutable_dispatch[{cl_khr_command_buffer_mutable_dispatch_EXT}] | Modify kernel execution commands between enqueues of a command-buffer | Provisional Extension -| [[cl_khr_create_command_queue]] link:{APISpecURL}#cl_khr_create_command_queue[`cl_khr_create_command_queue`] +| [[cl_khr_create_command_queue]] link:{APISpecURL}#cl_khr_create_command_queue[{cl_khr_create_command_queue_EXT}] | API to Create Command-Queues with Properties | Core Feature in OpenCL 2.0 -| [[cl_khr_d3d10_sharing]] link:{APISpecURL}#cl_khr_d3d10_sharing[`cl_khr_d3d10_sharing`] +| [[cl_khr_d3d10_sharing]] link:{APISpecURL}#cl_khr_d3d10_sharing[{cl_khr_d3d10_sharing_EXT}] | Share Direct3D 10 Buffers and Textures with OpenCL | Extension -| [[cl_khr_d3d11_sharing]] link:{APISpecURL}#cl_khr_d3d11_sharing[`cl_khr_d3d11_sharing`] +| [[cl_khr_d3d11_sharing]] link:{APISpecURL}#cl_khr_d3d11_sharing[{cl_khr_d3d11_sharing_EXT}] | Share Direct3D 11 Buffers and Textures with OpenCL | Extension -| [[cl_khr_depth_images]] link:{APISpecURL}#cl_khr_depth_images[`cl_khr_depth_images`] +| [[cl_khr_depth_images]] link:{APISpecURL}#cl_khr_depth_images[{cl_khr_depth_images_EXT}] | Single Channel Depth Images | Core Feature in OpenCL 2.0 -| [[cl_khr_device_enqueue_local_arg_types]] link:{APISpecURL}#cl_khr_device_enqueue_local_arg_types[`cl_khr_device_enqueue_local_arg_types`] +| [[cl_khr_device_enqueue_local_arg_types]] link:{APISpecURL}#cl_khr_device_enqueue_local_arg_types[{cl_khr_device_enqueue_local_arg_types_EXT}] | Pass Non-Void Local Pointers to Child Kernels | Extension -| 
[[cl_khr_device_uuid]] link:{APISpecURL}#cl_khr_device_uuid[`cl_khr_device_uuid`] +| [[cl_khr_device_uuid]] link:{APISpecURL}#cl_khr_device_uuid[{cl_khr_device_uuid_EXT}] | Unique Device and Driver Identifier Queries | Extension -| [[cl_khr_dx9_media_sharing]] link:{APISpecURL}#cl_khr_dx9_media_sharing[`cl_khr_dx9_media_sharing`] +| [[cl_khr_dx9_media_sharing]] link:{APISpecURL}#cl_khr_dx9_media_sharing[{cl_khr_dx9_media_sharing_EXT}] | Share DirectX 9 Media Surfaces with OpenCL | Extension -| [[cl_khr_egl_event]] link:{APISpecURL}#cl_khr_egl_event[`cl_khr_egl_event`] +| [[cl_khr_egl_event]] link:{APISpecURL}#cl_khr_egl_event[{cl_khr_egl_event_EXT}] | Share EGL Sync Objects with OpenCL | Extension -| [[cl_khr_egl_image]] link:{APISpecURL}#cl_khr_egl_image[`cl_khr_egl_image`] +| [[cl_khr_egl_image]] link:{APISpecURL}#cl_khr_egl_image[{cl_khr_egl_image_EXT}] | Share EGL Images with OpenCL | Extension -| [[cl_khr_extended_async_copies]] link:{APISpecURL}#cl_khr_extended_async_copies[`cl_khr_extended_async_copies`] +| [[cl_khr_extended_async_copies]] link:{APISpecURL}#cl_khr_extended_async_copies[{cl_khr_extended_async_copies_EXT}] | 2D and 3D Async Copies | Extension -| [[cl_khr_extended_bit_ops]] link:{APISpecURL}#cl_khr_extended_bit_ops[`cl_khr_extended_bit_ops`] +| [[cl_khr_extended_bit_ops]] link:{APISpecURL}#cl_khr_extended_bit_ops[{cl_khr_extended_bit_ops_EXT}] | Bit Insert, Extract, and Reverse Operations | Extension -| [[cl_khr_extended_versioning]] link:{APISpecURL}#cl_khr_extended_versioning[`cl_khr_extended_versioning`] +| [[cl_khr_extended_versioning]] link:{APISpecURL}#cl_khr_extended_versioning[{cl_khr_extended_versioning_EXT}] | Extend versioning of platform, devices, extensions, etc. 
| Core Feature in OpenCL 3.0 (with minor changes) -| [[cl_khr_external_memory]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory`] +| [[cl_khr_external_memory]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_EXT}] | Common Functionality for External Memory Sharing | Provisional Extension -| [[cl_khr_external_memory_dma_buf]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_dma_buf`] +| [[cl_khr_external_memory_dma_buf]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_dma_buf_EXT}] | dma_buf External Memory Handles | Provisional Extension -| [[cl_khr_external_memory_dx]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_dx`] +| [[cl_khr_external_memory_dx]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_dx_EXT}] | Direct3D 11 and 12 External Memory Handles | Provisional Extension -| [[cl_khr_external_memory_opaque_fd]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_opaque_fd`] +| [[cl_khr_external_memory_opaque_fd]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_opaque_fd_EXT}] | Opaque File Descriptor External Memory Handles | Provisional Extension -| [[cl_khr_external_memory_win32]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_win32`] +| [[cl_khr_external_memory_win32]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_win32_EXT}] | NT Handle External Memory Handles | Provisional Extension -| [[cl_khr_expect_assume]] link:{APISpecURL}#cl_khr_expect_assume[`cl_khr_expect_assume`] +| [[cl_khr_expect_assume]] link:{APISpecURL}#cl_khr_expect_assume[{cl_khr_expect_assume_EXT}] | Kernel Optimization Hints | Extension -| [[cl_khr_external_semaphore]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore`] +| [[cl_khr_external_semaphore]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_EXT}] | Common Functionality for External Semaphore Sharing | Provisional 
Extension -| [[cl_khr_external_semaphore_dx_fence]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_dx_fence`] +| [[cl_khr_external_semaphore_dx_fence]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_dx_fence_EXT}] | Direct3D 12 External Semaphore Handles | Provisional Extension -| [[cl_khr_external_semaphore_opaque_fd]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_opaque_fd`] +| [[cl_khr_external_semaphore_opaque_fd]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_opaque_fd_EXT}] | Opaque File Descriptor External Semaphore Handles | Provisional Extension -| [[cl_khr_external_semaphore_sync_fd]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_sync_fd`] +| [[cl_khr_external_semaphore_sync_fd]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_sync_fd_EXT}] | Sync FD External Semaphore Handles | Provisional Extension -| [[cl_khr_external_semaphore_win32]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_win32`] +| [[cl_khr_external_semaphore_win32]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_win32_EXT}] | NT Handle External Semaphore Handles | Provisional Extension -| [[cl_khr_fp16]] link:{APISpecURL}#cl_khr_fp16[`cl_khr_fp16`] +| [[cl_khr_fp16]] link:{APISpecURL}#cl_khr_fp16[{cl_khr_fp16_EXT}] | Operations on 16-bit Floating-Point Values | Extension -| [[cl_khr_fp64]] link:{APISpecURL}#cl_khr_fp64[`cl_khr_fp64`] +| [[cl_khr_fp64]] link:{APISpecURL}#cl_khr_fp64[{cl_khr_fp64_EXT}] | Operations on 64-bit Floating-Point Values | Optional Core Feature in OpenCL 1.2 -| [[cl_khr_gl_depth_images]] link:{APISpecURL}#cl_khr_gl_depth_images[`cl_khr_gl_depth_images`] +| [[cl_khr_gl_depth_images]] link:{APISpecURL}#cl_khr_gl_depth_images[{cl_khr_gl_depth_images_EXT}] | Share OpenGL Depth Images with OpenCL | Extension -| [[cl_khr_gl_event]] 
link:{APISpecURL}#cl_khr_gl_event[`cl_khr_gl_event`] +| [[cl_khr_gl_event]] link:{APISpecURL}#cl_khr_gl_event[{cl_khr_gl_event_EXT}] | Share OpenGL Fence Sync Objects with OpenCL | Extension -| [[cl_khr_gl_msaa_sharing]] link:{APISpecURL}#cl_khr_gl_msaa_sharing[`cl_khr_gl_msaa_sharing`] +| [[cl_khr_gl_msaa_sharing]] link:{APISpecURL}#cl_khr_gl_msaa_sharing[{cl_khr_gl_msaa_sharing_EXT}] | Share OpenGL MSAA Textures with OpenCL | Extension -| [[cl_khr_gl_sharing]] link:{APISpecURL}#cl_khr_gl_sharing[`cl_khr_gl_sharing`] +| [[cl_khr_gl_sharing]] link:{APISpecURL}#cl_khr_gl_sharing[{cl_khr_gl_sharing_EXT}] | Sharing OpenGL Buffers and Textures with OpenCL | Extension -| [[cl_khr_global_int32_base_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_global_int32_base_atomics`] +| [[cl_khr_global_int32_base_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[{cl_khr_global_int32_base_atomics_EXT}] | Basic Atomic Operations on 32-bit Integers in Global Memory | Core Feature in OpenCL 1.1 -| [[cl_khr_global_int32_extended_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_global_int32_extended_atomics`] +| [[cl_khr_global_int32_extended_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[{cl_khr_global_int32_extended_atomics_EXT}] | Extended Atomic Operations on 32-bit Integers in Global Memory | Core Feature in OpenCL 1.1 -| [[cl_khr_icd]] link:{APISpecURL}#cl_khr_icd[`cl_khr_icd`] +| [[cl_khr_icd]] link:{APISpecURL}#cl_khr_icd[{cl_khr_icd_EXT}] | Installable Client Drivers | Extension -| [[cl_khr_il_program]] link:{APISpecURL}#cl_khr_il_program[`cl_khr_il_program`] +| [[cl_khr_il_program]] link:{APISpecURL}#cl_khr_il_program[{cl_khr_il_program_EXT}] | Support for Intermediate Language (IL) Programs (SPIR-V) | Core Feature in OpenCL 2.1 -| [[cl_khr_image2d_from_buffer]] link:{APISpecURL}#cl_khr_image2d_from_buffer[`cl_khr_image2d_from_buffer`] +| [[cl_khr_image2d_from_buffer]] link:{APISpecURL}#cl_khr_image2d_from_buffer[{cl_khr_image2d_from_buffer_EXT}] | Create 2D 
Images from Buffers | Core Feature in OpenCL 2.0 -| [[cl_khr_initialize_memory]] link:{APISpecURL}#cl_khr_initialize_memory[`cl_khr_initialize_memory`] +| [[cl_khr_initialize_memory]] link:{APISpecURL}#cl_khr_initialize_memory[{cl_khr_initialize_memory_EXT}] | Initialize Local and Private Memory on Allocation | Extension -| [[cl_khr_int64_base_atomics]] link:{APISpecURL}#cl_khr_int64_atomics[`cl_khr_int64_base_atomics`] +| [[cl_khr_int64_base_atomics]] link:{APISpecURL}#cl_khr_int64_atomics[{cl_khr_int64_base_atomics_EXT}] | Basic Atomic Operations on 64-bit Integers in Global and Local Memory | Extension -| [[cl_khr_int64_extended_atomics]] link:{APISpecURL}#cl_khr_int64_atomics[`cl_khr_int64_extended_atomics`] +| [[cl_khr_int64_extended_atomics]] link:{APISpecURL}#cl_khr_int64_atomics[{cl_khr_int64_extended_atomics_EXT}] | Extended Atomic Operations on 64-bit Integers in Global and Local Memory | Extension -| [[cl_khr_local_int32_base_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_local_int32_base_atomics`] +| [[cl_khr_local_int32_base_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[{cl_khr_local_int32_base_atomics_EXT}] | Basic Atomic Operations on 32-bit Integers in Local Memory | Core Feature in OpenCL 1.1 -| [[cl_khr_local_int32_extended_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_local_int32_extended_atomics`] +| [[cl_khr_local_int32_extended_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[{cl_khr_local_int32_extended_atomics_EXT}] | Extended Atomic Operations on 32-bit Integers in Local Memory | Core Feature in OpenCL 1.1 -| [[cl_khr_integer_dot_product]] link:{APISpecURL}#cl_khr_integer_dot_product[`cl_khr_integer_dot_product`] +| [[cl_khr_integer_dot_product]] link:{APISpecURL}#cl_khr_integer_dot_product[{cl_khr_integer_dot_product_EXT}] | Integer dot product operations | Extension -| [[cl_khr_kernel_clock]] link:{APISpecURL}#cl_khr_kernel_clock[`cl_khr_kernel_clock`] +| [[cl_khr_kernel_clock]] 
link:{APISpecURL}#cl_khr_kernel_clock[{cl_khr_kernel_clock_EXT}] | Sample Clock Values Within a Kernel | Extension -| [[cl_khr_mipmap_image]] link:{APISpecURL}#cl_khr_mipmap_image[`cl_khr_mipmap_image`] +| [[cl_khr_mipmap_image]] link:{APISpecURL}#cl_khr_mipmap_image[{cl_khr_mipmap_image_EXT}] | Create and Use Images with Mipmaps | Extension -| [[cl_khr_pci_bus_info]] link:{APISpecURL}#cl_khr_pci_bus_info[`cl_khr_pci_bus_info`] +| [[cl_khr_pci_bus_info]] link:{APISpecURL}#cl_khr_pci_bus_info[{cl_khr_pci_bus_info_EXT}] | Query PCI Bus Information for an OpenCL Device | Extension -| [[cl_khr_priority_hints]] link:{APISpecURL}#cl_khr_priority_hints[`cl_khr_priority_hints`] +| [[cl_khr_priority_hints]] link:{APISpecURL}#cl_khr_priority_hints[{cl_khr_priority_hints_EXT}] | Create Command-Queues with Different Priorities | Extension -| [[cl_khr_select_fprounding_mode]] link:{APISpecURL}#cl_khr_select_fprounding_mode[`cl_khr_select_fprounding_mode`] +| [[cl_khr_select_fprounding_mode]] link:{APISpecURL}#cl_khr_select_fprounding_mode[{cl_khr_select_fprounding_mode_EXT}] | Set the Current Kernel Rounding Mode | DEPRECATED -| [[cl_khr_semaphore]] link:{APISpecURL}#cl_khr_semaphore[`cl_khr_semaphore`] +| [[cl_khr_semaphore]] link:{APISpecURL}#cl_khr_semaphore[{cl_khr_semaphore_EXT}] | Semaphore Synchronization Primitives | Provisional Extension -| [[cl_khr_spir]] link:{APISpecURL}#cl_khr_spir[`cl_khr_spir`] +| [[cl_khr_spir]] link:{APISpecURL}#cl_khr_spir[{cl_khr_spir_EXT}] | Standard Portable Intermediate Representation Programs | Extension, Superseded by IL Programs / SPIR-V -| [[cl_khr_spirv_extended_debug_info]] link:{APISpecURL}#cl_khr_spirv_extended_debug_info[`cl_khr_spirv_extended_debug_info`] +| [[cl_khr_spirv_extended_debug_info]] link:{APISpecURL}#cl_khr_spirv_extended_debug_info[{cl_khr_spirv_extended_debug_info_EXT}] | Allows Use of the SPIR-V `OpenCL.DebugInfo.100` Extended Instruction Set | Extension -| [[cl_khr_spirv_linkonce_odr]] 
link:{APISpecURL}#cl_khr_spirv_linkonce_odr[`cl_khr_spirv_linkonce_odr`] +| [[cl_khr_spirv_linkonce_odr]] link:{APISpecURL}#cl_khr_spirv_linkonce_odr[{cl_khr_spirv_linkonce_odr_EXT}] | Allows Use of the SPIR-V `SPV_KHR_linkonce_odr` Extension | Extension -| [[cl_khr_spirv_no_integer_wrap_decoration]] link:{APISpecURL}#cl_khr_spirv_no_integer_wrap_decoration[`cl_khr_spirv_no_integer_wrap_decoration`] +| [[cl_khr_spirv_no_integer_wrap_decoration]] link:{APISpecURL}#cl_khr_spirv_no_integer_wrap_decoration[{cl_khr_spirv_no_integer_wrap_decoration_EXT}] | Allows Use of the SPIR-V `SPV_KHR_no_integer_wrap_decoration` Extension | Extension -| [[cl_khr_srgb_image_writes]] link:{APISpecURL}#cl_khr_srgb_image_writes[`cl_khr_srgb_image_writes`] +| [[cl_khr_spirv_extended_debug_info]] link:{APISpecURL}#cl_khr_spirv_extended_debug_info[{cl_khr_spirv_extended_debug_info_EXT}] +| Allows Use of the SPIR-V `OpenCL.DebugInfo.100` Extended Instruction Set +| Extension + +| [[cl_khr_spirv_linkonce_odr]] link:{APISpecURL}#cl_khr_spirv_linkonce_odr[{cl_khr_spirv_linkonce_odr_EXT}] +| Allows Use of the SPIR-V `SPV_KHR_linkonce_odr` Extension +| Extension + +| [[cl_khr_spirv_no_integer_wrap_decoration]] link:{APISpecURL}#cl_khr_spirv_no_integer_wrap_decoration[{cl_khr_spirv_no_integer_wrap_decoration_EXT}] +| Allows Use of the SPIR-V `SPV_KHR_no_integer_wrap_decoration` Extension +| Extension + +| [[cl_khr_srgb_image_writes]] link:{APISpecURL}#cl_khr_srgb_image_writes[{cl_khr_srgb_image_writes_EXT}] | Write to sRGB Images | Extension -| [[cl_khr_subgroups]] link:{APISpecURL}#cl_khr_subgroups[`cl_khr_subgroups`] +| [[cl_khr_subgroups]] link:{APISpecURL}#cl_khr_subgroups[{cl_khr_subgroups_EXT}] | Sub-Groupings of Work Items | Core Feature in OpenCL 2.1 (with minor changes) -| [[cl_khr_subgroup_ballot]] link:{APISpecURL}#cl_khr_subgroup_ballot[`cl_khr_subgroup_ballot`] +| [[cl_khr_subgroup_ballot]] link:{APISpecURL}#cl_khr_subgroup_ballot[{cl_khr_subgroup_ballot_EXT}] | Exchange Ballots 
Among Sub-Groupings of Work Items | Extension -| [[cl_khr_subgroup_clustered_reduce]] link:{APISpecURL}#cl_khr_subgroup_clustered_reduce[`cl_khr_subgroup_clustered_reduce`] +| [[cl_khr_subgroup_clustered_reduce]] link:{APISpecURL}#cl_khr_subgroup_clustered_reduce[{cl_khr_subgroup_clustered_reduce_EXT}] | Clustered Reductions for Sub-Groupings of Work Items | Extension -| [[cl_khr_subgroup_extended_types]] link:{APISpecURL}#cl_khr_subgroup_extended_types[`cl_khr_subgroup_extended_types`] +| [[cl_khr_subgroup_extended_types]] link:{APISpecURL}#cl_khr_subgroup_extended_types[{cl_khr_subgroup_extended_types_EXT}] | Additional Type Support for Sub-group Functions | Extension -| [[cl_khr_subgroup_named_barrier]] link:{APISpecURL}#cl_khr_subgroup_named_barrier[`cl_khr_subgroup_named_barrier`] +| [[cl_khr_subgroup_named_barrier]] link:{APISpecURL}#cl_khr_subgroup_named_barrier[{cl_khr_subgroup_named_barrier_EXT}] | Barriers for Subsets of a Work-group | Extension -| [[cl_khr_subgroup_non_uniform_arithmetic]] link:{APISpecURL}#cl_khr_subgroup_non_uniform_arithmetic[`cl_khr_subgroup_non_uniform_arithmetic`] +| [[cl_khr_subgroup_non_uniform_arithmetic]] link:{APISpecURL}#cl_khr_subgroup_non_uniform_arithmetic[{cl_khr_subgroup_non_uniform_arithmetic_EXT}] | Sub-group Arithmetic Functions in Non-Uniform Control Flow | Extension -| [[cl_khr_subgroup_non_uniform_vote]] link:{APISpecURL}#cl_khr_subgroup_non_uniform_vote[`cl_khr_subgroup_non_uniform_vote`] +| [[cl_khr_subgroup_non_uniform_vote]] link:{APISpecURL}#cl_khr_subgroup_non_uniform_vote[{cl_khr_subgroup_non_uniform_vote_EXT}] | Hold Votes Among Sub-Groupings of Work Items | Extension -| [[cl_khr_subgroup_rotate]] link:{APISpecURL}#cl_khr_subgroup_rotate[`cl_khr_subgroup_rotate`] +| [[cl_khr_subgroup_rotate]] link:{APISpecURL}#cl_khr_subgroup_rotate[{cl_khr_subgroup_rotate_EXT}] | Rotation Among Sub-Groupings of Work Items | Extension -| [[cl_khr_subgroup_shuffle]] 
link:{APISpecURL}#cl_khr_subgroup_shuffle[`cl_khr_subgroup_shuffle`] +| [[cl_khr_subgroup_shuffle]] link:{APISpecURL}#cl_khr_subgroup_shuffle[{cl_khr_subgroup_shuffle_EXT}] | General-Purpose Shuffles Among Sub-Groupings of Work Items | Extension -| [[cl_khr_subgroup_shuffle_relative]] link:{APISpecURL}#cl_khr_subgroup_shuffle_relative[`cl_khr_subgroup_shuffle_relative`] +| [[cl_khr_subgroup_shuffle_relative]] link:{APISpecURL}#cl_khr_subgroup_shuffle_relative[{cl_khr_subgroup_shuffle_relative_EXT}] | Relative Shuffles Among Sub-Groupings of Work Items | Extension -| [[cl_khr_suggested_local_work_size]] link:{APISpecURL}#cl_khr_suggested_local_work_size[`cl_khr_suggested_local_work_size`] +| [[cl_khr_suggested_local_work_size]] link:{APISpecURL}#cl_khr_suggested_local_work_size[{cl_khr_suggested_local_work_size_EXT}] | Query a Suggested Local Work Size | Extension -| [[cl_khr_terminate_context]] link:{APISpecURL}#cl_khr_terminate_context[`cl_khr_terminate_context`] +| [[cl_khr_terminate_context]] link:{APISpecURL}#cl_khr_terminate_context[{cl_khr_terminate_context_EXT}] | Terminate an OpenCL Context | Extension -| [[cl_khr_throttle_hints]] link:{APISpecURL}#cl_khr_throttle_hints[`cl_khr_throttle_hints`] +| [[cl_khr_throttle_hints]] link:{APISpecURL}#cl_khr_throttle_hints[{cl_khr_throttle_hints_EXT}] | Create Command-Queues with Different Throttle Policies | Extension -| [[cl_khr_work_group_uniform_arithmetic]] link:{APISpecURL}#cl_khr_work_group_uniform_arithmetic[`cl_khr_work_group_uniform_arithmetic`] +| [[cl_khr_work_group_uniform_arithmetic]] link:{APISpecURL}#cl_khr_work_group_uniform_arithmetic[{cl_khr_work_group_uniform_arithmetic_EXT}] | Work-group Uniform Arithmetic | Extension diff --git a/ext/to_core_features.asciidoc b/ext/to_core_features.asciidoc index f6b6d5baf..58c799fbc 100644 --- a/ext/to_core_features.asciidoc +++ b/ext/to_core_features.asciidoc @@ -7,57 +7,57 @@ === For OpenCL 1.1: -//* The OpenCL KHR extension *cl_khr_d3d10_sharing* has been 
added. -//* The OpenCL KHR extension *cl_khr_gl_event* has been added. +//* The OpenCL KHR extension {cl_khr_d3d10_sharing_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_gl_event_EXT} has been added. -* The functionality previously described by *cl_khr_byte_addressable_store* is now part of the core feature set. -* The functionality previously described by *cl_khr_global_int32_base_atomics*, *cl_khr_global_int32_extended_atomics*, *cl_khr_local_int32_base_atomics*, and *cl_khr_local_int32_extended_atomics* is now part of the core feature set. +* The functionality previously described by {cl_khr_byte_addressable_store_EXT} is now part of the core feature set. +* The functionality previously described by {cl_khr_global_int32_base_atomics_EXT}, {cl_khr_global_int32_extended_atomics_EXT}, {cl_khr_local_int32_base_atomics_EXT}, and {cl_khr_local_int32_extended_atomics_EXT} is now part of the core feature set. === For OpenCL 1.2: -//* The OpenCL KHR extension *cl_khr_d3d11_sharing* has been added. -//* The OpenCL KHR extension *cl_khr_depth_images* has been added. -//* The OpenCL KHR extension *cl_khr_dx9_media_sharing* has been added. -//* The OpenCL KHR extension *cl_khr_egl_event* has been added. -//* The OpenCL KHR extension *cl_khr_egl_image* has been added. -//* The OpenCL KHR extension *cl_khr_gl_depth_images* has been added. -//* The OpenCL KHR extension *cl_khr_gl_msaa_sharing* has been added. -//* The OpenCL KHR extension *cl_khr_il_program* has been added. -//* The OpenCL KHR extension *cl_khr_image2d_from_buffer* has been added. -//* The OpenCL KHR extension *cl_khr_initialize_memory* has been added. -//* The OpenCL KHR extension *cl_khr_spir* has been added. -//* The OpenCL KHR extension *cl_khr_terminate_context* has been added. - -* The functionality previously described by *cl_khr_fp64* is now an optional core feature. +//* The OpenCL KHR extension {cl_khr_d3d11_sharing_EXT} has been added. 
+//* The OpenCL KHR extension {cl_khr_depth_images_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_dx9_media_sharing_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_egl_event_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_egl_image_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_gl_depth_images_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_gl_msaa_sharing_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_il_program_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_image2d_from_buffer_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_initialize_memory_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_spir_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_terminate_context_EXT} has been added. + +* The functionality previously described by {cl_khr_fp64_EXT} is now an optional core feature. === For OpenCL 2.0: -//* The OpenCL KHR extension *cl_khr_device_enqueue_local_arg_types* has been added. -//* The OpenCL KHR extensions *cl_khr_mipmap_image* and *cl_khr_mipmap_image_writes* have been added. -//* The OpenCL KHR extension *cl_khr_subgroups* has been added. +//* The OpenCL KHR extension {cl_khr_device_enqueue_local_arg_types_EXT} has been added. +//* The OpenCL KHR extensions {cl_khr_mipmap_image_EXT} and {cl_khr_mipmap_image_writes_EXT} have been added. +//* The OpenCL KHR extension {cl_khr_subgroups_EXT} has been added. -* The functionality described by *cl_khr_3d_image_writes* is part of the core feature set. -* The functionality described by *cl_khr_create_command_queue* is part of the core feature set. -* The functionality described by *cl_khr_depth_images* is now part of the core feature set. -* The functionality described by *cl_khr_image2d_from_buffer* is now part of the core feature set. +* The functionality described by {cl_khr_3d_image_writes_EXT} is part of the core feature set. 
+* The functionality described by {cl_khr_create_command_queue_EXT} is part of the core feature set. +* The functionality described by {cl_khr_depth_images_EXT} is now part of the core feature set. +* The functionality described by {cl_khr_image2d_from_buffer_EXT} is now part of the core feature set. === For OpenCL 2.1: -//* The OpenCL KHR extension *cl_khr_priority_hints* has been added. -//* The OpenCL KHR extension *cl_khr_throttle_hints* has been added. +//* The OpenCL KHR extension {cl_khr_priority_hints_EXT} has been added. +//* The OpenCL KHR extension {cl_khr_throttle_hints_EXT} has been added. // I recall having this discussion but I don't see this extension mentioned anywhere // in the OpenCL 2.1 spec, and it would be a language change anyhow. -//* The functionality described in *cl_khr_device_enqueue_local_arg_types* is now part of the core feature set. +//* The functionality described in {cl_khr_device_enqueue_local_arg_types_EXT} is now part of the core feature set. -* The functionality described by *cl_khr_il_program* is now part of the core feature set. -* The API functionality described by *cl_khr_subgroups* is now part of the core API feature set, but the built-in functions described by *cl_khr_subgroups* must still be accessed as an extension to the OpenCL 2.0 C Language specification. +* The functionality described by {cl_khr_il_program_EXT} is now part of the core feature set. +* The API functionality described by {cl_khr_subgroups_EXT} is now part of the core API feature set, but the built-in functions described by {cl_khr_subgroups_EXT} must still be accessed as an extension to the OpenCL 2.0 C Language specification. //=== For OpenCL 2.2: // -//* The OpenCL KHR extension *cl_khr_subgroup_named_barrier* has been added. +//* The OpenCL KHR extension {cl_khr_subgroup_named_barrier_EXT} has been added. 
=== For OpenCL 3.0: -* The API functionality described by *cl_khr_extended_versioning* is now part of the core API feature set, with minor modifications. -* The built-in functions described by *cl_khr_subgroups* are now supported in OpenCL C 3.0 when the {opencl_c_subgroups} feature is supported. \ No newline at end of file +* The API functionality described by {cl_khr_extended_versioning_EXT} is now part of the core API feature set, with minor modifications. +* The built-in functions described by {cl_khr_subgroups_EXT} are now supported in OpenCL C 3.0 when the {opencl_c_subgroups} feature is supported. \ No newline at end of file diff --git a/scripts/clconventions.py b/scripts/clconventions.py index 2e601519b..c561eb73d 100644 --- a/scripts/clconventions.py +++ b/scripts/clconventions.py @@ -209,6 +209,7 @@ def extra_refpage_headers(self): 'include::{config}/opencl.asciidoc[]\n' + \ 'include::{config}/version-full-links.asciidoc[]\n' + \ 'include::{generated}/api/api-dictionary-no-links.asciidoc[]\n' + \ + 'include::{generated}/api/ext-dictionary-no-links.asciidoc[]\n' + \ 'include::{cspec}/feature-dictionary.asciidoc[]\n' + \ 'include::{apispec}/footnotes.asciidoc[]\n' + \ 'include::{cspec}/footnotes.asciidoc[]\n' diff --git a/scripts/gen_dictionaries.py b/scripts/gen_dictionaries.py index 045646c4d..142fe55a4 100755 --- a/scripts/gen_dictionaries.py +++ b/scripts/gen_dictionaries.py @@ -41,9 +41,13 @@ def GetFooter(): args = parser.parse_args() - linkFileName = args.directory + '/api-dictionary.asciidoc' - nolinkFileName = args.directory + '/api-dictionary-no-links.asciidoc' - typeFileName = args.directory + '/api-types.txt' + apiLinkFileName = args.directory + '/api-dictionary.asciidoc' + apiNoLinkFileName = args.directory + '/api-dictionary-no-links.asciidoc' + apiTypeFileName = args.directory + '/api-types.txt' + + extNoLinkFileName = args.directory + '/ext-dictionary-no-links.asciidoc' + extFullLinkFileName = args.directory + 
'/ext-dictionary-full-links.asciidoc' + extLocalLinkFileName = args.directory + '/ext-dictionary-local-links.asciidoc' specpath = args.registry #specpath = "https://raw.githubusercontent.com/KhronosGroup/OpenCL-Registry/main/xml/cl.xml" @@ -52,11 +56,18 @@ def GetFooter(): spec = parse_xml(specpath) - linkFile = open(linkFileName, 'w') - nolinkFile = open(nolinkFileName, 'w') - linkFile.write( GetHeader() ) - nolinkFile.write( GetHeader() ) - typeFile = open(typeFileName, 'w') + apiLinkFile = open(apiLinkFileName, 'w') + apiNoLinkFile = open(apiNoLinkFileName, 'w') + apiLinkFile.write( GetHeader() ) + apiNoLinkFile.write( GetHeader() ) + apiTypeFile = open(apiTypeFileName, 'w') + + extNoLinkFile = open(extNoLinkFileName, 'w') + extNoLinkFile.write( GetHeader() ) + extFullLinkFile = open(extFullLinkFileName, 'w') + extFullLinkFile.write( GetHeader() ) + extLocalLinkFile = open(extLocalLinkFileName, 'w') + extLocalLinkFile.write( GetHeader() ) # Generate the API functions dictionaries: @@ -73,18 +84,18 @@ def GetFooter(): # // clEnqueueNDRangeKernel # :clEnqueueNDRangeKernel_label: pass:q[*clEnqueueNDRangeKernel*] # :clEnqueueNDRangeKernel: <> - linkFile.write('// ' + name + '\n') - linkFile.write(':' + name + '_label: pass:q[*' + name + '*]\n') - linkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') - linkFile.write('\n') + apiLinkFile.write('// ' + name + '\n') + apiLinkFile.write(':' + name + '_label: pass:q[*' + name + '*]\n') + apiLinkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') + apiLinkFile.write('\n') # Example without link: # # // clEnqueueNDRangeKernel # :clEnqueueNDRangeKernel: pass:q[*clEnqueueNDRangeKernel*] - nolinkFile.write('// ' + name + '\n') - nolinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') - nolinkFile.write('\n') + apiNoLinkFile.write('// ' + name + '\n') + apiNoLinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') + apiNoLinkFile.write('\n') numberOfFuncs = numberOfFuncs + 1 @@ -98,13 
+109,13 @@ def GetFooter(): # # // clGetGLObjectInfo # :clGetGLObjectInfo: pass:q[*clGetGLObjectInfo*] - linkFile.write('// ' + name + '\n') - linkFile.write(':' + name + ': pass:q[*' + name + '*]\n') - linkFile.write('\n') + apiLinkFile.write('// ' + name + '\n') + apiLinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') + apiLinkFile.write('\n') - nolinkFile.write('// ' + name + '\n') - nolinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') - nolinkFile.write('\n') + apiNoLinkFile.write('// ' + name + '\n') + apiNoLinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') + apiNoLinkFile.write('\n') numberOfFuncs = numberOfFuncs + 1 @@ -132,31 +143,31 @@ def GetFooter(): #:CL_MEM_READ_ONLY_label: pass:q[`CL_MEM_READ_ONLY`] #:CL_MEM_READ_ONLY: <> #:CL_MEM_READ_ONLY_anchor: [[CL_MEM_READ_ONLY]]{CL_MEM_READ_ONLY} - linkFile.write('// ' + name + '\n') - linkFile.write('ifdef::backend-html5[]\n') - linkFile.write(':' + name + '_label: pass:q[`' + htmlName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write('ifndef::backend-html5[]\n') - linkFile.write(':' + name + '_label: pass:q[`' + otherName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') - linkFile.write(':' + name + '_anchor: [[' + name + ']]{' + name + '}\n') - linkFile.write('\n') + apiLinkFile.write('// ' + name + '\n') + apiLinkFile.write('ifdef::backend-html5[]\n') + apiLinkFile.write(':' + name + '_label: pass:q[`' + htmlName + '`]\n') + apiLinkFile.write('endif::[]\n') + apiLinkFile.write('ifndef::backend-html5[]\n') + apiLinkFile.write(':' + name + '_label: pass:q[`' + otherName + '`]\n') + apiLinkFile.write('endif::[]\n') + apiLinkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') + apiLinkFile.write(':' + name + '_anchor: [[' + name + ']]{' + name + '}\n') + apiLinkFile.write('\n') # Example without link: # # // CL_MEM_READ_ONLY #:CL_MEM_READ_ONLY: pass:q[`CL_MEM_READ_ONLY`] #:CL_MEM_READ_ONLY_anchor: 
{CL_MEM_READ_ONLY} - nolinkFile.write('// ' + name + '\n') - nolinkFile.write('ifdef::backend-html5[]\n') - nolinkFile.write(':' + name + ': pass:q[`' + htmlName + '`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write('ifndef::backend-html5[]\n') - nolinkFile.write(':' + name + ': pass:q[`' + otherName + '`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write(':' + name + '_anchor: {' + name + '}\n') - nolinkFile.write('\n') + apiNoLinkFile.write('// ' + name + '\n') + apiNoLinkFile.write('ifdef::backend-html5[]\n') + apiNoLinkFile.write(':' + name + ': pass:q[`' + htmlName + '`]\n') + apiNoLinkFile.write('endif::[]\n') + apiNoLinkFile.write('ifndef::backend-html5[]\n') + apiNoLinkFile.write(':' + name + ': pass:q[`' + otherName + '`]\n') + apiNoLinkFile.write('endif::[]\n') + apiNoLinkFile.write(':' + name + '_anchor: {' + name + '}\n') + apiNoLinkFile.write('\n') numberOfEnums = numberOfEnums + 1 @@ -192,31 +203,31 @@ def GetFooter(): #:CL_MAKE_VERSION_label: pass:q[`CL_MAKE_VERSION`] #:CL_MAKE_VERSION: <> #:CL_MAKE_VERSION_anchor: [[CL_MAKE_VERSION]]{CL_MAKE_VERSION} - linkFile.write('// ' + name + '\n') - linkFile.write('ifdef::backend-html5[]\n') - linkFile.write(':' + name + '_label: pass:q[`' + htmlName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write('ifndef::backend-html5[]\n') - linkFile.write(':' + name + '_label: pass:q[`' + otherName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') - linkFile.write(':' + name + '_anchor: [[' + name + ']]{' + name + '}\n') - linkFile.write('\n') + apiLinkFile.write('// ' + name + '\n') + apiLinkFile.write('ifdef::backend-html5[]\n') + apiLinkFile.write(':' + name + '_label: pass:q[`' + htmlName + '`]\n') + apiLinkFile.write('endif::[]\n') + apiLinkFile.write('ifndef::backend-html5[]\n') + apiLinkFile.write(':' + name + '_label: pass:q[`' + otherName + '`]\n') + apiLinkFile.write('endif::[]\n') + apiLinkFile.write(':' + name + ': 
<<' + name + ',{' + name + '_label}>>\n') + apiLinkFile.write(':' + name + '_anchor: [[' + name + ']]{' + name + '}\n') + apiLinkFile.write('\n') # Example without link: # # // CL_MAKE_VERSION #:CL_MAKE_VERSION: pass:q[`CL_MAKE_VERSION`] #:CL_MAKE_VERSION_anchor: {CL_MAKE_VERSION} - nolinkFile.write('// ' + name + '\n') - nolinkFile.write('ifdef::backend-html5[]\n') - nolinkFile.write(':' + name + ': pass:q[`' + htmlName + '`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write('ifndef::backend-html5[]\n') - nolinkFile.write(':' + name + ': pass:q[`' + otherName + '`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write(':' + name + '_anchor: {' + name + '}\n') - nolinkFile.write('\n') + apiNoLinkFile.write('// ' + name + '\n') + apiNoLinkFile.write('ifdef::backend-html5[]\n') + apiNoLinkFile.write(':' + name + ': pass:q[`' + htmlName + '`]\n') + apiNoLinkFile.write('endif::[]\n') + apiNoLinkFile.write('ifndef::backend-html5[]\n') + apiNoLinkFile.write(':' + name + ': pass:q[`' + otherName + '`]\n') + apiNoLinkFile.write('endif::[]\n') + apiNoLinkFile.write(':' + name + '_anchor: {' + name + '}\n') + apiNoLinkFile.write('\n') numberOfMacros = numberOfMacros + 1 @@ -269,51 +280,128 @@ def GetFooter(): # // cl_image_desc # :cl_image_desc_TYPE_label: pass:q[`cl_image_desc`] # :cl_image_desc_TYPE: <> - linkFile.write('// ' + name + '\n') + apiLinkFile.write('// ' + name + '\n') if addLink: - linkFile.write('ifdef::backend-html5[]\n') - linkFile.write(':' + attribName + '_label: pass:q[`' + htmlName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write('ifndef::backend-html5[]\n') - linkFile.write(':' + attribName + '_label: pass:q[`' + otherName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write(':' + attribName + ': <<' + name + ',{' + attribName + '_label}>>\n') + apiLinkFile.write('ifdef::backend-html5[]\n') + apiLinkFile.write(':' + attribName + '_label: pass:q[`' + htmlName + '`]\n') + apiLinkFile.write('endif::[]\n') + 
apiLinkFile.write('ifndef::backend-html5[]\n') + apiLinkFile.write(':' + attribName + '_label: pass:q[`' + otherName + '`]\n') + apiLinkFile.write('endif::[]\n') + apiLinkFile.write(':' + attribName + ': <<' + name + ',{' + attribName + '_label}>>\n') else: - linkFile.write('ifdef::backend-html5[]\n') - linkFile.write(':' + attribName + ': pass:q[`' + htmlName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write('ifndef::backend-html5[]\n') - linkFile.write(':' + attribName + ': pass:q[`' + otherName + '`]\n') - linkFile.write('endif::[]\n') - linkFile.write('\n') + apiLinkFile.write('ifdef::backend-html5[]\n') + apiLinkFile.write(':' + attribName + ': pass:q[`' + htmlName + '`]\n') + apiLinkFile.write('endif::[]\n') + apiLinkFile.write('ifndef::backend-html5[]\n') + apiLinkFile.write(':' + attribName + ': pass:q[`' + otherName + '`]\n') + apiLinkFile.write('endif::[]\n') + apiLinkFile.write('\n') # // cl_image_desc # :cl_image_desc_TYPE: pass:q[`cl_image_desc`] - nolinkFile.write('// ' + name + '\n') - nolinkFile.write('ifdef::backend-html5[]\n') - nolinkFile.write(':' + attribName + ': pass:q[`' + htmlName + '`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write('ifndef::backend-html5[]\n') - nolinkFile.write(':' + attribName + ': pass:q[`' + otherName + '`]\n') - nolinkFile.write('endif::[]\n') - nolinkFile.write('\n') + apiNoLinkFile.write('// ' + name + '\n') + apiNoLinkFile.write('ifdef::backend-html5[]\n') + apiNoLinkFile.write(':' + attribName + ': pass:q[`' + htmlName + '`]\n') + apiNoLinkFile.write('endif::[]\n') + apiNoLinkFile.write('ifndef::backend-html5[]\n') + apiNoLinkFile.write(':' + attribName + ': pass:q[`' + otherName + '`]\n') + apiNoLinkFile.write('endif::[]\n') + apiNoLinkFile.write('\n') # Print the type list to a file for custom syntax highlighting. # For this we only care about CL types, not base types. 
if category != 'basetype': - typeFile.write(' ' + name + '\n') + apiTypeFile.write(' ' + name + '\n') numberOfTypes = numberOfTypes + 1 print('Found ' + str(numberOfTypes) + ' API types.') - linkFile.write( GetFooter() ) - linkFile.close() - nolinkFile.write( GetFooter() ) - nolinkFile.close() - typeFile.close() - - print('Successfully generated file: ' + linkFileName) - print('Successfully generated file: ' + nolinkFileName) - print('Successfully generated file: ' + typeFileName) + # Generate the extension dictionaries: + + numberOfExtensions = 0 + + for extension in spec.findall('extensions/extension'): + name = extension.get('name') + #print('found extension: ' + name) + + # Create a variant of the name that precedes underscores with + # "zero width" spaces. This causes some long names to be + # broken at more intuitive places. + htmlName = name[:3] + name[3:].replace("_", "_") + otherName = name[:3] + name[3:].replace("_", "_​") + + # Append the extension suffix for disambiguation, since we use + # the extension name as an attribute to enable and disable + # inclusion of the extension. 
+ attribName = name + "_EXT" + + # Example with no link: + # + # // cl_khr_fp64 + #:cl_khr_fp64_EXT_label: pass:q[`cl_khr_fp64`] + #:cl_khr_fp64_EXT: [{cl_khr_fp64_EXT_label}] + extNoLinkFile.write('// ' + name + '\n') + extNoLinkFile.write('ifdef::backend-html5[]\n') + extNoLinkFile.write(':' + attribName + ': pass:q[`' + htmlName + '`]\n') + extNoLinkFile.write('endif::[]\n') + extNoLinkFile.write('ifndef::backend-html5[]\n') + extNoLinkFile.write(':' + attribName + ': pass:q[`' + otherName + '`]\n') + extNoLinkFile.write('endif::[]\n') + extNoLinkFile.write('\n') + + # Example with full link: + # + # // cl_khr_fp64 + #:cl_khr_fp64_EXT_label: pass:q[`cl_khr_fp64`] + #:cl_khr_fp64_EXT: https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#cl_khr_fp64[{cl_khr_fp64_EXT_label}^] + extFullLinkFile.write('// ' + name + '\n') + extFullLinkFile.write('ifdef::backend-html5[]\n') + extFullLinkFile.write(':' + attribName + '_label: pass:q[`' + htmlName + '`]\n') + extFullLinkFile.write('endif::[]\n') + extFullLinkFile.write('ifndef::backend-html5[]\n') + extFullLinkFile.write(':' + attribName + '_label: pass:q[`' + otherName + '`]\n') + extFullLinkFile.write('endif::[]\n') + extFullLinkFile.write(':' + attribName + ': https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#' + name + '[{' + attribName + '_label}^]\n') + extFullLinkFile.write('\n') + + # Example with local link: + # + # // cl_khr_fp64 + #:cl_khr_fp64_EXT_label: pass:q[`cl_khr_fp64`] + #:cl_khr_fp64_EXT: <> + extLocalLinkFile.write('// ' + name + '\n') + extLocalLinkFile.write('ifdef::backend-html5[]\n') + extLocalLinkFile.write(':' + attribName + '_label: pass:q[`' + htmlName + '`]\n') + extLocalLinkFile.write('endif::[]\n') + extLocalLinkFile.write('ifndef::backend-html5[]\n') + extLocalLinkFile.write(':' + attribName + '_label: pass:q[`' + otherName + '`]\n') + extLocalLinkFile.write('endif::[]\n') + extLocalLinkFile.write(':' + attribName + ': <<' + name + 
',{' + attribName + '_label}>>\n') + extLocalLinkFile.write('\n') + + numberOfExtensions = numberOfExtensions + 1 + + print('Found ' + str(numberOfExtensions) + ' extensions.') + + apiLinkFile.write( GetFooter() ) + apiLinkFile.close() + apiNoLinkFile.write( GetFooter() ) + apiNoLinkFile.close() + apiTypeFile.close() + + extNoLinkFile.write( GetFooter() ) + extNoLinkFile.close() + extFullLinkFile.write( GetFooter() ) + extFullLinkFile.close() + extLocalLinkFile.write( GetFooter() ) + extLocalLinkFile.close() + + print('Successfully generated file: ' + apiLinkFileName) + print('Successfully generated file: ' + apiNoLinkFileName) + print('Successfully generated file: ' + apiTypeFileName) + print('Successfully generated file: ' + extNoLinkFileName) + print('Successfully generated file: ' + extFullLinkFileName) + print('Successfully generated file: ' + extLocalLinkFileName) From ae466c0713592886684f2f4f592a1947767b592f Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 2 Apr 2024 18:04:25 -0700 Subject: [PATCH 097/190] adding missing anchor for CL_MEM_DEVICE_HANDLE_LIST_END_KHR (#1134) --- api/opencl_runtime_layer.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 27788730e..015322282 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -595,8 +595,8 @@ ifdef::cl_khr_external_memory[] include::{generated}/api/version-notes/CL_MEM_DEVICE_HANDLE_LIST_KHR.asciidoc[] | {cl_device_id_TYPE}[] | Specifies the list of OpenCL devices (terminated with - {CL_MEM_DEVICE_HANDLE_LIST_END_KHR}) to associate with the external - memory handle. + {CL_MEM_DEVICE_HANDLE_LIST_END_KHR_anchor}) to associate with the + external memory handle. 
endif::cl_khr_external_memory[] |==== From 59c10bd05e41b8e55d742d153408f772424cef8b Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 2 Apr 2024 22:28:08 -0700 Subject: [PATCH 098/190] remove nextafter TODO (#1140) --- OpenCL_C.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 79a27f8f6..7e2fc8a12 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -5469,8 +5469,6 @@ endif::cl_khr_fp16[] | Returns a quiet NaN. The _nancode_ may be placed in the significand of the resulting NaN. | gentype *nextafter*(gentype _x_, gentype _y_) -// TODO shouldn't this be "next representable FP value of the precision of -// its arguments"? See the OpenCL-Docs issue. | Computes the next representable floating-point value following _x_ in the direction of _y_. Thus, if _y_ is less than _x_, *nextafter*() returns the largest From 8e6e55775493eb75b255b73aa312d8f4f43fc35a Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 3 Apr 2024 07:09:44 -0700 Subject: [PATCH 099/190] briefly describe cl_mutable_base_config_khr structure (#1139) --- api/opencl_runtime_layer.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 015322282..b3b9d8f61 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -15838,8 +15838,8 @@ defined conditions: [open,refpage='cl_mutable_base_config_khr',desc='DESC',type='structs'] -- -The {cl_mutable_base_config_khr_TYPE} structure is TODO Add fuller -description here and is defined as: +The {cl_mutable_base_config_khr_TYPE} structure encapsulates all aspects of +mutation and is defined as: include::{generated}/api/structs/cl_mutable_base_config_khr.txt[] From 946edc8a541ef579a2a476664a838f89e078810d Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 3 Apr 2024 14:19:12 -0700 Subject: [PATCH 100/190] update non-provisional extensions to version 1.0.0 (#1133) * update non-provisional 
extensions to version 1.0.0 * update a few other revision comments, just in case * remove leftover conversion comments --- api/cl_khr_external_memory.asciidoc | 9 +++------ api/cl_khr_external_memory_dma_buf.asciidoc | 10 +++------- api/cl_khr_external_memory_dx.asciidoc | 6 ------ api/cl_khr_external_memory_opaque_fd.asciidoc | 10 +++------- api/cl_khr_external_memory_win32.asciidoc | 10 +++------- api/cl_khr_external_semaphore.asciidoc | 5 +++-- api/cl_khr_external_semaphore_opaque_fd.asciidoc | 4 +++- api/cl_khr_external_semaphore_sync_fd.asciidoc | 4 +++- api/cl_khr_semaphore.asciidoc | 6 +++--- 9 files changed, 24 insertions(+), 40 deletions(-) diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index 6da4455de..e06839996 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -3,15 +3,10 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory.txt[] -//@ *Revision*:: -//@ 0.9.3 -//@ *Extension and Version Dependencies*:: -//@ This extension requires OpenCL 3.0. - === Other Extension Metadata *Last Modified Date*:: - 2023-08-29 + 2024-03-15 *IP Status*:: No known IP claims. *Contributors*:: @@ -308,3 +303,5 @@ while (true) { ** Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} (provisional). + * Revision 1.0.0, 2024-03-15 + ** First non-provisional version. diff --git a/api/cl_khr_external_memory_dma_buf.asciidoc b/api/cl_khr_external_memory_dma_buf.asciidoc index 071aed992..0e01a9f46 100644 --- a/api/cl_khr_external_memory_dma_buf.asciidoc +++ b/api/cl_khr_external_memory_dma_buf.asciidoc @@ -3,16 +3,10 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_dma_buf.txt[] -//@ *Revision*:: -//@ 0.9.3 -//@ *Extension and Version Dependencies*:: -//@ This extension requires OpenCL 3.0. -//@ This extension requires the {cl_khr_external_memory_EXT} extension. 
- === Other Extension Metadata *Last Modified Date*:: - 2023-08-29 + 2024-03-15 *IP Status*:: No known IP claims. *Contributors*:: @@ -74,3 +68,5 @@ TODO ** Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} (provisional). + * Revision 1.0.0, 2024-03-15 + ** First non-provisional version. diff --git a/api/cl_khr_external_memory_dx.asciidoc b/api/cl_khr_external_memory_dx.asciidoc index ab79949f3..0f310c138 100644 --- a/api/cl_khr_external_memory_dx.asciidoc +++ b/api/cl_khr_external_memory_dx.asciidoc @@ -3,12 +3,6 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_dx.txt[] -//@ *Revision*:: -//@ 0.9.3 -//@ *Extension and Version Dependencies*:: -//@ This extension requires OpenCL 3.0. -//@ This extension requires the {cl_khr_external_memory_EXT} extension. - === Other Extension Metadata *Last Modified Date*:: diff --git a/api/cl_khr_external_memory_opaque_fd.asciidoc b/api/cl_khr_external_memory_opaque_fd.asciidoc index 3d7c059e3..392db9b9a 100644 --- a/api/cl_khr_external_memory_opaque_fd.asciidoc +++ b/api/cl_khr_external_memory_opaque_fd.asciidoc @@ -3,16 +3,10 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_opaque_fd.txt[] -//@ *Revision*:: -//@ 0.9.3 -//@ *Extension and Version Dependencies*:: -//@ This extension requires OpenCL 3.0. -//@ This extension requires the {cl_khr_external_memory_EXT} extension. - === Other Extension Metadata *Last Modified Date*:: - 2023-08-29 + 2024-03-15 *IP Status*:: No known IP claims. *Contributors*:: @@ -74,3 +68,5 @@ TODO ** Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} (provisional). + * Revision 1.0.0, 2024-03-15 + ** First non-provisional version. 
diff --git a/api/cl_khr_external_memory_win32.asciidoc b/api/cl_khr_external_memory_win32.asciidoc index bfb03d853..bddf7b55b 100644 --- a/api/cl_khr_external_memory_win32.asciidoc +++ b/api/cl_khr_external_memory_win32.asciidoc @@ -3,16 +3,10 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_win32.txt[] -//@ *Revision*:: -//@ 0.9.3 -//@ *Extension and Version Dependencies*:: -//@ This extension requires OpenCL 3.0. -//@ This extension requires the {cl_khr_external_memory_EXT} extension. - === Other Extension Metadata *Last Modified Date*:: - 2023-08-29 + 2024-03-15 *IP Status*:: No known IP claims. *Contributors*:: @@ -75,3 +69,5 @@ TODO ** Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} (provisional). + * Revision 1.0.0, 2024-03-15 + ** First non-provisional version. diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc index c5debcc7f..2b466ad5c 100644 --- a/api/cl_khr_external_semaphore.asciidoc +++ b/api/cl_khr_external_semaphore.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore.txt[] === Other Extension Metadata *Last Modified Date*:: - 2021-09-10 + 2024-03-15 *Interactions and External Dependencies*:: * This extension requires OpenCL 1.2. * The {cl_khr_semaphore_EXT} extension is required as it defines semaphore @@ -282,4 +282,5 @@ while (true) { ** Added {CL_SEMAPHORE_EXPORTABLE_KHR}. * Revision 0.9.2, 2023-11-21 ** Added re-import function call to {cl_khr_external_semaphore_sync_fd_EXT} - + * Revision 1.0.0, 2024-03-15 + ** First non-provisional version. 
diff --git a/api/cl_khr_external_semaphore_opaque_fd.asciidoc b/api/cl_khr_external_semaphore_opaque_fd.asciidoc index eb7cc4563..7e40df3ef 100644 --- a/api/cl_khr_external_semaphore_opaque_fd.asciidoc +++ b/api/cl_khr_external_semaphore_opaque_fd.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore_opaque_fd.txt[] === Other Extension Metadata *Last Modified Date*:: - 2021-09-10 + 2024-03-15 *IP Status*:: No known IP claims. *Contributors*:: @@ -43,3 +43,5 @@ introduced by {cl_khr_external_semaphore_EXT}. * Revision 0.9.0, 2021-09-10 ** Initial version (provisional). + * Revision 1.0.0, 2024-03-15 + ** First non-provisional version. diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc index 4d7e88922..f8203ce90 100644 --- a/api/cl_khr_external_semaphore_sync_fd.asciidoc +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore_sync_fd.txt[] === Other Extension Metadata *Last Modified Date*:: - 2021-09-10 + 2024-03-15 *IP Status*:: No known IP claims. *Contributors*:: @@ -56,3 +56,5 @@ external semaphore using the APIs introduced by ** Added {CL_SEMAPHORE_EXPORTABLE_KHR}. * Revision 0.9.2, 2023-11-21 ** Added re-import function call to {cl_khr_external_semaphore_sync_fd_EXT} + * Revision 1.0.0, 2024-03-15 + ** First non-provisional version. diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc index 5328e51c3..4a9a259bc 100644 --- a/api/cl_khr_semaphore.asciidoc +++ b/api/cl_khr_semaphore.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_semaphore.txt[] === Other Extension Metadata *Last Modified Date*:: - 2023-08-01 + 2024-03-15 *IP Status*:: No known IP claims. *Contributors*:: @@ -254,5 +254,5 @@ while (true) { * Revision 0.9.1, 2023-08-01 ** Changed device handle list enum to the semaphore-specific {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} (provisional). 
- - + * Revision 1.0.0, 2024-03-15 + ** First non-provisional version. From 7cfa07c10cf03b49dde10c7fe8de139926381ffb Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 3 Apr 2024 14:47:01 -0700 Subject: [PATCH 101/190] fix fract for double and half arguments (#1136) --- OpenCL_C.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 7e2fc8a12..b7a0d788f 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -5282,9 +5282,9 @@ ifdef::cl_khr_fp16[gentypeh *fmax*(gentypeh _x_, half _y_)] {opencl_c_generic_address_space} feature: gentype *fract*(gentype _x_, gentype _*iptr_) -// TODO The fp16 extension uses the constant `0x1.ffcp-1f` below - unclear -// why, see the OpenCL-Docs issue. - | Returns *fmin*(_x_ - *floor*(_x_), `0x1.fffffep-1f`). + | Returns *fmin*(_x_ - *floor*(_x_), `C`), where `C` is the constant + `0x1.fffffep-1f` for `float` arguments, `0x1.fffffffffffffp-1` for `double` + arguments, and `0x1.ffcp-1h` for `half` arguments. *floor*(x) is returned in _iptr_. 
footnote:[{fn-fract-min}] ifdef::cl_khr_fp16[] From 6c7ec6edd1bc1eae942bad8dab2ab614b3cb45e7 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 3 Apr 2024 14:53:45 -0700 Subject: [PATCH 102/190] remove TODO and TBD placeholders (#1135) --- api/cl_khr_3d_image_writes.asciidoc | 2 -- api/cl_khr_async_work_group_copy_fence.asciidoc | 2 -- api/cl_khr_byte_addressable_store.asciidoc | 2 -- api/cl_khr_create_command_queue.asciidoc | 2 -- api/cl_khr_d3d10_sharing.asciidoc | 2 -- api/cl_khr_d3d11_sharing.asciidoc | 2 -- api/cl_khr_depth_images.asciidoc | 2 -- ...cl_khr_device_enqueue_local_arg_types.asciidoc | 2 -- api/cl_khr_device_uuid.asciidoc | 4 +--- api/cl_khr_dx9_media_sharing.asciidoc | 2 -- api/cl_khr_egl_event.asciidoc | 2 -- api/cl_khr_egl_image.asciidoc | 2 -- api/cl_khr_expect_assume.asciidoc | 2 -- api/cl_khr_extended_async_copies.asciidoc | 2 -- api/cl_khr_extended_bit_ops.asciidoc | 2 -- api/cl_khr_external_memory.asciidoc | 15 --------------- api/cl_khr_external_memory_dma_buf.asciidoc | 14 -------------- api/cl_khr_external_memory_dx.asciidoc | 14 -------------- api/cl_khr_external_memory_opaque_fd.asciidoc | 14 -------------- api/cl_khr_external_memory_win32.asciidoc | 14 -------------- api/cl_khr_fp16.asciidoc | 2 -- api/cl_khr_fp64.asciidoc | 2 -- api/cl_khr_gl_depth_images.asciidoc | 2 -- api/cl_khr_gl_event.asciidoc | 2 -- api/cl_khr_gl_msaa_sharing.asciidoc | 2 -- api/cl_khr_gl_sharing.asciidoc | 2 -- api/cl_khr_global_int32_base_atomics.asciidoc | 2 -- api/cl_khr_global_int32_extended_atomics.asciidoc | 2 -- api/cl_khr_icd.asciidoc | 12 ------------ api/cl_khr_il_program.asciidoc | 2 -- api/cl_khr_image2d_from_buffer.asciidoc | 2 -- api/cl_khr_initialize_memory.asciidoc | 2 -- api/cl_khr_int64_base_atomics.asciidoc | 2 -- api/cl_khr_int64_extended_atomics.asciidoc | 2 -- api/cl_khr_local_int32_base_atomics.asciidoc | 2 -- api/cl_khr_local_int32_extended_atomics.asciidoc | 2 -- api/cl_khr_mipmap_image.asciidoc | 2 -- 
api/cl_khr_mipmap_image_writes.asciidoc | 2 -- api/cl_khr_pci_bus_info.asciidoc | 2 -- api/cl_khr_priority_hints.asciidoc | 2 -- api/cl_khr_select_fprounding_mode.asciidoc | 2 -- api/cl_khr_semaphore.asciidoc | 1 - api/cl_khr_spir.asciidoc | 2 -- api/cl_khr_spirv_extended_debug_info.asciidoc | 2 -- api/cl_khr_spirv_linkonce_odr.asciidoc | 2 -- ..._khr_spirv_no_integer_wrap_decoration.asciidoc | 2 -- api/cl_khr_srgb_image_writes.asciidoc | 2 -- api/cl_khr_subgroup_ballot.asciidoc | 2 -- api/cl_khr_subgroup_clustered_reduce.asciidoc | 2 -- api/cl_khr_subgroup_extended_types.asciidoc | 2 -- api/cl_khr_subgroup_named_barrier.asciidoc | 2 -- ...l_khr_subgroup_non_uniform_arithmetic.asciidoc | 2 -- api/cl_khr_subgroup_non_uniform_vote.asciidoc | 2 -- api/cl_khr_subgroup_shuffle.asciidoc | 2 -- api/cl_khr_subgroup_shuffle_relative.asciidoc | 2 -- api/cl_khr_subgroups.asciidoc | 2 -- api/cl_khr_suggested_local_work_size.asciidoc | 2 -- api/cl_khr_terminate_context.asciidoc | 2 -- api/cl_khr_throttle_hints.asciidoc | 2 -- 59 files changed, 1 insertion(+), 189 deletions(-) diff --git a/api/cl_khr_3d_image_writes.asciidoc b/api/cl_khr_3d_image_writes.asciidoc index 49e388f4b..8495fa692 100644 --- a/api/cl_khr_3d_image_writes.asciidoc +++ b/api/cl_khr_3d_image_writes.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_3d_image_writes.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_async_work_group_copy_fence.asciidoc b/api/cl_khr_async_work_group_copy_fence.asciidoc index 1c36daccb..321cb1a90 100644 --- a/api/cl_khr_async_work_group_copy_fence.asciidoc +++ b/api/cl_khr_async_work_group_copy_fence.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_async_work_group_copy_fence.txt[] 2021-11-10 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_byte_addressable_store.asciidoc b/api/cl_khr_byte_addressable_store.asciidoc index cdff78462..7637d79b7 100644 --- a/api/cl_khr_byte_addressable_store.asciidoc +++ b/api/cl_khr_byte_addressable_store.asciidoc @@ -11,8 +11,6 @@ include::{generated}/meta/{refprefix}cl_khr_byte_addressable_store.txt[] - Promoted to OpenCL 1.1 core *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_create_command_queue.asciidoc b/api/cl_khr_create_command_queue.asciidoc index 89e22e1d6..1905b7e34 100644 --- a/api/cl_khr_create_command_queue.asciidoc +++ b/api/cl_khr_create_command_queue.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_create_command_queue.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_d3d10_sharing.asciidoc b/api/cl_khr_d3d10_sharing.asciidoc index 96532a430..32d9dc517 100644 --- a/api/cl_khr_d3d10_sharing.asciidoc +++ b/api/cl_khr_d3d10_sharing.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_d3d10_sharing.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_d3d11_sharing.asciidoc b/api/cl_khr_d3d11_sharing.asciidoc index c3fdd8154..d57b904f8 100644 --- a/api/cl_khr_d3d11_sharing.asciidoc +++ b/api/cl_khr_d3d11_sharing.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_d3d11_sharing.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_depth_images.asciidoc b/api/cl_khr_depth_images.asciidoc index 61ceb60ae..895202496 100644 --- a/api/cl_khr_depth_images.asciidoc +++ b/api/cl_khr_depth_images.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_depth_images.txt[] 2020-04-21 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_device_enqueue_local_arg_types.asciidoc b/api/cl_khr_device_enqueue_local_arg_types.asciidoc index 1adcbd60c..f241a7d7a 100644 --- a/api/cl_khr_device_enqueue_local_arg_types.asciidoc +++ b/api/cl_khr_device_enqueue_local_arg_types.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_device_enqueue_local_arg_types.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_device_uuid.asciidoc b/api/cl_khr_device_uuid.asciidoc index 74b4f00b8..7b669c135 100644 --- a/api/cl_khr_device_uuid.asciidoc +++ b/api/cl_khr_device_uuid.asciidoc @@ -6,11 +6,9 @@ include::{generated}/meta/{refprefix}cl_khr_device_uuid.txt[] === Other Extension Metadata *Last Modified Date*:: - DateTBD + 2020-08-27 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_dx9_media_sharing.asciidoc b/api/cl_khr_dx9_media_sharing.asciidoc index 7f887bcf5..455fd4359 100644 --- a/api/cl_khr_dx9_media_sharing.asciidoc +++ b/api/cl_khr_dx9_media_sharing.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_dx9_media_sharing.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_egl_event.asciidoc b/api/cl_khr_egl_event.asciidoc index 278dec5a2..463ca2633 100644 --- a/api/cl_khr_egl_event.asciidoc +++ b/api/cl_khr_egl_event.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_egl_event.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_egl_image.asciidoc b/api/cl_khr_egl_image.asciidoc index d31989430..42e03b71e 100644 --- a/api/cl_khr_egl_image.asciidoc +++ b/api/cl_khr_egl_image.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_egl_image.txt[] 2020-04-21 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_expect_assume.asciidoc b/api/cl_khr_expect_assume.asciidoc index 615a0d559..704f87aab 100644 --- a/api/cl_khr_expect_assume.asciidoc +++ b/api/cl_khr_expect_assume.asciidoc @@ -14,8 +14,6 @@ include::{generated}/meta/{refprefix}cl_khr_expect_assume.txt[] describes how this extension modifies the OpenCL SPIR-V environment. *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_extended_async_copies.asciidoc b/api/cl_khr_extended_async_copies.asciidoc index e04cf7a40..3ac6bc321 100644 --- a/api/cl_khr_extended_async_copies.asciidoc +++ b/api/cl_khr_extended_async_copies.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_extended_async_copies.txt[] 2021-11-10 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_extended_bit_ops.asciidoc b/api/cl_khr_extended_bit_ops.asciidoc index 766306ff4..0bea6218f 100644 --- a/api/cl_khr_extended_bit_ops.asciidoc +++ b/api/cl_khr_extended_bit_ops.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_extended_bit_ops.txt[] 2021-04-22 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index e06839996..cd572a8fb 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -41,21 +41,6 @@ image objects between OpenCL and many other APIs, including: Other related extensions define specific external memory types that may be imported into OpenCL. - -==== Background - -TODO - -==== Rationale - -TODO - -=== Interactions With Other Extensions - -TODO - -// The 'New ...' 
section can be auto-generated - === New Commands * {clEnqueueAcquireExternalMemObjectsKHR} diff --git a/api/cl_khr_external_memory_dma_buf.asciidoc b/api/cl_khr_external_memory_dma_buf.asciidoc index 0e01a9f46..193f1a712 100644 --- a/api/cl_khr_external_memory_dma_buf.asciidoc +++ b/api/cl_khr_external_memory_dma_buf.asciidoc @@ -35,20 +35,6 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_dma_buf.txt[] external memory handle type that may be specified when creating a buffer or image memory object. -==== Background - -TODO - -==== Rationale - -TODO - -=== Interactions With Other Extensions - -TODO - -// The 'New ...' section can be auto-generated - === New Enums * {cl_external_memory_handle_type_khr_TYPE} diff --git a/api/cl_khr_external_memory_dx.asciidoc b/api/cl_khr_external_memory_dx.asciidoc index 0f310c138..ab742fea5 100644 --- a/api/cl_khr_external_memory_dx.asciidoc +++ b/api/cl_khr_external_memory_dx.asciidoc @@ -35,20 +35,6 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_dx.txt[] referring to Direct 3D resources as external memory handle types that may be specified when creating a buffer or image memory object. -==== Background - -TODO - -==== Rationale - -TODO - -=== Interactions With Other Extensions - -TODO - -// The 'New ...' section can be auto-generated - === New Enums * {cl_external_memory_handle_type_khr_TYPE} diff --git a/api/cl_khr_external_memory_opaque_fd.asciidoc b/api/cl_khr_external_memory_opaque_fd.asciidoc index 392db9b9a..92a3ab0ea 100644 --- a/api/cl_khr_external_memory_opaque_fd.asciidoc +++ b/api/cl_khr_external_memory_opaque_fd.asciidoc @@ -35,20 +35,6 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_opaque_fd.txt[] handle as an external memory handle type that may be specified when creating a buffer or image memory object. -==== Background - -TODO - -==== Rationale - -TODO - -=== Interactions With Other Extensions - -TODO - -// The 'New ...' 
section can be auto-generated - === New Enums * {cl_external_memory_handle_type_khr_TYPE} diff --git a/api/cl_khr_external_memory_win32.asciidoc b/api/cl_khr_external_memory_win32.asciidoc index bddf7b55b..005c9278f 100644 --- a/api/cl_khr_external_memory_win32.asciidoc +++ b/api/cl_khr_external_memory_win32.asciidoc @@ -35,20 +35,6 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_win32.txt[] external memory handle types that may be specified when creating a buffer or image memory object. -==== Background - -TODO - -==== Rationale - -TODO - -=== Interactions With Other Extensions - -TODO - -// The 'New ...' section can be auto-generated - === New Enums * {cl_external_memory_handle_type_khr_TYPE} diff --git a/api/cl_khr_fp16.asciidoc b/api/cl_khr_fp16.asciidoc index 4b7feb139..aa6a2e801 100644 --- a/api/cl_khr_fp16.asciidoc +++ b/api/cl_khr_fp16.asciidoc @@ -12,8 +12,6 @@ include::{generated}/meta/{refprefix}cl_khr_fp16.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_fp64.asciidoc b/api/cl_khr_fp64.asciidoc index 5006d8328..e56a03f1c 100644 --- a/api/cl_khr_fp64.asciidoc +++ b/api/cl_khr_fp64.asciidoc @@ -12,8 +12,6 @@ include::{generated}/meta/{refprefix}cl_khr_fp64.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_gl_depth_images.asciidoc b/api/cl_khr_gl_depth_images.asciidoc index 05258a612..6bd403900 100644 --- a/api/cl_khr_gl_depth_images.asciidoc +++ b/api/cl_khr_gl_depth_images.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_gl_depth_images.txt[] 2020-04-21 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_gl_event.asciidoc b/api/cl_khr_gl_event.asciidoc index 1239e79ff..64e13ef66 100644 --- a/api/cl_khr_gl_event.asciidoc +++ b/api/cl_khr_gl_event.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_gl_event.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_gl_msaa_sharing.asciidoc b/api/cl_khr_gl_msaa_sharing.asciidoc index 64f4557cd..20042fe9f 100644 --- a/api/cl_khr_gl_msaa_sharing.asciidoc +++ b/api/cl_khr_gl_msaa_sharing.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_gl_msaa_sharing.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_gl_sharing.asciidoc b/api/cl_khr_gl_sharing.asciidoc index 779b94e37..124f6d770 100644 --- a/api/cl_khr_gl_sharing.asciidoc +++ b/api/cl_khr_gl_sharing.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_gl_sharing.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_global_int32_base_atomics.asciidoc b/api/cl_khr_global_int32_base_atomics.asciidoc index 079a834c5..0451eaa52 100644 --- a/api/cl_khr_global_int32_base_atomics.asciidoc +++ b/api/cl_khr_global_int32_base_atomics.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_global_int32_base_atomics.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_global_int32_extended_atomics.asciidoc b/api/cl_khr_global_int32_extended_atomics.asciidoc index 59831cb02..0733e27fd 100644 --- a/api/cl_khr_global_int32_extended_atomics.asciidoc +++ b/api/cl_khr_global_int32_extended_atomics.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_global_int32_extended_atomics.txt[] 2020-04-21 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_icd.asciidoc b/api/cl_khr_icd.asciidoc index 9a4b46cc8..39d4125f3 100644 --- a/api/cl_khr_icd.asciidoc +++ b/api/cl_khr_icd.asciidoc @@ -12,8 +12,6 @@ include::{generated}/meta/{refprefix}cl_khr_icd.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description @@ -260,11 +258,6 @@ Loader. . How are OpenCL extension to be handled? + -- -// TODO: This seems out-of-date and incorrect. -//RESOLVED: OpenCL extension functions may be added to the ICD Loader as soon as they -//are implemented by any vendor. -//The suffix mechanism provides access for vendor extensions which are not yet -//added to the ICD Loader. *RESOLVED*: Extension APIs must be queried using {clGetExtensionFunctionAddressForPlatform}. -- @@ -274,11 +267,6 @@ Loader. -- *RESOLVED*: The ICD will by default choose the first enumerated platform as the `NULL` platform. -// TODO: This seems out-of-date and incorrect. -//The user can override this default by setting an environment variable -//OPENCL_ICD_DEFAULT_PLATFORM to the desired platform index. -//The API calls that deal with platforms will return {CL_INVALID_PLATFORM} if -//the index is not between zero and (number of platforms - 1), both inclusive. -- . There exists no mechanism to unload the ICD Loader, should there be one? diff --git a/api/cl_khr_il_program.asciidoc b/api/cl_khr_il_program.asciidoc index b8d0c4846..ba99d2c9d 100644 --- a/api/cl_khr_il_program.asciidoc +++ b/api/cl_khr_il_program.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_il_program.txt[] 2020-04-21 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_image2d_from_buffer.asciidoc b/api/cl_khr_image2d_from_buffer.asciidoc index be7783c35..a698ce04e 100644 --- a/api/cl_khr_image2d_from_buffer.asciidoc +++ b/api/cl_khr_image2d_from_buffer.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_image2d_from_buffer.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_initialize_memory.asciidoc b/api/cl_khr_initialize_memory.asciidoc index f2c0ce800..ac0310242 100644 --- a/api/cl_khr_initialize_memory.asciidoc +++ b/api/cl_khr_initialize_memory.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_initialize_memory.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_int64_base_atomics.asciidoc b/api/cl_khr_int64_base_atomics.asciidoc index ac688d915..6026c1624 100644 --- a/api/cl_khr_int64_base_atomics.asciidoc +++ b/api/cl_khr_int64_base_atomics.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_int64_base_atomics.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_int64_extended_atomics.asciidoc b/api/cl_khr_int64_extended_atomics.asciidoc index 69416081c..6eeedca0c 100644 --- a/api/cl_khr_int64_extended_atomics.asciidoc +++ b/api/cl_khr_int64_extended_atomics.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_int64_extended_atomics.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_local_int32_base_atomics.asciidoc b/api/cl_khr_local_int32_base_atomics.asciidoc index 51300f0c5..cdffd332a 100644 --- a/api/cl_khr_local_int32_base_atomics.asciidoc +++ b/api/cl_khr_local_int32_base_atomics.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_local_int32_base_atomics.txt[] 2020-04-21 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_local_int32_extended_atomics.asciidoc b/api/cl_khr_local_int32_extended_atomics.asciidoc index 917d2e26a..e78b7a872 100644 --- a/api/cl_khr_local_int32_extended_atomics.asciidoc +++ b/api/cl_khr_local_int32_extended_atomics.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_local_int32_extended_atomics.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_mipmap_image.asciidoc b/api/cl_khr_mipmap_image.asciidoc index a500ed507..203694a3f 100644 --- a/api/cl_khr_mipmap_image.asciidoc +++ b/api/cl_khr_mipmap_image.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_mipmap_image.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_mipmap_image_writes.asciidoc b/api/cl_khr_mipmap_image_writes.asciidoc index 7051ca598..194e0c977 100644 --- a/api/cl_khr_mipmap_image_writes.asciidoc +++ b/api/cl_khr_mipmap_image_writes.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_mipmap_image_writes.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_pci_bus_info.asciidoc b/api/cl_khr_pci_bus_info.asciidoc index 3477565b5..0279a5191 100644 --- a/api/cl_khr_pci_bus_info.asciidoc +++ b/api/cl_khr_pci_bus_info.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_pci_bus_info.txt[] 2021-04-19 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_priority_hints.asciidoc b/api/cl_khr_priority_hints.asciidoc index 1e7981dd3..dfd29df15 100644 --- a/api/cl_khr_priority_hints.asciidoc +++ b/api/cl_khr_priority_hints.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_priority_hints.txt[] 2020-04-21 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_select_fprounding_mode.asciidoc b/api/cl_khr_select_fprounding_mode.asciidoc index 25a1fa4c3..1d9951826 100644 --- a/api/cl_khr_select_fprounding_mode.asciidoc +++ b/api/cl_khr_select_fprounding_mode.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_select_fprounding_mode.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc index 4a9a259bc..721a3da2e 100644 --- a/api/cl_khr_semaphore.asciidoc +++ b/api/cl_khr_semaphore.asciidoc @@ -89,7 +89,6 @@ In particular, this extension defines: ** {CL_SEMAPHORE_TYPE_KHR} ** {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} ** {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR} -// TODO these are not described anywhere in the extension spec document * {cl_command_type_TYPE} ** {CL_COMMAND_SEMAPHORE_WAIT_KHR} ** {CL_COMMAND_SEMAPHORE_SIGNAL_KHR} diff --git a/api/cl_khr_spir.asciidoc b/api/cl_khr_spir.asciidoc index d794497d6..16573d170 100644 --- a/api/cl_khr_spir.asciidoc +++ b/api/cl_khr_spir.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_spir.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_spirv_extended_debug_info.asciidoc b/api/cl_khr_spirv_extended_debug_info.asciidoc index 5e280fb1a..d63208501 100644 --- a/api/cl_khr_spirv_extended_debug_info.asciidoc +++ b/api/cl_khr_spirv_extended_debug_info.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_spirv_extended_debug_info.txt[] 2020-04-21 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_spirv_linkonce_odr.asciidoc b/api/cl_khr_spirv_linkonce_odr.asciidoc index f35df832f..887b5e74a 100644 --- a/api/cl_khr_spirv_linkonce_odr.asciidoc +++ b/api/cl_khr_spirv_linkonce_odr.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_spirv_linkonce_odr.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc b/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc index d8456b6ab..2f0ca0122 100644 --- a/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc +++ b/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_spirv_no_integer_wrap_decoration.txt 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_srgb_image_writes.asciidoc b/api/cl_khr_srgb_image_writes.asciidoc index 95ca39656..79c3ea16d 100644 --- a/api/cl_khr_srgb_image_writes.asciidoc +++ b/api/cl_khr_srgb_image_writes.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_srgb_image_writes.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_subgroup_ballot.asciidoc b/api/cl_khr_subgroup_ballot.asciidoc index f0cc70d64..ae17ced98 100644 --- a/api/cl_khr_subgroup_ballot.asciidoc +++ b/api/cl_khr_subgroup_ballot.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_ballot.txt[] 2020-12-15 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_subgroup_clustered_reduce.asciidoc b/api/cl_khr_subgroup_clustered_reduce.asciidoc index a6b8b5c65..9e6b7a078 100644 --- a/api/cl_khr_subgroup_clustered_reduce.asciidoc +++ b/api/cl_khr_subgroup_clustered_reduce.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_clustered_reduce.txt[] 2020-12-15 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_subgroup_extended_types.asciidoc b/api/cl_khr_subgroup_extended_types.asciidoc index 222b3db32..3f73839af 100644 --- a/api/cl_khr_subgroup_extended_types.asciidoc +++ b/api/cl_khr_subgroup_extended_types.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_extended_types.txt[] 2020-12-15 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_subgroup_named_barrier.asciidoc b/api/cl_khr_subgroup_named_barrier.asciidoc index d9dd17dbc..d8f8da0e0 100644 --- a/api/cl_khr_subgroup_named_barrier.asciidoc +++ b/api/cl_khr_subgroup_named_barrier.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_named_barrier.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc index a1bee7e11..cc0657cdb 100644 --- a/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc +++ b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_arithmetic.txt[ 2020-12-15 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_subgroup_non_uniform_vote.asciidoc b/api/cl_khr_subgroup_non_uniform_vote.asciidoc index 2188866ca..5803cf26b 100644 --- a/api/cl_khr_subgroup_non_uniform_vote.asciidoc +++ b/api/cl_khr_subgroup_non_uniform_vote.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_vote.txt[] 2020-12-15 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_subgroup_shuffle.asciidoc b/api/cl_khr_subgroup_shuffle.asciidoc index 44981e98d..12d4f6774 100644 --- a/api/cl_khr_subgroup_shuffle.asciidoc +++ b/api/cl_khr_subgroup_shuffle.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle.txt[] 2020-12-15 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_subgroup_shuffle_relative.asciidoc b/api/cl_khr_subgroup_shuffle_relative.asciidoc index 48f5f9723..f297be5a9 100644 --- a/api/cl_khr_subgroup_shuffle_relative.asciidoc +++ b/api/cl_khr_subgroup_shuffle_relative.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle_relative.txt[] 2020-12-15 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_subgroups.asciidoc b/api/cl_khr_subgroups.asciidoc index a4f833747..56c3191c1 100644 --- a/api/cl_khr_subgroups.asciidoc +++ b/api/cl_khr_subgroups.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_subgroups.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_suggested_local_work_size.asciidoc b/api/cl_khr_suggested_local_work_size.asciidoc index d5c3faab4..b1f46baab 100644 --- a/api/cl_khr_suggested_local_work_size.asciidoc +++ b/api/cl_khr_suggested_local_work_size.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_suggested_local_work_size.txt[] 2021-04-22 *IP Status*:: No known IP claims. 
-*Contributors*:: - TBD === Description diff --git a/api/cl_khr_terminate_context.asciidoc b/api/cl_khr_terminate_context.asciidoc index bc89dcc7e..2bc9de8b0 100644 --- a/api/cl_khr_terminate_context.asciidoc +++ b/api/cl_khr_terminate_context.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_terminate_context.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description diff --git a/api/cl_khr_throttle_hints.asciidoc b/api/cl_khr_throttle_hints.asciidoc index 0de50159d..4155f1edf 100644 --- a/api/cl_khr_throttle_hints.asciidoc +++ b/api/cl_khr_throttle_hints.asciidoc @@ -9,8 +9,6 @@ include::{generated}/meta/{refprefix}cl_khr_throttle_hints.txt[] 2020-04-21 *IP Status*:: No known IP claims. -*Contributors*:: - TBD === Description From 916c5ded278530c679b11e897fd388519b91ddac Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 4 Apr 2024 07:32:22 -0700 Subject: [PATCH 103/190] tidy up cl_khr_create_command_queue (#1138) Removes unneeded special-case text regarding the type accepted for CL_QUEUE_PROPERTIES. Calling this a cl_bitfield was a bug all along, and using cl_command_queue_properties is fine because it is a bitfield anyhow. Generalizes some text so it is not specific to OpenCL 2.x devices. Removes a TODO - no changes required. --- api/cl_khr_create_command_queue.asciidoc | 18 +++--------------- api/opencl_runtime_layer.asciidoc | 4 ---- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/api/cl_khr_create_command_queue.asciidoc b/api/cl_khr_create_command_queue.asciidoc index 1905b7e34..7ac0a9ad6 100644 --- a/api/cl_khr_create_command_queue.asciidoc +++ b/api/cl_khr_create_command_queue.asciidoc @@ -24,21 +24,11 @@ No new command-queue properties are required by this extension. Applications may use the existing {CL_DEVICE_QUEUE_PROPERTIES} query to determine command-queue properties that are supported by the device. -OpenCL 2.x devices may support this extension for compatibility. 
+Newer OpenCL devices may support this extension for compatibility. In this scenario, the function added by this extension will have the same capabilities as the core {clCreateCommandQueueWithProperties} API. -Applications that only target OpenCL 2.x devices should use the core OpenCL -2.x {clCreateCommandQueueWithProperties} API instead of this extension API. - -NOTE: The type of the property value passed as {CL_QUEUE_PROPERTIES} to -{clCreateCommandQueueWithPropertiesKHR} is specified as -{cl_bitfield_TYPE} while the type passed to -{clCreateCommandQueueWithProperties} is -{cl_command_queue_properties_TYPE}. -While this is not a promotion in terms of the suffixing, both types -are aliased to {cl_ulong_TYPE}, so no ABI or compiler issues should -result if the extension and core APIs are supported. - +Applications that only target newer OpenCL devices should use the core +{clCreateCommandQueueWithProperties} API instead of this extension API. === New Commands @@ -48,8 +38,6 @@ result if the extension and core APIs are supported. * {cl_queue_properties_khr_TYPE} -//@ TODO Missing bitfield values allowed for CL_QUEUE_PROPERTIES? 
- === Version History * Revision 1.0.0, 2020-04-21 diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index b3b9d8f61..b8b7733c3 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -68,10 +68,6 @@ endif::cl_khr_create_command_queue[] include::{generated}/api/version-notes/CL_QUEUE_PROPERTIES.asciidoc[] | {cl_command_queue_properties_TYPE} - -ifdef::cl_khr_create_command_queue[] -or {cl_bitfield_TYPE} if the {cl_khr_create_command_queue_EXT} extension is supported -endif::cl_khr_create_command_queue[] | This is a bitfield and can be set to a combination of the following values: From d5daa112a6d4d0970d9b71df79858da7bfdc6d85 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 4 Apr 2024 07:35:45 -0700 Subject: [PATCH 104/190] bugfix: the geometric functions do not operate componentwise (#1137) --- OpenCL_C.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index b7a0d788f..5c27f1049 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -6537,13 +6537,8 @@ endif::cl_khr_fp16[] [open,refpage='geometricFunctions',desc='Geometric Functions',type='freeform',spec='clang',anchor='geometric-functions',xrefs='integerFunctions',alias='cross dot distance length normalize fast_distance fast_length fast_normalize'] -- -// TODO It is not actually true that these functions operate - -// TODO in general they *combine* components. - The <> describes the list of built-in geometric functions. -These all operate component-wise. -The description is per-component. 
The generic type name `gentypef` indicates that the function can take any of From ce461d205f92aaf1e41c7ec61585f12306d484a3 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 4 Apr 2024 11:18:29 -0700 Subject: [PATCH 105/190] update the quick reference list for finalized extensions (#1144) --- ext/quick_reference.asciidoc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index a040f1333..d583e39a1 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -98,11 +98,11 @@ Language Specifications. | [[cl_khr_external_memory]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_EXT}] | Common Functionality for External Memory Sharing -| Provisional Extension +| Extension | [[cl_khr_external_memory_dma_buf]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_dma_buf_EXT}] | dma_buf External Memory Handles -| Provisional Extension +| Extension | [[cl_khr_external_memory_dx]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_dx_EXT}] | Direct3D 11 and 12 External Memory Handles @@ -110,11 +110,11 @@ Language Specifications. | [[cl_khr_external_memory_opaque_fd]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_opaque_fd_EXT}] | Opaque File Descriptor External Memory Handles -| Provisional Extension +| Extension | [[cl_khr_external_memory_win32]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_win32_EXT}] | NT Handle External Memory Handles -| Provisional Extension +| Extension | [[cl_khr_expect_assume]] link:{APISpecURL}#cl_khr_expect_assume[{cl_khr_expect_assume_EXT}] | Kernel Optimization Hints @@ -122,7 +122,7 @@ Language Specifications. 
| [[cl_khr_external_semaphore]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_EXT}] | Common Functionality for External Semaphore Sharing -| Provisional Extension +| Extension | [[cl_khr_external_semaphore_dx_fence]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_dx_fence_EXT}] | Direct3D 12 External Semaphore Handles @@ -130,11 +130,11 @@ Language Specifications. | [[cl_khr_external_semaphore_opaque_fd]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_opaque_fd_EXT}] | Opaque File Descriptor External Semaphore Handles -| Provisional Extension +| Extension | [[cl_khr_external_semaphore_sync_fd]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_sync_fd_EXT}] | Sync FD External Semaphore Handles -| Provisional Extension +| Extension | [[cl_khr_external_semaphore_win32]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_win32_EXT}] | NT Handle External Semaphore Handles @@ -230,7 +230,7 @@ Language Specifications. 
| [[cl_khr_semaphore]] link:{APISpecURL}#cl_khr_semaphore[{cl_khr_semaphore_EXT}] | Semaphore Synchronization Primitives -| Provisional Extension +| Extension | [[cl_khr_spir]] link:{APISpecURL}#cl_khr_spir[{cl_khr_spir_EXT}] | Standard Portable Intermediate Representation Programs From b4746277d4d356633053dd7b6584d861d0126044 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 4 Apr 2024 11:28:56 -0700 Subject: [PATCH 106/190] change log with changes from v3.0.15 (#1126) * initial change log with changes from v3.0.15 * update cl_khr_kernel_clock link in a few other places * final change log updates --- api/appendix_e.asciidoc | 21 ++++++++++++++++++++- c/appendix_a.asciidoc | 9 +++++++++ env/appendix_a.asciidoc | 7 +++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index eebc7ff2d..9df39d32f 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -601,5 +601,24 @@ Changes from *v3.0.14*: Changes from *v3.0.15*: - * Added new extensions: + * Moved all KHR extension text out of the OpenCL Extension specification and into the main specifications. + The OpenCL Extension specification will be removed in a subsequent revision. + * Clarified several error conditions that could return {CL_INVALID_PLATFORM}, see {khronos-opencl-pr}/1063[#1063]. + * Strengthened requirements for the {CL_DEVICE_TYPE} query, see {khronos-opencl-pr}/1069[#1069]. + * Clarified {clSetEventCallback} behavior for command errors, see {khronos-opencl-pr}/1071[#1071]. + * Moved footnote text for {CL_KERNEL_ARG_TYPE_QUALIFIER} into the main spec, see {khronos-opencl-pr}/1097[#1097]. + * {cl_khr_command_buffer_mutable_dispatch_EXT} (provisional): + ** Added {CL_MUTABLE_DISPATCH_ASSERTS_KHR}, see {khronos-opencl-pr}/992[#992]. 
+ * {cl_khr_semaphore_EXT}: + ** Removed a redundant error condition, see {khronos-opencl-pr}/1052[#1052]. + * The following extensions have been finalized and are no longer provisional: + ** {cl_khr_semaphore_EXT} + ** {cl_khr_external_semaphore_EXT} + ** {cl_khr_external_semaphore_opaque_fd_EXT} + ** {cl_khr_external_semaphore_sync_fd_EXT} + ** {cl_khr_external_memory_EXT} + ** {cl_khr_external_memory_dma_buf_EXT} + ** {cl_khr_external_memory_opaque_fd_EXT} + ** {cl_khr_external_memory_win32_EXT} + * Added new extension: ** {cl_khr_kernel_clock_EXT} (provisional) diff --git a/c/appendix_a.asciidoc b/c/appendix_a.asciidoc index 711fe7c39..2492e585d 100644 --- a/c/appendix_a.asciidoc +++ b/c/appendix_a.asciidoc @@ -43,3 +43,12 @@ Changes from *v3.0.14*: * Clarified that the *nextafter* built-in function works with all floating-point types, see {khronos-opencl-pr}/953[#953]. * Clarified that the async copy and wait group events built-in functions must be called within converged control flow, see {khronos-opencl-pr}/1015[#1015]. * Removed unnecessary rounding mode text from the descriptions of the geometric and common functions, see {khronos-opencl-pr}/1027[#1027]. + +Changes from *v3.0.15*: + + * Moved all KHR extension text out of the OpenCL Extension specification and into the main specifications. + The OpenCL Extension specification will be removed in a subsequent revision. + * Fixed the derived formula for `atanh`, see {khronos-opencl-pr}/1048[#1048]. + * Removed an incorrect statement about geometric functions operating component-wise, see {khronos-opencl-pr}/1137[#1137]. + * Added new extension: + ** {cl_khr_kernel_clock_EXT} (provisional) diff --git a/env/appendix_a.asciidoc b/env/appendix_a.asciidoc index 2c58efbf9..e385a0e45 100644 --- a/env/appendix_a.asciidoc +++ b/env/appendix_a.asciidoc @@ -43,3 +43,10 @@ Changes from *v3.0.8*: Changes from *v3.0.14*: * Fixed several numerical compliance bugs, see {khronos-opencl-pr}/937[#937].
+ +Changes from *v3.0.15*: + + * Clarified that 16-bit and 64-bit floats can be passed as kernel arguments to SPIR-V kernels, see {khronos-opencl-pr}/1049[#1049]. + * Added the numerical value of the image channel order and image channel data type to several tables, see {khronos-opencl-pr}/1050[#1050]. + * Added new extension: + ** {cl_khr_kernel_clock_EXT} (provisional) From 3317e0096d91696e031bcc1a699773b87004699d Mon Sep 17 00:00:00 2001 From: Jon Leech <4693344+oddhack@users.noreply.github.com> Date: Mon, 8 Apr 2024 09:34:11 -0700 Subject: [PATCH 107/190] Fix a typo in the category ToC for the refpages (#1148) * Fix a typo in the category ToC for the refpages * Also fix a typo * One more typo --- man/toctail | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/toctail b/man/toctail index 82092a1ce..608ff7913 100644 --- a/man/toctail +++ b/man/toctail @@ -299,7 +299,7 @@
    • create_user_event
    • is_valid_event
    • set_user_event_status
    • -
    • capture_event_profiling_info
    • +
    • capture_event_profiling_info
    • @@ -363,7 +363,7 @@
    • mad24
    • mad_hi
    • mad_sat
    • -
    • ma
    • +
    • max
    • min
    • mul24
    • mul_hi
    • @@ -412,7 +412,7 @@
    • fdim
    • floor
    • fma
    • -
    • fma
    • +
    • fmax
    • fmin
    • fmod
    • fract
    • From 7f1b6cebc99a5a5c05cfa27904a9e92855a162bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 14 May 2024 13:38:40 +0100 Subject: [PATCH 108/190] Add two missing extensions to extension.txt (#1162) Change-Id: I44c78179ad6cfe9fc6da656aee72cae7adc0bf90 Signed-off-by: Kevin Petit --- extensions/extensions.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/extensions/extensions.txt b/extensions/extensions.txt index acf843e84..573ec1169 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -40,6 +40,8 @@ include::cl_ext_float_atomics.asciidoc[] <<< include::cl_ext_image_from_buffer.asciidoc[] <<< +include::cl_ext_image_raw10_raw12.asciidoc[] +<<< include::cl_ext_image_requirements_info.asciidoc[] // Vendor Extensions @@ -61,6 +63,8 @@ include::cl_arm_scheduling_controls.asciidoc[] <<< include::cl_img_cached_allocations.asciidoc[] <<< +include::cl_img_cancel_command.asciidoc[] +<<< include::cl_img_generate_mipmap.asciidoc[] <<< include::cl_img_mem_properties.asciidoc[] From da1e4c1da2880b7b23f9b905587fb7bcad168fa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 14 May 2024 17:13:29 +0100 Subject: [PATCH 109/190] Define extension macros in environment specification (#1163) This was missed during the spec unification work. 
Change-Id: I23107b104431abdac2eaf8a3b1ebedd10303fc12 Signed-off-by: Kevin Petit --- OpenCL_Env.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/OpenCL_Env.txt b/OpenCL_Env.txt index 96b07f4ee..d504cd504 100644 --- a/OpenCL_Env.txt +++ b/OpenCL_Env.txt @@ -2,6 +2,10 @@ // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ +// Extensions to enable +// Must be included before the header and attribs.txt +include::{generated}/specattribs.adoc[] + = The OpenCL^(TM)^ SPIR-V Environment Specification :R: pass:q,r[^(R)^] Khronos{R} OpenCL Working Group From 1d24c5f2930b0f5f68513a91399bd71a0fac07d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 14 May 2024 18:12:48 +0100 Subject: [PATCH 110/190] Remove unused cl_khr_external_memory_dx and cl_khr_external_semaphore_dx_fence extensions (#1160) No implementers (and no interest expressed), no CTS tests. These extensions should not be part of the specification. Change-Id: I064076c8075b5947ef9109558c4b03da426e4505 Signed-off-by: Kevin Petit --- api/cl_khr_external_memory_dx.asciidoc | 59 ---------------- api/cl_khr_external_semaphore.asciidoc | 3 +- ...l_khr_external_semaphore_dx_fence.asciidoc | 45 ------------ api/opencl_runtime_layer.asciidoc | 70 +------------------ ext/quick_reference.asciidoc | 8 --- xml/cl.xml | 26 +------ 6 files changed, 5 insertions(+), 206 deletions(-) delete mode 100644 api/cl_khr_external_memory_dx.asciidoc delete mode 100644 api/cl_khr_external_semaphore_dx_fence.asciidoc diff --git a/api/cl_khr_external_memory_dx.asciidoc b/api/cl_khr_external_memory_dx.asciidoc deleted file mode 100644 index ab742fea5..000000000 --- a/api/cl_khr_external_memory_dx.asciidoc +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2021-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -include::{generated}/meta/{refprefix}cl_khr_external_memory_dx.txt[] - -=== Other Extension Metadata - -*Last Modified Date*:: - 2023-08-29 -*IP Status*:: - No known IP claims. -*Contributors*:: - - Ajit Hakke-Patil, NVIDIA - - Amit Rao, NVIDIA - - Balaji Calidas, QUALCOMM - - Ben Ashbaugh, INTEL - - Carsten Rohde, NVIDIA - - Christoph Kubisch, NVIDIA - - Debalina Bhattacharjee, NVIDIA - - Faith Ekstrand, INTEL - - James Jones, NVIDIA - - Jeremy Kemp, IMAGINATION - - Joshua Kelly, QUALCOMM - - Karthik Raghavan Ravi, NVIDIA - - Kedar Patil, NVIDIA - - Kevin Petit, ARM - - Nikhil Joshi, NVIDIA - - Sharan Ashwathnarayan, NVIDIA - - Vivek Kini, NVIDIA - -=== Description - -{cl_khr_external_memory_dx_EXT} extends -{cl_external_memory_handle_type_khr_TYPE} to support Windows handles -referring to Direct 3D resources as external memory handle types that may be -specified when creating a buffer or image memory object. - -=== New Enums - - * {cl_external_memory_handle_type_khr_TYPE} - ** {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR} - ** {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR} - ** {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR} - ** {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR} - -=== Version History - - * Revision 0.9.0, 2021-09-10 - ** Initial version (provisional). - * Revision 0.9.1, 2023-05-04 - ** Clarified device handle list enum cannot be specified without an - external memory handle (provisional). - * Revision 0.9.2, 2023-08-01 - ** Changed device handle list enum to the memory-specific - {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). - * Revision 0.9.3, 2023-08-29 - ** Added query for - {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} - (provisional). 
diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc index 2b466ad5c..1bbf56796 100644 --- a/api/cl_khr_external_semaphore.asciidoc +++ b/api/cl_khr_external_semaphore.asciidoc @@ -65,8 +65,7 @@ This extension defines: semaphores created from different handle types. * API query exportable semaphores handles using specified handle type. -The layered extensions {cl_khr_external_semaphore_dx_fence_EXT}, -{cl_khr_external_semaphore_opaque_fd_EXT}, +The layered extensions {cl_khr_external_semaphore_opaque_fd_EXT}, {cl_khr_external_semaphore_sync_fd_EXT}, and {cl_khr_external_semaphore_win32_EXT} define specific external semaphores that may be imported into or exported from OpenCL. diff --git a/api/cl_khr_external_semaphore_dx_fence.asciidoc b/api/cl_khr_external_semaphore_dx_fence.asciidoc deleted file mode 100644 index e3cc259b3..000000000 --- a/api/cl_khr_external_semaphore_dx_fence.asciidoc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2021-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -include::{generated}/meta/{refprefix}cl_khr_external_semaphore_dx_fence.txt[] - -=== Other Extension Metadata - -*Last Modified Date*:: - 2021-09-10 -*IP Status*:: - No known IP claims. -*Contributors*:: - - Ajit Hakke-Patil, NVIDIA - - Amit Rao, NVIDIA - - Balaji Calidas, QUALCOMM - - Ben Ashbaugh, INTEL - - Carsten Rohde, NVIDIA - - Christoph Kubisch, NVIDIA - - Debalina Bhattacharjee, NVIDIA - - Faith Ekstrand, INTEL - - James Jones, NVIDIA - - Jeremy Kemp, IMAGINATION - - Joshua Kelly, QUALCOMM - - Karthik Raghavan Ravi, NVIDIA - - Kedar Patil, NVIDIA - - Kevin Petit, ARM - - Nikhil Joshi, NVIDIA - - Sharan Ashwathnarayan, NVIDIA - - Vivek Kini, NVIDIA - -=== Description - -{cl_khr_external_semaphore_dx_fence_EXT} supports importing and exporting a -D3D12 fence as an external semaphore using the APIs introduced by -{cl_khr_external_semaphore_EXT}. 
- -=== New Enums - - * {cl_external_semaphore_handle_type_khr_TYPE} - ** {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} - -=== Version History - - * Revision 0.9.0, 2021-09-10 - ** Initial version (provisional). diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index b8b7733c3..bfa16ffd1 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -5597,32 +5597,6 @@ The imported memory object holds a reference to its payload. ==== NT Handle Types -ifdef::cl_khr_external_memory_dx[] -The {cl_khr_external_memory_dx_EXT} extension extends -{cl_external_memory_handle_type_khr_TYPE} to support the following new types -of handles, and adds as a property that may be specified when creating a -buffer or an image memory object from an external handle: - - * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR_anchor} specifies an NT handle - returned by `IDXGIResource1::CreateSharedHandle` referring to a Direct3D - 10 or 11 texture resource. - It owns a reference to the memory used by the Direct3D resource. - * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR_anchor} specifies a global - share handle returned by `IDXGIResource::GetSharedHandle` referring to a - Direct3D 10 or 11 texture resource. - It does not own a reference to the underlying Direct3D resource, and - will therefore become invalid when all memory objects and Direct3D - resources associated with it are destroyed. - * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR_anchor} specifies an NT handle - returned by `ID3D12Device::CreateSharedHandle` referring to a Direct3D - 12 heap resource. - It owns a reference to the resources used by the Direct3D heap. - * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR_anchor} specifies an NT handle - returned by `ID3D12Device::CreateSharedHandle` referring to a Direct3D - 12 committed resource. - It owns a reference to the memory used by the Direct3D resource. 
-endif::cl_khr_external_memory_dx[] - ifdef::cl_khr_external_memory_win32[] The {cl_khr_external_memory_win32_EXT} extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types @@ -13128,42 +13102,10 @@ endif::cl_khr_external_semaphore_sync_fd[] endif::cl_khr_external_semaphore_opaque_fd,cl_khr_external_semaphore_sync_fd[] -ifdef::cl_khr_external_semaphore_dx_fence,cl_khr_external_semaphore_win32[] +ifdef::cl_khr_external_semaphore_win32[] ==== NT Handle Types -ifdef::cl_khr_external_semaphore_dx_fence[] - -The {cl_khr_external_semaphore_dx_fence_EXT} extension extends -{cl_external_semaphore_handle_type_khr_TYPE} to support the following new -types of handles, and adds as a property that may be specified when creating -a semaphore from an external handle: - - * {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} specifies an NT handle returned by - `ID3D12Device::CreateSharedHandle` referring to a Direct3D 12 fence, or - `ID3D11Device5::CreateFence` referring to a Direct3D 11 fence. - It owns a reference to the underlying synchronization primitive - associated with the Direct3D fence. - -When waiting on semaphores using {clEnqueueWaitSemaphoresKHR} or signaling -semaphores using {clEnqueueSignalSemaphoresKHR}, the semaphore payload must -be provided for semaphores created from -{CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR}. - - * If _sema_objects_ list has a mix of semaphores obtained from - {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and other handle types, then the - _sema_payload_list_ should point to a list of _num_sema_objects_ payload - values for each semaphore in _sema_objects_. - However, the payload values corresponding to semaphores with type - {CL_SEMAPHORE_TYPE_BINARY_KHR} can be set to 0 or will be ignored. - -{clEnqueueWaitSemaphoresKHR} and {clEnqueueSignalSemaphoresKHR} may return -{CL_INVALID_VALUE} if _sema_objects_ list has one or more semaphores -obtained from {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and _sema_payload_list_ -is NULL. 
- -endif::cl_khr_external_semaphore_dx_fence[] - ifdef::cl_khr_external_semaphore_win32[] The {cl_khr_external_semaphore_win32_EXT} extension extends @@ -13193,14 +13135,6 @@ endif::cl_khr_external_semaphore_win32[] |==== | Handle Type | Transference | Permanence -ifdef::cl_khr_external_semaphore_dx_fence[] -| {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR_anchor} - -include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR.asciidoc[] - | Reference - | Temporary, Permanent -endif::cl_khr_external_semaphore_dx_fence[] - ifdef::cl_khr_external_semaphore_win32[] | {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR_anchor} @@ -13223,7 +13157,7 @@ For handle types defined as NT handles, the application must release ownership using the `CloseHandle` system call when the handle is no longer needed. -endif::cl_khr_external_semaphore_dx_fence,cl_khr_external_semaphore_win32[] +endif::cl_khr_external_semaphore_win32[] endif::cl_khr_external_semaphore[] diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index d583e39a1..804c30fae 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -104,10 +104,6 @@ Language Specifications. | dma_buf External Memory Handles | Extension -| [[cl_khr_external_memory_dx]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_dx_EXT}] -| Direct3D 11 and 12 External Memory Handles -| Provisional Extension - | [[cl_khr_external_memory_opaque_fd]] link:{APISpecURL}#cl_khr_external_memory[{cl_khr_external_memory_opaque_fd_EXT}] | Opaque File Descriptor External Memory Handles | Extension @@ -124,10 +120,6 @@ Language Specifications. 
| Common Functionality for External Semaphore Sharing | Extension -| [[cl_khr_external_semaphore_dx_fence]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_dx_fence_EXT}] -| Direct3D 12 External Semaphore Handles -| Provisional Extension - | [[cl_khr_external_semaphore_opaque_fd]] link:{APISpecURL}#cl_khr_external_semaphore[{cl_khr_external_semaphore_opaque_fd_EXT}] | Opaque File Descriptor External Semaphore Handles | Extension diff --git a/xml/cl.xml b/xml/cl.xml index 6f1ae87bd..7700355ad 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1888,14 +1888,11 @@ server's OpenCL/api-docs repository. - + - - - - + @@ -7057,14 +7054,6 @@ server's OpenCL/api-docs repository.
      - - - - - - - - @@ -7131,17 +7120,6 @@ server's OpenCL/api-docs repository. - - - - - - - - - - - From 48a00b712ea6c574edbdb94e17e24ddf901fa520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 14 May 2024 19:01:17 +0100 Subject: [PATCH 111/190] Mention dependency on SPV_EXT_shader_atomic_float16_add in cl_ext_float_atomics (#1153) The AtomicFloat16AddEXT capability it defines is referred to. Change-Id: Icbae5f1ad02561e11517a47e9640eea4bc6a4283 Signed-off-by: Kevin Petit --- extensions/cl_ext_float_atomics.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/cl_ext_float_atomics.asciidoc b/extensions/cl_ext_float_atomics.asciidoc index ad4ab26fc..c3ae27883 100644 --- a/extensions/cl_ext_float_atomics.asciidoc +++ b/extensions/cl_ext_float_atomics.asciidoc @@ -93,7 +93,7 @@ The functionality added by this extension uses the OpenCL C 2.0 atomic syntax an This extension interacts with `cl_khr_fp16` by optionally adding the ability to atomically operate on 16-bit floating-point values in memory. -This extension depends on `SPV_EXT_shader_atomic_float_add` and `SPV_EXT_shader_atomic_float_min_max` for implementations that support SPIR-V and floating-point atomic add, min, or max operations. +This extension depends on `SPV_EXT_shader_atomic_float_add`, `SPV_EXT_shader_atomic_float16_add`, and `SPV_EXT_shader_atomic_float_min_max` for implementations that support SPIR-V and floating-point atomic add, min, or max operations. 
== Overview From 5adeed2385dbfa1f0cbcd44165a63d6308c27f94 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 20 May 2024 21:39:54 -0700 Subject: [PATCH 112/190] remove an extra character in an OpenCL C example (#1172) --- OpenCL_C.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 5c27f1049..610b54e3c 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -1534,7 +1534,7 @@ For example: ---------- float8 vf; -float *f = &vf.x; m // is illegal +float *f = &vf.x; // is illegal float2 *f2 = &vf.s07; // is illegal float4 *odd = &vf.odd; // is illegal From aeb1ab952569d6d1832c58fff3610b126a66e5a1 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 21 May 2024 17:47:46 +0100 Subject: [PATCH 113/190] Add revision to XML extensions (#1161) This extension adds the revision field to the XML tag for extensions. This allows a version macro to be generated with: * https://github.com/KhronosGroup/OpenCL-Headers/pull/251 * https://github.com/KhronosGroup/OpenCL-Headers/issues/248 KHR extensions are given a revision based on the semantic version of the spec. However other extensions don't use semantic versioning, and so are given a placeholder `0.0.0` value until they can be updated by the owner. The XML schema is also updated to make the revision field mandatory in the XML entry for extensions and the existence of the macro this enables is advertised to users in the spec. 
--- api/opencl_architecture.asciidoc | 19 +++ xml/cl.xml | 254 +++++++++++++++---------------- xml/registry.rnc | 4 +- 3 files changed, 148 insertions(+), 129 deletions(-) diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index 2e559530c..57cc2c17d 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -2263,6 +2263,25 @@ typedef cl_uint cl_version; ((patch) & CL_VERSION_PATCH_MASK)) ---- +[NOTE] +-- +The available version of an extension is exposed to the user via a macro +defined by the OpenCL Headers. This macro takes the format of the uppercase +extension name followed by the `_EXTENSION_VERSION` suffix. For example, +`CL_KHR_SEMAPHORE_EXTENSION_VERSION` is the macro defining the version of the +{cl_khr_semaphore_EXT} extension. + +The value of this macro is set to the {cl_version_TYPE} of the extension using +the semantic version of the extension. If no semantic version is defined for +the extension, then the value of the macro is set to `0` to represent semantic +version `0.0.0`. + +Applications can use these version macros along with the convenience macros +defined in this section to guard their code against breaking changes to the API +of extensions, in particular provisional KHR extensions which have yet to +finalize an API. +-- + [[version-name-pairing]] ==== Version-Name Pairing diff --git a/xml/cl.xml b/xml/cl.xml index 7700355ad..bca2621d9 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -5389,7 +5389,7 @@ server's OpenCL/api-docs repository. - + @@ -5436,7 +5436,7 @@ server's OpenCL/api-docs repository. - + @@ -5483,7 +5483,7 @@ server's OpenCL/api-docs repository. - + @@ -5532,7 +5532,7 @@ server's OpenCL/api-docs repository. - + @@ -5560,7 +5560,7 @@ server's OpenCL/api-docs repository. - + @@ -5574,7 +5574,7 @@ server's OpenCL/api-docs repository. - + @@ -5582,7 +5582,7 @@ server's OpenCL/api-docs repository. - + @@ -5590,7 +5590,7 @@ server's OpenCL/api-docs repository.
- + @@ -5598,7 +5598,7 @@ server's OpenCL/api-docs repository. - + @@ -5608,7 +5608,7 @@ server's OpenCL/api-docs repository. - + @@ -5622,7 +5622,7 @@ server's OpenCL/api-docs repository. - + @@ -5642,7 +5642,7 @@ server's OpenCL/api-docs repository. - + @@ -5659,7 +5659,7 @@ server's OpenCL/api-docs repository. - + @@ -5673,13 +5673,13 @@ server's OpenCL/api-docs repository. - + - + @@ -5694,7 +5694,7 @@ server's OpenCL/api-docs repository. - + @@ -5717,7 +5717,7 @@ server's OpenCL/api-docs repository. - + @@ -5728,7 +5728,7 @@ server's OpenCL/api-docs repository. - + @@ -5739,7 +5739,7 @@ server's OpenCL/api-docs repository. - + @@ -5753,7 +5753,7 @@ server's OpenCL/api-docs repository. - + @@ -5781,7 +5781,7 @@ server's OpenCL/api-docs repository. - + @@ -5790,7 +5790,7 @@ server's OpenCL/api-docs repository. - + @@ -5834,7 +5834,7 @@ server's OpenCL/api-docs repository. - + @@ -5851,7 +5851,7 @@ server's OpenCL/api-docs repository. - + @@ -5880,7 +5880,7 @@ server's OpenCL/api-docs repository. - + @@ -5888,7 +5888,7 @@ server's OpenCL/api-docs repository. - + @@ -5900,7 +5900,7 @@ server's OpenCL/api-docs repository. - + @@ -5912,7 +5912,7 @@ server's OpenCL/api-docs repository. - + @@ -5921,7 +5921,7 @@ server's OpenCL/api-docs repository. - + @@ -5930,7 +5930,7 @@ server's OpenCL/api-docs repository. - + @@ -5940,7 +5940,7 @@ server's OpenCL/api-docs repository. - + @@ -5960,7 +5960,7 @@ server's OpenCL/api-docs repository. - + @@ -5975,7 +5975,7 @@ server's OpenCL/api-docs repository. - + @@ -5991,7 +5991,7 @@ server's OpenCL/api-docs repository. - + @@ -6007,7 +6007,7 @@ server's OpenCL/api-docs repository. - + @@ -6015,7 +6015,7 @@ server's OpenCL/api-docs repository. - + @@ -6053,7 +6053,7 @@ server's OpenCL/api-docs repository. - + @@ -6101,7 +6101,7 @@ server's OpenCL/api-docs repository. - + @@ -6109,7 +6109,7 @@ server's OpenCL/api-docs repository. - + @@ -6118,13 +6118,13 @@ server's OpenCL/api-docs repository. 
- + - + @@ -6149,7 +6149,7 @@ server's OpenCL/api-docs repository. - + @@ -6176,7 +6176,7 @@ server's OpenCL/api-docs repository. - + @@ -6236,18 +6236,18 @@ server's OpenCL/api-docs repository. - + - + - + @@ -6255,7 +6255,7 @@ server's OpenCL/api-docs repository. - + @@ -6266,7 +6266,7 @@ server's OpenCL/api-docs repository. - + @@ -6278,7 +6278,7 @@ server's OpenCL/api-docs repository. - + @@ -6291,7 +6291,7 @@ server's OpenCL/api-docs repository. - + @@ -6443,7 +6443,7 @@ server's OpenCL/api-docs repository. - + @@ -6454,7 +6454,7 @@ server's OpenCL/api-docs repository. - + @@ -6495,7 +6495,7 @@ server's OpenCL/api-docs repository. - + @@ -6541,7 +6541,7 @@ server's OpenCL/api-docs repository. - + @@ -6552,7 +6552,7 @@ server's OpenCL/api-docs repository. - + @@ -6562,7 +6562,7 @@ server's OpenCL/api-docs repository. - + @@ -6623,7 +6623,7 @@ server's OpenCL/api-docs repository. - + @@ -6696,7 +6696,7 @@ server's OpenCL/api-docs repository. - + @@ -6709,7 +6709,7 @@ server's OpenCL/api-docs repository. - + @@ -6717,12 +6717,12 @@ server's OpenCL/api-docs repository. - + - + @@ -6733,7 +6733,7 @@ server's OpenCL/api-docs repository. - + @@ -6767,17 +6767,17 @@ server's OpenCL/api-docs repository. - + - + - + @@ -6785,7 +6785,7 @@ server's OpenCL/api-docs repository. - + @@ -6823,7 +6823,7 @@ server's OpenCL/api-docs repository. - + @@ -6838,7 +6838,7 @@ server's OpenCL/api-docs repository. - + @@ -6857,7 +6857,7 @@ server's OpenCL/api-docs repository. - + @@ -6882,7 +6882,7 @@ server's OpenCL/api-docs repository. - + @@ -6922,7 +6922,7 @@ server's OpenCL/api-docs repository. - + @@ -6933,7 +6933,7 @@ server's OpenCL/api-docs repository. - + @@ -6941,7 +6941,7 @@ server's OpenCL/api-docs repository. - + @@ -6980,7 +6980,7 @@ server's OpenCL/api-docs repository. - + @@ -7027,7 +7027,7 @@ server's OpenCL/api-docs repository. - + @@ -7054,7 +7054,7 @@ server's OpenCL/api-docs repository. - + @@ -7062,7 +7062,7 @@ server's OpenCL/api-docs repository. 
- + @@ -7085,7 +7085,7 @@ server's OpenCL/api-docs repository. - + @@ -7112,7 +7112,7 @@ server's OpenCL/api-docs repository. - + @@ -7120,7 +7120,7 @@ server's OpenCL/api-docs repository. - + @@ -7128,7 +7128,7 @@ server's OpenCL/api-docs repository. - + @@ -7137,37 +7137,37 @@ server's OpenCL/api-docs repository. - + - + - + - + - + - + - + @@ -7246,12 +7246,12 @@ server's OpenCL/api-docs repository. - + - + @@ -7272,7 +7272,7 @@ server's OpenCL/api-docs repository. - + @@ -7289,7 +7289,7 @@ server's OpenCL/api-docs repository. - + @@ -7306,12 +7306,12 @@ server's OpenCL/api-docs repository. - + - + @@ -7382,12 +7382,12 @@ server's OpenCL/api-docs repository. - + - + @@ -7422,41 +7422,41 @@ server's OpenCL/api-docs repository. - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -7464,7 +7464,7 @@ server's OpenCL/api-docs repository. - + diff --git a/xml/registry.rnc b/xml/registry.rnc index 210073c24..ff85c0d1d 100644 --- a/xml/registry.rnc +++ b/xml/registry.rnc @@ -445,7 +445,7 @@ Extensions = element extensions { # exactly match an API being generated (implicit ^$ surrounding). # name - extension name string # number - extension number (positive integer, should be unique) -# revision - extension spec revision (text, usually numeric major.minor.patch) +# revision - extension spec revision, must be numeric 'major.minor.patch' # sortorder - order relative to other extensions, default 0 # protect - C preprocessor symbol to conditionally define the interface # platform - should be one of the platform names defined in the @@ -479,8 +479,8 @@ Extensions = element extensions { # Not a regular expression. Extension = element extension { Name , + attribute revision { text }, attribute number { Integer } ? , - attribute revision { text } ? , attribute sortorder { xsd:integer } ?, attribute protect { text } ? , attribute platform { text } ? 
, From c5072bde05a91e5a93d1e8d64c93278234ffb01b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 4 Jun 2024 19:54:08 +0100 Subject: [PATCH 114/190] Fix links to commands in API specification (#1179) Links were not created for commands defined by extensions. With this changes all uses of e.g. {clCreateSemaphoreWithPropertiesKHR} link to the definition of the command which makes navigating the specification much easier. Change-Id: I4a9458609f4ba3229b66e3d169a68cb4564e2538 Signed-off-by: Kevin Petit --- scripts/gen_dictionaries.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/gen_dictionaries.py b/scripts/gen_dictionaries.py index 142fe55a4..4f220aa14 100755 --- a/scripts/gen_dictionaries.py +++ b/scripts/gen_dictionaries.py @@ -99,7 +99,7 @@ def GetFooter(): numberOfFuncs = numberOfFuncs + 1 - # Add extension API functions without links: + # Add extension API functions with and without links: for extension in spec.findall('extensions/extension/require'): for api in extension.findall('command'): name = api.get('name') @@ -110,7 +110,8 @@ def GetFooter(): # // clGetGLObjectInfo # :clGetGLObjectInfo: pass:q[*clGetGLObjectInfo*] apiLinkFile.write('// ' + name + '\n') - apiLinkFile.write(':' + name + ': pass:q[*' + name + '*]\n') + apiLinkFile.write(':' + name + '_label: pass:q[*' + name + '*]\n') + apiLinkFile.write(':' + name + ': <<' + name + ',{' + name + '_label}>>\n') apiLinkFile.write('\n') apiNoLinkFile.write('// ' + name + '\n') From 0cc6b2d64227a564ff4f23b188b0e0ab037c7a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 2 Jul 2024 17:08:30 +0100 Subject: [PATCH 115/190] Clarify the definition of prerequisites to commands (#923) * Clarify the definition of prerequisites to commands - Reword the first source of prerequisites so the wording is symmetrical with respect to the others (i.e. the first/second/third ...). 
- Broaden the first source of prerequisites to cover all implicit dependencies and provide an exhaustive list of how they arise: either because of barriers or because of ordering in in-order command-queues. Signed-off-by: Kevin Petit Change-Id: Ic464066261fe13756347bafb4878cd6ffb5a8427 * Update api/opencl_architecture.asciidoc --------- Signed-off-by: Kevin Petit --- api/opencl_architecture.asciidoc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index 57cc2c17d..63dc7d503 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -198,10 +198,12 @@ A command submitted to a device will not launch until prerequisites that constrain the order of commands have been resolved. These prerequisites have three sources: - * They may arise from commands submitted to a command-queue that constrain - the order in which commands are launched. - For example, commands that follow a command-queue barrier will not - launch until all commands prior to the barrier are complete. + * The first source of prerequisites is implicit dependencies between commands + enqueued to the same command-queue which arise as follows: + ** Commands enqueued after a command-queue barrier have the preceding barrier command + as a prerequisite. + ** Commands enqueued in an in-order command-queue have the command enqueued + before them as a prerequisite. * The second source of prerequisites is dependencies between commands expressed through events. A command may include an optional list of events. From eb71732c1b832f808c29b913d297e84e6a7864a4 Mon Sep 17 00:00:00 2001 From: Jon Leech <4693344+oddhack@users.noreply.github.com> Date: Tue, 9 Jul 2024 08:54:27 -0700 Subject: [PATCH 116/190] Use Khronos asciidoctor-spec Docker image in CI (#1196) * Use Khronos asciidoctor-spec Docker image in CI Per discussion with $bashbaug N.b. 
at present the CI script has less parallelism than it could, at least as I understand Actions. Some of the 'steps' could be split off into 'jobs'. Might try that next once the basic build is working. Net performance is still somewhat faster than current CI since it's generally faster to load the container than to add needed packages at each invocation, and the spec build is pretty fast, so there's not much to be gained. There was odd error behavior from shifting to the container which I have never seen in Vulkan CI, having to do with mixed ownership of files in the checked-out repository. I inserted a brute-force workaround right after the checkout action. * Update image (SHA changed, though not contents) * Update to 20240702 Docker image which sets HOME=/tmp to avoid asciidoctor-pdf permission problems with tmpfiles in home directory / * Fix SHA and remove fixed parallel job limit on manhtmlpages build * Switch to '#!/usr/bin/env python3' shebang lines Since the Docker build image runs a python virtual environment now. Also added 'scripts/runDocker' script which will invoke docker locally with the same image used in Github CI, for testing. Note this script will pull over a GB of Docker stuff onto the machine it's invoked on, if the image is not already cached. * Empty commit to try and re-trigger the 'fatal' message... ... which appears sporadic, not easily replicable. * Try to bulletproof the git invocations in Makefile * Remove diagnostic job stage after bulletproofing (hopefully) the Makefile For future reference, some of the git operations in CI and the Makefile appear to *sporadically* fail in CI because of different checked-out repo configurations. I modified the 'git symbolic-ref' and 'git log' operations invoked from the Makefile to detect errors and substitute a placeholder message, based on similar changes to the Vulkan Makefile a while back. This (appears) to eliminate the sporadic 'fatal' messages. We may need to do that to the 'git describe' as well. 
None of this reads on the generated artifacts, except that they may or may not contain accurate tag / commit comments. --- .github/workflows/presubmit.yml | 55 ++++++++++++++++------------ Makefile | 6 ++- scripts/apiconventions.py | 2 +- scripts/cgenerator.py | 2 +- scripts/checklinks.py | 2 +- scripts/clconventions.py | 2 +- scripts/docgenerator.py | 2 +- scripts/extensionmetadocgenerator.py | 2 +- scripts/genRef.py | 2 +- scripts/gen_dictionaries.py | 2 +- scripts/gen_version_notes.py | 2 +- scripts/gencl.py | 2 +- scripts/generator.py | 2 +- scripts/parse_dependency.py | 2 +- scripts/pygenerator.py | 2 +- scripts/realign.py | 2 +- scripts/reflib.py | 2 +- scripts/reg.py | 2 +- scripts/runDocker | 30 +++++++++++++++ scripts/scriptgenerator.py | 2 +- scripts/spec_tools/conventions.py | 2 +- 21 files changed, 83 insertions(+), 44 deletions(-) create mode 100755 scripts/runDocker diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 605571254..6a2d54b32 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -3,47 +3,54 @@ name: Presubmit permissions: contents: read -on: [push, pull_request] +# Controls when the action will run. +on: + push: + workflow_dispatch: + pull_request: +# These jobs are potentially parallelizeable jobs: build: - name: Build all specs + name: Build spec artifacts runs-on: ubuntu-latest + # Refer to the build container by its SHA instead of the name, to + # prevent caching problems when updating the image. + # container: khronosgroup/docker-images:asciidoctor-spec.20240702 + container: khronosgroup/docker-images@sha256:4aab96a03ef292439c9bd0f972adfa29cdf838d0909b1cb4ec2a6d7b2d14a37f steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 submodules: recursive + # If fetch-depth: 0 is not specified, then + # git describe --tags --dirty + # below fails. + # This could also affect SPECREVISION in the Makefile. 
+ fetch-depth: 0 - - name: Install required packages + # Ownerships in the working directory are odd. + # . is owned by UID 1001, while repo files are owned by root. + # This leads to many odd messages like + # fatal: detected dubious ownership in repository at '/__w/OpenCL-Docs/OpenCL-Docs' + # The 'git config' is a brute-force workaround. + - name: Git safe directory workaround run: | - sudo apt-get install -y libpango1.0-dev libwebp-dev ghostscript fonts-lyx jing libavalon-framework-java libbatik-java python3-pyparsing - sudo gem install asciidoctor -v 2.0.16 - sudo gem install coderay -v 1.1.1 - sudo gem install rouge -v 3.19.0 - sudo gem install ttfunk -v 1.7.0 - sudo gem install hexapdf -v 0.27.0 - sudo gem install asciidoctor-pdf -v 2.3.4 - sudo gem install asciidoctor-mathematical -v 0.3.5 - sudo pip install pyparsing - - - name: List git tag + git config --global --add safe.directory '*' + ls -lda . .. .git Makefile + + - name: Validate XML run: | - git describe --tags --dirty + make -C xml validate - name: Generate core specs (HTML and PDF) run: | - python3 makeSpec -clean -spec core OUTDIR=out.core -j 5 api c env ext cxx4opencl - + python3 makeSpec -clean -spec core OUTDIR=out.core -j 5 -O api c env ext cxx4opencl + - name: Generate core + extension specs (HTML) run: | - python3 makeSpec -clean -spec khr OUTDIR=out.khr -j 12 html + python3 makeSpec -clean -spec khr OUTDIR=out.khr -j -O html - name: Generate reference pages run: | - python3 makeSpec -spec khr OUTDIR=out.refpages -j 12 manhtmlpages - - - name: Validate XML - run: | - make -C xml validate + python3 makeSpec -spec khr OUTDIR=out.refpages -j -O manhtmlpages diff --git a/Makefile b/Makefile index 1540b8343..c2e00d073 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ EXTOPTIONS := $(foreach ext,$(EXTS),-extension $(ext)) QUIET ?= VERYQUIET ?= @ +PYTHON ?= python3 ASCIIDOCTOR ?= asciidoctor RM = rm -f RMRF = rm -rf @@ -72,8 +73,8 @@ SPECREVISION = $(shell echo `git describe --tags --dirty`) 
# This used to be a dependency in the spec html/pdf targets, # but that's likely to lead to merge conflicts. Just regenerate # when pushing a new spec for review to the sandbox. -SPECREMARK = from git branch: $(shell echo `git symbolic-ref --short HEAD`) \ - commit: $(shell echo `git log -1 --format="%H"`) +SPECREMARK = from git branch: $(shell echo `git symbolic-ref --short HEAD 2> /dev/null || echo Git branch not available`) \ + commit: $(shell echo `git log -1 --format="%H" 2> /dev/null || echo Git commit not available`) endif # The C++ for OpenCL document revision scheme is aligned with its release date. # Revision naming scheme is as follows: @@ -545,6 +546,7 @@ $(METADEPEND): $(APIXML) $(GENSCRIPT) attribs: $(ATTRIBFILE) $(ATTRIBFILE): + $(QUIET)$(MKDIR) $(dir $@) for attrib in $(EXTS) ; do \ echo ":$${attrib}:" ; \ done > $@ diff --git a/scripts/apiconventions.py b/scripts/apiconventions.py index 4d27d04f9..f16dcd479 100644 --- a/scripts/apiconventions.py +++ b/scripts/apiconventions.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2021-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/cgenerator.py b/scripts/cgenerator.py index f86658ee0..713113c02 100644 --- a/scripts/cgenerator.py +++ b/scripts/cgenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/checklinks.py b/scripts/checklinks.py index 816f601a7..5d79a676c 100755 --- a/scripts/checklinks.py +++ b/scripts/checklinks.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2013-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/clconventions.py b/scripts/clconventions.py index c561eb73d..734041ae1 100644 --- a/scripts/clconventions.py +++ b/scripts/clconventions.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. 
# SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/docgenerator.py b/scripts/docgenerator.py index b714ef7c4..b8c2afd0a 100644 --- a/scripts/docgenerator.py +++ b/scripts/docgenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/extensionmetadocgenerator.py b/scripts/extensionmetadocgenerator.py index a200bab95..9f93a29e8 100644 --- a/scripts/extensionmetadocgenerator.py +++ b/scripts/extensionmetadocgenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/genRef.py b/scripts/genRef.py index 9b78fd0dc..2eabd231e 100755 --- a/scripts/genRef.py +++ b/scripts/genRef.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2016-2024 The Khronos Group Inc. # diff --git a/scripts/gen_dictionaries.py b/scripts/gen_dictionaries.py index 4f220aa14..e513385f6 100755 --- a/scripts/gen_dictionaries.py +++ b/scripts/gen_dictionaries.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright 2019-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/gen_version_notes.py b/scripts/gen_version_notes.py index 21271643d..d5cd81ff0 100755 --- a/scripts/gen_version_notes.py +++ b/scripts/gen_version_notes.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright 2019-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/gencl.py b/scripts/gencl.py index 04f821ced..9cc8a1d2c 100755 --- a/scripts/gencl.py +++ b/scripts/gencl.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2013-2024 The Khronos Group Inc. 
# diff --git a/scripts/generator.py b/scripts/generator.py index dea2ffa37..c534faf85 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/parse_dependency.py b/scripts/parse_dependency.py index 5d204959c..071d7b3c3 100755 --- a/scripts/parse_dependency.py +++ b/scripts/parse_dependency.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright 2022-2024 The Khronos Group Inc. # Copyright 2003-2019 Paul McGuire diff --git a/scripts/pygenerator.py b/scripts/pygenerator.py index 6656b4605..8656587e9 100644 --- a/scripts/pygenerator.py +++ b/scripts/pygenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/realign.py b/scripts/realign.py index 495cb74ba..71f9f85eb 100755 --- a/scripts/realign.py +++ b/scripts/realign.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2013-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/reflib.py b/scripts/reflib.py index 41fec4928..bd873a365 100644 --- a/scripts/reflib.py +++ b/scripts/reflib.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2016-2024 The Khronos Group Inc. # diff --git a/scripts/reg.py b/scripts/reg.py index b8f8af7ce..d5495212c 100755 --- a/scripts/reg.py +++ b/scripts/reg.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/runDocker b/scripts/runDocker new file mode 100755 index 000000000..19bf00588 --- /dev/null +++ b/scripts/runDocker @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright 2022-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +# runDocker - run the Khronos `asciidoctor-spec` Docker image with a local +# clone of the specification repository. 
+# The following command-line tools are required to run this script: +# awk dirname docker grep id realpath +# These are all normal Linux developer tools except for 'docker' itself. + +# Determine path to repository root directory +scriptpath=`dirname $0` +repopath=`realpath $scriptpath/..` + +# Get SHA256 of the asciidoctor-spec image build used by CI. +image=`grep -m 1 khronosgroup/docker-images@sha256: $repopath/.github/workflows/presubmit.yml | \ + awk '{print $2}'` + +uid=`id -u` +gid=`id -g` +echo "Executing Docker with spec build image and mounted spec repository root:" + +# --user causes Docker to run as the specified UID:GID instead of as root +# -it runs interactively and uses a pseudotty +# --rm removes the container on exit +# -v mounts the repository clone as /opencl in the container +# $image is image to run +# /bin/bash drops into a shell in the container +set -x +docker run --network=host --user ${uid}:${gid} -it --rm -v ${repopath}:/opencl $image /bin/bash diff --git a/scripts/scriptgenerator.py b/scripts/scriptgenerator.py index f5ed14d00..27339b26f 100644 --- a/scripts/scriptgenerator.py +++ b/scripts/scriptgenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/spec_tools/conventions.py b/scripts/spec_tools/conventions.py index 50ca75d41..190bd3b88 100644 --- a/scripts/spec_tools/conventions.py +++ b/scripts/spec_tools/conventions.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc.
# From d4bbfc50a523bb41aa81c2b0d770ed50d70fd422 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 9 Jul 2024 18:09:07 +0200 Subject: [PATCH 117/190] Fix typo in clCreateProgramWithSource introduction (#1204) Signed-off-by: Sven van Haastregt --- api/opencl_runtime_layer.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index bfa16ffd1..9609c2466 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -8362,7 +8362,7 @@ A program object encapsulates the following information: [open,refpage='clCreateProgramWithSource',desc='Creates a program object for a context, and loads source code specified by text strings into the program object.',type='protos'] -- -To creates a program object for a context and load source code into that +To create a program object for a context and load source code into that object, call the function include::{generated}/api/protos/clCreateProgramWithSource.txt[] From 495fa8df56e21a6dddb5f3a91f5d6775b0333421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 9 Jul 2024 17:25:03 +0100 Subject: [PATCH 118/190] Update extension template (#1197) - Update format for specification versions. We now use MAJOR.MINOR.PATCH as opposed to MAJOR.MINOR Revision PATCH. - Include generated dictionaries by default. 
Change-Id: Ie2cd8fc08ae6ec71d340bf9f274ffb17d8ebb118 Signed-off-by: Kevin Petit --- extensions/cl_extension_template.asciidoc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/extensions/cl_extension_template.asciidoc b/extensions/cl_extension_template.asciidoc index 2020299c7..6666c6a96 100644 --- a/extensions/cl_extension_template.asciidoc +++ b/extensions/cl_extension_template.asciidoc @@ -5,6 +5,7 @@ :data-uri: :icons: font include::../config/attribs.txt[] +include::{generated}/api/api-dictionary.asciidoc[] :source-highlighter: coderay = cl_khr_extension_template @@ -137,8 +138,7 @@ Write dates in https://en.wikipedia.org/wiki/ISO_8601[ISO 8601] date format. == Dependencies -This extension is written against the OpenCL Specification -Version 1.0, Revision 1. +This extension is written against the OpenCL Specification version 3.X.Y. This extension requires OpenCL 1.0. @@ -498,6 +498,7 @@ best not to renumber issues, either. | 0.6.0 | 2020-04-20 | Alastair Murray | Use naming conventions in the new type example. | 0.7.0 | 2021-10-05 | Ben Ashbaugh | Added recommendation for bits in bitfields. | 0.8.0 | 2021-12-13 | Ben Ashbaugh | Added OpenCL C feature names section +| 0.9.0 | 2024-07-01 | Kévin Petit | Update format for spec version and include generated definitions. |==== **** From 7863cade01200674d978387ba5d1e0666339ea35 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 9 Jul 2024 10:31:00 -0600 Subject: [PATCH 119/190] cl_khr_external_semaphore: Clarify language (#938) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * cl_khr_external_semaphore: Clarify language Refined the cl_khr_external_semaphore spec. Removed references to permanence which appear to have been leveraged from the Vulkan spec but don’t apply to the OpenCL spec in its current form. 
* Fix table format --- api/opencl_runtime_layer.asciidoc | 81 +++++++++---------------------- 1 file changed, 23 insertions(+), 58 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 9609c2466..cb32ecab4 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -12866,6 +12866,10 @@ ifdef::cl_khr_external_semaphore[] [open,refpage='clGetSemaphoreHandleForTypeKHR',desc='Export external handle from a semaphore',type='protos'] -- + +Export operations have the same transference as the specified handle type's import operations. Additionally, exporting a semaphore payload to a handle with copy transference has the same side effects on the source semaphore's payload as executing a semaphore wait operation. + +Please refer to handle specific documentation for more details on transference requirements per handle type. To export an external handle from a semaphore, call the function include::{generated}/api/protos/clGetSemaphoreHandleForTypeKHR.txt[] @@ -12919,53 +12923,19 @@ Otherwise, it returns one of the following errors: === Importing Semaphore External Handles -Applications can import a semaphore payload into an existing semaphore using -an external semaphore handle. -The effects of the import operation will be either temporary or permanent, -as specified by the application. -If the import is temporary, the implementation must restore the semaphore to -its prior permanent state after submitting the next semaphore wait -operation. -Performing a subsequent temporary import on a semaphore before performing a -semaphore wait has no effect on this requirement; the next wait submitted on -the semaphore must still restore its last permanent state. -A permanent payload import behaves as if the target semaphore was destroyed, -and a new semaphore was created with the same handle but the imported -payload. 
-Because importing a semaphore payload temporarily or permanently detaches -the existing payload from a semaphore, similar usage restrictions to those -applied to {clReleaseSemaphoreKHR} are applied to any command that imports a -semaphore payload. -Which of these import types is used is referred to as the import operation's -permanence. -Each handle type supports either one or both types of permanence. - -The implementation must perform the import operation by either referencing -or copying the payload referred to by the specified external semaphore -handle, depending on the handle's type. -The import method used is referred to as the handle type's transference. -When using handle types with reference transference, importing a payload to -a semaphore adds the semaphore to the set of all semaphores sharing that -payload. -This set includes the semaphore from which the payload was exported. -Semaphore signaling and waiting operations performed on any semaphore in the -set must behave as if the set were a single semaphore. -Importing a payload using handle types with copy transference creates a -duplicate copy of the payload at the time of import, but makes no further -reference to it. -Semaphore signaling and waiting operations performed on the target of copy -imports must not affect any other semaphore or payload. - -Export operations have the same transference as the specified handle type's -import operations. -Additionally, exporting a semaphore payload to a handle with copy -transference has the same side effects on the source semaphore's payload as -executing a semaphore wait operation. -If the semaphore was using a temporarily imported payload, the semaphore's -prior permanent payload will be restored. - -Please refer to handle specific specifications for more details on -transference and permanence requirements specific to handle type. +Applications can import a semaphore payload by creating a semaphore from an external handle. 
The +implementation must perform the import operation by either referencing or copying the payload +referred to by the specified external semaphore handle, depending on the handle's type. When using +handle types with reference transference, importing a payload to a semaphore adds the semaphore to +the set of all semaphores sharing that payload. This set includes the semaphore from which the payload +was exported. Semaphore signaling and waiting operations performed on any semaphore in the set must +behave as if the set were a single semaphore. Importing a payload using handle types with copy +transference creates a duplicate copy of the payload at the time of import, but makes no further +reference to it. Semaphore signaling and waiting operations performed on the target of copy imports +must not affect any other semaphore or payload. + +Please refer to handle specific documentation for more details on transference requirements per +handle type. === Descriptions of External Semaphore Handle Types @@ -13034,17 +13004,16 @@ descriptor when exporting a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} from a endif::cl_khr_external_semaphore_sync_fd[] -.Transference and Permanence Properties for File Descriptor Handles -[width="100%",cols="60%,<20%,<20%",options="header"] +.Transference Properties for File Descriptor Handles +[width="100%",cols="60%,<40%",options="header"] |==== -| Handle Type | Transference | Permanence +| Handle Type | Transference ifdef::cl_khr_external_semaphore_opaque_fd[] | {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR_anchor} include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR.asciidoc[] | Reference - | Temporary, Permanent endif::cl_khr_external_semaphore_opaque_fd[] ifdef::cl_khr_external_semaphore_sync_fd[] @@ -13052,7 +13021,6 @@ ifdef::cl_khr_external_semaphore_sync_fd[] include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_SYNC_FD_KHR.asciidoc[] | Copy - | Temporary endif::cl_khr_external_semaphore_sync_fd[] |==== @@ -13130,24 +13098,21 @@ 
a semaphore from an external handle: endif::cl_khr_external_semaphore_win32[] -.Transference and Permanence Properties for NT Handle Types -[width="100%",cols="60%,<20%,<20%",options="header"] +.Transference Properties for NT Handle Types +[width="100%",cols="60%,<40%",options="header"] |==== -| Handle Type | Transference | Permanence +| Handle Type | Transference ifdef::cl_khr_external_semaphore_win32[] | {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR_anchor} include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR.asciidoc[] | Reference - | Temporary, Permanent | {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR_anchor} include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR.asciidoc[] | Reference - | Temporary, Permanent endif::cl_khr_external_semaphore_win32[] -|==== // TODO Why "Windows handles" here but "NT handles" elsewhere? From a879dfb2772cd34501414444b0f81a6c551283ef Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 9 Jul 2024 09:46:47 -0700 Subject: [PATCH 120/190] clarify CL_DEVICE_TYPE_DEFAULT and CL_DEVICE_TYPE_ALL for custom devices (#1117) --- api/opencl_platform_layer.asciidoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 99a6653cd..ceb6e3193 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -395,13 +395,13 @@ include::{generated}/api/version-notes/CL_DEVICE_TYPE_DEFAULT.asciidoc[] {clGetDeviceIDs} or to create OpenCL contexts using {clCreateContextFromType}, and will never be returned in {CL_DEVICE_TYPE} for any OpenCL device. - The default OpenCL device must not be a {CL_DEVICE_TYPE_CUSTOM} device. + The default OpenCL device must not be a {CL_DEVICE_TYPE_CUSTOM} device + unless it is the only device in the platform. 
| {CL_DEVICE_TYPE_ALL_anchor} include::{generated}/api/version-notes/CL_DEVICE_TYPE_ALL.asciidoc[] - | All OpenCL devices available in the platform, except for - {CL_DEVICE_TYPE_CUSTOM} devices. + | All OpenCL devices in the platform. {CL_DEVICE_TYPE_ALL} is only used to query OpenCL devices using {clGetDeviceIDs} or to create OpenCL contexts using {clCreateContextFromType}, and will never be returned in {CL_DEVICE_TYPE} From cc81a65af64cc38b0077c16c2019e6dbfcd237cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 10 Jul 2024 16:20:10 +0100 Subject: [PATCH 121/190] Fix typos in description of clReImportSemaphoreSyncFdKHR (#1208) Also add to list of new commands in extension appendix. Change-Id: I80b1d25368c2cefb0967120bdc429a8187c518c7 Signed-off-by: Kevin Petit --- api/cl_khr_external_semaphore_sync_fd.asciidoc | 1 + api/opencl_runtime_layer.asciidoc | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc index f8203ce90..aee60ec16 100644 --- a/api/cl_khr_external_semaphore_sync_fd.asciidoc +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -38,6 +38,7 @@ external semaphore using the APIs introduced by === New Commands * {clGetSemaphoreHandleForTypeKHR} + * {clReImportSemaphoreSyncFdKHR} === New Types diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index cb32ecab4..66e889320 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -13032,7 +13032,7 @@ a successful import. 
ifdef::cl_khr_external_semaphore_sync_fd[] [open,refpage='clReImportSemaphoreSyncFdKHR',desc='Re-import sync fd handle into an existing semaphore',type='protos'] -- -To re-imported a handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} into an +To re-import a handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} into an existing semaphore, call the function: include::{generated}/api/protos/clReImportSemaphoreSyncFdKHR.txt[] @@ -13051,7 +13051,7 @@ of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR}. // refError -{clGetSemaphoreHandleForTypeKHR} returns {CL_SUCCESS} if the semaphore +{clReImportSemaphoreSyncFdKHR} returns {CL_SUCCESS} if the semaphore handle is re-imported successfully. Otherwise, it returns one of the following errors: From 1cde9db68c9402af3266e1407564242589f113f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 10 Jul 2024 16:20:53 +0100 Subject: [PATCH 122/190] Add missing table end (#1206) Introduced by #938 Change-Id: Ibef16bceb5398c49a14e88818a45236d0e17acf0 Signed-off-by: Kevin Petit --- api/opencl_runtime_layer.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 66e889320..7adab199e 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -13113,6 +13113,7 @@ include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR.asci include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR.asciidoc[] | Reference endif::cl_khr_external_semaphore_win32[] +|==== // TODO Why "Windows handles" here but "NT handles" elsewhere? From aa001c56521772ba8cf3aa1edf62e7dc18011c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 10 Jul 2024 17:38:08 +0100 Subject: [PATCH 123/190] Fail spec creation if asciidoctor errors are encountered (#1205) * Fail spec creation if asciidoctor errors are encountered This would have enabled the CI to catch a markup issue introduced by #938. 
Signed-off-by: Kevin Petit Change-Id: I49de3eaf623117f7c29d1019dedf5b342766a029 * attempt to fix asciidoctor errors in API spec Change-Id: I0f9cbeddb72e0d76ba508b336d91c4ee640d77ad --------- Signed-off-by: Kevin Petit --- Makefile | 2 +- api/opencl_runtime_layer.asciidoc | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index c2e00d073..6aef16a92 100644 --- a/Makefile +++ b/Makefile @@ -117,7 +117,7 @@ ADOCCOMMONOPTS = -a apispec="$(CURDIR)/api" \ -a cspec="$(CURDIR)/c" \ -a images="$(CURDIR)/images" \ $(ATTRIBOPTS) $(NOTEOPTS) $(VERBOSE) $(ADOCEXTS) -ADOCOPTS = -d book $(ADOCCOMMONOPTS) +ADOCOPTS = --failure-level ERROR -d book $(ADOCCOMMONOPTS) # Asciidoctor options to build refpages # diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 7adab199e..d6d23bdf2 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -2524,14 +2524,14 @@ The memory layout of this image format is described below: [width="60%",cols="<10%,<10%,<10%,<10%,<60%"] |==== -| R | G | B | A | ... | +| R | G | B | A | ... |==== with the corresponding byte offsets [width="60%",cols="<10%,<10%,<10%,<10%,<60%"] |==== -| 0 | 1 | 2 | 3 | ... | +| 0 | 1 | 2 | 3 | ... |==== Similar, if `image_channel_order` = {CL_RGBA} and `image_channel_data_type` = @@ -2539,14 +2539,14 @@ Similar, if `image_channel_order` = {CL_RGBA} and `image_channel_data_type` = [width="60%",cols="<10%,<10%,<10%,<10%,<60%"] |==== -| R | G | B | A | ... | +| R | G | B | A | ... |==== with the corresponding byte offsets [width="60%",cols="<10%,<10%,<10%,<10%,<60%"] |==== -| 0 | 2 | 4 | 6 | ... | +| 0 | 2 | 4 | 6 | ... 
|==== `image_channel_data_type` values of {CL_UNORM_SHORT_565}, {CL_UNORM_SHORT_555}, From 5d894a7233aa6467e04597ef348aad96fe88c1e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Thu, 11 Jul 2024 15:38:32 +0100 Subject: [PATCH 124/190] xml: require CL/GL types for cl_khr_gl_sharing (#1210) So the type definitions are included in the generated cl_gl.h Change-Id: I65a666dde8066958897acf13fb755ae2a3f3b52d Signed-off-by: Kevin Petit --- xml/cl.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xml/cl.xml b/xml/cl.xml index bca2621d9..ac7236c12 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -6566,6 +6566,11 @@ server's OpenCL/api-docs repository. + + + + + From c382c22d1a95b7a16018ae4738db6b822407c293 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 16 Jul 2024 17:28:08 +0100 Subject: [PATCH 125/190] Add multi-device wording to clCommandBarrierWithWaitListKHR (#1146) `clCommandBarrierWithWaitListKHR` does not having wording for the `command_queue` parameter regarding when `cl_khr_command_buffer_multi_device` is enabled. --- api/opencl_runtime_layer.asciidoc | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index d6d23bdf2..fc26bc289 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -14281,10 +14281,17 @@ include::{generated}/api/protos/clCommandBarrierWithWaitListKHR.txt[] include::{generated}/api/version-notes/clCommandBarrierWithWaitListKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. - * _command_queue_ specifies the command-queue the command will be recorded - to. - This parameter is unused, as only a single - command-queue is supported, and **must** be `NULL`. + * _command_queue_ specifies the command-queue the command will be recorded to. 
+ {empty} + + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify synchronization-points that need to complete before this particular command can be executed. @@ -14332,7 +14339,14 @@ recorded after it do not execute until it completes. executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and From f4ce8f6034d74626e3e49df231fb7ce0379e9d5e Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 16 Jul 2024 17:29:03 +0100 Subject: [PATCH 126/190] Fix CL_INVALID_CONTEXT command-buffer error definitions (#1149) * Fix CL_INVALID_CONTEXT command-buffer error definitions See issue https://github.com/KhronosGroup/OpenCL-Docs/issues/1147 documenting that the error specification for `CL_INVALID_CONTEXT` doesn't take into account the variation when `cl_khr_command_buffer_multi_device` is enabled. 
Doing this change also picked up that the error wording for `clCommandSVMMemcpyKHR` and `clCommandSVMMemFillKHR` referenced the _kernel_ parameter which doesn't exist. * Address review feedback * Remove extraneous `cl_khr_command_buffer_multi_device` precondition from error wording. * Change "enabled" terminology to "supported" with regards to extensions. --- api/opencl_runtime_layer.asciidoc | 91 +++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 22 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index fc26bc289..21ea41524 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -14349,8 +14349,10 @@ Otherwise, it returns one of the following errors: command-queue listed on _command_buffer_ creation. * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - _command_buffer_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is @@ -14432,8 +14434,12 @@ Otherwise, it returns the errors defined by {clEnqueueCopyBuffer} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_buffer_, and _dst_buffer_ is not the same. 
+ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14537,8 +14543,12 @@ except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_buffer_, and _dst_buffer_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14625,8 +14635,12 @@ except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_image_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_buffer_, and _dst_image_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14719,8 +14733,12 @@ Otherwise, it returns the errors defined by {clEnqueueCopyImage} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_image_, and _dst_image_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_image_, and _dst_image_ is not the same. 
+ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14807,8 +14825,12 @@ except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_image_, and _dst_buffer_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_image_, and _dst_buffer_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14902,8 +14924,12 @@ Otherwise, it returns the errors defined by {clEnqueueFillBuffer} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _buffer_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_ and + _buffer_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14995,8 +15021,12 @@ Otherwise, it returns the errors defined by {clEnqueueFillImage} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _image_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_ and + _image_ is not the same.
+ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -15239,8 +15269,12 @@ Otherwise, it returns the errors defined by {clEnqueueNDRangeKernel} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_ and + _kernel_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -15353,10 +15387,17 @@ Otherwise, it returns the errors defined by {clEnqueueSVMMemcpy} except: more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. +ifdef::cl_khr_command_buffer_multi_device[] {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. + +endif::cl_khr_command_buffer_multi_device[] + +ifndef::cl_khr_command_buffer_multi_device[] +{CL_INVALID_CONTEXT} error case is removed. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -15459,10 +15500,16 @@ successfully. Otherwise, it returns the errors defined by more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. 
+ifdef::cl_khr_command_buffer_multi_device[] {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] + +ifndef::cl_khr_command_buffer_multi_device[] +{CL_INVALID_CONTEXT} error case is removed. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: From e958090d501cd383c371fdd2c6c5972dc0c291b3 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 16 Jul 2024 17:33:16 +0100 Subject: [PATCH 127/190] Use array for clUpdateMutableCommandsKHR. (#1045) Proposal to pass the update configs to `clUpdateMutableCommandsKHR` as an array, rather than pointer changed linked list. See https://github.com/KhronosGroup/OpenCL-Docs/issues/1041 for motivation. --- ...r_command_buffer_mutable_dispatch.asciidoc | 60 +++++------- api/opencl_runtime_layer.asciidoc | 93 +++++++++---------- xml/cl.xml | 34 +++---- 3 files changed, 80 insertions(+), 107 deletions(-) diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc index 13ada6c22..486d01d12 100644 --- a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer_mutable_dispatch.txt[ === Other Extension Metadata *Last Modified Date*:: - 2022-08-31 + 2024-06-19 *IP Status*:: No known IP claims. *Contributors*:: @@ -43,32 +43,15 @@ in a new command-buffer. 
=== Interactions With Other Extensions -The {cl_command_buffer_structure_type_khr_TYPE} type has been added to this -extension for the purpose of allowing expansion of mutable functionality in -future extensions layered on top of -{cl_khr_command_buffer_mutable_dispatch_EXT}. -Any parameter that is a structure containing a `void* next` member *must* -have a value of `next` that is either `NULL`, or is a pointer to a valid -structure defined by {cl_khr_command_buffer_mutable_dispatch_EXT} or an -extension layered on top. -To be a valid structure in the pointer chain the first member of the -structure *must* be a {cl_command_buffer_structure_type_khr_TYPE} identifier -for the structure being iterated through, and the second member a `void* -next` pointer to the next structure in the chain. - -[NOTE] -==== -This approach is based on structure pointer chains in Vulkan, for more -details see the "`Valid Usage for Structure Pointer Chains`" section of the -Vulkan specification. -==== - -This is designed so that another extension layered on -{cl_khr_command_buffer_mutable_dispatch_EXT} could allow modification of -commands recorded to a command-buffer other than kernel execution commands. -As all command recording entry-points return a {cl_mutable_command_khr_TYPE} -handle, and aspects like which {cl_mem_TYPE} object a command uses could -also be updated between enqueues of the command-buffer. +The {clUpdateMutableCommandsKHR} entry-point has been designed for the purpose +of allowing expansion of mutable functionality in future extensions layered on +top of `cl_khr_command_buffer_mutable_dispatch`. + +A new extension can define its own structure type to specify the update +configuration it requires, with a matching +{cl_command_buffer_update_type_khr_TYPE} value. This new structure type can +then be passed to {clUpdateMutableCommandsKHR} where it is reinterpreted from a +void pointer using {cl_command_buffer_update_type_khr_TYPE}. 
=== New Commands @@ -79,8 +62,7 @@ also be updated between enqueues of the command-buffer. * {cl_mutable_dispatch_fields_khr_TYPE} * {cl_mutable_command_info_khr_TYPE} - * {cl_command_buffer_structure_type_khr_TYPE} - * {cl_mutable_base_config_khr_TYPE} + * {cl_command_buffer_update_type_khr_TYPE} * {cl_mutable_dispatch_asserts_khr_TYPE} * {cl_mutable_dispatch_config_khr_TYPE} * {cl_mutable_dispatch_exec_info_khr_TYPE} @@ -115,8 +97,7 @@ also be updated between enqueues of the command-buffer. ** {CL_COMMAND_BUFFER_MUTABLE_KHR} * {cl_command_buffer_properties_khr_TYPE} ** {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} - * {cl_command_buffer_structure_type_khr_TYPE} - ** {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR} + * {cl_command_buffer_update_type_khr_TYPE} ** {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR} * New Error Codes ** {CL_INVALID_MUTABLE_COMMAND_KHR} @@ -274,8 +255,6 @@ kernel void vector_addition(global int* tile1, global int* tile2, cl_mutable_dispatch_arg_khr arg_2{2, sizeof(cl_mem), &output_buffer}; cl_mutable_dispatch_arg_khr args[] = {arg_0, arg_1, arg_2}; cl_mutable_dispatch_config_khr dispatch_config{ - CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, - nullptr, command_handle, 3 /* num_args */, 0 /* num_svm_arg */, @@ -287,12 +266,16 @@ kernel void vector_addition(global int* tile1, global int* tile2, nullptr /* global_work_offset */, nullptr /* global_work_size */, nullptr /* local_work_size */}; - cl_mutable_base_config_khr mutable_config{ - CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, - &dispatch_config}; // Update the command buffer with the mutable configuration - error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + cl_uint num_configs = 1; + cl_command_buffer_update_type_khr config_types[1] = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR + }; + const void* configs[1] = {&dispatch_config}; + error = clUpdateMutableCommandsKHR(command_buffer, num_configs, + config_types, configs); + CL_CHECK(error); } @@ -374,3 
+357,6 @@ may be a introduced as a stand alone extension. * Revision 0.9.1, 2023-11-07 ** Add type {cl_mutable_dispatch_asserts_khr_TYPE} and its possible values (provisional). + * Revision 0.9.2, 2024-06-19 + ** Change {clUpdateMutableCommandsKHR} API to pass configs as an array rather + than linked list (provisional). diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 21ea41524..29e69e7b3 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -15647,7 +15647,7 @@ endif::cl_khr_command_buffer_multi_device[] ifdef::cl_khr_command_buffer_mutable_dispatch[] [[mutable-commands]] -=== Mutable Commands: +=== Mutable Commands A generic {cl_mutable_command_khr_TYPE} handle is called a _mutable-command_ object as it can be returned from any command recording entry-point in the @@ -15658,11 +15658,10 @@ modified through the fields of {cl_mutable_dispatch_config_khr_TYPE}. Mutable-command handles are updated between enqueues using entry-point {clUpdateMutableCommandsKHR}. -To enable performant usage, all aspects of mutation are encapsulated inside -a single {cl_mutable_base_config_khr_TYPE} parameter. -This means that the runtime has access to all the information about how the -command-buffer will change, allowing the command-buffer to be rebuilt as -efficiently as possible. +To enable performant usage, all aspects of mutation can be passed in a single +call using an array. This means that the runtime has access to all the +information about how the command-buffer will change, allowing the +command-buffer to be rebuilt as efficiently as possible. 
Any modifications to the arguments or execution info of a mutable-dispatch handle using {cl_mutable_dispatch_arg_khr_TYPE} or {cl_mutable_dispatch_exec_info_khr_TYPE} have no affect on the original @@ -15703,8 +15702,13 @@ include::{generated}/api/protos/clUpdateMutableCommandsKHR.txt[] include::{generated}/api/version-notes/clUpdateMutableCommandsKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. - * _mutable_config_ is a pointer to a {cl_mutable_base_config_khr_TYPE} - structure defining updates to make to mutable-commands. + * _num_configs_ is the number of elements in the _config_types_ and _configs_ arrays. + * _config_types_ is an array of length _num_configs_ with each element identifying + the type of each config in _configs_ at the same array index. + * _configs_ is an array of length _num_configs_ containing structs which define how a + mutable-command handle in _command_buffer_ is to be updated, each of which is + interpreted using _config_types_ at the same index with the mapping defined + in the <<mutable-commands-update-structs>> section. // refError @@ -15718,16 +15722,13 @@ one of the errors below is returned: * {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. * {CL_INVALID_OPERATION} if _command_buffer_ was not created with the {CL_COMMAND_BUFFER_MUTABLE_KHR} flag. - * {CL_INVALID_VALUE} if the _type_ member of _mutable_config_ is not - {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR}. - * {CL_INVALID_VALUE} if the _mutable_dispatch_list_ member of - _mutable_config_ is `NULL` and _num_mutable_dispatch_ > 0, or - _mutable_dispatch_list_ is not `NULL` and _num_mutable_dispatch_ is 0. - * {CL_INVALID_VALUE} if the _next_ member of _mutable_config_ is not - `NULL` and any iteration of the structure pointer chain does not contain - valid _type_ and _next_ members. - * {CL_INVALID_VALUE} if _mutable_config_ is `NULL`, or if both _next_ and - _mutable_dispatch_list_ members of _mutable_config_ are `NULL`.
+ * {CL_INVALID_VALUE} if _config_types_ is `NULL` and _num_configs_ > 0, or + _config_types_ is not `NULL` and _num_configs_ is 0. + * {CL_INVALID_VALUE} if _configs_ is `NULL` and _num_configs_ > 0, or + _configs_ is not `NULL` and _num_configs_ is 0. + * {CL_INVALID_VALUE} if any element of _config_types_ is not a valid + {cl_command_buffer_update_type_khr_TYPE} enum. + * {CL_INVALID_VALUE} if any element of _configs_ is `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -15753,19 +15754,17 @@ parameters are updated so that the new number of work-groups exceeds the number when the ND-range command was recorded, the behavior is undefined. ==== -If the _mutable_dispatch_list_ member of _mutable_config_ is non-`NULL`, -then errors defined by {clEnqueueNDRangeKernel}, {clSetKernelExecInfo}, -{clSetKernelArg}, and {clSetKernelArgSVMPointer} are returned by -{clUpdateMutableCommandsKHR} if any of the array elements are set to an -invalid value. -Additionally, the following errors are returned if any -{cl_mutable_dispatch_config_khr_TYPE} element of the array violates the -defined conditions: +If _configs_ is non-`NULL`, then for any {cl_mutable_dispatch_config_khr_TYPE} +element of the array the errors defined by {clEnqueueNDRangeKernel}, +{clSetKernelExecInfo}, {clSetKernelArg}, and {clSetKernelArgSVMPointer} are +returned by {clUpdateMutableCommandsKHR} if any of the struct members are set +to an invalid value. Additionally, the following errors are returned if any +{cl_mutable_dispatch_config_khr_TYPE} element of the array violates the defined +conditions: * {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable - command object, or created from _command_buffer_. - * {CL_INVALID_VALUE} if _type_ is not - {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}.
+ command object returned from {clCommandNDRangeKernelKHR}, or was not created from + _command_buffer_. * {CL_INVALID_OPERATION} if the values of _local_work_size_ and/or _global_work_size_ result in a change to work-group uniformity. * {CL_INVALID_OPERATION} if the _work_dim_ is different from the @@ -15793,24 +15792,25 @@ defined conditions: 0, or _exec_info_list_ is not `NULL` and _num_exec_infos_ is 0. -- -[open,refpage='cl_mutable_base_config_khr',desc='DESC',type='structs'] --- -The {cl_mutable_base_config_khr_TYPE} structure encapsulates all aspects of -mutation and is defined as: +[[mutable-commands-update-structs]] +==== Mutable Command Update Structs -include::{generated}/api/structs/cl_mutable_base_config_khr.txt[] +The following table defines the mapping of +{cl_command_buffer_update_type_khr_TYPE} values to the struct types that a void +pointer is reinterpreted as when passed to {clUpdateMutableCommandsKHR}. - * _type_ is the type of this structure, and must be - {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR_anchor} - * _next_ is `NULL` or a pointer to an extending structure. - * _num_mutable_dispatch_ is the number of mutable-dispatch objects to - configure in this enqueue of the command-buffer. - * _mutable_dispatch_list_ is an array containing _num_mutable_dispatch_ - elements describing the configurations of mutable kernel execution - commands in the command-buffer. - For a description of struct members making up each array element see - {cl_mutable_dispatch_config_khr_TYPE}.
--- +[[update-config-mapping]] +[cols=",,",options="header",] +|==== +| Enum Value | Struct Type | Entry Point + +| {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR_anchor} +| {cl_mutable_dispatch_config_khr_TYPE} +| {clCommandNDRangeKernelKHR} + +|==== + +==== Kernel Command Update Structs [open,refpage='cl_mutable_dispatch_config_khr',desc='Set kernel configuration of a mutable clCommandNDRangeKernelKHR command',type='structs'] -- @@ -15820,9 +15820,6 @@ The {cl_mutable_dispatch_arg_khr_TYPE} structure is passed to include::{generated}/api/structs/cl_mutable_dispatch_config_khr.txt[] - * _type_ is the type of this structure, and must be - {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR_anchor}. - * _next_ is `NULL` or a pointer to an extending structure. * _command_ is a mutable-command object returned by {clCommandNDRangeKernelKHR} representing a kernel execution as part of a command-buffer. diff --git a/xml/cl.xml b/xml/cl.xml index ac7236c12..53b9a9be4 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -249,7 +249,7 @@ server's OpenCL/api-docs repository. typedef struct _cl_mutable_command_khr* cl_mutable_command_khr; typedef cl_bitfield cl_mutable_dispatch_fields_khr; typedef cl_uint cl_mutable_command_info_khr; - typedef cl_uint cl_command_buffer_structure_type_khr; + typedef cl_uint cl_command_buffer_update_type_khr; typedef cl_bitfield cl_device_fp_atomic_capabilities_ext; typedef cl_uint cl_image_requirements_info_ext; typedef cl_bitfield cl_platform_command_buffer_capabilities_khr; @@ -370,8 +370,6 @@ server's OpenCL/api-docs repository. const void* param_value
      - cl_command_buffer_structure_type_khr type - const void* next cl_mutable_command_khr command cl_uint num_args cl_uint num_svm_args @@ -384,13 +382,6 @@ server's OpenCL/api-docs repository. const size_t* global_work_size const size_t* local_work_size - - - cl_command_buffer_structure_type_khr type - const void* next - cl_uint num_mutable_dispatch - const cl_mutable_dispatch_config_khr* mutable_dispatch_list - @@ -1370,10 +1361,9 @@ server's OpenCL/api-docs repository. - - - - + + + @@ -3280,9 +3270,11 @@ server's OpenCL/api-docs repository. size_t* param_value_size_ret - cl_int clUpdateMutableCommandsKHR - cl_command_buffer_khr command_buffer - const cl_mutable_base_config_khr* mutable_config + cl_int clUpdateMutableCommandsKHR + cl_command_buffer_khr command_buffer + cl_uint num_configs + const cl_command_buffer_update_type_khr* config_types + const void** configs cl_int clGetMutableCommandInfoKHR @@ -7324,18 +7316,17 @@ server's OpenCL/api-docs repository. - + - + - @@ -7369,8 +7360,7 @@ server's OpenCL/api-docs repository. - - + From c63b28832709c780eb2f76fa35c7fabb4c3deed5 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 16 Jul 2024 22:42:05 +0530 Subject: [PATCH 128/190] Clarify Acquire/Release behavior for external memory (#1176) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Clarify Acquire/Release behavior for external memory Clarify Acquire/Release behavior for external memory specs to call out the scope of operations as well as the behavior in case of multiple acquire/release calls. Fixes #1078, #1086 * Updates to Acquire/Release clarifications (#1183) Address review comments on PR#1176 Fixes #1078, #1086 * Address left-over comments (#1194) Address some of the comments that were left out in earlier update. * Fix the typo for "acquired" Fix the typo suggested by Kevin to replace aquired to acquired. 
Co-authored-by: Kévin Petit --------- Co-authored-by: Kévin Petit --- api/opencl_runtime_layer.asciidoc | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 29e69e7b3..b4e3e5087 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -5427,6 +5427,23 @@ handle is used by an OpenCL command queued to a command-queue without being acquired. This is to guarantee that the state of the memory objects is up-to-date and they are accessible to OpenCL. + +The following restrictions shall apply - + * Each memory object must be acquired only once. Acquiring a memory object + multiple times without releasing it results in implementation-defined + behavior. + * The acquire must be performed on a command-queue associated with a device + that was one of the devices specified via {CL_MEM_DEVICE_HANDLE_LIST_KHR} + when the memory object was imported using {clCreateBufferWithProperties} or + {clCreateImageWithProperties}. If {CL_MEM_DEVICE_HANDLE_LIST_KHR} was not + specified, the acquire can be performed on a command-queue associated with + any device in the context. + * The memory object will be acquired for all devices specified + via {CL_MEM_DEVICE_HANDLE_LIST_KHR} when the memory object was imported + using {clCreateBufferWithProperties} or {clCreateImageWithProperties}. + If {CL_MEM_DEVICE_HANDLE_LIST_KHR} was not specified, the memory object + will be acquired for all devices in the context. + See <> for more details on how to use this API. @@ -5503,6 +5520,23 @@ Applications must release the memory objects that are acquired using commands in the other API. This is to guarantee that the state of memory objects is up-to-date and they are accessible to the other API. + +The following restrictions shall apply - + * Each memory object must be released only once. 
Releasing a memory object + multiple times without acquiring it results in implementation-defined + behavior. + * The release must be performed on a command-queue associated with a device + that was one of the devices specified via {CL_MEM_DEVICE_HANDLE_LIST_KHR} + when the memory object was imported using {clCreateBufferWithProperties} or + {clCreateImageWithProperties}. If {CL_MEM_DEVICE_HANDLE_LIST_KHR} was not + specified, the release can be performed on a command-queue associated with + any device in the context. + * The memory object will be released for all devices specified via + {CL_MEM_DEVICE_HANDLE_LIST_KHR} when the memory object was imported + using {clCreateBufferWithProperties} or {clCreateImageWithProperties}. + If {CL_MEM_DEVICE_HANDLE_LIST_KHR} was not specified, the memory object + will be released for all devices in the context. + See "`Example with Acquire / Release`" provided in <> for more details on how to use this API. From 418d3f53734875f306a56312ee795521818713f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Fri, 9 Aug 2024 00:13:42 +0100 Subject: [PATCH 129/190] Fix typo in description of clGetSemaphoreHandleForTypeKHR (#1220) Change-Id: I694d985147ae8a78c25e6a29c49381e58faa3d8b Signed-off-by: Kevin Petit --- api/opencl_runtime_layer.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index b4e3e5087..3027cc730 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -12918,7 +12918,7 @@ include::{generated}/api/protos/clGetSemaphoreHandleForTypeKHR.txt[] * _handle_size_ specifies the size of memory pointed by _handle_ptr_. * _handle_ptr_ is a pointer to memory where the exported external handle is returned. - If _param_value_ is `NULL`, it is ignored. + If _handle_ptr_ is `NULL`, it is ignored. * _handle_size_ret_ returns the actual size in bytes for the external handle. 
If _handle_size_ret_ is `NULL`, it is ignored. From 6ea8cc9fb62b05882d2813772f6d93ce8d0a0ac3 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Fri, 9 Aug 2024 01:14:02 +0200 Subject: [PATCH 130/190] New Intel extension cl_intel_subgroup_buffer_prefetch (#1195) --- ...cl_intel_subgroup_buffer_prefetch.asciidoc | 241 ++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 extensions/cl_intel_subgroup_buffer_prefetch.asciidoc diff --git a/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc b/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc new file mode 100644 index 000000000..142bef7ce --- /dev/null +++ b/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc @@ -0,0 +1,241 @@ += cl_intel_subgroup_buffer_prefetch + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en + +:blank: pass:[ +] + +// Set the default source code type in this document to C, +// for syntax highlighting purposes. +:language: c + +== Name Strings + +`cl_intel_subgroup_buffer_prefetch` + +== Contact + +Grzegorz Wawiorko Intel (grzegorz 'dot' wawiorko 'at' intel 'dot' com) + +== Contributors + +// spell-checker: disable +Grzegorz Wawiorko, Intel + +Ben Ashbaugh, Intel + +Andrzej Ratajewski, Intel + +// spell-checker: enable + +== Notice + +Copyright (c) 2024 Intel Corporation. All rights reserved. + +== Status + +Complete + +== Version + +Built On: {docdate} + +Revision: 1 + +== Dependencies + +OpenCL 1.2 and support for `cl_intel_subgroups` is required. + +This extension requires OpenCL support for SPIR-V, either via OpenCL 2.1 or via the `cl_khr_il_program` extension. + +This extension is written against the OpenCL 3.0 C Language specification, V3.0.16. + +== Overview + +The extension adds the ability to prefetch data from a buffer as a sub-group operation. 
+The functionality added by this extension can improve the performance of some kernels by prefetching data into a cache, so future reads of the data are from a fast cache rather than slower memory. + +The new block prefetch operations are supported both in the OpenCL C kernel programming language and in the SPIR-V intermediate language. + +The prefetch functions are companions to the sub-group block reads described by the extensions `cl_intel_subgroups`, `cl_intel_subgroups_char`, `cl_intel_subgroups_short` and `cl_intel_subgroups_long`. + + +== New API Functions + +None. + +== New API Enums + +None. + +== New OpenCL C Functions + +Add `uchar` variants of the sub-group block prefetch functions: :: ++ +-- +[source] +---- +void intel_sub_group_block_prefetch_uc( const __global uchar* p ) +void intel_sub_group_block_prefetch_uc2( const __global uchar* p ) +void intel_sub_group_block_prefetch_uc4( const __global uchar* p ) +void intel_sub_group_block_prefetch_uc8( const __global uchar* p ) +void intel_sub_group_block_prefetch_uc16( const __global uchar* p ) +---- +-- + +Add `ushort` variants of the sub-group block prefetch functions: :: ++ +-- +[source] +---- +void intel_sub_group_block_prefetch_us( const __global ushort* p ) +void intel_sub_group_block_prefetch_us2( const __global ushort* p ) +void intel_sub_group_block_prefetch_us4( const __global ushort* p ) +void intel_sub_group_block_prefetch_us8( const __global ushort* p ) +void intel_sub_group_block_prefetch_us16( const __global ushort* p ) +---- +-- + +Add `uint` variants of the sub-group block prefetch functions: :: ++ +-- +[source] +---- +void intel_sub_group_block_prefetch_ui( const __global uint* p ) +void intel_sub_group_block_prefetch_ui2( const __global uint* p ) +void intel_sub_group_block_prefetch_ui4( const __global uint* p ) +void intel_sub_group_block_prefetch_ui8( const __global uint* p ) +---- +-- + +Add `ulong` variants of the sub-group block prefetch functions: :: ++ +-- +[source] +---- +void 
intel_sub_group_block_prefetch_ul( const __global ulong* p ) +void intel_sub_group_block_prefetch_ul2( const __global ulong* p ) +void intel_sub_group_block_prefetch_ul4( const __global ulong* p ) +void intel_sub_group_block_prefetch_ul8( const __global ulong* p ) +---- +-- + +== Modifications to the OpenCL C Specification + +=== Add a new Section 6.15.X - "Sub-group Prefetch Functions" + +-- +[cols="5a,4",options="header"] +|================================== +|*Function* +|*Description* + +|[source,c] +---- +void intel_sub_group_block_prefetch_uc( + const __global uchar* p ) +void intel_sub_group_block_prefetch_uc2( + const __global uchar* p ) +void intel_sub_group_block_prefetch_uc4( + const __global uchar* p ) +void intel_sub_group_block_prefetch_uc8( + const __global uchar* p ) +void intel_sub_group_block_prefetch_uc16( + const __global uchar* p ) +---- + +| Takes 1, 2, 4, 8 or 16 uchars of data for each work item in the sub-group from the specified pointer as a block operation and saves it in the global cache memory. + +Prefetches have no effect on the behavior of the program but can change its performance characteristics. + +|[source,c] +---- +void intel_sub_group_block_prefetch_us( + const __global ushort* p ) +void intel_sub_group_block_prefetch_us2( + const __global ushort* p ) +void intel_sub_group_block_prefetch_us4( + const __global ushort* p ) +void intel_sub_group_block_prefetch_us8( + const __global ushort* p ) +void intel_sub_group_block_prefetch_us16( + const __global ushort* p ) +---- + +| Takes 1, 2, 4, 8 or 16 ushorts of data for each work item in the sub-group from the specified pointer as a block operation and saves it in the global cache memory. + +Prefetches have no effect on the behavior of the program but can change its performance characteristics. 
+ +|[source,c] +---- +void intel_sub_group_block_prefetch_ui( + const __global uint* p ) +void intel_sub_group_block_prefetch_ui2( + const __global uint* p ) +void intel_sub_group_block_prefetch_ui4( + const __global uint* p ) +void intel_sub_group_block_prefetch_ui8( + const __global uint* p ) +---- + +| Takes 1, 2, 4 or 8 uints of data for each work item in the sub-group from the specified pointer as a block operation and saves it in the global cache memory. + +Prefetches have no effect on the behavior of the program but can change its performance characteristics. + +|[source,c] +---- +void intel_sub_group_block_prefetch_ul( + const __global ulong* p ) +void intel_sub_group_block_prefetch_ul2( + const __global ulong* p ) +void intel_sub_group_block_prefetch_ul4( + const __global ulong* p ) +void intel_sub_group_block_prefetch_ul8( + const __global ulong* p ) +---- + +| Takes 1, 2, 4 or 8 ulongs of data for each work item in the sub-group from the specified pointer as a block operation and saves it in the global cache memory. + +Prefetches have no effect on the behavior of the program but can change its performance characteristics. + +|================================== +-- + +== Modifications to the OpenCL SPIR-V Environment Specification + +=== Add a new section 5.2.X - `cl_intel_subgroup_buffer_prefetch` + +If the OpenCL environment supports the extension `cl_intel_subgroup_buffer_prefetch`, then the environment must accept modules that declare use of the extension `SPV_INTEL_subgroup_buffer_prefetch` via *OpExtension*. + +If the OpenCL environment supports the extension `cl_intel_subgroup_buffer_prefetch` and use of the SPIR-V extension `SPV_INTEL_subgroup_buffer_prefetch` is declared in the module via *OpExtension*, then the environment must accept modules that declare the *SubgroupBufferPrefetchINTEL* capability. 
+ +Note that the restrictions described in Section 7.1.X.3 - _Notes and Restrictions_ in the `cl_intel_spirv_subgroups` extension are unchanged and continue to apply for this extension. + +== Issues + +None. + +//. Issue? +//+ +//-- +//`STATUS`: Description. +//-- + +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2024-06-28|Grzegorz Wawiorko|*First public revision.* +|======================================== + +//************************************************************************ +//Other formatting suggestions: +// +//* Use *bold* text for host APIs, or [source] syntax highlighting. +//* Use `mono` text for device APIs, or [source] syntax highlighting. +//* Use `mono` text for extension names, types, or enum values. +//* Use _italics_ for parameters. +//************************************************************************ From 71913fba94ffa981b09c210b36ed97599675fb0d Mon Sep 17 00:00:00 2001 From: tomasz-platek <165791413+tomasz-platek@users.noreply.github.com> Date: Fri, 9 Aug 2024 01:14:37 +0200 Subject: [PATCH 131/190] Publish the cl_img_matrix_multiply extension specification. (#1199) * Publish cl_img_matrix_multiply extension specification. * The final draft of the cl_img_matrix_multiply extension. * Publish the cl_img_bitwise_ops extension specification. * Revert "Publish the cl_img_bitwise_ops extension specification." This reverts commit b17a1f7b3596601b314bdd3dd599c5b1afd85afd. * Update extensions/cl_img_matrix_multiply.asciidoc Listing the initial extension version. 
Co-authored-by: Ben Ashbaugh * Update cl_img_matrix_multiply.asciidoc Adding execution results to the coding samples --------- Co-authored-by: Ben Ashbaugh --- extensions/cl_img_matrix_multiply.asciidoc | 303 +++++++++++++++++++++ extensions/extensions.txt | 2 + 2 files changed, 305 insertions(+) create mode 100644 extensions/cl_img_matrix_multiply.asciidoc diff --git a/extensions/cl_img_matrix_multiply.asciidoc b/extensions/cl_img_matrix_multiply.asciidoc new file mode 100644 index 000000000..068830280 --- /dev/null +++ b/extensions/cl_img_matrix_multiply.asciidoc @@ -0,0 +1,303 @@ +:data-uri: +:icons: font +include::../config/attribs.txt[] +:source-highlighter: coderay + += cl_img_matrix_multiply + +== Name Strings + +`cl_img_matrix_multiply` + +== Contact + +Imagination Technologies Developer Forum: + +https://forums.imgtec.com/ + +Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) + +== Contributors + +CY Cheng, Imagination Technologies. + +Joe Molleson, Imagination Technologies. + +Tomasz Platek, Imagination Technologies. + +== Notice + +Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. + +== Status + +Final Draft + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +This extension is written against the OpenCL C Specification Version V3.0.16. + +This extension requires the `cl_khr_fp16` extension. + +== Overview + +This extension adds built-in functions that exercise hardware capabilities of Imagination GPU IP and allow matrix multiplication to be implemented in a highly efficient and performant manner.
+ +== New OpenCL C Feature Names + +[source,c] +---- +__opencl_img_dot_interleaved +__opencl_img_matmul_2x4_4x4 +---- + +== New OpenCL C Functions + +Perform the interleaved dot product operation: + +[source,c] +---- +float2 img_dot_interleaved(float a,__local float2 * b); +float2 img_dot_interleaved(float2 a,__local float4 * b); +float2 img_dot_interleaved(float4 a,__local float8 * b); +float2 img_dot_interleaved(float8 a,__local float16 * b); +float2 img_dot_interleaved_acc(float a,__local float2 * b, float2 acc); +float2 img_dot_interleaved_acc(float2 a,__local float4 * b, float2 acc); +float2 img_dot_interleaved_acc(float4 a,__local float8 * b, float2 acc); +float2 img_dot_interleaved_acc(float8 a,__local float16 * b, float2 acc); +---- + +Perform the matrix multiplication operation: + +[source,c] +---- +float8 img_matmul_2x4_4x4f(half4 a0, half4 a1,__local half16 * b); +half8 img_matmul_2x4_4x4h(half4 a0, half4 a1,__local half16 * b); +float8 img_matmul_acc_2x4_4x4f(half4 a0, half4 a1,__local half16 * b, float4 acc0, float4 acc1); +half8 img_matmul_acc_2x4_4x4h(half4 a0, half4 a1,__local half16 * b, half4 acc0, half4 acc1); +float8 img_matmul_2x4_4x4transposedf(half4 a0, half4 a1,__local half16 * b); +half8 img_matmul_2x4_4x4transposedh(half4 a0, half4 a1,__local half16 * b); +float8 img_matmul_acc_2x4_4x4transposedf(half4 a0, half4 a1,__local half16 * b, float4 acc0, float4 acc1); +half8 img_matmul_acc_2x4_4x4transposedh(half4 a0, half4 a1,__local half16 * b, half4 acc0, half4 acc1); +---- + +== Modifications to the OpenCL C Specification + +(Add to Table 11 - Built-in Scalar and Vector Argument Math Functions in Section 6.15.2 - Math Functions) :: ++ +-- +[cols="1,2",options="header"] +|==== +| Function | Description +| float2 *img_dot_interleaved*(float _a_,pass:[__local] float2 * _b_) + + float2 *img_dot_interleaved*(float2 _a_,pass:[__local] float4 * _b_) + + float2 *img_dot_interleaved*(float4 _a_,pass:[__local] float8 * _b_) + + float2 
*img_dot_interleaved*(float8 _a_,pass:[__local] float16 * _b_) + a| `img_dot_interleaved` performs the dual dot product operation. + The input vectors of the first dot product are `a` and the vector containing the even-indexed elements of `b`. The result is stored into the first element of the output vector. + The input vectors of the second dot product are `a` and the vector containing the odd-indexed elements of `b`. The result is stored into the second element of the output vector. + +For example, given: + +---- +a = [a0 a1] +b = [b0 b1 b2 b3] +---- + +the output vector is: + +---- +[res0 res1] = [a0 a1] x [b0 b1] + [b2 b3] +---- + +Requires that the `__opencl_img_dot_interleaved` feature macro is defined. +| float2 *img_dot_interleaved_acc*(float _a_,pass:[__local] float2 * _b_, float2 _acc_) + + float2 *img_dot_interleaved_acc*(float2 _a_,pass:[__local] float4 * _b_, float2 _acc_) + + float2 *img_dot_interleaved_acc*(float4 _a_,pass:[__local] float8 * _b_, float2 _acc_) + + float2 *img_dot_interleaved_acc*(float8 _a_,pass:[__local] float16 * _b_, float2 _acc_) + a| `img_dot_interleaved_acc` performs the dual dot product operation with the accumulator `acc`. + The input vectors of the first dot product are `a` and the vector containing the even-indexed elements of `b`. The result is stored into the first element of the output vector. + The input vectors of the second dot product are `a` and the vector containing the odd-indexed elements of `b`. The result is stored into the second element of the output vector. + +For example, given: + +---- +a = [a0 a1] +b = [b0 b1 b2 b3] +acc = [acc0 acc1] +---- + +the output vector is: + +---- +[res0 res1] = [a0 a1] x [b0 b1] + [acc0 acc1] + [b2 b3] +---- + +Requires that the `__opencl_img_dot_interleaved` feature macro is defined. 
+| float8 *img_matmul_2x4_4x4f*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) + + half8 *img_matmul_2x4_4x4h*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) + a| `img_matmul_2x4_4x4f` and `img_matmul_2x4_4x4h` perform the matrix multiplication operation of matrices A and B of dimensions 2x4 and 4x4, where `a0` is the first row and `a1` is the second row of the matrix A. + The first row of the matrix B is represented by the elements 0-3 of `b`, the second row by the elements 4-7, the third row by the elements 8-11, and the fourth row by the elements 12-15. + +For example, given: + +---- +A = [a00 a01 a02 a03] + [a10 a11 a12 a13] +B = [b0 b1 b2 b3] + [b4 b5 b6 b7] + [b8 b9 b10 b11] + [b12 b13 b14 b15] +---- + +the output vector is: + +---- +[res0 res1 res2 res3] = A x B +[res4 res5 res6 res7] +---- + +Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. +| float8 *img_matmul_acc_2x4_4x4f*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_, float4 _acc0_, float4 _acc1_) + + half8 *img_matmul_acc_2x4_4x4h*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_, half4 _acc0_, half4 _acc1_) + a| `img_matmul_acc_2x4_4x4f` and `img_matmul_acc_2x4_4x4h` perform the matrix multiplication operation with the accumulator of matrices A and B of dimensions 2x4 and 4x4, where `a0` is the first row and `a1` is the second row of the matrix A, and where `acc0` is the first row and `acc1` is the second row of the accumulator. + The first row of the matrix B is represented by the elements 0-3 of `b`, the second row by the elements 4-7, the third row by the elements 8-11, and the fourth row by the elements 12-15.
+ +For example, given: + +---- +A = [a00 a01 a02 a03] + [a10 a11 a12 a13] +B = [b0 b1 b2 b3] + [b4 b5 b6 b7] + [b8 b9 b10 b11] + [b12 b13 b14 b15] +C = [acc00 acc01 acc02 acc03] + [acc10 acc11 acc12 acc13] +---- + +the output vector is: + +---- +[res0 res1 res2 res3] = A x B + C +[res4 res5 res6 res7] +---- + +Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. + +| float8 *img_matmul_2x4_4x4transposedf*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) + + half8 *img_matmul_2x4_4x4transposedh*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) + a| `img_matmul_2x4_4x4transposedf` and `img_matmul_2x4_4x4transposedh` perform the matrix multiplication operation of matrix A and transposed matrix B of dimensions 2x4 and 4x4, where `a0` is the first row and `a1` is the second row of the matrix A. + The first row of the matrix B is represented by the elements 0-3 of `b`, the second row by the elements 4-7, the third row by the elements 8-11, and the fourth row by the elements 12-15. + +For example, given: + +---- +A = [a00 a01 a02 a03] + [a10 a11 a12 a13] +BT = [b0 b4 b8 b12] + [b1 b5 b9 b13] + [b2 b6 b10 b14] + [b3 b7 b11 b15] +---- + +the output vector is: + +---- +[res0 res1 res2 res3] = A x BT +[res4 res5 res6 res7] +---- + +Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. +| float8 *img_matmul_acc_2x4_4x4transposedf*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_, float4 _acc0_, float4 _acc1_) + + half8 *img_matmul_acc_2x4_4x4transposedh*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_, half4 _acc0_, half4 _acc1_) + a| `img_matmul_acc_2x4_4x4transposedf` and `img_matmul_acc_2x4_4x4transposedh` perform the matrix multiplication operation with the accumulator of matrix A and transposed matrix B of dimensions 2x4 and 4x4, where `a0` is the first row and `a1` is the second row of the matrix A, and where `acc0` is the first row and `acc1` is the second row of the accumulator. 
+ The first row of the matrix B is represented by the elements 0-3 of `b`, the second row by the elements 4-7, the third row by the elements 8-11, and the fourth row by the elements 12-15. + +For example, given: + +---- +A = [a00 a01 a02 a03] + [a10 a11 a12 a13] +BT = [b0 b4 b8 b12] + [b1 b5 b9 b13] + [b2 b6 b10 b14] + [b3 b7 b11 b15] +C = [acc00 acc01 acc02 acc03] + [acc10 acc11 acc12 acc13] +---- + +the output vector is: + +---- +[res0 res1 res2 res3] = A x BT + C +[res4 res5 res6 res7] +---- + +Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. +|==== +-- + +== Coding Sample + +This coding sample shows how to initialize the input vectors, use the *img_dot_interleaved_acc* function, and access the output vector: +[source] +---- +float4 a = (float4) (1.0f, 1.0f, 1.0f, 1.0f); +__local float8 b; +b = (float8) (0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f); + +float2 acc = (float2) (1.0f, 1.0f); +float2 res = img_dot_interleaved_acc(a, &b, acc); + +printf("res = [ %f %f ]\n", res.s0, res.s1); +---- + +Executing a work-item containing this code gives the following result: +[source] +---- +res = [ 1.000000 5.000000 ] +---- + +This coding sample shows how to initialize the input vectors, use the *img_matmul_acc_2x4_4x4f* function, and access the output vector: +[source] +---- +half4 a0 = (half4) (1.0h, 0.0h, 0.0h, 0.0h); +half4 a1 = (half4) (0.0h, 1.0h, 0.0h, 0.0h); + +local half16 b; +b = (half16) (0.0h, 1.0h, 2.0h, 3.0h, + 4.0h, 5.0h, 6.0h, 7.0h, + 8.0h, 9.0h, 10.0h, 11.0h, + 12.0h, 13.0h, 14.0h, 15.0h); + +float4 acc0 = (float4) (1.0f, 1.0f, 1.0f, 1.0f); +float4 acc1 = (float4) (1.0f, 1.0f, 1.0f, 1.0f); + +float8 res = img_matmul_acc_2x4_4x4f(a0, a1, &b, acc0, acc1); + +printf("res = [ %f %f %f %f ]\n", res.s0, res.s1, res.s2, res.s3); +printf(" [ %f %f %f %f ]\n", res.s4, res.s5, res.s6, res.s7); +---- + +Executing a work-item containing this code gives the following result: +[source] +---- +res = [ 1.000000 2.000000 3.000000 4.000000 ] + [ 
5.000000 6.000000 7.000000 8.000000 ] +---- + +== Version History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|==== +| Version | Date | Author | Changes +| 1.0.0 | 2024-06-07 | Tomasz Platek | *Initial revision* +|==== + diff --git a/extensions/extensions.txt b/extensions/extensions.txt index 573ec1169..46596b9f8 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -67,6 +67,8 @@ include::cl_img_cancel_command.asciidoc[] <<< include::cl_img_generate_mipmap.asciidoc[] <<< +include::cl_img_matrix_multiply.asciidoc[] +<<< include::cl_img_mem_properties.asciidoc[] <<< include::cl_img_use_gralloc_ptr.asciidoc[] From 9be81653014102fa33d3430b9b6cbaa30322b04f Mon Sep 17 00:00:00 2001 From: tomasz-platek <165791413+tomasz-platek@users.noreply.github.com> Date: Fri, 9 Aug 2024 01:14:53 +0200 Subject: [PATCH 132/190] Publish the cl_img_bitwise_ops extension specification. (#1200) * Publish the cl_img_bitwise_ops extension specification. * Update extensions/cl_img_bitwise_ops.asciidoc Listing the initial extension version. Co-authored-by: Ben Ashbaugh --------- Co-authored-by: Ben Ashbaugh --- extensions/cl_img_bitwise_ops.asciidoc | 118 +++++++++++++++++++++++++ extensions/extensions.txt | 2 + 2 files changed, 120 insertions(+) create mode 100644 extensions/cl_img_bitwise_ops.asciidoc diff --git a/extensions/cl_img_bitwise_ops.asciidoc b/extensions/cl_img_bitwise_ops.asciidoc new file mode 100644 index 000000000..fbbd370fa --- /dev/null +++ b/extensions/cl_img_bitwise_ops.asciidoc @@ -0,0 +1,118 @@ +:data-uri: +:icons: font +include::../config/attribs.txt[] +:source-highlighter: coderay + += cl_img_bitwise_ops + +== Name Strings + +`cl_img_bitwise_ops` + +== Contact + +Imagination Technologies Developer Forum: + +https://forums.imgtec.com/ + +Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) + +== Contributors + +CY Cheng, Imagination Technologies. + +Tomasz Platek, Imagination Technologies. 
+ +== Notice + +Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. + +== Status + +Final Draft + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +This extension is written against the OpenCL C Specification Version V3.0.16. + +== Overview + +This extension adds built-in functions that expose the bitwise operations of Imagination GPU IP that are not accessible by standard OpenCL C functions. + +== New OpenCL C Feature Names + +[source,c] +---- +__opencl_img_bit_interleave +---- + +== New OpenCL C Functions + +Performs the bit interleave operation: + +[source,c] +---- +gentype img_bit_interleave(gentype a, gentype b); +---- + +== Modifications to the OpenCL C Specification + +(Add to Table 16 - Built-in Scalar and Vector Argument Common Functions in Section 6.15.4 - Common Functions) :: ++ +-- +[cols="1,2",options="header"] +|==== +| Function | Description +| gentype *img_bit_interleave*(gentype a, gentype b) + a| `img_bit_interleave` interleaves the first `n` bits from two sources where `n` is half of the size of gentype in bits. + +For `a` and `b`, where a0 and b0 are the least significant bits: +[source] +---- +a = a(N-1)\|a(N-2)\|a(N-3)\|...\|a3\|a2\|a1\|a0 +b = b(N-1)\|b(N-2)\|b(N-3)\|...\|b3\|b2\|b1\|b0 +---- + +the output is: +[source] +---- +res = b(N/2-1)\|a(N/2-1)\|b(N/2-2)\|a(N/2-2)\|b(N/2-3)\|a(N/2-3)\|...\|b3\|a3\|b2\|a2\|b1\|a1\|b0\|a0 +---- +so the sizes of `a`,`b`, and `res` are equal. + +Requires that the `__opencl_img_bit_interleave` feature macro is defined. 
+|==== +-- + +== Coding Sample + +This coding sample shows how to use the *img_bit_interleave* function: +[source] +---- +int4 a = (int4) ( 0x00000000, 0x00000000, 0x0000FFFF, 0xFFFFFFFF); +int4 b = (int4) ( 0xFFFFFFFF, 0x0000FFFF, 0x00000000, 0x00000000); + +int4 res = img_bit_interleave(a,b); + +printf("res = [ 0x%x 0x%x 0x%x 0x%x]\n", res.s0, res.s1, res.s2, res.s3); +---- + +Executing a work-item of this kernel gives the following result: +[source] +---- +res = [ 0xaaaaaaaa 0xaaaaaaaa 0x55555555 0x55555555] +---- + +== Version History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|==== +| Version | Date | Author | Changes +| 1.0.0 | 2024-06-19 | Tomasz Platek | *Initial revision* +|==== + diff --git a/extensions/extensions.txt b/extensions/extensions.txt index 46596b9f8..aae06c841 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -61,6 +61,8 @@ include::cl_arm_scheduling_controls.asciidoc[] == Imagination Technologies Extensions :leveloffset: 2 <<< +include::cl_img_bitwise_ops.asciidoc[] +<<< include::cl_img_cached_allocations.asciidoc[] <<< include::cl_img_cancel_command.asciidoc[] From 154244c6b31e8b77e32400e3f6c046bb9833fbce Mon Sep 17 00:00:00 2001 From: tomasz-platek <165791413+tomasz-platek@users.noreply.github.com> Date: Fri, 9 Aug 2024 01:15:09 +0200 Subject: [PATCH 133/190] Publish the cl_img_swap_ops extension specification. (#1201) * Publish the cl_img_swap_ops extension specification. * Update extensions/cl_img_swap_ops.asciidoc Listing the initial extension version. Co-authored-by: Ben Ashbaugh * Update cl_img_swap_ops.asciidoc Defining behavior as undefined for cases when the number of work-items is not evenly divisible by four and if some work-items in the block of four are inactive, defining 1-dimensional local ID as a base for grouping work-items. 
--------- Co-authored-by: Ben Ashbaugh --- extensions/cl_img_swap_ops.asciidoc | 134 ++++++++++++++++++++++++++++ extensions/extensions.txt | 2 + 2 files changed, 136 insertions(+) create mode 100644 extensions/cl_img_swap_ops.asciidoc diff --git a/extensions/cl_img_swap_ops.asciidoc b/extensions/cl_img_swap_ops.asciidoc new file mode 100644 index 000000000..ea9578022 --- /dev/null +++ b/extensions/cl_img_swap_ops.asciidoc @@ -0,0 +1,134 @@ +:data-uri: +:icons: font +include::../config/attribs.txt[] +:source-highlighter: coderay + += cl_img_swap_ops + +== Name Strings + +`cl_img_swap_ops` + +== Contact + +Imagination Technologies Developer Forum: + +https://forums.imgtec.com/ + +Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) + +== Contributors + +CY Cheng, Imagination Technologies. + +Tomasz Platek, Imagination Technologies. + +== Notice + +Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. + +== Status + +Final Draft + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +This extension is written against the OpenCL C Specification Version V3.0.16. + +== Overview + +This extension adds built-in functions that exercise hardware capabilities of Imagination GPU IP and expose cross work-items swap functions. + +== New OpenCL C Feature Names + +[source,c] +---- +__opencl_img_swap +---- + +== New OpenCL C Functions + +Perform the swap operation: + +[source,c] +---- +gentype img_swap_x(gentype value); +gentype img_swap_y(gentype value); +---- + +== Modifications to the OpenCL C Specification + +(Add to Table 16 - Built-in Scalar and Vector Argument Common Functions in Section 6.15.4 - Common Functions) :: ++ +-- +[cols="1,2",options="header"] +|==== +| Function | Description +| gentype *img_swap_x*(gentype value) + a| `img_swap_x` swaps `values` between work-items in the following way: all work-items are divided into blocks of four consecutive elements. 
For each block: + +* In the first work-item, `img_swap_x` returns `value` passed as an argument in the second work-item. +* In the second work-item, `img_swap_x` returns `value` passed as an argument in the first work-item. +* In the third work-item, `img_swap_x` returns `value` passed as an argument in the fourth work-item. +* In the fourth work-item, `img_swap_x` returns `value` passed as an argument in the third work-item. + +The work-items are assigned into blocks based on their 1-dimensional local ID (see `get_local_linear_id`). + +The number of work-items that make up a work-group must be evenly divisible by four; otherwise, the behaviour is undefined. + +The function must be called in all four work-items of the block; otherwise, the behaviour is undefined. + +Requires that the `__opencl_img_swap` feature macro is defined. +| gentype *img_swap_y*(gentype value) + a| `img_swap_y` swaps `values` between work-items in the following way: all work-items are divided into blocks of four consecutive elements. For each block: + +* In the first work-item, `img_swap_y` returns `value` passed as an argument in the third work-item. +* In the third work-item, `img_swap_y` returns `value` passed as an argument in the first work-item. +* In the second work-item, `img_swap_y` returns `value` passed as an argument in the fourth work-item. +* In the fourth work-item, `img_swap_y` returns `value` passed as an argument in the second work-item. + +The work-items are assigned into blocks based on their 1-dimensional local ID (see `get_local_linear_id`). + +The number of work-items that make up a work-group must be evenly divisible by four; otherwise, the behaviour is undefined. + +The function must be called in all four work-items of the block; otherwise, the behaviour is undefined. + +Requires that the `__opencl_img_swap` feature macro is defined. 
+|==== +-- + +== Coding Sample + +This coding sample shows how to use the *img_swap_x* function: +[source] +---- +__kernel void swap() { + int i = get_global_id(0); + int res = img_swap_x(i); + + printf("id: %d, res = [ %d ]\n", i, res); +} +---- + +Executing four work-items of this kernel in one work-group gives the following result: +[source] +---- +id: 0, res = [ 1 ] +id: 1, res = [ 0 ] +id: 2, res = [ 3 ] +id: 3, res = [ 2 ] +---- + +== Version History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|==== +| Version | Date | Author | Changes +| 1.0.0 | 2024-06-19 | Tomasz Platek | *Initial revision* +|==== + diff --git a/extensions/extensions.txt b/extensions/extensions.txt index aae06c841..cc4849ccc 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -73,6 +73,8 @@ include::cl_img_matrix_multiply.asciidoc[] <<< include::cl_img_mem_properties.asciidoc[] <<< +include::cl_img_swap_ops.asciidoc[] +<<< include::cl_img_use_gralloc_ptr.asciidoc[] <<< include::cl_img_yuv_image.asciidoc[] From 56c46e4ff747cfe17e8214809ff4cb8aead8a2d7 Mon Sep 17 00:00:00 2001 From: tomasz-platek <165791413+tomasz-platek@users.noreply.github.com> Date: Fri, 9 Aug 2024 01:24:36 +0200 Subject: [PATCH 134/190] Publish the cl_img_memory_management extension specification. (#1202) * Publish the cl_img_memory_management extension specification. * Update extensions/cl_img_memory_management.asciidoc Listing the initial extension version. Co-authored-by: Ben Ashbaugh * Update cl_img_memory_management.asciidoc Status changed to Final Draft * Update cl_img_memory_management.asciidoc Fix typo (unnecessary "new_alloc" in the enum item name). 
--------- Co-authored-by: Ben Ashbaugh --- extensions/cl_img_memory_management.asciidoc | 247 +++++++++++++++++++ extensions/extensions.txt | 2 + 2 files changed, 249 insertions(+) create mode 100644 extensions/cl_img_memory_management.asciidoc diff --git a/extensions/cl_img_memory_management.asciidoc b/extensions/cl_img_memory_management.asciidoc new file mode 100644 index 000000000..f9aa61e83 --- /dev/null +++ b/extensions/cl_img_memory_management.asciidoc @@ -0,0 +1,247 @@ +:data-uri: +:icons: font +include::../config/attribs.txt[] +:source-highlighter: coderay + += cl_img_memory_management + +== Name Strings + +`cl_img_memory_management` + +== Contact + +Imagination Technologies Developer Forum: + +https://forums.imgtec.com/ + +Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) + +== Contributors + +CY Cheng, Imagination Technologies. + +Tomasz Platek, Imagination Technologies. + +== Notice + +Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. + +== Status + +Final Draft + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +This extension is written against the OpenCL C Specification Version V3.0.16. + +== Overview + +This extension adds built-in functions that expose the low-level memory and cache control instructions of Imagination GPU IP that are not accessible by standard OpenCL C functions. 
+ +== New OpenCL C Feature Names + +[source,c] +---- +__opencl_img_fence +__opencl_img_cache +__opencl_img_load_store +---- + +== New OpenCL C Functions + +Issues a data fence: + +[source,c] +---- +void img_fence(cache_target_img target); +---- + +Perform the cache flush/invalidate operation: + +[source,c] +---- +void img_cache_flush(cache_target_img target); +void img_cache_invalidate(cache_target_img target); +void img_cache_flush_invalidate(cache_target_img target); +---- + +Load from/store to memory: + +[source,c] +---- +gentype img_load(gentype *p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile); +gentype img_load(const gentype *p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile); +void img_store(gentype *p, gentype value, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile); +---- + +== Modifications to the OpenCL C Specification + +(Add to Table 4 - Other Built-in Data Types in Section 6.3.3. Other Built-in Data Types) :: ++ +[cols=",",options="header",] +|==== +| Type | Description +| `cache_target_img` + | Target of the cache control functions. Refer to the Low-level Memory and Cache Control Functions section for a detailed description + of the built-in functions that use this enum. + +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. +| `cache_coherence_img` + | Level of cache coherence. Refer to the Low-level Memory and Cache Control Functions section for a detailed description + of the built-in functions that use this enum. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| `L2_cache_policy_img` + | Cache policy for the L2 cache. Refer to the Low-level Memory and Cache Control Functions section for a detailed description + of the built-in functions that use this enum.
+ +Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_img` + | Level of cache persistence. Refer to the Low-level Memory and Cache Control Functions section for a detailed description + of the built-in functions that use this enum. + +Requires that the `__opencl_img_load_store` feature macro is defined. +|==== + +(Add a new Section 6.15.22, *Low-level Memory and Cache Control Functions*) :: ++ +-- +The OpenCL C programming language implements the following built-in functions +to perform low-level memory and cache control operations: + +[cols="1,2",options="header"] +|==== +| Function | Description +| void *img_fence*(cache_target_img target) + a| `img_fence` issues a data fence as far as the specified `target`. For example, using `cache_target_L2_img` issues a data fence for the L1 and L2 caches. + +Requires that the `__opencl_img_fence` feature macro is defined. +| void *img_cache_flush*(cache_target_img target) + a| `img_cache_flush` flushes cache, `target` determines how far through the memory hierarchy caches are flushed. For example, using `cache_target_L2_img` flushes the L1 and L2 caches. + +Requires that the `__opencl_img_cache` feature macro is defined. +| void *img_cache_invalidate*(cache_target_img target) + a| `img_cache_invalidate` invalidates cache, `target` determines how far through the memory hierarchy caches are invalidated. For example, using `cache_target_L2_img` invalidates the L1 and L2 caches. + +Requires that the `__opencl_img_cache` feature macro is defined. +| void *img_cache_flush_invalidate*(cache_target_img target) + a| `img_cache_flush_invalidate` flushes and invalidates cache, `target` determines how far through the memory hierarchy caches are flushed and invalidated. For example, using `cache_target_L2_img` flushes and invalidates the L1 and L2 caches. + +Requires that the `__opencl_img_cache` feature macro is defined. 
+| gentype *img_load*(gentype pass:[*]p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile) + + gentype *img_load*(const gentype pass:[*]p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile) + a| `img_load` returns sizeof(gentype) bytes of data from `p`, where `coherence` specifies the level of cache coherence, `policy` specifies the cache policy for the L2 cache, `persistence` specifies the level of cache persistence, and `volatile` specifies volatility. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| void *img_store*(gentype pass:[*]p, gentype value, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile) + a| `img_store` writes `value` to `p`, where `coherence` specifies the level of cache coherence, `policy` specifies the cache policy for the L2 cache, `persistence` specifies the level of cache persistence, and `volatile` specifies volatility. + +Requires that the `__opencl_img_load_store` feature macro is defined. +|==== +-- + +=== Cache Target + +The enumerated type `cache_target_img` specifies the target of the cache control functions. +The following table lists the enumeration constants: + +[cols=",",options="header",] +|==== +| Cache Target | Additional Notes +| `cache_target_L1_img` + | Performs the operation on the L1 cache. + +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. +| `cache_target_L2_img` + | Performs the operation on the L1 and L2 caches. + +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. +| `cache_target_external_img` + | Performs the operation on the L1, L2, and external caches. + +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined.
+|==== + +=== Cache Persistence +Cache persistence modifies the priority of the request in the cache, where a low level means that requests are evicted quickly and a high level means that requests remain in the cache for a long time. +The enumerated type `cache_persistence_level_img` specifies the level of cache persistence. +The following table lists the enumeration constants: + +[cols=",",options="header",] +|==== +| Cache Persistence | Additional Notes +| `cache_persistence_level_default_img` + | `cache_persistence_level_min_img` is the default persistence level. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_min_img` + | Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_low_img` + | Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_high_img` + | Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_max_img` + | Requires that the `__opencl_img_load_store` feature macro is defined. +|==== + +[[cache-coherence]] +=== Cache Coherence +The enumerated type `cache_coherence_img` specifies the level of cache coherence. +The following table lists the enumeration constants: + +[cols=",",options="header",] +|==== +| Cache Coherence | Additional Notes +| `cache_coherence_L1_img` + | Cache coherence is guaranteed at the L1 level. Explicit flush or invalidate may be needed to ensure data coherency at higher levels. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_coherence_L2_img` + | Cache coherence is guaranteed at the L2 level. Explicit flush or invalidate may be needed to ensure data coherency at higher levels. + +Requires that the `__opencl_img_load_store` feature macro is defined. +|==== + +=== L2 Cache Policy +The enumerated type `L2_cache_policy_img` specifies the cache policy for the L2 cache.
+The following table lists the enumeration constants: + +[cols=",",options="header",] +|==== +| L2 Cache Policy | Additional Notes +| `L2_cache_policy_new_alloc_img` + | Allocates a new cache line on a cache miss. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| `L2_cache_policy_bypass_img` + | Permits bypassing the cache and accessing memory directly. + +Requires that the `__opencl_img_load_store` feature macro is defined. +|==== + +== Coding Sample + +This coding sample shows how to use the *img_load* and *img_store* functions: +[source] +---- +__kernel void test(__global int *in, __global int *out) { + int a = img_load(in, cache_coherence_L1_img, L2_cache_policy_new_alloc_img, cache_persistence_level_min_img, true); + a += 1; + img_store(out, a, cache_coherence_L1_img, L2_cache_policy_new_alloc_img, cache_persistence_level_min_img, true); +} +---- + +== Version History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|==== +| Version | Date | Author | Changes +| 1.0.0 | 2024-06-19 | Tomasz Platek | *Initial revision* +|==== + diff --git a/extensions/extensions.txt b/extensions/extensions.txt index cc4849ccc..1e1e98049 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -71,6 +71,8 @@ include::cl_img_generate_mipmap.asciidoc[] <<< include::cl_img_matrix_multiply.asciidoc[] <<< +include::cl_img_memory_management.asciidoc[] +<<< include::cl_img_mem_properties.asciidoc[] <<< include::cl_img_swap_ops.asciidoc[] From 497fb748a52412ec32808649d122be6c0562381d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 13 Aug 2024 01:54:29 +0100 Subject: [PATCH 135/190] Generate OpenCL C feature dictionary (#1212) * Generate OpenCL C feature dictionary Features are stored in a text file for now. Ultimately, we probably want to use the XML registry for this. Generation script taken from #1174 with a few modifications. Contributes to #1166.
Signed-off-by: Ben Ashbaugh Signed-off-by: Kevin Petit Change-Id: Ie2c14148d75457030aa1a97cf601daba2c007397 * Update scripts/gen_c_feature_dictionary.py Co-authored-by: Ben Ashbaugh * define __opencl_c_ outside of the list of features Signed-off-by: Kevin Petit Change-Id: I8e0947c30775338dd70803d09c7059d340e86f5a --------- Signed-off-by: Ben Ashbaugh Signed-off-by: Kevin Petit Co-authored-by: Ben Ashbaugh --- Makefile | 3 + c/feature-dictionary.asciidoc | 160 +--------------------------- c/features.txt | 20 ++++ scripts/gen_c_feature_dictionary.py | 87 +++++++++++++++ 4 files changed, 111 insertions(+), 159 deletions(-) create mode 100644 c/features.txt create mode 100644 scripts/gen_c_feature_dictionary.py diff --git a/Makefile b/Makefile index 6aef16a92..5dcaae6f9 100644 --- a/Makefile +++ b/Makefile @@ -512,9 +512,11 @@ $(MANHTMLDIR)/intro.html: $(REFPATH)/intro.txt $(MANCOPYRIGHT) REGISTRY = $(ROOTDIR)/xml APIXML = $(REGISTRY)/cl.xml +CFEATURES = c/features.txt GENSCRIPT = $(SCRIPTS)/gencl.py DICTSCRIPT = $(SCRIPTS)/gen_dictionaries.py VERSIONSCRIPT = $(SCRIPTS)/gen_version_notes.py +CFEATSCRIPT = $(SCRIPTS)/gen_c_feature_dictionary.py GENSCRIPTOPTS = $(VERSIONOPTIONS) $(EXTOPTIONS) $(GENSCRIPTEXTRA) -registry $(APIXML) GENSCRIPTEXTRA = @@ -540,6 +542,7 @@ extinc: $(METADEPEND) $(METADEPEND): $(APIXML) $(GENSCRIPT) $(QUIET)$(MKDIR) $(METAPATH) $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(METAPATH) extinc + $(QUIET)$(PYTHON) $(CFEATSCRIPT) -features $(CFEATURES) -o $(METAPATH)/c-feature-dictionary.asciidoc # This generates a single file containing asciidoc attributes for each # extension in the spec being built. 
diff --git a/c/feature-dictionary.asciidoc b/c/feature-dictionary.asciidoc index e8375eb57..6e558f24c 100644 --- a/c/feature-dictionary.asciidoc +++ b/c/feature-dictionary.asciidoc @@ -10,162 +10,4 @@ ifndef::backend-html5[] :opencl_c_feature_name: pass:q[`\__opencl_c_​<feature_​name>`] endif::[] -// opencl_c_3d_image_writes -ifdef::backend-html5[] -:opencl_c_3d_image_writes: pass:q[`\__opencl_c_3d_image_writes`] -endif::[] -ifndef::backend-html5[] -:opencl_c_3d_image_writes: pass:q[`\__opencl_c_​3d_​image_​writes`] -endif::[] - -// opencl_c_atomic_order_acq_rel -ifdef::backend-html5[] -:opencl_c_atomic_order_acq_rel: pass:q[`\__opencl_c_atomic_order_acq_rel`] -endif::[] -ifndef::backend-html5[] -:opencl_c_atomic_order_acq_rel: pass:q[`\__opencl_c_​atomic_​order_​​`] -endif::[] - -// opencl_c_atomic_order_seq_cst -ifdef::backend-html5[] -:opencl_c_atomic_order_seq_cst: pass:q[`\__opencl_c_atomic_order_seq_cst`] -endif::[] -ifndef::backend-html5[] -:opencl_c_atomic_order_seq_cst: pass:q[`\__opencl_c_​atomic_​order_​seq_​cst`] -endif::[] - -// opencl_c_atomic_scope_device -ifdef::backend-html5[] -:opencl_c_atomic_scope_device: pass:q[`\__opencl_c_atomic_scope_device`] -endif::[] -ifndef::backend-html5[] -:opencl_c_atomic_scope_device: pass:q[`\__opencl_c_​atomic_​scope_​device`] -endif::[] - -// opencl_c_atomic_scope_all_devices -ifdef::backend-html5[] -:opencl_c_atomic_scope_all_devices: pass:q[`\__opencl_c_atomic_scope_all_devices`] -endif::[] -ifndef::backend-html5[] -:opencl_c_atomic_scope_all_devices: pass:q[`\__opencl_c_​atomic_​scope_​all_​devices`] -endif::[] - -// opencl_c_device_enqueue -ifdef::backend-html5[] -:opencl_c_device_enqueue: pass:q[`\__opencl_c_device_enqueue`] -endif::[] -ifndef::backend-html5[] -:opencl_c_device_enqueue: pass:q[`\__opencl_c_​device_​enqueue`] -endif::[] - -// opencl_c_generic_address_space -ifdef::backend-html5[] -:opencl_c_generic_address_space: pass:q[`\__opencl_c_generic_address_space`] -endif::[] -ifndef::backend-html5[] 
-:opencl_c_generic_address_space: pass:q[`\__opencl_c_​generic_​address_​space`] -endif::[] - -// opencl_c_fp64 -ifdef::backend-html5[] -:opencl_c_fp64: pass:q[`\__opencl_c_fp64`] -endif::[] -ifndef::backend-html5[] -:opencl_c_fp64: pass:q[`\__opencl_c_​fp64`] -endif::[] - -// opencl_c_images -ifdef::backend-html5[] -:opencl_c_images: pass:q[`\__opencl_c_images`] -endif::[] -ifndef::backend-html5[] -:opencl_c_images: pass:q[`\__opencl_c_​images`] -endif::[] - -// opencl_c_int64 -ifdef::backend-html5[] -:opencl_c_int64: pass:q[`\__opencl_c_int64`] -endif::[] -ifndef::backend-html5[] -:opencl_c_int64: pass:q[`\__opencl_c_​int64`] -endif::[] - -// opencl_c_pipes -ifdef::backend-html5[] -:opencl_c_pipes: pass:q[`\__opencl_c_pipes`] -endif::[] -ifndef::backend-html5[] -:opencl_c_pipes: pass:q[`\__opencl_c_​pipes`] -endif::[] - -// opencl_c_program_scope_global_variables -ifdef::backend-html5[] -:opencl_c_program_scope_global_variables: pass:q[`\__opencl_c_program_scope_global_variables`] -endif::[] -ifndef::backend-html5[] -:opencl_c_program_scope_global_variables: pass:q[`\__opencl_c_​program_​scope_​global_​variables`] -endif::[] - -// opencl_c_read_write_images -ifdef::backend-html5[] -:opencl_c_read_write_images: pass:q[`\__opencl_c_read_write_images`] -endif::[] -ifndef::backend-html5[] -:opencl_c_read_write_images: pass:q[`\__opencl_c_​read_​write_​images`] -endif::[] - -// opencl_c_subgroups -ifdef::backend-html5[] -:opencl_c_subgroups: pass:q[`\__opencl_c_subgroups`] -endif::[] -ifndef::backend-html5[] -:opencl_c_subgroups: pass:q[`\__opencl_c_​subgroups`] -endif::[] - -// opencl_c_work_group_collective_functions -ifdef::backend-html5[] -:opencl_c_work_group_collective_functions: pass:q[`\__opencl_c_work_group_collective_functions`] -endif::[] -ifndef::backend-html5[] -:opencl_c_work_group_collective_functions: pass:q[`\__opencl_c_​work_​group_​collective_​functions`] -endif::[] - -// opencl_c_integer_dot_product_input_4x8bit -ifdef::backend-html5[] 
-:opencl_c_integer_dot_product_input_4x8bit: pass:q[`\__opencl_c_integer_dot_product_input_4x8bit`] -endif::[] -ifndef::backend-html5[] -:opencl_c_integer_dot_product_input_4x8bit: pass:q[`\__opencl_c_​integer_​dot_​product_​input_​4x8bit`] -endif::[] - -// opencl_c_integer_dot_product_input_4x8bit_packed -ifdef::backend-html5[] -:opencl_c_integer_dot_product_input_4x8bit_packed: pass:q[`\__opencl_c_integer_dot_product_input_4x8bit_packed`] -endif::[] -ifndef::backend-html5[] -:opencl_c_integer_dot_product_input_4x8bit_packed: pass:q[`\__opencl_c_​integer_​dot_​product_​input_​4x8bit_​packed`] -endif::[] - -// opencl_c_kernel_clock_scope_device -ifdef::backend-html5[] -:opencl_c_kernel_clock_scope_device: pass:q[`\__opencl_c_kernel_clock_scope_device`] -endif::[] -ifndef::backend-html5[] -:opencl_c_kernel_clock_scope_device: pass:q[`\__opencl_c_​kernel_​clock_​scope_​device`] -endif::[] - -// opencl_c_kernel_clock_scope_work_group -ifdef::backend-html5[] -:opencl_c_kernel_clock_scope_work_group: pass:q[`\__opencl_c_kernel_clock_scope_work_group`] -endif::[] -ifndef::backend-html5[] -:opencl_c_kernel_clock_scope_work_group: pass:q[`\__opencl_c_​kernel_​clock_​scope_​work_​group`] -endif::[] - -// opencl_c_kernel_clock_scope_sub_group -ifdef::backend-html5[] -:opencl_c_kernel_clock_scope_sub_group: pass:q[`\__opencl_c_kernel_clock_scope_sub_group`] -endif::[] -ifndef::backend-html5[] -:opencl_c_kernel_clock_scope_sub_group: pass:q[`\__opencl_c_​kernel_​clock_​scope_​sub_​group`] -endif::[] +include::{generated}/meta/c-feature-dictionary.asciidoc[] diff --git a/c/features.txt b/c/features.txt new file mode 100644 index 000000000..b7e636dda --- /dev/null +++ b/c/features.txt @@ -0,0 +1,20 @@ +__opencl_c_3d_image_writes +__opencl_c_atomic_order_acq_rel +__opencl_c_atomic_order_seq_cst +__opencl_c_atomic_scope_device +__opencl_c_atomic_scope_all_devices +__opencl_c_device_enqueue +__opencl_c_generic_address_space +__opencl_c_fp64 +__opencl_c_images +__opencl_c_int64 
+__opencl_c_pipes +__opencl_c_program_scope_global_variables +__opencl_c_read_write_images +__opencl_c_subgroups +__opencl_c_work_group_collective_functions +__opencl_c_integer_dot_product_input_4x8bit +__opencl_c_integer_dot_product_input_4x8bit_packed +__opencl_c_kernel_clock_scope_device +__opencl_c_kernel_clock_scope_work_group +__opencl_c_kernel_clock_scope_sub_group diff --git a/scripts/gen_c_feature_dictionary.py b/scripts/gen_c_feature_dictionary.py new file mode 100644 index 000000000..f9b071735 --- /dev/null +++ b/scripts/gen_c_feature_dictionary.py @@ -0,0 +1,87 @@ +#!/usr/bin/python3 + +# Copyright 2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +from collections import OrderedDict + +import argparse +import sys + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('-features', action='store', + default='', + help='File with OpenCL C features to generate, one per line') + parser.add_argument('-o', action='store', default='', + help='Output file in which to store the feature dictionary. stdout is used if no file is provided.') + + args = parser.parse_args() + + features = [] + if len(args.features) > 0: + print('Generating feature dictionaries from: ' + args.features) + with open(args.features) as f: + features = f.readlines() + else: + print('Reading feature dictionaries from stdin...') + for line in sys.stdin: + features.append(line) + print('Generating...\n') + + numberOfFeatures = 0 + + if args.o: + outfile = open(args.o, 'w') + else: + outfile = sys.stdout + + for name in features: + name = name.strip() + if len(name) == 0: + continue + + # OpenCL C features start with __opencl_c + if name.startswith('__opencl_c'): + #print('found enum: ' + name) + + # Create a variant of the name that precedes underscores with + # "zero width" spaces. This causes some long names to be + # broken at more intuitive places. 
+ htmlName = name[:10] + name[10:].replace("_", "_<wbr>") + otherName = name[:10] + name[10:].replace("_", "_​") + + # Remove the leading underscores. + name = name[2:] + + # Example: + # + # // opencl_c_images + # ifdef::backend-html5[] + # :opencl_c_images: pass:q[`\__opencl_c_<wbr>images`] + # endif::[] + # ifndef::backend-html5[] + # :opencl_c_images: pass:q[`\__opencl_c_​images`] + # endif::[] + outfile.write('// ' + name + '\n') + outfile.write('ifdef::backend-html5[]\n') + outfile.write(':' + name + ': pass:q[`\\' + htmlName + '`]\n') + outfile.write('endif::[]\n') + outfile.write('ifndef::backend-html5[]\n') + outfile.write(':' + name + ': pass:q[`\\' + otherName + '`]\n') + outfile.write('endif::[]\n') + + numberOfFeatures = numberOfFeatures + 1 + + # anything else is not a valid feature name + else: + print('Unexpected feature name: ' + name + ', features should start with __opencl_c!') + sys.exit(1) + + outfile.write('\n') + + if args.o: + outfile.close() + + print('Found ' + str(numberOfFeatures) + ' features.') From 16e38354beb8baded292da26a0ff77a9c01e2535 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 13 Aug 2024 18:40:56 +0200 Subject: [PATCH 136/190] further clarify a clCreateBuffer with SVM pointer error condition (#1189) It should also be invalid to use SVM host_ptr smaller than `size` in the CL_MEM_COPY_HOST_PTR case --- api/opencl_runtime_layer.asciidoc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 3027cc730..1e7c97126 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -628,8 +628,9 @@ returned in _errcode_ret_: in the <> table. * {CL_INVALID_BUFFER_SIZE} if _size_ is 0, or if _size_ is greater than {CL_DEVICE_MAX_MEM_ALLOC_SIZE} for all devices in _context_, or if - {CL_MEM_USE_HOST_PTR} is set in _flags_ and _host_ptr_ is a pointer returned by - {clSVMAlloc} and _size_ is greater than the size passed to {clSVMAlloc}.
+ {CL_MEM_USE_HOST_PTR} or {CL_MEM_COPY_HOST_PTR} is set in _flags_ and + _host_ptr_ is a pointer returned by {clSVMAlloc} and _size_ is greater than + the size passed to {clSVMAlloc}. * {CL_INVALID_HOST_PTR} if _host_ptr_ is `NULL` and {CL_MEM_USE_HOST_PTR} or {CL_MEM_COPY_HOST_PTR} are set in _flags_ or if _host_ptr_ is not `NULL` but {CL_MEM_COPY_HOST_PTR} or {CL_MEM_USE_HOST_PTR} are not set in _flags_. From 1f7aceabdad5339571b9f467fdd9baafdf3b611d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 13 Aug 2024 17:42:02 +0100 Subject: [PATCH 137/190] Begin integration of EXT extensions into the unified specification (#1213) * Begin integration of EXT extensions into the unified specification - Add a khr+ext spec type to makeSpec and cover in CI - Document the version and dependencies of all EXT extensions in the XML - Integrate cl_ext_cxx_for_opencl into the unified specification - Add placeholder descriptions for all EXT extensions linking to either core/KHR features that supersede old extensions that were never part of the specification or the OpenCL Extensions document. This enables us to integrate EXT extensions incrementally and integrate future EXT extensions directly into the unified specification. 
Change-Id: Ic634ce000ad3ebfb56e56bce91f9c0de3e786383 Signed-off-by: Kevin Petit * Update api/cl_ext_cxx_for_opencl.asciidoc Co-authored-by: Ewan Crawford * remove dangling links in cl_ext_migrate_memobject appendix Change-Id: I13b4860dfcd3d6d865b269847c5876bf75516e87 * add links to latest published ext specs Change-Id: Ifddbbc47ddb0ac9be6327d9925682b96829d0946 --------- Signed-off-by: Kevin Petit Co-authored-by: Ewan Crawford --- .github/workflows/presubmit.yml | 6 +- api/cl_ext_cxx_for_opencl.asciidoc | 59 ++++++++ api/cl_ext_device_fission.asciidoc | 20 +++ api/cl_ext_float_atomics.asciidoc | 21 +++ api/cl_ext_image_from_buffer.asciidoc | 21 +++ api/cl_ext_image_raw10_raw12.asciidoc | 21 +++ api/cl_ext_image_requirements_info.asciidoc | 21 +++ api/cl_ext_migrate_memobject.asciidoc | 20 +++ api/opencl_platform_layer.asciidoc | 10 ++ api/opencl_runtime_layer.asciidoc | 11 ++ extensions/cl_ext_cxx_for_opencl.asciidoc | 152 -------------------- extensions/extensions.txt | 2 - makeSpec | 9 +- scripts/extdependency.py | 8 ++ xml/cl.xml | 14 +- 15 files changed, 230 insertions(+), 165 deletions(-) create mode 100644 api/cl_ext_cxx_for_opencl.asciidoc create mode 100644 api/cl_ext_device_fission.asciidoc create mode 100644 api/cl_ext_float_atomics.asciidoc create mode 100644 api/cl_ext_image_from_buffer.asciidoc create mode 100644 api/cl_ext_image_raw10_raw12.asciidoc create mode 100644 api/cl_ext_image_requirements_info.asciidoc create mode 100644 api/cl_ext_migrate_memobject.asciidoc delete mode 100644 extensions/cl_ext_cxx_for_opencl.asciidoc diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 6a2d54b32..8d446b606 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -47,10 +47,14 @@ jobs: run: | python3 makeSpec -clean -spec core OUTDIR=out.core -j 5 -O api c env ext cxx4opencl - - name: Generate core + extension specs (HTML) + - name: Generate core + KHR extension specs (HTML) run: | python3 makeSpec 
-clean -spec khr OUTDIR=out.khr -j -O html + - name: Generate core + KHR + EXT extension specs (HTML) + run: | + python3 makeSpec -clean -spec khr+ext OUTDIR=out.khr+ext -j -O html + - name: Generate reference pages run: | python3 makeSpec -spec khr OUTDIR=out.refpages -j -O manhtmlpages diff --git a/api/cl_ext_cxx_for_opencl.asciidoc b/api/cl_ext_cxx_for_opencl.asciidoc new file mode 100644 index 000000000..51ead70cb --- /dev/null +++ b/api/cl_ext_cxx_for_opencl.asciidoc @@ -0,0 +1,59 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_cxx_for_opencl.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-08-25 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kevin Petit, Arm Ltd. + + - Sven Van Haastregt, Arm Ltd. + + - Anastasia Stulova, Arm Ltd. + + - Marco Antognini, Arm Ltd. + + - Neil Hickey, Arm Ltd. + + - Alastair Murray, Codeplay + + +=== Description + +This extension adds support for building programs written using the C++ for +OpenCL kernel language documented in the *OpenCL-Docs* repository +(https://github.com/KhronosGroup/OpenCL-Docs) +with stable versions published in releases of the repository. + +This extension also enables applications to query the version of the language +supported by the device compiler. + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT} + +=== New build option + +This extension adds support for a new `CLC++` value to be passed to the +`-cl-std` build option accepted by {clBuildProgram} and {clCompileProgram}. + +=== Preprocessor Macros + +This extension defines a new language, instead of extending an existing +language. As such, there will be no preprocessor `#define` matching the +extension name string. 
Instead, dedicated preprocessor macros conveying +language version information are available as described in the C++ for +OpenCL Programming Language Documentation, section 2.2.2.2 "Predefined +macros". + +=== Conformance tests + +. Test that a program can successfully be compiled with `-cl-std=CLC++`. +. Test with a program compiled with `-cl-std=CLC++` that the value of the + +__OPENCL_CPP_VERSION__+ macro agrees with the version returned by + `CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT`. + +=== Version History + + * Revision 1.0.0, 2020-08-25 + ** Initial version. diff --git a/api/cl_ext_device_fission.asciidoc b/api/cl_ext_device_fission.asciidoc new file mode 100644 index 000000000..8b038ef68 --- /dev/null +++ b/api/cl_ext_device_fission.asciidoc @@ -0,0 +1,20 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_device_fission.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2024-07-17 +*IP Status*:: + No known IP claims. + +=== Description + +Precursor to the functionality described in <<platform-device-partitioning>>. + +=== Version History + + * Revision 1.0.0, 2024-07-17 + ** First version. diff --git a/api/cl_ext_float_atomics.asciidoc b/api/cl_ext_float_atomics.asciidoc new file mode 100644 index 000000000..473feb40e --- /dev/null +++ b/api/cl_ext_float_atomics.asciidoc @@ -0,0 +1,21 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_float_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-08-12 +*IP Status*:: + No known IP claims. + +=== Description + +The latest published specification for this extension is available on +the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_float_atomics.html[OpenCL registry]. + +=== Version History + + * Revision 1.0.0, 2020-08-12 + ** First version.
diff --git a/api/cl_ext_image_from_buffer.asciidoc b/api/cl_ext_image_from_buffer.asciidoc new file mode 100644 index 000000000..6bd6fa9bd --- /dev/null +++ b/api/cl_ext_image_from_buffer.asciidoc @@ -0,0 +1,21 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_image_from_buffer.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-01-25 +*IP Status*:: + No known IP claims. + +=== Description + +The latest published specification for this extension is available on +the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_image_from_buffer.html[OpenCL registry]. + +=== Version History + + * Revision 1.0.0, 2022-01-25 + ** First version. diff --git a/api/cl_ext_image_raw10_raw12.asciidoc b/api/cl_ext_image_raw10_raw12.asciidoc new file mode 100644 index 000000000..d7d36b55d --- /dev/null +++ b/api/cl_ext_image_raw10_raw12.asciidoc @@ -0,0 +1,21 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_image_raw10_raw12.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-05-03 +*IP Status*:: + No known IP claims. + +=== Description + +The latest published specification for this extension is available on +the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_image_raw10_raw12.html[OpenCL registry]. + +=== Version History + + * Revision 1.0.0, 2023-05-03 + ** First version. diff --git a/api/cl_ext_image_requirements_info.asciidoc b/api/cl_ext_image_requirements_info.asciidoc new file mode 100644 index 000000000..6de780853 --- /dev/null +++ b/api/cl_ext_image_requirements_info.asciidoc @@ -0,0 +1,21 @@ +// Copyright 2018-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_image_requirements_info.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-01-18 +*IP Status*:: + No known IP claims. + +=== Description + +The latest published specification for this extension is available on +the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_image_requirements_info.html[OpenCL registry]. + +=== Version History + + * Revision 0.5.0, 2022-01-18 + ** First version. diff --git a/api/cl_ext_migrate_memobject.asciidoc b/api/cl_ext_migrate_memobject.asciidoc new file mode 100644 index 000000000..b987f48f2 --- /dev/null +++ b/api/cl_ext_migrate_memobject.asciidoc @@ -0,0 +1,20 @@ +// Copyright 2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_migrate_memobject.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2024-07-17 +*IP Status*:: + No known IP claims. + +=== Description + +Precursor to {clEnqueueMigrateMemObjects}. + +=== Version History + + * Revision 1.0.0, 2024-07-17 + ** Initial version. diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index ceb6e3193..2b1ae266d 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -2083,6 +2083,15 @@ include::{generated}/api/version-notes/CL_DEVICE_TERMINATE_CAPABILITY_KHR.asciid {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR_anchor} - Indicates that context termination is supported. endif::cl_khr_terminate_context[] + +ifdef::cl_ext_cxx_for_opencl[] +| {CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT.asciidoc[] + | {cl_version_TYPE} + | Returns the version of the C++ for OpenCL language supported by the + device compiler. 
+endif::cl_ext_cxx_for_opencl[] |==== ifdef::cl_khr_integer_dot_product[] @@ -2630,6 +2639,7 @@ Otherwise it may return endif::cl_khr_d3d11_sharing[] +[[platform-device-partitioning]] == Partitioning a Device NOTE: Partitioning devices is <> version 1.2. diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 1e7c97126..4925edd62 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -8427,6 +8427,10 @@ header or implementation-defined source for custom devices that support an online compiler. OpenCL {cpp} is not supported as an online-compiled kernel language through this interface. +ifdef::cl_ext_cxx_for_opencl[] +If the {cl_ext_cxx_for_opencl_EXT} extension is supported, the source code +specified by _strings_ may also be a C++ for OpenCL program source or header. +endif::cl_ext_cxx_for_opencl[] // refError @@ -9519,6 +9523,13 @@ IMPORTANT: Debugging options are <> version 2.0. built-in functions that allow you to enqueue commands on a device (refer to OpenCL kernel languages specifications). +ifdef::cl_ext_cxx_for_opencl[] +==== C++ for OpenCL + +Applications may pass `-cl-std=CLC\++` to {clCompileProgram} or {clBuildProgram} +for programs created using {clCreateProgramWithSource} to request the program +be built as C++ for OpenCL. +endif::cl_ext_cxx_for_opencl[] [[linker-options]] === Linker Options diff --git a/extensions/cl_ext_cxx_for_opencl.asciidoc b/extensions/cl_ext_cxx_for_opencl.asciidoc deleted file mode 100644 index 12bd4406f..000000000 --- a/extensions/cl_ext_cxx_for_opencl.asciidoc +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2018-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -:data-uri: -:icons: font -include::../config/attribs.txt[] -:source-highlighter: coderay - -= cl_ext_cxx_for_opencl -:R: pass:q,r[^(R)^] -Khronos{R} OpenCL Working Group - -== Name Strings - -`cl_ext_cxx_for_opencl` - -== Contact - -Please see the *Issues* list in the Khronos *OpenCL-Docs* repository: + -https://github.com/KhronosGroup/OpenCL-Docs - -== Contributors - -Kevin Petit, Arm Ltd. + -Sven Van Haastregt, Arm Ltd. + -Anastasia Stulova, Arm Ltd. + -Marco Antognini, Arm Ltd. + -Neil Hickey, Arm Ltd. + -Alastair Murray, Codeplay + - -== Notice - -include::../copyrights.txt[] - -== Version - -Built On: {docdate} + -Version: 1.0.0 - -== Dependencies - -This extension is written against the OpenCL Specification -Version 3.0.3. - -This extension requires OpenCL 3.0 with OpenCL C 2.0 support or OpenCL 2.x and -`cl_khr_extended_versioning`. - -== Overview - -This extension adds support for building programs written using the C++ for -OpenCL kernel language documented in the *OpenCL-Docs* repository -(https://github.com/KhronosGroup/OpenCL-Docs) -with stable versions published in releases of the repository. - -This extension also enables applications to query the version of the language -supported by the device compiler. - -== New build option - -This extension adds support for a new `CLC++` value to be passed to the -`-cl-std` build option accepted by *clBuildProgram* and *clCompileProgram*. - -== New API Enums - -Accepted value for the _param_name_ parameter to *clGetDeviceInfo*: - -[source,c] ----- -CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT 0x4230 ----- - -== Preprocessor Macros - -This extension defines a new language, instead of extending an existing -language. As such, there will be no preprocessor `#define` matching the -extension name string. 
Instead, dedicated preprocessor macros conveying -language version information are available as described in the C++ for -OpenCL Programming Language Documentation, section 2.2.2.2 "Predefined -macros". - - -== Modifications to the OpenCL API Specification - -(Modify Section 4.2, *Querying Devices*) :: -+ --- - -(Add the following to Table 4.3, _Device Queries_) :: -+ --- - -[cols="1,1,4",options="header"] -|==== -| cl_device_info -| Return Type -| Description - -| `CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT` -| `cl_version` -| Returns the version of the C++ for OpenCL language supported by the - device compiler. - -|==== - --- --- - -(Modify Section 5.8.1, *Creating Program Objects*) :: -+ --- -Add the following text to the description for *clCreateProgramWithSource*: - -The source code specified by _strings_ may also be a C++ for OpenCL program source -or header. --- - -(Modify section to 5.8.6, *Compiler Options*) :: -+ --- - -(Add subsection, *C++ for OpenCL*) :: -+ --- -Applications may pass `-cl-std=CLC\++` to *clCompileProgram* and *clBuildProgram* -for programs created using *clCreateProgramFromSource* to request the program -be built as C++ for OpenCL. --- - --- - -== Conformance tests - -. Test that a program can successfully be compiled with `-cl-std=CLC++`. -. Test with a program compiled with `-cl-std=CLC++` that the value of the - +__OPENCL_CPP_VERSION__+ macro agrees with the version returned by - `CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT`. - -== Issues - -None. 
- -== Version History - -[cols="5,15,15,70"] -[grid="rows"] -[options="header"] -|==== -| Version | Date | Author | Changes -| 1.0.0 | 2020-08-25 | Kevin Petit | *Initial revision* -|==== - diff --git a/extensions/extensions.txt b/extensions/extensions.txt index 1e1e98049..d28468b9c 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -34,8 +34,6 @@ Khronos{R} OpenCL Working Group == Multi-Vendor Extensions :leveloffset: 2 <<< -include::cl_ext_cxx_for_opencl.asciidoc[] -<<< include::cl_ext_float_atomics.asciidoc[] <<< include::cl_ext_image_from_buffer.asciidoc[] diff --git a/makeSpec b/makeSpec index 4c3decf3c..ac17ffb34 100755 --- a/makeSpec +++ b/makeSpec @@ -42,7 +42,7 @@ if __name__ == '__main__': default='gen', help='Path to directory containing generated files') parser.add_argument('-spec', action='store', - choices=[ 'core', 'khr', 'all' ], + choices=[ 'core', 'khr', 'khr+ext', 'all' ], default='core', help='Type of spec to generate') parser.add_argument('-registry', action='store', @@ -79,8 +79,8 @@ if __name__ == '__main__': # extension appendices yet. 
if results.spec == 'all': - results.spec = 'khr' - print("WARNING: 'all' argument to -results interpreted as 'khr' at present", file=sys.stderr) + results.spec = 'khr+ext' + print("WARNING: 'all' argument to -results interpreted as 'khr+ext' at present", file=sys.stderr) if results.spec == 'core': title = '' @@ -88,6 +88,9 @@ if __name__ == '__main__': elif results.spec == 'khr': title = 'with all KHR extensions' exts = set(deps.khrExtensions()) + elif results.spec == 'khr+ext': + title = 'with all KHR and EXT extensions' + exts = set(deps.khrAndextExtensions()) elif results.spec == 'all': title = 'with all registered extensions' exts = set(deps.allExtensions()) diff --git a/scripts/extdependency.py b/scripts/extdependency.py index 69dbec3cc..59bfc8381 100755 --- a/scripts/extdependency.py +++ b/scripts/extdependency.py @@ -106,6 +106,7 @@ def __init__(self, self.allExts = set() self.khrExts = set() + self.extExts = set() self.ratifiedExts = set() self.graph = DiGraph() self.extensions = {} @@ -133,6 +134,9 @@ def __init__(self, if conventions.KHR_prefix in name: self.khrExts.add(name) + if conventions.EXT_prefix in name: + self.extExts.add(name) + if api_name in ratified.split(','): self.ratifiedExts.add(name) @@ -159,6 +163,10 @@ def khrExtensions(self): """Returns a set of all KHR extensions in the graph""" return self.khrExts + def khrAndextExtensions(self): + """Returns a set of all KHR and EXT extensions in the graph""" + return self.khrExts | self.extExts + def ratifiedExtensions(self): """Returns a set of all ratified extensions in the graph""" return self.ratifiedExts diff --git a/xml/cl.xml b/xml/cl.xml index 53b9a9be4..e07843004 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -5782,7 +5782,7 @@ server's OpenCL/api-docs repository. - + @@ -5826,7 +5826,7 @@ server's OpenCL/api-docs repository. - + @@ -6764,7 +6764,7 @@ server's OpenCL/api-docs repository. - + @@ -7248,7 +7248,7 @@ server's OpenCL/api-docs repository. 
- + @@ -7286,7 +7286,7 @@ server's OpenCL/api-docs repository. - + @@ -7377,7 +7377,7 @@ server's OpenCL/api-docs repository. - + @@ -7417,7 +7417,7 @@ server's OpenCL/api-docs repository. - + From 76db5559f6cf69009517e469088035698f19a6eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 13 Aug 2024 18:04:51 +0100 Subject: [PATCH 138/190] Clarify what re-import properties are accepted by clReImportSemaphoreSyncFdKHR (#1219) Align the language to clCreateBufferWithProperties. Change-Id: I58659fc9cd7fd3ae5178826285fd84d6932b29d8 Signed-off-by: Kevin Petit --- api/cl_khr_external_semaphore_sync_fd.asciidoc | 2 ++ api/opencl_runtime_layer.asciidoc | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc index aee60ec16..ae1bc7891 100644 --- a/api/cl_khr_external_semaphore_sync_fd.asciidoc +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -59,3 +59,5 @@ external semaphore using the APIs introduced by ** Added re-import function call to {cl_khr_external_semaphore_sync_fd_EXT} * Revision 1.0.0, 2024-03-15 ** First non-provisional version. + * Revision 1.0.1, 2024-08-06 + ** Clarify what re-import properties are accepted by {clReImportSemaphoreSyncFdKHR}. diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 4925edd62..263ca1854 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -13085,7 +13085,10 @@ include::{generated}/api/protos/clReImportSemaphoreSyncFdKHR.txt[] * _sema_object_ specifies a valid semaphore object with importable properties. - * _reimport_props_ must be `NULL`, and is reserved for future use. + * _reimport_props_ is an optional list of properties that affect the + re-import behavior. The list is terminated with the special property `0`. + If no properties are required, _reimport_props_ may be `NULL`. 
This extension + does not define any optional properties. * _fd_ specifies an external file descriptor handle to import Calling {clReImportSemaphoreSyncFdKHR} is equivalent to destroying From 0db7c307524a8b781c844925e1ef66dc37655b90 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sat, 17 Aug 2024 16:00:14 -0700 Subject: [PATCH 139/190] fix EPSILON typo (#1225) --- OpenCL_C.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 610b54e3c..bfd658f92 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -5845,7 +5845,7 @@ the application. | `FLT_RADIX` | {CL_FLT_RADIX} | `FLT_MAX` | {CL_FLT_MAX} | `FLT_MIN` | {CL_FLT_MIN} -| `FLT_EPSILSON` | {CL_FLT_EPSILON} +| `FLT_EPSILON` | {CL_FLT_EPSILON} |==== The following macros shall expand to integer constant expressions whose @@ -5916,7 +5916,7 @@ the application. | `DBL_MIN_EXP` | {CL_DBL_MIN_EXP} | `DBL_MAX` | {CL_DBL_MAX} | `DBL_MIN` | {CL_DBL_MIN} -| `DBL_EPSILSON` | {CL_DBL_EPSILON} +| `DBL_EPSILON` | {CL_DBL_EPSILON} |==== The following constants are also available. @@ -5986,7 +5986,7 @@ the application. | `HALF_RADIX` | {CL_HALF_RADIX} | `HALF_MAX` | {CL_HALF_MAX} | `HALF_MIN` | {CL_HALF_MIN} -| `HALF_EPSILSON` | {CL_HALF_EPSILON} +| `HALF_EPSILON` | {CL_HALF_EPSILON} |==== The following constants are also available. From c9d19df2655a0e3e2f441c066dce46ddaf7fc9be Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sat, 17 Aug 2024 17:26:06 -0700 Subject: [PATCH 140/190] remove duplicated extensions from quick reference table (#1229) --- ext/quick_reference.asciidoc | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index 804c30fae..86f979c82 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -240,18 +240,6 @@ Language Specifications. 
| Allows Use of the SPIR-V `SPV_KHR_no_integer_wrap_decoration` Extension | Extension -| [[cl_khr_spirv_extended_debug_info]] link:{APISpecURL}#cl_khr_spirv_extended_debug_info[{cl_khr_spirv_extended_debug_info_EXT}] -| Allows Use of the SPIR-V `OpenCL.DebugInfo.100` Extended Instruction Set -| Extension - -| [[cl_khr_spirv_linkonce_odr]] link:{APISpecURL}#cl_khr_spirv_linkonce_odr[{cl_khr_spirv_linkonce_odr_EXT}] -| Allows Use of the SPIR-V `SPV_KHR_linkonce_odr` Extension -| Extension - -| [[cl_khr_spirv_no_integer_wrap_decoration]] link:{APISpecURL}#cl_khr_spirv_no_integer_wrap_decoration[{cl_khr_spirv_no_integer_wrap_decoration_EXT}] -| Allows Use of the SPIR-V `SPV_KHR_no_integer_wrap_decoration` Extension -| Extension - | [[cl_khr_srgb_image_writes]] link:{APISpecURL}#cl_khr_srgb_image_writes[{cl_khr_srgb_image_writes_EXT}] | Write to sRGB Images | Extension From 34f2fe83176883a61d3613637ad0be9d6b42fd65 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 22 Aug 2024 15:53:16 -0700 Subject: [PATCH 141/190] fix asciidoctor rightarrow typo (#1234) --- OpenCL_C.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index bfd658f92..c6a096a7d 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -17300,7 +17300,7 @@ used in the conversions described below. When approximate rounding is used instead of the preferred rounding, the result of the conversion must satisfy the bound given below. -`half` {rightarrow` {CL_UNORM_INT8} (8-bit unsigned integer) +`half` {rightarrow} {CL_UNORM_INT8} (8-bit unsigned integer) [none] * Let f~exact~ = *max*(`0`, *min*(`f * 255`, `255`)) @@ -17308,7 +17308,7 @@ result of the conversion must satisfy the bound given below. 
* Let f~approx~ = *convert_uchar_sat_*(`f * 255.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` {CL_UNORM_INT_101010} (10-bit unsigned integer) +`half` {rightarrow} {CL_UNORM_INT_101010} (10-bit unsigned integer) [none] * Let f~exact~ = *max*(`0`, *min*(`f * 1023`, `1023`)) @@ -17317,7 +17317,7 @@ result of the conversion must satisfy the bound given below. * Let f~approx~ = *convert_ushort_sat_*(`f * 1023.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` {CL_UNORM_INT16} (16-bit unsigned integer) +`half` {rightarrow} {CL_UNORM_INT16} (16-bit unsigned integer) [none] * Let f~exact~ = *max*(`0`, *min*(`f * 65535`, `65535`)) @@ -17326,7 +17326,7 @@ result of the conversion must satisfy the bound given below. 65535.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` {CL_SNORM_INT8} (8-bit signed integer) +`half` {rightarrow} {CL_SNORM_INT8} (8-bit signed integer) [none] * Let f~exact~ = *max*(`-128`, *min*(`f * 127`, `127`)) @@ -17334,7 +17334,7 @@ result of the conversion must satisfy the bound given below. * Let f~approx~ = *convert_char_sat_*(`f * 127.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` {CL_SNORM_INT16} (16-bit signed integer) +`half` {rightarrow} {CL_SNORM_INT16} (16-bit signed integer) [none] * Let f~exact~ = *max*(`-32768`, *min*(`f * 32767`, `32767`)) From f625dbf48817831e894527d38732bb4aad6ac36d Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Fri, 23 Aug 2024 10:00:23 -0700 Subject: [PATCH 142/190] update SPIR-V spec reference to SPIR-V 1.6 (#1237) --- env/references.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env/references.asciidoc b/env/references.asciidoc index f5dabc473..6de0c4e4a 100644 --- a/env/references.asciidoc +++ b/env/references.asciidoc @@ -36,7 +36,7 @@ 3.0, Unified`", https://www.khronos.org/registry/OpenCL/ . 
// References are to sections and tables of this specific version, although // other versions exists. - . [[spirv-spec]] "`SPIR-V Specification, Version 1.5, Unified`", + . [[spirv-spec]] "`SPIR-V Specification, Version 1.6, Unified`", https://www.khronos.org/registry/spir-v/ . . [[opencl-extended-instruction-set]] "`OpenCL Extended Instruction Set Specification`", https://www.khronos.org/registry/spir-v/ . From b4f2aee25aeca13d7b81c2b2a1e7616dc5df92e7 Mon Sep 17 00:00:00 2001 From: Gowtham Tammana <128911018+gowtham-sarc@users.noreply.github.com> Date: Tue, 27 Aug 2024 12:16:50 -0500 Subject: [PATCH 143/190] Fix bullet typo in clEnqueueAcquireExternalMemObjectsKHR (#1224) Signed-off-by: Gowtham Tammana --- api/opencl_runtime_layer.asciidoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 263ca1854..b20a2e7b8 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -5468,9 +5468,9 @@ Otherwise, it returns one of the following errors: ** if _command_queue_ is not a valid command-queue, or ** if device associated with _command_queue_ is not one of the devices specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating - one or more of _mem_objects_, or ** if one or more of _mem_objects_ - belong to a context that does not contain a device associated with - _command_queue_. + one or more of _mem_objects_, or + ** if one or more of _mem_objects_ belong to a context that does not + contain a device associated with _command_queue_. 
* {CL_INVALID_EVENT_WAIT_LIST} ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or From c9fa5c89fcfccef0a20c6e5d37c345a01f640c6a Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 27 Aug 2024 11:07:32 -0700 Subject: [PATCH 144/190] eliminate some duplicated words found in the spec (#1226) --- api/appendix_e.asciidoc | 2 +- api/appendix_h.asciidoc | 2 +- api/cl_khr_egl_image.asciidoc | 2 +- api/cl_khr_external_memory.asciidoc | 2 +- api/cl_khr_priority_hints.asciidoc | 2 +- api/footnotes.asciidoc | 2 +- api/opencl_architecture.asciidoc | 2 +- api/opencl_runtime_layer.asciidoc | 8 ++++---- env/extensions.asciidoc | 4 ++-- ext/introduction.asciidoc | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index 9df39d32f..ba9534480 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -377,7 +377,7 @@ device: capabilities of a device. * {CL_DEVICE_PIPE_SUPPORT} to determine whether a device supports pipe memory objects. - * {CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE} to determine the + * {CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE} to determine the preferred work-group size multiple for a device. OpenCL 3.0 adds new queries to conveniently and precisely diff --git a/api/appendix_h.asciidoc b/api/appendix_h.asciidoc index 2a2d37598..d1cef63a1 100644 --- a/api/appendix_h.asciidoc +++ b/api/appendix_h.asciidoc @@ -334,7 +334,7 @@ When sRGB images are not supported: |*Behavior* | {clGetSupportedImageFormats} -| Will not return return any image formats with `image_channel_order` equal to an sRGB image channel order if no devices in _context_ support sRGB images. +| Will not return any image formats with `image_channel_order` equal to an sRGB image channel order if no devices in _context_ support sRGB images. 
|==== diff --git a/api/cl_khr_egl_image.asciidoc b/api/cl_khr_egl_image.asciidoc index 42e03b71e..dcea8fd3e 100644 --- a/api/cl_khr_egl_image.asciidoc +++ b/api/cl_khr_egl_image.asciidoc @@ -13,7 +13,7 @@ include::{generated}/meta/{refprefix}cl_khr_egl_image.txt[] === Description {cl_khr_egl_image_EXT} provides a mechanism to creating OpenCL memory objects -from from EGLImages. +from EGLImages. === New Commands diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index cd572a8fb..3d61b564a 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -60,7 +60,7 @@ imported into OpenCL. * {cl_mem_properties_TYPE} ** {CL_MEM_DEVICE_HANDLE_LIST_KHR} ** {CL_MEM_DEVICE_HANDLE_LIST_END_KHR} - * Return values from from {clGetEventInfo} when _param_name_ is + * Return values from {clGetEventInfo} when _param_name_ is {cl_command_type_TYPE}: ** {CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR} ** {CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR} diff --git a/api/cl_khr_priority_hints.asciidoc b/api/cl_khr_priority_hints.asciidoc index dfd29df15..988498b53 100644 --- a/api/cl_khr_priority_hints.asciidoc +++ b/api/cl_khr_priority_hints.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_priority_hints.txt[] The {cl_khr_priority_hints_EXT} extension adds priority hints for OpenCL, but does not specify the scheduling behavior or minimum guarantees. -It is expected that the the user guides associated with each implementation +It is expected that the user guides associated with each implementation which supports this extension will describe the scheduling behavior guarantees. 
diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index cc407d98f..b5452357e 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -11,7 +11,7 @@ Note that this flag does not provide meaning for atomic memory operations, but o ] :fn-create-context-all-or-subset: pass:n[ \ -{clCreateContextfromType} may may create a context for all or a subset of the actual physical devices present in the platform that match _device_type_. \ +{clCreateContextfromType} may create a context for all or a subset of the actual physical devices present in the platform that match _device_type_. \ ] :fn-default-device-queue: pass:n[ \ diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index 63dc7d503..38d33377f 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -679,7 +679,7 @@ The OpenCL execution model supports three types of kernels: * *OpenCL kernels* are managed by the OpenCL API as kernel objects associated with kernel functions within program objects. OpenCL program objects are created and built using OpenCL APIs. - The OpenCL API includes functions to query the kernel languages and + The OpenCL API includes functions to query the kernel languages and intermediate languages that may be used to create OpenCL program objects for a device. * *Native kernels* are accessed through a host function pointer. diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index b20a2e7b8..438957c55 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -1837,7 +1837,7 @@ include::{generated}/api/version-notes/clCreateFromGLBuffer.asciidoc[] Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} flags specified in that table can be used. * _bufobj_ is the name of an OpenGL buffer object. 
- The data store of the OpenGL buffer object must have have been + The data store of the OpenGL buffer object must have been previously created by calling `glBufferData`, although its contents need not be initialized. The size of the data store will be used to determine the size of the @@ -2646,7 +2646,7 @@ ifdef::cl_khr_external_memory[] If _image_slice_pitch_ is zero and the image is created from an external memory handle, then the image slice pitch is implementation-defined. endif::cl_khr_external_memory[] - The image slice pitch must be {geq} the image image row pitch {times} + The image slice pitch must be {geq} the image row pitch {times} _image_height_ for a 2D image array or a 3D image, must be {geq} the image row pitch for a 1D image array, and must be a multiple of the image row pitch. @@ -7088,7 +7088,7 @@ include::{generated}/api/version-notes/clEnqueueReleaseGLObjects.asciidoc[] to an element of the _event_wait_list_ array. ifdef::cl_khr_gl_event[] -If an OpenGL context is bound to the current thread, then then any OpenGL +If an OpenGL context is bound to the current thread, then any OpenGL commands which . affect or access the contents of the memory objects listed in the @@ -12864,7 +12864,7 @@ _errcode_ret_ returns an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. {clCreateSemaphoreWithPropertiesKHR} returns a valid semaphore object in an -un-signaled state and and _errcode_ret_ is set to {CL_SUCCESS} if the +un-signaled state and _errcode_ret_ is set to {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: diff --git a/env/extensions.asciidoc b/env/extensions.asciidoc index f0ffc5d6b..aa963e514 100644 --- a/env/extensions.asciidoc +++ b/env/extensions.asciidoc @@ -39,7 +39,7 @@ in a SPIR-V module using *OpExtension*. 
If the OpenCL environment supports the extension {cl_khr_3d_image_writes_EXT}, then the environment must accept _Image_ operands to *OpImageWrite* that -are declared with with dimensionality _Dim_ equal to *3D*. +are declared with dimensionality _Dim_ equal to *3D*. ==== {cl_khr_depth_images_EXT} @@ -57,7 +57,7 @@ Additionally, the following Image Channel Orders may be returned by ==== {cl_khr_device_enqueue_local_arg_types_EXT} If the OpenCL environment supports the extension -{cl_khr_device_enqueue_local_arg_types_EXT}, then then environment will allow +{cl_khr_device_enqueue_local_arg_types_EXT}, then the environment will allow _Invoke_ functions to be passed to *OpEnqueueKernel* with *Workgroup* memory pointer parameters of any type. diff --git a/ext/introduction.asciidoc b/ext/introduction.asciidoc index 4c4cf584e..3ff391ded 100644 --- a/ext/introduction.asciidoc +++ b/ext/introduction.asciidoc @@ -189,7 +189,7 @@ that extension on different devices for a platform. The behavior of calling a device extension function on a device not supporting that extension is undefined. -{clGetExtensionFunctionAddressForPlatform} may not be be used to query for core +{clGetExtensionFunctionAddressForPlatform} may not be used to query for core (non-extension) functions in OpenCL. 
For extension functions that may be queried using {clGetExtensionFunctionAddressForPlatform}, implementations may also choose to From 735c352a6e3d159803124da2e1e0bad3caa9de54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 3 Sep 2024 17:44:10 +0100 Subject: [PATCH 145/190] Add specification for cl_ext_image_unorm_int_2_101010 (#1223) * Add specification for cl_ext_image_unorm_int_2_101010 Change-Id: I51b0ec6f719c30a01637c56047a86d31fd78b7ca * Update OpenCL_C.txt Co-authored-by: Ben Ashbaugh * review comments Change-Id: Ib1ac6320c4f391336074f468158187e76099c7b9 --------- Co-authored-by: Ben Ashbaugh --- OpenCL_C.txt | 14 +++++++ api/cl_ext_image_unorm_int_2_101010.asciidoc | 39 ++++++++++++++++++++ api/opencl_runtime_layer.asciidoc | 13 +++++++ c/features.txt | 1 + env/common_properties.asciidoc | 5 +++ env/image_addressing_and_filtering.asciidoc | 6 +++ xml/cl.xml | 8 +++- 7 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 api/cl_ext_image_unorm_int_2_101010.asciidoc diff --git a/OpenCL_C.txt b/OpenCL_C.txt index c6a096a7d..347a4a3d8 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -246,6 +246,12 @@ clock shared by all work-items executing in the same work-group. clock shared by all work-items executing in the same sub-group. endif::cl_khr_kernel_clock[] +ifdef::cl_ext_image_unorm_int_2_101010[] +| {opencl_c_ext_image_unorm_int_2_101010} +| The OpenCL C compiler supports `CLK_UNORM_INT_2_101010_EXT` and returning it +from `get_image_channel_data_type`. 
+endif::cl_ext_image_unorm_int_2_101010[] + |==== In OpenCL C 3.0 or newer, feature macros must expand to the value `1` if the @@ -12483,6 +12489,14 @@ endif::cl_khr_gl_msaa_sharing[] `CLK_UNORM_INT_101010_2` footnote:[{fn-CLK_UNORM_INT_101010_2}] +ifdef::cl_ext_image_unorm_int_2_101010[] + Additionally, if the {opencl_c_ext_image_unorm_int_2_101010} feature is + supported: + + `CLK_UNORM_INT_2_101010_EXT` + +endif::cl_ext_image_unorm_int_2_101010[] + | int *get_image_channel_order*(_aQual_ image2d_t _image_) + int *get_image_channel_order*(_aQual_ image3d_t _image_) diff --git a/api/cl_ext_image_unorm_int_2_101010.asciidoc b/api/cl_ext_image_unorm_int_2_101010.asciidoc new file mode 100644 index 000000000..1320526df --- /dev/null +++ b/api/cl_ext_image_unorm_int_2_101010.asciidoc @@ -0,0 +1,39 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_image_unorm_int_2_101010.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2024-05-10 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kévin Petit, Arm Ltd. + +=== Description + +{cl_ext_image_unorm_int_2_101010_EXT} adds support for the {CL_UNORM_INT_2_101010_EXT} +image format. + +OpenCL C compilers supporting this extension will define the +{opencl_c_ext_image_unorm_int_2_101010} feature macro. 
+ +=== New feature macro + + * {opencl_c_ext_image_unorm_int_2_101010} + +=== New Enums + + * {cl_channel_type_TYPE} + ** {CL_UNORM_INT_2_101010_EXT} + +=== New OpenCL C channel data type + + * `CLK_UNORM_INT_2_101010_EXT` + +=== Version History + + * Revision 1.0.0, 2024-05-10 + ** Initial version diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 438957c55..049a00505 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -2474,6 +2474,15 @@ include::{generated}/api/version-notes/CL_UNORM_INT_101010_2.asciidoc[] include::{generated}/api/version-notes/CL_SIGNED_INT8.asciidoc[] | Each channel component is an unnormalized signed 8-bit integer value + +ifdef::cl_ext_image_unorm_int_2_101010[] +| {CL_UNORM_INT_2_101010_EXT_anchor} + +include::{generated}/api/version-notes/CL_UNORM_INT_2_101010_EXT.asciidoc[] + | Represents a normalized 2-10-10-10 four-channel ABGR image. + The channel order must be {CL_ABGR}. +endif::cl_ext_image_unorm_int_2_101010[] + | {CL_SIGNED_INT16_anchor} include::{generated}/api/version-notes/CL_SIGNED_INT16.asciidoc[] @@ -2565,6 +2574,10 @@ For {CL_UNORM_INT_101010}, bits 31:30 are undefined, R is in bits 29:20, G in bits 19:10 and B in bits 9:0. For {CL_UNORM_INT_101010_2}, R is in bits 31:22, G in bits 21:12, B in bits 11:2 and A in bits 1:0. +ifdef::cl_ext_image_unorm_int_2_101010[] +For {CL_UNORM_INT_2_101010_EXT}, A is in bits 31:30, B in bits 29:20, G in bits +19:10 and R in bits 9:0. +endif::cl_ext_image_unorm_int_2_101010[] OpenCL implementations must maintain the minimum precision specified by the number of bits in `image_channel_data_type`. 
diff --git a/c/features.txt b/c/features.txt index b7e636dda..35a649c8c 100644 --- a/c/features.txt +++ b/c/features.txt @@ -18,3 +18,4 @@ __opencl_c_integer_dot_product_input_4x8bit_packed __opencl_c_kernel_clock_scope_device __opencl_c_kernel_clock_scope_work_group __opencl_c_kernel_clock_scope_sub_group +__opencl_c_ext_image_unorm_int_2_101010 diff --git a/env/common_properties.asciidoc b/env/common_properties.asciidoc index 8019b751d..2b732c78c 100644 --- a/env/common_properties.asciidoc +++ b/env/common_properties.asciidoc @@ -384,6 +384,11 @@ channel data types. | *UnormInt101010_2* | {CL_UNORM_INT_101010_2} +ifdef::cl_ext_image_unorm_int_2_101010[] +| 21 +| *UnormInt2_101010EXT* +| {CL_UNORM_INT_2_101010_EXT} +endif::cl_ext_image_unorm_int_2_101010[] |==== [NOTE] diff --git a/env/image_addressing_and_filtering.asciidoc b/env/image_addressing_and_filtering.asciidoc index bd9be74de..c6f4f1fd9 100644 --- a/env/image_addressing_and_filtering.asciidoc +++ b/env/image_addressing_and_filtering.asciidoc @@ -1030,6 +1030,12 @@ to the data vector component type or scalar type: `UnormInt24`, + `HalfFloat`, + `Float` + + +ifdef::cl_ext_image_unorm_int_2_101010[] +Additionally if the {cl_ext_image_unorm_int_2_101010_EXT} extension is +supported: `UnormInt2_101010EXT` +endif::cl_ext_image_unorm_int_2_101010[] + |*OpTypeFloat*, with _Width_ equal to 16 or 32. |`SignedInt8`, diff --git a/xml/cl.xml b/xml/cl.xml index e07843004..12ff3d09a 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1608,7 +1608,8 @@ server's OpenCL/api-docs repository. - + + @@ -7475,5 +7476,10 @@ server's OpenCL/api-docs repository. 
+ + + + + From 2d85ec8ef3f4dca7baa49c80bdf566966b3ec26b Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 3 Sep 2024 22:35:29 +0530 Subject: [PATCH 146/190] Add mechanism to specify named win32 NT handles (#1177) * Add mechanism to specify named win32 NT handles Currently, cl_khr_external_semaphore and cl_khr_external_memory define properties to specify external win32 NT handle via pointer. Win32 NT handles can also be specified via named strings. Add properties to specify external Win32 NT handles via named strings too. Fixes #943 * Updates to win32 named handle type changes. (#1180) Address review comments. Fixes #943 * Drop NULL from named win32 handle description (#1187) * Updates to win32 named handle type changes. Address review comments. Fixes #943 * Drop NULL from named win32 handle description. Address review comments from Ben to remove NULL from named win32 handle description. Fixes #943 * Bump up spec versions for win32 (#1193) Bump up spec versions for cl_khr_external_memory_win32 from 1.0.0 to 1.1.0 and cl_khr_external_semaphore_win32 from 0.9.0 to 0.9.1 * fix typo and add OPAQUE_WIN32_NAME_KHR to semaphore enums list * Update opencl_runtime_layer.asciidoc Fix typo during conflict resolution opencl_runtime_layer.asciidoc. --------- Co-authored-by: Ben Ashbaugh --- api/cl_khr_external_memory_win32.asciidoc | 6 ++++- api/cl_khr_external_semaphore_win32.asciidoc | 5 +++- api/opencl_runtime_layer.asciidoc | 24 ++++++++++++++++++++ xml/cl.xml | 10 +++++--- 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/api/cl_khr_external_memory_win32.asciidoc b/api/cl_khr_external_memory_win32.asciidoc index 005c9278f..fb18ff470 100644 --- a/api/cl_khr_external_memory_win32.asciidoc +++ b/api/cl_khr_external_memory_win32.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory_win32.txt[] === Other Extension Metadata *Last Modified Date*:: - 2024-03-15 + 2024-06-11 *IP Status*:: No known IP claims. 
*Contributors*:: @@ -40,6 +40,7 @@ image memory object. * {cl_external_memory_handle_type_khr_TYPE} ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR} ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR} + ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_NAME_KHR} === Version History @@ -57,3 +58,6 @@ image memory object. (provisional). * Revision 1.0.0, 2024-03-15 ** First non-provisional version. + * Revision 1.1.0, 2024-06-11 + ** Added {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_NAME_KHR}. + diff --git a/api/cl_khr_external_semaphore_win32.asciidoc b/api/cl_khr_external_semaphore_win32.asciidoc index 725a59966..543741d3c 100644 --- a/api/cl_khr_external_semaphore_win32.asciidoc +++ b/api/cl_khr_external_semaphore_win32.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore_win32.txt[] === Other Extension Metadata *Last Modified Date*:: - 2021-09-10 + 2024-06-11 *IP Status*:: No known IP claims. *Contributors*:: @@ -39,8 +39,11 @@ introduced by {cl_khr_external_semaphore_EXT}. * {cl_external_semaphore_handle_type_khr_TYPE} ** {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} ** {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} + ** {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_NAME_KHR} === Version History * Revision 0.9.0, 2021-09-10 ** Initial version (provisional). + * Revision 0.9.1, 2024-06-11 + ** Added {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_NAME_KHR}. diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 049a00505..276b56d2c 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -5666,6 +5666,16 @@ buffer or an image memory object from an external handle: It does not own a reference to the underlying memory resource represented by its memory object, and will therefore become invalid when all memory objects associated with it are destroyed. 
+ * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_NAME_KHR_anchor} specifies an NT handle name that + has only limited valid usage outside of OpenCL and other compatible + APIs. + NT handle name is a null-terminated UTF-16 string naming the payload to import. + It must be compatible with the functions `DuplicateHandle`, + `CloseHandle`, `CompareObjectHandles`, `GetHandleInformation`, and + `SetHandleInformation`. + It owns a reference to the underlying memory resource represented by its + memory object. + endif::cl_khr_external_memory_win32[] For these extensions, importing memory object payloads from Windows handles @@ -13157,6 +13167,14 @@ a semaphore from an external handle: It does not own a reference to the underlying synchronization primitive represented by its semaphore object, and will therefore become invalid when all semaphore objects associated with it are destroyed. + * {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_NAME_KHR} specifies an NT handle name that has + only limited valid usage outside of OpenCL and other compatible APIs. + NT handle name is a null-terminated UTF-16 string naming the payload to import. + It must be compatible with the functions `DuplicateHandle`, + `CloseHandle`, `CompareObjectHandles`, `GetHandleInformation`, and + `SetHandleInformation`. + It owns a reference to the underlying synchronization primitive + represented by its semaphore object. 
endif::cl_khr_external_semaphore_win32[] @@ -13174,6 +13192,12 @@ include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR.asci include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR.asciidoc[] | Reference + +| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_NAME_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_NAME_KHR.asciidoc[] + | Reference + endif::cl_khr_external_semaphore_win32[] |==== diff --git a/xml/cl.xml b/xml/cl.xml index 12ff3d09a..8dc46339c 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1885,7 +1885,9 @@ server's OpenCL/api-docs repository. - + + + @@ -7074,13 +7076,14 @@ server's OpenCL/api-docs repository. - + + @@ -7126,13 +7129,14 @@ server's OpenCL/api-docs repository. - + + From e969c445e53ac7874193f6a4f87d24ace465e6c5 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 3 Sep 2024 10:17:18 -0700 Subject: [PATCH 147/190] clarify no implicit dependencies when waiting on or signaling semaphores (#1231) --- api/opencl_runtime_layer.asciidoc | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 276b56d2c..feb7c88cc 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -13266,6 +13266,16 @@ events in _event_wait_list_ when {clEnqueueWaitSemaphoresKHR} returns. Waiting on the same binary semaphore twice without an interleaving signal may lead to undefined behavior. +[NOTE] +==== +When _command_queue_ is an out-of-order command-queue there are no implicit +dependencies between the semaphore wait command and commands enqueued into the +command-queue after the semaphore wait command. +If such dependencies are required, applications may enqueue a command-queue +barrier after the semaphore wait command, to explicitly add dependencies between +the semaphore wait command and subsequent commands. 
+==== + // refError {clEnqueueWaitSemaphoresKHR} returns {CL_SUCCESS} if the function is @@ -13355,6 +13365,16 @@ events in _event_wait_list_ when {clEnqueueSignalSemaphoresKHR} returns. Signaling the same binary semaphore twice without an interleaving wait may lead to undefined behavior. +[NOTE] +==== +When _command_queue_ is an out-of-order command-queue there are no implicit +dependencies between commands enqueued into the command-queue before the +semaphore signal command and the semaphore signal command. +If such dependencies are required, applications may enqueue a command-queue +barrier before the semaphore signal command, to explicitly add dependencies between +the preceding commands and the semaphore signal command. +==== + // refError {clEnqueueSignalSemaphoresKHR} returns {CL_SUCCESS} if the function is From 305451dc7ebd54a1bf6f8dc008d788bb29991211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 3 Sep 2024 21:47:26 +0100 Subject: [PATCH 148/190] Group or shorten the names of some API specification sections (#1250) As discussed in teleconferences, the unification of the API specification has led to some long-winded and/or redundant section headers. Here are a few proposed changes that I find make the unified specification easier to navigate. 
Change-Id: I326f4195e5585a821d38f128bbc9d300f490631f Signed-off-by: Kevin Petit --- api/opencl_runtime_layer.asciidoc | 44 ++++++++++++++----------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index feb7c88cc..7f6904dc5 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -1704,10 +1704,11 @@ include::{generated}/api/version-notes/CL_MAP_WRITE_INVALIDATE_REGION.asciidoc[] |==== -- +ifdef::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +=== Creating Buffer Objects From Direct3D Buffer Resources +endif::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] ifdef::cl_khr_d3d10_sharing[] -=== Creating OpenCL Buffer Objects From Direct3D 10 Buffer Resources - [open,refpage='clCreateFromD3D10BufferKHR',desc='Create OpenCL buffer object from a Direct3D 10 buffer',type='protos'] -- To create an OpenCL buffer object from a Direct3D 10 buffer, call the @@ -1763,8 +1764,6 @@ endif::cl_khr_d3d10_sharing[] ifdef::cl_khr_d3d11_sharing[] -=== Creating OpenCL Buffer Objects From Direct3D 11 Buffer Resources - [open,refpage='clCreateFromD3D11BufferKHR',desc='Create OpenCL buffer object from a Direct3D 11 buffer',type='protos'] -- To create an OpenCL buffer object from a Direct3D 11 buffer, call the @@ -1820,7 +1819,7 @@ endif::cl_khr_d3d11_sharing[] ifdef::cl_khr_gl_sharing[] -=== Creating OpenCL Buffer Objects From OpenGL Buffer Objects +=== Creating Buffer Objects From OpenGL Buffer Objects [open,refpage='clCreateFromGLBuffer',desc='Create OpenCL buffer object from an OpenGL buffer object',type='protos'] -- @@ -4364,7 +4363,7 @@ endif::cl_khr_d3d11_sharing[] ifdef::cl_khr_dx9_media_sharing[] -=== Creating OpenCL Image Objects From DirectX 9 Media Resources +=== Creating Image Objects From DirectX 9 Media Resources [open,refpage='clCreateFromDX9MediaSurfaceKHR',desc='Create OpenCL image object from a media surface',type='protos'] -- @@ -4464,9 +4463,11 @@ 
performance. endif::cl_khr_dx9_media_sharing[] -ifdef::cl_khr_d3d10_sharing[] -=== Creating OpenCL Image Objects From Direct3D 10 Textures and Resources +ifdef::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +=== Creating Image Objects From Direct3D Textures and Resources +endif::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +ifdef::cl_khr_d3d10_sharing[] [open,refpage='clCreateFromD3D10Texture2DKHR',desc='Create OpenCL 2D image object from a Direct3D 10 2D texture',type='protos'] -- To create an OpenCL 2D image object from a subresource of a Direct3D 10 2D @@ -4599,8 +4600,6 @@ endif::cl_khr_d3d10_sharing[] ifdef::cl_khr_d3d11_sharing[] -=== Creating OpenCL Image Objects From Direct3D 11 Textures and Resources - [open,refpage='clCreateFromD3D11Texture2DKHR',desc='Create OpenCL 2D image object from a Direct3D 11 2D texture',type='protos'] -- To create an OpenCL 2D image object from a subresource of a Direct3D 11 2D @@ -4733,7 +4732,7 @@ endif::cl_khr_d3d11_sharing[] ifdef::cl_khr_egl_image[] -=== Creating OpenCL Image Objects From EGL Images +=== Creating Image Objects From EGL Images [open,refpage='clCreateFromEGLImageKHR',desc='Create cl_mem target from EGLImage source',type='protos'] -- @@ -4812,7 +4811,7 @@ endif::cl_khr_egl_image[] ifdef::cl_khr_gl_sharing[] -=== Creating OpenCL Image Objects From OpenGL Textures and Renderbuffers +=== Creating Image Objects From OpenGL Textures and Renderbuffers [open,refpage='clCreateFromGLTexture',desc='Create OpenCL image object from an OpenGL texture object',type='protos'] -- @@ -5240,10 +5239,7 @@ include::{generated}/api/version-notes/CL_PIPE_PROPERTIES.asciidoc[] -- -== Querying, Unmapping, Migrating, Retaining and Releasing Memory Objects - -// === Handling Memory Objects - +== Memory Objects === Retaining and Releasing Memory Objects @@ -6210,9 +6206,11 @@ and {clGetImageInfo} with _param_name_ {CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR}, endif::cl_khr_dx9_media_sharing[] -ifdef::cl_khr_d3d10_sharing[] -=== Querying Direct3D 
Properties of Memory Objects Created From Direct3D 10 Resources +ifdef::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +=== Querying Direct3D Properties of Memory Objects Created From Direct3D Resources +endif::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +ifdef::cl_khr_d3d10_sharing[] Properties of Direct3D 10 objects may be queried using {clGetMemObjectInfo} and {clGetImageInfo} with _param_name_ {CL_MEM_D3D10_RESOURCE_KHR} and {CL_IMAGE_D3D10_SUBRESOURCE_KHR} respectively. @@ -6220,8 +6218,6 @@ endif::cl_khr_d3d10_sharing[] ifdef::cl_khr_d3d11_sharing[] -=== Querying Direct3D Properties of Memory Objects Created From Direct3D 11 Resources - Properties of Direct3D 11 objects may be queried using {clGetMemObjectInfo} and {clGetImageInfo} with _param_name_ {CL_MEM_D3D11_RESOURCE_KHR} and {CL_IMAGE_D3D11_SUBRESOURCE_KHR} respectively. @@ -6508,9 +6504,11 @@ Otherwise it returns one of the following errors: endif::cl_khr_dx9_media_sharing[] -ifdef::cl_khr_d3d10_sharing[] -=== Sharing Memory Objects Created From Direct3D 10 Resources Between Direct3D 10 and OpenCL Contexts +ifdef::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +=== Sharing Memory Objects Created From Direct3D Resources Between Direct3D and OpenCL Contexts +endif::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +ifdef::cl_khr_d3d10_sharing[] [open,refpage='clEnqueueAcquireD3D10ObjectsKHR',desc='Acquire OpenCL memory objects created from Direct3D 10 resources',type='protos'] -- To acquire OpenCL memory objects that have been created from Direct3D 10 @@ -6681,8 +6679,6 @@ endif::cl_khr_d3d10_sharing[] ifdef::cl_khr_d3d11_sharing[] -=== Sharing Memory Objects Created From Direct3D 11 Resources Between Direct3D 11 and OpenCL Contexts - [open,refpage='clEnqueueAcquireD3D11ObjectsKHR',desc='Acquire OpenCL memory objects created from Direct3D 11 resources',type='protos'] -- To acquire OpenCL memory objects that have been created from Direct3D 11 From 23b8e528647340f6d1a7ca7231e8e8a48d89238c Mon Sep 17 00:00:00 2001 
From: Ben Ashbaugh Date: Tue, 3 Sep 2024 23:15:49 -0700 Subject: [PATCH 149/190] fix formatting for bulleted list (#1253) --- api/opencl_runtime_layer.asciidoc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 7f6904dc5..1282b8830 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -5438,7 +5438,8 @@ acquired. This is to guarantee that the state of the memory objects is up-to-date and they are accessible to OpenCL. -The following restrictions shall apply - +The following restrictions shall apply: + * Each memory object must be acquired only once. Acquiring a memory object multiple times without releasing it results in implementation-defined behavior. @@ -5531,7 +5532,8 @@ commands in the other API. This is to guarantee that the state of memory objects is up-to-date and they are accessible to the other API. -The following restrictions shall apply - +The following restrictions shall apply: + * Each memory object must be released only once. Releasing a memory object multiple times without acquiring it results in implementation-defined behavior. From 3d2365f0002af035e4b5244f318e5791bdca554e Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 4 Sep 2024 07:26:39 -0700 Subject: [PATCH 150/190] add nofooter by default when building the OpenCL specs (#1252) This will minimize diffs in the future, especially for things like the reference pages that may change infrequently. 
--- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 5dcaae6f9..023d8cf0f 100644 --- a/Makefile +++ b/Makefile @@ -97,6 +97,7 @@ COMMONATTRIBOPTS = -a revdate="$(SPECDATE)" \ -a stem=latexmath \ -a generated=$(GENERATED) \ -a sectnumlevels=5 \ + -a nofooter \ -a refprefix ATTRIBOPTS = -a revnumber="$(SPECREVISION)" \ From caa518229418981fb79fe735b4abfe217b8053fa Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Fri, 6 Sep 2024 15:37:15 +0100 Subject: [PATCH 151/190] Add properties parameter to all command-buffer commands (#1215) * Add properties parameter to all command-buffer commands See Issue https://github.com/KhronosGroup/OpenCL-Docs/issues/1207 discussion of how it could make the `cl_khr_command_buffer` extension more extensible if we allowed all the command-recording parameters to have a `properties` parameter that could be used. * fix misspelling * Update clGetMutableCommandInfoKHR table * Rename query to `CL_MUTABLE_COMMAND_PROPERTIES_ARRAY_KHR` --------- Co-authored-by: Ben Ashbaugh --- api/cl_khr_command_buffer.asciidoc | 23 +++--- ...r_command_buffer_mutable_dispatch.asciidoc | 11 ++- api/opencl_runtime_layer.asciidoc | 79 ++++++++++++++++--- config/rouge/lib/rouge/lexers/opencl.rb | 2 +- xml/cl.xml | 28 ++++--- 5 files changed, 109 insertions(+), 34 deletions(-) diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index 8234aa43e..a97e067d6 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -4,7 +4,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer.txt[] // *Revision*:: -// 0.9.4 +// 0.9.5 // *Extension and Version Dependencies*:: // This extension requires OpenCL 1.2 or later. // Buffering of SVM commands requires OpenCL 2.0 or later. @@ -12,7 +12,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer.txt[] === Other Extension Metadata *Last Modified Date*:: - 2023-03-31 + 2024-07-24 *IP Status*:: No known IP claims.
*Contributors*:: @@ -137,13 +137,15 @@ retain its provisional extension status until other layered extensions are released, as these may reveal modifications needed to the base specification to support their intended use cases. -==== ND-range Kernel Command Properties +==== Command Properties -The {clCommandNDRangeKernelKHR} entry-point defines a `properties` parameter of -new type {cl_ndrange_kernel_command_properties_khr_TYPE}. No properties are defined -in {cl_khr_command_buffer_EXT}, but the parameter enables layered extensions like -{cl_khr_command_buffer_mutable_dispatch_EXT} to define properties that inform -the characteristics of the kernel command. +The command recording entry-points allow a `properties` parameter of +new type {cl_command_properties_khr_TYPE} to be passed. No properties are +defined in {cl_khr_command_buffer_EXT}, but the parameter enables layered +extensions to define characteristics of the individual commands. + +For example, {cl_khr_command_buffer_mutable_dispatch_EXT} defines properties +that can be set when appending a kernel command with {clCommandNDRangeKernelKHR}. ==== Command Handles @@ -214,7 +216,7 @@ features: * {cl_command_buffer_state_khr_TYPE} * {cl_command_buffer_properties_khr_TYPE} * {cl_command_buffer_flags_khr_TYPE} - * {cl_ndrange_kernel_command_properties_khr_TYPE} + * {cl_command_properties_khr_TYPE} * {cl_mutable_command_khr_TYPE} === New Enums @@ -433,4 +435,7 @@ features: * 0.9.4, 2023-05-11 ** Add clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR command entries (provisional). + * 0.9.5, 2024-07-24 + ** Add a properties parameter to all command recording entry-points + (provisional). 
diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc index 486d01d12..51e2b8696 100644 --- a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer_mutable_dispatch.txt[ === Other Extension Metadata *Last Modified Date*:: - 2024-06-19 + 2024-09-05 *IP Status*:: No known IP claims. *Contributors*:: @@ -72,7 +72,7 @@ void pointer using {cl_command_buffer_update_type_khr_TYPE}. * {cl_device_info_TYPE} ** {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} - * {cl_ndrange_kernel_command_properties_khr_TYPE} + * {cl_command_properties_khr_TYPE} ** {CL_MUTABLE_DISPATCH_ASSERTS_KHR} ** {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} * {cl_mutable_dispatch_asserts_khr_TYPE} @@ -86,7 +86,7 @@ void pointer using {cl_command_buffer_update_type_khr_TYPE}. * {cl_mutable_command_info_khr_TYPE} ** {CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR} ** {CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR} - ** {CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR} + ** {CL_MUTABLE_COMMAND_PROPERTIES_ARRAY_KHR} ** {CL_MUTABLE_DISPATCH_KERNEL_KHR} ** {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR} ** {CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR} @@ -196,7 +196,7 @@ kernel void vector_addition(global int* tile1, global int* tile2, CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_buffers[0])); // Instruct the nd-range command to allow for mutable kernel arguments - cl_ndrange_kernel_command_properties_khr mutable_properties[] = { + cl_command_properties_khr mutable_properties[] = { CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0}; @@ -360,3 +360,6 @@ may be a introduced as a stand alone extension. * Revision 0.9.2, 2024-06-19 ** Change {clUpdateMutableCommandsKHR} API to pass configs as an array rather than linked list (provisional). 
+ * Revision 0.9.3, 2024-09-05 + ** Rename `CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR` to + `CL_MUTABLE_COMMAND_PROPERTIES_ARRAY_KHR` (provisional). diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 1282b8830..e92935549 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -14396,6 +14396,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify synchronization-points that need to complete before this particular command can be executed. @@ -14458,6 +14463,7 @@ ifdef::cl_khr_command_buffer_multi_device[] associated with _command_queue_ and _command_buffer_ is not the same. endif::cl_khr_command_buffer_multi_device[] * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and _num_sync_points_in_wait_list_ is > 0, or @@ -14490,6 +14496,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. 
Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _src_buffer_, _dst_buffer_, _src_offset_, _dst_offset_, _size_ refer to {clEnqueueCopyBuffer}. * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify @@ -14558,6 +14569,7 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. -- @@ -14581,6 +14593,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _src_origin_, _dst_origin_, _region_, _src_row_pitch_, _src_slice_pitch_, _dst_row_pitch_, _dst_slice_pitch_ refer to {clEnqueueCopyBufferRect}. @@ -14667,6 +14684,7 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. 
-- @@ -14690,6 +14708,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _src_buffer_, _dst_image_, _src_offset_, _dst_origin_, _region_ refer to {clEnqueueCopyBufferToImage} * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify @@ -14759,6 +14782,7 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. -- @@ -14781,6 +14805,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _src_image_, _dst_image_, _src_origin_, _dst_origin_, _region_ refer to {clEnqueueCopyImage}. 
* _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify @@ -14857,6 +14886,7 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. -- @@ -14880,6 +14910,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _src_image_, _dst_buffer_, _src_origin_, _region_, _dst_offset_ refer to {clEnqueueCopyImageToBuffer}. * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify @@ -14949,6 +14984,7 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. -- @@ -14980,6 +15016,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. 
The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _buffer_, _pattern_, _pattern_size_, _offset_, _size_ refer to {clEnqueueFillBuffer}. * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify @@ -15048,6 +15089,7 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. -- @@ -15078,6 +15120,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _image_, _fill_color_, _origin_, _region_ refer to {clEnqueueFillImage}. * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify synchronization-points that need to complete before this particular @@ -15145,6 +15192,7 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. -- @@ -15180,7 +15228,7 @@ ifdef::cl_khr_command_buffer_mutable_dispatch[] The {cl_khr_command_buffer_EXT} extension does not define any properties, but supported properties defined by extensions are defined in the <> table. 
+ by clCommandNDRangeKernelKHR>> table. endif::cl_khr_command_buffer_mutable_dispatch[] * _kernel_ is a valid kernel object which **must** have its arguments set. Any changes to _kernel_ after calling {clCommandNDRangeKernelKHR}, such @@ -15392,8 +15440,8 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. - * {CL_INVALID_VALUE} if values specified in _properties_ are not valid * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if the {cl_khr_command_buffer_mutable_dispatch_EXT} extension is not supported and _mutable_handle_ is not `NULL`. * {CL_INVALID_OPERATION} if the device associated with _command_queue_ @@ -15442,6 +15490,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _dst_ptr_ is the pointer to a host (if the device supports system SVM) or SVM memory allocation where data is copied to. * _src_ptr_ is the pointer to a host (if the device supports system SVM) @@ -15516,6 +15569,7 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`.
-- @@ -15539,6 +15593,11 @@ ifdef::cl_khr_command_buffer_multi_device[] _command_queue_ is `NULL`, then only one command-queue must have been set on _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the command and their + corresponding values. Each property name is immediately followed by the + corresponding desired value. The list is terminated with 0. The + {cl_khr_command_buffer_EXT} extension does not define any properties, but + supported properties may be defined by layered extensions in future. * _svm_ptr_ is a pointer to a (if the device supports system SVM) or SVM memory region that will be filled with _pattern_. It must be aligned to _pattern_size_ bytes. @@ -15628,6 +15687,7 @@ New errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. -- @@ -16165,12 +16225,11 @@ include::{generated}/api/version-notes/CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR.ascii The list of supported event command types defined by {clGetEventInfo} is used with the matching command. -| {CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR_anchor} +| {CL_MUTABLE_COMMAND_PROPERTIES_ARRAY_KHR_anchor} -include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR.asciidoc[] - | {cl_ndrange_kernel_command_properties_khr_TYPE}[] - | Return the properties argument specified on _command_ recording with - {clCommandNDRangeKernelKHR}. +include::{generated}/api/version-notes/CL_MUTABLE_COMMAND_PROPERTIES_ARRAY_KHR.asciidoc[] + | {cl_command_properties_khr_TYPE}[] + | Return the properties argument specified on _command_ recording. 
If the properties argument specified on creation of _command_ was not `NULL`, the implementation must return the values specified in the @@ -16178,10 +16237,8 @@ include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR. properties. If the properties argument specified on creation of _command_ was - `NULL`, or _command_ was not recorded from a - {clCommandNDRangeKernelKHR} command, the implementation must return - _param_value_size_ret_ equal to 0, indicating that there are no - properties to be returned. + `NULL`, the implementation must return _param_value_size_ret_ equal to 0, + indicating that there are no properties to be returned. | {CL_MUTABLE_DISPATCH_KERNEL_KHR_anchor} include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_KERNEL_KHR.asciidoc[] diff --git a/config/rouge/lib/rouge/lexers/opencl.rb b/config/rouge/lib/rouge/lexers/opencl.rb index 1e1457c80..47c6edc51 100644 --- a/config/rouge/lib/rouge/lexers/opencl.rb +++ b/config/rouge/lib/rouge/lexers/opencl.rb @@ -158,7 +158,7 @@ class OpenCL < Cpp cl_command_buffer_state_khr cl_command_buffer_properties_khr cl_command_buffer_flags_khr - cl_ndrange_kernel_command_properties_khr + cl_command_properties_khr cl_mutable_command_khr cl_mutable_dispatch_fields_khr cl_mutable_command_info_khr diff --git a/xml/cl.xml b/xml/cl.xml index 8dc46339c..db8a06848 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -245,7 +245,7 @@ server's OpenCL/api-docs repository. typedef cl_uint cl_command_buffer_state_khr; typedef cl_properties cl_command_buffer_properties_khr; typedef cl_bitfield cl_command_buffer_flags_khr; - typedef cl_properties cl_ndrange_kernel_command_properties_khr; + typedef cl_properties cl_command_properties_khr; typedef struct _cl_mutable_command_khr* cl_mutable_command_khr; typedef cl_bitfield cl_mutable_dispatch_fields_khr; typedef cl_uint cl_mutable_command_info_khr; @@ -1783,7 +1783,7 @@ server's OpenCL/api-docs repository. 
- + @@ -3118,6 +3118,7 @@ server's OpenCL/api-docs repository. cl_int clCommandBarrierWithWaitListKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties cl_uint num_sync_points_in_wait_list const cl_sync_point_khr* sync_point_wait_list cl_sync_point_khr* sync_point @@ -3127,6 +3128,7 @@ server's OpenCL/api-docs repository. cl_int clCommandCopyBufferKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties cl_mem src_buffer cl_mem dst_buffer size_t src_offset @@ -3141,6 +3143,7 @@ server's OpenCL/api-docs repository. cl_int clCommandCopyBufferRectKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties cl_mem src_buffer cl_mem dst_buffer const size_t* src_origin @@ -3159,6 +3162,7 @@ server's OpenCL/api-docs repository. cl_int clCommandCopyBufferToImageKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties cl_mem src_buffer cl_mem dst_image size_t src_offset @@ -3173,6 +3177,7 @@ server's OpenCL/api-docs repository. cl_int clCommandCopyImageKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties cl_mem src_image cl_mem dst_image const size_t* src_origin @@ -3187,6 +3192,7 @@ server's OpenCL/api-docs repository. cl_int clCommandCopyImageToBufferKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties cl_mem src_image cl_mem dst_buffer const size_t* src_origin @@ -3201,6 +3207,7 @@ server's OpenCL/api-docs repository. cl_int clCommandFillBufferKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties cl_mem buffer const void* pattern size_t pattern_size @@ -3215,6 +3222,7 @@ server's OpenCL/api-docs repository. 
cl_int clCommandFillImageKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties cl_mem image const void* fill_color const size_t* origin @@ -3228,7 +3236,7 @@ server's OpenCL/api-docs repository. cl_int clCommandNDRangeKernelKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue - const cl_ndrange_kernel_command_properties_khr* properties + const cl_command_properties_khr* properties cl_kernel kernel cl_uint work_dim const size_t* global_work_offset @@ -3243,6 +3251,7 @@ server's OpenCL/api-docs repository. cl_int clCommandSVMMemcpyKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties void* dst_ptr const void* src_ptr size_t size @@ -3255,6 +3264,7 @@ server's OpenCL/api-docs repository. cl_int clCommandSVMMemFillKHR cl_command_buffer_khr command_buffer cl_command_queue command_queue + const cl_command_properties_khr* properties void* svm_ptr const void* pattern size_t pattern_size @@ -7174,7 +7184,7 @@ server's OpenCL/api-docs repository. - + @@ -7186,7 +7196,7 @@ server's OpenCL/api-docs repository. - + @@ -7321,7 +7331,7 @@ server's OpenCL/api-docs repository. - + @@ -7343,7 +7353,7 @@ server's OpenCL/api-docs repository. - + @@ -7358,7 +7368,7 @@ server's OpenCL/api-docs repository. - + @@ -7371,7 +7381,7 @@ server's OpenCL/api-docs repository. 
- + From 7e876d0392f18bfac68a1cf79910e1d719e0a630 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 9 Sep 2024 15:29:01 -0700 Subject: [PATCH 152/190] add an issue about zero-sized allocations (#1083) --- .../cl_intel_unified_shared_memory.asciidoc | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/extensions/cl_intel_unified_shared_memory.asciidoc b/extensions/cl_intel_unified_shared_memory.asciidoc index 934dbafa8..05538f926 100644 --- a/extensions/cl_intel_unified_shared_memory.asciidoc +++ b/extensions/cl_intel_unified_shared_memory.asciidoc @@ -1278,6 +1278,29 @@ For some devices, this query will return the same value as `CL_DEVICE_MAX_MEM_AL * Do nothing and keep the existing error behavior. -- +. Should it be an error to allocate zero bytes? ++ +-- +*UNRESOLVED*: +Currently, attempting to allocate zero bytes fails and returns `CL_INVALID_BUFFER_SIZE`. +This is consistent with SVM, where *clSVMAlloc* fails and returns a `NULL` pointer if the size to allocate is zero. +It is also consistent with CUDA, where *cuMemAlloc*, etc. returns an error if the size to allocate is zero. + +However, it is not necessarily consistent with other memory allocation functions. For example: + +* The result of calling `malloc(0)` is implementation-defined: it can either return a `NULL` pointer or a unique non-null pointer that must be freed. +If a `NULL` pointer is returned then `errno` may be set to an implementation-defined value. +If a unique non-null pointer is returned then it cannot be dereferenced. +* Allocating an array of zero elements using `new` must return a non-null pointer, though dereferencing the pointer is undefined. + +Possible resolutions: + +* Allow zero-sized allocations and require returning a non-null pointer that must be freed. +* Allow zero-sized allocations but allow returning a `NULL` pointer. No error would be generated, even if a `NULL` pointer is returned. +* Specify that this case is implementation-defined. 
+* Do nothing and keep the existing error behavior. +-- + == Revision History [cols="5,15,15,70"] From 5ef65f811e940319f29a3383ce71cb70bb7a3240 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 9 Sep 2024 15:29:50 -0700 Subject: [PATCH 153/190] document int4 functions and functions with other return types (#1181) --- ...bgroup_matrix_multiply_accumulate.asciidoc | 72 ++++++++++++++++--- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc b/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc index d6f492bab..64d4b3487 100644 --- a/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc +++ b/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc @@ -95,6 +95,27 @@ int2 intel_sub_group_u8_u8_matrix_mad_k32(uint2 a, uint8 b, int2 acc); int4 intel_sub_group_u8_u8_matrix_mad_k32(uint4 a, uint8 b, int4 acc); int8 intel_sub_group_u8_u8_matrix_mad_k32(uint8 a, uint8 b, int8 acc); +// 4-bit matrices: +int intel_sub_group_i4_i4_matrix_mad_k64(int a, int8 b, int acc); +int2 intel_sub_group_i4_i4_matrix_mad_k64(int2 a, int8 b, int2 acc); +int4 intel_sub_group_i4_i4_matrix_mad_k64(int4 a, int8 b, int4 acc); +int8 intel_sub_group_i4_i4_matrix_mad_k64(int8 a, int8 b, int8 acc); + +int intel_sub_group_i4_u4_matrix_mad_k64(int a, uint8 b, int acc); +int2 intel_sub_group_i4_u4_matrix_mad_k64(int2 a, uint8 b, int2 acc); +int4 intel_sub_group_i4_u4_matrix_mad_k64(int4 a, uint8 b, int4 acc); +int8 intel_sub_group_i4_u4_matrix_mad_k64(int8 a, uint8 b, int8 acc); + +int intel_sub_group_u4_i4_matrix_mad_k64(uint a, int8 b, int acc); +int2 intel_sub_group_u4_i4_matrix_mad_k64(uint2 a, int8 b, int2 acc); +int4 intel_sub_group_u4_i4_matrix_mad_k64(uint4 a, int8 b, int4 acc); +int8 intel_sub_group_u4_i4_matrix_mad_k64(uint8 a, int8 b, int8 acc); + +int intel_sub_group_u4_u4_matrix_mad_k64(uint a, uint8 b, int acc); +int2 intel_sub_group_u4_u4_matrix_mad_k64(uint2 a, uint8 b, int2 
acc); +int4 intel_sub_group_u4_u4_matrix_mad_k64(uint4 a, uint8 b, int4 acc); +int8 intel_sub_group_u4_u4_matrix_mad_k64(uint8 a, uint8 b, int8 acc); + // bfloat16 matrices: float intel_sub_group_bf16_bf16_matrix_mad_k16(int a, int8 b, float acc); float2 intel_sub_group_bf16_bf16_matrix_mad_k16(int2 a, int8 b, float2 acc); @@ -134,17 +155,50 @@ int2 intel_sub_group_u8_u8_matrix_mad_k32(ushort2 a, uint8 b, int2 acc); int4 intel_sub_group_u8_u8_matrix_mad_k32(ushort4 a, uint8 b, int4 acc); int8 intel_sub_group_u8_u8_matrix_mad_k32(ushort8 a, uint8 b, int8 acc); -// bfloat16 matrices: +// 4-bit matrices: +int intel_sub_group_i4_i4_matrix_mad_k64(short a, int8 b, int acc); +int2 intel_sub_group_i4_i4_matrix_mad_k64(short2 a, int8 b, int2 acc); +int4 intel_sub_group_i4_i4_matrix_mad_k64(short4 a, int8 b, int4 acc); +int8 intel_sub_group_i4_i4_matrix_mad_k64(short8 a, int8 b, int8 acc); + +int intel_sub_group_i4_u4_matrix_mad_k64(short a, uint8 b, int acc); +int2 intel_sub_group_i4_u4_matrix_mad_k64(short2 a, uint8 b, int2 acc); +int4 intel_sub_group_i4_u4_matrix_mad_k64(short4 a, uint8 b, int4 acc); +int8 intel_sub_group_i4_u4_matrix_mad_k64(short8 a, uint8 b, int8 acc); + +int intel_sub_group_u4_i4_matrix_mad_k64(ushort a, int8 b, int acc); +int2 intel_sub_group_u4_i4_matrix_mad_k64(ushort2 a, int8 b, int2 acc); +int4 intel_sub_group_u4_i4_matrix_mad_k64(ushort4 a, int8 b, int4 acc); +int8 intel_sub_group_u4_i4_matrix_mad_k64(ushort8 a, int8 b, int8 acc); + +int intel_sub_group_u4_u4_matrix_mad_k64(ushort a, uint8 b, int acc); +int2 intel_sub_group_u4_u4_matrix_mad_k64(ushort2 a, uint8 b, int2 acc); +int4 intel_sub_group_u4_u4_matrix_mad_k64(ushort4 a, uint8 b, int4 acc); +int8 intel_sub_group_u4_u4_matrix_mad_k64(ushort8 a, uint8 b, int8 acc); + +// bfloat16 matrices with float accumulator: float intel_sub_group_bf16_bf16_matrix_mad_k16(short a, int8 b, float acc); float2 intel_sub_group_bf16_bf16_matrix_mad_k16(short2 a, int8 b, float2 acc); float4 
intel_sub_group_bf16_bf16_matrix_mad_k16(short4 a, int8 b, float4 acc); float8 intel_sub_group_bf16_bf16_matrix_mad_k16(short8 a, int8 b, float8 acc); -// fp16 matrices: +// fp16 matrices with float accumulator: float intel_sub_group_f16_f16_matrix_mad_k16(short a, int8 b, float acc); float2 intel_sub_group_f16_f16_matrix_mad_k16(short2 a, int8 b, float2 acc); float4 intel_sub_group_f16_f16_matrix_mad_k16(short4 a, int8 b, float4 acc); float8 intel_sub_group_f16_f16_matrix_mad_k16(short8 a, int8 b, float8 acc); + +// bfloat16 matrices with bfloat16 accumulator: +short intel_sub_group_bf16_bf16_matrix_mad_k16(short a, int8 b, short acc); +short2 intel_sub_group_bf16_bf16_matrix_mad_k16(short2 a, int8 b, short2 acc); +short4 intel_sub_group_bf16_bf16_matrix_mad_k16(short4 a, int8 b, short4 acc); +short8 intel_sub_group_bf16_bf16_matrix_mad_k16(short8 a, int8 b, short8 acc); + +// fp16 matrices with fp16 accumulator: +half intel_sub_group_f16_f16_matrix_mad_k16(short a, int8 b, half acc); +half2 intel_sub_group_f16_f16_matrix_mad_k16(short2 a, int8 b, half2 acc); +half4 intel_sub_group_f16_f16_matrix_mad_k16(short4 a, int8 b, half4 acc); +half8 intel_sub_group_f16_f16_matrix_mad_k16(short8 a, int8 b, half8 acc); ---- == Modifications to the OpenCL C Specification @@ -213,10 +267,13 @@ For this list of functions: * `M` may be equal to 1, 2, 4, or 8. * `N` must be equal to 8 for some devices or 16 for other devices. In other words, the only supported subgroup sizes are 8 and 16. -* Supported integer matrix types for `a` and `b` are any combination of signed or unsigned 8-bit integers. -For these integer matrix types, the accumulation value `acc` and result value are signed 32-bit integers, and `K` must be equal to 32. +* Supported integer matrix types for `a` and `b` are any combination of signed or unsigned 8-bit integers, or any combination of signed or unsigned 4-bit integers. +For 8-bit matrices, `K` must be equal to 32. For 4-bit matrices, `K` must be equal to 64.
+For these integer matrix types, the accumulation value `acc` and result value are signed 32-bit integers. * The supported floating-point matrix types for `a` and `b` are fp16 (half) or bfloat16. -For these floating-point matrix type, the accumulation value `acc` and result value are 32-bit floating-point values, and `K` must be equal to 16. +For these floating-point matrices, `K` must be equal to 16. +The accumulation value `acc` and result value are 32-bit floating-point values. +For devices with `N` equal to 16, the accumulation value `acc` and result value may also be fp16 for fp16 matrices, or bfloat16 for bfloat16 matrices. == Coding Sample @@ -288,12 +345,10 @@ int2 intel_sub_group_i8_i8_matrix_mad_k32(int2 a, int8 b, int2 acc) == Issues -None. - . Should this extension use signed or unsigned types to represent fp16 and bf16 data? + -- -`RESOLVED`: This extension will use signed types to represent fp16 and bf16 data even though this is inconsistent with other extensions such as cl_intel_bfloat16 conversions. +`RESOLVED`: This extension will use signed types to represent fp16 and bf16 data even though this is inconsistent with other extensions, such as the `cl_intel_bfloat16_conversions` extension. This inconsistency may be addressed in a future extension or in a future version of this extension. Applications are encouraged to use `as_type` to reinterpret unsigned data as signed data as needed to use the functions added by this extension. -- @@ -306,6 +361,7 @@ Applications are encouraged to use `as_type` to reinterpret unsigned data as sig |======================================== |Rev|Date|Author|Changes |1.0.0|2022-05-18|Ben Ashbaugh|*Initial public revision* +|1.0.0|2024-06-06|Ben Ashbaugh|Document additional functions. 
|======================================== //************************************************************************ From 047b862a8c44ffae43441adad01de49229b69559 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 9 Sep 2024 19:41:09 -0700 Subject: [PATCH 154/190] generate an OpenCL C built-in functions dictionary (#1228) * generate an OpenCL C built-in functions dictionary * switch command line argument to specify input file to -i --- Makefile | 7 +- OpenCL_C.txt | 204 +++++++++--------- c/dictionary.asciidoc | 2 + c/functions.txt | 99 +++++++++ ...tionary.py => gen_dictionary_from_file.py} | 43 ++-- 5 files changed, 233 insertions(+), 122 deletions(-) create mode 100644 c/functions.txt rename scripts/{gen_c_feature_dictionary.py => gen_dictionary_from_file.py} (61%) diff --git a/Makefile b/Makefile index 023d8cf0f..d08fcd600 100644 --- a/Makefile +++ b/Makefile @@ -514,10 +514,12 @@ $(MANHTMLDIR)/intro.html: $(REFPATH)/intro.txt $(MANCOPYRIGHT) REGISTRY = $(ROOTDIR)/xml APIXML = $(REGISTRY)/cl.xml CFEATURES = c/features.txt +CFUNCTIONS = c/functions.txt GENSCRIPT = $(SCRIPTS)/gencl.py DICTSCRIPT = $(SCRIPTS)/gen_dictionaries.py VERSIONSCRIPT = $(SCRIPTS)/gen_version_notes.py -CFEATSCRIPT = $(SCRIPTS)/gen_c_feature_dictionary.py +CFEATSCRIPT = $(SCRIPTS)/gen_dictionary_from_file.py +CFUNCSCRIPT = $(SCRIPTS)/gen_dictionary_from_file.py GENSCRIPTOPTS = $(VERSIONOPTIONS) $(EXTOPTIONS) $(GENSCRIPTEXTRA) -registry $(APIXML) GENSCRIPTEXTRA = @@ -543,7 +545,8 @@ extinc: $(METADEPEND) $(METADEPEND): $(APIXML) $(GENSCRIPT) $(QUIET)$(MKDIR) $(METAPATH) $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(METAPATH) extinc - $(QUIET)$(PYTHON) $(CFEATSCRIPT) -features $(CFEATURES) -o $(METAPATH)/c-feature-dictionary.asciidoc + $(QUIET)$(PYTHON) $(CFEATSCRIPT) -i $(CFEATURES) -o $(METAPATH)/c-feature-dictionary.asciidoc + $(QUIET)$(PYTHON) $(CFUNCSCRIPT) -i $(CFUNCTIONS) -o $(METAPATH)/c-function-dictionary.asciidoc # This generates a single file containing asciidoc 
attributes for each # extension in the spec being built. diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 347a4a3d8..7a2347faa 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -13690,10 +13690,6 @@ execution. [[built-in-functions-kernel-query-functions]] ==== Built-in Functions - Kernel Query Functions -// Note: the Unicode "zero width space" (​) is used in some places to -// cause long function names to break much more sensibly. -// Probably the asciidoc built-in {zwsp} should be used instead. - [open,refpage='kernelQueryFunctions',desc='Built-in Functions - Kernel Query Functions',type='freeform',spec='clang',anchor='built-in-functions-kernel-query-functions',xrefs='enqueue_kernel',alias='get_kernel_preferred get_kernel_work_group_size'] -- [[table-builtin-kernel-query]] @@ -13707,9 +13703,9 @@ execution. can be used to execute a block on a specific device given by _device_. _block_ specifies the block to be enqueued. -| uint *get_kernel_preferred_​work_group_size_multiple*( +| uint *{get_kernel_preferred_work_group_size_multiple}*( void (^block)(void)) + - uint *get_kernel_preferred_​work_group_size_multiple*( + uint *{get_kernel_preferred_work_group_size_multiple}*( void (^block)(local {localArgType} *, ...)) | Returns the preferred multiple of work-group size for launch. This is a performance hint. @@ -14989,138 +14985,138 @@ This section is informational and non-normative. 
3+| For OpenCL 2.1 or {cl_khr_subgroups_EXT}: -| `get_​sub_​group_​size` +| `{get_sub_group_size}` | *SubgroupSize* | *Kernel* -| `get_​max_​sub_​group_​size` +| `{get_max_sub_group_size}` | *SubgroupMaxSize* | *Kernel* -| `get_​num_​sub_​groups` +| `{get_num_sub_groups}` | *NumSubgroups* | *Kernel* -| `get_​enqueued_​num_​sub_​groups` +| `{get_enqueued_num_sub_groups}` | *NumEnqueuedSubgroups* | *Kernel* -| `get_​sub_​group_​id` +| `{get_sub_group_id}` | *SubgroupId* | *Kernel* -| `get_​sub_​group_​local_​id` +| `{get_sub_group_local_id}` | *SubgroupLocalInvocationId* | *Kernel* -| `sub_​group_​barrier` +| `{sub_group_barrier}` | *OpControlBarrier* | None Needed -| `sub_​group_​all` +| `{sub_group_all}` | *OpGroupAll* | *Groups* -| `sub_​group_​any` +| `{sub_group_any}` | *OpGroupAny* | *Groups* -| `sub_​group_​broadcast` +| `{sub_group_broadcast}` | *OpGroupBroadcast* | *Groups* -| `sub_​group_​reduce_​add` +| `{sub_group_reduce_add}` | *OpGroupIAdd*, *OpGroupFAdd* | *Groups* -| `sub_​group_​reduce_​min` +| `{sub_group_reduce_min}` | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* | *Groups* -| `sub_​group_​reduce_​max` +| `{sub_group_reduce_max}` | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* | *Groups* -| `sub_​group_​scan_​exclusive_​add` +| `{sub_group_scan_exclusive_add}` | *OpGroupIAdd*, *OpGroupFAdd* | *Groups* -| `sub_​group_​scan_​exclusive_​min` +| `{sub_group_scan_exclusive_min}` | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* | *Groups* -| `sub_​group_​scan_​exclusive_​max` +| `{sub_group_scan_exclusive_max}` | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* | *Groups* -| `sub_​group_​scan_​inclusive_​add` +| `{sub_group_scan_inclusive_add}` | *OpGroupIAdd*, *OpGroupFAdd* | *Groups* -| `sub_​group_​scan_​inclusive_​min` +| `{sub_group_scan_inclusive_min}` | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* | *Groups* -| `sub_​group_​scan_​inclusive_​max` +| `{sub_group_scan_inclusive_max}` | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* | *Groups* -| 
`sub_​group_​reserve_​read_​pipe` +| `{sub_group_reserve_read_pipe}` | *OpGroupReserveReadPipePackets* | *Pipes* -| `sub_​group_​reserve_​write_​pipe` +| `{sub_group_reserve_write_pipe}` | *OpGroupReserveReadWritePackets* | *Pipes* -| `sub_​group_​commit_​read_​pipe` +| `{sub_group_commit_read_pipe}` | *OpGroupCommitReadPipe* | *Pipes* -| `sub_​group_​commit_​write_​pipe` +| `{sub_group_commit_write_pipe}` | *OpGroupCommitWritePipe* | *Pipes* -| `get_​kernel_​sub_​group_​count_​for_​ndrange` +| `{get_kernel_sub_group_count_for_ndrange}` | *OpGetKernelNDrangeSubGroupCount* | *DeviceEnqueue* -| `get_​kernel_​max_​sub_​group_​size_​for_​ndrange` +| `{get_kernel_max_sub_group_size_for_ndrange}` | *OpGetKernelNDrangeMaxSubGroupSize* | *DeviceEnqueue* ifdef::cl_khr_subgroup_ballot[] 3+| For {cl_khr_subgroup_ballot_EXT}: -| `sub_​group_​non_​uniform_​broadcast` +| `{sub_group_non_uniform_broadcast}` | *OpGroupNonUniformBroadcast* | *GroupNonUniformBallot* -| `sub_​group_​broadcast_​first` +| `{sub_group_broadcast_first}` | *OpGroupNonUniformBroadcastFirst* | *GroupNonUniformBallot* -| `sub_​group_​ballot` +| `{sub_group_ballot}` | *OpGroupNonUniformBallot* | *GroupNonUniformBallot* -| `sub_​group_​inverse_​ballot` +| `{sub_group_inverse_ballot}` | *OpGroupNonUniformInverseBallot* | *GroupNonUniformBallot* -| `sub_​group_​ballot_​bit_​extract` +| `{sub_group_ballot_bit_extract}` | *OpGroupNonUniformBallotBitExtract* | *GroupNonUniformBallot* -| `sub_​group_​ballot_​bit_​count` +| `{sub_group_ballot_bit_count}` | *OpGroupNonUniformBallotBitCount* | *GroupNonUniformBallot* -| `sub_​group_​ballot_​inclusive_​scan` +| `{sub_group_ballot_inclusive_scan}` | *OpGroupNonUniformBallotBitCount* | *GroupNonUniformBallot* -| `sub_​group_​ballot_​exclusive_​scan` +| `{sub_group_ballot_exclusive_scan}` | *OpGroupNonUniformBallotBitCount* | *GroupNonUniformBallot* -| `sub_​group_​ballot_​find_​lsb` +| `{sub_group_ballot_find_lsb}` | *OpGroupNonUniformBallotFindLSB* | 
*GroupNonUniformBallot* -| `sub_​group_​ballot_​find_​msb` +| `{sub_group_ballot_find_msb}` | *OpGroupNonUniformBallotFindMSB* | *GroupNonUniformBallot* -| `get_​sub_​group_​eq_​mask` +| `{get_sub_group_eq_mask}` | *SubgroupEqMask* | *GroupNonUniformBallot* -| `get_​sub_​group_​ge_​mask` +| `{get_sub_group_ge_mask}` | *SubgroupGeMask* | *GroupNonUniformBallot* -| `get_​sub_​group_​gt_​mask` +| `{get_sub_group_gt_mask}` | *SubgroupGtMask* | *GroupNonUniformBallot* -| `get_​sub_​group_​le_​mask` +| `{get_sub_group_le_mask}` | *SubgroupLeMask* | *GroupNonUniformBallot* -| `get_​sub_​group_​lt_​mask` +| `{get_sub_group_lt_mask}` | *SubgroupLtMask* | *GroupNonUniformBallot* endif::cl_khr_subgroup_ballot[] @@ -15128,34 +15124,34 @@ endif::cl_khr_subgroup_ballot[] ifdef::cl_khr_subgroup_clustered_reduce[] 3+| For {cl_khr_subgroup_clustered_reduce_EXT}: -| `sub_​group_​clustered_​reduce_​add` +| `{sub_group_clustered_reduce_add}` | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​mul` +| `{sub_group_clustered_reduce_mul}` | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​min` +| `{sub_group_clustered_reduce_min}` | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​max` +| `{sub_group_clustered_reduce_max}` | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​and` +| `{sub_group_clustered_reduce_and}` | *OpGroupNonUniformBitwiseAnd* | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​or` +| `{sub_group_clustered_reduce_or}` | *OpGroupNonUniformBitwiseOr* | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​xor` +| `{sub_group_clustered_reduce_xor}` | *OpGroupNonUniformBitwiseXor* | *GroupNonUniformClustered* -| 
`sub_​group_​clustered_​reduce_​logical_​and` +| `{sub_group_clustered_reduce_logical_and}` | *OpGroupNonUniformLogicalAnd* | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​logical_​or` +| `{sub_group_clustered_reduce_logical_or}` | *OpGroupNonUniformLogicalOr* | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​logical_​xor` +| `{sub_group_clustered_reduce_logical_xor}` | *OpGroupNonUniformLogicalXor* | *GroupNonUniformClustered* endif::cl_khr_subgroup_clustered_reduce[] @@ -15164,37 +15160,37 @@ ifdef::cl_khr_subgroup_extended_types[] 3+| For {cl_khr_subgroup_extended_types_EXT}: + Note: This extension adds new types to uniform sub-group operations. -| `sub_​group_​broadcast` +| `{sub_group_broadcast}` | *OpGroupBroadcast* | *Groups* -| `sub_​group_​reduce_​add` +| `{sub_group_reduce_add}` | *OpGroupIAdd*, *OpGroupFAdd* | *Groups* -| `sub_​group_​reduce_​min` +| `{sub_group_reduce_min}` | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* | *Groups* -| `sub_​group_​reduce_​max` +| `{sub_group_reduce_max}` | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* | *Groups* -| `sub_​group_​scan_​exclusive_​add` +| `{sub_group_scan_exclusive_add}` | *OpGroupIAdd*, *OpGroupFAdd* | *Groups* -| `sub_​group_​scan_​exclusive_​min` +| `{sub_group_scan_exclusive_min}` | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* | *Groups* -| `sub_​group_​scan_​exclusive_​max` +| `{sub_group_scan_exclusive_max}` | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* | *Groups* -| `sub_​group_​scan_​inclusive_​add` +| `{sub_group_scan_inclusive_add}` | *OpGroupIAdd*, *OpGroupFAdd* | *Groups* -| `sub_​group_​scan_​inclusive_​min` +| `{sub_group_scan_inclusive_min}` | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* | *Groups* -| `sub_​group_​scan_​inclusive_​max` +| `{sub_group_scan_inclusive_max}` | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* | *Groups* endif::cl_khr_subgroup_extended_types[] @@ -15202,96 +15198,96 @@ endif::cl_khr_subgroup_extended_types[] 
ifdef::cl_khr_subgroup_non_uniform_arithmetic[] 3+| For {cl_khr_subgroup_non_uniform_arithmetic_EXT}: -| `sub_​group_​non_​uniform_​reduce_​add` +| `{sub_group_non_uniform_reduce_add}` | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​mul` +| `{sub_group_non_uniform_reduce_mul}` | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​min` +| `{sub_group_non_uniform_reduce_min}` | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​max` +| `{sub_group_non_uniform_reduce_max}` | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​and` +| `{sub_group_non_uniform_reduce_and}` | *OpGroupNonUniformBitwiseAnd* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​or` +| `{sub_group_non_uniform_reduce_or}` | *OpGroupNonUniformBitwiseOr* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​xor` +| `{sub_group_non_uniform_reduce_xor}` | *OpGroupNonUniformBitwiseXor* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​logical_​and` +| `{sub_group_non_uniform_reduce_logical_and}` | *OpGroupNonUniformLogicalAnd* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​logical_​or` +| `{sub_group_non_uniform_reduce_logical_or}` | *OpGroupNonUniformLogicalOr* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​logical_​xor` +| `{sub_group_non_uniform_reduce_logical_xor}` | *OpGroupNonUniformLogicalXor* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​add` +| `{sub_group_non_uniform_scan_inclusive_add}` | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​mul` +| 
`{sub_group_non_uniform_scan_inclusive_mul}` | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​min` +| `{sub_group_non_uniform_scan_inclusive_min}` | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​max` +| `{sub_group_non_uniform_scan_inclusive_max}` | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​and` +| `{sub_group_non_uniform_scan_inclusive_and}` | *OpGroupNonUniformBitwiseAnd* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​or` +| `{sub_group_non_uniform_scan_inclusive_or}` | *OpGroupNonUniformBitwiseOr* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​xor` +| `{sub_group_non_uniform_scan_inclusive_xor}` | *OpGroupNonUniformBitwiseXor* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​and` +| `{sub_group_non_uniform_scan_inclusive_logical_and}` | *OpGroupNonUniformLogicalAnd* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​or` +| `{sub_group_non_uniform_scan_inclusive_logical_or}` | *OpGroupNonUniformLogicalOr* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​xor` +| `{sub_group_non_uniform_scan_inclusive_logical_xor}` | *OpGroupNonUniformLogicalXor* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​exclusive_​add` +| `{sub_group_non_uniform_scan_exclusive_add}` | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​exclusive_​mul` +| `{sub_group_non_uniform_scan_exclusive_mul}` | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​exclusive_​min` +| 
`{sub_group_non_uniform_scan_exclusive_min}` | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​max` +| `{sub_group_non_uniform_scan_exclusive_max}` | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​and` +| `{sub_group_non_uniform_scan_exclusive_and}` | *OpGroupNonUniformBitwiseAnd* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​or` +| `{sub_group_non_uniform_scan_exclusive_or}` | *OpGroupNonUniformBitwiseOr* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​xor` +| `{sub_group_non_uniform_scan_exclusive_xor}` | *OpGroupNonUniformBitwiseXor* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​and` +| `{sub_group_non_uniform_scan_exclusive_logical_and}` | *OpGroupNonUniformLogicalAnd* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​or` +| `{sub_group_non_uniform_scan_exclusive_logical_or}` | *OpGroupNonUniformLogicalOr* | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​xor` +| `{sub_group_non_uniform_scan_exclusive_logical_xor}` | *OpGroupNonUniformLogicalXor* | *GroupNonUniformArithmetic* endif::cl_khr_subgroup_non_uniform_arithmetic[] @@ -15299,16 +15295,16 @@ endif::cl_khr_subgroup_non_uniform_arithmetic[] ifdef::cl_khr_subgroup_non_uniform_vote[] 3+| For {cl_khr_subgroup_non_uniform_vote_EXT}: -| `sub_​group_​elect` +| `{sub_group_elect}` | *OpGroupNonUniformElect* | *GroupNonUniform* -| `sub_​group_​non_​uniform_​all` +| `{sub_group_non_uniform_all}` | *OpGroupNonUniformAll* | *GroupNonUniformVote* -| `sub_​group_​non_​uniform_​any` +| `{sub_group_non_uniform_any}` | *OpGroupNonUniformAny* | *GroupNonUniformVote* -| `sub_​group_​non_​uniform_​all_​equal` +| 
`{sub_group_non_uniform_all_equal}` | *OpGroupNonUniformAllEqual* | *GroupNonUniformVote* endif::cl_khr_subgroup_non_uniform_vote[] @@ -15316,10 +15312,10 @@ endif::cl_khr_subgroup_non_uniform_vote[] ifdef::cl_khr_subgroup_shuffle[] 3+| For {cl_khr_subgroup_shuffle_EXT}: -| `sub_​group_​shuffle` +| `{sub_group_shuffle}` | *OpGroupNonUniformShuffle* | *GroupNonUniformShuffle* -| `sub_​group_​shuffle_​xor` +| `{sub_group_shuffle_xor}` | *OpGroupNonUniformShuffleXor* | *GroupNonUniformShuffle* endif::cl_khr_subgroup_shuffle[] @@ -15327,10 +15323,10 @@ endif::cl_khr_subgroup_shuffle[] ifdef::cl_khr_subgroup_shuffle_relative[] 3+| For {cl_khr_subgroup_shuffle_relative_EXT}: -| `sub_​group_​shuffle_​up` +| `{sub_group_shuffle_up}` | *OpGroupNonUniformShuffleUp* | *GroupNonUniformShuffleRelative* -| `sub_​group_​shuffle_​down` +| `{sub_group_shuffle_down}` | *OpGroupNonUniformShuffleDown* | *GroupNonUniformShuffleRelative* endif::cl_khr_subgroup_shuffle_relative[] diff --git a/c/dictionary.asciidoc b/c/dictionary.asciidoc index ef7a9401b..bde39ebc1 100644 --- a/c/dictionary.asciidoc +++ b/c/dictionary.asciidoc @@ -4,3 +4,5 @@ include::{generated}/api/api-dictionary-no-links.asciidoc[] include::{generated}/api/ext-dictionary-no-links.asciidoc[] + +include::{generated}/meta/c-function-dictionary.asciidoc[] diff --git a/c/functions.txt b/c/functions.txt new file mode 100644 index 000000000..c9a9f640c --- /dev/null +++ b/c/functions.txt @@ -0,0 +1,99 @@ +get_kernel_preferred_work_group_size_multiple +get_sub_group_size +get_max_sub_group_size +get_num_sub_groups +get_enqueued_num_sub_groups +get_sub_group_id +get_sub_group_local_id +sub_group_barrier +sub_group_all +sub_group_any +sub_group_broadcast +sub_group_reduce_add +sub_group_reduce_min +sub_group_reduce_max +sub_group_scan_exclusive_add +sub_group_scan_exclusive_min +sub_group_scan_exclusive_max +sub_group_scan_inclusive_add +sub_group_scan_inclusive_min +sub_group_scan_inclusive_max +sub_group_reserve_read_pipe 
+sub_group_reserve_write_pipe +sub_group_commit_read_pipe +sub_group_commit_write_pipe +get_kernel_sub_group_count_for_ndrange +get_kernel_max_sub_group_size_for_ndrange +sub_group_non_uniform_broadcast +sub_group_broadcast_first +sub_group_ballot +sub_group_inverse_ballot +sub_group_ballot_bit_extract +sub_group_ballot_bit_count +sub_group_ballot_inclusive_scan +sub_group_ballot_exclusive_scan +sub_group_ballot_find_lsb +sub_group_ballot_find_msb +get_sub_group_eq_mask +get_sub_group_ge_mask +get_sub_group_gt_mask +get_sub_group_le_mask +get_sub_group_lt_mask +sub_group_clustered_reduce_add +sub_group_clustered_reduce_mul +sub_group_clustered_reduce_min +sub_group_clustered_reduce_max +sub_group_clustered_reduce_and +sub_group_clustered_reduce_or +sub_group_clustered_reduce_xor +sub_group_clustered_reduce_logical_and +sub_group_clustered_reduce_logical_or +sub_group_clustered_reduce_logical_xor +sub_group_broadcast +sub_group_reduce_add +sub_group_reduce_min +sub_group_reduce_max +sub_group_scan_exclusive_add +sub_group_scan_exclusive_min +sub_group_scan_exclusive_max +sub_group_scan_inclusive_add +sub_group_scan_inclusive_min +sub_group_scan_inclusive_max +sub_group_non_uniform_reduce_add +sub_group_non_uniform_reduce_mul +sub_group_non_uniform_reduce_min +sub_group_non_uniform_reduce_max +sub_group_non_uniform_reduce_and +sub_group_non_uniform_reduce_or +sub_group_non_uniform_reduce_xor +sub_group_non_uniform_reduce_logical_and +sub_group_non_uniform_reduce_logical_or +sub_group_non_uniform_reduce_logical_xor +sub_group_non_uniform_scan_inclusive_add +sub_group_non_uniform_scan_inclusive_mul +sub_group_non_uniform_scan_inclusive_min +sub_group_non_uniform_scan_inclusive_max +sub_group_non_uniform_scan_inclusive_and +sub_group_non_uniform_scan_inclusive_or +sub_group_non_uniform_scan_inclusive_xor +sub_group_non_uniform_scan_inclusive_logical_and +sub_group_non_uniform_scan_inclusive_logical_or +sub_group_non_uniform_scan_inclusive_logical_xor 
+sub_group_non_uniform_scan_exclusive_add +sub_group_non_uniform_scan_exclusive_mul +sub_group_non_uniform_scan_exclusive_min +sub_group_non_uniform_scan_exclusive_max +sub_group_non_uniform_scan_exclusive_and +sub_group_non_uniform_scan_exclusive_or +sub_group_non_uniform_scan_exclusive_xor +sub_group_non_uniform_scan_exclusive_logical_and +sub_group_non_uniform_scan_exclusive_logical_or +sub_group_non_uniform_scan_exclusive_logical_xor +sub_group_elect +sub_group_non_uniform_all +sub_group_non_uniform_any +sub_group_non_uniform_all_equal +sub_group_shuffle +sub_group_shuffle_xor +sub_group_shuffle_up +sub_group_shuffle_down diff --git a/scripts/gen_c_feature_dictionary.py b/scripts/gen_dictionary_from_file.py similarity index 61% rename from scripts/gen_c_feature_dictionary.py rename to scripts/gen_dictionary_from_file.py index f9b071735..f2f6a72da 100644 --- a/scripts/gen_c_feature_dictionary.py +++ b/scripts/gen_dictionary_from_file.py @@ -11,40 +11,41 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('-features', action='store', + parser.add_argument('-i', action='store', default='', - help='File with OpenCL C features to generate, one per line') + help='Input file with dictionary source to generate, one per line, stdin is used if no file is provided.') parser.add_argument('-o', action='store', default='', - help='Output file in which to store the feature dictionary. 
stdout is used if no file is provided.') + help='Output file in which to store the generated dictionary, stdout is used if no file is provided.') args = parser.parse_args() - features = [] - if len(args.features) > 0: - print('Generating feature dictionaries from: ' + args.features) - with open(args.features) as f: - features = f.readlines() + entries = [] + if args.i: + print('Generating dictionary from source file: ' + args.i) + with open(args.i) as f: + entries = f.readlines() else: - print('Reading feature dictionaries from stdin...') + print('Generating dictionary from stdin.') for line in sys.stdin: - features.append(line) + entries.append(line) print('Generating...\n') numberOfFeatures = 0 + numberOfEntries = 0 if args.o: outfile = open(args.o, 'w') else: outfile = sys.stdout - for name in features: + for name in entries: name = name.strip() if len(name) == 0: continue # OpenCL C features start with __opencl_c if name.startswith('__opencl_c'): - #print('found enum: ' + name) + #print('found feature: ' + name) # Create a variant of the name that precedes underscores with # "zero width" spaces. 
This causes some long names to be @@ -74,14 +75,24 @@ numberOfFeatures = numberOfFeatures + 1 - # everything else is a function + # everything else else: - print('Unexpected feature name: ' + name + ', features should start with __opencl_c!') - sys.exit(1) + htmlName = name[:4] + name[4:].replace("_", "_<wbr>") + otherName = name[:4] + name[4:].replace("_", "_​") + + outfile.write('// ' + name + '\n') + outfile.write('ifdef::backend-html5[]\n') + outfile.write(':' + name + ': pass:q[' + htmlName + ']\n') + outfile.write('endif::[]\n') + outfile.write('ifndef::backend-html5[]\n') + outfile.write(':' + name + ': pass:q[' + otherName + ']\n') + outfile.write('endif::[]\n') + + numberOfEntries = numberOfEntries + 1 outfile.write('\n') if args.o: outfile.close() - print('Found ' + str(numberOfFeatures) + ' features.') + print('Found ' + str(numberOfFeatures) + ' OpenCL C features and ' + str(numberOfEntries) + ' other entries.') From 3b9d286dfe7e02f4d4dc79f633eac764fc12076a Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 9 Sep 2024 19:41:33 -0700 Subject: [PATCH 155/190] clarify num_mip_levels (#1255) --- api/opencl_runtime_layer.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index e92935549..89985ed80 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -2666,7 +2666,7 @@ endif::cl_khr_external_memory[] ifndef::cl_khr_mipmap_image[0.] ifdef::cl_khr_mipmap_image[] 0 unless the {cl_khr_mipmap_image_EXT} extension is supported, in which - case it must be a value greater than 1 specifying the number of mipmap + case it may be a nonzero value specifying the number of mipmap levels in the image. endif::cl_khr_mipmap_image[] * _num_samples_ must be 0.
From a16414593975dc6a6030ad5c1a9c25267ac7316e Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Thu, 12 Sep 2024 11:02:04 -0600 Subject: [PATCH 156/190] platform: Clarify behavior for ATOMIC_SCOPE_ALL_DEVICES #1129 (#1171) * platform: Clarify behavior for ATOMIC_SCOPE_ALL_DEVICES * Adjusted table widths to prevent overflow beyond a page Asciidoctor has a limitation that prevents table cells from spanning across pages * Update api/opencl_platform_layer.asciidoc Co-authored-by: Ben Ashbaugh --------- Co-authored-by: Ben Ashbaugh --- api/opencl_platform_layer.asciidoc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 2b1ae266d..81f08b585 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -473,7 +473,7 @@ device except for the following queries: [[device-queries-table]] .List of supported param_names by {clGetDeviceInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] +[width="100%",cols="<28%,<15%,<57%",options="header"] |==== | Device Info | Return Type | Description | {CL_DEVICE_TYPE_anchor} @@ -1606,6 +1606,9 @@ include::{generated}/api/version-notes/CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES.asci {CL_DEVICE_ATOMIC_ORDER_RELAXED} \| + {CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP} + A device that does not support {CL_DEVICE_SVM_ATOMICS} (and hence does not support {CL_MEM_SVM_ATOMICS}) may still support {CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES}. On these devices, an atomic operation with + *memory_scope_all_svm_devices* will behave the same as if the scope were *memory_scope_device* - refer to the <>.
+ | {CL_DEVICE_ATOMIC_FENCE_CAPABILITIES_anchor} include::{generated}/api/version-notes/CL_DEVICE_ATOMIC_FENCE_CAPABILITIES.asciidoc[] From db295f2ebf0a640627fe6a57db7102524d9e7789 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 12 Sep 2024 10:02:35 -0700 Subject: [PATCH 157/190] clarify clGetSemaphoreHandleForTypeKHR is part of cl_khr_external_semaphore (#1257) --- api/cl_khr_external_semaphore.asciidoc | 4 ++++ api/cl_khr_external_semaphore_sync_fd.asciidoc | 1 - api/opencl_runtime_layer.asciidoc | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc index 1bbf56796..cdd7ea10a 100644 --- a/api/cl_khr_external_semaphore.asciidoc +++ b/api/cl_khr_external_semaphore.asciidoc @@ -70,6 +70,10 @@ The layered extensions {cl_khr_external_semaphore_opaque_fd_EXT}, {cl_khr_external_semaphore_win32_EXT} define specific external semaphores that may be imported into or exported from OpenCL. +=== New Commands + + * {clGetSemaphoreHandleForTypeKHR} + === New Types * {cl_external_semaphore_handle_type_khr_TYPE} diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc index ae1bc7891..19162666e 100644 --- a/api/cl_khr_external_semaphore_sync_fd.asciidoc +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -37,7 +37,6 @@ external semaphore using the APIs introduced by === New Commands - * {clGetSemaphoreHandleForTypeKHR} * {clReImportSemaphoreSyncFdKHR} === New Types diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 89985ed80..5a769f285 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -12940,6 +12940,7 @@ Please refer to handle specific documentation for more details on transference r To export an external handle from a semaphore, call the function include::{generated}/api/protos/clGetSemaphoreHandleForTypeKHR.txt[] 
+include::{generated}/api/version-notes/clGetSemaphoreHandleForTypeKHR.asciidoc[] * _sema_object_ specifies a valid semaphore object with exportable properties. From 9ebd444da54dc71e5ada3b91da19cff95df49fe4 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 12 Sep 2024 10:03:17 -0700 Subject: [PATCH 158/190] unify the CL_INVALID_COMMAND_QUEUE behavior for semaphore signals and waits (#1256) --- api/cl_khr_semaphore.asciidoc | 3 +++ api/opencl_runtime_layer.asciidoc | 15 ++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc index 721a3da2e..40afe4c12 100644 --- a/api/cl_khr_semaphore.asciidoc +++ b/api/cl_khr_semaphore.asciidoc @@ -255,3 +255,6 @@ while (true) { {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} (provisional). * Revision 1.0.0, 2024-03-15 ** First non-provisional version. + * Revision 1.0.1, 2024-09-08 + ** Unified {CL_INVALID_COMMAND_QUEUE} error behavior for + {clEnqueueSignalSemaphoresKHR} and {clEnqueueWaitSemaphoresKHR}. diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 5a769f285..a10d5fb92 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -13285,7 +13285,9 @@ Otherwise, it returns one of the following errors: ** if _command_queue_ is not a valid command-queue, or ** if the device associated with _command_queue_ is not same as one of the devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time - of creating one or more of _sema_objects_. + of creating one or more of _sema_objects_, or + ** if one or more of _sema_objects_ belong to a context that does not + contain a device associated with _command_queue_. * {CL_INVALID_VALUE} if _num_sema_objects_ is 0. * {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by _sema_objects_ is not valid. 
@@ -13398,11 +13400,11 @@ Otherwise, it returns one of the following errors: _sema_objects_ requires a semaphore payload and _sema_payload_list_ is `NULL`. * {CL_INVALID_EVENT_WAIT_LIST} - ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not - 0, or - ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is - 0, or - ** if event objects in _event_wait_list_ are not valid events. + ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, + or + ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, + or + ** if event objects in _event_wait_list_ are not valid events. * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of any of the events in _event_wait_list_ is a negative integer value. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources @@ -13411,7 +13413,6 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host. -- - === Retaining and Releasing Semaphores [open,refpage='clReleaseSemaphoreKHR',desc='Release a semaphore object',type='protos'] From e9c8e39fbf3636f1aa2762da1addee1deef410b3 Mon Sep 17 00:00:00 2001 From: Sun Serega Date: Sun, 15 Sep 2024 22:08:30 +0300 Subject: [PATCH 159/190] ARM => Arm (#1263) --- xml/cl.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xml/cl.xml b/xml/cl.xml index db8a06848..e96ebf8f1 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -502,7 +502,7 @@ server's OpenCL/api-docs repository. - + @@ -977,7 +977,7 @@ server's OpenCL/api-docs repository. - + @@ -985,7 +985,7 @@ server's OpenCL/api-docs repository. - + @@ -2076,7 +2076,7 @@ server's OpenCL/api-docs repository. 
- + From 310475b7e15cab7d07e625ae06f9419d8441a7f1 Mon Sep 17 00:00:00 2001 From: Sun Serega Date: Wed, 18 Sep 2024 21:16:19 +0300 Subject: [PATCH 160/190] `cl_intel_driver_diagnostics` (#1260) `cl_diagnostics_verbose_level` => `cl_diagnostic_verbose_level_intel` --- xml/cl.xml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xml/cl.xml b/xml/cl.xml index e96ebf8f1..e71aebc93 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -125,7 +125,7 @@ server's OpenCL/api-docs repository. typedef struct _cl_accelerator_intel* cl_accelerator_intel; typedef cl_uint cl_accelerator_type_intel; typedef cl_uint cl_accelerator_info_intel; - typedef cl_uint cl_diagnostics_verbose_level; + typedef cl_bitfield cl_diagnostic_verbose_level_intel; typedef cl_uint cl_va_api_device_source_intel; typedef cl_uint cl_va_api_device_set_intel; typedef struct __GLsync * cl_GLsync; @@ -991,7 +991,7 @@ server's OpenCL/api-docs repository. - + @@ -6273,10 +6273,12 @@ server's OpenCL/api-docs repository. - + + + From a11f0d23e5b831c4122780f3b966643a880cdbe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 24 Sep 2024 18:25:19 +0100 Subject: [PATCH 161/190] Add error when memory objects or semaphores are created with more than one external handle (#1249) Also link the description of external semaphore handles to the spec for clCreateSemaphoreWithPropertiesKHR. 
Fixes #1246 Change-Id: Ifb4c02795c6d4db8aee9b5f14b10fecd26992fd5 Signed-off-by: Kevin Petit --- api/cl_khr_external_memory.asciidoc | 5 ++++- api/cl_khr_external_semaphore.asciidoc | 5 ++++- api/opencl_runtime_layer.asciidoc | 11 +++++++++++ xml/cl.xml | 4 ++-- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index 3d61b564a..9d28be739 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_memory.txt[] === Other Extension Metadata *Last Modified Date*:: - 2024-03-15 + 2024-09-03 *IP Status*:: No known IP claims. *Contributors*:: @@ -290,3 +290,6 @@ while (true) { (provisional). * Revision 1.0.0, 2024-03-15 ** First non-provisional version. + * Revision 1.0.1, 2024-09-03 + ** Return {CL_INVALID_PROPERTY} when multiple external handles are provided + when creating a memory object. diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc index cdd7ea10a..d3b1c98b8 100644 --- a/api/cl_khr_external_semaphore.asciidoc +++ b/api/cl_khr_external_semaphore.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_external_semaphore.txt[] === Other Extension Metadata *Last Modified Date*:: - 2024-03-15 + 2024-09-03 *Interactions and External Dependencies*:: * This extension requires OpenCL 1.2. * The {cl_khr_semaphore_EXT} extension is required as it defines semaphore @@ -287,3 +287,6 @@ while (true) { ** Added re-import function call to {cl_khr_external_semaphore_sync_fd_EXT} * Revision 1.0.0, 2024-03-15 ** First non-provisional version. + * Revision 1.0.1, 2024-09-03 + ** Return {CL_INVALID_PROPERTY} when multiple external handles are provided + when creating a semaphore. 
diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index a10d5fb92..e91c1e785 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -659,6 +659,7 @@ ifdef::cl_khr_external_memory[] * {CL_INVALID_PROPERTY} ** if _properties_ does not include a supported external memory handle and {CL_MEM_DEVICE_HANDLE_LIST_KHR} is specified as part of _properties_. + ** if _properties_ includes more than one external memory handle. endif::cl_khr_external_memory[] [[memory-flags-table]] @@ -2117,6 +2118,7 @@ ifdef::cl_khr_external_memory[] * {CL_INVALID_PROPERTY} ** if _properties_ does not include a supported external memory handle and {CL_MEM_DEVICE_HANDLE_LIST_KHR} is specified as part of _properties_. + ** if _properties_ includes more than one external memory handle. endif::cl_khr_external_memory[] [[host-ptr-buffer-size-table]] @@ -12879,6 +12881,12 @@ in the _context_. For a multi-device context {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} must be specified in _sema_props_. +ifdef::cl_khr_external_semaphore[] +The properties used to create a semaphore from an external semaphore handle are +<<external-semaphore-handle-types,described in the list of external semaphore handle types>>. +endif::cl_khr_external_semaphore[] + // refError _errcode_ret_ returns an appropriate error code. @@ -12924,6 +12932,8 @@ ifdef::cl_khr_external_semaphore[] well as {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR}. Exporting a semaphore handle from a semaphore that was created by importing an external semaphore handle is not permitted. + * {CL_INVALID_PROPERTY} if _sema_props_ includes more than one external + semaphore handle. endif::cl_khr_external_semaphore[] -- @@ -13006,6 +13016,7 @@ Please refer to handle specific documentation for more details on transference r handle type.
+[[external-semaphore-handle-types]] === Descriptions of External Semaphore Handle Types This section describes the external semaphore handle types that are added by diff --git a/xml/cl.xml b/xml/cl.xml index e71aebc93..33f45ce82 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -7039,7 +7039,7 @@ server's OpenCL/api-docs repository. - + @@ -7098,7 +7098,7 @@ server's OpenCL/api-docs repository. - + From e3171f275800fe50f5b63571fc9f630ed2226e28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 24 Sep 2024 20:09:20 +0100 Subject: [PATCH 162/190] Clarify that we do not remove functionality in minor versions of OpenCL (#1265) Change-Id: Iee7de47816ae8f40f684d713ad53bd886f4fcbd5 Signed-off-by: Kevin Petit --- api/introduction.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/introduction.asciidoc b/api/introduction.asciidoc index 84ba8e1ff..8b134ea26 100644 --- a/api/introduction.asciidoc +++ b/api/introduction.asciidoc @@ -98,8 +98,8 @@ _major.minor_ components of the version number. A difference in the _major_ or _minor_ version number indicates that some amount of new functionality has been added to the specification, and may also include behavior changes and bug fixes. -Functionality may also be deprecated or removed when the _major_ or _minor_ -version changes. +Functionality may also be deprecated when the _major_ or _minor_ version +changes, or removed when the _major_ version changes. A difference in the _revision_ number indicates small changes to the specification, typically to fix a bug or to clarify language. From ef69683c6b089a242b69995b1a77fd737f4a482c Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Wed, 2 Oct 2024 22:37:44 +0530 Subject: [PATCH 163/190] Use "associated with" for external memory and semaphores (#1268) Replace "accessible to" with "associated with" for describing the devices the external memory and semaphore are valid for.
Fixes #1240 --- api/opencl_runtime_layer.asciidoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index e91c1e785..138b6ea0b 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -599,7 +599,7 @@ endif::cl_khr_external_memory[] ifdef::cl_khr_external_memory[] If {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_, the memory object created by {clCreateBufferWithProperties} or -{clCreateImageWithProperties} is by default accessible to all devices in the +{clCreateImageWithProperties} is by default associated with all devices in the _context_. The properties used to create a buffer from an external memory handle are @@ -2027,7 +2027,7 @@ endif::cl_khr_external_memory[] ifdef::cl_khr_external_memory[] If {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_, the memory object created by {clCreateBufferWithProperties} or -{clCreateImageWithProperties} is by default accessible to all devices in the +{clCreateImageWithProperties} is by default associated with all devices in the _context_. The properties used to create an image from an external memory handle are @@ -12876,7 +12876,7 @@ endif::cl_khr_external_semaphore[] If {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not specified as part of _sema_props_, the semaphore object created by -{clCreateSemaphoreWithPropertiesKHR} is by default accessible to all devices +{clCreateSemaphoreWithPropertiesKHR} is by default associated with all devices in the _context_. For a multi-device context {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} must be specified in _sema_props_. 
From 6dc1b74f787a26f71cc3830fce7974017507748a Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 8 Oct 2024 09:50:31 -0700 Subject: [PATCH 164/190] add description of supported printf operand types (#1236) --- env/common_properties.asciidoc | 82 ++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/env/common_properties.asciidoc b/env/common_properties.asciidoc index 2b732c78c..027f29895 100644 --- a/env/common_properties.asciidoc +++ b/env/common_properties.asciidoc @@ -576,3 +576,85 @@ pointer is appropriately aligned as required by the _Type_ that the pointer points to. Behavior of an unaligned load or store is undefined. + +=== Printf Operands + +For the *printf* instruction in the *OpenCL.std* +<>, the +format specifiers in the _format_ operand determine how to print the +additional argument operands. + +Behavior is undefined unless the source operand type for an additional argument +matches the supported operand types for the format specifier in the following +table. 
+ +[cols="1,1,1,3",options="header"] +|==== +|*Vector Specifier* +|*Length Modifier* +|*Conversion Specifier* +|*Supported Operand Type* + +// Scalar Integers: + +| (none) +| *hh*, *h*, (none) +| *d*, *i*, *o*, *u*, *x*, *X* + | *OpTypeInt* with _Width_ equal to 32 + +| (none) +| *l* +| *d*, *i*, *o*, *u*, *x*, *X* + | *OpTypeInt* with _Width_ equal to 64 + +// Scalar Floats: + +| (none) +| (none) +| *a*, *A*, *e*, *E*, *f*, *F*, *g*, *G* + | *OpTypeFloat* with _Width_ equal to 32 + footnote:[This allows printing 16-bit `half` and 32-bit `float` values on + devices that both do and do not support the *Float64* capability for + double-precision floating-point.], + + *OpTypeFloat* with _Width_ equal to 64 + +// Vector Integers: + +| **v**_n_ +| *hh* +| *d*, *i*, *o*, *u*, *x*, *X* + | *OpTypeVector* with _n_ components of *OpTypeInt* with _Width_ equal to 8 + +| **v**_n_ +| *h* +| *d*, *i*, *o*, *u*, *x*, *X* + | *OpTypeVector* with _n_ components of *OpTypeInt* with _Width_ equal to 16 + +| **v**_n_ +| *hl* +| *d*, *i*, *o*, *u*, *x*, *X* + | *OpTypeVector* with _n_ components of *OpTypeInt* with _Width_ equal to 32 + +| **v**_n_ +| *l* +| *d*, *i*, *o*, *u*, *x*, *X* + | *OpTypeVector* with _n_ components of *OpTypeInt* with _Width_ equal to 64 + +// Vector Floats: + +| **v**_n_ +| *h* +| *a*, *A*, *e*, *E*, *f*, *F*, *g*, *G* + | *OpTypeVector* with _n_ components of *OpTypeFloat* with _Width_ equal to 16 + +| **v**_n_ +| *hl* +| *a*, *A*, *e*, *E*, *f*, *F*, *g*, *G* + | *OpTypeVector* with _n_ components of *OpTypeFloat* with _Width_ equal to 32 + +| **v**_n_ +| *l* +| *a*, *A*, *e*, *E*, *f*, *F*, *g*, *G* + | *OpTypeVector* with _n_ components of *OpTypeFloat* with _Width_ equal to 64 + +|==== From 6f06c55cbf9a5c05ffd37389a592024ddcad2343 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 8 Oct 2024 13:04:58 -0700 Subject: [PATCH 165/190] improve param_value_size consistency (#1254) * fix descriptions of param_value_size for consistency Still need to check 
and update error conditions for consistency. * improve consistency for error descriptions too * fix CL_INVALID_VALUE error for clGetGLContextInfoKHR --- api/opencl_platform_layer.asciidoc | 70 ++--- api/opencl_runtime_layer.asciidoc | 412 +++++++++++++++-------------- 2 files changed, 255 insertions(+), 227 deletions(-) diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 81f08b585..2a23d2996 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -107,8 +107,9 @@ include::{generated}/api/version-notes/clGetPlatformInfo.asciidoc[] If _param_value_ is `NULL`, it is ignored. * _param_value_size_ specifies the size in bytes of memory pointed to by _param_value_. - This size in bytes must be {geq} size of return type specified in the - <> table. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -311,10 +312,11 @@ Otherwise, it returns one of the following errors footnote:[{fn-error-precedence}]. * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. - * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values or - if size in bytes specified by _param_value_size_ is < size of return - type as specified in the <> table, and _param_value_ is not a `NULL` value. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than the + size of the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host.
-- @@ -454,8 +456,9 @@ include::{generated}/api/version-notes/clGetDeviceInfo.asciidoc[] If _param_value_ is `NULL`, it is ignored. * _param_value_size_ specifies the size in bytes of memory pointed to by _param_value_. - This size in bytes must be {geq} size of return type specified in the - <> table. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -2162,12 +2165,11 @@ successfully. Otherwise, it returns one of the following errors: * {CL_INVALID_DEVICE} if _device_ is not a valid device. - * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values or - if size in bytes specified by _param_value_size_ is < size of return - type as specified in the <> table - and _param_value_ is not a `NULL` value or if _param_name_ is a value - that is available as an extension and the corresponding extension is not - supported by the device. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than the + size of the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -2459,17 +2461,15 @@ include::{generated}/api/version-notes/clGetDeviceIDsFromD3D10KHR.asciidoc[] * _platform_ refers to the platform ID returned by {clGetPlatformIDs}. * _d3d_device_source_ specifies the type of _d3d_object_, and must be one - of the values shown in the <> - table. + of the values shown in the + <> table. * _d3d_object_ specifies the object whose corresponding OpenCL devices are being queried.
The type of _d3d_object_ must be as specified in the - <> table. + <> table. * _d3d_device_set_ specifies the set of devices to return, and must be one - of the values shown in the <> table. + of the values shown in the + <> table. * _num_entries_ is the number of {cl_device_id_TYPE} entries that can be added to _devices_. If _devices_ is not `NULL` then _num_entries_ must be greater than zero. @@ -2566,17 +2566,15 @@ include::{generated}/api/version-notes/clGetDeviceIDsFromD3D11KHR.asciidoc[] * _platform_ refers to the platform ID returned by {clGetPlatformIDs}. * _d3d_device_source_ specifies the type of _d3d_object_, and must be one - of the values shown in the <> - table. + of the values shown in the + <> table. * _d3d_object_ specifies the object whose corresponding OpenCL devices are being queried. The type of _d3d_object_ must be as specified in the - <> table. + <> table. * _d3d_device_set_ specifies the set of devices to return, and must be one - of the values shown in the <> table. + of the values shown in the + <> table. * _num_entries_ is the number of {cl_device_id_TYPE} entries that can be added to _devices_. If _devices_ is not `NULL` then _num_entries_ must be greater than zero. @@ -3505,15 +3503,16 @@ include::{generated}/api/version-notes/clGetContextInfo.asciidoc[] If _param_value_ is `NULL`, it is ignored. * _param_value_size_ specifies the size in bytes of memory pointed to by _param_value_. - This size must be greater than or equal to the size of return type as - described in the <> table. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. The list of supported _param_name_ values and the information returned in _param_value_ by {clGetContextInfo} is described in the -<> table. +<> table. 
[[context-info-table]] .List of supported param_names by {clGetContextInfo} @@ -3586,10 +3585,11 @@ successfully. Otherwise, it returns one of the following errors: * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values or - if size in bytes specified by _param_value_size_ is < size of return - type as specified in the <> - table and _param_value_ is not a `NULL` value. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than the + size of the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 138b6ea0b..9f5ea299a 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -374,10 +374,10 @@ include::{generated}/api/version-notes/clGetCommandQueueInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_.
@@ -385,9 +385,9 @@ include::{generated}/api/version-notes/clGetCommandQueueInfo.asciidoc[] The list of supported _param_name_ values and the information returned in _param_value_ by {clGetCommandQueueInfo} is described in the -<> table. +<> table. -[[command-queue-param-table]] +[[command-queue-info-table]] .List of supported param_names by {clGetCommandQueueInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== @@ -463,10 +463,11 @@ Otherwise, it returns one of the following errors: * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue, or if _command_queue_ is not a valid command-queue for _param_name_. - * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values or - if size in bytes specified by _param_value_size_ is < size of return - type as specified in the <> table, and _param_value_ is not a `NULL` value. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than the + size of the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -4194,10 +4195,11 @@ include::{generated}/api/version-notes/clGetImageInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored.
* _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -4322,11 +4324,12 @@ endif::cl_khr_d3d11_sharing[] successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and _param_value_ is - not `NULL`. * {CL_INVALID_MEM_OBJECT} if _image_ is a not a valid image object. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than the + size of the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -5181,29 +5184,15 @@ include::{generated}/api/version-notes/clGetPipeInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. -// refError - -{clGetPipeInfo} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_MEM_OBJECT} if _pipe_ is a not a valid pipe object.
- * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and _param_value_ is - not `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - [[pipe-info-table]] .List of supported param_names by {clGetPipeInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] @@ -5238,6 +5227,22 @@ include::{generated}/api/version-notes/CL_PIPE_PROPERTIES.asciidoc[] _param_value_size_ret_ equal to 0, indicating that there are no properties to be returned. |==== + +// refError + +{clGetPipeInfo} returns {CL_SUCCESS} if the function is executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_MEM_OBJECT} if _pipe_ is not a valid pipe object. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than the + size of the return type specified in the + <> table + and _param_value_ is not `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- @@ -5970,14 +5975,15 @@ include::{generated}/api/version-notes/clGetMemObjectInfo.asciidoc[] * _param_name_ specifies the information to query. The list of supported _param_name_ types and the information returned in _param_value_ by {clGetMemObjectInfo} is described in the - <> table. + <> table. * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored.
- * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -6160,10 +6166,11 @@ successfully. Otherwise, it returns one of the following errors: * {CL_INVALID_MEM_OBJECT} if _memobj_ is a not a valid memory object. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and _param_value_ is not - `NULL`. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than the + size of the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -6280,15 +6287,16 @@ include::{generated}/api/version-notes/clGetGLTextureInfo.asciidoc[] * _param_value_ is a pointer to memory where the result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory - pointed to by _param_value_. - This size must be >= size of return type as described in the table - below. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored.
* _param_value_size_ret_ returns the actual size in bytes of data copied to _param_value_. If _param_value_size_ret_ is `NULL`, it is ignored. -[[gl-texture-info-queries-table]] +[[gl-texture-info-table]] .OpenGL texture info that may be queried with {clGetGLTextureInfo} [cols=",,",options="header",] |==== @@ -6324,10 +6332,12 @@ Otherwise, it returns one of the following errors: * {CL_INVALID_MEM_OBJECT} if _memobj_ is not a valid OpenCL memory object. * {CL_INVALID_GL_OBJECT} if there is no OpenGL texture object associated with _memobj_. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is less than the size of the return type - as described in the table above and _param_value_ is not `NULL`, or if - _param_value_ and _param_value_size_ret_ are `NULL`. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table + and _param_value_ is not `NULL`. + * {CL_INVALID_VALUE} if _param_value_ and _param_value_size_ret_ are `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -8316,10 +8326,11 @@ include::{generated}/api/version-notes/clGetSamplerInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. 
* _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -8385,11 +8396,12 @@ include::{generated}/api/version-notes/CL_SAMPLER_PROPERTIES.asciidoc[] successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. * {CL_INVALID_SAMPLER} if _sampler_ is a not a valid sampler object. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than the + size of the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -9700,10 +9712,11 @@ include::{generated}/api/version-notes/clGetProgramInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -9881,11 +9894,12 @@ include::{generated}/api/version-notes/CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT.asc successfully.
Otherwise, it returns one of the following errors: - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. * {CL_INVALID_PROGRAM} if _program_ is a not a valid program object. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_INVALID_PROGRAM_EXECUTABLE} if _param_name_ is {CL_PROGRAM_NUM_KERNELS}, {CL_PROGRAM_KERNEL_NAMES}, {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT}, or @@ -9916,10 +9930,11 @@ include::{generated}/api/version-notes/clGetProgramBuildInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -10032,13 +10047,14 @@ include::{generated}/api/version-notes/CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SI successfully. Otherwise, it returns one of the following errors: + * {CL_INVALID_PROGRAM} if _program_ is a not a valid program object. * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated with _program_. 
- * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_PROGRAM} if _program_ is a not a valid program object. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -10660,10 +10676,11 @@ include::{generated}/api/version-notes/clGetKernelInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -10726,11 +10743,12 @@ include::{generated}/api/version-notes/CL_KERNEL_ATTRIBUTES.asciidoc[] successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and _param_value_ - is not `NULL`. * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. 
+ * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -10759,10 +10777,11 @@ include::{generated}/api/version-notes/clGetKernelWorkGroupInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -10777,12 +10796,12 @@ include::{generated}/api/version-notes/clGetKernelWorkGroupInfo.asciidoc[] include::{generated}/api/version-notes/CL_KERNEL_GLOBAL_WORK_SIZE.asciidoc[] | {size_t_TYPE}[3] | This provides a mechanism for the application to query the maximum - global size that can be used to execute a kernel (i.e. + global size that can be used to execute a kernel (i.e. the _global_work_size_ argument to {clEnqueueNDRangeKernel}) on a custom - device given by device or a built-in kernel on an OpenCL device - given by device. + device given by _device_ or a built-in kernel on an OpenCL device + given by _device_. 
- If device is not a custom device and kernel is not a built-in + If _device_ is not a custom device and _kernel_ is not a built-in kernel, {clGetKernelWorkGroupInfo} returns the error {CL_INVALID_VALUE}. | {CL_KERNEL_WORK_GROUP_SIZE_anchor} @@ -10852,16 +10871,17 @@ include::{generated}/api/version-notes/CL_KERNEL_PRIVATE_MEM_SIZE.asciidoc[] successfully. Otherwise, it returns one of the following errors: + * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated with _kernel_ or if _device_ is `NULL` but there is more than one device associated with _kernel_. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table and _param_value_ is not `NULL`. * {CL_INVALID_VALUE} if _param_name_ is {CL_KERNEL_GLOBAL_WORK_SIZE} and _device_ is not a custom device and _kernel_ is not a built-in kernel. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -10897,10 +10917,11 @@ Also see {cl_khr_subgroups_EXT}. * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. 
+ This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -11005,20 +11026,21 @@ Also see {cl_khr_subgroups_EXT}. successfully. Otherwise, it returns one of the following errors: + * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated with _kernel_ or if _device_ is `NULL` but there is more than one device associated with _kernel_. * {CL_INVALID_OPERATION} if _device_ does not support sub-groups. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table and _param_value_ is not `NULL`. * {CL_INVALID_VALUE} if _param_name_ is {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE}, {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE} or {CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT} and the size in bytes specified by _input_value_size_ is not valid or if _input_value_ is `NULL`. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -11044,10 +11066,11 @@ include::{generated}/api/version-notes/clGetKernelArgInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. 
- This size must be > size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -11162,14 +11185,19 @@ include::{generated}/api/version-notes/CL_KERNEL_ARG_NAME.asciidoc[] successfully. Otherwise, it returns one of the following errors: + * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_ size is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_KERNEL_ARG_INFO_NOT_AVAILABLE} if the argument information is not available for kernel. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- ifdef::cl_khr_suggested_local_work_size[] @@ -11853,10 +11881,11 @@ include::{generated}/api/version-notes/clGetEventInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. 
- This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -12211,13 +12240,14 @@ associated with _event_ will be visible to other enqueued commands. successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_VALUE} if information to query given in _param_name_ cannot be - queried for _event_. * {CL_INVALID_EVENT} if _event_ is a not a valid event object. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table + and _param_value_ is not `NULL`. + * {CL_INVALID_VALUE} if the information to query given in _param_name_ cannot be + queried for _event_. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -12959,10 +12989,14 @@ include::{generated}/api/version-notes/clGetSemaphoreHandleForTypeKHR.asciidoc[] * _handle_type_ specifies the type of semaphore handle that should be returned for this exportable _sema_object_, and must be one of the values specified when _sema_object_ was created. - * _handle_size_ specifies the size of memory pointed by _handle_ptr_. * _handle_ptr_ is a pointer to memory where the exported external handle is returned. 
If _handle_ptr_ is `NULL`, it is ignored. + * _handle_size_ specifies the size in bytes of memory pointed to by + _handle_ptr_. + This size must be greater than or equal to the size of the handle type + specified by _handle_type_. + If _handle_ptr_ is `NULL`, it is ignored. * _handle_size_ret_ returns the actual size in bytes for the external handle. If _handle_size_ret_ is `NULL`, it is ignored. @@ -12973,10 +13007,7 @@ include::{generated}/api/version-notes/clGetSemaphoreHandleForTypeKHR.asciidoc[] handle is queried successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_SEMAPHORE_KHR} - ** if _sema_object_ is not a valid semaphore -// This is redundant with the error below. - ** if _sema_object_ is not exportable + * {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore. * {CL_INVALID_DEVICE} ** if _device_ is not a valid device, or ** if _sema_object_ belongs to a context that is not associated with @@ -12984,14 +13015,8 @@ Otherwise, it returns one of the following errors: ** if _sema_object_ can not be shared with _device_. * {CL_INVALID_VALUE} if the requested external semaphore handle type was not specified when _sema_object_ was created. - * {CL_INVALID_VALUE} if _handle_size_ is less than the size needed to - store the returned handle. -// I don't think this can happen. This would have been checked when the semaphore was created. -// ** if CL_SEMAPHORE_HANDLE_*_KHR is specified as one of the _sema_props_ and -// the property CL_SEMAPHORE_HANDLE_*_KHR does not identify a valid external -// memory handle poperty reported by -// {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} or -// {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} queries. + * {CL_INVALID_VALUE} if the size in bytes specified by _handle_size_ is + less than size of the requested handle and _handle_ptr_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. 
* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -13496,21 +13521,21 @@ include::{generated}/api/version-notes/clGetSemaphoreInfoKHR.asciidoc[] * _sema_object_ specifies the semaphore object being queried. * _param_name_ is a constant that specifies the semaphore information to query, and must be one of the values shown in the - <> table. + <> table. * _param_value_ is a pointer to memory where the result of the query is - returned as described in the <> table. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ specifies the size in bytes of memory pointed to + * _param_value_size_ specifies the size in bytes of memory pointed to by _param_value_. This size must be greater than or equal to the size of the return type - described in the <> - table. + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_value_. If _param_value_size_ret_ is `NULL`, it is ignored. -[[cl_khr_semaphore_info-table]] +[[semaphore-info-table]] .List of parameter names supported by {clGetSemaphoreInfoKHR} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== @@ -13562,14 +13587,12 @@ endif::cl_khr_external_semaphore[] successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_SEMAPHORE_KHR} - ** if _sema_object_ is not a valid semaphore - * {CL_INVALID_VALUE} - ** if _param_name_ is not one of the attribute defined in the - <> table or - ** if _param_value_size_ is less than the size of Return Type of the - corresponding _param_name_ attribute as defined in the - <> table. + * {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table + and _param_value_ is not `NULL`. 
* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -13665,10 +13688,11 @@ include::{generated}/api/version-notes/clGetEventProfilingInfo.asciidoc[] * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -13784,6 +13808,7 @@ endif::cl_khr_command_buffer_multi_device[] successfully and the profiling information has been recorded. Otherwise, it returns one of the following errors: + * {CL_INVALID_EVENT} if _event_ is a not a valid event object. * {CL_PROFILING_INFO_NOT_AVAILABLE} if the {CL_QUEUE_PROFILING_ENABLE} flag is not set for the command-queue, if the execution status of the command identified by _event_ is not {CL_COMPLETE} or if _event_ is a user event @@ -13798,11 +13823,11 @@ ifdef::cl_khr_command_buffer_multi_device[] {CL_PROFILING_INFO_NOT_AVAILABLE} is returned if all the queues passed do not have {CL_QUEUE_PROFILING_ENABLE} set. endif::cl_khr_command_buffer_multi_device[] - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_EVENT} if _event_ is a not a valid event object. 
+ * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table and _param_value_ is not + a `NULL` value. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -16081,14 +16106,14 @@ include::{generated}/api/version-notes/clGetCommandBufferInfoKHR.asciidoc[] * _command_buffer_ specifies the command-buffer being queried. * _param_name_ specifies the information to query. - * _param_value_size_ specifies the size in bytes of memory pointed to by - _param_value_. - This size must be {geq} size of return type as described in the table - below. - If _param_value_ is `NULL`, it is ignored. * _param_value_ is a pointer to a memory location where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_value_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -16096,6 +16121,7 @@ include::{generated}/api/version-notes/clGetCommandBufferInfoKHR.asciidoc[] The list of supported _param_name_ values and the information returned in _param_value_ by {clGetCommandBufferInfoKHR} is described in the table below. +[[command-buffer-info-table]] .{clGetCommandBufferInfoKHR} values [cols=",,",options="header",] |==== @@ -16177,9 +16203,11 @@ Otherwise, it returns one of the following errors: * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. 
- * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values or - if size in bytes specified by _param_value_size_ is less than size of - return type and _param_value_ is not a `NULL` value. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than the size of + the return type specified in the + <<command-buffer-info-table>> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -16198,21 +16226,21 @@ include::{generated}/api/version-notes/clGetMutableCommandInfoKHR.asciidoc[] * _param_name_ specifies the information to query. The list of supported _param_name_ types and the information returned in _param_value_ by {clGetMutableCommandInfoKHR} is described in the - <<mutable-command-object-queries>> - table. - * _param_value_size_ is used to specify the size in bytes of memory - pointed to by _param_value_. - This size must be {geq} size of return type as described in the - <<mutable-command-object-queries>> + <<mutable-command-info-table>> table. * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory + pointed to by _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <<mutable-command-info-table>> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. -[[mutable-command-object-queries]] +[[mutable-command-info-table]] ._Mutable Command Object Queries_ [width="100%",cols="<33%,<17%,<50%",options="header"] |==== @@ -16330,12 +16358,13 @@ include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR.a executed successfully.
Otherwise, it returns one of the following errors: - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> - table and _param_value_ is not `NULL`. * {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable command object. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -16381,7 +16410,8 @@ include::{generated}/api/protos/clGetGLContextInfoKHR.txt[] * _param_value_size_ specifies the size in bytes of memory pointed to by _param_value_. This size must be greater than or equal to the size of the return type - described in the table below. + specified in the <> table. + If _param_value_ is `NULL`, it is ignored. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_value_. If _param_value_size_ret_ is `NULL`, it is ignored. @@ -16438,15 +16468,13 @@ Otherwise, it returns one of the following errors: to a non-default value. ** Both of the properties {CL_CGL_SHAREGROUP_KHR} and {CL_GL_CONTEXT_KHR} are set to non-default values. - ** Any of the devices specified in the argument cannot support - OpenCL objects which share the data store of an OpenGL object. - * {CL_INVALID_VALUE} if an property name other than those specified in - _table 4.5_ is specified in _properties_. 
- * {CL_INVALID_VALUE} if _param_name_ is not one of the values listed in - the <> table, or if the size in bytes - specified by _param_value_size_ is less than the size of the return type - shown in the table and _param_value_ is not a `NULL` value + * {CL_INVALID_VALUE} if a property name specified in _properties_ is + invalid. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or + if the size in bytes specified by _param_value_size_ is less than size of + the return type specified in the + <> table + and _param_value_ is not `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources From 3090882a57e143c49340c5f67e9515a4cfd446f4 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 15 Oct 2024 07:58:53 -0700 Subject: [PATCH 166/190] clarify how to properly use and modify shared OpenCL objects (#1243) Refer to the OpenCL memory consistency model vs. duplicating requirements. --- api/appendix_a.asciidoc | 47 ++++++++++++----------------------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/api/appendix_a.asciidoc b/api/appendix_a.asciidoc index 0078df977..bef67d2e8 100644 --- a/api/appendix_a.asciidoc +++ b/api/appendix_a.asciidoc @@ -8,46 +8,25 @@ == Shared OpenCL Objects This section describes which objects can be shared across multiple -command-queues created within a host process. +command-queues. +The command-queues can be created in one host thread or across multiple host +threads within a host process. -OpenCL memory objects, program objects and kernel objects are created using -a context and can be shared across multiple command-queues created using the -same context. +OpenCL memory objects, program objects, and kernel objects are created using +an OpenCL context and can be shared across multiple command-queues created using +the same context. 
Event objects can be created when a command is queued to a command-queue. These event objects can be shared across multiple command-queues created using the same context. -The application needs to implement appropriate synchronization across -threads on the host processor to ensure that the changes to the state of a -shared object (such as a command-queue object, memory object, program or -kernel object) happen in the correct order (deemed correct by the -application) when multiple command-queues in multiple threads are making -changes to the state of a shared object. - -A command-queue can cache changes to the state of a memory object on the -device associated with the command-queue. -To synchronize changes to a memory object across command-queues, the -application must do the following: - -In the command-queue that includes commands that modify the state of a -memory object, the application must do the following: - - * Get appropriate event objects for commands that modify the state of the - shared memory object. - * Call the {clFlush} (or {clFinish}) API to issue any outstanding commands - from this command-queue. - -In the command-queue that wants to synchronize to the latest state of a -memory object, commands queued by the application must use the appropriate -event objects that represent commands that modify the state of the shared -memory object as event objects to wait on. -This is to ensure that commands that use this shared memory object complete -in the previous command-queue before the memory objects are used by commands -executing in this command-queue. - -The results of modifying a shared resource in one command-queue while it is -being used by another command-queue are undefined. 
+The application must implement appropriate synchronization to ensure that the +changes to the state of a shared object (such as a command-queue object, memory +object, program object, or kernel object) happen in the correct order (deemed +correct by the application) when multiple host threads or command-queues change +the state of a shared object. +The OpenCL memory consistency model describes +how to correctly order operations that change the state of a shared object. == Multiple Host Threads From 0c276bcdebd168ab50956dce7df8c9a74a2bb91d Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 15 Oct 2024 07:59:05 -0700 Subject: [PATCH 167/190] document valid coordinate types when reading from or writing to images (#1242) --- env/common_properties.asciidoc | 4 +- env/image_addressing_and_filtering.asciidoc | 126 ++++++++++++++++++++ 2 files changed, 128 insertions(+), 2 deletions(-) diff --git a/env/common_properties.asciidoc b/env/common_properties.asciidoc index 027f29895..c40b633b1 100644 --- a/env/common_properties.asciidoc +++ b/env/common_properties.asciidoc @@ -128,13 +128,13 @@ OpenCL environments: | `0` | A 2D depth image. -// image1d_array_t +// image2d_array_t | *2D* | `0` | `1` | A 2D image array. -// image1d_array_depth_t +// image2d_array_depth_t | *2D* | `1` | `1` diff --git a/env/image_addressing_and_filtering.asciidoc b/env/image_addressing_and_filtering.asciidoc index c6f4f1fd9..84c66fcad 100644 --- a/env/image_addressing_and_filtering.asciidoc +++ b/env/image_addressing_and_filtering.asciidoc @@ -956,6 +956,132 @@ layer = v (since v is already an integer) and the result is undefined if v is not one of the integers 0, 1, ... `h~t~` - 1. +=== Coordinate Format for Reading and Writing Images + +This section describes valid types for the _Coordinate_ operand used by image +read instructions (such as *OpImageRead*) or image write instructions (such as +*OpImageWrite*).
+The valid operand types are determined by the *OpImageType* for the image and +whether the image is being read from or written to. + +The following table describes the valid types for the _Coordinate_ operand when +reading from the specified image type. +The integer types for the _Coordinate_ operand are only valid when the image is +read without a sampler (such as *OpImageRead*), or with a sampler using +non-normalized texel coordinates, *Nearest* filtering, and either the *None*, +*ClampToEdge*, or *Clamp* addressing mode. + +._Mapping Image Types to Coordinate Types for Reading_ +[cols="1,1,1,6",options="header"] +|==== +| _Dim_ | _Depth_ | _Arrayed_ +| *Supported Coordinate Type* + +// image1d_t +| *1D* | `0` | `0` +| *OpTypeInt* with _Width_ equal to 32 + + *OpTypeFloat* with _Width_ equal to 32 + +// image1d_array_t +| *1D* | `0` | `1` +| *OpTypeVector* with 2 components of *OpTypeInt* with _Width_ equal to 32 + + *OpTypeVector* with 2 components of *OpTypeFloat* with _Width_ equal to 32 + + The array index is provided by the second component of the _Coordinate_. + +// image2d_t +| *2D* | `0` | `0` +| *OpTypeVector* with 2 components of *OpTypeInt* with _Width_ equal to 32 + + *OpTypeVector* with 2 components of *OpTypeFloat* with _Width_ equal to 32 + +// image2d_depth_t +| *2D* | `1` | `0` +| *OpTypeVector* with 2 components of *OpTypeInt* with _Width_ equal to 32 + + *OpTypeVector* with 2 components of *OpTypeFloat* with _Width_ equal to 32 + +// image2d_array_t +| *2D* | `0` | `1` +| *OpTypeVector* with 4 components of *OpTypeInt* with _Width_ equal to 32 + + *OpTypeVector* with 4 components of *OpTypeFloat* with _Width_ equal to 32 + + The array index is provided by the third component of the _Coordinate_. + The fourth component of the _Coordinate_ is ignored. 
+ +// image2d_array_depth_t +| *2D* | `1` | `1` +| *OpTypeVector* with 4 components of *OpTypeInt* with _Width_ equal to 32 + + *OpTypeVector* with 4 components of *OpTypeFloat* with _Width_ equal to 32 + + The array index is provided by the third component of the _Coordinate_. + The fourth component of the _Coordinate_ is ignored. + +// image3d_t +| *3D* | `0` | `0` +| *OpTypeVector* with 4 components of *OpTypeInt* with _Width_ equal to 32 + + *OpTypeVector* with 4 components of *OpTypeFloat* with _Width_ equal to 32 + + The fourth component of the _Coordinate_ is ignored. + +// image1d_buffer_t +| *Buffer* | `0` | `0` +| *OpTypeInt* with _Width_ equal to 32 + + *OpTypeFloat* with _Width_ equal to 32 + +|==== + +The following table describes the valid types for the _Coordinate_ operand when +writing to the specified image type. + +._Mapping Image Types to Coordinate Types for Writing_ +[cols="1,1,1,6",options="header"] +|==== +| _Dim_ | _Depth_ | _Arrayed_ +| *Supported Coordinate Type* + +// image1d_t +| *1D* | `0` | `0` +| *OpTypeInt* with _Width_ equal to 32 + +// image1d_array_t +| *1D* | `0` | `1` +| *OpTypeVector* with 2 components of *OpTypeInt* with _Width_ equal to 32 + + The array index is provided by the second component of the _Coordinate_. + +// image2d_t +| *2D* | `0` | `0` +| *OpTypeVector* with 2 components of *OpTypeInt* with _Width_ equal to 32 + +// image2d_depth_t +| *2D* | `1` | `0` +| *OpTypeVector* with 2 components of *OpTypeInt* with _Width_ equal to 32 + +// image2d_array_t +| *2D* | `0` | `1` +| *OpTypeVector* with 4 components of *OpTypeInt* with _Width_ equal to 32 + + The array index is provided by the third component of the _Coordinate_. + The fourth component of the _Coordinate_ is ignored. + +// image2d_array_depth_t +| *2D* | `1` | `1` +| *OpTypeVector* with 4 components of *OpTypeInt* with _Width_ equal to 32 + + The array index is provided by the third component of the _Coordinate_. 
+ The fourth component of the _Coordinate_ is ignored. + +// image3d_t +| *3D* | `0` | `0` +| *OpTypeVector* with 4 components of *OpTypeInt* with _Width_ equal to 32 + + The fourth component of the _Coordinate_ is ignored. + +// image1d_buffer_t +| *Buffer* | `0` | `0` +| *OpTypeInt* with _Width_ equal to 32 + +|==== + === Data Format for Reading and Writing Images This section describes how image element data is returned by an From ea217c03518b04094469aefe6df53e940b8db09c Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 15 Oct 2024 08:00:08 -0700 Subject: [PATCH 168/190] add unsafe math optimization error bounds for the non-derived atan2 (#1073) --- OpenCL_C.txt | 1 + cxx/numerical_compliance/relative_error_as_ulps.txt | 1 + env/numerical_compliance.asciidoc | 1 + 3 files changed, 3 insertions(+) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 7a2347faa..e6ddbd907 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -15997,6 +15997,7 @@ requires>> support for OpenCL C 2.0 or newer. | Derived implementations may implement as *atan*(_y_ / _x_) for _x_ > 0, *atan*(_y_ / _x_) + `M_PI_F` for _x_ < 0 and _y_ > 0, and *atan*(_y_ / _x_) - `M_PI_F` for _x_ < 0 and _y_ < 0. + For non-derived implementations, the error is {leq} 8192 ulp. | *atan2pi*(_y_, _x_) | Derived implementations may implement as *atan2*(_y_, _x_) * `M_1_PI_F`. diff --git a/cxx/numerical_compliance/relative_error_as_ulps.txt b/cxx/numerical_compliance/relative_error_as_ulps.txt index cc4ad9de1..28c4aaf83 100644 --- a/cxx/numerical_compliance/relative_error_as_ulps.txt +++ b/cxx/numerical_compliance/relative_error_as_ulps.txt @@ -630,6 +630,7 @@ The reference value used to compute the ULP value of an arithmetic operation is | atan2(y, x) | Implemented as atan(y/x) for x > 0, atan(y/x) + M_PI_F for x < 0 and y > 0 and atan(y/x) - M_PI_F for x < 0 and y < 0. + For non-derived implementations, the error is \<= 8192 ulp. | atanpi(x) | Implemented as atan(x) * M_1_PI_F. 
diff --git a/env/numerical_compliance.asciidoc b/env/numerical_compliance.asciidoc index bbb895b73..f70b81c1b 100644 --- a/env/numerical_compliance.asciidoc +++ b/env/numerical_compliance.asciidoc @@ -1454,6 +1454,7 @@ profile. | Derived implementations may implement as *atan*(_y_ / _x_) for _x_ > 0, *atan*(_y_ / _x_) + `M_PI_F` for _x_ < 0 and _y_ > 0, and *atan*(_y_ / _x_) - `M_PI_F` for _x_ < 0 and _y_ < 0. + For non-derived implementations, the error is {leq} 8192 ulp. | *OpExtInst* *atan2pi* | Derived implementations may implement as *atan2*(_y_, _x_) * `M_1_PI_F`. From 156d8a8cd2e3191dd381a6398fcc92251b254a37 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 22 Oct 2024 08:58:47 -0700 Subject: [PATCH 169/190] clarify the minimum value for CL_DEVICE_HALF_FP_CONFIG (#1273) The minimum value for CL_DEVICE_HALF_FP_CONFIG applies to devices supporting all OpenCL versions, not just for OpenCL 2.0 or newer devices. --- api/opencl_platform_layer.asciidoc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 2a23d2996..584c4ce4c 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -1929,8 +1929,7 @@ include::{generated}/api/version-notes/CL_DEVICE_HALF_FP_CONFIG.asciidoc[] addition, subtraction, multiplication) are implemented in software If half-precision is supported by the device, then the minimum - half-precision floating-point capability for OpenCL 2.0 or newer - devices is: + half-precision floating-point capability is either: {CL_FP_ROUND_TO_ZERO} From dd5571b323a081dff3dead264f4cf77a402fd9c7 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 22 Oct 2024 09:57:49 -0700 Subject: [PATCH 170/190] add additional clarification for num_mip_levels (#1272) --- api/footnotes.asciidoc | 5 +++++ api/opencl_runtime_layer.asciidoc | 15 +++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/api/footnotes.asciidoc 
b/api/footnotes.asciidoc index b5452357e..4f7de3ec1 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -120,6 +120,11 @@ This feature is provided for identifying memory leaks. \ Implementations are encouraged to favor this option as it makes it more likely that errors will be managed by applications. \ ] +:fn-single-mipmap-level: pass:n[ \ +Therefore, specifying _num_mip_levels_ equal to either `0` or `1` creates an image with a single mipmap level. \ +] + + :fn-srgb-image-requirements: pass:n[ \ Support for reading from the {CL_sRGBA} image channel order is optional for 1D image buffers. \ Support for writing to the {CL_sRGBA} image channel order is optional for all image types. \ diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 9f5ea299a..64369f125 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -2665,14 +2665,17 @@ endif::cl_khr_external_memory[] _image_height_ for a 2D image array or a 3D image, must be {geq} the image row pitch for a 1D image array, and must be a multiple of the image row pitch. - * _num_mip_levels_ must be -ifndef::cl_khr_mipmap_image[0.] + * _num_mip_levels_ must be `0`, indicating that the image has a single +ifndef::cl_khr_mipmap_image[] + mipmap level. +endif::cl_khr_mipmap_image[] ifdef::cl_khr_mipmap_image[] - 0 unless the {cl_khr_mipmap_image_EXT} extension is supported, in which - case it may be a nonzero value specifying the number of mipmap - levels in the image. + mipmap level, unless the {cl_khr_mipmap_image_EXT} extension is supported. + When the {cl_khr_mipmap_image_EXT} extension is supported, _num_mip_levels_ + may additionally specify the total number of mipmap levels in the image, + including the base level footnote:[{fn-single-mipmap-level}]. endif::cl_khr_mipmap_image[] - * _num_samples_ must be 0. + * _num_samples_ must be `0`. * _mem_object_ may refer to a valid buffer or image memory object. 
`mem_object` can be a buffer memory object if `image_type` is {CL_MEM_OBJECT_IMAGE1D_BUFFER} or From 89d43e9fe307464c1ea417ecaddfe7b873a23fab Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 23 Oct 2024 07:33:27 -0700 Subject: [PATCH 171/190] rephrase and correct the descriptions for clSetKernelExecInfo (#1245) * rephrase and correct the descriptions for clSetKernelExecInfo * further wordsmithing clarify that CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM does not affect kernel arguments * fix typo * simplify CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM --- api/opencl_runtime_layer.asciidoc | 138 +++++++++++------------------- 1 file changed, 48 insertions(+), 90 deletions(-) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 64369f125..1716b9aa8 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -10475,16 +10475,15 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host. -- -[open,refpage='clSetKernelExecInfo',desc='Pass additional information other than argument values to a kernel.',type='protos'] +[open,refpage='clSetKernelExecInfo',desc='Set additional execution information for a kernel.',type='protos'] -- -To pass additional information other than argument values to a kernel, call -the function +To set additional execution information for a kernel, call the function include::{generated}/api/protos/clSetKernelExecInfo.txt[] include::{generated}/api/version-notes/clSetKernelExecInfo.asciidoc[] - * _kernel_ specifies the kernel object being queried. - * _param_name_ specifies the information to be passed to kernel. + * _kernel_ is a valid kernel object. + * _param_name_ specifies the type of information to set. The list of supported _param_name_ types and the corresponding values passed in _param_value_ is described in the <> table. 
@@ -10502,22 +10501,46 @@ include::{generated}/api/version-notes/clSetKernelExecInfo.asciidoc[] include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_PTRS.asciidoc[] | {void_TYPE}*[] - | SVM pointers must reference locations contained entirely within - buffers that are passed to kernel as arguments, or that are passed - through the execution information. - - Non-argument SVM buffers must be specified by passing pointers to - those buffers via {clSetKernelExecInfo} for coarse-grain and - fine-grain buffer SVM allocations but not for finegrain system SVM - allocations. + | Specifies a set of pointers to SVM allocations that may be accessed + by the kernel in addition to those set directly as kernel arguments. + Each of the pointers can be the pointer returned by {clSVMAlloc} or can + be a pointer to the middle of an SVM allocation. + It is sufficient to specify one pointer for each SVM allocation. + + Behavior is undefined if the kernel accesses a coarse-grain or + fine-grain buffer SVM allocation that is not set as a kernel argument + and is not in the set specified by {CL_KERNEL_EXEC_INFO_SVM_PTRS}. + + The complete set of pointers is specified by each call to + {clSetKernelExecInfo} and replaces any previously specified set of + pointers. + To specify that no SVM allocations will be accessed by a kernel other + than those set as kernel arguments, specify an empty set by passing + _param_value_size_ equal to zero and _param_value_ equal to `NULL`. + + Non-argument pointers to SVM allocations must be specified for + coarse-grain and fine-grain buffer SVM allocations, but not for + fine-grain system SVM allocations. | {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_anchor} include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM.asciidoc[] | {cl_bool_TYPE} - | This flag indicates whether the kernel uses pointers that are fine - grain system SVM allocations. 
- These fine grain system SVM pointers may be passed as arguments or - defined in SVM buffers that are passed as arguments to _kernel_. + | Specifies whether the kernel may use pointers to system allocations + that are not set directly as kernel arguments on devices that support + fine-grain system SVM allocations. + + When a device supports fine-grain system SVM allocations and + {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} is {CL_TRUE}, the kernel may + access system allocations that are not set directly as kernel arguments. + + Otherwise, if a device does not support fine-grain system SVM + allocations or when {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} is + {CL_FALSE}, behavior is undefined if the kernel accesses a system + allocation that is not set as a kernel argument. + + If {clSetKernelExecInfo} has not been called with a value for + {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM}, the default value is + {CL_TRUE}. |==== // refError @@ -10528,80 +10551,19 @@ Otherwise, it returns one of the following errors: * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. - * {CL_INVALID_VALUE} if _param_name_ is not valid, if _param_value_ is - `NULL` or if the size specified by _param_value_size_ is not valid. * {CL_INVALID_OPERATION} if _param_name_ is {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} and _param_value_ is {CL_TRUE} - but no devices in context associated with _kernel_ support fine-grain + and no devices in the context associated with _kernel_ support fine-grain system SVM allocations. + * {CL_INVALID_VALUE} if _param_name_ is not valid, if _param_value_ is + `NULL` and _param_value_size_ is greater than zero, or if the size specified + by _param_value_size_ is not valid. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. 
* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[NOTE] -==== -Coarse-grain or fine-grain buffer SVM pointers used by a kernel which -are not passed as a kernel arguments must be specified using -{clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS}. -For example, if SVM buffer A contains a pointer to another SVM buffer B, -and the kernel dereferences that pointer, then a pointer to B must -either be passed as an argument in the call to that kernel or it must be -made available to the kernel using {clSetKernelExecInfo}. -For example, we might pass extra SVM pointers as follows: - -[source,opencl] ----- -clSetKernelExecInfo(kernel, - CL_KERNEL_EXEC_INFO_SVM_PTRS, - num_ptrs * sizeof(void *), - extra_svm_ptr_list); ----- - -Here `num_ptrs` specifies the number of additional SVM pointers while -`extra_svm_ptr_list` specifies a pointer to memory containing those SVM -pointers. - -When calling {clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS} to -specify pointers to non-argument SVM buffers as extra arguments to a kernel, -each of these pointers can be the SVM pointer returned by {clSVMAlloc} or -can be a pointer + offset into the SVM region. -It is sufficient to provide one pointer for each SVM buffer used. - -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} is used to indicate whether -SVM pointers used by a kernel will refer to system allocations or not. - -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_FALSE} indicates that the -OpenCL implementation may assume that system pointers are not passed as -kernel arguments and are not stored inside SVM allocations passed as kernel -arguments. - -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_TRUE} indicates that the -OpenCL implementation must assume that system pointers might be passed as -kernel arguments and/or stored inside SVM allocations passed as kernel -arguments. 
-In this case, if the device to which the kernel is enqueued does not support -system SVM pointers, {clEnqueueNDRangeKernel} and {clEnqueueTask} will return a -{CL_INVALID_OPERATION} error. -If none of the devices in the context associated with kernel support -fine-grain system SVM allocations, {clSetKernelExecInfo} will return a -{CL_INVALID_OPERATION} error. - -If {clSetKernelExecInfo} has not been called with a value for -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM}, the default value is used for -this kernel attribute. -The default value depends on whether the device on which the kernel is -enqueued supports fine-grain system SVM allocations. -If so, the default value used is {CL_TRUE} (system pointers might be passed); -otherwise, the default is {CL_FALSE}. - -A call to {clSetKernelExecInfo} for a given value of _param_name_ -replaces any prior value passed for that value of _param_name_. -Only one _param_value_ will be stored for each value of _param_name_. -==== - - === Copying Kernel Objects NOTE: Copying kernel objects is <> version 2.1. @@ -11488,10 +11450,8 @@ Otherwise, it returns one of the following errors: _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel - and the device does not support SVM or if system pointers are passed as - arguments to a kernel and/or stored inside SVM allocations passed as - kernel arguments and the device does not support fine grain system SVM - allocations. + and the device does not support SVM, or if system pointers are passed as + arguments to a kernel and the device does not support fine-grain system SVM. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. 
* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -11583,10 +11543,8 @@ Otherwise, it returns one of the following errors: _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel - and the device does not support SVM or if system pointers are passed as - arguments to a kernel and/or stored inside SVM allocations passed as - kernel arguments and the device does not support fine grain system SVM - allocations. + and the device does not support SVM, or if system pointers are passed as + arguments to a kernel and the device does not support fine-grain system SVM. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources From 3212aaa18ca33f09cf5e40a401e2948093d65125 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 24 Oct 2024 07:26:25 -0700 Subject: [PATCH 172/190] change log with changes from v3.0.16 (#1274) * initial change log for changes to 3.0.16 * update with last-minute changes * add named NT handles to external memory in addition to semaphores --- api/appendix_e.asciidoc | 44 +++++++++++++++++++++++++++++++++++++++-- c/appendix_a.asciidoc | 5 +++++ env/appendix_a.asciidoc | 6 ++++++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index ba9534480..d589edf60 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -504,13 +504,13 @@ Changes from *v3.0.8*: ** {cl_khr_integer_dot_product_EXT} (version 2) ** {cl_khr_semaphore_EXT} (provisional) ** {cl_khr_external_semaphore_EXT} (provisional) - ** {cl_khr_external_semaphore_dx_fence_EXT} (provisional) + ** `cl_khr_external_semaphore_dx_fence` (provisional) ** {cl_khr_external_semaphore_opaque_fd_EXT} (provisional) ** {cl_khr_external_semaphore_sync_fd_EXT} 
(provisional) ** {cl_khr_external_semaphore_win32_EXT} (provisional) ** {cl_khr_external_memory_EXT} (provisional) ** {cl_khr_external_memory_dma_buf_EXT} (provisional) - ** {cl_khr_external_memory_dx_EXT} (provisional) + ** `cl_khr_external_memory_dx` (provisional) ** {cl_khr_external_memory_opaque_fd_EXT} (provisional) ** {cl_khr_external_memory_win32_EXT} (provisional) @@ -622,3 +622,43 @@ Changes from *v3.0.15*: ** {cl_khr_external_memory_win32_EXT} * Added new extension: ** {cl_khr_kernel_clock_EXT} (provisional) + +Changes from *v3.0.16*: + + * Clarified the definition of command prerequisites, see {khronos-opencl-pr}/923[#923]. + * Clarified the behavior of {CL_DEVICE_TYPE_DEFAULT} and {CL_DEVICE_TYPE_ALL} for custom devices, see {khronos-opencl-pr}/1117[#1117]. + * Clarified how {CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES} behaves for devices that do not support {CL_DEVICE_SVM_ATOMICS}, see {khronos-opencl-pr}/1171[#1171]. + * Fixed links to extension API functions, see {khronos-opencl-pr}/1179[#1179]. + * Further clarified an error condition for {clCreateBuffer} with {CL_MEM_COPY_HOST_PTR} and an SVM pointer that is too small, see {khronos-opencl-pr}/1189[#1189]. + * Fixed a minor typo in the {clCreateProgramWithSource} introduction, see {khronos-opencl-pr}/1204[#1204]. + * Clarified how to properly use and modify OpenCL objects across multiple command-queues, see {khronos-opencl-pr}/1243[#1243]. + * Clarified and corrected many parts of {clSetKernelExecInfo}, see {khronos-opencl-pr}/1245[#1245]. + * Improved wording consistency for _param_value_size_ parameters, see {khronos-opencl-pr}/1254[#1254]. + * Clarified the meaning of _num_mip_levels_ in {cl_image_desc_TYPE}, see {khronos-opencl-pr}/1255[#1255] and {khronos-opencl-pr}/1272[#1272]. + * Clarified that functionality will never be removed in minor OpenCL specification revisions, see {khronos-opencl-pr}/1265[#1265]. 
+ * Clarified that the minimum value for {CL_DEVICE_HALF_FP_CONFIG} applies to all OpenCL versions, see {khronos-opencl-pr}/1273[#1273]. + * {cl_khr_command_buffer_EXT} (provisional): + ** Added multi-device wording to {clCommandBarrierWithWaitListKHR}, see {khronos-opencl-pr}/1146[#1146]. + ** Fixed {CL_INVALID_CONTEXT} command-buffer error definitions, see {khronos-opencl-pr}/1149[#1149]. + ** Added a _properties_ parameter to all command-buffer commands to improve extensibility, see {khronos-opencl-pr}/1215[#1215]. + * {cl_khr_command_buffer_mutable_dispatch_EXT} (provisional): + ** Modified the extension to pass update configs as arrays, rather than linked lists, see {khronos-opencl-pr}/1045[#1045]. + * {cl_khr_external_memory_EXT}: + ** Clarified acquire and release behavior, see {khronos-opencl-pr}/1176[#1176]. + ** Added a mechanism to import NT handles by name, see {khronos-opencl-pr}/1177[#1177]. + ** Documented which error condition should be returned when attempting to create a memory object with more than one external handle, see {khronos-opencl-pr}/1249[#1249]. + * {cl_khr_external_semaphore_EXT}: + ** Added a mechanism to import NT handles by name, see {khronos-opencl-pr}/1177[#1177]. + ** Fixed a typo in the description of {clGetSemaphoreHandleForTypeKHR}, see {khronos-opencl-pr}/1220[#1220]. + ** Clarified that there are no implicit dependencies when waiting on or signaling semaphores using out-of-order queues, see {khronos-opencl-pr}/1231[#1231]. + ** Documented which error condition should be returned when attempting to create a semaphore with more than one external handle, see {khronos-opencl-pr}/1249[#1249]. + ** Unified the {CL_INVALID_COMMAND_QUEUE} behavior for semaphore signals and waits, see {khronos-opencl-pr}/1256[#1256]. + ** Clarified that {clGetSemaphoreHandleForTypeKHR} is part of {cl_khr_external_semaphore_EXT} and not {cl_khr_external_semaphore_sync_fd_EXT}, see {khronos-opencl-pr}/1257[#1257]. 
+ * {cl_khr_external_semaphore_sync_fd_EXT}: + ** Fixed typos in the description of {clReImportSemaphoreSyncFdKHR}, see {khronos-opencl-pr}/1208[#1208]. + ** Clarified which re-import properties are accepted by {clReImportSemaphoreSyncFdKHR}, see {khronos-opencl-pr}/1219[#1219]. + * {cl_khr_semaphore_EXT}: + ** Clarified external semaphore behavior, removing references to permanence, see {khronos-opencl-pr}/938[#938]. + * Removed provisional extensions due to lack of implementations and tests, see {khronos-opencl-pr}/1160[#1160]. + ** `cl_khr_external_semaphore_dx_fence` (provisional) + ** `cl_khr_external_memory_dx` (provisional) diff --git a/c/appendix_a.asciidoc b/c/appendix_a.asciidoc index 2492e585d..6838a5543 100644 --- a/c/appendix_a.asciidoc +++ b/c/appendix_a.asciidoc @@ -52,3 +52,8 @@ Changes from *v3.0.15*: * Removed an incorrect statement about geometric functions operating component-wise, see {khronos-opencl-pr}/1137[#1137]. * Added new extension: ** {cl_khr_kernel_clock_EXT} (provisional) + +Changes from *v3.0.16*: + + * Documented the error bounds for a non-derived `atan2` implementation with unsafe math optimizations, see {khronos-opencl-pr}/1073[#1073]. + * Fixed a typo affecting `EPSILON` macros, see {khronos-opencl-pr}/1225[#1225]. diff --git a/env/appendix_a.asciidoc b/env/appendix_a.asciidoc index e385a0e45..e2a35212d 100644 --- a/env/appendix_a.asciidoc +++ b/env/appendix_a.asciidoc @@ -50,3 +50,9 @@ Changes from *v3.0.15*: * Adds the numerical value of the image channel order and image channel data type to several tables, see {khronos-opencl-pr}/1050[#1050]. * Added new extension: ** {cl_khr_kernel_clock_EXT} (provisional) + +Changes from *v3.0.16*: + + * Documented the error bounds for a non-derived `atan2` implementation with unsafe math optimizations, see {khronos-opencl-pr}/1073[#1073]. + * Documented supported `printf` operand types, see {khronos-opencl-pr}/1236[#1236]. 
+ * Documented valid coordinate types when reading from or writing to images, see {khronos-opencl-pr}/1242[#1242]. From ff00e42272bb247f6cb931f2b9231f151e49b638 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 29 Oct 2024 09:57:27 -0700 Subject: [PATCH 173/190] clarify cl_mem_flags to not affect copies (#1230) --- api/opencl_runtime_layer.asciidoc | 65 +++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 1716b9aa8..019288efc 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -1256,6 +1256,11 @@ include::{generated}/api/version-notes/clEnqueueCopyBuffer.asciidoc[] If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. +The usage information which indicates whether the memory object can be read +or written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} +argument value specified when _src_buffer_ or _dst_buffer_ is created is ignored by +{clEnqueueCopyBuffer}. + // refError {clEnqueueCopyBuffer} returns {CL_SUCCESS} if the function is executed @@ -1383,6 +1388,11 @@ must equal _dst_row_pitch_ and _src_slice_pitch_ must equal _dst_slice_pitch_. ==== +The usage information which indicates whether the memory object can be read +or written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} +argument value specified when _src_buffer_ or _dst_buffer_ is created is ignored by +{clEnqueueCopyBufferRect}. + // refError {clEnqueueCopyBufferRect} returns {CL_SUCCESS} if the function is executed @@ -3553,6 +3563,11 @@ memory objects for {clEnqueueCopyImage} must have the exact same image format (i.e. the {cl_image_format_TYPE} descriptor specified when _src_image_ and _dst_image_ are created must match).
+The usage information which indicates whether the memory object can be read +or written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} +argument value specified when _src_image_ or _dst_image_ is created is ignored by +{clEnqueueCopyImage}. + // refError {clEnqueueCopyImage} returns {CL_SUCCESS} if the function is executed @@ -3800,6 +3815,11 @@ endif::cl_khr_mipmap_image[] If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. +The usage information which indicates whether the memory object can be read +or written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} +argument value specified when _src_image_ or _dst_buffer_ is created is ignored by +{clEnqueueCopyImageToBuffer}. + // refError {clEnqueueCopyImageToBuffer} returns {CL_SUCCESS} if the function is executed @@ -3927,6 +3947,11 @@ image or 1D image buffer object and is computed as _width_ {times} _arraysize_ {times} _bytes/image element_ if _dst_image_ is a 1D image array object. +The usage information which indicates whether the memory object can be read +or written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} +argument value specified when _src_buffer_ or _dst_image_ is created is ignored by +{clEnqueueCopyBufferToImage}. + // refError {clEnqueueCopyBufferToImage} returns {CL_SUCCESS} if the function is executed @@ -14529,6 +14554,14 @@ after the function returns. * _mutable_handle_ returns a handle to the command. This parameter is unused, and **must** be `NULL`. +[NOTE] +==== +The usage information which indicates whether the memory object can be read or +written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument +value specified when _src_buffer_ or _dst_buffer_ is created is ignored by +{clCommandCopyBufferKHR}. 
+==== + // refError {clCommandCopyBufferKHR} returns {CL_SUCCESS} if the function is executed @@ -14643,6 +14676,14 @@ After copying each 2D rectangle, the source and destination offsets are incremented by their respective source and destination slice pitches. ==== +[NOTE] +==== +The usage information which indicates whether the memory object can be read or +written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument +value specified when _src_buffer_ or _dst_buffer_ is created is ignored by +{clCommandCopyBufferRectKHR}. +==== + // refError {clCommandCopyBufferRectKHR} returns {CL_SUCCESS} if the function is @@ -14741,6 +14782,14 @@ after the function returns. * _mutable_handle_ returns a handle to the command. This parameter is unused, and **must** be `NULL`. +[NOTE] +==== +The usage information which indicates whether the memory object can be read or +written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument +value specified when _src_buffer_ or _dst_image_ is created is ignored by +{clCommandCopyBufferToImageKHR}. +==== + // refError {clCommandCopyBufferToImageKHR} returns {CL_SUCCESS} if the function is executed @@ -14846,6 +14895,14 @@ format, i.e. the {cl_image_format_TYPE} descriptor specified when _src_image_ and _dst_image_ are created must match. ==== +[NOTE] +==== +The usage information which indicates whether the memory object can be read or +written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument +value specified when _src_image_ or _dst_image_ is created is ignored by +{clCommandCopyImageKHR}. +==== + // refError {clCommandCopyImageKHR} returns {CL_SUCCESS} if the function is executed @@ -14943,6 +15000,14 @@ after the function returns. * _mutable_handle_ returns a handle to the command. This parameter is unused, and **must** be `NULL`. 
+[NOTE] +==== +The usage information which indicates whether the memory object can be read or +written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument +value specified when _src_image_ or _dst_buffer_ is created is ignored by +{clCommandCopyImageToBufferKHR}. +==== + // refError {clCommandCopyImageToBufferKHR} returns {CL_SUCCESS} if the function is From 0f29e0274ba30eb36b77bb65ef71190f31de5872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 12 Nov 2024 18:36:20 +0000 Subject: [PATCH 174/190] Reserve cl_mem_flags bit for cl_ext_immutable_memory_objects (#1285) Relates to #1280 Change-Id: I66b553b4708b913a219d803e45ab7dd6cfb8fe93 Signed-off-by: Kevin Petit --- xml/cl.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xml/cl.xml b/xml/cl.xml index 33f45ce82..561b74bf7 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -882,7 +882,7 @@ server's OpenCL/api-docs repository. - + From 506eb11195fbb5879fe712074cc297ee54ac2908 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 12 Dec 2024 16:11:35 +0000 Subject: [PATCH 175/190] cl_khr_command_buffer sync-point capacity (#1286) * cl_khr_command_buffer sync-point capacity Document the behaviour when command-buffer command capacity is reached, and track under "Issues" the possible future use-cases for being able to optimize based on the capacity of a command-buffer. 
Closes https://github.com/KhronosGroup/OpenCL-Docs/issues/844 * Refine out-of-order command-buffer enqueue wording * Update api/cl_khr_command_buffer.asciidoc Co-authored-by: Ben Ashbaugh --------- Co-authored-by: Ben Ashbaugh --- api/cl_khr_command_buffer.asciidoc | 46 +++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index a97e067d6..9f3cd8868 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -43,16 +43,6 @@ Command-buffers enable a reduction in overhead when enqueuing the same workload multiple times. By separating the command-queue setup from dispatch, the ability to replay a set of previously created commands is introduced. -Device-side _cl_sync_point_khr_ synchronization-points can be used within -command-buffers to define command dependencies. This allows the commands of a -command-buffer to execute out-of-order on a single <> -command-queue. The command-buffer itself has no inherent in-order/out-of-order -property, this ordering is inferred from the command-queue used on command -recording. Out-of-order enqueues without event dependencies of both regular -commands, such as {clEnqueueFillBuffer}, and command-buffers are allowed to -execute concurrently, and it is up to the user to express any dependencies using -events. - The command-queues a command-buffer will be executed on can be set on replay via parameters to {clEnqueueCommandBufferKHR}, provided they are <> with the command-queues used on command-buffer @@ -110,6 +100,29 @@ following reasons: other extensions layered on top to take advantage of them to provide additional mutable functionality. +==== Command Synchronization + +Device-side {cl_sync_point_khr_TYPE} synchronization-points can be used within +command-buffers to define command dependencies. 
This allows the commands of a +command-buffer to execute out-of-order on a single <> +command-queue. The command-buffer itself has no inherent in-order/out-of-order +property, this ordering is inferred from the command-queue used on command +recording. {clEnqueueCommandBufferKHR} submissions to an out-of-order queue +have the same execution semantics as other operations enqueued to an +out-of-order queue, such as {clEnqueueFillBuffer}, where execution between +enqueued operations may happen concurrently unless dependencies between the +operations are expressed with events. + +The {cl_sync_point_khr_TYPE} type is defined as a `cl_uint`, giving a hard +upper limit on the number of commands a command-buffer can hold as +{CL_UINT_MAX}, at which point {CL_OUT_OF_RESOURCES} will be returned. However, +it is likely an implementation will reach capacity before this threshold is +hit. + +There are no guarantees made around the values of sync-points returned from +adding commands to a command-buffer. Any semantics that could be inferred +from the sync-point values returned is implementation defined. + ==== Simultaneous Use The optional simultaneous use capability was added to the extension so that @@ -420,6 +433,19 @@ features: -- *UNRESOLVED* -- +. Give users more control over command-buffer command capacity via some or all + of the following mechanisms. + ** Provide a way for a user to query a command-buffer for the maximum number + of commands it can hold. + ** Guarantee a minimum command capacity that an implementation must support. + ** Provide a mechanism for users to reserve command-buffer capacity on + command-buffer creation. + ++ +-- +*RESOLVED* - Mechanisms to achieve this could be provided as a layered extension. +-- + === Version History From bf24e316dcbb90394ed680feef0a3c3d1c724ef7 Mon Sep 17 00:00:00 2001 From: Chuang-Yu Cheng Date: Wed, 18 Dec 2024 01:21:11 +0900 Subject: [PATCH 176/190] Document img 1x2_2x2 matmul functions.
(#1283) * document img 1x2_2x2 matmul functions. * Correct the description of matmul with saturation. * Address review comments. --- extensions/cl_img_matrix_multiply.asciidoc | 252 +++++++++++++++++++-- 1 file changed, 227 insertions(+), 25 deletions(-) diff --git a/extensions/cl_img_matrix_multiply.asciidoc b/extensions/cl_img_matrix_multiply.asciidoc index 068830280..573d4e4fe 100644 --- a/extensions/cl_img_matrix_multiply.asciidoc +++ b/extensions/cl_img_matrix_multiply.asciidoc @@ -20,7 +20,9 @@ Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) CY Cheng, Imagination Technologies. + Joe Molleson, Imagination Technologies. + -Tomasz Platek, Imagination Technologies. +Tomasz Platek, Imagination Technologies. + +Szabolcs Csefalvay, Imagination Technologies. + +David Welch, Imagination Technologies. == Notice @@ -33,7 +35,7 @@ Final Draft == Version Built On: {docdate} + -Version: 1.0.0 +Version: 1.1.0 == Dependencies @@ -50,6 +52,7 @@ This extension adds built-in functions that exercise hardware capabilities of Im [source,c] ---- __opencl_img_dot_interleaved +__opencl_img_matmul_1x2_2x2 __opencl_img_matmul_2x4_4x4 ---- @@ -69,7 +72,24 @@ float2 img_dot_interleaved_acc(float4 a,__local float8 * b, float2 acc); float2 img_dot_interleaved_acc(float8 a,__local float16 * b, float2 acc); ---- -Perform the matrix multiplication operation: +Perform the matrix multiplication of a 1x2 matrix `a` with a 2x2 matrix `b`, adding the result to a 1x2 matrix `acc`: + +[source,c] +---- +float2 img_matmul_float_acc_1x2_2x2(float2 a, __local float4 * b, float2 acc); +float2 img_matmul_half2_acc_1x2_2x2f(half4 a, __local half8 * b, float2 acc); +half2 img_matmul_half2_acc_1x2_2x2h(half4 a, __local half8 * b, half2 acc); +uint2 img_matmul_uchar4_acc_1x2_2x2(uchar8 a, __local uchar16 * b, uint2 acc); +int2 img_matmul_char4_acc_1x2_2x2(char8 a, __local char16 * b, int2 acc); +int2 img_matmul_char4_acc_1x2_2x2(uchar8 a, __local char16 * b, int2 acc); +int2
img_matmul_char4_acc_1x2_2x2(char8 a, __local uchar16 * b, int2 acc); +uint2 img_matmul_uchar4_acc_1x2_2x2_sat(uchar8 a, __local uchar16 * b, uint2 acc); +int2 img_matmul_char4_acc_1x2_2x2_sat(char8 a, __local char16 * b, int2 acc); +int2 img_matmul_char4_acc_1x2_2x2_sat(uchar8 a, __local char16 * b, int2 acc); +int2 img_matmul_char4_acc_1x2_2x2_sat(char8 a, __local uchar16 * b, int2 acc); +---- + +Perform the matrix multiplication of a 2x4 matrix `a` with a 4x4 matrix `b`, adding the result to a 2x4 matrix `acc`: [source,c] ---- @@ -95,12 +115,12 @@ half8 img_matmul_acc_2x4_4x4transposedh(half4 a0, half4 a1,__local half16 * b, h float2 *img_dot_interleaved*(float2 _a_,pass:[__local] float4 * _b_) + float2 *img_dot_interleaved*(float4 _a_,pass:[__local] float8 * _b_) + float2 *img_dot_interleaved*(float8 _a_,pass:[__local] float16 * _b_) - a| `img_dot_interleaved` performs the dual dot product operation. + a| `img_dot_interleaved` performs the dual dot product operation. The input vectors of the first dot product are `a` and the vector containing the even-indexed elements of `b`. The result is stored into the first element of the output vector. The input vectors of the second dot product are `a` and the vector containing the odd-indexed elements of `b`. The result is stored into the second element of the output vector. - + For example, given: - + ---- a = [a0 a1] b = [b0 b1 b2 b3] @@ -111,6 +131,9 @@ the output vector is: ---- [res0 res1] = [a0 a1] x [b0 b1] [b2 b3] + +res0 = a0b0 + a1b2 +res1 = a0b1 + a1b3 ---- Requires that the `__opencl_img_dot_interleaved` feature macro is defined. @@ -118,7 +141,7 @@ Requires that the `__opencl_img_dot_interleaved` feature macro is defined. 
float2 *img_dot_interleaved_acc*(float2 _a_,pass:[__local] float4 * _b_, float2 _acc_) + float2 *img_dot_interleaved_acc*(float4 _a_,pass:[__local] float8 * _b_, float2 _acc_) + float2 *img_dot_interleaved_acc*(float8 _a_,pass:[__local] float16 * _b_, float2 _acc_) - a| `img_dot_interleaved_acc` performs the dual dot product operation with the accumulator `acc`. + a| `img_dot_interleaved_acc` performs the dual dot product operation with the accumulator `acc`. The input vectors of the first dot product are `a` and the vector containing the even-indexed elements of `b`. The result is stored into the first element of the output vector. The input vectors of the second dot product are `a` and the vector containing the odd-indexed elements of `b`. The result is stored into the second element of the output vector. @@ -135,9 +158,129 @@ the output vector is: ---- [res0 res1] = [a0 a1] x [b0 b1] + [acc0 acc1] [b2 b3] + +res0 = a0b0 + a1b2 + acc0 +res1 = a0b1 + a1b3 + acc1 ---- Requires that the `__opencl_img_dot_interleaved` feature macro is defined. +| float2 *img_matmul_float_acc_1x2_2x2*(float2 _a_, pass:[__local] float4 * _b_, float2 _acc_) + a| `img_matmul_float_acc_1x2_2x2` performs the dual dot product operation with the accumulator `acc` + The input vectors of the first dot product are `a` and the vector containing the even-indexed elements of `b`. The result is stored into the first element of the output vector. + The input vectors of the second dot product are `a` and the vector containing the odd-indexed elements of `b`. The result is stored into the second element of the output vector. + +For example, given: +---- +a = [a0 a1] +b = [b0 b1 b2 b3] +acc = [acc0 acc1] +---- + +the output vector is: + +---- +[res0 res1] = [a0 a1] x [b0 b1] + [acc0 acc1] + [b2 b3] + +res0 = a0b0 + a1b2 + acc0 +res1 = a0b1 + a1b3 + acc1 +---- + +Requires that the `__opencl_img_matmul_1x2_2x2` feature macro is defined. 
+| float2 *img_matmul_half2_acc_1x2_2x2f*(half4 _a_, pass:[__local] half8 * _b_, float2 _acc_) + + half2 *img_matmul_half2_acc_1x2_2x2h*(half4 _a_, pass:[__local] half8 * _b_, half2 _acc_) + a| `img_matmul_half2_acc_1x2_2x2f` and `img_matmul_half2_acc_1x2_2x2h` perform the dual dot product operation with the accumulator `acc` + The input vectors of the first dot product are `a` and the vector containing the even-indexed *32-bit elements* of `b`. The result is stored into the first element of the output vector. + The input vectors of the second dot product are `a` and the vector containing the odd-indexed *32-bit elements* of `b`. The result is stored into the second element of the output vector. + +For example, given: +---- +a = [a0 a1, a2 a3] +b = [b0 b1, b2 b3] + [b4 b5, b6 b7] +acc = [acc0 acc1] + +a's memory layout = LSB [a0 a1 a2 a3] +b's memory layout = LSB [b0 b1 b2 b3 b4 b5 b6 b7] +---- + +the output vector is: + +---- +[res0 res1] = [a0 a1, a2 a3] x [b0 b1, b2 b3] + [acc0 acc1] + [b4 b5, b6 b7] + +res0 = (a0b0 + a1b1) + (a2b4 + a3b5) + acc0 +res1 = (a0b2 + a1b3) + (a2b6 + a3b7) + acc1 + +Note: The parentheses are only used to help the reader see that the dot computation is a [1x2] x [2x2] with half2 elements; they do not indicate the accumulation order. +---- + +Requires that the `__opencl_img_matmul_1x2_2x2` feature macro is defined. +| uint2 *img_matmul_uchar4_acc_1x2_2x2*(uchar8 _a_, pass:[__local] uchar16 * _b_, uint2 _acc_); + int2 *img_matmul_char4_acc_1x2_2x2*(char8 _a_, pass:[__local] char16 * _b_, int2 _acc_); + int2 *img_matmul_char4_acc_1x2_2x2*(uchar8 _a_, pass:[__local] char16 * _b_, int2 _acc_); + int2 *img_matmul_char4_acc_1x2_2x2*(char8 _a_, pass:[__local] uchar16 * _b_, int2 _acc_); + a| `img_matmul_uchar4_acc_1x2_2x2` and `img_matmul_char4_acc_1x2_2x2` perform the dual dot product operation with the accumulator `acc` + The input vectors of the first dot product are `a` and the vector containing the even-indexed *32-bit elements* of `b`. 
The result is stored into the first element of the output vector. + The input vectors of the second dot product are `a` and the vector containing the odd-indexed *32-bit elements* of `b`. The result is stored into the second element of the output vector. + +For example, given: +---- +a = [a0 a1 a2 a3, a4 a5 a6 a7] +b = [b0 b1 b2 b3, b4 b5 b6 b7] + [b8 b9 b10 b11, b12 b13 b14 b15] +acc = [acc0 acc1] + +a's memory layout = LSB [a0 a1 a2 a3 a4 a5 a6 a7] +b's memory layout = LSB [b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 b12 b13 b14 b15] +---- + +the output vector is: + +---- +[res0 res1] = [a0 a1 a2 a3, a4 a5 a6 a7] x [b0 b1 b2 b3, b4 b5 b6 b7] + [acc0 acc1] + [b8 b9 b10 b11, b12 b13 b14 b15] +res0 = (a0b0 + a1b1 + a2b2 + a3b3) + ( a4b8 + a5b9 + a6b10 + a7b11) + acc0 +res1 = (a0b4 + a1b5 + a2b6 + a3b7) + (a4b12 + a5b13 + a6b14 + a7b15) + acc1 + +Note: The parentheses are only used to help the reader see that the dot computation is a [1x2] x [2x2] with char4/uchar4 elements; they do not indicate the accumulation order. +---- + +Requires that the `__opencl_img_matmul_1x2_2x2` feature macro is defined. +| uint2 *img_matmul_uchar4_acc_1x2_2x2_sat*(uchar8 _a_, pass:[__local] uchar16 * _b_, uint2 _acc_); + int2 *img_matmul_char4_acc_1x2_2x2_sat*(char8 _a_, pass:[__local] char16 * _b_, int2 _acc_); + int2 *img_matmul_char4_acc_1x2_2x2_sat*(uchar8 _a_, pass:[__local] char16 * _b_, int2 _acc_); + int2 *img_matmul_char4_acc_1x2_2x2_sat*(char8 _a_, pass:[__local] uchar16 * _b_, int2 _acc_); + a| `img_matmul_uchar4_acc_1x2_2x2_sat` and `img_matmul_char4_acc_1x2_2x2_sat` perform the dual dot product operation, add the accumulator `acc`, and saturate the result. + The input vectors of the first dot product are `a` and the vector containing the even-indexed *32-bit elements* of `b`. The result is saturated and stored into the first element of the output vector. + The input vectors of the second dot product are `a` and the vector containing the odd-indexed *32-bit elements* of `b`.
The result is saturated and stored into the second element of the output vector. + +For example, given: +---- +a = [a0 a1 a2 a3, a4 a5 a6 a7] +b = [b0 b1 b2 b3, b4 b5 b6 b7] + [b8 b9 b10 b11, b12 b13 b14 b15] +acc = [acc0 acc1] + +a's memory layout = LSB [a0 a1 a2 a3 a4 a5 a6 a7] +b's memory layout = LSB [b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 b12 b13 b14 b15] +---- + +the output vector is: +---- +[res0 res1] = [a0 a1 a2 a3, a4 a5 a6 a7] x [b0 b1 b2 b3, b4 b5 b6 b7] + [acc0 acc1] + [b8 b9 b10 b11, b12 b13 b14 b15] +product0 = (a0b0 + a1b1 + a2b2 + a3b3) + ( a4b8 + a5b9 + a6b10 + a7b11) +res0 = add_sat(product0, acc0) + +product1 = (a0b4 + a1b5 + a2b6 + a3b7) + (a4b12 + a5b13 + a6b14 + a7b15) +res1 = add_sat(product1, acc1) + +Note: The parentheses are only used to help the reader see that the dot computation is a [1x2] x [2x2] with char4/uchar4 elements; they do not indicate the accumulation order. +---- + +Requires that the `__opencl_img_matmul_1x2_2x2` feature macro is defined. | float8 *img_matmul_2x4_4x4f*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) + half8 *img_matmul_2x4_4x4h*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) a| `img_matmul_2x4_4x4f` and `img_matmul_2x4_4x4h` perform the matrix multiplication operation of matrices A and B of dimensions 2x4 and 4x4, where `a0` is the first row and `a1` is the second row of the matrix A. @@ -158,7 +301,7 @@ the output vector is: ---- [res0 res1 res2 res3] = A x B -[res4 res5 res6 res7] +[res4 res5 res6 res7] ---- Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. @@ -184,7 +327,7 @@ the output vector is: ---- [res0 res1 res2 res3] = A x B + C -[res4 res5 res6 res7] +[res4 res5 res6 res7] ---- Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. @@ -209,7 +352,7 @@ the output vector is: ---- [res0 res1 res2 res3] = A x BT -[res4 res5 res6 res7] +[res4 res5 res6 res7] ---- Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined.
@@ -228,14 +371,14 @@ BT = [b0 b4 b8 b12] [b2 b6 b10 b14] [b3 b7 b11 b15] C = [acc00 acc01 acc02 acc03] - [acc10 acc11 acc12 acc13] + [acc10 acc11 acc12 acc13] ---- the output vector is: ---- [res0 res1 res2 res3] = A x BT + C -[res4 res5 res6 res7] +[res4 res5 res6 res7] ---- Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. @@ -245,7 +388,7 @@ Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. == Coding Sample This coding sample shows how to initialize the input vectors, use the *img_dot_interleaved_acc* function, and access the output vector: -[source] +[source,c] ---- float4 a = (float4) (1.0f, 1.0f, 1.0f, 1.0f); __local float8 b; @@ -257,14 +400,80 @@ float2 res = img_dot_interleaved_acc(a, &b, acc); printf("res = [ %f %f ]\n", res.s0, res.s1); ---- -Executing a work-item containing this code gives the following result: -[source] +This coding sample shows how to use the *img_matmul_float_acc_1x2_2x2* function: +[source,c] +---- +__attribute__((reqd_work_group_size(128, 1, 1))) +void __kernel f32Matmul(__global float2 *a, __global float4 *b, __global float2 *acc, int step) { + __local float4 cachedB[..]; + int id = ..; + // load data from the matrix b which is shared in a workgroup. + // We can let each thread copy the data or use async_work_group_copy: + // cachedB[id] = ..; + // + // event_t e = async_work_group_copy(cachedB, &b[group_id], .. /* num elements */, 0 /* event */); + // wait_group_events(1, &e); + + float2 results = acc[id]; + for (int i = 0; i < step; ++i) + results = img_matmul_float_acc_1x2_2x2(a[id + i], &cachedB[i], results); + + acc[id] = results; +} + +// Note: It is preferable to use a workgroup size of 128 for optimal performance.
+---- + +This coding sample shows how to use the *img_matmul_half2_acc_1x2_2x2h* function: +[source,c] ---- -res = [ 1.000000 5.000000 ] +__attribute__((reqd_work_group_size(128, 1, 1))) +void __kernel f16Matmul(__global half4 *a, __global half8 *b, __global half2 *acc, int step) { + __local half8 cachedB[..]; + int id = ..; + // load data from the matrix b which is shared in a workgroup. + // We can let each thread copy the data or use async_work_group_copy: + // cachedB[id] = ..; + // + // event_t e = async_work_group_copy(cachedB, &b[group_id], .. /* num elements */, 0 /* event */); + // wait_group_events(1, &e); + + half2 results = acc[id]; + for (int i = 0; i < step; ++i) + results = img_matmul_half2_acc_1x2_2x2h(a[id + i], &cachedB[i], results); + + acc[id] = results; +} + +// Note: It is preferable to use a workgroup size of 128 for optimal performance. +---- + +This coding sample shows how to use the *img_matmul_char4_acc_1x2_2x2_sat* function: +[source,c] +---- +__attribute__((reqd_work_group_size(128, 1, 1))) +void __kernel char4Matmul(__global char8 *a, __global char16 *b, __global int2 *acc, int step) { + __local char16 cachedB[..]; + int id = ..; + // load data from the matrix b which is shared in a workgroup. + // We can let each thread copy the data or use async_work_group_copy: + // cachedB[id] = ..; + // + // event_t e = async_work_group_copy(cachedB, &b[group_id], .. /* num elements */, 0 /* event */); + // wait_group_events(1, &e); + + int2 results = acc[id]; + for (int i = 0; i < step; ++i) + results = img_matmul_char4_acc_1x2_2x2_sat(a[id + i], &cachedB[i], results); + + acc[id] = results; +} + +// Note: It is preferable to use a workgroup size of 128 for optimal performance.
---- This coding sample shows how to initialize the input vectors, use the *img_matmul_acc_2x4_4x4f* function, and access the output vector: -[source] +[source,c] ---- half4 a0 = (half4) (1.0h, 0.0h, 0.0h, 0.0h); half4 a1 = (half4) (0.0h, 1.0h, 0.0h, 0.0h); @@ -284,13 +493,6 @@ printf("res = [ %f %f %f %f ]\n", res.s0, res.s1, res.s2, res.s3); printf(" [ %f %f %f %f ]\n", res.s4, res.s5, res.s6, res.s7); ---- -Executing a work-item containing this code gives the following result: -[source] ----- -res = [ 1.000000 2.000000 3.000000 4.000000 ] - [ 5.000000 6.000000 7.000000 8.000000 ] ----- - == Version History [cols="5,15,15,70"] @@ -299,5 +501,5 @@ res = [ 1.000000 2.000000 3.000000 4.000000 ] |==== | Version | Date | Author | Changes | 1.0.0 | 2024-06-07 | Tomasz Platek | *Initial revision* +| 1.1.0 | 2024-11-11 | CY Cheng | Document 1x2_2x2 matrix functions |==== - From 218ed051f1ee0e6b97554796948ef946423b1713 Mon Sep 17 00:00:00 2001 From: ssugumar-mstk Date: Mon, 6 Jan 2025 23:23:06 +0400 Subject: [PATCH 177/190] Update cl.xml (#1289) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reserving OpenCL Enumerant range for vendor Mastiṣka AI --- xml/cl.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xml/cl.xml b/xml/cl.xml index 561b74bf7..309cff77d 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -2306,8 +2306,12 @@ server's OpenCL/api-docs repository. 
- - + + + + + + From 20c705129520b1a6a17eb4ac14afeee8f8c2ef34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Thu, 9 Jan 2025 19:41:59 +0000 Subject: [PATCH 178/190] Integrate cl_ext_image_requirements_info into unified specification (#1295) * Integrate cl_ext_image_requirements_info into unified specification Signed-off-by: Kevin Petit Change-Id: Ia249f78aa521a8d202dfafbb736c9887574e56f9 * Update api/opencl_runtime_layer.asciidoc Co-authored-by: Ben Ashbaugh * Update api/opencl_runtime_layer.asciidoc Co-authored-by: Ben Ashbaugh --------- Signed-off-by: Kevin Petit Co-authored-by: Ben Ashbaugh --- api/cl_ext_image_requirements_info.asciidoc | 93 ++++- api/opencl_platform_layer.asciidoc | 21 + api/opencl_runtime_layer.asciidoc | 189 +++++++++ .../cl_ext_image_requirements_info.asciidoc | 388 ------------------ extensions/extensions.txt | 2 - 5 files changed, 299 insertions(+), 394 deletions(-) delete mode 100644 extensions/cl_ext_image_requirements_info.asciidoc diff --git a/api/cl_ext_image_requirements_info.asciidoc b/api/cl_ext_image_requirements_info.asciidoc index 6de780853..01577f1e0 100644 --- a/api/cl_ext_image_requirements_info.asciidoc +++ b/api/cl_ext_image_requirements_info.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_ext_image_requirements_info.txt[] @@ -6,14 +6,99 @@ include::{generated}/meta/{refprefix}cl_ext_image_requirements_info.txt[] === Other Extension Metadata *Last Modified Date*:: - 2022-01-18 + 2025-01-06 *IP Status*:: No known IP claims. +*Interactions and External Dependencies*:: + - This extension interacts with {cl_khr_image2d_from_buffer_EXT} +*Contributors*:: + - Kevin Petit, Arm Ltd. + - Jeremy Kemp, Imagination Technologies + - Alastair Murray, Codeplay Software Ltd. 
+ - Balaji Calidas, Qualcomm === Description -The latest published specification for this extension is available on -the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_image_requirements_info.html[OpenCL registry]. +This extension enables applications to query requirements for an image without +having to create the image. + +=== New Commands + + * {clGetImageRequirementsInfoEXT} + +=== New Types + + * {cl_image_requirements_info_ext_TYPE} + +=== New Enums + + * {cl_image_requirements_info_ext_TYPE} + ** {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} + ** {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} + ** {CL_IMAGE_REQUIREMENTS_SIZE_EXT} + ** {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} + ** {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} + ** {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} + ** {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} + +=== Conformance tests + +. Basic checks for {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} and {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} +* For all image formats and types +** Check that the {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} and {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} queries can be performed successfully and that the values returned are a power of two. + +. Check consistency with `cl_khr_image2d_from_buffer` +* When `cl_khr_image2d_from_buffer` is supported, check that the value returned by {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} after converting in bytes for the supported format with the biggest element size (channel data type size * number of channels) is greater than or equal to the value returned by {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} for all supported formats. 
+* When `cl_khr_image2d_from_buffer` is supported, check that the value returned by {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT} after converting in bytes for the supported format with the biggest element size (channel data type size * number of channels) is greater than or equal to the value returned by {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} for all supported formats. + +. Negative tests for {CL_IMAGE_REQUIREMENTS_SIZE_EXT} +* Check that attempting to perform the {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query without specifying the _image_format_ results in {CL_INVALID_VALUE} being returned. +* Check that attempting to perform the {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query without specifying the _image_desc_ results in {CL_INVALID_VALUE} being returned. + +. Consistency checks for {CL_IMAGE_REQUIREMENTS_SIZE_EXT} +* When creating 2D images from a buffer is supported, for all formats and a selection of image dimensions +** Check that the {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query can be performed successfully. +** Create a buffer with the size returned and check that an image can successfully be created from the buffer. +** Check that the value returned for {CL_MEM_SIZE} for the image is the same as the value returned for {CL_IMAGE_REQUIREMENTS_SIZE_EXT}. + +. Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} +* For all image formats, image types and a selection of values for other members in _image_desc_ (that MUST include `0`) +** Check that the {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} query can be performed successfully +** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE_MAX_BUFFER_SIZE} for images of {CL_MEM_OBJECT_IMAGE1D_BUFFER} type or smaller than or equal to the value returned for {CL_DEVICE_IMAGE3D_MAX_WIDTH} for images of {CL_MEM_OBJECT_IMAGE3D} type or smaller than or equal to the value returned for {CL_DEVICE_IMAGE2D_MAX_WIDTH} for all other image types. + +. 
Negative tests for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} +* Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query on all image types for which it is not valid +* Check that {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. + +. Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} +* For all image formats, valid image types and a selection of values for other members in _image_desc_ (that MUST include `0`) +** Check that the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query can be performed successfully +** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE2D_MAX_HEIGHT} for 2D or 2D array images or {CL_DEVICE_IMAGE3D_MAX_HEIGHT} for 3D images. + +. Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} +* Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query on all image types for which it is not valid +* Check that {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. + +. Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} +* For all image formats, valid image types and a selection of values for other members in _image_desc_ (that MUST include `0`) +** Check that the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query can be performed successfully +** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE3D_MAX_DEPTH}. + +. Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} +* Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query on all image types for which it is not valid +* Check that {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. + +. 
Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} +* For all image formats, valid image types and a selection of values for other members in _image_desc_ (that MUST include `0`) +** Check that the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query can be performed successfully +** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE_MAX_ARRAY_SIZE}. + +. General negative testing for {clGetImageRequirementsInfoEXT} +** Write tests for all possible testable generic error codes. + +=== Issues + +None. === Version History diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 584c4ce4c..bf6c329ae 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -784,6 +784,16 @@ endif::cl_khr_image2d_from_buffer[] or 2.2 device if {CL_DEVICE_IMAGE_SUPPORT} is {CL_TRUE}. This value must be 0 for devices that do not support 2D images created from a buffer. + +ifdef::cl_ext_image_requirements_info+cl_khr_image2d_from_buffer[] + If the {cl_khr_image2d_from_buffer_EXT} and {cl_ext_image_requirements_info_EXT} + extensions are supported, the value returned by {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} + after converting in bytes for the supported format with the biggest element size + (channel data type size {times} number of channels) must be greater than or equal to + the value returned by {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} for any + supported format. +endif::cl_ext_image_requirements_info+cl_khr_image2d_from_buffer[] + | {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_anchor} include::{generated}/api/version-notes/CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT.asciidoc[] @@ -802,6 +812,17 @@ endif::cl_khr_image2d_from_buffer[] or 2.2 device if {CL_DEVICE_IMAGE_SUPPORT} is {CL_TRUE}. This value must be 0 for devices that do not support 2D images created from a buffer. 
+ +ifdef::cl_ext_image_requirements_info+cl_khr_image2d_from_buffer[] + If the {cl_khr_image2d_from_buffer_EXT} and {cl_ext_image_requirements_info_EXT} + extensions are supported, the value returned by + {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT} after converting in bytes for the + supported format with the biggest element size + (channel data type size {times} number of channels) must be greater than or equal to + the value returned by {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} for any + supported format. +endif::cl_ext_image_requirements_info+cl_khr_image2d_from_buffer[] + | {CL_DEVICE_MAX_PIPE_ARGS_anchor} include::{generated}/api/version-notes/CL_DEVICE_MAX_PIPE_ARGS.asciidoc[] diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 019288efc..e10bdcd32 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -2654,9 +2654,19 @@ endif::cl_khr_external_memory[] The image row pitch must be {geq} _image_width_ {times} the size of an image element in bytes, and must be a multiple of the size of an image element in bytes. +ifndef::cl_ext_image_requirements_info[] For a 2D image created from a buffer the image row pitch must also be a multiple of the maximum of the {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value for all devices in the context that support images. +endif::cl_ext_image_requirements_info[] +ifdef::cl_ext_image_requirements_info[] + For a 2D image created from a buffer, the image row pitch must also + - Be a multiple of the {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} value + returned for parameters compatible with those used to create the image, if + the {cl_ext_image_requirements_info_EXT} extension is supported, or + - Be a multiple of the maximum of the {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value + for all devices in the context that support images, otherwise. 
+endif::cl_ext_image_requirements_info[] * _image_slice_pitch_ is the size in bytes of each 2D slice in a 3D image, or the size in bytes of each image in a 1D or 2D image array. The _image_slice_pitch_ must be zero if _host_ptr_ is `NULL` @@ -2718,12 +2728,26 @@ descriptor information associated with `mem_object`. Image elements are stored according to their image format as described in <>. +ifndef::cl_ext_image_requirements_info[] If the buffer object specified by `mem_object` was created with {CL_MEM_USE_HOST_PTR}, the _host_ptr_ specified to {clCreateBuffer} or {clCreateBufferWithProperties} must be aligned to the maximum of the {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT} value for all devices in the context associated with the buffer specified by `mem_object` that support images. +endif::cl_ext_image_requirements_info[] +ifdef::cl_ext_image_requirements_info[] +If the buffer object specified by `mem_object` was created with +{CL_MEM_USE_HOST_PTR}, the _host_ptr_ specified to {clCreateBuffer} or +{clCreateBufferWithProperties} must + + * Be aligned to the {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} value + returned for parameters compatible with those used to create the + image, if the {cl_ext_image_requirements_info_EXT} extension is supported, or + * Be aligned to the maximum of the {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT} + value for all devices in the context associated with the buffer specified by + `mem_object` that support images, otherwise.
+endif::cl_ext_image_requirements_info[] Creating a 2D image object from another 2D image object creates a new 2D image object that shares the image data store with `mem_object` but views @@ -5112,6 +5136,171 @@ returned in _errcode_ret_: -- endif::cl_khr_gl_sharing[] +ifdef::cl_ext_image_requirements_info[] +=== Querying Image Requirements + +[open,refpage='clGetImageRequirementsInfoEXT',desc='Get information on image requirements.',type='protos'] +-- +To get information specific to the requirements of an image before creating it call the function + +include::{generated}/api/protos/clGetImageRequirementsInfoEXT.txt[] +include::{generated}/api/version-notes/clGetImageRequirementsInfoEXT.asciidoc[] + + * _context_ is the OpenCL context in which the query will be performed. + * _properties_ is an optional list of properties for the image object and their + corresponding values. The list is terminated with the special property 0. If + no properties are required, properties may be `NULL`. + * _flags_ is a bit-field that is used to specify allocation and usage information + about the image format being queried and is described in the + <> table. _flags_ may be {CL_MEM_READ_WRITE} to + specialize the query for images that may be read from and written to by different + kernel instances when correctly ordered by event dependencies, or {CL_MEM_READ_ONLY} + to specialize the query for images that may be read from by a kernel, or + {CL_MEM_WRITE_ONLY} to specialize the query for images that may be written to by + a kernel, or {CL_MEM_KERNEL_READ_AND_WRITE} to specialize the query for images that + may be both read from and written to by the same kernel instance. When _flags_ is + `0` the value returned for the query must be correct for all possible values of _flags_. + * _image_format_ is a pointer to a structure describing the format of the image + for which requirements are being queried. Refer to the + <> section for a detailed + description. 
+ * _image_desc_ is a pointer to a structure that describes type and dimensions of + the image for which requirements are being queried. Refer to the + <> section for a detailed description of + the image descriptor. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetImageRequirementsInfoEXT} is described in the + <<image-requirements-info-table>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <<image-requirements-info-table>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[image-requirements-info-table]] +.List of supported param_names by {clGetImageRequirementsInfoEXT} +[cols="4,1,4",options="header"] +|==== +| Image Format Info | Return type | Info. returned in _param_value_ + +| {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT_anchor} +| `size_t` +| Returns the minimum alignment in bytes required for the data store backing + an image created using the parameters passed to {clGetImageRequirementsInfoEXT}. + _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is + `NULL` the implementation will return an alignment that would be sufficient for + all possible values of the missing argument(s). + + The value returned is a power of two. + +|{CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT_anchor} +| `size_t` +| Returns the row pitch alignment required in bytes for images created from + a buffer with the parameters passed to {clGetImageRequirementsInfoEXT}. + The value returned is a power of two. _image_format_ or _image_desc_ + are allowed to be `NULL`. 
When either or both is `NULL` the value returned is + the minimum row pitch alignment that works for all possible values of the missing + argument(s). + +| {CL_IMAGE_REQUIREMENTS_SIZE_EXT_anchor} +| `size_t` +| Returns the minimal size in bytes that a buffer would need to be to back an + image created using the parameters passed to {clGetImageRequirementsInfoEXT}. + + + Both _image_format_ and _image_desc_ must be non-`NULL`, otherwise + {CL_INVALID_VALUE} is returned. + +| {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT_anchor} +| `cl_uint` +| Returns the max width supported for creating images with the parameters passed + to {clGetImageRequirementsInfoEXT}. + _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is + `NULL` the implementation will return a max width that would be supported for + all possible values of the missing argument(s). + + When _image_desc_ is not `NULL`, the value of its _image_width_ member is + ignored and has no effect on the value returned. The value of all other members, + except `mem_object` may be `0` to require that the value returned be supported + for all possible values of the members that are set to `0`. + +// TODO: should we require _image_width_ to be `0`? + +| {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT_anchor} +| `cl_uint` +| Returns the max height supported for creating images with the parameters passed + to {clGetImageRequirementsInfoEXT}. + _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is + `NULL` the implementation will return a max height that would be supported for + all possible values of the missing argument(s). + + When _image_desc_ is not `NULL`, the value of its _image_height_ member is + ignored and has no effect on the value returned. The value of all other members, + except `mem_object` may be `0` to require that the value returned be supported + for all possible values of the members that are set to `0`. 
+ + If _image_desc_ is not `NULL`, then _image_type_ must be either `0`, + {CL_MEM_OBJECT_IMAGE2D}, {CL_MEM_OBJECT_IMAGE2D_ARRAY}, or {CL_MEM_OBJECT_IMAGE3D}, + otherwise {CL_INVALID_IMAGE_DESCRIPTOR} is returned. + +// TODO: should we require _image_height_ to be `0`? + +| {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT_anchor} +| `cl_uint` +| Returns the max depth supported for creating images with the parameters passed + to {clGetImageRequirementsInfoEXT}. + _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is + `NULL` the implementation will return a max depth that would be supported for + all possible values of the missing argument(s). + + When _image_desc_ is not `NULL`, the value of its _image_depth_ member is + ignored and has no effect on the value returned. The value of all other members, + except `mem_object` may be `0` to require that the value returned be supported + for all possible values of the members that are set to `0`. + + If _image_desc_ is not `NULL`, then _image_type_ must be either `0` or + {CL_MEM_OBJECT_IMAGE3D}, otherwise {CL_INVALID_IMAGE_DESCRIPTOR} is returned. + +// TODO: should we require _image_depth_ to be `0`? + +| {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT_anchor} +| `cl_uint` +| Returns the max array size supported for creating images with the parameters passed + to {clGetImageRequirementsInfoEXT}. + _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is + `NULL` the implementation will return a max array size that would be supported for + all possible values of the missing argument(s). + + When _image_desc_ is not `NULL`, the value of its _image_array_size_ member is + ignored and has no effect on the value returned. The value of all other members, + except `mem_object` may be `0` to require that the value returned be supported + for all possible values of the members that are set to `0`. 
+ + If _image_desc_ is not `NULL`, then _image_type_ must be either `0`, + {CL_MEM_OBJECT_IMAGE1D_ARRAY} or {CL_MEM_OBJECT_IMAGE2D_ARRAY}, otherwise + {CL_INVALID_IMAGE_DESCRIPTOR} is returned. + +// TODO: should we require _image_array_size_ to be `0`? +|==== + +// refError + +{clGetImageRequirementsInfoEXT} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_PROPERTY} if a property name in properties is not a supported + property name, if the value specified for a supported property name is not + valid, or if the same property name is specified more than once. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if values specified in _image_format_ + are not valid. + * {CL_INVALID_IMAGE_DESCRIPTOR} if values specified in _image_desc_ + are not valid. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<image-requirements-info-table>> table and + _param_value_ is not `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_ext_image_requirements_info[] == Pipes diff --git a/extensions/cl_ext_image_requirements_info.asciidoc b/extensions/cl_ext_image_requirements_info.asciidoc deleted file mode 100644 index aedc71bc5..000000000 --- a/extensions/cl_ext_image_requirements_info.asciidoc +++ /dev/null @@ -1,388 +0,0 @@ -// Copyright 2018-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::../config/attribs.txt[] -include::{generated}/api/api-dictionary-no-links.asciidoc[] -:source-highlighter: coderay - -= cl_ext_image_requirements_info -:R: pass:q,r[^(R)^] -Khronos{R} OpenCL Working Group - -== Name Strings - -`cl_ext_image_requirements_info` - -== Contact - -Please see the *Issues* list in the Khronos *OpenCL-Docs* repository: + -https://github.com/KhronosGroup/OpenCL-Docs - -== Contributors - -Kevin Petit, Arm Ltd. + -Jeremy Kemp, Imagination Technologies + -Alastair Murray, Codeplay Software Ltd. + -Balaji Calidas, Qualcomm + - -== Notice - -include::../copyrights.txt[] - -== Status - -Shipping. - -== Version - -Built On: {docdate} + -Version: 0.5.0 - -== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.9. - -This extension requires OpenCL 3.0. - -== Overview - -This extension enables applications to query requirements for an image without -having to create the image. 
- -== New API Functions - -[source,c] ----- -cl_int clGetImageRequirementsInfoEXT( - cl_context context, - const cl_mem_properties* properties, - cl_mem_flags flags, - const cl_image_format* image_format, - const cl_image_desc* image_desc, - cl_image_requirements_info_ext param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); ----- - -== New API Types - -[source,c] ----- -typedef cl_uint cl_image_requirements_info_ext; ----- - -== New API Enums - -Accepted value for the _param_name_ parameter to {clGetImageRequirementsInfoEXT}: - -[source,c] ----- -CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT 0x1290 -CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT 0x1292 -CL_IMAGE_REQUIREMENTS_SIZE_EXT 0x12B2 -CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT 0x12B3 -CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT 0x12B4 -CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT 0x12B5 -CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT 0x12B6 ----- - -== Modifications to the OpenCL API Specification - -(Modify Section 5.3.1, *Creating Image Objects*) :: -+ --- -The following text: - --- -For a 2D image created from a buffer, the pitch specified (or computed if -pitch specified is 0) must be a multiple of the maximum of the -{CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value for all devices in the context associated -with the buffer specified by mem_object that support images. --- - -is replaced with: - --- -For a 2D image created from a buffer, the pitch specified (or computed if -pitch specified is 0) must be a multiple of the -{CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} value returned for parameters -compatible with those used to create the image. 
--- - -The following text: --- -If the buffer object specified by mem_object was created with {CL_MEM_USE_HOST_PTR}, -the _host_ptr_ specified to {clCreateBuffer} or {clCreateBufferWithProperties} must -be aligned to the maximum of the {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT} value for -all devices in the context associated with the buffer specified by mem_object that -support images. --- - -is replaced with: - --- -If the buffer object specified by mem_object was created with {CL_MEM_USE_HOST_PTR}, -the _host_ptr_ specified to {clCreateBuffer} or {clCreateBufferWithProperties} must -be aligned to the {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} value for the -returned for parameters compatible with those used to create the image. --- - --- - -(Modify Section 5.3, *Image Objects*) :: -+ --- - -(Add a new subsection 5.3.X, *Querying image requirements*) :: -+ --- -[open,refpage='clGetImageRequirementsInfoEXT',desc='Get information on image requirements.',type='protos'] - -To get information specific to the requirements of an image before creating it call the function - -include::{generated}/api/protos/clGetImageRequirementsInfoEXT.txt[] - - * _context_ is the OpenCL context in which the query will be performed. - * _properties_ is an optional list of properties for the image object and their - corresponding values. The list is terminated with the special property 0. If - no properties are required, properties may be `NULL`. - * _flags_ is a bit-field that is used to specify allocation and usage information - about the image format being queried and is described in the - <> table. 
_flags_ may be {CL_MEM_READ_WRITE} to - specialize the query for images that may be read from and written to by different - kernel instances when correctly ordered by event dependencies, or {CL_MEM_READ_ONLY} - to specialize the query for images that may be read from by a kernel, or - {CL_MEM_WRITE_ONLY} to specialiaze the query for images that may be written to by - a kernel, or {CL_MEM_KERNEL_READ_AND_WRITE} to specialize the query for images that - may be both read from and written to by the same kernel instance. When _flags_ is - `0` the value returned for the query must be correct for all possible values of _flags_. - * _image_format_ is a pointer to a structure describing the format of the image - for which requirements are being queried. Refer to the - <> section for a detailed - description. - * _image_desc_ is a pointer to a structure that describes type and dimensions of - the image for which requirements are being queried. Refer to the - <> section for a detailed description of - the image descriptor. - * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetImageRequirementsInfoEXT} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. - -[[image-requirements-info-table]] -.List of supported param_names by <> -[cols="4,1,4",options="header"] -|==== -| Image Format Info | Return type | Info. 
returned in _param_value_ - -| {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} -| `size_t` -| Returns the minimum alignment in bytes required for the data store backing - an image created using the parameters passed to {clGetImageRequirementsInfoEXT}. - _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is - `NULL` the implementation will return an alignment that would be sufficient for - all possible values of the missing argument. + - The value returned is a power of two. - -|{CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} -| `size_t` -| Returns the row pitch alignment required in bytes for images created from - a buffer with the parameters passed to {clGetImageRequirementsInfoEXT}. - The value returned is a power of two. _image_format_ or _image_desc_ - are allowed to be `NULL`. When either or both is `NULL` the value returned is - the minimum row pitch alignment that works for all possible values of the missing - argument(s). - -| {CL_IMAGE_REQUIREMENTS_SIZE_EXT} -| `size_t` -| Returns the minimal size in bytes that a buffer would need to be to back an - image created using the parameters passed to {clGetImageRequirementsInfoEXT}. - + - Both _image_format_ and _image_desc_ must be non-`NULL`, otherwise - {CL_INVALID_VALUE} is returned. - -| {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} -| `cl_uint` -| Returns the max width supported for creating images with the parameters passed - to {clGetImageRequirementsInfoEXT}. - _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is - `NULL` the implementation will return a max width that would be supported for - all possible values of the missing argument(s). + - When _image_desc_ is not `NULL`, the value of its _image_width_ member is - ignored and has no effect on the value returned. The value of all other members, - except `mem_object` may be `0` to require that the value returned be supported - for all possible values of the members that are set to `0`. 
+ - TODO: should we require _image_width_ to be `0`? - -| {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} -| `cl_uint` -| Returns the max height supported for creating images with the parameters passed - to {clGetImageRequirementsInfoEXT}. - _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is - `NULL` the implementation will return a max height that would be supported for - all possible values of the missing argument(s). + - When _image_desc_ is not `NULL`, the value of its _image_height_ member is - ignored and has no effect on the value returned. The value of all other members, - except `mem_object` may be `0` to require that the value returned be supported - for all possible values of the members that are set to `0`. + - If _image_desc_ is not `NULL`, then _image_type_ must be either `0`, - {CL_MEM_OBJECT_IMAGE2D}, {CL_MEM_OBJECT_IMAGE2D_ARRAY}, or {CL_MEM_OBJECT_IMAGE3D}, - otherwise {CL_INVALID_IMAGE_DESCRIPTOR} is returned. + - TODO: should we require _image_height_ to be `0`? - -| {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} -| `cl_uint` -| Returns the max depth supported for creating images with the parameters passed - to {clGetImageRequirementsInfoEXT}. - _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is - `NULL` the implementation will return a max depth that would be supported for - all possible values of the missing argument(s). + - When _image_desc_ is not `NULL`, the value of its _image_depth_ member is - ignored and has no effect on the value returned. The value of all other members, - except `mem_object` may be `0` to require that the value returned be supported - for all possible values of the members that are set to `0`. + - If _image_desc_ is not `NULL`, then _image_type_ must be either `0` or - {CL_MEM_OBJECT_IMAGE3D}, otherwise {CL_INVALID_IMAGE_DESCRIPTOR} is returned. + - TODO: should we require _image_depth_ to be `0`? 
- -| {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} -| `cl_uint` -| Returns the max array size supported for creating images with the parameters passed - to {clGetImageRequirementsInfoEXT}. - _image_format_ or _image_desc_ are allowed to be `NULL`. When either or both is - `NULL` the implementation will return a max array size that would be supported for - all possible values of the missing argument(s). + - When _image_desc_ is not `NULL`, the value of its _image_array_size_ member is - ignored and has no effect on the value returned. The value of all other members, - except `mem_object` may be `0` to require that the value returned be supported - for all possible values of the members that are set to `0`. + - If _image_desc_ is not `NULL`, then _image_type_ must be either `0`, - {CL_MEM_OBJECT_IMAGE1D_ARRAY} or {CL_MEM_OBJECT_IMAGE2D_ARRAY}, otherwise - {CL_INVALID_IMAGE_DESCRIPTOR} is returned. + - TODO: should we require _image_array_size_ to be `0`? -|==== - -// refError - -{clGetImageRequirementsInfoEXT} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_CONTEXT} if _context_ if not a valid context. - * {CL_INVALID_PROPERTY} if a property name in properties is not a supported - property name, if the value specified for a supported property name is not - valid, or if the same property name is specified more than once. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if values specified in _image_format_ - are not valid. - * {CL_INVALID_IMAGE_DESCRIPTOR} if values specified in _image_desc_ - are not valid. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. 
- * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- --- - -== Interactions with Other Extensions - -This extension interacts with `cl_khr_image2d_from_buffer`. - -When `cl_khr_image2d_from_buffer` is supported: - -- The value returned by {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} after converting in - bytes for the supported format with the biggest element size - (channel data type size * number of channels) must be greater than or equal - to the value returned by {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} for any - supported format. -- The value returned by {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT} after converting - in bytes for the supported format with the biggest element size - (channel data type size * number of channels) must be greater than or equal to - the value returned by {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} for any - supported format. - -== Conformance tests - -. Basic checks for {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} and {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} -* For all image formats and types -** Check that the {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} and {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} queries can be performed successfully and that the values returned are a power of two. - -. Check consistency with `cl_khr_image2d_from_buffer` -* When `cl_khr_image2d_from_buffer` is supported, check that the value returned by {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} after converting in bytes for the supported format with the biggest element size (channel data type size * number of channels) is greater than or equal to the value returned by {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} for all supported formats. 
-* When `cl_khr_image2d_from_buffer` is supported, check that the value returned by {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT} after converting in bytes for the supported format with the biggest element size (channel data type size * number of channels) is greater than or equal to the value returned by {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT} for all supported formats. - -. Negative tests for {CL_IMAGE_REQUIREMENTS_SIZE_EXT} -* Check that attempting to perform the {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query without specifying the _image_format_ results in {CL_INVALID_VALUE} being returned. -* Check that attempting to perform the {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query without specifying the _image_desc_ results in {CL_INVALID_VALUE} being returned. - -. Consistency checks for {CL_IMAGE_REQUIREMENTS_SIZE_EXT} -* When creating 2D images from a buffer is supported, for all formats and a selection of image dimensions -** Check that the {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query can be performed successfully. -** Create a buffer with the size returned and check that an image can successfully be created from the buffer. -** Check that the value returned for {CL_MEM_SIZE} for the image is the same as the value returned for {CL_IMAGE_REQUIREMENTS_SIZE_EXT}. - -. Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} -* For all image formats, image types and a selection of values for other members in _image_desc_ (that MUST include `0`) -** Check that the {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} query can be performed successfully -** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE_MAX_BUFFER_SIZE} for images of {CL_MEM_OBJECT_IMAGE1D_BUFFER} type or smaller than or equal to the value returned for {CL_DEVICE_IMAGE3D_MAX_WIDTH} for images of {CL_MEM_OBJECT_IMAGE3D} type or smaller than or equal to the value returned for {CL_DEVICE_IMAGE2D_MAX_WIDTH} for all other image types. - -. 
Negative tests for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} -* Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query on all image types for which it is not valid -* Check that {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. - -. Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} -* For all image formats, valid image types and a selection of values for other members in _image_desc_ (that MUST include `0`) -** Check that the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query can be performed successfully -** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE2D_MAX_HEIGHT} for 2D or 2D array images or {CL_DEVICE_IMAGE3D_MAX_HEIGHT} for 3D images. - -. Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} -* Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query on all image types for which it is not valid -* Check that {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. - -. Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} -* For all image formats, valid image types and a selection of values for other members in _image_desc_ (that MUST include `0`) -** Check that the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query can be performed successfully -** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE3D_MAX_DEPTH}. - -. Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} -* Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query on all image types for which it is not valid -* Check that {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. - -. 
Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} -* For all image formats, valid image types and a selection of values for other members in _image_desc_ (that MUST include `0`) -** Check that the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query can be performed successfully -** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE_MAX_ARRAY_SIZE}. - -. General negative testing for {clGetImageRequirementsInfoEXT} -** Write tests for all possible testable generic error codes. - -== Issues - -None. - -== Version History - -[cols="5,15,15,70"] -[grid="rows"] -[options="header"] -|==== -| Version | Date | Author | Changes -| 0.5.0 | 2022-01-18 | Kevin Petit | *Initial EXT revision* -|==== diff --git a/extensions/extensions.txt b/extensions/extensions.txt index d28468b9c..6414878d7 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -39,8 +39,6 @@ include::cl_ext_float_atomics.asciidoc[] include::cl_ext_image_from_buffer.asciidoc[] <<< include::cl_ext_image_raw10_raw12.asciidoc[] -<<< -include::cl_ext_image_requirements_info.asciidoc[] // Vendor Extensions :leveloffset: 0 From d38347eaba4a748749c22e5aa335339857a8ca6e Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 14 Jan 2025 17:13:59 +0000 Subject: [PATCH 179/190] Command-buffer query for supported queue properties (#850) This change introduces a new device query related to the command-buffer extension - `CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR`. This is different from `CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR`, as we want to convey to the user that an implementation supports using a queue property with a command-buffer, but is not *required* to use the property. This supersedes reporting queue related values from the `CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR` query. 
The flaw with `CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR` is that it contains bits explicitly added by the command-buffer extension for reporting support for queue properties. This is a brittle design, as any new queue property added in future would need to have a new bit added here in the command-buffer extension to report support when used with command-buffers. Instead a better design is to have a new query reporting queue properties supported, `CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR`, and keeping `CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR` for capabilities unrelated to the command-queue properties. The `CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR` use-case can now be covered by returning `CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE` from `CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR`, so it is removed. --- api/cl_khr_command_buffer.asciidoc | 9 +++++---- api/opencl_platform_layer.asciidoc | 18 ++++++++++++------ api/opencl_runtime_layer.asciidoc | 7 +++---- xml/cl.xml | 9 +++++---- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index 9f3cd8868..97d706e8a 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -4,7 +4,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer.txt[] // *Revision*:: -// 0.9.5 +// 0.9.6 // *Extension and Version Dependencies*:: // This extension requires OpenCL 1.2 or later. // Buffering of SVM commands requires OpenCL 2.0 or later. @@ -12,7 +12,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer.txt[] === Other Extension Metadata *Last Modified Date*:: - 2024-07-24 + 2024-10-02 *IP Status*:: No known IP claims. 
*Contributors*:: @@ -237,11 +237,11 @@ features: * {cl_device_info_TYPE} ** {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} ** {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR} + ** {CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR} * {cl_device_command_buffer_capabilities_khr_TYPE} ** {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} ** {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} ** {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR} - ** {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} * {cl_command_buffer_properties_khr_TYPE} ** {CL_COMMAND_BUFFER_FLAGS_KHR} * {cl_command_buffer_flags_khr_TYPE} @@ -464,4 +464,5 @@ features: * 0.9.5, 2024-07-24 ** Add a properties parameter to all command recording entry-points (provisional). - + * 0.9.6, 2024-10-02 + ** Add device query for supported queue properties (provisional). diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index bf6c329ae..6377aca6b 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -1742,12 +1742,6 @@ include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR.asciidoc[] - {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR_anchor} Device - supports the ability to record command-buffers to out-of-order - command-queues. - -include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR.asciidoc[] - ifdef::cl_khr_command_buffer_multi_device[] {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR_anchor} Device supports the ability to record commands to more than one @@ -1766,6 +1760,18 @@ include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_P It is valid for a command-queue to be created with extra properties in addition to this base requirement and still be compatible with command-buffer execution. 
+ +| {CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR.asciidoc[] + + | {cl_command_queue_properties_TYPE} + | Bitmask of the supported properties with which a command-queue may be + created to allow a command-buffer to be executed on it. It is invalid + for a command-queue to be created with a property not reported and + still be compatible with command-buffer execution. + + The mandated minimum capability is: {CL_QUEUE_PROFILING_ENABLE}. endif::cl_khr_command_buffer[] ifdef::cl_khr_command_buffer_multi_device[] diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index e10bdcd32..22978b83e 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -14378,10 +14378,9 @@ returned in _errcode_ret_: * {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a valid command-queue. - * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any command-queue in _queues_ is - an out-of-order command-queue and the device associated with the - command-queue does not support the - {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} capability. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any command-queue + in _queues_ contains a property not specified by + {CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR}. * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any command-queue in _queues_ does not contain the minimum properties specified by {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR}. diff --git a/xml/cl.xml b/xml/cl.xml index 309cff77d..e147f9770 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1330,7 +1330,7 @@ server's OpenCL/api-docs repository. - + @@ -1780,7 +1780,8 @@ server's OpenCL/api-docs repository. - + + @@ -7190,7 +7191,7 @@ server's OpenCL/api-docs repository. - + @@ -7207,13 +7208,13 @@ server's OpenCL/api-docs repository. 
+ - From d8a9ecb086e8d8dd716c63e99eb0882f8d398ab0 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 21 Jan 2025 10:46:22 -0700 Subject: [PATCH 180/190] OpenCL C: Update ULP requirements for half-precision divide and reciprocal (#1293) * OpenCL C: Update ULP requirements for half-precision divide and reciprocal Update ULP requirements for these builtins to 1.0 as per discussion on #1278 * Restrict update to divide for now * Update SPIR-V environment spec to set fp-16 divide ULP to 1.0 * relax reciprocal ULP requirement also --------- Co-authored-by: Ben Ashbaugh --- OpenCL_C.txt | 4 ++-- env/numerical_compliance.asciidoc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index e6ddbd907..457a95a81 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -16288,8 +16288,8 @@ is the infinitely precise result. | *_x_ + _y_* | Correctly rounded | Correctly rounded | *_x_ - _y_* | Correctly rounded | Correctly rounded | *_x_ * _y_* | Correctly rounded | Correctly rounded -| *1.0 / _x_* | Correctly rounded | \<= 1 ulp -| *_x_ / _y_* | Correctly rounded | \<= 1 ulp +| *1.0 / _x_* | \<= 1 ulp | \<= 1 ulp +| *_x_ / _y_* | \<= 1 ulp | \<= 1 ulp | | | | *acos* | \<= 2 ulp | \<= 3 ulp | *acosh* | \<= 2 ulp | \<= 3 ulp diff --git a/env/numerical_compliance.asciidoc b/env/numerical_compliance.asciidoc index f70b81c1b..af35aeb6d 100644 --- a/env/numerical_compliance.asciidoc +++ b/env/numerical_compliance.asciidoc @@ -196,7 +196,7 @@ given as ULP values for the full profile. | *OpFDiv* | Correctly rounded | \<= 2.5 ulp -| Correctly rounded +| \<= 1.0 ulp | *OpExtInst* *acos* | \<= 4 ulp From 03eed803fe51e17ff689419f80b3e2c310fb2665 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 22 Jan 2025 19:52:44 +0000 Subject: [PATCH 181/190] Fix formatting issues in builtins tables (#1290) 1. Add missing newline between double and half versions of 'ldexp' 2. 
Fix formatting of snippets in 'mix' --- OpenCL_C.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 457a95a81..f3701bdb8 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -5368,6 +5368,7 @@ endif::cl_khr_fp16[] double__n__ *ldexp*(double__n__ _x_, int__n__ _k_) + double__n__ *ldexp*(double__n__ _x_, int _k_) + double *ldexp*(double _x_, int _k_) + ifdef::cl_khr_fp16[] half__n__ *ldexp*(half__n__ _x_, int__n__ _k_) + half__n__ *ldexp*(half__n__ _x_, int _k_) + @@ -6484,11 +6485,10 @@ ifdef::cl_khr_fp16[gentypeh *min*(gentypeh _x_, half _y_)] ifdef::cl_khr_fp16[gentypeh *mix*(gentypeh _x_, gentypeh _y_, half _a_)] a| Returns the linear blend of _x_ and _y_ implemented as: - _x_ + (_y_ - _x_) * _a_ +_x_ + (_y_ - _x_) * _a_ - _a_ must be a value in the range [0.0, 1.0]. - If _a_ is not in the range [0.0, 1.0], the return values are - undefined. +_a_ must be a value in the range [0.0, 1.0]. If _a_ is not in the range [0.0, +1.0], the return values are undefined. ifdef::cl_khr_fp16[] NOTE: The half-precision *mix* function can be implemented using From 1224a1bca8b7f4148caa2c293139105d95a94f35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Fri, 24 Jan 2025 17:44:09 +0000 Subject: [PATCH 182/190] Integrate cl_ext_image_from_buffer into unified specification (#1299) * Integrate cl_ext_image_from_buffer into unified specification Also add version notes for all CL_IMAGE_REQUIREMENTS_* enums. 
Signed-off-by: Kevin Petit Change-Id: I7b7b093034121a9215786beff7318b18e7d0c24a * Update api/opencl_runtime_layer.asciidoc Co-authored-by: Ben Ashbaugh * Update api/footnotes.asciidoc Co-authored-by: Ben Ashbaugh * Update api/cl_ext_image_from_buffer.asciidoc --------- Signed-off-by: Kevin Petit Co-authored-by: Ben Ashbaugh --- api/cl_ext_image_from_buffer.asciidoc | 67 +++- api/footnotes.asciidoc | 2 +- api/opencl_runtime_layer.asciidoc | 107 ++++++- extensions/cl_ext_image_from_buffer.asciidoc | 302 ------------------- extensions/extensions.txt | 2 - 5 files changed, 162 insertions(+), 318 deletions(-) delete mode 100644 extensions/cl_ext_image_from_buffer.asciidoc diff --git a/api/cl_ext_image_from_buffer.asciidoc b/api/cl_ext_image_from_buffer.asciidoc index 6bd6fa9bd..1c39426a8 100644 --- a/api/cl_ext_image_from_buffer.asciidoc +++ b/api/cl_ext_image_from_buffer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_ext_image_from_buffer.txt[] @@ -6,14 +6,73 @@ include::{generated}/meta/{refprefix}cl_ext_image_from_buffer.txt[] === Other Extension Metadata *Last Modified Date*:: - 2022-01-25 + 2025-01-15 *IP Status*:: No known IP claims. +*Contributors*:: + - Kevin Petit, Arm Ltd. + - Jeremy Kemp, Imagination Technologies + - Alastair Murray, Codeplay Software Ltd. + - Balaji Calidas, Qualcomm === Description -The latest published specification for this extension is available on -the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_image_from_buffer.html[OpenCL registry]. +This extension enables all types of images to be created from an existing buffer +object. + +=== New Enums + + * {cl_image_requirements_info_ext_TYPE} + ** {CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT} + +=== Conformance tests + +. 
Test access from kernel + - For all image types + - For a few/all image formats + - For several values of row/slice pitch + - With or without a host_ptr + - Create buffer and fill with data + - Optionally create a sub-buffer with a randomly selected offset? + - Create an image from the buffer + - Read the image from a kernel and compare with values read using the buffer and direct addressing. They must match. + +//. TODO Test access via read/write/map commands? + +//. TODO Test copy to/from buffer? + +//. TODO Test fill? + +//. TODO Test copy to/from another image? + +. Test clGetImageInfo + - For all image types (one format per element size) + - For a few different row/pitch sizes (image dimensions being equal or not) + - Create an image from a buffer + - Check that the returned values for {CL_IMAGE_ROW_PITCH} and {CL_IMAGE_SLICE_PITCH} are correct. + +. Test clGetMemObjectInfo + - For all image types (1 format only) + - Create an image from a buffer + - Check that {CL_MEM_ASSOCIATED_MEMOBJECT} correctly returns the buffer that was used. + +. Negative testing for {clCreateImage} (alignment) + - For a few/all image formats + - For all image types + - Query row pitch, slice pitch and base image address alignment for the format + - Create an image from a buffer with invalid row pitch (not a multiple of required alignment) and check that {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} is returned. + - Create an image from a buffer with invalid slice pitch (not a multiple of required alignment) and check that {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} is returned. + - Create an image from a buffer with invalid base address alignment (not a multiple of required alignment) and check that {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} is returned. + +. 
Negative testing for {clCreateImage} (buffer size) + - For a few image formats (at least smallest and biggest element types) + - For all image types + - Create a buffer too small + - Check that image creation from that buffer is rejected with {CL_INVALID_IMAGE_SIZE} + +=== Issues + +None. === Version History diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index 4f7de3ec1..15fe29a34 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -53,7 +53,7 @@ Note that reading and writing 2D image arrays from a kernel with `image_array_si ] :fn-image-from-buffer: pass:n[ \ -To create a 2D image from a buffer object that share the data store between the image and buffer object. \ +To create an image from a buffer object that shares the data store between the image and buffer object. \ ] :fn-image-from-image: pass:n[ \ diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 22978b83e..92af20156 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -1933,8 +1933,16 @@ include::{generated}/api/version-notes/clCreateImageWithProperties.asciidoc[] the <> table. * _image_format_ is a pointer to a structure that describes format properties of the image to be allocated. - A 1D image buffer or 2D image can be created from a buffer by specifying a - buffer object in the __image_desc__->__mem_object__. + A 1D image buffer can be created from a buffer by specifying a buffer object + in __image_desc__->__mem_object__. + If the {cl_khr_image2d_from_buffer_EXT} extension is supported, a 2D image + can be created from a buffer by specifying a buffer object in + __image_desc__->__mem_object__. +ifdef::cl_ext_image_from_buffer[] + If the {cl_ext_image_from_buffer_EXT} extension is supported, an image of + any type can be created from a buffer by specifying a buffer object in + __image_desc__->__mem_object__. 
+endif::cl_ext_image_from_buffer[] A 2D image can be created from another 2D image object by specifying an image object in the __image_desc__->__mem_object__. Refer to the <> section @@ -2071,9 +2079,23 @@ returned in _errcode_ret_: * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if values specified in _image_format_ are not valid or if _image_format_ is `NULL`. +ifndef::cl_ext_image_from_buffer[] * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if a 2D image is created from a buffer and the row pitch and base address alignment does not follow the rules described for creating a 2D image from a buffer. +endif::cl_ext_image_from_buffer[] +ifdef::cl_ext_image_from_buffer[] + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if an image is created from a + buffer and the row pitch, or slice pitch, if the + {cl_ext_image_from_buffer_EXT} extension is supported, or base address + alignment do not follow the rules described for creating an image from a + buffer. + * {CL_INVALID_IMAGE_SIZE} if the {cl_ext_image_from_buffer_EXT} extension is + supported and an image is created from a buffer and the buffer passed in + __image_desc__->__mem_object__ is too small to be used as a data store for the + image, e.g. if its size is smaller than the value returned for + {CL_IMAGE_REQUIREMENTS_SIZE_EXT} for the parameters used to create the image. +endif::cl_ext_image_from_buffer[] * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if a 2D image is created from a 2D image object and the rules described above are not followed. * {CL_INVALID_IMAGE_DESCRIPTOR} if values specified in _image_desc_ are not @@ -2660,9 +2682,9 @@ ifndef::cl_ext_image_requirements_info[] for all devices in the context that support images. 
endif::cl_ext_image_requirements_info[] ifdef::cl_ext_image_requirements_info[] - For a 2D image created from a buffer, the image row pitch must also + For an image created from a buffer, the image row pitch must also - Be a multiple of the {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} value - returned for parameters compatible with those used to create the image, if + for the _image_format_, _image_type_ and _flags_ used to create the image, if the {cl_ext_image_requirements_info_EXT} extension is supported, or - Be a multiple of the maximum of the {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value for all devices in the context that support images, otherwise. @@ -2685,6 +2707,12 @@ endif::cl_khr_external_memory[] _image_height_ for a 2D image array or a 3D image, must be {geq} the image row pitch for a 1D image array, and must be a multiple of the image row pitch. +ifdef::cl_ext_image_from_buffer[] + For an image created from a buffer, the image slice pitch must be a multiple + of the {CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT} value for the + _image_format_, _image_type_ and _flags_ used to create the image, if the + {cl_ext_image_from_buffer_EXT} extension is supported. +endif::cl_ext_image_from_buffer[] * _num_mip_levels_ must be `0`, indicating that the image has a single ifndef::cl_khr_mipmap_image[] mipmap level. @@ -2699,7 +2727,12 @@ endif::cl_khr_mipmap_image[] * _mem_object_ may refer to a valid buffer or image memory object. `mem_object` can be a buffer memory object if `image_type` is {CL_MEM_OBJECT_IMAGE1D_BUFFER} or - {CL_MEM_OBJECT_IMAGE2D} footnote:[{fn-image-from-buffer}]. + {CL_MEM_OBJECT_IMAGE2D} +ifdef::cl_ext_image_from_buffer[] + , or any other `image_type` if the {cl_ext_image_from_buffer_EXT} extension + is supported. +endif::cl_ext_image_from_buffer[] + footnote:[{fn-image-from-buffer}] `mem_object` can be an image object if `image_type` is {CL_MEM_OBJECT_IMAGE2D} footnote:[{fn-image-from-image}]. Otherwise it must be `NULL`. 
@@ -2713,6 +2746,19 @@ size of element in bytes must be {leq} size of the buffer object. The image data in the buffer object is stored as a single scanline which is a linear sequence of adjacent elements. +ifdef::cl_ext_image_from_buffer[] +For a 1D image created from a buffer object, the `image_width` {times} size of +element in bytes must be {leq} size of the buffer object. The image data in the +buffer object is stored as a single scanline which is a linear sequence of +adjacent elements. + +For a 1D image array created from a buffer object, the `image_slice_pitch` {times} +`image_array_size` must be {leq} size of the buffer object specified by `mem_object`. +The image data in the buffer object is stored as a linear sequence of adjacent 1D +slices. Each slice is a single scanline padded to `image_slice_pitch` bytes. +Each scanline is a linear sequence of image elements. +endif::cl_ext_image_from_buffer[] + For a 2D image created from a buffer object, the `image_row_pitch` {times} `image_height` must be {leq} size of the buffer object specified by `mem_object`. @@ -2721,6 +2767,22 @@ adjacent scanlines. Each scanline is a linear sequence of image elements padded to `image_row_pitch` bytes. +ifdef::cl_ext_image_from_buffer[] +For a 2D image array created from a buffer object, the `image_slice_pitch` {times} +`image_array_size` must be {leq} size of the buffer object specified by `mem_object`. +The image data in the buffer object is stored as a linear sequence of adjacent 2D +slices. Each slice is a linear sequence of adjacent scanlines padded to +`image_slice_pitch` bytes. Each scanline is a linear sequence of image elements padded +to `image_row_pitch` bytes. + +For a 3D image created from a buffer object, the `image_slice_pitch` {times} +`image_depth` must be {leq} size of the buffer object specified by `mem_object`. +The image data in the buffer object is stored as a linear sequence of adjacent 2D +slices padded to `image_slice_pitch` bytes. 
Each slice is a linear sequence of adjacent +scanlines. Each scanline is a linear sequence of image elements padded to +`image_row_pitch` bytes. +endif::cl_ext_image_from_buffer[] + For an image object created from another image object, the values specified in the image descriptor except for `mem_object` must match the image descriptor information associated with `mem_object`. @@ -2796,10 +2858,9 @@ stored in the image can be accessed as linear RGB or sRGB values. [NOTE] ==== Concurrent reading from, writing to and copying between both a buffer object -and 1D image buffer or 2D image object associated with the buffer object is -undefined. -Only reading from both a buffer object and 1D image buffer or 2D image -object associated with the buffer object is defined. +and buffer or image object associated with the buffer object is undefined. +Only reading from both a buffer object and buffer or image object associated +with the buffer object is defined. Writing to an image created from a buffer and then reading from this buffer in a kernel even if appropriate synchronization operations (such as a @@ -5190,6 +5251,8 @@ include::{generated}/api/version-notes/clGetImageRequirementsInfoEXT.asciidoc[] | Image Format Info | Return type | Info. returned in _param_value_ | {CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT.asciidoc[] | `size_t` | Returns the minimum alignment in bytes required for the data store backing an image created using the parameters passed to {clGetImageRequirementsInfoEXT}. @@ -5199,6 +5262,8 @@ include::{generated}/api/version-notes/clGetImageRequirementsInfoEXT.asciidoc[] The value returned is a power of two. 
|{CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT.asciidoc[] | `size_t` | Returns the row pitch alignment required in bytes for images created from a buffer with the parameters passed to {clGetImageRequirementsInfoEXT}. @@ -5208,6 +5273,8 @@ include::{generated}/api/version-notes/clGetImageRequirementsInfoEXT.asciidoc[] argument(s). | {CL_IMAGE_REQUIREMENTS_SIZE_EXT_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_REQUIREMENTS_SIZE_EXT.asciidoc[] | `size_t` | Returns the minimal size in bytes that a buffer would need to be to back an image created using the parameters passed to {clGetImageRequirementsInfoEXT}. @@ -5216,6 +5283,8 @@ include::{generated}/api/version-notes/clGetImageRequirementsInfoEXT.asciidoc[] {CL_INVALID_VALUE} is returned. | {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT.asciidoc[] | `cl_uint` | Returns the max width supported for creating images with the parameters passed to {clGetImageRequirementsInfoEXT}. @@ -5229,6 +5298,8 @@ include::{generated}/api/version-notes/clGetImageRequirementsInfoEXT.asciidoc[] // TODO: should we require _image_width_ to be `0`? | {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT.asciidoc[] | `cl_uint` | Returns the max height supported for creating images with the parameters passed to {clGetImageRequirementsInfoEXT}. @@ -5245,6 +5316,8 @@ include::{generated}/api/version-notes/clGetImageRequirementsInfoEXT.asciidoc[] // TODO: should we require _image_height_ to be `0`? | {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT.asciidoc[] | `cl_uint` | Returns the max depth supported for creating images with the parameters passed to {clGetImageRequirementsInfoEXT}. 
@@ -5260,6 +5333,8 @@ include::{generated}/api/version-notes/clGetImageRequirementsInfoEXT.asciidoc[] // TODO: should we require _image_depth_ to be `0`? | {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT.asciidoc[] | `cl_uint` | Returns the max array size supported for creating images with the parameters passed to {clGetImageRequirementsInfoEXT}. @@ -5274,6 +5349,20 @@ include::{generated}/api/version-notes/clGetImageRequirementsInfoEXT.asciidoc[] {CL_MEM_OBJECT_IMAGE1D_ARRAY} or {CL_MEM_OBJECT_IMAGE2D_ARRAY}, otherwise {CL_INVALID_IMAGE_DESCRIPTOR} is returned. + // TODO: should we require _image_array_size_ to be `0`? + +ifdef::cl_ext_image_from_buffer[] +| {CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT.asciidoc[] +| `size_t` +| Returns the slice pitch alignment required in bytes for images created from + a buffer with the parameters passed to {clGetImageRequirementsInfoEXT}. + The value returned is a power of two. _image_format_ and _image_desc_ are + allowed to be `NULL`. When either or both is `NULL` the value returned is + the minimum slice pitch alignment that is supported for all possible values + of the missing argument(s). +endif::cl_ext_image_from_buffer[] + |==== // refError diff --git a/extensions/cl_ext_image_from_buffer.asciidoc b/extensions/cl_ext_image_from_buffer.asciidoc deleted file mode 100644 index 1ef094c5d..000000000 --- a/extensions/cl_ext_image_from_buffer.asciidoc +++ /dev/null @@ -1,302 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. 
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -:data-uri: -:icons: font -include::../config/attribs.txt[] -include::{generated}/api/api-dictionary-no-links.asciidoc[] -:source-highlighter: coderay - -= cl_ext_image_from_buffer -:R: pass:q,r[^(R)^] -Khronos{R} OpenCL Working Group - -== Name Strings - -`cl_ext_image_from_buffer` - -== Contact - -Please see the *Issues* list in the Khronos *OpenCL-Docs* repository: + -https://github.com/KhronosGroup/OpenCL-Docs - -== Contributors - -Kevin Petit, Arm Ltd. + -Jeremy Kemp, Imagination Technologies + -Alastair Murray, Codeplay Software Ltd. + -Balaji Calidas, Qualcomm + - -== Notice - -include::../copyrights.txt[] - -== Status - -Shipping. - -== Version - -Built On: {docdate} + -Version: 1.0.0 - -== Dependencies - -This extension is written against the OpenCL Specification version 3.0.9. - -This extension requires OpenCL 3.0. - -This extension requires `cl_ext_image_requirements_info`. - -== Overview - -This extension enables all types of images to be created from an existing buffer -object. - -== New API Enums - -Accepted value for the _param_name_ parameter to {clGetImageRequirementsInfoEXT}: - -[source,c] ----- -CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT 0x1291 ----- - -== Modifications to the OpenCL API Specification - -(Modify Section 5.3.1, *Creating Image Objects*) :: -+ --- -The following text: - --- -_image_format_ is a pointer to a structure that describes format properties of the image to be -allocated. A 1D image buffer or 2D image can be created from a buffer by specifying a buffer -object in the image_desc→mem_object. A 2D image can be created from another 2D image object -by specifying an image object in the image_desc→mem_object. Refer to the Image Format -Descriptor section for a detailed description of the image format descriptor. 
--- - -is replaced with: - --- -_image_format_ is a pointer to a structure that describes format properties of -the image to be allocated. An image can be created from a buffer by specifying -a buffer object in the _image_desc_->_mem_object_. A 2D image can be created from -another 2D image object by specifying an image object in the -_image_desc_->_mem_object_. Refer to the Image Format Descriptor section for a -detailed description of the image format descriptor. --- - -The following text: - --- -{CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if a 2D image is created from a buffer and -the row pitch and base address alignment does not follow the rules described -for creating a 2D image from a buffer. --- - -is replaced with: - --- -{CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if an image is created from a buffer and -the row or slice pitch and base address alignment do not follow the rules -described for creating an image from a buffer. --- - -The following text is added to the list of error conditions for {clCreateImageWithProperties}: - --- -{CL_INVALID_IMAGE_SIZE} if an image is created from a buffer and the buffer -passed in _image_desc->_mem_object_ is too small to be used as a data store -for the image, e.g. if its size is smaller than the value returned for -{CL_IMAGE_REQUIREMENTS_SIZE_EXT} for the parameters used to create the image. --- - - -The following text: - --- -For a 2D image created from a buffer, the pitch specified (or computed if -pitch specified is 0) must be a multiple of the maximum of the -{CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value for all devices in the context associated -with the buffer specified by mem_object that support images. --- - -is replaced with: - --- -For an image created from a buffer, the pitch specified (or computed if -pitch specified is 0) must be a multiple of the -{CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} value for the _image_format_, -_image_type_ and _flags_ used to create the image. 
--- - - -The following text is added to the description for `image_slice_pitch`: - --- -For an image created from a buffer, the pitch specified (or computed if -pitch specified is 0) must be a multiple of the -{CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT} value for the _image_format_, -_image_type_ and _flags_ used to create the image. --- - -The following text: - --- -`mem_object` may refer to a valid buffer or image memory object. `mem_object` -can be a buffer memory object if image_type is {CL_MEM_OBJECT_IMAGE1D_BUFFER} -or {CL_MEM_OBJECT_IMAGE2D}. `mem_object` can be an image object if _image_type_ -is {CL_MEM_OBJECT_IMAGE2D}. Otherwise it must be `NULL`. The image pixels are -taken from the memory objects data store. When the contents of the specified -memory objects data store are modified, those changes are reflected in the -contents of the image object and vice-versa at corresponding synchronization -points. --- - -is replaced with: - --- -`mem_object` may refer to a valid buffer or image memory object. `mem_object` -can be an image object if _image_type_ is {CL_MEM_OBJECT_IMAGE2D}. -Otherwise it must be `NULL`. The image pixels are taken from the memory objects -data store. When the contents of the specified memory objects data store are -modified, those changes are reflected in the contents of the image object and -vice-versa at corresponding synchronization points. --- - -The following text is added: - --- -For a 1D image created from a buffer object, the `image_width` {times} size of -element in bytes must be {leq} size of the buffer object. The image data in the -buffer object is stored as a single scanline which is a linear sequence of -adjacent elements. - -For a 1D image array created from a buffer object, the `image_slice_pitch` {times} -`image_array_size` must be {leq} size of the buffer object specified by `mem_object`. -The image data in the buffer object is stored as a linear sequence of adjacent 1D -slices. 
Each slice is a single scanline padded to `image_slice_pitch` bytes. -Each scanline is a linear sequence of image elements. - -For a 2D image array created from a buffer object, the `image_slice_pitch` {times} -`image_array_size` must be {leq} size of the buffer object specified by `mem_object`. -The image data in the buffer object is stored as a linear sequence of adjacent 2D -slices. Each slice is a linear sequence of adjacent scanlines padded to -`image_slice_pitch` bytes. Each scanline is a linear sequence of image elements padded -to `image_row_pitch` bytes. - -For a 3D image created from a buffer object, the `image_slice_pitch` {times} -`image_depth` must be {leq} size of the buffer object specified by `mem_object`. -The image data in the buffer object is stored as a linear sequence of adjacent 2D -slices padded to `image_slice_pitch` bytes. Each slice is a linear sequence of adjacent -scanlines. Each scanline is a linear sequence of image elements padded to -`image_row_pitch` bytes. --- - -The following text: - --- -Concurrent reading from, writing to and copying between both a buffer object and -1D image buffer or 2D image object associated with the buffer object is undefined. -Only reading from both a buffer object and 1D image buffer or 2D image object -associated with the buffer object is defined. --- - -is replaced with: - --- -Concurrent reading from, writing to and copying between both a buffer object and -an image object associated with the buffer object is undefined. Only reading from -both a buffer object and image object associated with the buffer object is defined. --- - -(Modify section 5.3.X, *Querying image requirements*) :: -+ --- -The following is added to the _List of supported param_names by -*clGetImageRequirementsInfoEXT*: - -[width="100%",cols="<34%,<33%,<33%",options="header"] -|==== -| Image Requirement Info | Return type | Info. 
returned in _param_value_ - -| {CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT} -| `size_t` -| Returns the slice pitch alignment required in bytes for images created from - a buffer with the parameters passed to {clGetImageRequirementsInfoEXT}. - The value returned is a power of two. _image_format_ and _image_desc_ are - allowed to be `NULL`. When either or both is `NULL` the value returned is - the minimum slice pitch alignment that is supported for all possible values - of the missing argument(s). - -|==== --- --- - -== Interactions with Other Extensions - -None. - -== Conformance tests - - -. Test access from kernel - - For all image types - - For a few/all image formats - - For several values of row/slice pitch - - With or without a host_ptr - - Create buffer and fill with data - - Optionally create a sub-buffer with a randomly selected offset? - - Create an image from the buffer - - Read the image from a kernel and compare with values read using the buffer and direct addressing. They must match. - -. TODO Test access via read/write/map commands? - -. TODO Test copy to/from buffer? - -. TODO Test fill? - -. TODO Test copy to/from another image? - -. Test clGetImageInfo - - For all image types (one format per element size) - - For a few different row/pitch sizes (image dimensions being equal or not) - - Create an image from a buffer - - Check that the returned values for {CL_IMAGE_ROW_PITCH} and {CL_IMAGE_SLICE_PITCH} are correct. - -. Test clGetMemObjectInfo - - For all image types (1 format only) - - Create an image from a buffer - - Check that {CL_MEM_ASSOCIATED_MEMOBJECT} correctly returns the buffer that was used. - -. 
Negative testing for {clCreateImage} (alignment) - For a few/all image formats - For all image types - Query row pitch, slice pitch and base image address alignment for the format - Create an image from a buffer with invalid row pitch (not a multiple of required alignment) and check that {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} is returned. - Create an image from a buffer with invalid slice pitch (not a multiple of required alignment) and check that {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} is returned. - Create an image from a buffer with invalid base address alignment (not a multiple of required alignment) and check that {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} is returned. - -. Negative testing for {clCreateImage} (buffer size) - For a few image formats (at least smallest and biggest element types) - For all image types - Create a buffer too small - Check that image creation from that buffer is rejected with {CL_INVALID_IMAGE_SIZE} - -== Issues - -None. - -== Version History - -[cols="5,15,15,70"] -[grid="rows"] -[options="header"] -|==== -| Version | Date | Author | Changes -| 1.0.0 | 2022-01-25 | Kevin Petit | *Initial EXT revision* -|==== - diff --git a/extensions/extensions.txt b/extensions/extensions.txt index 6414878d7..cf5c98519 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -36,8 +36,6 @@ Khronos{R} OpenCL Working Group <<< include::cl_ext_float_atomics.asciidoc[] <<< -include::cl_ext_image_from_buffer.asciidoc[] -<<< include::cl_ext_image_raw10_raw12.asciidoc[] // Vendor Extensions From 161fc79edaaf8afb7a4d63a0f5f2577b11afbb5b Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 28 Jan 2025 17:06:39 +0000 Subject: [PATCH 183/190] Refactor command-buffer queue compatibility (#1292) * Refactor command-buffer queue compatibility As proposed in https://github.com/KhronosGroup/OpenCL-Docs/issues/1142 the PR changes the semantics of the command-queues parameters used for command-buffer creation and enqueue.
The queues used on command-buffer creation now only inform the device and dependencies of commands, rather than restricting the properties set on the queues used for command-buffer enqueue. This is based ontop on the change in https://github.com/KhronosGroup/OpenCL-Docs/pull/850 to add supported queue property semantics. * Address review feedback Clarify wording around default list of command-queues used for command-buffer enqueue. * Update XML version --- api/cl_khr_command_buffer.asciidoc | 38 ++++---- ...l_khr_command_buffer_multi_device.asciidoc | 5 +- api/opencl_platform_layer.asciidoc | 3 +- api/opencl_runtime_layer.asciidoc | 88 +++++++++++-------- xml/cl.xml | 4 +- 5 files changed, 78 insertions(+), 60 deletions(-) diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index 97d706e8a..6da252866 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -12,7 +12,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer.txt[] === Other Extension Metadata *Last Modified Date*:: - 2024-10-02 + 2024-12-13 *IP Status*:: No known IP claims. *Contributors*:: @@ -43,11 +43,6 @@ Command-buffers enable a reduction in overhead when enqueuing the same workload multiple times. By separating the command-queue setup from dispatch, the ability to replay a set of previously created commands is introduced. -The command-queues a command-buffer will be executed on can be set on replay via -parameters to {clEnqueueCommandBufferKHR}, provided they are -<> with the command-queues used on command-buffer -recording. - ==== Background On embedded devices where building a command stream accounts for a significant @@ -74,7 +69,7 @@ or writes memory objects; or enqueues a native kernel, is not available for command-buffer recording. 
Finally commands recorded into a command buffer do not wait for or return event objects, these are instead replaced with device-side synchronization-point identifiers which enable out-of-order -execution when enqueued on <> command-queues. +execution of the command-buffer commands. Adding new entry-points for individual commands, rather than recording existing command-queue APIs with begin/end markers was a design decision made for the @@ -102,16 +97,22 @@ following reasons: ==== Command Synchronization -Device-side {cl_sync_point_khr_TYPE} synchronization-points can be used within -command-buffers to define command dependencies. This allows the commands of a -command-buffer to execute out-of-order on a single <> -command-queue. The command-buffer itself has no inherent in-order/out-of-order -property, this ordering is inferred from the command-queue used on command -recording. {clEnqueueCommandBufferKHR} submissions to an out-of-order queue -have the same execution semantics are other operations enqueued to an -out-of-order queue, such as {clEnqueueFillBuffer}, where execution between -enqueued operations may happen concurrently unless dependencies between the -operations are expressed with events. +The command-buffer object has no in-order/out-of-order property set on creation, +it is out-of-order, and command ordering is defined by the dependencies set when +commands are created. Command dependencies can be defined in 3 ways: + +1. Device-side {cl_sync_point_khr_TYPE} synchronization-points, providing an + explicit list of the commands to depend on. +2. Appending a {clCommandBarrierWithWaitListKHR} barrier command. +3. Passing an in-order queue when creating the command, creating an implicit + dependency on the previous command created in the command-buffer using + the same queue. 
+ +{clEnqueueCommandBufferKHR} submissions to an out-of-order queue have the same +execution semantics as other operations enqueued to an out-of-order queue, +such as {clEnqueueFillBuffer}, where execution between enqueued operations may +happen concurrently unless dependencies between the operations are expressed +with events. The {cl_sync_point_khr_TYPE} type is defined as a `cl_uint`, giving a hard upper limit on the number of commands a command-buffer can hold as @@ -466,3 +467,6 @@ features: (provisional). * 0.9.6, 2024-10-02 ** Add device query for supported queue properties (provisional). + * 0.9.7, 2024-12-13 + ** Refactor queue compatibility between command-buffer creation and enqueue + (provisional). diff --git a/api/cl_khr_command_buffer_multi_device.asciidoc b/api/cl_khr_command_buffer_multi_device.asciidoc index 8a595a5b3..fa3f3047b 100644 --- a/api/cl_khr_command_buffer_multi_device.asciidoc +++ b/api/cl_khr_command_buffer_multi_device.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer_multi_device.txt[] === Other Extension Metadata *Last Modified Date*:: - 2023-04-30 + 2024-12-13 *IP Status*:: No known IP claims. *Contributors*:: @@ -312,3 +312,6 @@ require it. * Revision 0.9.1, 2023-04-30 ** Added clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR as affected functions (provisional). + * Revision 0.9.2, 2024-12-13 + ** Update clRemapCommandBufferKHR behavior to match cl_khr_command_buffer + version 0.9.7 (provisional).
diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 6377aca6b..97cec2d8c 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -240,8 +240,7 @@ include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR_anchor} - Platform supports the ability to create a deep copy of an existing - command-buffer with the commands explicitly remapped to different, - potentially <>, queues. + command-buffer with the commands explicitly remapped to different queues. include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR.asciidoc[] diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 92af20156..5a1defac0 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -14193,16 +14193,34 @@ of 0 or 1. The simultaneous use capability removes this restriction and allows command-buffers to have a <> greater than 1. -[[compatible]] Command-buffers are created using an ordered list of command-queues that -commands are recorded to and execute on by default. -These command-queues can be replaced on command-buffer enqueue with -different command-queues, provided for each element in the replacement list -the substitute command-queue is compatible with the command-queue used on -command-buffer creation. -A _compatible_ command-queue is defined as a command-queue with -identical properties targeting the same device and in the same OpenCL -context. +commands are recorded to and execute on by default. All these queue objects +must share the same context, but may be associated with different devices when +the {cl_khr_command_buffer_multi_device_EXT} extension is supported. 
+ +When constructing a command-buffer by appending commands, the queue parameter +passed for the command being created is used to set the device with which the +command will be associated, and also to inform the scheduling of the command. +If the queue is an in-order queue, then an additional dependency is created on the +last command appended to the command-buffer using the same queue parameter. If +the queue is an out-of-order queue, then no extra dependencies on previous +commands using the same queue are created. All queue properties other than +{CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} are ignored for the purposes of command +creation, with the exception of any vendor extension defined queue properties +that explicitly define semantics for this purpose. + +When enqueuing a command-buffer, a list of command-queues to execute the +command-buffer on can be passed by the user, otherwise the command-queues set +on command-buffer creation are used by default for execution. A user passed +list may contain different command-queues, provided for each element the +substitute command-queue matches the device and context of the command-queue +used on command-buffer creation. Each command-queue in the enqueue list must +also have the minimum properties defined by +{CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR} and no properties +which are not reported by +{CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR}. These queue +properties have the same execution semantics for {clEnqueueCommandBufferKHR} +as other operations enqueued to the queue. While constructing a command-buffer it is valid for the user to interleave calls to the same queue which create commands, such as @@ -14266,7 +14284,7 @@ target the same device. Commands recorded to different command-queues in the same command-buffer may be executed concurrently to each other unless synchronized explicitly with -sync-points. +sync-points, barrier commands, or in-order queue implicit dependencies.
Ordering of other commands submitted to the same command-queues as used to enqueue a command-buffer is the responsibility of the programmer. A command-buffer enqueue spanning multiple queues can return an event to use @@ -14467,12 +14485,6 @@ returned in _errcode_ret_: * {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a valid command-queue. - * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any command-queue - in _queues_ contains a property not specified by - {CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR}. - * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any - command-queue in _queues_ does not contain the minimum properties - specified by {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR}. * {CL_INVALID_CONTEXT} if all the command-queues in _queues_ do not have the same OpenCL context. * {CL_INVALID_VALUE} if the {cl_khr_command_buffer_multi_device_EXT} @@ -14605,10 +14617,10 @@ include::{generated}/api/protos/clEnqueueCommandBufferKHR.txt[] include::{generated}/api/version-notes/clEnqueueCommandBufferKHR.asciidoc[] * _num_queues_ is the number of command-queues listed in _queues_. - * _queues_ is a pointer to an ordered list of command-queues <> with the command-queues used on recording. - _queues_ can be `NULL`, in which case the default command-queues used on - command-buffer creation are used and _num_queues_ must be 0. + * _queues_ is a pointer to an ordered list of command-queues to execute the + command-buffer on. _queues_ can be `NULL`, in which case the default + command-queues used on command-buffer creation are used and _num_queues_ + must be 0. * _command_buffer_ refers to a valid command-buffer object. * _event_wait_list_, _num_events_in_wait_list_ specify events that need to complete before this particular command can be executed. @@ -14653,9 +14665,15 @@ execution was successfully queued, or one of the errors below: _num_queues_ set on _command_buffer_ creation. 
* {CL_INVALID_COMMAND_QUEUE} if any element of _queues_ is not a valid command-queue. - * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any element of _queues_ is not - <> with the command-queue set on - _command_buffer_ creation at the same list index. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any command-queue + in _queues_ contains a property not specified by + {CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR}. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any + command-queue in _queues_ does not contain the minimum properties + specified by {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR}. + * {CL_INVALID_DEVICE} if any element of _queues_ does not have the same + device as the command-queue set on _command_buffer_ creation at the + same list index. * {CL_INVALID_CONTEXT} if any element of _queues_ does not have the same context as the command-queue set on _command_buffer_ creation at the same list index. @@ -16039,22 +16057,18 @@ ifdef::cl_khr_command_buffer_multi_device[] If the {cl_khr_command_buffer_multi_device_EXT} extension is supported, platforms reporting the {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} capability support generating a deep copy of a command-buffer with its -commands remapped to a list of command-queues that are potentially -<> with the queues used to create the -command-buffer. -That is, the remapped command-buffer can execute on queues that differ in -terms of properties and/or associated device from the original +commands remapped to different devices than the devices used to create the +commands. That is, the remapped command-buffer can execute on queues that +differ in terms of properties and/or associated device from the original command-buffer queues. This functionality is invoked through a new synchronous entry-point {clRemapCommandBufferKHR} which takes a list of queues to which the commands -should now target. 
-It then returns a command-buffer containing the same commands as the -original, with the same command dependencies, but targeting different -queues. -A list of command handles may also be passed to the entry-point, which -allows handles to the equivalent commands in the remapped command-buffer to -be returned by an output parameter. +should now target the associated devices of. It then returns a command-buffer +containing the same commands as the original, with the same command +dependencies, but targeting different devices. A list of command handles may +also be passed to the entry-point, which allows handles to the equivalent +commands in the remapped command-buffer to be returned by an output parameter. Device properties restrict remapping possibilities, as existing commands can have a configuration which is not supported by another device, and so @@ -16077,7 +16091,7 @@ appear and disappear during runtime. [open,refpage='clRemapCommandBufferKHR',desc='Create copy of a command-buffer remapped to specified command-queues',type='protos'] -- To create a deep copy of the input command-buffer with the copied commands -remapped to target the passed command-queues, call the function +remapped to target devices of the passed command-queues, call the function include::{generated}/api/protos/clRemapCommandBufferKHR.txt[] include::{generated}/api/version-notes/clRemapCommandBufferKHR.asciidoc[] @@ -16136,8 +16150,6 @@ one of the following error values returned in _errcode_ret_: * {CL_INVALID_OPERATION} if the platform does not support the {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} flag and _automatic_ is {CL_TRUE}. - * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if such an error would be returned - by passing _queues_ to {clCreateCommandBufferKHR}. * Any error relating to device support that can be returned by a command recording entry-point may also be returned. 
As a command in _command_buffer_ can have a configuration that is not diff --git a/xml/cl.xml b/xml/cl.xml index e147f9770..b01922db8 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -7191,7 +7191,7 @@ server's OpenCL/api-docs repository. - + @@ -7410,7 +7410,7 @@ server's OpenCL/api-docs repository. - + From 32f65e287baf5006bf57499395aae64014aca269 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 30 Jan 2025 10:38:32 -0800 Subject: [PATCH 184/190] update the spec source copyright dates to 2025 (#1301) --- CXX_for_OpenCL.txt | 2 +- Makefile | 2 +- OpenCL_API.txt | 2 +- OpenCL_C.txt | 2 +- OpenCL_Cxx.txt | 2 +- OpenCL_Env.txt | 2 +- OpenCL_Ext.txt | 2 +- OpenCL_ICD_Installation.txt | 2 +- OpenCL_LangExt.txt | 2 +- api/acknowledgements.asciidoc | 2 +- api/appendix_a.asciidoc | 2 +- api/appendix_b.asciidoc | 2 +- api/appendix_c.asciidoc | 2 +- api/appendix_d.asciidoc | 2 +- api/appendix_e.asciidoc | 2 +- api/appendix_extensions.asciidoc | 2 +- api/appendix_f.asciidoc | 2 +- api/appendix_g.asciidoc | 2 +- api/appendix_h.asciidoc | 2 +- api/cl_ext_cxx_for_opencl.asciidoc | 2 +- api/cl_ext_device_fission.asciidoc | 2 +- api/cl_ext_float_atomics.asciidoc | 2 +- api/cl_ext_image_raw10_raw12.asciidoc | 2 +- api/cl_ext_image_unorm_int_2_101010.asciidoc | 2 +- api/cl_ext_migrate_memobject.asciidoc | 2 +- api/cl_khr_3d_image_writes.asciidoc | 2 +- api/cl_khr_async_work_group_copy_fence.asciidoc | 2 +- api/cl_khr_byte_addressable_store.asciidoc | 2 +- api/cl_khr_command_buffer.asciidoc | 2 +- api/cl_khr_command_buffer_multi_device.asciidoc | 2 +- api/cl_khr_command_buffer_mutable_dispatch.asciidoc | 2 +- api/cl_khr_create_command_queue.asciidoc | 2 +- api/cl_khr_d3d10_sharing.asciidoc | 2 +- api/cl_khr_d3d11_sharing.asciidoc | 2 +- api/cl_khr_depth_images.asciidoc | 2 +- api/cl_khr_device_enqueue_local_arg_types.asciidoc | 2 +- api/cl_khr_device_uuid.asciidoc | 2 +- api/cl_khr_dx9_media_sharing.asciidoc | 2 +- api/cl_khr_egl_event.asciidoc | 2 +- api/cl_khr_egl_image.asciidoc 
| 2 +- api/cl_khr_expect_assume.asciidoc | 2 +- api/cl_khr_extended_async_copies.asciidoc | 2 +- api/cl_khr_extended_bit_ops.asciidoc | 2 +- api/cl_khr_extended_versioning.asciidoc | 2 +- api/cl_khr_external_memory.asciidoc | 2 +- api/cl_khr_external_memory_dma_buf.asciidoc | 2 +- api/cl_khr_external_memory_opaque_fd.asciidoc | 2 +- api/cl_khr_external_memory_win32.asciidoc | 2 +- api/cl_khr_external_semaphore.asciidoc | 2 +- api/cl_khr_external_semaphore_opaque_fd.asciidoc | 2 +- api/cl_khr_external_semaphore_sync_fd.asciidoc | 2 +- api/cl_khr_external_semaphore_win32.asciidoc | 2 +- api/cl_khr_fp16.asciidoc | 2 +- api/cl_khr_fp64.asciidoc | 2 +- api/cl_khr_gl_depth_images.asciidoc | 2 +- api/cl_khr_gl_event.asciidoc | 2 +- api/cl_khr_gl_msaa_sharing.asciidoc | 2 +- api/cl_khr_gl_sharing.asciidoc | 2 +- api/cl_khr_global_int32_base_atomics.asciidoc | 2 +- api/cl_khr_global_int32_extended_atomics.asciidoc | 2 +- api/cl_khr_icd.asciidoc | 2 +- api/cl_khr_il_program.asciidoc | 2 +- api/cl_khr_image2d_from_buffer.asciidoc | 2 +- api/cl_khr_initialize_memory.asciidoc | 2 +- api/cl_khr_int64_base_atomics.asciidoc | 2 +- api/cl_khr_int64_extended_atomics.asciidoc | 2 +- api/cl_khr_integer_dot_product.asciidoc | 2 +- api/cl_khr_kernel_clock.asciidoc | 2 +- api/cl_khr_local_int32_base_atomics.asciidoc | 2 +- api/cl_khr_local_int32_extended_atomics.asciidoc | 2 +- api/cl_khr_mipmap_image.asciidoc | 2 +- api/cl_khr_mipmap_image_writes.asciidoc | 2 +- api/cl_khr_pci_bus_info.asciidoc | 2 +- api/cl_khr_priority_hints.asciidoc | 2 +- api/cl_khr_select_fprounding_mode.asciidoc | 2 +- api/cl_khr_semaphore.asciidoc | 2 +- api/cl_khr_spir.asciidoc | 2 +- api/cl_khr_spirv_extended_debug_info.asciidoc | 2 +- api/cl_khr_spirv_linkonce_odr.asciidoc | 2 +- api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc | 2 +- api/cl_khr_srgb_image_writes.asciidoc | 2 +- api/cl_khr_subgroup_ballot.asciidoc | 2 +- api/cl_khr_subgroup_clustered_reduce.asciidoc | 2 +- 
api/cl_khr_subgroup_extended_types.asciidoc | 2 +- api/cl_khr_subgroup_named_barrier.asciidoc | 2 +- api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc | 2 +- api/cl_khr_subgroup_non_uniform_vote.asciidoc | 2 +- api/cl_khr_subgroup_rotate.asciidoc | 2 +- api/cl_khr_subgroup_shuffle.asciidoc | 2 +- api/cl_khr_subgroup_shuffle_relative.asciidoc | 2 +- api/cl_khr_subgroups.asciidoc | 2 +- api/cl_khr_suggested_local_work_size.asciidoc | 2 +- api/cl_khr_terminate_context.asciidoc | 2 +- api/cl_khr_throttle_hints.asciidoc | 2 +- api/cl_khr_work_group_uniform_arithmetic.asciidoc | 2 +- api/dictionary.asciidoc | 2 +- api/embedded_profile.asciidoc | 2 +- api/footnotes.asciidoc | 2 +- api/glossary.asciidoc | 2 +- api/introduction.asciidoc | 2 +- api/opencl_architecture.asciidoc | 2 +- api/opencl_assoc_spec.asciidoc | 2 +- api/opencl_platform_layer.asciidoc | 2 +- api/opencl_runtime_layer.asciidoc | 2 +- api/provisional_notice.asciidoc | 2 +- c/appendix_a.asciidoc | 2 +- c/dictionary.asciidoc | 2 +- c/feature-dictionary.asciidoc | 2 +- c/footnotes.asciidoc | 2 +- config/copyright-ccby.txt | 2 +- config/katex_replace.rb | 2 +- config/katex_replace/extension.rb | 2 +- config/opencl.asciidoc | 2 +- config/rouge_opencl.rb | 2 +- config/spec-macros.rb | 2 +- config/spec-macros/extension.rb | 2 +- config/version-full-links.asciidoc | 2 +- config/version-local-links.asciidoc | 2 +- copyrights-ccby.txt | 2 +- copyrights.txt | 2 +- cxx/acknowledgements.txt | 2 +- cxx/annotation.txt | 2 +- cxx/compiler_options.txt | 2 +- cxx/generic_type_name_notation.txt | 2 +- cxx/image_addressing_and_filtering.txt | 2 +- cxx/lang/address_spaces.txt | 2 +- cxx/lang/attribute_qualifiers.txt | 2 +- cxx/lang/builtin_data_types.txt | 2 +- cxx/lang/expressions.txt | 2 +- cxx/lang/implicit_type_conversions.txt | 2 +- cxx/lang/kernel_functions.txt | 2 +- cxx/lang/keywords.txt | 2 +- cxx/lang/lang.txt | 2 +- cxx/lang/preprocessor.txt | 2 +- cxx/lang/restrictions.txt | 2 +- 
cxx/numerical_compliance/edge_case_behavior.txt | 2 +- cxx/numerical_compliance/floating_point_exceptions.txt | 2 +- cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt | 2 +- cxx/numerical_compliance/numerical_compliance.txt | 2 +- cxx/numerical_compliance/relative_error_as_ulps.txt | 2 +- cxx/numerical_compliance/rounding_modes.txt | 2 +- cxx/stdlib/address_spaces.txt | 2 +- cxx/stdlib/array.txt | 2 +- cxx/stdlib/atomic_operations.txt | 2 +- cxx/stdlib/common.txt | 2 +- cxx/stdlib/conversions.txt | 2 +- cxx/stdlib/definitions.txt | 2 +- cxx/stdlib/device_enqueue.txt | 2 +- cxx/stdlib/general_utilities.txt | 2 +- cxx/stdlib/geometric.txt | 2 +- cxx/stdlib/half_wrapper.txt | 2 +- cxx/stdlib/images_and_samplers.txt | 2 +- cxx/stdlib/integer.txt | 2 +- cxx/stdlib/iterator.txt | 2 +- cxx/stdlib/limits.txt | 2 +- cxx/stdlib/marker_types.txt | 2 +- cxx/stdlib/math.txt | 2 +- cxx/stdlib/math_constants.txt | 2 +- cxx/stdlib/pipes.txt | 2 +- cxx/stdlib/printf.txt | 2 +- cxx/stdlib/range.txt | 2 +- cxx/stdlib/reinterpreting_data.txt | 2 +- cxx/stdlib/relational.txt | 2 +- cxx/stdlib/specialization_constants.txt | 2 +- cxx/stdlib/stdlib.txt | 2 +- cxx/stdlib/synchronization.txt | 2 +- cxx/stdlib/tuple.txt | 2 +- cxx/stdlib/type_traits.txt | 2 +- cxx/stdlib/vector_data_load_and_store.txt | 2 +- cxx/stdlib/vector_iterator.txt | 2 +- cxx/stdlib/vector_utilities.txt | 2 +- cxx/stdlib/vector_wrapper.txt | 2 +- cxx/stdlib/work_group.txt | 2 +- cxx/stdlib/work_item.txt | 2 +- cxx4opencl/acknowledgements.txt | 2 +- cxx4opencl/address_spaces.txt | 2 +- cxx4opencl/cxxcasts.txt | 2 +- cxx4opencl/diff2cxx.txt | 2 +- cxx4opencl/diff2openclc.txt | 2 +- cxx4opencl/intro.txt | 2 +- cxx4opencl/kernel.txt | 2 +- cxx4opencl/references.txt | 2 +- env/appendix_a.asciidoc | 2 +- env/common_properties.asciidoc | 2 +- env/dictionary.asciidoc | 2 +- env/extensions.asciidoc | 2 +- env/image_addressing_and_filtering.asciidoc | 2 +- env/introduction.asciidoc | 2 +- 
env/numerical_compliance.asciidoc | 2 +- env/references.asciidoc | 2 +- env/required_capabilities.asciidoc | 2 +- env/validation_rules.asciidoc | 2 +- ext/deprecated_extensions.asciidoc | 2 +- ext/dictionary.asciidoc | 2 +- ext/index.asciidoc | 2 +- ext/introduction.asciidoc | 2 +- ext/quick_reference.asciidoc | 2 +- ext/to_core_features.asciidoc | 2 +- extensions/cl_arm_controlled_kernel_termination.asciidoc | 2 +- extensions/cl_arm_printf.asciidoc | 2 +- extensions/cl_arm_protected_memory_allocation.asciidoc | 2 +- extensions/cl_arm_scheduling_controls.asciidoc | 2 +- extensions/cl_ext_float_atomics.asciidoc | 4 ++-- extensions/cl_ext_image_raw10_raw12.asciidoc | 2 +- extensions/cl_extension_template.asciidoc | 4 ++-- extensions/cl_img_bitwise_ops.asciidoc | 2 +- extensions/cl_img_cached_allocations.asciidoc | 2 +- extensions/cl_img_cancel_command.asciidoc | 2 +- extensions/cl_img_generate_mipmap.asciidoc | 2 +- extensions/cl_img_matrix_multiply.asciidoc | 2 +- extensions/cl_img_mem_properties.asciidoc | 2 +- extensions/cl_img_memory_management.asciidoc | 2 +- extensions/cl_img_swap_ops.asciidoc | 2 +- extensions/cl_img_use_gralloc_ptr.asciidoc | 2 +- extensions/cl_img_yuv_image.asciidoc | 2 +- extensions/cl_intel_bfloat16_conversions.asciidoc | 2 +- extensions/cl_intel_command_queue_families.asciidoc | 2 +- extensions/cl_intel_create_buffer_with_properties.asciidoc | 2 +- extensions/cl_intel_device_attribute_query.asciidoc | 2 +- extensions/cl_intel_mem_alloc_buffer_location.asciidoc | 2 +- extensions/cl_intel_mem_channel_property.asciidoc | 2 +- extensions/cl_intel_mem_force_host_memory.asciidoc | 2 +- extensions/cl_intel_packed_yuv.asciidoc | 2 +- extensions/cl_intel_planar_yuv.asciidoc | 2 +- extensions/cl_intel_program_scope_host_pipe.asciidoc | 2 +- extensions/cl_intel_required_subgroup_size.asciidoc | 2 +- extensions/cl_intel_sharing_format_query.asciidoc | 2 +- .../cl_intel_spirv_device_side_avc_motion_estimation.asciidoc | 2 +- 
extensions/cl_intel_spirv_media_block_io.asciidoc | 2 +- extensions/cl_intel_spirv_subgroups.asciidoc | 2 +- extensions/cl_intel_split_work_group_barrier.asciidoc | 2 +- extensions/cl_intel_subgroup_buffer_prefetch.asciidoc | 2 +- .../cl_intel_subgroup_matrix_multiply_accumulate.asciidoc | 2 +- ...l_intel_subgroup_split_matrix_multiply_accumulate.asciidoc | 2 +- extensions/cl_intel_subgroups.asciidoc | 2 +- extensions/cl_intel_subgroups_char.asciidoc | 2 +- extensions/cl_intel_subgroups_long.asciidoc | 2 +- extensions/cl_intel_subgroups_short.asciidoc | 2 +- extensions/cl_intel_unified_shared_memory.asciidoc | 2 +- extensions/cl_loader_info.asciidoc | 4 ++-- extensions/cl_loader_layers.asciidoc | 2 +- extensions/cl_pocl_content_size.asciidoc | 2 +- extensions/extensions.txt | 2 +- langext/acknowledgements.txt | 2 +- langext/intro.txt | 2 +- langext/variadic_macro.txt | 2 +- makeSpec | 2 +- man/static/EXTENSION.txt | 2 +- man/static/abstractDataTypes.txt | 2 +- man/static/clGetExtensionFunctionAddressForPlatform.txt | 2 +- man/static/convert_T.txt | 2 +- man/static/deadLinks.txt | 2 +- man/static/enums.txt | 2 +- man/static/footer.txt | 2 +- man/static/intro.txt | 2 +- scripts/apiconventions.py | 2 +- scripts/cgenerator.py | 2 +- scripts/checklinks.py | 2 +- scripts/clconventions.py | 2 +- scripts/docgenerator.py | 2 +- scripts/extdependency.py | 2 +- scripts/extensionmetadocgenerator.py | 2 +- scripts/find_adoc_deps | 2 +- scripts/genRef.py | 4 ++-- scripts/gen_dictionaries.py | 4 ++-- scripts/gen_dictionary_from_file.py | 2 +- scripts/gen_version_notes.py | 4 ++-- scripts/gencl.py | 4 ++-- scripts/generator.py | 2 +- scripts/parse_dependency.py | 2 +- scripts/pygenerator.py | 2 +- scripts/realign.py | 2 +- scripts/reflib.py | 2 +- scripts/reg.py | 2 +- scripts/runDocker | 2 +- scripts/scriptgenerator.py | 2 +- scripts/spec_tools/conventions.py | 2 +- scripts/spec_tools/util.py | 2 +- xml/Makefile | 2 +- xml/cl.xml | 2 +- xml/registry.rnc | 2 +- 281 files changed, 
288 insertions(+), 288 deletions(-) diff --git a/CXX_for_OpenCL.txt b/CXX_for_OpenCL.txt index e182fd9ee..c1fcfdc88 100644 --- a/CXX_for_OpenCL.txt +++ b/CXX_for_OpenCL.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/Makefile b/Makefile index d08fcd600..c8723555f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 # OpenCL Specifications Makefile diff --git a/OpenCL_API.txt b/OpenCL_API.txt index e7e67a578..656e24bfd 100644 --- a/OpenCL_API.txt +++ b/OpenCL_API.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group +// Copyright 2017-2025 The Khronos Group // SPDX-License-Identifier: CC-BY-4.0 // Extensions to enable diff --git a/OpenCL_C.txt b/OpenCL_C.txt index f3701bdb8..f593362ae 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. +// Copyright 2017-2025 The Khronos Group. // SPDX-License-Identifier: CC-BY-4.0 // Extensions to enable diff --git a/OpenCL_Cxx.txt b/OpenCL_Cxx.txt index bf0d71f76..d0987e6da 100644 --- a/OpenCL_Cxx.txt +++ b/OpenCL_Cxx.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_Env.txt b/OpenCL_Env.txt index d504cd504..9a8f2655b 100644 --- a/OpenCL_Env.txt +++ b/OpenCL_Env.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_Ext.txt b/OpenCL_Ext.txt index b84dddf87..212440916 100644 --- a/OpenCL_Ext.txt +++ b/OpenCL_Ext.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_ICD_Installation.txt b/OpenCL_ICD_Installation.txt index 072e706f4..4a0ee96d0 100644 --- a/OpenCL_ICD_Installation.txt +++ b/OpenCL_ICD_Installation.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/OpenCL_LangExt.txt b/OpenCL_LangExt.txt index acb91342b..4bebbc2b2 100644 --- a/OpenCL_LangExt.txt +++ b/OpenCL_LangExt.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/api/acknowledgements.asciidoc b/api/acknowledgements.asciidoc index 72fec4365..17cac2bd2 100644 --- a/api/acknowledgements.asciidoc +++ b/api/acknowledgements.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2016-2024 The Khronos Group Inc. +// Copyright 2016-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 = Acknowledgements diff --git a/api/appendix_a.asciidoc b/api/appendix_a.asciidoc index bef67d2e8..b2ed63c34 100644 --- a/api/appendix_a.asciidoc +++ b/api/appendix_a.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. 
+// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [appendix] diff --git a/api/appendix_b.asciidoc b/api/appendix_b.asciidoc index c5698515e..65efd040a 100644 --- a/api/appendix_b.asciidoc +++ b/api/appendix_b.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2016-2024 The Khronos Group Inc. +// Copyright 2016-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [appendix] diff --git a/api/appendix_c.asciidoc b/api/appendix_c.asciidoc index 6583f1c72..583cd355f 100644 --- a/api/appendix_c.asciidoc +++ b/api/appendix_c.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2016-2024 The Khronos Group Inc. +// Copyright 2016-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [appendix] diff --git a/api/appendix_d.asciidoc b/api/appendix_d.asciidoc index 7ae631b06..bc4a921f8 100644 --- a/api/appendix_d.asciidoc +++ b/api/appendix_d.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [appendix] diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index d589edf60..a4a1aa5e7 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [appendix] diff --git a/api/appendix_extensions.asciidoc b/api/appendix_extensions.asciidoc index 0c2d4dc4e..c02aaa487 100644 --- a/api/appendix_extensions.asciidoc +++ b/api/appendix_extensions.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2023-2024 The Khronos Group Inc. +// Copyright 2023-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [appendix] diff --git a/api/appendix_f.asciidoc b/api/appendix_f.asciidoc index b20d88295..40b4f71c3 100644 --- a/api/appendix_f.asciidoc +++ b/api/appendix_f.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 [appendix] diff --git a/api/appendix_g.asciidoc b/api/appendix_g.asciidoc index 1c4703727..095cf6f99 100644 --- a/api/appendix_g.asciidoc +++ b/api/appendix_g.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group Inc. +// Copyright 2019-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [appendix] diff --git a/api/appendix_h.asciidoc b/api/appendix_h.asciidoc index d1cef63a1..b04d5cc94 100644 --- a/api/appendix_h.asciidoc +++ b/api/appendix_h.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. [appendix] [[opencl-3.0-backwards-compatibility]] diff --git a/api/cl_ext_cxx_for_opencl.asciidoc b/api/cl_ext_cxx_for_opencl.asciidoc index 51ead70cb..3db37985f 100644 --- a/api/cl_ext_cxx_for_opencl.asciidoc +++ b/api/cl_ext_cxx_for_opencl.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_ext_cxx_for_opencl.txt[] diff --git a/api/cl_ext_device_fission.asciidoc b/api/cl_ext_device_fission.asciidoc index 8b038ef68..8292e28b0 100644 --- a/api/cl_ext_device_fission.asciidoc +++ b/api/cl_ext_device_fission.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_ext_device_fission.txt[] diff --git a/api/cl_ext_float_atomics.asciidoc b/api/cl_ext_float_atomics.asciidoc index 473feb40e..c3eca6693 100644 --- a/api/cl_ext_float_atomics.asciidoc +++ b/api/cl_ext_float_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_ext_float_atomics.txt[] diff --git a/api/cl_ext_image_raw10_raw12.asciidoc b/api/cl_ext_image_raw10_raw12.asciidoc index d7d36b55d..68c29dc0a 100644 --- a/api/cl_ext_image_raw10_raw12.asciidoc +++ b/api/cl_ext_image_raw10_raw12.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_ext_image_raw10_raw12.txt[] diff --git a/api/cl_ext_image_unorm_int_2_101010.asciidoc b/api/cl_ext_image_unorm_int_2_101010.asciidoc index 1320526df..7c09d0ff1 100644 --- a/api/cl_ext_image_unorm_int_2_101010.asciidoc +++ b/api/cl_ext_image_unorm_int_2_101010.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_ext_image_unorm_int_2_101010.txt[] diff --git a/api/cl_ext_migrate_memobject.asciidoc b/api/cl_ext_migrate_memobject.asciidoc index b987f48f2..ad5df7af9 100644 --- a/api/cl_ext_migrate_memobject.asciidoc +++ b/api/cl_ext_migrate_memobject.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2024 The Khronos Group Inc. +// Copyright 2024-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_ext_migrate_memobject.txt[] diff --git a/api/cl_khr_3d_image_writes.asciidoc b/api/cl_khr_3d_image_writes.asciidoc index 8495fa692..525a6f65e 100644 --- a/api/cl_khr_3d_image_writes.asciidoc +++ b/api/cl_khr_3d_image_writes.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_3d_image_writes.txt[] diff --git a/api/cl_khr_async_work_group_copy_fence.asciidoc b/api/cl_khr_async_work_group_copy_fence.asciidoc index 321cb1a90..15b3b65b6 100644 --- a/api/cl_khr_async_work_group_copy_fence.asciidoc +++ b/api/cl_khr_async_work_group_copy_fence.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_async_work_group_copy_fence.txt[] diff --git a/api/cl_khr_byte_addressable_store.asciidoc b/api/cl_khr_byte_addressable_store.asciidoc index 7637d79b7..3dcf0445a 100644 --- a/api/cl_khr_byte_addressable_store.asciidoc +++ b/api/cl_khr_byte_addressable_store.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_byte_addressable_store.txt[] diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc index 6da252866..a3cf1e83a 100644 --- a/api/cl_khr_command_buffer.asciidoc +++ b/api/cl_khr_command_buffer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_command_buffer.txt[] diff --git a/api/cl_khr_command_buffer_multi_device.asciidoc b/api/cl_khr_command_buffer_multi_device.asciidoc index fa3f3047b..43b3b351d 100644 --- a/api/cl_khr_command_buffer_multi_device.asciidoc +++ b/api/cl_khr_command_buffer_multi_device.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_command_buffer_multi_device.txt[] diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc index 51e2b8696..94efdc029 100644 --- a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_command_buffer_mutable_dispatch.txt[] diff --git a/api/cl_khr_create_command_queue.asciidoc b/api/cl_khr_create_command_queue.asciidoc index 7ac0a9ad6..9104c6a88 100644 --- a/api/cl_khr_create_command_queue.asciidoc +++ b/api/cl_khr_create_command_queue.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_create_command_queue.txt[] diff --git a/api/cl_khr_d3d10_sharing.asciidoc b/api/cl_khr_d3d10_sharing.asciidoc index 32d9dc517..d89643feb 100644 --- a/api/cl_khr_d3d10_sharing.asciidoc +++ b/api/cl_khr_d3d10_sharing.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_d3d10_sharing.txt[] diff --git a/api/cl_khr_d3d11_sharing.asciidoc b/api/cl_khr_d3d11_sharing.asciidoc index d57b904f8..a3770dbd8 100644 --- a/api/cl_khr_d3d11_sharing.asciidoc +++ b/api/cl_khr_d3d11_sharing.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_d3d11_sharing.txt[] diff --git a/api/cl_khr_depth_images.asciidoc b/api/cl_khr_depth_images.asciidoc index 895202496..c08acbc6f 100644 --- a/api/cl_khr_depth_images.asciidoc +++ b/api/cl_khr_depth_images.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_depth_images.txt[] diff --git a/api/cl_khr_device_enqueue_local_arg_types.asciidoc b/api/cl_khr_device_enqueue_local_arg_types.asciidoc index f241a7d7a..d57b48254 100644 --- a/api/cl_khr_device_enqueue_local_arg_types.asciidoc +++ b/api/cl_khr_device_enqueue_local_arg_types.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_device_enqueue_local_arg_types.txt[] diff --git a/api/cl_khr_device_uuid.asciidoc b/api/cl_khr_device_uuid.asciidoc index 7b669c135..335de7ae8 100644 --- a/api/cl_khr_device_uuid.asciidoc +++ b/api/cl_khr_device_uuid.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_device_uuid.txt[] diff --git a/api/cl_khr_dx9_media_sharing.asciidoc b/api/cl_khr_dx9_media_sharing.asciidoc index 455fd4359..832471002 100644 --- a/api/cl_khr_dx9_media_sharing.asciidoc +++ b/api/cl_khr_dx9_media_sharing.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_dx9_media_sharing.txt[] diff --git a/api/cl_khr_egl_event.asciidoc b/api/cl_khr_egl_event.asciidoc index 463ca2633..32a66ecb7 100644 --- a/api/cl_khr_egl_event.asciidoc +++ b/api/cl_khr_egl_event.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_egl_event.txt[] diff --git a/api/cl_khr_egl_image.asciidoc b/api/cl_khr_egl_image.asciidoc index dcea8fd3e..8c6efe68c 100644 --- a/api/cl_khr_egl_image.asciidoc +++ b/api/cl_khr_egl_image.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_egl_image.txt[] diff --git a/api/cl_khr_expect_assume.asciidoc b/api/cl_khr_expect_assume.asciidoc index 704f87aab..08cb30a20 100644 --- a/api/cl_khr_expect_assume.asciidoc +++ b/api/cl_khr_expect_assume.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_expect_assume.txt[] diff --git a/api/cl_khr_extended_async_copies.asciidoc b/api/cl_khr_extended_async_copies.asciidoc index 3ac6bc321..7386f3560 100644 --- a/api/cl_khr_extended_async_copies.asciidoc +++ b/api/cl_khr_extended_async_copies.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_extended_async_copies.txt[] diff --git a/api/cl_khr_extended_bit_ops.asciidoc b/api/cl_khr_extended_bit_ops.asciidoc index 0bea6218f..66dab13b6 100644 --- a/api/cl_khr_extended_bit_ops.asciidoc +++ b/api/cl_khr_extended_bit_ops.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_extended_bit_ops.txt[] diff --git a/api/cl_khr_extended_versioning.asciidoc b/api/cl_khr_extended_versioning.asciidoc index 27f651b8b..495601342 100644 --- a/api/cl_khr_extended_versioning.asciidoc +++ b/api/cl_khr_extended_versioning.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group Inc. +// Copyright 2019-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_extended_versioning.txt[] diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index 9d28be739..1f3826326 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_external_memory.txt[] diff --git a/api/cl_khr_external_memory_dma_buf.asciidoc b/api/cl_khr_external_memory_dma_buf.asciidoc index 193f1a712..fb6a0b364 100644 --- a/api/cl_khr_external_memory_dma_buf.asciidoc +++ b/api/cl_khr_external_memory_dma_buf.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_external_memory_dma_buf.txt[] diff --git a/api/cl_khr_external_memory_opaque_fd.asciidoc b/api/cl_khr_external_memory_opaque_fd.asciidoc index 92a3ab0ea..58bad791d 100644 --- a/api/cl_khr_external_memory_opaque_fd.asciidoc +++ b/api/cl_khr_external_memory_opaque_fd.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_external_memory_opaque_fd.txt[] diff --git a/api/cl_khr_external_memory_win32.asciidoc b/api/cl_khr_external_memory_win32.asciidoc index fb18ff470..6949d4b6b 100644 --- a/api/cl_khr_external_memory_win32.asciidoc +++ b/api/cl_khr_external_memory_win32.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_external_memory_win32.txt[] diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc index d3b1c98b8..746ba78dd 100644 --- a/api/cl_khr_external_semaphore.asciidoc +++ b/api/cl_khr_external_semaphore.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_external_semaphore.txt[] diff --git a/api/cl_khr_external_semaphore_opaque_fd.asciidoc b/api/cl_khr_external_semaphore_opaque_fd.asciidoc index 7e40df3ef..8b67dcfb2 100644 --- a/api/cl_khr_external_semaphore_opaque_fd.asciidoc +++ b/api/cl_khr_external_semaphore_opaque_fd.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_external_semaphore_opaque_fd.txt[] diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc index 19162666e..87d9a8362 100644 --- a/api/cl_khr_external_semaphore_sync_fd.asciidoc +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_external_semaphore_sync_fd.txt[] diff --git a/api/cl_khr_external_semaphore_win32.asciidoc b/api/cl_khr_external_semaphore_win32.asciidoc index 543741d3c..ccca0e37e 100644 --- a/api/cl_khr_external_semaphore_win32.asciidoc +++ b/api/cl_khr_external_semaphore_win32.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_external_semaphore_win32.txt[] diff --git a/api/cl_khr_fp16.asciidoc b/api/cl_khr_fp16.asciidoc index aa6a2e801..7e1791ff2 100644 --- a/api/cl_khr_fp16.asciidoc +++ b/api/cl_khr_fp16.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 //@ TODO There are API elements (defines) to this, as well as OpenCL C diff --git a/api/cl_khr_fp64.asciidoc b/api/cl_khr_fp64.asciidoc index e56a03f1c..6df2b44ef 100644 --- a/api/cl_khr_fp64.asciidoc +++ b/api/cl_khr_fp64.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 //@ TODO There are API elements (defines) to this, like DBL_RADIX, as well as OpenCL C diff --git a/api/cl_khr_gl_depth_images.asciidoc b/api/cl_khr_gl_depth_images.asciidoc index 6bd403900..a55b3ee75 100644 --- a/api/cl_khr_gl_depth_images.asciidoc +++ b/api/cl_khr_gl_depth_images.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_gl_depth_images.txt[] diff --git a/api/cl_khr_gl_event.asciidoc b/api/cl_khr_gl_event.asciidoc index 64e13ef66..e5384e44d 100644 --- a/api/cl_khr_gl_event.asciidoc +++ b/api/cl_khr_gl_event.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_gl_event.txt[] diff --git a/api/cl_khr_gl_msaa_sharing.asciidoc b/api/cl_khr_gl_msaa_sharing.asciidoc index 20042fe9f..94639cc09 100644 --- a/api/cl_khr_gl_msaa_sharing.asciidoc +++ b/api/cl_khr_gl_msaa_sharing.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_gl_msaa_sharing.txt[] diff --git a/api/cl_khr_gl_sharing.asciidoc b/api/cl_khr_gl_sharing.asciidoc index 124f6d770..502afef83 100644 --- a/api/cl_khr_gl_sharing.asciidoc +++ b/api/cl_khr_gl_sharing.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_gl_sharing.txt[] diff --git a/api/cl_khr_global_int32_base_atomics.asciidoc b/api/cl_khr_global_int32_base_atomics.asciidoc index 0451eaa52..19bdc4a19 100644 --- a/api/cl_khr_global_int32_base_atomics.asciidoc +++ b/api/cl_khr_global_int32_base_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_global_int32_base_atomics.txt[] diff --git a/api/cl_khr_global_int32_extended_atomics.asciidoc b/api/cl_khr_global_int32_extended_atomics.asciidoc index 0733e27fd..f399ff4e3 100644 --- a/api/cl_khr_global_int32_extended_atomics.asciidoc +++ b/api/cl_khr_global_int32_extended_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_global_int32_extended_atomics.txt[] diff --git a/api/cl_khr_icd.asciidoc b/api/cl_khr_icd.asciidoc index 39d4125f3..a856cbf59 100644 --- a/api/cl_khr_icd.asciidoc +++ b/api/cl_khr_icd.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 //@ TODO This should probably be in an appendix? It is a "platform diff --git a/api/cl_khr_il_program.asciidoc b/api/cl_khr_il_program.asciidoc index ba99d2c9d..e06945bac 100644 --- a/api/cl_khr_il_program.asciidoc +++ b/api/cl_khr_il_program.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_il_program.txt[] diff --git a/api/cl_khr_image2d_from_buffer.asciidoc b/api/cl_khr_image2d_from_buffer.asciidoc index a698ce04e..98e819cdf 100644 --- a/api/cl_khr_image2d_from_buffer.asciidoc +++ b/api/cl_khr_image2d_from_buffer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_image2d_from_buffer.txt[] diff --git a/api/cl_khr_initialize_memory.asciidoc b/api/cl_khr_initialize_memory.asciidoc index ac0310242..5c7774b49 100644 --- a/api/cl_khr_initialize_memory.asciidoc +++ b/api/cl_khr_initialize_memory.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_initialize_memory.txt[] diff --git a/api/cl_khr_int64_base_atomics.asciidoc b/api/cl_khr_int64_base_atomics.asciidoc index 6026c1624..f46185729 100644 --- a/api/cl_khr_int64_base_atomics.asciidoc +++ b/api/cl_khr_int64_base_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_int64_base_atomics.txt[] diff --git a/api/cl_khr_int64_extended_atomics.asciidoc b/api/cl_khr_int64_extended_atomics.asciidoc index 6eeedca0c..85f5b7824 100644 --- a/api/cl_khr_int64_extended_atomics.asciidoc +++ b/api/cl_khr_int64_extended_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_int64_extended_atomics.txt[] diff --git a/api/cl_khr_integer_dot_product.asciidoc b/api/cl_khr_integer_dot_product.asciidoc index 257401f9d..14990f6a5 100644 --- a/api/cl_khr_integer_dot_product.asciidoc +++ b/api/cl_khr_integer_dot_product.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_integer_dot_product.txt[] diff --git a/api/cl_khr_kernel_clock.asciidoc b/api/cl_khr_kernel_clock.asciidoc index 7f4c4a0de..dfefb9376 100644 --- a/api/cl_khr_kernel_clock.asciidoc +++ b/api/cl_khr_kernel_clock.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2024 The Khronos Group Inc. +// Copyright 2024-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_kernel_clock.txt[] diff --git a/api/cl_khr_local_int32_base_atomics.asciidoc b/api/cl_khr_local_int32_base_atomics.asciidoc index cdffd332a..574149f55 100644 --- a/api/cl_khr_local_int32_base_atomics.asciidoc +++ b/api/cl_khr_local_int32_base_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_local_int32_base_atomics.txt[] diff --git a/api/cl_khr_local_int32_extended_atomics.asciidoc b/api/cl_khr_local_int32_extended_atomics.asciidoc index e78b7a872..d43c9f614 100644 --- a/api/cl_khr_local_int32_extended_atomics.asciidoc +++ b/api/cl_khr_local_int32_extended_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_local_int32_extended_atomics.txt[] diff --git a/api/cl_khr_mipmap_image.asciidoc b/api/cl_khr_mipmap_image.asciidoc index 203694a3f..e1e9d66db 100644 --- a/api/cl_khr_mipmap_image.asciidoc +++ b/api/cl_khr_mipmap_image.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_mipmap_image.txt[] diff --git a/api/cl_khr_mipmap_image_writes.asciidoc b/api/cl_khr_mipmap_image_writes.asciidoc index 194e0c977..ca54a2361 100644 --- a/api/cl_khr_mipmap_image_writes.asciidoc +++ b/api/cl_khr_mipmap_image_writes.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_mipmap_image_writes.txt[] diff --git a/api/cl_khr_pci_bus_info.asciidoc b/api/cl_khr_pci_bus_info.asciidoc index 0279a5191..5dfc199b8 100644 --- a/api/cl_khr_pci_bus_info.asciidoc +++ b/api/cl_khr_pci_bus_info.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_pci_bus_info.txt[] diff --git a/api/cl_khr_priority_hints.asciidoc b/api/cl_khr_priority_hints.asciidoc index 988498b53..89ff77acf 100644 --- a/api/cl_khr_priority_hints.asciidoc +++ b/api/cl_khr_priority_hints.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_priority_hints.txt[] diff --git a/api/cl_khr_select_fprounding_mode.asciidoc b/api/cl_khr_select_fprounding_mode.asciidoc index 1d9951826..5ba504f1d 100644 --- a/api/cl_khr_select_fprounding_mode.asciidoc +++ b/api/cl_khr_select_fprounding_mode.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_select_fprounding_mode.txt[] diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc index 40afe4c12..379ec7eed 100644 --- a/api/cl_khr_semaphore.asciidoc +++ b/api/cl_khr_semaphore.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_semaphore.txt[] diff --git a/api/cl_khr_spir.asciidoc b/api/cl_khr_spir.asciidoc index 16573d170..ff1fad6df 100644 --- a/api/cl_khr_spir.asciidoc +++ b/api/cl_khr_spir.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_spir.txt[] diff --git a/api/cl_khr_spirv_extended_debug_info.asciidoc b/api/cl_khr_spirv_extended_debug_info.asciidoc index d63208501..7c042ae7f 100644 --- a/api/cl_khr_spirv_extended_debug_info.asciidoc +++ b/api/cl_khr_spirv_extended_debug_info.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_spirv_extended_debug_info.txt[] diff --git a/api/cl_khr_spirv_linkonce_odr.asciidoc b/api/cl_khr_spirv_linkonce_odr.asciidoc index 887b5e74a..24e63f3f5 100644 --- a/api/cl_khr_spirv_linkonce_odr.asciidoc +++ b/api/cl_khr_spirv_linkonce_odr.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_spirv_linkonce_odr.txt[] diff --git a/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc b/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc index 2f0ca0122..0cab5496c 100644 --- a/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc +++ b/api/cl_khr_spirv_no_integer_wrap_decoration.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_spirv_no_integer_wrap_decoration.txt[] diff --git a/api/cl_khr_srgb_image_writes.asciidoc b/api/cl_khr_srgb_image_writes.asciidoc index 79c3ea16d..05c76b186 100644 --- a/api/cl_khr_srgb_image_writes.asciidoc +++ b/api/cl_khr_srgb_image_writes.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_srgb_image_writes.txt[] diff --git a/api/cl_khr_subgroup_ballot.asciidoc b/api/cl_khr_subgroup_ballot.asciidoc index ae17ced98..90cd9d843 100644 --- a/api/cl_khr_subgroup_ballot.asciidoc +++ b/api/cl_khr_subgroup_ballot.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroup_ballot.txt[] diff --git a/api/cl_khr_subgroup_clustered_reduce.asciidoc b/api/cl_khr_subgroup_clustered_reduce.asciidoc index 9e6b7a078..15f1dd083 100644 --- a/api/cl_khr_subgroup_clustered_reduce.asciidoc +++ b/api/cl_khr_subgroup_clustered_reduce.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroup_clustered_reduce.txt[] diff --git a/api/cl_khr_subgroup_extended_types.asciidoc b/api/cl_khr_subgroup_extended_types.asciidoc index 3f73839af..542fcf43f 100644 --- a/api/cl_khr_subgroup_extended_types.asciidoc +++ b/api/cl_khr_subgroup_extended_types.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroup_extended_types.txt[] diff --git a/api/cl_khr_subgroup_named_barrier.asciidoc b/api/cl_khr_subgroup_named_barrier.asciidoc index d8f8da0e0..bf8c2c5bc 100644 --- a/api/cl_khr_subgroup_named_barrier.asciidoc +++ b/api/cl_khr_subgroup_named_barrier.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroup_named_barrier.txt[] diff --git a/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc index cc0657cdb..9ef7546ea 100644 --- a/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc +++ b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_arithmetic.txt[] diff --git a/api/cl_khr_subgroup_non_uniform_vote.asciidoc b/api/cl_khr_subgroup_non_uniform_vote.asciidoc index 5803cf26b..ec48ae041 100644 --- a/api/cl_khr_subgroup_non_uniform_vote.asciidoc +++ b/api/cl_khr_subgroup_non_uniform_vote.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_vote.txt[] diff --git a/api/cl_khr_subgroup_rotate.asciidoc b/api/cl_khr_subgroup_rotate.asciidoc index afddda58f..d8254ff80 100644 --- a/api/cl_khr_subgroup_rotate.asciidoc +++ b/api/cl_khr_subgroup_rotate.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2022-2024 The Khronos Group Inc. +// Copyright 2022-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroup_rotate.txt[] diff --git a/api/cl_khr_subgroup_shuffle.asciidoc b/api/cl_khr_subgroup_shuffle.asciidoc index 12d4f6774..a2dcc0b9e 100644 --- a/api/cl_khr_subgroup_shuffle.asciidoc +++ b/api/cl_khr_subgroup_shuffle.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle.txt[] diff --git a/api/cl_khr_subgroup_shuffle_relative.asciidoc b/api/cl_khr_subgroup_shuffle_relative.asciidoc index f297be5a9..6db6699af 100644 --- a/api/cl_khr_subgroup_shuffle_relative.asciidoc +++ b/api/cl_khr_subgroup_shuffle_relative.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2020-2024 The Khronos Group Inc. +// Copyright 2020-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle_relative.txt[] diff --git a/api/cl_khr_subgroups.asciidoc b/api/cl_khr_subgroups.asciidoc index 56c3191c1..b15155754 100644 --- a/api/cl_khr_subgroups.asciidoc +++ b/api/cl_khr_subgroups.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_subgroups.txt[] diff --git a/api/cl_khr_suggested_local_work_size.asciidoc b/api/cl_khr_suggested_local_work_size.asciidoc index b1f46baab..56e55ff2d 100644 --- a/api/cl_khr_suggested_local_work_size.asciidoc +++ b/api/cl_khr_suggested_local_work_size.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group Inc. +// Copyright 2018-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_suggested_local_work_size.txt[] diff --git a/api/cl_khr_terminate_context.asciidoc b/api/cl_khr_terminate_context.asciidoc index 2bc9de8b0..2456bf720 100644 --- a/api/cl_khr_terminate_context.asciidoc +++ b/api/cl_khr_terminate_context.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_terminate_context.txt[] diff --git a/api/cl_khr_throttle_hints.asciidoc b/api/cl_khr_throttle_hints.asciidoc index 4155f1edf..1aaae7c88 100644 --- a/api/cl_khr_throttle_hints.asciidoc +++ b/api/cl_khr_throttle_hints.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_throttle_hints.txt[] diff --git a/api/cl_khr_work_group_uniform_arithmetic.asciidoc b/api/cl_khr_work_group_uniform_arithmetic.asciidoc index 6ff2252fd..1f505ef32 100644 --- a/api/cl_khr_work_group_uniform_arithmetic.asciidoc +++ b/api/cl_khr_work_group_uniform_arithmetic.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2022-2024 The Khronos Group Inc. +// Copyright 2022-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/meta/{refprefix}cl_khr_work_group_uniform_arithmetic.txt[] diff --git a/api/dictionary.asciidoc b/api/dictionary.asciidoc index a714d04b5..f0d7e07bf 100644 --- a/api/dictionary.asciidoc +++ b/api/dictionary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 include::{generated}/api/api-dictionary.asciidoc[] diff --git a/api/embedded_profile.asciidoc b/api/embedded_profile.asciidoc index 8f841a1e2..1af5fda60 100644 --- a/api/embedded_profile.asciidoc +++ b/api/embedded_profile.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [[opencl-embedded-profile]] diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index 15fe29a34..2655cab44 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 // Please keep footnotes in alphabetical order! diff --git a/api/glossary.asciidoc b/api/glossary.asciidoc index 4794c760a..109c9aac3 100644 --- a/api/glossary.asciidoc +++ b/api/glossary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 // [glossary] // MK:Don't enable [glossary] - prevents chapter numbering. diff --git a/api/introduction.asciidoc b/api/introduction.asciidoc index 8b134ea26..867c70081 100644 --- a/api/introduction.asciidoc +++ b/api/introduction.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 = Introduction diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index 38d33377f..7ebdf7a18 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 = The OpenCL Architecture diff --git a/api/opencl_assoc_spec.asciidoc b/api/opencl_assoc_spec.asciidoc index 08627472c..c8ca57200 100644 --- a/api/opencl_assoc_spec.asciidoc +++ b/api/opencl_assoc_spec.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 = Associated OpenCL specification diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 97cec2d8c..721869d3b 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [[opencl-platform-layer]] diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 5a1defac0..c1331ffc1 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group Inc. +// Copyright 2017-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 [[opencl-runtime]] diff --git a/api/provisional_notice.asciidoc b/api/provisional_notice.asciidoc index 7f0720c55..e2549b74c 100644 --- a/api/provisional_notice.asciidoc +++ b/api/provisional_notice.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2023-2024 The Khronos Group Inc. +// Copyright 2023-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 [NOTE] diff --git a/c/appendix_a.asciidoc b/c/appendix_a.asciidoc index 6838a5543..c6c5555f7 100644 --- a/c/appendix_a.asciidoc +++ b/c/appendix_a.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/c/dictionary.asciidoc b/c/dictionary.asciidoc index bde39ebc1..44ded0f41 100644 --- a/c/dictionary.asciidoc +++ b/c/dictionary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/c/feature-dictionary.asciidoc b/c/feature-dictionary.asciidoc index 6e558f24c..27a109f5a 100644 --- a/c/feature-dictionary.asciidoc +++ b/c/feature-dictionary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/c/footnotes.asciidoc b/c/footnotes.asciidoc index fc85efc76..fb908c4c9 100644 --- a/c/footnotes.asciidoc +++ b/c/footnotes.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/config/copyright-ccby.txt b/config/copyright-ccby.txt index 7a63dbaa6..3ef582f41 100644 --- a/config/copyright-ccby.txt +++ b/config/copyright-ccby.txt @@ -1,3 +1,3 @@ -Copyright 2014-2024 The Khronos Group Inc. +Copyright 2014-2025 The Khronos Group Inc. SPDX-License-Identifier: CC-BY-4.0 diff --git a/config/katex_replace.rb b/config/katex_replace.rb index 12465a460..d5ff4d87a 100644 --- a/config/katex_replace.rb +++ b/config/katex_replace.rb @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2024 The Khronos Group Inc. +# Copyright (c) 2016-2025 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/katex_replace/extension.rb b/config/katex_replace/extension.rb index 24f0e95d6..67ac5112e 100644 --- a/config/katex_replace/extension.rb +++ b/config/katex_replace/extension.rb @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2024 The Khronos Group Inc. +# Copyright (c) 2016-2025 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/opencl.asciidoc b/config/opencl.asciidoc index 7839cc441..dddeb70bf 100644 --- a/config/opencl.asciidoc +++ b/config/opencl.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/config/rouge_opencl.rb b/config/rouge_opencl.rb index d30913eb0..ba338ef34 100644 --- a/config/rouge_opencl.rb +++ b/config/rouge_opencl.rb @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # # frozen_string_literal: true -# Copyright 2011-2024 The Khronos Group Inc. 
+# Copyright 2011-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 #puts "Loading rouge_opencl extensions for source code highlighting..." diff --git a/config/spec-macros.rb b/config/spec-macros.rb index 5fc043436..dde8cd250 100644 --- a/config/spec-macros.rb +++ b/config/spec-macros.rb @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2024 The Khronos Group Inc. +# Copyright (c) 2016-2025 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/spec-macros/extension.rb b/config/spec-macros/extension.rb index cce81f9a9..0992ba19b 100644 --- a/config/spec-macros/extension.rb +++ b/config/spec-macros/extension.rb @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2024 The Khronos Group Inc. +# Copyright (c) 2016-2025 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/version-full-links.asciidoc b/config/version-full-links.asciidoc index f7e506950..80e6342c4 100644 --- a/config/version-full-links.asciidoc +++ b/config/version-full-links.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2023-2024 The Khronos Group. This work is licensed under a +// Copyright 2023-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/config/version-local-links.asciidoc b/config/version-local-links.asciidoc index 7ce8377e4..a846b2634 100644 --- a/config/version-local-links.asciidoc +++ b/config/version-local-links.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2023-2024 The Khronos Group. This work is licensed under a +// Copyright 2023-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/copyrights-ccby.txt b/copyrights-ccby.txt index d6b85b3e5..e122ac610 100644 --- a/copyrights-ccby.txt +++ b/copyrights-ccby.txt @@ -1,4 +1,4 @@ -Copyright 2019-2024 The Khronos Group. +Copyright 2019-2025 The Khronos Group. Khronos licenses this file to you under the Creative Commons Attribution 4.0 International (CC BY 4.0) License (the "License"); you may not use this file diff --git a/copyrights.txt b/copyrights.txt index 11333319c..e071a89cd 100644 --- a/copyrights.txt +++ b/copyrights.txt @@ -1,4 +1,4 @@ -Copyright 2008-2024 The Khronos Group Inc. +Copyright 2008-2025 The Khronos Group Inc. This Specification is protected by copyright laws and contains material proprietary to Khronos. Except as described by these terms, it or any components may not be reproduced, republished, diff --git a/cxx/acknowledgements.txt b/cxx/acknowledgements.txt index e00801c8c..f994e5ddd 100644 --- a/cxx/acknowledgements.txt +++ b/cxx/acknowledgements.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/annotation.txt b/cxx/annotation.txt index d3f95b606..75878c61f 100644 --- a/cxx/annotation.txt +++ b/cxx/annotation.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/compiler_options.txt b/cxx/compiler_options.txt index 5f601f00c..7c821edbd 100644 --- a/cxx/compiler_options.txt +++ b/cxx/compiler_options.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/generic_type_name_notation.txt b/cxx/generic_type_name_notation.txt index 578c677ae..eb2bfb4d9 100644 --- a/cxx/generic_type_name_notation.txt +++ b/cxx/generic_type_name_notation.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/image_addressing_and_filtering.txt b/cxx/image_addressing_and_filtering.txt index 7b27f3d04..e9b5c574a 100644 --- a/cxx/image_addressing_and_filtering.txt +++ b/cxx/image_addressing_and_filtering.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/address_spaces.txt b/cxx/lang/address_spaces.txt index dbaab05ef..a583d0c65 100644 --- a/cxx/lang/address_spaces.txt +++ b/cxx/lang/address_spaces.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/attribute_qualifiers.txt b/cxx/lang/attribute_qualifiers.txt index 0eb344836..d993cfc30 100644 --- a/cxx/lang/attribute_qualifiers.txt +++ b/cxx/lang/attribute_qualifiers.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/builtin_data_types.txt b/cxx/lang/builtin_data_types.txt index 7925ac28d..7251fb47e 100644 --- a/cxx/lang/builtin_data_types.txt +++ b/cxx/lang/builtin_data_types.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/expressions.txt b/cxx/lang/expressions.txt index 10dfde9f0..d25dd9a95 100644 --- a/cxx/lang/expressions.txt +++ b/cxx/lang/expressions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/implicit_type_conversions.txt b/cxx/lang/implicit_type_conversions.txt index 695cdfb8b..464cb4135 100644 --- a/cxx/lang/implicit_type_conversions.txt +++ b/cxx/lang/implicit_type_conversions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/kernel_functions.txt b/cxx/lang/kernel_functions.txt index 9efb5251b..389a955e8 100644 --- a/cxx/lang/kernel_functions.txt +++ b/cxx/lang/kernel_functions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/keywords.txt b/cxx/lang/keywords.txt index 5381756e6..1891c1ebf 100644 --- a/cxx/lang/keywords.txt +++ b/cxx/lang/keywords.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/lang.txt b/cxx/lang/lang.txt index 78360fa63..6f503f599 100644 --- a/cxx/lang/lang.txt +++ b/cxx/lang/lang.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/preprocessor.txt b/cxx/lang/preprocessor.txt index 95880a19b..b0334db30 100644 --- a/cxx/lang/preprocessor.txt +++ b/cxx/lang/preprocessor.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/lang/restrictions.txt b/cxx/lang/restrictions.txt index cb212b30c..b5788a604 100644 --- a/cxx/lang/restrictions.txt +++ b/cxx/lang/restrictions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/edge_case_behavior.txt b/cxx/numerical_compliance/edge_case_behavior.txt index 814e112b8..d2a4efece 100644 --- a/cxx/numerical_compliance/edge_case_behavior.txt +++ b/cxx/numerical_compliance/edge_case_behavior.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/floating_point_exceptions.txt b/cxx/numerical_compliance/floating_point_exceptions.txt index f2a043c0c..bb56d7677 100644 --- a/cxx/numerical_compliance/floating_point_exceptions.txt +++ b/cxx/numerical_compliance/floating_point_exceptions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt b/cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt index 08d11b047..0afdca6b5 100644 --- a/cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt +++ b/cxx/numerical_compliance/inf_nan_and_denormalized_numbers.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/numerical_compliance.txt b/cxx/numerical_compliance/numerical_compliance.txt index d2606a35d..84a9213b1 100644 --- a/cxx/numerical_compliance/numerical_compliance.txt +++ b/cxx/numerical_compliance/numerical_compliance.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/relative_error_as_ulps.txt b/cxx/numerical_compliance/relative_error_as_ulps.txt index 28c4aaf83..4e309a446 100644 --- a/cxx/numerical_compliance/relative_error_as_ulps.txt +++ b/cxx/numerical_compliance/relative_error_as_ulps.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/numerical_compliance/rounding_modes.txt b/cxx/numerical_compliance/rounding_modes.txt index f607a6774..ed25b51b5 100644 --- a/cxx/numerical_compliance/rounding_modes.txt +++ b/cxx/numerical_compliance/rounding_modes.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/address_spaces.txt b/cxx/stdlib/address_spaces.txt index 83688504e..00bb118b9 100644 --- a/cxx/stdlib/address_spaces.txt +++ b/cxx/stdlib/address_spaces.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/array.txt b/cxx/stdlib/array.txt index 2483fa04e..2b00943df 100644 --- a/cxx/stdlib/array.txt +++ b/cxx/stdlib/array.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/atomic_operations.txt b/cxx/stdlib/atomic_operations.txt index 5d326c8b8..c6f33f556 100644 --- a/cxx/stdlib/atomic_operations.txt +++ b/cxx/stdlib/atomic_operations.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/common.txt b/cxx/stdlib/common.txt index c0b932bff..4a74fa72d 100644 --- a/cxx/stdlib/common.txt +++ b/cxx/stdlib/common.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/conversions.txt b/cxx/stdlib/conversions.txt index 032690e0e..268a89df7 100644 --- a/cxx/stdlib/conversions.txt +++ b/cxx/stdlib/conversions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/definitions.txt b/cxx/stdlib/definitions.txt index f167cb110..899d09cdc 100644 --- a/cxx/stdlib/definitions.txt +++ b/cxx/stdlib/definitions.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/device_enqueue.txt b/cxx/stdlib/device_enqueue.txt index e3dd94167..2c6f7ba3f 100644 --- a/cxx/stdlib/device_enqueue.txt +++ b/cxx/stdlib/device_enqueue.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/general_utilities.txt b/cxx/stdlib/general_utilities.txt index bc6bf3c8d..3587135dc 100644 --- a/cxx/stdlib/general_utilities.txt +++ b/cxx/stdlib/general_utilities.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/geometric.txt b/cxx/stdlib/geometric.txt index 18ed5108d..ace0fd824 100644 --- a/cxx/stdlib/geometric.txt +++ b/cxx/stdlib/geometric.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/half_wrapper.txt b/cxx/stdlib/half_wrapper.txt index 0ad1e5ec6..96b35f200 100644 --- a/cxx/stdlib/half_wrapper.txt +++ b/cxx/stdlib/half_wrapper.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/images_and_samplers.txt b/cxx/stdlib/images_and_samplers.txt index 601ec04c1..3f78edfd1 100644 --- a/cxx/stdlib/images_and_samplers.txt +++ b/cxx/stdlib/images_and_samplers.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/integer.txt b/cxx/stdlib/integer.txt index fff5e91f3..dab02a026 100644 --- a/cxx/stdlib/integer.txt +++ b/cxx/stdlib/integer.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/iterator.txt b/cxx/stdlib/iterator.txt index b2592b5cb..f8e0eb9c1 100644 --- a/cxx/stdlib/iterator.txt +++ b/cxx/stdlib/iterator.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/limits.txt b/cxx/stdlib/limits.txt index 60bdad986..332b93197 100644 --- a/cxx/stdlib/limits.txt +++ b/cxx/stdlib/limits.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/marker_types.txt b/cxx/stdlib/marker_types.txt index fb09da812..1f8c1e4e2 100644 --- a/cxx/stdlib/marker_types.txt +++ b/cxx/stdlib/marker_types.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/math.txt b/cxx/stdlib/math.txt index 6cce28211..cd8af5850 100644 --- a/cxx/stdlib/math.txt +++ b/cxx/stdlib/math.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/math_constants.txt b/cxx/stdlib/math_constants.txt index 63c01f976..64d222e4f 100644 --- a/cxx/stdlib/math_constants.txt +++ b/cxx/stdlib/math_constants.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/pipes.txt b/cxx/stdlib/pipes.txt index 9ec6d157b..c526fd856 100644 --- a/cxx/stdlib/pipes.txt +++ b/cxx/stdlib/pipes.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/printf.txt b/cxx/stdlib/printf.txt index a4479dc25..052ce5382 100644 --- a/cxx/stdlib/printf.txt +++ b/cxx/stdlib/printf.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/range.txt b/cxx/stdlib/range.txt index d2172d2bc..ad877efbe 100644 --- a/cxx/stdlib/range.txt +++ b/cxx/stdlib/range.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/reinterpreting_data.txt b/cxx/stdlib/reinterpreting_data.txt index 6fd5b96c9..77748cf75 100644 --- a/cxx/stdlib/reinterpreting_data.txt +++ b/cxx/stdlib/reinterpreting_data.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/relational.txt b/cxx/stdlib/relational.txt index 80d0fd596..5ee368d20 100644 --- a/cxx/stdlib/relational.txt +++ b/cxx/stdlib/relational.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/specialization_constants.txt b/cxx/stdlib/specialization_constants.txt index f5a24ab54..fb731d69c 100644 --- a/cxx/stdlib/specialization_constants.txt +++ b/cxx/stdlib/specialization_constants.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/stdlib.txt b/cxx/stdlib/stdlib.txt index 1b61c1c36..72201e595 100644 --- a/cxx/stdlib/stdlib.txt +++ b/cxx/stdlib/stdlib.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/synchronization.txt b/cxx/stdlib/synchronization.txt index 08e433e8d..8f1fbe18c 100644 --- a/cxx/stdlib/synchronization.txt +++ b/cxx/stdlib/synchronization.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/tuple.txt b/cxx/stdlib/tuple.txt index 6e492a286..ce7e3dcdc 100644 --- a/cxx/stdlib/tuple.txt +++ b/cxx/stdlib/tuple.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/type_traits.txt b/cxx/stdlib/type_traits.txt index ea2e6b7af..15360eb25 100644 --- a/cxx/stdlib/type_traits.txt +++ b/cxx/stdlib/type_traits.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/vector_data_load_and_store.txt b/cxx/stdlib/vector_data_load_and_store.txt index eb5cf60f6..b18095ecb 100644 --- a/cxx/stdlib/vector_data_load_and_store.txt +++ b/cxx/stdlib/vector_data_load_and_store.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/vector_iterator.txt b/cxx/stdlib/vector_iterator.txt index 710a243fe..a604c284b 100644 --- a/cxx/stdlib/vector_iterator.txt +++ b/cxx/stdlib/vector_iterator.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/vector_utilities.txt b/cxx/stdlib/vector_utilities.txt index 87c2979b4..963f11a11 100644 --- a/cxx/stdlib/vector_utilities.txt +++ b/cxx/stdlib/vector_utilities.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/vector_wrapper.txt b/cxx/stdlib/vector_wrapper.txt index 480da4ba4..b071eae56 100644 --- a/cxx/stdlib/vector_wrapper.txt +++ b/cxx/stdlib/vector_wrapper.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/work_group.txt b/cxx/stdlib/work_group.txt index f496a85a6..bfab764fc 100644 --- a/cxx/stdlib/work_group.txt +++ b/cxx/stdlib/work_group.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx/stdlib/work_item.txt b/cxx/stdlib/work_item.txt index ca561d406..7699b1e14 100644 --- a/cxx/stdlib/work_item.txt +++ b/cxx/stdlib/work_item.txt @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/acknowledgements.txt b/cxx4opencl/acknowledgements.txt index 932bad1aa..5958e0d6c 100644 --- a/cxx4opencl/acknowledgements.txt +++ b/cxx4opencl/acknowledgements.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/address_spaces.txt b/cxx4opencl/address_spaces.txt index c36384e71..321bb9dbf 100644 --- a/cxx4opencl/address_spaces.txt +++ b/cxx4opencl/address_spaces.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/cxxcasts.txt b/cxx4opencl/cxxcasts.txt index b23e31133..5ccc209e6 100644 --- a/cxx4opencl/cxxcasts.txt +++ b/cxx4opencl/cxxcasts.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/diff2cxx.txt b/cxx4opencl/diff2cxx.txt index 3b8821d2c..8af7beaed 100644 --- a/cxx4opencl/diff2cxx.txt +++ b/cxx4opencl/diff2cxx.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/diff2openclc.txt b/cxx4opencl/diff2openclc.txt index a8ea5ae8a..3b212ee34 100644 --- a/cxx4opencl/diff2openclc.txt +++ b/cxx4opencl/diff2openclc.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/intro.txt b/cxx4opencl/intro.txt index 9c6593894..f9b0fd520 100644 --- a/cxx4opencl/intro.txt +++ b/cxx4opencl/intro.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/kernel.txt b/cxx4opencl/kernel.txt index d7ccd2331..50f74d8a5 100644 --- a/cxx4opencl/kernel.txt +++ b/cxx4opencl/kernel.txt @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group. This work is licensed under a +// Copyright 2021-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/cxx4opencl/references.txt b/cxx4opencl/references.txt index ca89409ee..8038764d5 100644 --- a/cxx4opencl/references.txt +++ b/cxx4opencl/references.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/appendix_a.asciidoc b/env/appendix_a.asciidoc index e2a35212d..68e75e2ef 100644 --- a/env/appendix_a.asciidoc +++ b/env/appendix_a.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/common_properties.asciidoc b/env/common_properties.asciidoc index c40b633b1..4c70de98c 100644 --- a/env/common_properties.asciidoc +++ b/env/common_properties.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/dictionary.asciidoc b/env/dictionary.asciidoc index ef7a9401b..4dc29b081 100644 --- a/env/dictionary.asciidoc +++ b/env/dictionary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/extensions.asciidoc b/env/extensions.asciidoc index aa963e514..ccff17e5e 100644 --- a/env/extensions.asciidoc +++ b/env/extensions.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/image_addressing_and_filtering.asciidoc b/env/image_addressing_and_filtering.asciidoc index 84c66fcad..a0d2769ac 100644 --- a/env/image_addressing_and_filtering.asciidoc +++ b/env/image_addressing_and_filtering.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a +// Copyright 2018-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/introduction.asciidoc b/env/introduction.asciidoc index 90962b5bf..ddef49bdb 100644 --- a/env/introduction.asciidoc +++ b/env/introduction.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/numerical_compliance.asciidoc b/env/numerical_compliance.asciidoc index af35aeb6d..013b87ece 100644 --- a/env/numerical_compliance.asciidoc +++ b/env/numerical_compliance.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/references.asciidoc b/env/references.asciidoc index 6de0c4e4a..c2d9e31d3 100644 --- a/env/references.asciidoc +++ b/env/references.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a +// Copyright 2018-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/required_capabilities.asciidoc b/env/required_capabilities.asciidoc index f5314c3e9..5fc537b8c 100644 --- a/env/required_capabilities.asciidoc +++ b/env/required_capabilities.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/env/validation_rules.asciidoc b/env/validation_rules.asciidoc index 698729c35..95e2c83b0 100644 --- a/env/validation_rules.asciidoc +++ b/env/validation_rules.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/deprecated_extensions.asciidoc b/ext/deprecated_extensions.asciidoc index 7004cd3d6..cc2adb1b3 100644 --- a/ext/deprecated_extensions.asciidoc +++ b/ext/deprecated_extensions.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/dictionary.asciidoc b/ext/dictionary.asciidoc index ef7a9401b..4dc29b081 100644 --- a/ext/dictionary.asciidoc +++ b/ext/dictionary.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/index.asciidoc b/ext/index.asciidoc index 5064392a6..78adbb7cf 100644 --- a/ext/index.asciidoc +++ b/ext/index.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/introduction.asciidoc b/ext/introduction.asciidoc index 3ff391ded..e218f766b 100644 --- a/ext/introduction.asciidoc +++ b/ext/introduction.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index 86f979c82..b813d072e 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/ext/to_core_features.asciidoc b/ext/to_core_features.asciidoc index 58c799fbc..e9767587b 100644 --- a/ext/to_core_features.asciidoc +++ b/ext/to_core_features.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/extensions/cl_arm_controlled_kernel_termination.asciidoc b/extensions/cl_arm_controlled_kernel_termination.asciidoc index 174f496c6..7263a2890 100644 --- a/extensions/cl_arm_controlled_kernel_termination.asciidoc +++ b/extensions/cl_arm_controlled_kernel_termination.asciidoc @@ -21,7 +21,7 @@ Anastasia Stulova, Arm Ltd. + == Notice -Copyright (c) 2021-2024 Arm Ltd. +Copyright (c) 2021-2025 Arm Ltd. == Status diff --git a/extensions/cl_arm_printf.asciidoc b/extensions/cl_arm_printf.asciidoc index 9af0add7f..0686e8970 100644 --- a/extensions/cl_arm_printf.asciidoc +++ b/extensions/cl_arm_printf.asciidoc @@ -24,7 +24,7 @@ Kevin Petit, Arm Ltd. + == Notice -Copyright (c) 2014-2024 Arm Ltd. +Copyright (c) 2014-2025 Arm Ltd. 
== Status diff --git a/extensions/cl_arm_protected_memory_allocation.asciidoc b/extensions/cl_arm_protected_memory_allocation.asciidoc index cc4f43ab3..c4820fc0c 100644 --- a/extensions/cl_arm_protected_memory_allocation.asciidoc +++ b/extensions/cl_arm_protected_memory_allocation.asciidoc @@ -19,7 +19,7 @@ Kevin Petit, Arm Ltd. + == Notice -Copyright (c) 2021-2024 Arm Ltd. +Copyright (c) 2021-2025 Arm Ltd. == Status diff --git a/extensions/cl_arm_scheduling_controls.asciidoc b/extensions/cl_arm_scheduling_controls.asciidoc index ef07f8e5f..d12472289 100644 --- a/extensions/cl_arm_scheduling_controls.asciidoc +++ b/extensions/cl_arm_scheduling_controls.asciidoc @@ -22,7 +22,7 @@ Radek Szymanski, Arm Ltd. + == Notice -Copyright (c) 2020-2024 Arm Ltd. +Copyright (c) 2020-2025 Arm Ltd. == Status diff --git a/extensions/cl_ext_float_atomics.asciidoc b/extensions/cl_ext_float_atomics.asciidoc index c3ae27883..930e22525 100644 --- a/extensions/cl_ext_float_atomics.asciidoc +++ b/extensions/cl_ext_float_atomics.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a +// Copyright 2018-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ @@ -74,7 +74,7 @@ Ruihao Zhang, Qualcomm == Notice -Copyright (c) 2021-2024 The Khronos Group Inc. +Copyright (c) 2021-2025 The Khronos Group Inc. == Status diff --git a/extensions/cl_ext_image_raw10_raw12.asciidoc b/extensions/cl_ext_image_raw10_raw12.asciidoc index d90173f7a..ba8e1ef6f 100644 --- a/extensions/cl_ext_image_raw10_raw12.asciidoc +++ b/extensions/cl_ext_image_raw10_raw12.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a +// Copyright 2018-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/extensions/cl_extension_template.asciidoc b/extensions/cl_extension_template.asciidoc index 6666c6a96..6f8539fea 100644 --- a/extensions/cl_extension_template.asciidoc +++ b/extensions/cl_extension_template.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a +// Copyright 2018-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ @@ -94,7 +94,7 @@ the time of their contribution, one person per line. == Notice -Copyright (c) 2023-2024 Some Company. Copyright terms at: + +Copyright (c) 2023-2025 Some Company. Copyright terms at: + http://link/copyright.html **** diff --git a/extensions/cl_img_bitwise_ops.asciidoc b/extensions/cl_img_bitwise_ops.asciidoc index fbbd370fa..f36b65978 100644 --- a/extensions/cl_img_bitwise_ops.asciidoc +++ b/extensions/cl_img_bitwise_ops.asciidoc @@ -23,7 +23,7 @@ Tomasz Platek, Imagination Technologies. == Notice -Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2024-2025 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_cached_allocations.asciidoc b/extensions/cl_img_cached_allocations.asciidoc index 0faf142df..e6701845a 100644 --- a/extensions/cl_img_cached_allocations.asciidoc +++ b/extensions/cl_img_cached_allocations.asciidoc @@ -24,7 +24,7 @@ Jeremy Kemp, Imagination Technologies. == Notice -Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2025 Imagination Technologies Ltd. All Rights Reserved. 
== Status diff --git a/extensions/cl_img_cancel_command.asciidoc b/extensions/cl_img_cancel_command.asciidoc index 9b9599268..a624ddb38 100644 --- a/extensions/cl_img_cancel_command.asciidoc +++ b/extensions/cl_img_cancel_command.asciidoc @@ -26,7 +26,7 @@ Paul Fradgley, Imagination Technologies. == Notice -Copyright (c) 2023-2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2023-2025 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_generate_mipmap.asciidoc b/extensions/cl_img_generate_mipmap.asciidoc index 6ea7bd256..566549719 100644 --- a/extensions/cl_img_generate_mipmap.asciidoc +++ b/extensions/cl_img_generate_mipmap.asciidoc @@ -26,7 +26,7 @@ Jeremy Kemp, Imagination Technologies. == Notice -Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2025 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_matrix_multiply.asciidoc b/extensions/cl_img_matrix_multiply.asciidoc index 573d4e4fe..c3188e1fd 100644 --- a/extensions/cl_img_matrix_multiply.asciidoc +++ b/extensions/cl_img_matrix_multiply.asciidoc @@ -26,7 +26,7 @@ David Welch, Imagination Technologies. == Notice -Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2024-2025 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_mem_properties.asciidoc b/extensions/cl_img_mem_properties.asciidoc index 2e3b4bb37..aa99a5fe3 100644 --- a/extensions/cl_img_mem_properties.asciidoc +++ b/extensions/cl_img_mem_properties.asciidoc @@ -24,7 +24,7 @@ Jeba Samuel, Imagination Technologies. == Notice -Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2025 Imagination Technologies Ltd. All Rights Reserved. 
== Status diff --git a/extensions/cl_img_memory_management.asciidoc b/extensions/cl_img_memory_management.asciidoc index f9aa61e83..60b8961bd 100644 --- a/extensions/cl_img_memory_management.asciidoc +++ b/extensions/cl_img_memory_management.asciidoc @@ -23,7 +23,7 @@ Tomasz Platek, Imagination Technologies. == Notice -Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2024-2025 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_swap_ops.asciidoc b/extensions/cl_img_swap_ops.asciidoc index ea9578022..bc06731f5 100644 --- a/extensions/cl_img_swap_ops.asciidoc +++ b/extensions/cl_img_swap_ops.asciidoc @@ -23,7 +23,7 @@ Tomasz Platek, Imagination Technologies. == Notice -Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2024-2025 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_use_gralloc_ptr.asciidoc b/extensions/cl_img_use_gralloc_ptr.asciidoc index cf05ec786..c63293a37 100644 --- a/extensions/cl_img_use_gralloc_ptr.asciidoc +++ b/extensions/cl_img_use_gralloc_ptr.asciidoc @@ -25,7 +25,7 @@ Jeremy Kemp, Imagination Technologies. == Notice -Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2025 Imagination Technologies Ltd. All Rights Reserved. == Status diff --git a/extensions/cl_img_yuv_image.asciidoc b/extensions/cl_img_yuv_image.asciidoc index d33c2fd0f..7896ded66 100644 --- a/extensions/cl_img_yuv_image.asciidoc +++ b/extensions/cl_img_yuv_image.asciidoc @@ -25,7 +25,7 @@ Jeremy Kemp, Imagination Technologies. == Notice -Copyright (c) 2020-2024 Imagination Technologies Ltd. All Rights Reserved. +Copyright (c) 2020-2025 Imagination Technologies Ltd. All Rights Reserved. 
== Status diff --git a/extensions/cl_intel_bfloat16_conversions.asciidoc b/extensions/cl_intel_bfloat16_conversions.asciidoc index 5262b6a83..ae74d44ad 100644 --- a/extensions/cl_intel_bfloat16_conversions.asciidoc +++ b/extensions/cl_intel_bfloat16_conversions.asciidoc @@ -23,7 +23,7 @@ Lukasz Towarek, Intel == Notice -Copyright (c) 2022-2024 Intel Corporation. All rights reserved. +Copyright (c) 2022-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_command_queue_families.asciidoc b/extensions/cl_intel_command_queue_families.asciidoc index 9e967f76c..829a1c9af 100644 --- a/extensions/cl_intel_command_queue_families.asciidoc +++ b/extensions/cl_intel_command_queue_families.asciidoc @@ -34,7 +34,7 @@ Michal Mrozek, Intel + == Notice -Copyright (c) 2021-2024 Intel Corporation. All rights reserved. +Copyright (c) 2021-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_create_buffer_with_properties.asciidoc b/extensions/cl_intel_create_buffer_with_properties.asciidoc index 4948de276..d1f4df1cd 100644 --- a/extensions/cl_intel_create_buffer_with_properties.asciidoc +++ b/extensions/cl_intel_create_buffer_with_properties.asciidoc @@ -32,7 +32,7 @@ Ben Ashbaugh, Intel == Notice -Copyright (c) 2020-2024 Intel Corporation. All rights reserved. +Copyright (c) 2020-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_device_attribute_query.asciidoc b/extensions/cl_intel_device_attribute_query.asciidoc index 71fdcfa8d..b357b2008 100644 --- a/extensions/cl_intel_device_attribute_query.asciidoc +++ b/extensions/cl_intel_device_attribute_query.asciidoc @@ -44,7 +44,7 @@ Rafik Saliev, Intel == Notice -Copyright (c) 2021-2024 Intel Corporation. All rights reserved. +Copyright (c) 2021-2025 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_mem_alloc_buffer_location.asciidoc b/extensions/cl_intel_mem_alloc_buffer_location.asciidoc index f079a9b22..62fd232eb 100644 --- a/extensions/cl_intel_mem_alloc_buffer_location.asciidoc +++ b/extensions/cl_intel_mem_alloc_buffer_location.asciidoc @@ -40,7 +40,7 @@ Contributors Notice ------ -Copyright (c) 2020-2024 Intel Corporation. All rights reserved. +Copyright (c) 2020-2025 Intel Corporation. All rights reserved. Status ------ diff --git a/extensions/cl_intel_mem_channel_property.asciidoc b/extensions/cl_intel_mem_channel_property.asciidoc index 2336c763b..bec452a35 100644 --- a/extensions/cl_intel_mem_channel_property.asciidoc +++ b/extensions/cl_intel_mem_channel_property.asciidoc @@ -37,7 +37,7 @@ Contributors Notice ------ -Copyright (c) 2020-2024 Intel Corporation. All rights reserved. +Copyright (c) 2020-2025 Intel Corporation. All rights reserved. Status ------ diff --git a/extensions/cl_intel_mem_force_host_memory.asciidoc b/extensions/cl_intel_mem_force_host_memory.asciidoc index 474852870..7ce6a9f4f 100644 --- a/extensions/cl_intel_mem_force_host_memory.asciidoc +++ b/extensions/cl_intel_mem_force_host_memory.asciidoc @@ -30,7 +30,7 @@ Filip Hazubski, Intel == Notice -Copyright (c) 2020-2024 Intel Corporation. All rights reserved. +Copyright (c) 2020-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_packed_yuv.asciidoc b/extensions/cl_intel_packed_yuv.asciidoc index 8395af491..b66f75846 100644 --- a/extensions/cl_intel_packed_yuv.asciidoc +++ b/extensions/cl_intel_packed_yuv.asciidoc @@ -30,7 +30,7 @@ Ben Ashbaugh, Intel == Notice -Copyright (c) 2021-2024 Intel Corporation. All rights reserved. +Copyright (c) 2021-2025 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_planar_yuv.asciidoc b/extensions/cl_intel_planar_yuv.asciidoc index 07f4388e8..bd3fc7027 100644 --- a/extensions/cl_intel_planar_yuv.asciidoc +++ b/extensions/cl_intel_planar_yuv.asciidoc @@ -34,7 +34,7 @@ Biju George, Intel == Notice -Copyright (c) 2021-2024 Intel Corporation. All rights reserved. +Copyright (c) 2021-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_program_scope_host_pipe.asciidoc b/extensions/cl_intel_program_scope_host_pipe.asciidoc index 51a3badfc..ba23d061b 100644 --- a/extensions/cl_intel_program_scope_host_pipe.asciidoc +++ b/extensions/cl_intel_program_scope_host_pipe.asciidoc @@ -31,7 +31,7 @@ Zibai Wang, Intel + == Notice -Copyright (c) 2023-2024 Intel Corporation. All rights reserved. +Copyright (c) 2023-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_required_subgroup_size.asciidoc b/extensions/cl_intel_required_subgroup_size.asciidoc index c30323730..af16b5c79 100644 --- a/extensions/cl_intel_required_subgroup_size.asciidoc +++ b/extensions/cl_intel_required_subgroup_size.asciidoc @@ -39,7 +39,7 @@ Ben Ashbaugh, Intel == Notice -Copyright (c) 2018-2024 Intel Corporation. All rights reserved. +Copyright (c) 2018-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_sharing_format_query.asciidoc b/extensions/cl_intel_sharing_format_query.asciidoc index c1b8e0164..d452e827e 100644 --- a/extensions/cl_intel_sharing_format_query.asciidoc +++ b/extensions/cl_intel_sharing_format_query.asciidoc @@ -26,7 +26,7 @@ Pawel Wilma, Intel == Notice -Copyright (c) 2021-2024 Intel Corporation. All rights reserved. +Copyright (c) 2021-2025 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_spirv_device_side_avc_motion_estimation.asciidoc b/extensions/cl_intel_spirv_device_side_avc_motion_estimation.asciidoc index 9101fd86b..921e8aa89 100644 --- a/extensions/cl_intel_spirv_device_side_avc_motion_estimation.asciidoc +++ b/extensions/cl_intel_spirv_device_side_avc_motion_estimation.asciidoc @@ -29,7 +29,7 @@ Biju George, Intel == Notice -Copyright (c) 2018-2024 Intel Corporation. All rights reserved. +Copyright (c) 2018-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_spirv_media_block_io.asciidoc b/extensions/cl_intel_spirv_media_block_io.asciidoc index 322cbafc8..39ea03c78 100644 --- a/extensions/cl_intel_spirv_media_block_io.asciidoc +++ b/extensions/cl_intel_spirv_media_block_io.asciidoc @@ -30,7 +30,7 @@ Pawel Jurek, Intel == Notice -Copyright (c) 2018-2024 Intel Corporation. All rights reserved. +Copyright (c) 2018-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_spirv_subgroups.asciidoc b/extensions/cl_intel_spirv_subgroups.asciidoc index c2f3faf38..302009eda 100644 --- a/extensions/cl_intel_spirv_subgroups.asciidoc +++ b/extensions/cl_intel_spirv_subgroups.asciidoc @@ -31,7 +31,7 @@ Mariusz Merecki, Intel == Notice -Copyright (c) 2018-2024 Intel Corporation. All rights reserved. +Copyright (c) 2018-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_split_work_group_barrier.asciidoc b/extensions/cl_intel_split_work_group_barrier.asciidoc index ed2d1ee73..104c30e8b 100644 --- a/extensions/cl_intel_split_work_group_barrier.asciidoc +++ b/extensions/cl_intel_split_work_group_barrier.asciidoc @@ -24,7 +24,7 @@ John Pennycook, Intel == Notice -Copyright (c) 2022-2024 Intel Corporation. All rights reserved. +Copyright (c) 2022-2025 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc b/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc index 142bef7ce..709730771 100644 --- a/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc +++ b/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc @@ -31,7 +31,7 @@ Andrzej Ratajewski, Intel + == Notice -Copyright (c) 2024 Intel Corporation. All rights reserved. +Copyright (c) 2024-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc b/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc index 64d4b3487..b7f868067 100644 --- a/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc +++ b/extensions/cl_intel_subgroup_matrix_multiply_accumulate.asciidoc @@ -28,7 +28,7 @@ Lukasz Towarek, Intel == Notice -Copyright (c) 2022-2024 Intel Corporation. All rights reserved. +Copyright (c) 2022-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroup_split_matrix_multiply_accumulate.asciidoc b/extensions/cl_intel_subgroup_split_matrix_multiply_accumulate.asciidoc index 630078a46..603fb9635 100644 --- a/extensions/cl_intel_subgroup_split_matrix_multiply_accumulate.asciidoc +++ b/extensions/cl_intel_subgroup_split_matrix_multiply_accumulate.asciidoc @@ -25,7 +25,7 @@ Lukasz Towarek, Intel == Notice -Copyright (c) 2022-2024 Intel Corporation. All rights reserved. +Copyright (c) 2022-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroups.asciidoc b/extensions/cl_intel_subgroups.asciidoc index b17b9afe5..0a27ca06d 100644 --- a/extensions/cl_intel_subgroups.asciidoc +++ b/extensions/cl_intel_subgroups.asciidoc @@ -42,7 +42,7 @@ Biju George, Intel == Notice -Copyright (c) 2018-2024 Intel Corporation. All rights reserved. +Copyright (c) 2018-2025 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_intel_subgroups_char.asciidoc b/extensions/cl_intel_subgroups_char.asciidoc index 1658cfacb..adaa6f6f5 100644 --- a/extensions/cl_intel_subgroups_char.asciidoc +++ b/extensions/cl_intel_subgroups_char.asciidoc @@ -33,7 +33,7 @@ Konrad Trifunovic, Intel == Notice -Copyright (c) 2020-2024 Intel Corporation. All rights reserved. +Copyright (c) 2020-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroups_long.asciidoc b/extensions/cl_intel_subgroups_long.asciidoc index b010c4e5d..d559299b9 100644 --- a/extensions/cl_intel_subgroups_long.asciidoc +++ b/extensions/cl_intel_subgroups_long.asciidoc @@ -30,7 +30,7 @@ Konrad Trifunovic, Intel == Notice -Copyright (c) 2020-2024 Intel Corporation. All rights reserved. +Copyright (c) 2020-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_subgroups_short.asciidoc b/extensions/cl_intel_subgroups_short.asciidoc index c9d75d3ac..93d78cb4f 100644 --- a/extensions/cl_intel_subgroups_short.asciidoc +++ b/extensions/cl_intel_subgroups_short.asciidoc @@ -29,7 +29,7 @@ Insoo Woo, Intel == Notice -Copyright (c) 2018-2024 Intel Corporation. All rights reserved. +Copyright (c) 2018-2025 Intel Corporation. All rights reserved. == Status diff --git a/extensions/cl_intel_unified_shared_memory.asciidoc b/extensions/cl_intel_unified_shared_memory.asciidoc index 05538f926..4b85901af 100644 --- a/extensions/cl_intel_unified_shared_memory.asciidoc +++ b/extensions/cl_intel_unified_shared_memory.asciidoc @@ -43,7 +43,7 @@ Lukasz Towarek, Intel == Notice -Copyright (c) 2021-2024 Intel Corporation. All rights reserved. +Copyright (c) 2021-2025 Intel Corporation. All rights reserved. 
== Status diff --git a/extensions/cl_loader_info.asciidoc b/extensions/cl_loader_info.asciidoc index 3702583b7..1c0f8a4e7 100644 --- a/extensions/cl_loader_info.asciidoc +++ b/extensions/cl_loader_info.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a +// Copyright 2018-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ @@ -29,7 +29,7 @@ Brice Videau, Argonne National Laboratory == Notice -Copyright (c) 2023-2024 The Khronos Group Inc. +Copyright (c) 2023-2025 The Khronos Group Inc. == Status diff --git a/extensions/cl_loader_layers.asciidoc b/extensions/cl_loader_layers.asciidoc index c286706de..c0d4e23ef 100644 --- a/extensions/cl_loader_layers.asciidoc +++ b/extensions/cl_loader_layers.asciidoc @@ -1,4 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a +// Copyright 2017-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/extensions/cl_pocl_content_size.asciidoc b/extensions/cl_pocl_content_size.asciidoc index 613e58f14..053c51996 100644 --- a/extensions/cl_pocl_content_size.asciidoc +++ b/extensions/cl_pocl_content_size.asciidoc @@ -31,7 +31,7 @@ Jan Solanti, Tampere University == Notice -Copyright (c) 2020-2024 Tampere University +Copyright (c) 2020-2025 Tampere University == Status diff --git a/extensions/extensions.txt b/extensions/extensions.txt index cf5c98519..ab17caa3f 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -1,4 +1,4 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a +// Copyright 2018-2025 The Khronos Group. 
This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/langext/acknowledgements.txt b/langext/acknowledgements.txt index 9c7d379e4..b5b48a95d 100644 --- a/langext/acknowledgements.txt +++ b/langext/acknowledgements.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/langext/intro.txt b/langext/intro.txt index 361ddbe4d..628da5e89 100644 --- a/langext/intro.txt +++ b/langext/intro.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/langext/variadic_macro.txt b/langext/variadic_macro.txt index 97da9739b..e2b5ea9d5 100644 --- a/langext/variadic_macro.txt +++ b/langext/variadic_macro.txt @@ -1,4 +1,4 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a +// Copyright 2019-2025 The Khronos Group. This work is licensed under a // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ diff --git a/makeSpec b/makeSpec index ac17ffb34..df9132233 100755 --- a/makeSpec +++ b/makeSpec @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2020-2024 The Khronos Group Inc. +# Copyright 2020-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/man/static/EXTENSION.txt b/man/static/EXTENSION.txt index b24743843..0fcbf4e3c 100644 --- a/man/static/EXTENSION.txt +++ b/man/static/EXTENSION.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2024 The Khronos Group Inc. +// Copyright 2014-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/abstractDataTypes.txt b/man/static/abstractDataTypes.txt index 1bbc80324..049d59692 100644 --- a/man/static/abstractDataTypes.txt +++ b/man/static/abstractDataTypes.txt @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/clGetExtensionFunctionAddressForPlatform.txt b/man/static/clGetExtensionFunctionAddressForPlatform.txt index 7a63323cc..32ffa21a6 100644 --- a/man/static/clGetExtensionFunctionAddressForPlatform.txt +++ b/man/static/clGetExtensionFunctionAddressForPlatform.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2024 The Khronos Group Inc. +// Copyright 2014-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/convert_T.txt b/man/static/convert_T.txt index 4fd29230e..6cb059644 100644 --- a/man/static/convert_T.txt +++ b/man/static/convert_T.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2024 The Khronos Group Inc. +// Copyright 2014-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/deadLinks.txt b/man/static/deadLinks.txt index 38a3d92e3..cd9456287 100644 --- a/man/static/deadLinks.txt +++ b/man/static/deadLinks.txt @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The Khronos Group Inc. +// Copyright 2021-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/enums.txt b/man/static/enums.txt index f9778cfb2..79dd1d667 100644 --- a/man/static/enums.txt +++ b/man/static/enums.txt @@ -1,4 +1,4 @@ -// Copyright 2014-2024 The Khronos Group Inc. +// Copyright 2014-2025 The Khronos Group Inc. 
// SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/man/static/footer.txt b/man/static/footer.txt index d40bf0d75..d45cd722f 100644 --- a/man/static/footer.txt +++ b/man/static/footer.txt @@ -1,4 +1,4 @@ -// Copyright 2016-2024 The Khronos Group Inc. +// Copyright 2016-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 ifdef::doctype-manpage[] diff --git a/man/static/intro.txt b/man/static/intro.txt index cbfd26dea..d65aa2049 100644 --- a/man/static/intro.txt +++ b/man/static/intro.txt @@ -1,4 +1,4 @@ -// Copyright 2007-2024 The Khronos Group Inc. +// Copyright 2007-2025 The Khronos Group Inc. // SPDX-License-Identifier: CC-BY-4.0 :data-uri: diff --git a/scripts/apiconventions.py b/scripts/apiconventions.py index f16dcd479..dc828ec37 100644 --- a/scripts/apiconventions.py +++ b/scripts/apiconventions.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2021-2024 The Khronos Group Inc. +# Copyright 2021-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 # Generic alias for working group-specific API conventions interface. diff --git a/scripts/cgenerator.py b/scripts/cgenerator.py index 713113c02..888059efc 100644 --- a/scripts/cgenerator.py +++ b/scripts/cgenerator.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/checklinks.py b/scripts/checklinks.py index 5d79a676c..c1652ca9d 100755 --- a/scripts/checklinks.py +++ b/scripts/checklinks.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 import argparse diff --git a/scripts/clconventions.py b/scripts/clconventions.py index 734041ae1..0ea6352bc 100644 --- a/scripts/clconventions.py +++ b/scripts/clconventions.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2013-2024 The Khronos Group Inc. 
+# Copyright 2013-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 # Working-group-specific style conventions, diff --git a/scripts/docgenerator.py b/scripts/docgenerator.py index b8c2afd0a..46b6a464e 100644 --- a/scripts/docgenerator.py +++ b/scripts/docgenerator.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/extdependency.py b/scripts/extdependency.py index 59bfc8381..34fdc94ac 100755 --- a/scripts/extdependency.py +++ b/scripts/extdependency.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2017-2024 The Khronos Group Inc. +# Copyright 2017-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 """Generate a mapping of extension name -> all required extension names for diff --git a/scripts/extensionmetadocgenerator.py b/scripts/extensionmetadocgenerator.py index 9f93a29e8..f39c50c32 100644 --- a/scripts/extensionmetadocgenerator.py +++ b/scripts/extensionmetadocgenerator.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/find_adoc_deps b/scripts/find_adoc_deps index d20d04976..798969a65 100755 --- a/scripts/find_adoc_deps +++ b/scripts/find_adoc_deps @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2024 The Khronos Group Inc. +# Copyright 2024-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 # find_adoc_deps - find include:: dependencies of an asciidoc file diff --git a/scripts/genRef.py b/scripts/genRef.py index 2eabd231e..393251cbf 100755 --- a/scripts/genRef.py +++ b/scripts/genRef.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2016-2024 The Khronos Group Inc. +# Copyright 2016-2025 The Khronos Group Inc. 
# # SPDX-License-Identifier: Apache-2.0 @@ -79,7 +79,7 @@ def printCopyrightSourceComments(fp): Writes an asciidoc comment block, which copyrights the source file.""" - print('// Copyright 2014-2024 The Khronos Group Inc.', file=fp) + print('// Copyright 2014-2025 The Khronos Group Inc.', file=fp) print('//', file=fp) # This works around constraints of the 'reuse' tool print('// SPDX' + '-License-Identifier: CC-BY-4.0', file=fp) diff --git a/scripts/gen_dictionaries.py b/scripts/gen_dictionaries.py index e513385f6..a60210204 100755 --- a/scripts/gen_dictionaries.py +++ b/scripts/gen_dictionaries.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2019-2024 The Khronos Group Inc. +# Copyright 2019-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 from collections import OrderedDict @@ -19,7 +19,7 @@ def parse_xml(path): # File Header: def GetHeader(): - return """// Copyright 2017-2024 The Khronos Group. + return """// Copyright 2017-2025 The Khronos Group. // SPDX-License-Identifier: CC-BY-4.0 """ diff --git a/scripts/gen_dictionary_from_file.py b/scripts/gen_dictionary_from_file.py index f2f6a72da..b73bc7972 100644 --- a/scripts/gen_dictionary_from_file.py +++ b/scripts/gen_dictionary_from_file.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2024 The Khronos Group Inc. +# Copyright 2024-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 from collections import OrderedDict diff --git a/scripts/gen_version_notes.py b/scripts/gen_version_notes.py index d5cd81ff0..6611bedde 100755 --- a/scripts/gen_version_notes.py +++ b/scripts/gen_version_notes.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2019-2024 The Khronos Group Inc. +# Copyright 2019-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 from collections import OrderedDict @@ -23,7 +23,7 @@ def parse_xml(path): # File Header: def GetHeader(): - return """// Copyright 2017-2024 The Khronos Group. + return """// Copyright 2017-2025 The Khronos Group. 
// SPDX-License-Identifier: CC-BY-4.0 """ diff --git a/scripts/gencl.py b/scripts/gencl.py index 9cc8a1d2c..cb2a6eb14 100755 --- a/scripts/gencl.py +++ b/scripts/gencl.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 @@ -107,7 +107,7 @@ def makeGenOpts(args): # The SPDX formatting below works around constraints of the 'reuse' tool prefixStrings = [ '/*', - '** Copyright 2015-2024 The Khronos Group Inc.', + '** Copyright 2015-2025 The Khronos Group Inc.', '**', '** SPDX' + '-License-Identifier: Apache-2.0', '*/', diff --git a/scripts/generator.py b/scripts/generator.py index c534faf85..671d6f4e0 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 """Base class for source/header/doc generators, as well as some utility functions.""" diff --git a/scripts/parse_dependency.py b/scripts/parse_dependency.py index 071d7b3c3..7fbf63275 100755 --- a/scripts/parse_dependency.py +++ b/scripts/parse_dependency.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2022-2024 The Khronos Group Inc. +# Copyright 2022-2025 The Khronos Group Inc. # Copyright 2003-2019 Paul McGuire # SPDX-License-Identifier: MIT diff --git a/scripts/pygenerator.py b/scripts/pygenerator.py index 8656587e9..b4c3e77ad 100644 --- a/scripts/pygenerator.py +++ b/scripts/pygenerator.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/realign.py b/scripts/realign.py index 71f9f85eb..b1a6b093d 100755 --- a/scripts/realign.py +++ b/scripts/realign.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2013-2024 The Khronos Group Inc. 
+# Copyright 2013-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 # Usage: realign [infile] > outfile diff --git a/scripts/reflib.py b/scripts/reflib.py index bd873a365..61ced92c5 100644 --- a/scripts/reflib.py +++ b/scripts/reflib.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2016-2024 The Khronos Group Inc. +# Copyright 2016-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/reg.py b/scripts/reg.py index d5495212c..930bc5194 100755 --- a/scripts/reg.py +++ b/scripts/reg.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/runDocker b/scripts/runDocker index 19bf00588..e753ed7c1 100755 --- a/scripts/runDocker +++ b/scripts/runDocker @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2022-2024 The Khronos Group Inc. +# Copyright 2022-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 # runDocker - run the Khronos `asciidoctor-spec` Docker image with a local diff --git a/scripts/scriptgenerator.py b/scripts/scriptgenerator.py index 27339b26f..174233210 100644 --- a/scripts/scriptgenerator.py +++ b/scripts/scriptgenerator.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/spec_tools/conventions.py b/scripts/spec_tools/conventions.py index 190bd3b88..0298e434b 100644 --- a/scripts/spec_tools/conventions.py +++ b/scripts/spec_tools/conventions.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -i # -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. 
# # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/spec_tools/util.py b/scripts/spec_tools/util.py index e67038a5a..7f99a7682 100644 --- a/scripts/spec_tools/util.py +++ b/scripts/spec_tools/util.py @@ -1,6 +1,6 @@ """Utility functions not closely tied to other spec_tools types.""" # Copyright (c) 2018-2019 Collabora, Ltd. -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/xml/Makefile b/xml/Makefile index 28df30091..7c8569fce 100644 --- a/xml/Makefile +++ b/xml/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2013-2024 The Khronos Group Inc. +# Copyright (c) 2013-2025 The Khronos Group Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/xml/cl.xml b/xml/cl.xml index b01922db8..39c3a4c4c 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -1,7 +1,7 @@ -Copyright 2013-2024 The Khronos Group Inc. +Copyright 2013-2025 The Khronos Group Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/xml/registry.rnc b/xml/registry.rnc index ff85c0d1d..b85fac6db 100644 --- a/xml/registry.rnc +++ b/xml/registry.rnc @@ -1,4 +1,4 @@ -# Copyright 2013-2024 The Khronos Group Inc. +# Copyright 2013-2025 The Khronos Group Inc. 
# SPDX-License-Identifier: Apache-2.0 # Relax NG schema for Khronos API Registry XML From 73971af1504ab5942796d5b308b8ef1c2b2de273 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Fri, 21 Feb 2025 15:40:37 +0000 Subject: [PATCH 185/190] [NFC] Fix typo in address space conversion example (#1322) --- OpenCL_C.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OpenCL_C.txt b/OpenCL_C.txt index f593362ae..e447d9256 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -3336,7 +3336,7 @@ constant int *cp; int *p; gp = (global int *)lp; // illegal to cast between named address spaces -p = (int *)lp; // legal to cast from global to generic +p = (int *)gp; // legal to cast from global to generic gp = (global int*)p; // legal to cast from generic to global ---------- From 859b53b8944b3a7842347d60d4fcad0cc35d4d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pekka=20J=C3=A4=C3=A4skel=C3=A4inen?= Date: Tue, 25 Feb 2025 19:59:49 +0200 Subject: [PATCH 186/190] cl_ext_buffer_device_address (#1159) * cl_ext_buffer_device_address The basic cl_mem buffer API doesn't enable access to the underlying raw pointers in the device memory, preventing its use in host side data structures that need pointer references to objects. This API adds a minimal increment on top of cl_mem that provides such capabilities. * BDA: Removed CL_MEM_DEVICE_SHARED_ADDRESS_EXT as unneeded. Also made the enums globally unique. * cl_ext_buffer_device_address to 1.0.0 The only content addition since the previous version is "If the device supports SVM and {clCreateBufferWithProperties} is called with a pointer returned by {clSVMAlloc} as its _host_ptr_ argument, and {CL_MEM_USE_HOST_PTR} is set in its _flags_ argument, the device-side address is guaranteed to match the _host_ptr." * cl_ext_buffer_device_address: Revision 1.0.1 * Made it explicit that passing illegal pointers is legal as long as they are not referenced. 
* Removed CL_INVALID_ARG_VALUE as a possible error in clSetKernelArgDevicePointerEXT() as there are no illegal pointer cases when calling this function. Return CL_INVALID_OPERATION for clGetMemObjectInfo() if the pointer is not a buffer device pointer. * clSetKernelExecInfo() and clSetKernelArgDevicePointerEXT() now only error out if no devices in the context associated with kernel support device pointers. * cl_ext_buffer_device_address: Revision 1.0.2 Converted the clSetKernelArgDevicePointerEXT() address parameter to a value instead of a pointer to the value. --- api/cl_ext_buffer_device_address.asciidoc | 80 +++++++++++++++ api/opencl_runtime_layer.asciidoc | 120 +++++++++++++++++++++- xml/cl.xml | 30 ++++++ 3 files changed, 229 insertions(+), 1 deletion(-) create mode 100644 api/cl_ext_buffer_device_address.asciidoc diff --git a/api/cl_ext_buffer_device_address.asciidoc b/api/cl_ext_buffer_device_address.asciidoc new file mode 100644 index 000000000..b76998925 --- /dev/null +++ b/api/cl_ext_buffer_device_address.asciidoc @@ -0,0 +1,80 @@ +// Copyright 2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_buffer_device_address.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2025-02-04 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Pekka Jääskeläinen, Intel + + - Karol Herbst, Red Hat + + - Ben Ashbaugh, Intel + + - Kevin Petit, Arm + + - Henry Linjamäki, Intel + + +=== Description + +This extension provides access to raw device pointers for cl_mem buffers +without requiring a shared virtual address space between the host and +the device. + +==== Background + +Shared Virtual Memory (SVM) introduced in OpenCL 2.0 is the first feature +that enables raw pointers in the OpenCL standard. 
Its coarse-grain +variant is relatively simple to implement on various platforms in terms of +coherency requirements, but it requires mapping the buffer's address range +to the host virtual address space. +However, various higher-level heterogeneous APIs present a memory allocation +routine which can allocate device-only memory and provide raw addresses to +it without guarantees of system-wide uniqueness. For example, minimal +implementations of OpenMP's omp_target_alloc() and CUDA/HIP's +cudaMalloc()/hipMalloc() do not require a shared address space between the host and the device. + +Host-device unified addressing might not be a major implementation issue in +systems which can provide virtual memory across the platform, but might +bring challenges in cases where the device presents a global memory with +a disjoint address space (that can also be a physical memory address space) or, +for example, when a barebone embedded system lacks virtual memory support altogether. +This extension is targeted to complement the OpenCL SVM extension by providing +an additional lower-end step in the spectrum of type of pointers/buffers OpenCL +can allocate. + +=== New Command + + * {clSetKernelArgDevicePointerEXT} + +=== New Types + + * {cl_mem_device_address_EXT} + +=== New Enums + + * {cl_mem_properties_TYPE} + ** {CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT} + * {cl_mem_info_TYPE} + ** {CL_MEM_DEVICE_ADDRESS_EXT} + * {cl_kernel_exec_info_TYPE} + ** {CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT} + +=== Version History + + * Revision 1.0.0, 2025-01-15 + ** Initial version for detailed review. + * Revision 1.0.1, 2025-01-28 + ** Made it explicit that passing illegal pointers is legal as long as they are + not referenced. Removed CL_INVALID_ARG_VALUE as a possible error in + clSetKernelArgDevicePointerEXT() as there are no illegal pointer + cases when calling this function. Return CL_INVALID_OPERATION for + clGetMemObjectInfo() if the pointer is not a buffer device pointer.
+ clSetKernelExecInfo() and clSetKernelArgDevicePointerEXT() now only + error out if no devices in the context associated with kernel support + device pointers. + * Revision 1.0.2, 2025-02-04 + ** Converted the clSetKernelArgDevicePointerEXT() address parameter to + a value instead of a pointer to the value. + diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index c1331ffc1..f9ccf894e 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -595,6 +595,35 @@ include::{generated}/api/version-notes/CL_MEM_DEVICE_HANDLE_LIST_KHR.asciidoc[] {CL_MEM_DEVICE_HANDLE_LIST_END_KHR_anchor}) to associate with the external memory handle. endif::cl_khr_external_memory[] + +ifdef::cl_ext_buffer_device_address[] + +| {CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT_anchor} + +include::{generated}/api/version-notes/CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT.asciidoc[] + | {cl_bool_TYPE} + | When set to {CL_TRUE}, specifies that the buffer must have a single fixed + device-side address for its lifetime, and the address can be queried via {clGetMemObjectInfo}. + + Each device in the context can have their own (fixed) device-side address and + a copy of the created buffer which are synchronized + implicitly by the runtime. + + The flag might imply that the buffer will be "pinned" permanently to + a device's memory, but might not be necessarily so, as long as the address + range of the buffer remains constant. + + The device addresses of sub-buffers derived from {CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT} + allocated buffers can be computed by adding the sub-buffer origin to the + device-specific start address. + + If the device supports SVM and {clCreateBufferWithProperties} is called with a pointer + returned by {clSVMAlloc} as its _host_ptr_ argument, and {CL_MEM_USE_HOST_PTR} is + set in its _flags_ argument, the device-side address is guaranteed to match + the _host_ptr_. 
+ +endif::cl_ext_buffer_device_address[] + |==== ifdef::cl_khr_external_memory[] @@ -662,6 +691,12 @@ ifdef::cl_khr_external_memory[] {CL_MEM_DEVICE_HANDLE_LIST_KHR} is specified as part of _properties_. ** if _properties_ includes more than one external memory handle. endif::cl_khr_external_memory[] +ifdef::cl_ext_buffer_device_address[] + * {CL_INVALID_OPERATION} + ** If _properties_ includes {CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT} and there + are no devices in the context that support the {cl_ext_buffer_device_address_EXT} + extension. +endif::cl_ext_buffer_device_address[] [[memory-flags-table]] .List of supported memory flag values @@ -6463,6 +6498,20 @@ include::{generated}/api/version-notes/CL_MEM_D3D11_RESOURCE_KHR.asciidoc[] returns the _resource_ argument specified when _memobj_ was created. endif::cl_khr_d3d11_sharing[] +ifdef::cl_ext_buffer_device_address[] +| {CL_MEM_DEVICE_ADDRESS_EXT_anchor} + +include::{generated}/api/version-notes/CL_MEM_DEVICE_ADDRESS_EXT.asciidoc[] + | {cl_mem_device_address_EXT_TYPE}[] + | If _memobj_ was created using {clCreateBufferWithProperties} with + the {CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT} property set to {CL_TRUE}, + returns a list of device addresses for the buffer, one for each + device in the context in the same order as the list of devices + passed to {clCreateContext}. + +endif::cl_ext_buffer_device_address[] + + |==== // refError @@ -6472,6 +6521,11 @@ successfully. Otherwise, it returns one of the following errors: * {CL_INVALID_MEM_OBJECT} if _memobj_ is a not a valid memory object. +ifdef::cl_ext_buffer_device_address[] + * {CL_INVALID_OPERATION} is returned for the {CL_MEM_DEVICE_ADDRESS_EXT} query if + the {cl_ext_buffer_device_address_EXT} is not supported or if the + buffer was not allocated with {CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT}. 
+endif::cl_ext_buffer_device_address[] * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values, or if the size in bytes specified by _param_value_size_ is less than size of the return type specified in the @@ -10778,6 +10832,48 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host. -- +ifdef::cl_ext_buffer_device_address[] +[open,refpage='clSetKernelArgDevicePointerEXT',desc='Set a device pointer as the argument value for a specific argument of a kernel.',type='protos'] +-- +To set a device pointer as the argument value for a specific argument of a +kernel, call the function + +include::{generated}/api/protos/clSetKernelArgDevicePointerEXT.txt[] +include::{generated}/api/version-notes/clSetKernelArgDevicePointerEXT.asciidoc[] + + * _kernel_ is a valid kernel object. + * _arg_index_ is the argument index. + Arguments to the kernel are referred by indices that go from 0 for the + leftmost argument to _n_ - 1, where _n_ is the total number of arguments + declared by a kernel. + * _arg_value_ is the device pointer that should be used as the argument value for + argument specified by _arg_index_. + The device pointer specified is the value used by all API calls that enqueue + _kernel_ ({clEnqueueNDRangeKernel} and {clEnqueueTask}) until the argument + value is changed by a call to {clSetKernelArgDevicePointerEXT} for _kernel_. + The device pointer can only be used for arguments that are declared to be a + pointer to `global` memory allocated with {clCreateBufferWithProperties} with + the {CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT} property. The pointer value specified as + the argument value can be the pointer to the beginning of the buffer or any offset into + the buffer region. The device pointer value must be naturally aligned according to + the argument's type. 
It should be noted that it's legal to pass invalid + pointers as the value (similarly to C/C++ function calls with pointer arguments) as + long as the kernel doesn't dereference the pointer. + +{clSetKernelArgDevicePointerEXT} returns {CL_SUCCESS} if the argument was set +successfully. Otherwise, it returns one of the following errors: + + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support + the {cl_ext_buffer_device_address_EXT} extension. + * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_ext_buffer_device_address[] + [open,refpage='clSetKernelExecInfo',desc='Set additional execution information for a kernel.',type='protos'] -- To set additional execution information for a kernel, call the function @@ -10844,6 +10940,19 @@ include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM If {clSetKernelExecInfo} has not been called with a value for {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM}, the default value is {CL_TRUE}. + +ifdef::cl_ext_buffer_device_address[] +| {CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT.asciidoc[] + | {cl_mem_device_address_EXT_TYPE}[] + | Device pointers must reference locations contained entirely within + buffers that are passed to kernel as arguments, or that are passed + through the execution information. Non-argument device pointers accessed + by the kernel must be specified by passing pointers to those buffers + via this {clSetKernelExecInfo} option. +endif::cl_ext_buffer_device_address[] + |==== // refError @@ -10853,7 +10962,16 @@ successfully. 
Otherwise, it returns one of the following errors: * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. - * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. + * {CL_INVALID_OPERATION} for {CL_KERNEL_EXEC_INFO_SVM_PTRS} and + {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} if no devices in + the context associated with _kernel_ support SVM. +ifdef::cl_ext_buffer_device_address[] + * {CL_INVALID_OPERATION} for {CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT} if no + devices in the context associated with _kernel_ support the {cl_ext_buffer_device_address_EXT} + extension. +endif::cl_ext_buffer_device_address[] + * {CL_INVALID_VALUE} if _param_name_ is not valid, if _param_value_ is + `NULL` or if the size specified by _param_value_size_ is not valid. * {CL_INVALID_OPERATION} if _param_name_ is {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} and _param_value_ is {CL_TRUE} and no devices in the context associated with _kernel_ support fine-grain diff --git a/xml/cl.xml b/xml/cl.xml index 39c3a4c4c..a2b0556b6 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -255,6 +255,7 @@ server's OpenCL/api-docs repository. typedef cl_bitfield cl_platform_command_buffer_capabilities_khr; typedef cl_bitfield cl_mutable_dispatch_asserts_khr typedef cl_bitfield cl_device_kernel_clock_capabilities_khr; + typedef cl_ulong cl_mem_device_address_ext; Structure types @@ -2315,6 +2316,12 @@ server's OpenCL/api-docs repository. + + + + + + @@ -7627,5 +7664,14 @@ server's OpenCL/api-docs repository.
+ + + + + + + + + From 696e82027d2baa650c894fb1b2524afef717f52e Mon Sep 17 00:00:00 2001 From: Aharon Abramson Date: Tue, 21 Oct 2025 11:54:51 +0300 Subject: [PATCH 190/190] Refactor producer and consumer kernels to support tile and subtile sizes; remove unused API type --- .../cl_mobileye_reservation_sets.asciidoc | 101 ++++++++++-------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/extensions/cl_mobileye_reservation_sets.asciidoc b/extensions/cl_mobileye_reservation_sets.asciidoc index 5dbd51f22..626d6fdb6 100644 --- a/extensions/cl_mobileye_reservation_sets.asciidoc +++ b/extensions/cl_mobileye_reservation_sets.asciidoc @@ -125,7 +125,6 @@ image::../images/reservation_set_pipe.png[align="center", title="Reservation-set == New API Types * `cl_reservation_set_mobileye` - * `cl_reservation_set_pipe_mobileye` == New API Enums @@ -466,20 +465,26 @@ the write is complete; otherwise, it blocks until there is space in the pipe. Kernel code: [source,opencl_c] ---- -__kernel void producer(rs_pipe_mobileye pipe) { - size_t group_linear_id = - get_group_id(2) * get_num_groups(1) * get_num_groups(0) + - get_group_id(1) * get_num_groups(0) + get_group_id(0); - - const int write_val = group_linear_id; - write_rs_pipe_mobileye(pipe, &group_linear_id); +__kernel void producer(__global int *buf, size_t tile_size, size_t subtile_size, + rs_pipe_mobileye pipe) { + __global int *tile_addr = &buf[get_group_id(0) * tile_size]; + for (size_t i = 0; i < tile_size; i += subtile_size) { + __global int *subtile_addr = &tile_addr[i]; + do_production(subtile_addr); + unsigned char pipe_data; // dummy data - we only use the pipe as a semaphore + write_rs_pipe_mobileye(pipe, &pipe_data); + } } -__kernel void consumer(__global int *res, rs_pipe_mobileye pipe) { - size_t group_linear_id = - get_group_id(2) * get_num_groups(1) * get_num_groups(0) + - get_group_id(1) * get_num_groups(0) + get_group_id(0); - read_rs_pipe_mobileye(pipe, &res[group_linear_id]); +__kernel void
consumer(__global int *buf, size_t tile_size, size_t subtile_size, + rs_pipe_mobileye pipe) { + __global int *tile_addr = &buf[get_group_id(0) * tile_size]; + for (size_t i = 0; i < tile_size; i += subtile_size) { + __global int *subtile_addr = &tile_addr[i]; + unsigned char pipe_data; + read_rs_pipe_mobileye(pipe, &pipe_data); + do_consumption(subtile_addr); + } } ---- @@ -487,38 +492,44 @@ Host code: [source,c] ---- - cl_context context; - cl_command_queue queue; - cl_kernel producer_kernel, consumer_kernel; - ... - const cl_int width = 4; - cl_mem res_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, - width * sizeof(cl_int), nullptr, nullptr); - cl_command_buffer_khr cmd_buf = - clCreateCommandBufferKHR(1, &queue, nullptr, nullptr); - cl_reservation_set_mobileye reservation_set = clCreateReservationSetMOBILEYE( - cmd_buf, CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE, 0, nullptr, nullptr); - cl_mem pipe = clCreateReservationSetPipeMOBILEYE(reservation_set, - sizeof(cl_int), width, - nullptr); - - clSetKernelArg(consumer_kernel, 0, sizeof(cl_mem), &res_buffer); - clSetKernelArg(consumer_kernel, 1, sizeof(cl_mem), &pipe); - - clSetKernelArg(producer_kernel, 0, sizeof(cl_mem), &pipe); - - const size_t gwz = width, lwz = 1; - clCommandNDRangeKernelReservationSetMOBILEYE(reservation_set, queue, nullptr, - producer_kernel, 1, nullptr, - &gwz, &lwz, nullptr, nullptr); - clCommandNDRangeKernelReservationSetMOBILEYE(reservation_set, queue, nullptr, - consumer_kernel, 1, nullptr, - &gwz, &lwz, nullptr, nullptr); - clFinalizeCommandBufferKHR(cmd_buf); - - cl_event event; - clEnqueueCommandBufferKHR(1, &queue, cmd_buf, 0, nullptr, &event); - clWaitForEvents(1, &event); +cl_context context; +cl_command_queue queue; +cl_kernel producer_kernel, consumer_kernel; +// ... 
+size_t num_tiles = 4; +cl_int tile_size = 128, subtile_size = 16; +cl_mem res_buffer = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_tiles * tile_size * sizeof(cl_int), nullptr, nullptr); +cl_command_buffer_khr cmd_buf = + clCreateCommandBufferKHR(1, &queue, nullptr, nullptr); +cl_reservation_set_mobileye reservation_set = clCreateReservationSetMOBILEYE( + cmd_buf, CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE, 0, nullptr, nullptr); +cl_mem pipe = clCreateReservationSetPipeMOBILEYE( + reservation_set, sizeof(cl_uchar), tile_size / subtile_size, nullptr); + +clSetKernelArg(consumer_kernel, 0, sizeof(cl_mem), &res_buffer); +clSetKernelArg(consumer_kernel, 1, sizeof(cl_int), &tile_size); +clSetKernelArg(consumer_kernel, 2, sizeof(cl_int), &subtile_size); +clSetKernelArg(consumer_kernel, 3, sizeof(cl_mem), &pipe); + +clSetKernelArg(producer_kernel, 0, sizeof(cl_mem), &res_buffer); +clSetKernelArg(producer_kernel, 1, sizeof(cl_int), &tile_size); +clSetKernelArg(producer_kernel, 2, sizeof(cl_int), &subtile_size); +clSetKernelArg(producer_kernel, 3, sizeof(cl_mem), &pipe); + +size_t lwz = 1; +clCommandNDRangeKernelReservationSetMOBILEYE( + reservation_set, queue, nullptr, producer_kernel, 1, nullptr, &num_tiles, + &lwz, nullptr, nullptr); +clCommandNDRangeKernelReservationSetMOBILEYE( + reservation_set, queue, nullptr, consumer_kernel, 1, nullptr, &num_tiles, + &lwz, nullptr, nullptr); +clFinalizeCommandBufferKHR(cmd_buf); + +cl_event event; +clEnqueueCommandBufferMOBILEYE(1, &queue, cmd_buf, 0, nullptr, &event); +clWaitForEvents(1, &event); ---- == Issues