From 9b5919ce44745e19cfdafdf7c6ac4445ce4dd88a Mon Sep 17 00:00:00 2001 From: horobert Date: Mon, 18 Jul 2022 15:30:05 -0400 Subject: [PATCH 01/11] Initial task_sequence code sample Signed-off-by: horobert --- .../Features/task_sequence/CMakeLists.txt | 20 ++ .../Features/task_sequence/License.txt | 23 ++ .../Features/task_sequence/README.md | 313 ++++++++++++++++++ .../Features/task_sequence/sample.json | 61 ++++ .../Features/task_sequence/src/CMakeLists.txt | 73 ++++ .../task_sequence/src/task_sequence.cpp | 145 ++++++++ .../Features/task_sequence/task_sequence.sln | 25 ++ .../task_sequence/task_sequence.vcxproj | 160 +++++++++ .../task_sequence/third-party-programs.txt | 253 ++++++++++++++ 9 files changed, 1073 insertions(+) create mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt create mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/License.txt create mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md create mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json create mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/CMakeLists.txt create mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/task_sequence.cpp create mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.sln create mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.vcxproj create mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/third-party-programs.txt diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt new file mode 100644 index 0000000000..3078228ee4 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt @@ -0,0 +1,20 @@ +if(UNIX) + # Direct CMake to use dpcpp rather than the 
default C++ compiler/linker + set(CMAKE_CXX_COMPILER dpcpp) +else() # Windows + # Force CMake to use dpcpp rather than the default C++ compiler/linker + # (needed on Windows only) + include (CMakeForceCompiler) + CMAKE_FORCE_CXX_COMPILER (dpcpp IntelDPCPP) + include (Platform/Windows-Clang) +endif() + +cmake_minimum_required (VERSION 3.4) + +project(LoopIvdep CXX) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/License.txt new file mode 100644 index 0000000000..7c8b8a36c6 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/License.txt @@ -0,0 +1,23 @@ +Copyright Intel Corporation + +SPDX-License-Identifier: MIT +https://opensource.org/licenses/MIT + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md new file mode 100644 index 0000000000..2fc92a1e4c --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md @@ -0,0 +1,313 @@ + + +# task_sequence +This FPGA tutorial demonstrates how to use the task_sequence extension to asynchronously run sub-kernel sets of operations, called tasks, in parallel. The task_sequence extension provides a templated class, task_sequence, that defines an API for asynchronously launching a parallel task, and for retrieving the results of that task. Objects of this class represent a FIFO queue of tasks matching the order in which these tasks were invoked, as well as an instantiation of the FPGA hardware used to perform the operations of that task. + +***Documentation***: The [DPC++ FPGA Code Samples Guide](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of DPC++ for FPGA.
+The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) is the reference manual for targeting FPGAs through DPC++.
+The [oneAPI Programming Guide](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04/20.04, RHEL*/CentOS* 8, SUSE* 15; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA
Intel® FPGA Programmable Acceleration Card (PAC) D5005 (with Intel Stratix® 10 SX)
Intel® FPGA 3rd party / custom platforms with oneAPI support
*__Note__: Intel® FPGA PAC hardware is only compatible with Ubuntu 18.04* +| Software | Intel® oneAPI DPC++ Compiler
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | Basics of task_sequence declaration and usage +| Time to complete | 30 minutes + + + +## Purpose + +Use objects of a task_sequence class to asynchronously run parallel tasks, and to define the hardware that is instantiated to perform those tasks. An API for invoking tasks and retrieving results of these tasks imposes a FIFO ordering on outstanding tasks and their results. The scope of a task_sequence object defines the lifetime in which the hardware represented by that object can be used to perform a task. Users can control hardware reuse and replication by declaring single or multiple objects of the same task_sequence class. + +### Declaring a task_sequence +A task_sequence is a templated class that defines a set of operations (task), and methods for asynchronously invoking parallel instances of that task, and retrieving the results of those parallel tasks in FIFO order. The first template parameter is an auto reference to a Callable f that defines the asynchronous task to be associated with the task_sequence. The requirement for an auto reference amounts to a requirement that f be statically resolvable at compile time, i.e., not a function pointer. Furthermore, the return type and argument types of f must be resolvable and fixed for each definition of task_sequence. + +The task_sequence class optionally takes two additional unsigned int template parameters specifying the invocation capacity and response capacity for instantiated task_sequence objects. The invocation capacity parameter defines the minimum number of task invocations (see [async](#async) in [task_sequence API](#task_sequence-api) below) that must be supported without any response being collected (see [get](#get) in [task_sequence API](#task_sequence-api) below). This number of async invocations without a get call is the minimum number that will be supported before a subsequent async call may block. 
A default value of 1 is assumed if the invocation capacity parameter is not specified. + +The response capacity parameter defines the maximum number of outstanding async invocations such that all outstanding invocations are guaranteed to make forward progress. Further async invocations may block until enough get calls are invoked such that the number of outstanding async invocations is reduced to the response capacity. A default value of 1 is assumed if the response capacity parameter is not specified. + +Object instances of a templated task_sequence class represent a specific instantiation of FPGA hardware to perform the operations of the task f. Users can control the amount of replication of FPGA hardware by the number of object declarations they use. + +```c++ +int someTask(int intArg, float floatArg) { + ... +} + +// FPGA code +{ + sycl::ext::intel::experimental::task_sequence<someTask> firstInstance; + sycl::ext::intel::experimental::task_sequence<someTask> secondInstance; + ... +} + +``` + +In this example, `firstInstance` and `secondInstance` are two task_sequence objects that implement the task `someTask`, which takes an integer and a float argument and returns an integer result. Since they are two different object instances, they represent two distinct instances of FPGA hardware implementing `someTask`, as well as two separate queues for holding the results of parallel invocations of `someTask`. + +### task_sequence API + +task_sequence provides two methods for asynchronously invoking and collecting parallel instances of the templated task function. + + - [async](#async) + - [get](#get) + +#### async + +The async method asynchronously invokes a parallel instance of the templated task function. The async method takes the same arguments (the same type and same order) as those defined in the templated task function's signature. 
+ +```c++ +int someTask(int intArg, float floatArg); + +// FPGA code +{ + sycl::ext::intel::experimental::task_sequence<someTask> firstInstance; + int argA = ...; + float argB = ...; + int argC = ...; + float argD = ...; + + ... + + firstInstance.async(argA, argB); // first async invocation + firstInstance.async(argC, argD); // second async invocation + ... +} + +``` + +In the above example, two asynchronous parallel invocations of 'someTask' are launched on the FPGA hardware represented by the firstInstance task_sequence object. The first parallel task is invoked with arguments 'argA' and 'argB', and the second with 'argC' and 'argD'. + + +#### get +The get method collects the result of a task_sequence task previously invoked through the async method. The get method for a particular task_sequence object has the same return type as the templated task function for the object. Calling get returns results in the same order in which the tasks were invoked. + +```c++ +// FPGA code +{ + sycl::ext::intel::experimental::task_sequence<someTask> firstInstance; + int argA = ...; + float argB = ...; + int argC = ...; + float argD = ...; + + ... + + firstInstance.async(argA, argB); // first async invocation + firstInstance.async(argC, argD); // second async invocation + ... + auto firstResult = firstInstance.get(); // returns the result of invocation with (argA, argB) + auto secondResult = firstInstance.get(); // returns the result of invocation with (argC, argD) + ... +} + +``` + +In this continuation of the async example, firstResult contains the return value of the async invocation using (argA, argB), and secondResult contains the return value of the async invocation using (argC, argD). + +The get method is a blocking call. That is, if no previous async invocation has completed, get will block until one has. + +### Testing the Tutorial +In `task_sequence.cpp`, the dot product of a 16k element vector is calculated twice. 
The first calculation runs in the `SequentialTask` kernel as a single async invocation of the `dotProduct` function on a single task_sequence object. + + +```c++ +h.single_task<SequentialTask>([=]() { + sycl::ext::intel::experimental::task_sequence<dotProduct> whole; + whole.async(in_acc.get_pointer(), 0, count); + out_acc[0] = whole.get(); +}); +``` +The second calculation runs in the `ParallelTask` kernel as 4 async invocations of the `dotProduct` function via 4 different task_sequence objects. Each async invocation operates on one-quarter of the vector. Since each async invocation utilizes its own FPGA hardware and operates on a different quarter of the vector, each partial dot product calculation can be done in parallel, speeding up the result. + +```c++ +h.single_task<ParallelTask>([=]() { + sycl::ext::intel::experimental::task_sequence<dotProduct> firstQuarter; + sycl::ext::intel::experimental::task_sequence<dotProduct> secondQuarter; + sycl::ext::intel::experimental::task_sequence<dotProduct> thirdQuarter; + sycl::ext::intel::experimental::task_sequence<dotProduct> fourthQuarter; + int quarterCount = count/4; + firstQuarter.async(in_acc.get_pointer(), 0, quarterCount); + secondQuarter.async(in_acc.get_pointer(), quarterCount, quarterCount); + thirdQuarter.async(in_acc.get_pointer(), 2*quarterCount, quarterCount); + fourthQuarter.async(in_acc.get_pointer(), 3*quarterCount, quarterCount); + out_acc[1] = firstQuarter.get() + secondQuarter.get() + thirdQuarter.get() + fourthQuarter.get(); +}); +``` + +## Key Concepts +* Basics of declaring task_sequence objects +* Using task_sequence async and get API for invoking and collecting parallel tasks + +## License +Code samples are licensed under the MIT license. See +[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details. 
+ +Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt) + +## Building the `task_sequence` Tutorial + +> **Note**: If you have not already done so, set up your CLI +> environment by sourcing the `setvars` script located in +> the root of your oneAPI installation. +> +> Linux Sudo: `. /opt/intel/oneapi/setvars.sh` +> +> Linux User: `. ~/intel/oneapi/setvars.sh` +> +> Windows: `C:\Program Files(x86)\Intel\oneAPI\setvars.bat` +> +>For more information on environment variables, see Use the setvars Script for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html). + + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If you are running a sample in the Intel DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are supported only on `fpga_compile` nodes. Executing programs on FPGA hardware is supported only on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:arria10` or `fpga_runtime:stratix10`. You cannot compile or execute programs on FPGA hardware on the `login` nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)). + +When compiling for FPGA hardware, increase the job timeout to 12h. 
+ +### Using Visual Studio Code* (Optional) + +You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations, +and browse and download samples. + +The basic steps to build and run a sample using VS Code include: + - Download a sample using the extension **Code Sample Browser for Intel oneAPI Toolkits**. + - Configure the oneAPI environment with the extension **Environment Configurator for Intel oneAPI Toolkits**. + - Open a Terminal in VS Code (**Terminal>New Terminal**). + - Run the sample in the VS Code terminal using the instructions below. + +To learn more about the extensions and how to configure the oneAPI environment, see +[Using Visual Studio Code with Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html). + +After learning how to use the extensions for Intel oneAPI Toolkits, return to this readme for instructions on how to build and run a sample. + +### On a Linux* System + +1. Generate the Makefile by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the following command: + + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX), run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + You can also compile for a custom FPGA platform. Ensure that the board support package is installed on your system. Then run `cmake` using the command: + ``` + cmake .. -DFPGA_BOARD=<board-support-package>:<board-variant> + ``` + +2. Compile the design through the generated `Makefile`. 
The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, FPGA precompiled binaries (compatible with Linux* Ubuntu* 18.04) can be downloaded here. + +### On a Windows* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + + ``` + cmake -G "NMake Makefiles" .. + ``` + Alternatively, to compile for the Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX), run `cmake` using the command: + + ``` + cmake -G "NMake Makefiles" .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + You can also compile for a custom FPGA platform. Ensure that the board support package is installed on your system. Then run `cmake` using the command: + ``` + cmake -G "NMake Makefiles" .. -DFPGA_BOARD=<board-support-package>:<board-variant> + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + nmake fpga_emu + ``` + * Generate the optimization report: + ``` + nmake report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + nmake fpga + ``` + +>**Note**: The Intel® PAC with Intel Arria® 10 GX FPGA and Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) do not support Windows*. Compiling to FPGA hardware on Windows* requires a third-party or custom Board Support Package (BSP) with Windows* support.
+ +>**Tip**: If you encounter issues with long paths when compiling under Windows*, you might have to create your ‘build’ directory in a shorter path, for example `c:\samples\build`. You can then run `cmake` from that directory, and provide `cmake` with the full path to your sample directory. + +### Troubleshooting + +If an error occurs, get more details by running `make` with +the `VERBOSE=1` argument: +``make VERBOSE=1`` +For more comprehensive troubleshooting, use the Diagnostics Utility for +Intel® oneAPI Toolkits, which provides system checks to find missing +dependencies and permissions errors. +[Learn more](https://software.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html). + +### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html) + +## Examining the Reports + +Locate `report.html` in the `task_sequence_report.prj/reports/` directory. Open the report in any of the following web browsers: Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Open the **Views** menu and select **System Viewer**. + +In the left-hand pane, select **SequentialTask.B0** under the System hierarchy. + +In the main **System Viewer** pane, the task_sequence async and get for the single 'whole' task_sequence object are highlighted as a 'WR' and 'RD' node respectively. These represent a write pipe for writing the arguments and start command to the 'dotProduct' task function, and a read pipe for returning the results. + +Now select **ParallelTask.B0** in the left-hand pane. 
+ +In the main **System Viewer** pane, the four task_sequence async and get commands for the four task_sequence objects of the ParallelTask kernel are highlighted. These represent the four parallel async invocations in this kernel. As in the SequentialTask kernel, the 'WR' nodes represent pipes for writing the arguments and start command to each instance of the 'dotProduct' task function (since there are 4 task_sequence objects, there are 4 hardware instances), and the 'RD' nodes represent pipes for returning the results. + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./task_sequence.fpga_emu (Linux) + task_sequence.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./task_sequence.fpga (Linux) + ``` + +### Example of Output + +``` +PASSED sequential test +PASSED parallel test +Sequential time: 29489.7 ms +Parallel time: 12050.7 ms +``` diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json new file mode 100644 index 0000000000..0f7a8082ad --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json @@ -0,0 +1,61 @@ +{ + "guid": "7DCC2921-6B6C-47AB-A22D-D77A4A754184", + "name": "task_sequence", + "categories": ["Toolkit/oneAPI Direct Programming/DPC++ FPGA/Tutorials/Features/experimental"], + "description": "An Intel® FPGA tutorial demonstrating the usage of the task_sequence extension", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "dpcpp --version", + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./task_sequence.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "dpcpp --version", + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + 
], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "dpcpp --version", + "cd ../../..", + "mkdir build", + "cd build", + "cmake -G \"NMake Makefiles\" ../Tutorials/Features/task_sequence", + "nmake fpga_emu", + "task_sequence.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "dpcpp --version", + "cd ../../..", + "mkdir build", + "cd build", + "cmake -G \"NMake Makefiles\" ../Tutorials/Features/task_sequence", + "nmake report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/CMakeLists.txt new file mode 100644 index 0000000000..f354a4fec5 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/CMakeLists.txt @@ -0,0 +1,73 @@ +# To see a Makefile equivalent of this build system: +# https://github.com/oneapi-src/oneAPI-samples/blob/master/DirectProgramming/DPC++/ProjectTemplates/makefile-fpga + +set(SOURCE_FILE task_sequence.cpp) +set(TARGET_NAME task_sequence) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# FPGA device selection +if(NOT DEFINED FPGA_BOARD) + set(FPGA_BOARD "intel_a10sx_pac:pac_a10") + message(STATUS "FPGA_BOARD was not specified.\ + \nConfiguring the design to run on the default FPGA device ${FPGA_BOARD} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \ + \nPlease refer to the README for information on board selection.") +else() + message(STATUS "Configuring the design to run on FPGA device ${FPGA_BOARD}") +endif() + +# This is a Windows-specific flag that enables exception handling in host code +if(WIN32) + set(WIN_FLAG "/EHsc") +endif() + +# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. 
+# For this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR -g0") +set(EMULATOR_LINK_FLAGS "-fintelfpga -g0") +set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga") +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}") +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation + +############################################################################### +### FPGA Emulator +############################################################################### +# To compile in a single command: +# dpcpp -fintelfpga -DFPGA_EMULATOR task_sequence.cpp -o task_sequence.fpga_emu +# CMake executes: +# [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o task_sequence.cpp.o -c task_sequence.cpp +# [link] dpcpp -fintelfpga task_sequence.cpp.o -o task_sequence.fpga_emu +add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +############################################################################### +### Generate Report +############################################################################### +# To compile manually: +# dpcpp -fintelfpga -Xshardware -Xsboard= -fsycl-link=early task_sequence.cpp -o task_sequence_report.a +set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) +# The compile output is not an executable, but an intermediate compilation result unique to DPC++. 
+add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE}) +add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early") +# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® + +############################################################################### +### FPGA Hardware +############################################################################### +# To compile in a single command: +# dpcpp -fintelfpga -Xshardware -Xsboard= task_sequence.cpp -o task_sequence.fpga +# CMake executes: +# [compile] dpcpp -fintelfpga -o task_sequence.cpp.o -c task_sequence.cpp +# [link] dpcpp -fintelfpga -Xshardware -Xsboard= task_sequence.cpp.o -o task_sequence.fpga +add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") +set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See DPC++FPGA/GettingStarted/fast_recompile for details. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/task_sequence.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/task_sequence.cpp new file mode 100644 index 0000000000..6482f950d4 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/task_sequence.cpp @@ -0,0 +1,145 @@ +#include +#include +#include + +// dpc_common.hpp can be found in the dev-utilities include folder. 
+// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp +#include +#include + +#include "dpc_common.hpp" + +using ValueT = float; + +// compute the dot product of 'sz' elements of vector 'v', beginning at index +// 's' +ValueT dotProduct(ValueT* v, size_t s, size_t sz) { + int result = 1; + for (size_t i = s; i < s + sz; i++) result *= v[i]; + + return result; +} + +// return the absolute value of 'x' +template +T abs(T x) { + if (x > 0) + return x; + else + return -x; +} + +// Kernel identifiers +class SequentialTask; +class ParallelTask; + +int main(int argc, char* argv[]) { +#if defined(FPGA_EMULATOR) + sycl::ext::intel::fpga_emulator_selector selector; +#else + sycl::ext::intel::fpga_selector selector; +#endif + + size_t count = 16384; + if (argc > 1) count = atoi(argv[1]); + + if (count <= 0) { + std::cerr << "ERROR: 'count' must be positive" << std::endl; + return 1; + } + + try { + // create the device queue + sycl::queue q(selector, dpc_common::exception_handler, + sycl::property::queue::enable_profiling{}); + + // create input and golden output data + std::random_device rd; + std::default_random_engine eng(rd()); + std::uniform_real_distribution distr(0, 1); + std::vector in(count), out(2); + for (size_t i = 0; i < count; i++) { + in[i] = distr(eng); + } + + ValueT golden = dotProduct(in.data(), 0, count); + + // variables for profiling times + double start, end, sequentialTime, parallelTime; + + // create scope so that buffer destructors are invoked before output + // is checked + { + sycl::buffer in_buf(in); + sycl::buffer out_buf(out); + + sycl::event e = q.submit([&](sycl::handler& h) { + sycl::accessor in_acc(in_buf, h, sycl::read_only); + sycl::accessor out_acc(out_buf, h, sycl::write_only); + h.single_task([=]() { + sycl::ext::intel::experimental::task_sequence whole; + whole.async(in_acc.get_pointer(), 0, count); + out_acc[0] = whole.get(); + }); + }); + q.wait(); + + start = + e.get_profiling_info(); + end = e.get_profiling_info(); + + // 
unit is nano second, convert to ms + sequentialTime = (double)(end - start) * 1e-6; + + e = q.submit([&](sycl::handler& h) { + sycl::accessor in_acc(in_buf, h, sycl::read_only); + sycl::accessor out_acc(out_buf, h, sycl::write_only); + h.single_task([=]() { + sycl::ext::intel::experimental::task_sequence + firstQuarter; + sycl::ext::intel::experimental::task_sequence + secondQuarter; + sycl::ext::intel::experimental::task_sequence + thirdQuarter; + sycl::ext::intel::experimental::task_sequence + fourthQuarter; + int quarterCount = count / 4; + firstQuarter.async(in_acc.get_pointer(), 0, quarterCount); + secondQuarter.async(in_acc.get_pointer(), quarterCount, quarterCount); + thirdQuarter.async(in_acc.get_pointer(), 2 * quarterCount, + quarterCount); + fourthQuarter.async(in_acc.get_pointer(), 3 * quarterCount, + quarterCount); + out_acc[1] = firstQuarter.get() + secondQuarter.get() + + thirdQuarter.get() + fourthQuarter.get(); + }); + }); + q.wait(); + + start = + e.get_profiling_info(); + end = e.get_profiling_info(); + + // unit is nano second, convert to ms + parallelTime = (double)(end - start) * 1e-6; + } + + if (abs(out[0] - golden) < (ValueT)0.001) + std::cout << "PASSED sequential test" << std::endl; + else + std::cout << "FAILED" << std::endl; + + if (abs(out[1] - golden) < (ValueT)0.001) + std::cout << "PASSED parallel test" << std::endl; + else + std::cout << "FAILED" << std::endl; + + std::cout << "Sequential time: " << sequentialTime << " ms" << std::endl; + std::cout << "Parallel time: " << parallelTime << " ms" << std::endl; + + } catch (sycl::exception const& e) { + // Catches exceptions in the host code + std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; + std::terminate(); + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.sln new file mode 100644 index 0000000000..ac10327a83 --- /dev/null +++ 
b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "task_sequence", "task_sequence.vcxproj", "{3F5364B3-F987-4676-89A5-1F19BA3D4B75}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Debug|x64.ActiveCfg = Debug|x64 + {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Debug|x64.Build.0 = Debug|x64 + {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Release|x64.ActiveCfg = Release|x64 + {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {C0550E85-8C31-40EE-BFFA-F267DC16329D} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.vcxproj new file mode 100644 index 0000000000..93c595e971 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {3f5364b3-f987-4676-89a5-1f19ba3d4b75} + Win32Proj + task_sequence + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler 2022 + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler 2022 + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler 2022 + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler 2022 + true + Unicode 
+ + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)task_sequence.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)task_sequence.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/third-party-programs.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/third-party-programs.txt new file mode 100644 index 0000000000..8377fa0c7a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/third-party-programs.txt @@ -0,0 +1,253 @@ +oneAPI Code Samples - Third Party Programs File + +This file contains the list of third party software ("third party programs") +contained in the Intel software and their required notices and/or license +terms. This third party software, even if included with the distribution of the +Intel software, may be governed by separate license terms, including without +limitation, third party license terms, other Intel software license terms, and +open source software license terms. These separate license terms govern your use +of the third party programs as set forth in the “third-party-programs.txt” or +other similarly named text file. + +Third party programs and their corresponding required notices and/or license +terms are listed below. + +-------------------------------------------------------------------------------- + +1. 
Nothings STB Libraries + +stb/LICENSE + + This software is available under 2 licenses -- choose whichever you prefer. + ------------------------------------------------------------------------------ + ALTERNATIVE A - MIT License + Copyright (c) 2017 Sean Barrett + Permission is hereby granted, free of charge, to any person obtaining a copy of + this software and associated documentation files (the "Software"), to deal in + the Software without restriction, including without limitation the rights to + use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished to do + so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + ------------------------------------------------------------------------------ + ALTERNATIVE B - Public Domain (www.unlicense.org) + This is free and unencumbered software released into the public domain. + Anyone is free to copy, modify, publish, use, compile, sell, or distribute this + software, either in source code form or as a compiled binary, for any purpose, + commercial or non-commercial, and by any means. + In jurisdictions that recognize copyright laws, the author or authors of this + software dedicate any and all copyright interest in the software to the public + domain. 
We make this dedication for the benefit of the public at large and to + the detriment of our heirs and successors. We intend this dedication to be an + overt act of relinquishment in perpetuity of all present and future rights to + this software under copyright law. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +2. FGPA example designs-gzip + + SDL2.0 + +zlib License + + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + +-------------------------------------------------------------------------------- + +3. 
Nbody + (c) 2019 Fabio Baruffa + + Plotly.js + Copyright (c) 2020 Plotly, Inc + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +© 2020 GitHub, Inc. + +-------------------------------------------------------------------------------- + +4. GNU-EFI + Copyright (c) 1998-2000 Intel Corporation + +The files in the "lib" and "inc" subdirectories are using the EFI Application +Toolkit distributed by Intel at http://developer.intel.com/technology/efi + +This code is covered by the following agreement: + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. THE EFI SPECIFICATION AND ALL OTHER INFORMATION +ON THIS WEB SITE ARE PROVIDED "AS IS" WITH NO WARRANTIES, AND ARE SUBJECT +TO CHANGE WITHOUT NOTICE. + +-------------------------------------------------------------------------------- + +5. Edk2 + Copyright (c) 2019, Intel Corporation. All rights reserved. + + Edk2 Basetools + Copyright (c) 2019, Intel Corporation. All rights reserved. + +SPDX-License-Identifier: BSD-2-Clause-Patent + +-------------------------------------------------------------------------------- + +6. Heat Transmission + +GNU LESSER GENERAL PUBLIC LICENSE +Version 3, 29 June 2007 + +Copyright © 2007 Free Software Foundation, Inc. + +Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + +This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. + +0. Additional Definitions. +As used herein, “this License” refers to version 3 of the GNU Lesser General Public License, and the “GNU GPL” refers to version 3 of the GNU General Public License. 
+ +“The Library” refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. + +An “Application” is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. + +A “Combined Work” is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the “Linked Version”. + +The “Minimal Corresponding Source” for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. + +The “Corresponding Application Code” for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. + +1. Exception to Section 3 of the GNU GPL. +You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. + +2. Conveying Modified Versions. +If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: + +a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or +b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. +3. 
Object Code Incorporating Material from Library Header Files. +The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: + +a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. +b) Accompany the object code with a copy of the GNU GPL and this license document. +4. Combined Works. +You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: + +a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. +b) Accompany the Combined Work with a copy of the GNU GPL and this license document. +c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. +d) Do one of the following: +0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. +1) Use a suitable shared library mechanism for linking with the Library. 
A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. +e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) +5. Combined Libraries. +You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: + +a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. +b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. +6. Revised Versions of the GNU Lesser General Public License. +The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. 
If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License “or any later version” applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. + +If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. + +-------------------------------------------------------------------------------- +7. Rodinia + Copyright (c)2008-2011 University of Virginia +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted without royalty fees or other restrictions, provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of the University of Virginia, the Dept. of Computer Science, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF VIRGINIA OR THE SOFTWARE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +If you use this software or a modified version of it, please cite the most relevant among the following papers: + + - M. A. Goodrum, M. J. Trotter, A. Aksel, S. T. Acton, and K. Skadron. Parallelization of Particle Filter Algorithms. In Proceedings of the 3rd Workshop on Emerging Applications and Many-core Architecture (EAMA), in conjunction with the IEEE/ACM International +Symposium on Computer Architecture (ISCA), June 2010. + + - S. Che, M. Boyer, J. Meng, D. Tarjan, J. W. Sheaffer, Sang-Ha Lee and K. Skadron. +Rodinia: A Benchmark Suite for Heterogeneous Computing. IEEE International Symposium +on Workload Characterization, Oct 2009. + +- J. Meng and K. Skadron. "Performance Modeling and Automatic Ghost Zone Optimization +for Iterative Stencil Loops on GPUs." In Proceedings of the 23rd Annual ACM International +Conference on Supercomputing (ICS), June 2009. + +- L.G. Szafaryn, K. Skadron and J. Saucerman. "Experiences Accelerating MATLAB Systems +Biology Applications." in Workshop on Biomedicine in Computing (BiC) at the International +Symposium on Computer Architecture (ISCA), June 2009. + +- M. Boyer, D. Tarjan, S. T. Acton, and K. Skadron. "Accelerating Leukocyte Tracking using CUDA: +A Case Study in Leveraging Manycore Coprocessors." In Proceedings of the International Parallel +and Distributed Processing Symposium (IPDPS), May 2009. + +- S. Che, M. Boyer, J. Meng, D. Tarjan, J. W. Sheaffer, and K. Skadron. 
"A Performance +Study of General Purpose Applications on Graphics Processors using CUDA" Journal of +Parallel and Distributed Computing, Elsevier, June 2008. + +-------------------------------------------------------------------------------- +Other names and brands may be claimed as the property of others. + +-------------------------------------------------------------------------------- From 539c1ce171057929ad2132ef439e7926b518f35e Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Wed, 26 Oct 2022 09:53:06 -0400 Subject: [PATCH 02/11] Update DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt Co-authored-by: yuguen-intel --- .../DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt index 3078228ee4..18c7ca4fc9 100644 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt @@ -11,7 +11,7 @@ endif() cmake_minimum_required (VERSION 3.4) -project(LoopIvdep CXX) +project(TaskSequenceCXX) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) From f57c6f0f795954995d90ca51dc3cd51992d32f97 Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Wed, 26 Oct 2022 09:53:22 -0400 Subject: [PATCH 03/11] Update DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json Co-authored-by: yuguen-intel --- .../DPC++FPGA/Tutorials/Features/task_sequence/sample.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json index 
0f7a8082ad..2881dd914b 100644 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json @@ -1,7 +1,7 @@ { "guid": "7DCC2921-6B6C-47AB-A22D-D77A4A754184", "name": "task_sequence", - "categories": ["Toolkit/oneAPI Direct Programming/DPC++ FPGA/Tutorials/Features/experimental"], + "categories": ["Toolkit/oneAPI Direct Programming/DPC++ FPGA/Tutorials/Features"], "description": "An Intel® FPGA tutorial demonstrating the usage of the task_sequence extenstion", "toolchain": ["dpcpp"], "os": ["linux", "windows"], From 497036706c829f435f2d5a216ec66b323dcaef6d Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Wed, 26 Oct 2022 09:56:34 -0400 Subject: [PATCH 04/11] Update DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md Co-authored-by: yuguen-intel --- .../DPC++FPGA/Tutorials/Features/task_sequence/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md index 2fc92a1e4c..93c3bc8673 100644 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md @@ -1,6 +1,6 @@ -# task_sequence +# The `task_sequence` extension This FPGA tutorial demonstrates how to use the task_sequence extension to asynchronously run sub-kernel sets of operations, called tasks, in parallel. The task_sequence extension provides a templated class, task_sequence, that defines an API for asynchronously launching a parallel task, and for retrieving the results of that task. Objects of this class represent a FIFO queue of tasks matching the order in which these tasks were invoked, as well as an instantiation of the FPGA hardware used to perform the operations of that task. 
***Documentation***: The [DPC++ FPGA Code Samples Guide](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of DPC++ for FPGA.
From 0acaf3a8722cdaa2092036746d35000eff302f71 Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Wed, 26 Oct 2022 09:57:11 -0400 Subject: [PATCH 05/11] Update DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt Co-authored-by: yuguen-intel --- .../DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt index 18c7ca4fc9..f88598195c 100644 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt @@ -1,6 +1,6 @@ if(UNIX) # Direct CMake to use dpcpp rather than the default C++ compiler/linker - set(CMAKE_CXX_COMPILER dpcpp) + set(CMAKE_CXX_COMPILER icpx) else() # Windows # Force CMake to use dpcpp rather than the default C++ compiler/linker # (needed on Windows only) From 1bf4ecd7e1792628e1c83a5be9a0f52d2faeb445 Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Wed, 26 Oct 2022 09:57:19 -0400 Subject: [PATCH 06/11] Update DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt Co-authored-by: yuguen-intel --- .../DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt index f88598195c..57d2aa2ae3 100644 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/CMakeLists.txt @@ -5,7 +5,7 @@ else() # Windows # Force CMake to use dpcpp rather than the default C++ compiler/linker # (needed on Windows only) include 
(CMakeForceCompiler) - CMAKE_FORCE_CXX_COMPILER (dpcpp IntelDPCPP) + CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) include (Platform/Windows-Clang) endif() From cfc2d7baffc4a0bcae2900c39e29cff18027252c Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Thu, 3 Nov 2022 06:41:23 -0400 Subject: [PATCH 07/11] Delete third-party-programs.txt --- .../task_sequence/third-party-programs.txt | 253 ------------------ 1 file changed, 253 deletions(-) delete mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/third-party-programs.txt diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/third-party-programs.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/third-party-programs.txt deleted file mode 100644 index 8377fa0c7a..0000000000 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/third-party-programs.txt +++ /dev/null @@ -1,253 +0,0 @@ -oneAPI Code Samples - Third Party Programs File - -This file contains the list of third party software ("third party programs") -contained in the Intel software and their required notices and/or license -terms. This third party software, even if included with the distribution of the -Intel software, may be governed by separate license terms, including without -limitation, third party license terms, other Intel software license terms, and -open source software license terms. These separate license terms govern your use -of the third party programs as set forth in the “third-party-programs.txt” or -other similarly named text file. - -Third party programs and their corresponding required notices and/or license -terms are listed below. - --------------------------------------------------------------------------------- - -1. Nothings STB Libraries - -stb/LICENSE - - This software is available under 2 licenses -- choose whichever you prefer. 
- ------------------------------------------------------------------------------ - ALTERNATIVE A - MIT License - Copyright (c) 2017 Sean Barrett - Permission is hereby granted, free of charge, to any person obtaining a copy of - this software and associated documentation files (the "Software"), to deal in - the Software without restriction, including without limitation the rights to - use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - of the Software, and to permit persons to whom the Software is furnished to do - so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - ------------------------------------------------------------------------------ - ALTERNATIVE B - Public Domain (www.unlicense.org) - This is free and unencumbered software released into the public domain. - Anyone is free to copy, modify, publish, use, compile, sell, or distribute this - software, either in source code form or as a compiled binary, for any purpose, - commercial or non-commercial, and by any means. - In jurisdictions that recognize copyright laws, the author or authors of this - software dedicate any and all copyright interest in the software to the public - domain. We make this dedication for the benefit of the public at large and to - the detriment of our heirs and successors. 
We intend this dedication to be an - overt act of relinquishment in perpetuity of all present and future rights to - this software under copyright law. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -2. FGPA example designs-gzip - - SDL2.0 - -zlib License - - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - --------------------------------------------------------------------------------- - -3. 
Nbody - (c) 2019 Fabio Baruffa - - Plotly.js - Copyright (c) 2020 Plotly, Inc - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -© 2020 GitHub, Inc. - --------------------------------------------------------------------------------- - -4. GNU-EFI - Copyright (c) 1998-2000 Intel Corporation - -The files in the "lib" and "inc" subdirectories are using the EFI Application -Toolkit distributed by Intel at http://developer.intel.com/technology/efi - -This code is covered by the following agreement: - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. THE EFI SPECIFICATION AND ALL OTHER INFORMATION -ON THIS WEB SITE ARE PROVIDED "AS IS" WITH NO WARRANTIES, AND ARE SUBJECT -TO CHANGE WITHOUT NOTICE. - --------------------------------------------------------------------------------- - -5. Edk2 - Copyright (c) 2019, Intel Corporation. All rights reserved. - - Edk2 Basetools - Copyright (c) 2019, Intel Corporation. All rights reserved. - -SPDX-License-Identifier: BSD-2-Clause-Patent - --------------------------------------------------------------------------------- - -6. Heat Transmission - -GNU LESSER GENERAL PUBLIC LICENSE -Version 3, 29 June 2007 - -Copyright © 2007 Free Software Foundation, Inc. - -Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. - -This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. - -0. Additional Definitions. -As used herein, “this License” refers to version 3 of the GNU Lesser General Public License, and the “GNU GPL” refers to version 3 of the GNU General Public License. 
- -“The Library” refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. - -An “Application” is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. - -A “Combined Work” is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the “Linked Version”. - -The “Minimal Corresponding Source” for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. - -The “Corresponding Application Code” for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. - -1. Exception to Section 3 of the GNU GPL. -You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. - -2. Conveying Modified Versions. -If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: - -a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or -b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. -3. 
Object Code Incorporating Material from Library Header Files. -The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: - -a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. -b) Accompany the object code with a copy of the GNU GPL and this license document. -4. Combined Works. -You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: - -a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. -b) Accompany the Combined Work with a copy of the GNU GPL and this license document. -c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. -d) Do one of the following: -0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. -1) Use a suitable shared library mechanism for linking with the Library. 
A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. -e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) -5. Combined Libraries. -You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: - -a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. -b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. -6. Revised Versions of the GNU Lesser General Public License. -The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. 
If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License “or any later version” applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. - -If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. - --------------------------------------------------------------------------------- -7. Rodinia - Copyright (c)2008-2011 University of Virginia -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted without royalty fees or other restrictions, provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of the University of Virginia, the Dept. of Computer Science, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF VIRGINIA OR THE SOFTWARE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -If you use this software or a modified version of it, please cite the most relevant among the following papers: - - - M. A. Goodrum, M. J. Trotter, A. Aksel, S. T. Acton, and K. Skadron. Parallelization of Particle Filter Algorithms. In Proceedings of the 3rd Workshop on Emerging Applications and Many-core Architecture (EAMA), in conjunction with the IEEE/ACM International -Symposium on Computer Architecture (ISCA), June 2010. - - - S. Che, M. Boyer, J. Meng, D. Tarjan, J. W. Sheaffer, Sang-Ha Lee and K. Skadron. -Rodinia: A Benchmark Suite for Heterogeneous Computing. IEEE International Symposium -on Workload Characterization, Oct 2009. - -- J. Meng and K. Skadron. "Performance Modeling and Automatic Ghost Zone Optimization -for Iterative Stencil Loops on GPUs." In Proceedings of the 23rd Annual ACM International -Conference on Supercomputing (ICS), June 2009. - -- L.G. Szafaryn, K. Skadron and J. Saucerman. "Experiences Accelerating MATLAB Systems -Biology Applications." in Workshop on Biomedicine in Computing (BiC) at the International -Symposium on Computer Architecture (ISCA), June 2009. - -- M. Boyer, D. Tarjan, S. T. Acton, and K. Skadron. "Accelerating Leukocyte Tracking using CUDA: -A Case Study in Leveraging Manycore Coprocessors." In Proceedings of the International Parallel -and Distributed Processing Symposium (IPDPS), May 2009. - -- S. Che, M. Boyer, J. Meng, D. Tarjan, J. W. Sheaffer, and K. Skadron. 
"A Performance -Study of General Purpose Applications on Graphics Processors using CUDA" Journal of -Parallel and Distributed Computing, Elsevier, June 2008. - --------------------------------------------------------------------------------- -Other names and brands may be claimed as the property of others. - --------------------------------------------------------------------------------- From 701f8c27765546239423ac19b22278f5b01b9e37 Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Thu, 3 Nov 2022 06:41:46 -0400 Subject: [PATCH 08/11] Delete task_sequence.vcxproj --- .../task_sequence/task_sequence.vcxproj | 160 ------------------ 1 file changed, 160 deletions(-) delete mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.vcxproj diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.vcxproj deleted file mode 100644 index 93c595e971..0000000000 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.vcxproj +++ /dev/null @@ -1,160 +0,0 @@ - - - - - Debug - x64 - - - Release - x64 - - - - - - - - - - 15.0 - {3f5364b3-f987-4676-89a5-1f19ba3d4b75} - Win32Proj - task_sequence - $(WindowsSDKVersion.Replace("\","")) - - - - Application - true - Intel(R) oneAPI DPC++ Compiler 2022 - Unicode - - - Application - false - Intel(R) oneAPI DPC++ Compiler 2022 - true - Unicode - - - Application - true - Intel(R) oneAPI DPC++ Compiler 2022 - Unicode - - - Application - false - Intel(R) oneAPI DPC++ Compiler 2022 - true - Unicode - - - - - - - - - - - - - - - - - - - - - true - - - true - - - false - - - false - - - - Use - Level3 - Disabled - true - true - pch.h - $(ONEAPI_ROOT)dev-utilities\latest\include - - - Console - true - - - - - Use - Level3 - Disabled - true - true - pch.h - true - -DFPGA_EMULATOR %(AdditionalOptions) - $(IntDir)task_sequence.obj - 
$(ONEAPI_ROOT)dev-utilities\latest\include - - - Console - true - - - - - Use - Level3 - MaxSpeed - true - true - true - true - pch.h - $(ONEAPI_ROOT)dev-utilities\latest\include - - - Console - true - true - true - - - - - Use - Level3 - MaxSpeed - true - true - true - true - pch.h - true - -DFPGA_EMULATOR %(AdditionalOptions) - $(IntDir)task_sequence.obj - $(ONEAPI_ROOT)dev-utilities\latest\include - - - Console - true - true - true - - - - - - From cec2b24451e7c043699a17ecadf04e10d19b2ec5 Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Thu, 3 Nov 2022 06:42:09 -0400 Subject: [PATCH 09/11] Delete License.txt --- .../Features/task_sequence/License.txt | 23 ------------------- 1 file changed, 23 deletions(-) delete mode 100644 DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/License.txt diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/License.txt deleted file mode 100644 index 7c8b8a36c6..0000000000 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/License.txt +++ /dev/null @@ -1,23 +0,0 @@ -Copyright Intel Corporation - -SPDX-License-Identifier: MIT -https://opensource.org/licenses/MIT - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - From aa5c241e45fb2a3f60bc65497fafd7e2004bdbe7 Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Thu, 3 Nov 2022 06:48:50 -0400 Subject: [PATCH 10/11] Apply suggestions from code review Co-authored-by: yuguen-intel --- .../Features/task_sequence/README.md | 48 ++++++++++--------- .../Features/task_sequence/sample.json | 20 +++++--- .../Features/task_sequence/src/CMakeLists.txt | 11 +++-- .../task_sequence/src/task_sequence.cpp | 4 +- 4 files changed, 48 insertions(+), 35 deletions(-) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md index 93c3bc8673..7ce470740f 100644 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/README.md @@ -1,7 +1,7 @@ # The `task_sequence` extension -This FPGA tutorial demonstrates how to use the task_sequence extension to asynchronously run sub-kernel sets of operations, called tasks, in parallel. The task_sequence extension provides a templated class, task_sequence, that defines an API for asynchronously launching a parallel task, and for retrieving the results of that task. Objects of this class represent a FIFO queue of tasks matching the order in which these tasks were invoked, as well as an instantiation of the FPGA hardware used to perform the operations of that task. 
+This FPGA tutorial demonstrates how to use the `task_sequence` extension to asynchronously run sub-kernel sets of operations, called tasks, in parallel. The `task_sequence` extension provides a templated class, `task_sequence`, that defines an API for asynchronously launching a parallel task, and for retrieving the results of that task. Objects of this class represent a FIFO queue of tasks matching the order in which these tasks were invoked, as well as an instantiation of the FPGA hardware used to perform the operations of that task. ***Documentation***: The [DPC++ FPGA Code Samples Guide](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of DPC++ for FPGA.
The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) is the reference manual for targeting FPGAs through DPC++.
@@ -9,7 +9,7 @@ The [oneAPI Programming Guide](https://www.intel.com/content/www/us/en/develop/d | Optimized for | Description --- |--- -| OS | Linux* Ubuntu* 18.04/20.04, RHEL*/CentOS* 8, SUSE* 15; Windows* 10 +| OS | Linux* Ubuntu* 18.04/20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10 | Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA
Intel® FPGA Programmable Acceleration Card (PAC) D5005 (with Intel Stratix® 10 SX)
Intel® FPGA 3rd party / custom platforms with oneAPI support
*__Note__: Intel® FPGA PAC hardware is only compatible with Ubuntu 18.04* | Software | Intel® oneAPI DPC++ Compiler
Intel® FPGA Add-On for oneAPI Base Toolkit | What you will learn | Basics of task_sequence declaration and usage @@ -19,16 +19,16 @@ The [oneAPI Programming Guide](https://www.intel.com/content/www/us/en/develop/d ## Purpose -Use objects of a task_sequence class to asychronously run parallel tasks, and to define the hardware that is instantiated to perform those tasks. An API for invoking tasks and retriving results of these tasks imposes a FIFO ordering on outstanding tasks and their results. The scope of a task_sequence object defines the lifetime in which the hardware represented by that object can be used to perform a task. Users can control hardware reuse and replication by declaring single or multiple objects of the same task_sequence class. +Use objects of a `task_sequence` class to asynchronously run parallel tasks, and to define the hardware that is instantiated to perform those tasks. An API for invoking tasks and retrieving results of these tasks imposes a FIFO ordering on outstanding tasks and their results. The scope of a `task_sequence` object defines the lifetime in which the hardware represented by that object can be used to perform a task. Users can control hardware reuse and replication by declaring single or multiple objects of the same `task_sequence` class. -### Declaring a task_sequence -A task_sequence is a templated class that defines a set of operations (task), and methods for asynchronously invoking parallel instances of that task, and retrieiving the results of those parallel tasks in FIFO order. The first template parameter is an auto reference to a Callable f that defines the asynchronous task to be associated with the task_sequence. The requirement for an auto reference amounts to a requirement that f be statically resolvable at compile time, i.e., not a function pointer. Furthermore, the return type and argument types of f must be resolvable and fixed for each definition of task_sequence. 
+### Declaring a `task_sequence` +A `task_sequence` is a templated class that defines a set of operations (tasks), and methods for asynchronously invoking parallel instances of these tasks, and retrieving their results in a FIFO order. The first template parameter is an auto reference to a callable `f` that defines the asynchronous task to be associated with the `task_sequence`. The requirement for an auto reference amounts to a requirement that `f` be statically resolvable at compile time, i.e., not a function pointer. Furthermore, the return type and argument types of `f` must be resolvable and fixed for each definition of `task_sequence`. -The task_sequence class optionally takes two additional unsigned int template parameters specifying the invocation capacity and response capacity for instantiated task_sequence objects. The invocation capacity parameter defines the minimum number of task invocations (see [async](async) in [task_sequence API](task_sequence API) below) that must be supported without any response being collected (see [get](get) in [task_sequence API](task_sequence API) below). This number of async invocations without a get call is the minimum number that will be supported before a subsequenct async member function may block. A default value of 1 is assumed if the invocation capacity parameter is not specified. +The `task_sequence` class optionally takes two additional `unsigned int` template parameters specifying the invocation capacity and response capacity for instantiated `task_sequence` objects. The invocation capacity parameter defines the minimum number of task invocations (see [async](#async) in [task_sequence API](#task_sequence-api) below) that must be supported without any response being collected (see [get](#get) in [task_sequence API](#task_sequence-api) below). This number of `async` invocations without a `get` call is the minimum number that will be supported before a subsequent `async` member function may block.
A default value of 1 is assumed if the invocation capacity parameter is not specified. -The response capacity paramter defines the maximum number of outstanding async invocations such that all outstanding invocations are guaranteed to make forward progress. Further async invocations may block until enough get calls are invoked such that the number of outstanding async invocations is reduced to the response capacity. A default value of 1 is assumed if the response capacity parameter is not specified. +The response capacity parameter defines the maximum number of outstanding `async` invocations such that all outstanding invocations are guaranteed to make forward progress. Further `async` invocations may block until enough `get` calls are invoked such that the number of outstanding `async` invocations is reduced to the response capacity. A default value of 1 is assumed if the response capacity parameter is not specified. -Object instances of a templated task_sequence class represent a specific instantiation of FPGA hardware to perform the operations of the task f. Users can control the amount of replication of FPGA hardware by the number of object declarations they use. +Object instances of a templated `task_sequence` class represent a specific instantiation of FPGA hardware to perform the operations of the task `f`. Users can control the amount of replication of FPGA hardware by the number of object declarations they use. ```c++ int someTask(int intArg, float floatArg) { @@ -44,18 +44,18 @@ int someTask(int intArg, float floatArg) { ``` -In this example, `firstInstance` and `secondInstance` are two task_sequence objects that implement the task `someTask`, which takes an integer argument and returns and integer result. Since they are two different object instances, they represent two distinct instances of FPGA hardware implementing `someTask`, as well as two separate queues for holding the results of parallel invocations of `someTask`. 
+In this example, `firstInstance` and `secondInstance` are two `task_sequence` objects that implement the task `someTask`, which takes an integer and a float argument and returns an integer result. Since they are two different object instances, they represent two distinct instances of FPGA hardware implementing `someTask`, as well as two separate queues for holding the results of parallel invocations of `someTask`. ### task_sequence API -task_sequence provides two methods for asynchronously invoking and collecting parallel instances of the templated task function. +`task_sequence` provides two methods for asynchronously invoking and collecting parallel instances of the templated task function. - - [async](#asymc) + - [async](#async) - [get](#get) #### async -The async method asynchronously invokes a parallel instance of the templated task function. The async method takes the same arguments (the same type and same order) as those defined in the templated task function's signature. +The `async` method asynchronously invokes a parallel instance of the templated task function. The `async` method takes the same arguments (the same type and same order) as those defined in the templated task function's signature. ```c++ int someTask(int intArg, float floatArg); @@ -77,11 +77,11 @@ int someTask(int intArg, float floatArg); ``` -In the above example, two asynchronous parallel invocations of 'someTask' are invoked on the FPGA hardware represented by the firstInstance task_sequence object. The first parallel task is invoked with arguments 'argA' and 'argB', and the second invocation with 'argC' and 'argD'. +In the above example, two asynchronous parallel invocations of `someTask` are invoked on the FPGA hardware represented by the `firstInstance` `task_sequence` object. The first parallel task is invoked with arguments `argA` and `argB`, and the second invocation with `argC` and `argD`.
#### get -The get method collects the result of a task_sequence task previously invoked through the async method. The get method for a particular task_sequence object has the same return type as the templated task function for the object. Calling get returns results in the same order in which the tasks were invoked. +The `get` method collects the result of a `task_sequence` task previously invoked through the `async` method. The `get` method for a particular `task_sequence` object has the same return type as the templated task function for the object. Calling `get` returns results in the same order in which the tasks were invoked. ```c++ // FPGA code @@ -104,12 +104,12 @@ The get method collects the result of a task_sequence task previously invoked th ``` -In this continuation of the async example, firstResult contains the return value of the async invocation using (argA, argB), and secondResult contains the return value of the async invocation using (argC, argD). +In this continuation of the `async` example, `firstResult` contains the return value of the `async` invocation using `(argA, argB)`, and `secondResult` contains the return value of the `async` invocation using `(argC, argD)`. -The get method is a blocking call. That is, if no previous async invocation has completed, get will block until one has. +The `get` method is a blocking call. That is, if no previous `async` invocation has completed, `get` will block until one has. ### Testing the Tutorial -In `task_sequence.cpp`, the dot product of a 16k element vector is calculated twice. The first calculation, performed in the 'SequentialTask' kernel, is performed by a single async invocation of the 'dotProduct' function by a single task_sequence object. +In `task_sequence.cpp`, the dot product of a 16k element vector is calculated twice. The first calculation, performed in the `SequentialTask` kernel, is performed by a single `async` invocation of the `dotProduct` function by a single `task_sequence` object. 
```c++ @@ -119,7 +119,7 @@ h.single_task([=]() { out_acc[0] = whole.get(); }); ``` -The second calculation, performed by the `ParallelTask` kernel, is performed by 4 async invocations of the `dotProduct` function via 4 different task_sequence objects. Each async invocation operates on one-quarter of the vector. Since each async invocation utilizes its own FPGA hardware and operates on a different quarter of the vector, each partial dot product calculation can be done in parallel, speeding up the result. +The second calculation, performed by the `ParallelTask` kernel, is performed by 4 `async` invocations of the `dotProduct` function via 4 different `task_sequence` objects. Each `async` invocation operates on one-quarter of the vector. Since each `async` invocation utilizes its own FPGA hardware and operates on a different quarter of the vector, each partial dot product calculation can be done in parallel, speeding up the result. ```c++ h.single_task([=]() { @@ -137,8 +137,8 @@ h.single_task([=]() { ``` ## Key Concepts -* Basics of declaring task_sequence objects -* Using task_sequence async and get API for invoking and collecting parallel tasks +* Basics of declaring `task_sequence` objects +* Using `task_sequence` `async` and `get` API for invoking and collecting parallel tasks ## License Code samples are licensed under the MIT license. See @@ -275,7 +275,8 @@ dependencies and permissions errors. ### In Third-Party Integrated Development Environments (IDEs) -You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). 
For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs]([https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html)) +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). +For instructions, refer to [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html). ## Examining the Reports @@ -285,11 +286,11 @@ Open the **Views** menu and select **System Viewer**. In the left-hand pane, select **SequentialTask.B0** under the System hierarchy. -In the main **System Viewer** pane, the task_sequence async and get for the single 'whole' task_sequence object are highlighted as a 'WR' and 'RD' node respectively. These represent a write pipe for writing the arguments and start command to the 'dotProduct' task function, and a read pipe for returning the results. +In the main **System Viewer** pane, the `task_sequence` `async` and `get` for the single `whole` `task_sequence` object are highlighted as a `WR` and `RD` node respectively. These represent a write pipe for writing the arguments and start command to the `dotProduct` task function, and a read pipe for returning the results. Now select **ParallelTask.B0** in the left-hand pane. -In the main **System Viewer(( pane, the four task_sequence async and get commands for the four task_sequence objects of the parallelTask kernel are highlighted. These represent the four parallel async invocations in this kernel. 
As in the the sequentialTask kernel, the 'WR' nodes represent pipes for writing the arguments and start command to each instance of the 'dotProduct' task function (since there are 4 task_sequence objects, there are 4 hardware instances), and the 'RD' nodes represent pipes for returning the results. +In the main **System Viewer** pane, the four `task_sequence` `async` and `get` commands for the four `task_sequence` objects of the `ParallelTask` kernel are highlighted. These represent the four parallel `async` invocations in this kernel. As in the `SequentialTask` kernel, the `WR` nodes represent pipes for writing the arguments and start command to each instance of the `dotProduct` task function (since there are 4 `task_sequence` objects, there are 4 hardware instances), and the `RD` nodes represent pipes for returning the results. ## Running the Sample @@ -301,6 +302,7 @@ In the main **System Viewer(( pane, the four task_sequence async and get command 2. Run the sample on the FPGA device: ``` ./task_sequence.fpga (Linux) + task_sequence.fpga.exe (Windows) ``` ### Example of Output diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json index 2881dd914b..38a51beacb 100644 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/sample.json @@ -3,17 +3,25 @@ "name": "task_sequence", "categories": ["Toolkit/oneAPI Direct Programming/DPC++ FPGA/Tutorials/Features"], "description": "An Intel® FPGA tutorial demonstrating the usage of the task_sequence extenstion", - "toolchain": ["dpcpp"], + "toolchain": ["icpx"], "os": ["linux", "windows"], "targetDevice": ["FPGA"], "builder": ["ide", "cmake"], - "languages": [{"cpp":{}}], + "languages": [{"cpp":{}}], "commonFolder": { + "base": "../../../..", + "include": [ + "README.md", + "Tutorials/Features/experimental/task_sequence", + 
"include" + ], + "exclude": [] + }, "ciTests": { "linux": [ { "id": "fpga_emu", "steps": [ - "dpcpp --version", + "icpx --version", "mkdir build", "cd build", "cmake ..", @@ -24,7 +32,7 @@ { "id": "report", "steps": [ - "dpcpp --version", + "icpx --version", "mkdir build", "cd build", "cmake ..", @@ -36,7 +44,7 @@ { "id": "fpga_emu", "steps": [ - "dpcpp --version", + "icpx --version", "cd ../../..", "mkdir build", "cd build", @@ -48,7 +56,7 @@ { "id": "report", "steps": [ - "dpcpp --version", + "icpx --version", "cd ../../..", "mkdir build", "cd build", diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/CMakeLists.txt index f354a4fec5..09e89794a7 100644 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/CMakeLists.txt +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/CMakeLists.txt @@ -25,10 +25,10 @@ endif() # 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). # 2. The "link" stage invokes the compiler's FPGA backend before linking. # For this reason, FPGA backend flags must be passed as link flags in CMake. 
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR -g0") -set(EMULATOR_LINK_FLAGS "-fintelfpga -g0") -set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga") -set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}") +set(EMULATOR_COMPILE_FLAGS "-fsycl -Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR -g0") +set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga -g0") +set(HARDWARE_COMPILE_FLAGS "-fsycl -Wall ${WIN_FLAG} -fintelfpga") +set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}") # use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation ############################################################################### @@ -40,6 +40,7 @@ set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_H # [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o task_sequence.cpp.o -c task_sequence.cpp # [link] dpcpp -fintelfpga task_sequence.cpp.o -o task_sequence.fpga_emu add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) +target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../../include) set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) @@ -52,6 +53,7 @@ add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) # The compile output is not an executable, but an intermediate compilation result unique to DPC++. 
add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE}) +target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../../include) add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early") @@ -66,6 +68,7 @@ set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK # [compile] dpcpp -fintelfpga -o task_sequence.cpp.o -c task_sequence.cpp # [link] dpcpp -fintelfpga -Xshardware -Xsboard= task_sequence.cpp.o -o task_sequence.fpga add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +target_include_directories(${FPGA_TARGET} PRIVATE ../../../../../include) add_custom_target(fpga DEPENDS ${FPGA_TARGET}) set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}") diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/task_sequence.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/task_sequence.cpp index 6482f950d4..4747417e3b 100644 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/task_sequence.cpp +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/src/task_sequence.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -7,7 +7,7 @@ #include #include -#include "dpc_common.hpp" +#include "exception_handler.hpp" using ValueT = float; From d3db0810acf61f7d09df3cecfffd01e46db505ed Mon Sep 17 00:00:00 2001 From: Robert Ho <84344325+rho180@users.noreply.github.com> Date: Thu, 3 Nov 2022 06:56:05 -0400 Subject: [PATCH 11/11] Delete task_sequence.sln --- .../Features/task_sequence/task_sequence.sln | 25 ------------------- 1 file changed, 25 deletions(-) delete mode 100644 
DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.sln diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.sln deleted file mode 100644 index ac10327a83..0000000000 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/task_sequence/task_sequence.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.28307.705 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "task_sequence", "task_sequence.vcxproj", "{3F5364B3-F987-4676-89A5-1F19BA3D4B75}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x64 = Debug|x64 - Release|x64 = Release|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Debug|x64.ActiveCfg = Debug|x64 - {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Debug|x64.Build.0 = Debug|x64 - {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Release|x64.ActiveCfg = Release|x64 - {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Release|x64.Build.0 = Release|x64 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {C0550E85-8C31-40EE-BFFA-F267DC16329D} - EndGlobalSection -EndGlobal