diff --git a/HCCStream.cpp b/HCCStream.cpp index ff8de0d0..57d4f39e 100644 --- a/HCCStream.cpp +++ b/HCCStream.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "HCCStream.h" @@ -44,14 +45,13 @@ void listDevices(void) } } - - template HCCStream::HCCStream(const unsigned int ARRAY_SIZE, const int device_index): array_size(ARRAY_SIZE), - d_a(ARRAY_SIZE), - d_b(ARRAY_SIZE), - d_c(ARRAY_SIZE) + accelerator(hc::accelerator::get_all()[device_index]), + d_a(ARRAY_SIZE,accelerator.get_default_view()), + d_b(ARRAY_SIZE,accelerator.get_default_view()), + d_c(ARRAY_SIZE,accelerator.get_default_view()) { // The array size must be divisible by TBSIZE for kernel launches @@ -67,7 +67,6 @@ HCCStream::HCCStream(const unsigned int ARRAY_SIZE, const int device_index): auto current = accs[device_index]; std::cout << "Using HCC device " << getDeviceName(current) << std::endl; - // Check buffers fit on the device // TODO: unclear how to do that!! @@ -96,15 +95,19 @@ void HCCStream::read_arrays(std::vector& a, std::vector& b, std::vector hc::copy(d_c,c.begin()); } - template void HCCStream::copy() { + hc::array &d_a = this->d_a; + hc::array &d_c = this->d_c; + try{ // launch a GPU kernel to compute the saxpy in parallel - hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) + hc::completion_future future_kernel = hc::parallel_for_each(accelerator.get_default_view() + , hc::extent<1>(array_size) , [&](hc::index<1> i) __attribute((hc)) { - d_c[i] = d_a[i]; + + d_c[i] = d_a[i]; }); future_kernel.wait(); } @@ -117,10 +120,14 @@ void HCCStream::copy() template void HCCStream::mul() { + hc::array &d_b = this->d_b; + hc::array &d_c = this->d_c; + const T scalar = 0.3; try{ // launch a GPU kernel to compute the saxpy in parallel - hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) + hc::completion_future future_kernel = hc::parallel_for_each(accelerator.get_default_view() + , hc::extent<1>(array_size) , [&](hc::index<1> i) __attribute((hc)) { d_b[i] = scalar*d_c[i]; }); @@ -135,9 +142,14 @@ void HCCStream::mul() template void HCCStream::add() { + hc::array &d_a = this->d_a; + hc::array &d_b = this->d_b; + hc::array &d_c = this->d_c; + try{ // launch a GPU kernel to compute the saxpy in parallel - hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) + hc::completion_future future_kernel = hc::parallel_for_each(accelerator.get_default_view() + , hc::extent<1>(array_size) , [&](hc::index<1> i) __attribute((hc)) { d_c[i] = d_a[i]+d_b[i]; }); @@ -152,10 +164,15 @@ void HCCStream::add() template void HCCStream::triad() { + hc::array &d_a = this->d_a; + hc::array &d_b = this->d_b; + hc::array &d_c = this->d_c; + const T scalar = 0.3; try{ // launch a GPU kernel to compute the saxpy in parallel - hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) + hc::completion_future future_kernel = hc::parallel_for_each(accelerator.get_default_view() + , hc::extent<1>(array_size) , [&](hc::index<1> i) __attribute((hc)) { d_a[i] = d_b[i] + scalar*d_c[i]; }); diff --git a/HCCStream.h b/HCCStream.h index 818d2438..e8a056f4 100644 --- a/HCCStream.h +++ b/HCCStream.h @@ -23,6 +23,9 @@ class HCCStream : public Stream // Size of arrays unsigned int array_size; + // Selected device + hc::accelerator accelerator; + // Device side pointers to arrays hc::array d_a; hc::array d_b; diff --git a/main.cpp b/main.cpp index b4f5abff..7aad2154 100644 --- a/main.cpp +++ b/main.cpp @@ -40,6 +40,7 @@ // Default size of 2^25 unsigned int ARRAY_SIZE = 33554432; + unsigned int num_times = 100; unsigned int deviceIndex = 0; bool use_float = false; @@ -133,7 +134,7 @@ void run() #elif defined(HCC) // Use the "reference" OpenMP 3 implementation stream = new HCCStream(ARRAY_SIZE, deviceIndex); - + #endif stream->write_arrays(a, b, c); @@ -213,9 +214,7 @@ void run() << std::endl; } - delete stream; - } template