diff --git a/HCCStream.cpp b/HCCStream.cpp
index ff8de0d0..57d4f39e 100644
--- a/HCCStream.cpp
+++ b/HCCStream.cpp
@@ -8,6 +8,7 @@
 #include <codecvt>
 #include <vector>
 #include <locale>
+#include <cstdio>
 
 #include "HCCStream.h"
 
@@ -44,14 +45,13 @@ void listDevices(void)
   }
 }
 
-
-
 template <class T>
 HCCStream<T>::HCCStream(const unsigned int ARRAY_SIZE, const int device_index):
   array_size(ARRAY_SIZE),
-  d_a(ARRAY_SIZE),
-  d_b(ARRAY_SIZE),
-  d_c(ARRAY_SIZE)
+  accelerator(hc::accelerator::get_all()[device_index]),
+  d_a(ARRAY_SIZE,accelerator.get_default_view()),
+  d_b(ARRAY_SIZE,accelerator.get_default_view()),
+  d_c(ARRAY_SIZE,accelerator.get_default_view())
 {
 
   // The array size must be divisible by TBSIZE for kernel launches
@@ -67,7 +67,6 @@ HCCStream<T>::HCCStream(const unsigned int ARRAY_SIZE, const int device_index):
   auto current = accs[device_index];
   
   std::cout << "Using HCC device " << getDeviceName(current) << std::endl;
-  
 
   // Check buffers fit on the device
   // TODO: unclear how to do that!!
@@ -96,15 +95,19 @@ void HCCStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector
   hc::copy(d_c,c.begin());
 }
 
-
 template <class T>
 void HCCStream<T>::copy()
 {
+  hc::array<T> &d_a = this->d_a;
+  hc::array<T> &d_c = this->d_c;
+
   try{
-  // launch a GPU kernel to compute the saxpy in parallel 
+  // launch a GPU kernel to copy d_a into d_c in parallel
-    hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
+    hc::completion_future future_kernel = hc::parallel_for_each(accelerator.get_default_view()
+                , hc::extent<1>(array_size)
 								, [&](hc::index<1> i) __attribute((hc)) {
-								  d_c[i] = d_a[i];
+
+								 d_c[i] = d_a[i];
 								});
     future_kernel.wait();
   }
@@ -117,10 +120,14 @@ void HCCStream<T>::copy()
 template <class T>
 void HCCStream<T>::mul()
 {
+  hc::array<T> &d_b = this->d_b;
+  hc::array<T> &d_c = this->d_c;
+
   const T scalar = 0.3;
   try{
-  // launch a GPU kernel to compute the saxpy in parallel 
+  // launch a GPU kernel to compute d_b = scalar * d_c in parallel
-    hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
+    hc::completion_future future_kernel = hc::parallel_for_each(accelerator.get_default_view()
+                , hc::extent<1>(array_size)
 								, [&](hc::index<1> i) __attribute((hc)) {
 								  d_b[i] = scalar*d_c[i];
 								});
@@ -135,9 +142,14 @@ void HCCStream<T>::mul()
 template <class T>
 void HCCStream<T>::add()
 {
+  hc::array<T> &d_a = this->d_a;
+  hc::array<T> &d_b = this->d_b;
+  hc::array<T> &d_c = this->d_c;
+
   try{
-    // launch a GPU kernel to compute the saxpy in parallel 
+    // launch a GPU kernel to compute d_c = d_a + d_b in parallel
-    hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
+    hc::completion_future future_kernel = hc::parallel_for_each(accelerator.get_default_view()
+                , hc::extent<1>(array_size)
 								, [&](hc::index<1> i) __attribute((hc)) {
 								  d_c[i] = d_a[i]+d_b[i];
 								});
@@ -152,10 +164,15 @@ void HCCStream<T>::add()
 template <class T>
 void HCCStream<T>::triad()
 {
+  hc::array<T> &d_a = this->d_a;
+  hc::array<T> &d_b = this->d_b;
+  hc::array<T> &d_c = this->d_c;
+
   const T scalar = 0.3;
   try{
     // launch a GPU kernel to compute the saxpy in parallel 
-    hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
+    hc::completion_future future_kernel = hc::parallel_for_each(accelerator.get_default_view()
+                , hc::extent<1>(array_size)
 								, [&](hc::index<1> i) __attribute((hc)) {
 								  d_a[i] = d_b[i] + scalar*d_c[i];
 								});
diff --git a/HCCStream.h b/HCCStream.h
index 818d2438..e8a056f4 100644
--- a/HCCStream.h
+++ b/HCCStream.h
@@ -23,6 +23,9 @@ class HCCStream : public Stream<T>
   // Size of arrays
   unsigned int array_size;
 
+  // Selected device
+  hc::accelerator accelerator;
+
   // Device side pointers to arrays
   hc::array<T,1> d_a;
   hc::array<T,1> d_b;
diff --git a/main.cpp b/main.cpp
index b4f5abff..7aad2154 100644
--- a/main.cpp
+++ b/main.cpp
@@ -40,6 +40,7 @@
 
 // Default size of 2^25
 unsigned int ARRAY_SIZE = 33554432;
+
 unsigned int num_times = 100;
 unsigned int deviceIndex = 0;
 bool use_float = false;
@@ -133,7 +134,7 @@ void run()
 #elif defined(HCC)
-  // Use the "reference" OpenMP 3 implementation
+  // Use the HCC implementation
   stream = new HCCStream<T>(ARRAY_SIZE, deviceIndex);
-  
+
 #endif
 
   stream->write_arrays(a, b, c);
@@ -213,9 +214,7 @@ void run()
       << std::endl;
 
   }
-
   delete stream;
-
 }
 
 template <typename T>