scality · lamphamsy · Sep 13, 2018 · Oct 15, 2018 · Oct 5, 2018 · Oct 5, 2018
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -31,6 +31,7 @@ include(GNUInstallDirs)
 set(LIB_SRC
   ${SOURCE_DIR}/core.cpp
   ${SOURCE_DIR}/fec_vectorisation.cpp
+  ${SOURCE_DIR}/fft_2n.cpp
   ${SOURCE_DIR}/misc.cpp
   ${SOURCE_DIR}/gf_nf4.cpp
   ${SOURCE_DIR}/gf_ring.cpp

diff --git a/src/arith.h b/src/arith.h
@@ -41,12 +41,6 @@
 
 namespace quadiron {
 
-template <typename T>
-using DoubleSizeVal = typename DoubleSize<T>::T;
-
-template <typename T>
-using SignedDoubleSizeVal = typename SignedDoubleSize<T>::T;
-
 /** Base/core arithmetical functions of QuadIron. */
 namespace arith {
 

diff --git a/src/core.h b/src/core.h
@@ -34,7 +34,7 @@
 #include <random>
 
 #include "big_int.h"
-#include "simd/simd.h"
+#include "simd/allocator.h"
 
 namespace quadiron {
 
@@ -78,6 +78,12 @@ struct SignedDoubleSize<__uint128_t> {
     typedef Int256 T;
 };
 
+template <typename T>
+using DoubleSizeVal = typename DoubleSize<T>::T;
+
+template <typename T>
+using SignedDoubleSizeVal = typename SignedDoubleSize<T>::T;
+
 /** A group of values stored as one.
  *
  * This allows faster processing, as the values can be processed as one.

diff --git a/src/fec_base.h b/src/fec_base.h
@@ -51,7 +51,7 @@
 
 #ifdef QUADIRON_USE_SIMD
 
-#include "simd.h"
+#include "simd/simd.h"
 
 #endif // #ifdef QUADIRON_USE_SIMD
 
@@ -74,8 +74,6 @@ static inline uint64_t hrtime_usec(timeval begin)
     return 1000000 * (tv.tv_sec - begin.tv_sec) + tv.tv_usec - begin.tv_usec;
 }
 
-#define OOR_MARK 1
-
 enum class FecType {
     /** Systematic code
      *

diff --git a/src/fec_rs_fnt.h b/src/fec_rs_fnt.h
@@ -60,6 +60,11 @@ class RsFnt : public FecCode<T> {
     // decoding context used in encoding of systematic FNT
     std::unique_ptr<DecodeContext<T>> enc_context;
 
+    // Indices used for accelerated functions
+    size_t simd_vec_len;
+    size_t simd_trailing_len;
+    size_t simd_offset;
+
   public:
     RsFnt(
         FecType type,
@@ -70,6 +75,12 @@ class RsFnt : public FecCode<T> {
         : FecCode<T>(type, word_size, n_data, n_parities, pkt_size)
     {
         this->fec_init();
+
+        // Indices used for accelerated functions
+        const unsigned ratio = simd::countof<T>();
+        simd_vec_len = this->pkt_size / ratio;
+        simd_trailing_len = this->pkt_size - simd_vec_len * ratio;
+        simd_offset = simd_vec_len * ratio;
     }
 
     inline void check_params() override

diff --git a/src/fec_vectorisation.cpp b/src/fec_vectorisation.cpp
@@ -32,12 +32,11 @@
 #include "fec_rs_fnt.h"
 
 /*
- * The file includes vectorized operations used by FEC classes
+ * The file includes specialized operations used by FEC classes
  */
 
 #ifdef QUADIRON_USE_SIMD
 
-#include "simd.h"
 #include "simd/simd.h"
 
 namespace quadiron {
@@ -53,20 +52,13 @@ void RsFnt<uint16_t>::encode_post_process(
     uint16_t threshold = this->gf->card_minus_one();
     unsigned code_len = this->n_outputs;
 
-    // number of elements per vector register
-    unsigned vec_size = simd::countof<uint16_t>();
-    // number of vector registers per fragment packet
-    size_t vecs_nb = size / vec_size;
-    // odd number of elements not vectorized
-    size_t last_len = size - vecs_nb * vec_size;
-
     simd::encode_post_process(
-        output, props, offset, code_len, threshold, vecs_nb);
+        output, props, offset, code_len, threshold, simd_vec_len);
 
-    if (last_len > 0) {
+    if (simd_trailing_len > 0) {
         for (unsigned i = 0; i < code_len; ++i) {
             uint16_t* chunk = output.get(i);
-            for (size_t j = vecs_nb * vec_size; j < size; ++j) {
+            for (size_t j = simd_offset; j < size; ++j) {
                 if (chunk[j] == threshold) {
                     props[i].add(offset + j, OOR_MARK);
                 }
@@ -85,20 +77,13 @@ void RsFnt<uint32_t>::encode_post_process(
     const uint32_t threshold = this->gf->card_minus_one();
     const unsigned code_len = this->n_outputs;
 
-    // number of elements per vector register
-    const unsigned vec_size = simd::countof<uint32_t>();
-    // number of vector registers per fragment packet
-    const size_t vecs_nb = size / vec_size;
-    // odd number of elements not vectorized
-    const size_t last_len = size - vecs_nb * vec_size;
-
     simd::encode_post_process(
-        output, props, offset, code_len, threshold, vecs_nb);
+        output, props, offset, code_len, threshold, simd_vec_len);
 
-    if (last_len > 0) {
+    if (simd_trailing_len > 0) {
         for (unsigned i = 0; i < code_len; ++i) {
             uint32_t* chunk = output.get(i);
-            for (size_t j = vecs_nb * vec_size; j < size; ++j) {
+            for (size_t j = simd_offset; j < size; ++j) {
                 if (chunk[j] == threshold) {
                     props[i].add(offset + j, OOR_MARK);
                 }

diff --git a/src/fft_2n.cpp b/src/fft_2n.cpp
@@ -0,0 +1,192 @@
+/* -*- mode: c++ -*- */
+/*
+ * Copyright 2017-2018 Scality
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fft_2n.h"
+
+/*
+ * This file contains the SIMD-backed specializations of Radix2 butterfly steps
+ */
+
+#ifdef QUADIRON_USE_SIMD
+
+#include "simd/simd.h"
+
+namespace quadiron {
+namespace fft {
+
+template <>
+void Radix2<uint16_t>::butterfly_ct_two_layers_step(
+    vec::Buffers<uint16_t>& buf,
+    unsigned start,
+    unsigned m)
+{
+    const unsigned coefIndex = start * this->n / m / 2;
+    const uint16_t r1 = vec_W[coefIndex];
+    const uint16_t r2 = vec_W[coefIndex / 2];
+    const uint16_t r3 = vec_W[coefIndex / 2 + this->n / 4];
+
+    // Vectorized part: pass the vec_W entries r1, r2, r3 to the simd:: kernel.
+    simd::butterfly_ct_two_layers_step(
+        buf, r1, r2, r3, start, m, simd_vec_len, card);
+
+    // Trailing elements (if any): non-SIMD fallback, given simd_offset.
+    if (simd_trailing_len > 0) {
+        butterfly_ct_two_layers_step_slow(buf, start, m, simd_offset);
+    }
+}
+
+template <>
+void Radix2<uint16_t>::butterfly_ct_step(
+    vec::Buffers<uint16_t>& buf,
+    uint16_t r,
+    unsigned start,
+    unsigned m,
+    unsigned step)
+{
+    // Vectorized part: delegate to the simd:: kernel with simd_vec_len and card.
+    simd::butterfly_ct_step(buf, r, start, m, step, simd_vec_len, card);
+
+    // Trailing elements (if any): non-SIMD fallback, given simd_offset.
+    if (simd_trailing_len > 0) {
+        butterfly_ct_step_slow(buf, r, start, m, step, simd_offset);
+    }
+}
+
+template <>
+void Radix2<uint16_t>::butterfly_gs_step(
+    vec::Buffers<uint16_t>& buf,
+    uint16_t coef,
+    unsigned start,
+    unsigned m,
+    unsigned step)
+{
+    // Vectorized part: simd:: kernel call; note `step` is not forwarded to it.
+    simd::butterfly_gs_step(buf, coef, start, m, simd_vec_len, card);
+
+    // Trailing elements (if any): non-SIMD fallback, given step and simd_offset.
+    if (simd_trailing_len > 0) {
+        butterfly_gs_step_slow(buf, coef, start, m, step, simd_offset);
+    }
+}
+
+template <>
+void Radix2<uint16_t>::butterfly_gs_step_simple(
+    vec::Buffers<uint16_t>& buf,
+    uint16_t coef,
+    unsigned start,
+    unsigned m,
+    unsigned step)
+{
+    // Vectorized part: simd:: kernel call; note `step` is not forwarded to it.
+    simd::butterfly_gs_step_simple(buf, coef, start, m, simd_vec_len, card);
+
+    // Trailing elements (if any): non-SIMD fallback, given step and simd_offset.
+    if (simd_trailing_len > 0) {
+        butterfly_gs_step_simple_slow(buf, coef, start, m, step, simd_offset);
+    }
+}
+
+template <>
+void Radix2<uint32_t>::butterfly_ct_two_layers_step(
+    vec::Buffers<uint32_t>& buf,
+    unsigned start,
+    unsigned m)
+{
+    const unsigned coefIndex = start * this->n / m / 2;
+    const uint32_t r1 = vec_W[coefIndex];
+    const uint32_t r2 = vec_W[coefIndex / 2];
+    const uint32_t r3 = vec_W[coefIndex / 2 + this->n / 4];
+
+    // Vectorized part: pass the vec_W entries r1, r2, r3 to the simd:: kernel.
+    simd::butterfly_ct_two_layers_step(
+        buf, r1, r2, r3, start, m, simd_vec_len, card);
+
+    // Trailing elements (if any): non-SIMD fallback, given simd_offset.
+    if (simd_trailing_len > 0) {
+        butterfly_ct_two_layers_step_slow(buf, start, m, simd_offset);
+    }
+}
+
+template <>
+void Radix2<uint32_t>::butterfly_ct_step(
+    vec::Buffers<uint32_t>& buf,
+    uint32_t r,
+    unsigned start,
+    unsigned m,
+    unsigned step)
+{
+    // Vectorized part: delegate to the simd:: kernel with simd_vec_len and card.
+    simd::butterfly_ct_step(buf, r, start, m, step, simd_vec_len, card);
+
+    // Trailing elements (if any): non-SIMD fallback, given simd_offset.
+    if (simd_trailing_len > 0) {
+        butterfly_ct_step_slow(buf, r, start, m, step, simd_offset);
+    }
+}
+
+template <>
+void Radix2<uint32_t>::butterfly_gs_step(
+    vec::Buffers<uint32_t>& buf,
+    uint32_t coef,
+    unsigned start,
+    unsigned m,
+    unsigned step)
+{
+    // Vectorized part: simd:: kernel call; note `step` is not forwarded to it.
+    simd::butterfly_gs_step(buf, coef, start, m, simd_vec_len, card);
+
+    // Trailing elements (if any): non-SIMD fallback, given step and simd_offset.
+    if (simd_trailing_len > 0) {
+        butterfly_gs_step_slow(buf, coef, start, m, step, simd_offset);
+    }
+}
+
+template <>
+void Radix2<uint32_t>::butterfly_gs_step_simple(
+    vec::Buffers<uint32_t>& buf,
+    uint32_t coef,
+    unsigned start,
+    unsigned m,
+    unsigned step)
+{
+    // Vectorized part: simd:: kernel call; note `step` is not forwarded to it.
+    simd::butterfly_gs_step_simple(buf, coef, start, m, simd_vec_len, card);
+
+    // Trailing elements (if any): non-SIMD fallback, given step and simd_offset.
+    if (simd_trailing_len > 0) {
+        butterfly_gs_step_simple_slow(buf, coef, start, m, step, simd_offset);
+    }
+}
+
+} // namespace fft
+} // namespace quadiron
+
+#endif // #ifdef QUADIRON_USE_SIMD