Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
0b2852f
Move OOR_MARK definition to property.h
lamphamsy Sep 13, 2018
8977340
FFT2n: specialize butterfly operations
lamphamsy Oct 15, 2018
d301e48
CMakeLists: add fft_2n.cpp file
lamphamsy Oct 5, 2018
c7c6743
FFT2n.cpp: implement specialized operations
lamphamsy Oct 5, 2018
ecde06e
SIMD 128 u16 & u32: update
lamphamsy Oct 5, 2018
23f7ec6
SIMD 256 u16 & u32: update
lamphamsy Oct 5, 2018
8e7fb47
SIMD 256 u16 u32: remove useless files
lamphamsy Oct 5, 2018
559f733
SIMD 128 u16 u32: remove useless files
lamphamsy Oct 5, 2018
0876774
SIMD Main file including necessary files
lamphamsy Oct 5, 2018
4f97fa6
SIMD 128: essential operations for SSE
lamphamsy Oct 5, 2018
7dc72c1
SIMD 256: essential operations for AVX
lamphamsy Oct 5, 2018
fceff91
SIMD Basic: includes basic Operations
lamphamsy Oct 5, 2018
ec70d62
SIMD NF4 contains vectorized operations for NF4
lamphamsy Oct 5, 2018
542c810
SIMD FNT: vectorised operations for FNT
lamphamsy Oct 5, 2018
c100aab
SIMD 256: update
lamphamsy Oct 26, 2018
d00d648
SIMD: use auto for return type of MVMSK8
lamphamsy Oct 26, 2018
fd2197b
SIMD: move CARD & CARD_M_1 to simd_basic.h
lamphamsy Oct 26, 2018
9e11def
SIMD 128 & 256: move MASK8_LO to const variable groups
lamphamsy Oct 26, 2018
a23a0ab
SIMD: use macro for shiftr
lamphamsy Oct 26, 2018
373afd1
SIMD 128: use template functions
lamphamsy Oct 26, 2018
ec0991a
SIMD 256: use template functions
lamphamsy Oct 26, 2018
fde40cc
SIMD Basic: use templated essential functions
lamphamsy Oct 26, 2018
70ace14
SIMD Basic: use const & curly braces
lamphamsy Oct 26, 2018
49c4287
SIMD FNT: get rid of refactored butterfly functions
lamphamsy Oct 29, 2018
692f7ff
SIMD 128: add function is_all_zeros
lamphamsy Oct 29, 2018
324f470
SIMD 256: add function is_all_zeros
lamphamsy Oct 29, 2018
e7cfeaf
SIMD Basic: refactor MULFULL_MOD
lamphamsy Oct 29, 2018
f28fccc
SIMD 128: fix is_all_zeros
lamphamsy Oct 29, 2018
f65910a
SIMD basic: use const
lamphamsy Oct 29, 2018
163151a
SIMD FNT: fix typo & remove unnecessary comments
lamphamsy Oct 29, 2018
83c97ea
SIMD 256: remove NF4Type
lamphamsy Oct 29, 2018
435f89d
SIMD NF4: remove NF4Type
lamphamsy Oct 29, 2018
4bd2d6c
SIMD NF4: remove C-style cast
lamphamsy Oct 29, 2018
f608c75
FFT_2n.h: compute simd indices
lamphamsy Oct 30, 2018
664cb68
FFT_2n.cpp: remove calculation of indices
lamphamsy Oct 30, 2018
e657e79
FFT_2n.h: define butterfly_ct_two_layers_step_slow
lamphamsy Oct 30, 2018
cb734b4
FFT_2n.cpp: use butterfly_ct_two_layers_step_slow
lamphamsy Oct 30, 2018
1a11fb0
FEC RS FNT: simd indices as member variables
lamphamsy Oct 30, 2018
e1e9eeb
FEC Vectorisation: use FNT's simd indices
lamphamsy Oct 30, 2018
761eef7
SIMD Basic: clang-format fix
lamphamsy Oct 30, 2018
fbc8c77
SIMD NF4: clang-format fix
lamphamsy Oct 30, 2018
114186c
SIMD: rename LOAD
lamphamsy Oct 30, 2018
96617b3
SIMD: rename STORE
lamphamsy Oct 30, 2018
96dc58b
SIMD: rename AND
lamphamsy Oct 30, 2018
f2114e3
SIMD: rename XOR
lamphamsy Oct 30, 2018
1df8169
SIMD: rename MVMSK8
lamphamsy Oct 30, 2018
18399bd
SIMD: rename TESTZ
lamphamsy Oct 30, 2018
a873b88
SIMD: rename is_all_zeros
lamphamsy Oct 30, 2018
02be87f
SIMD: rename SET1
lamphamsy Oct 30, 2018
76abc31
SIMD: rename ADD
lamphamsy Oct 30, 2018
3a5beca
SIMD: rename SUB
lamphamsy Oct 30, 2018
5d7d0fd
SIMD: rename MUL
lamphamsy Oct 30, 2018
4ac9650
SIMD: rename CMPEQ
lamphamsy Oct 30, 2018
91fd3ee
SIMD: rename MIN
lamphamsy Oct 30, 2018
a8f971a
SIMD: rename CARD & CARD_M_1
lamphamsy Oct 30, 2018
b0516fc
SIMD: rename ADD_MOD
lamphamsy Oct 30, 2018
61122ea
SIMD: rename SUB_MOD
lamphamsy Oct 30, 2018
2d86344
SIMD: rename NEG_MOD
lamphamsy Oct 30, 2018
7766b56
SIMD: rename MUL_MOD
lamphamsy Oct 30, 2018
34b6595
SIMD: rename MULFULL_MOD
lamphamsy Oct 30, 2018
dca8686
SIMD: rename ADD_PROPS
lamphamsy Oct 30, 2018
84e0714
SIMD: rename BUTTERFLY_CT
lamphamsy Oct 30, 2018
51d2f8d
SIMD: rename BUTTERFLY_GS
lamphamsy Oct 30, 2018
cc7d37c
SIMD: rename BUTTERFLY_GS_SIMPLE
lamphamsy Oct 30, 2018
db98764
SIMD: rename STORE_LOW
lamphamsy Oct 30, 2018
e9de0ba
SIMD: rename macro names
lamphamsy Oct 30, 2018
d35c4d3
SIMD Basic: fix Card & CardMinusOne functions
lamphamsy Oct 30, 2018
05d3938
SIMD Basic: refactor get low/high half elements for ModMul
lamphamsy Oct 30, 2018
0c275e4
Core includes only SIMD's allocator
lamphamsy Oct 31, 2018
c72dd8a
SIMD: update simd header
lamphamsy Oct 31, 2018
ce91fb2
Remove simd.h
lamphamsy Oct 31, 2018
bb838ae
SIMD: move simd_* header to simd dir
lamphamsy Oct 31, 2018
97d9cf8
SIMD: remove useless included headers
lamphamsy Oct 31, 2018
4b26041
Buffers includes only SIMD's allocator
lamphamsy Oct 31, 2018
cac0b66
Include new SIMD's header
lamphamsy Oct 31, 2018
942d47c
Include right headers for simd tests
lamphamsy Oct 31, 2018
8808814
SIMD: include headers for simd tests
lamphamsy Oct 31, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ include(GNUInstallDirs)
set(LIB_SRC
${SOURCE_DIR}/core.cpp
${SOURCE_DIR}/fec_vectorisation.cpp
${SOURCE_DIR}/fft_2n.cpp
${SOURCE_DIR}/misc.cpp
${SOURCE_DIR}/gf_nf4.cpp
${SOURCE_DIR}/gf_ring.cpp
Expand Down
6 changes: 0 additions & 6 deletions src/arith.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,6 @@

namespace quadiron {

template <typename T>
using DoubleSizeVal = typename DoubleSize<T>::T;

template <typename T>
using SignedDoubleSizeVal = typename SignedDoubleSize<T>::T;

/** Base/core arithmetical functions of QuadIron. */
namespace arith {

Expand Down
8 changes: 7 additions & 1 deletion src/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include <random>

#include "big_int.h"
#include "simd/simd.h"
#include "simd/allocator.h"

namespace quadiron {

Expand Down Expand Up @@ -78,6 +78,12 @@ struct SignedDoubleSize<__uint128_t> {
typedef Int256 T;
};

template <typename T>
using DoubleSizeVal = typename DoubleSize<T>::T;

template <typename T>
using SignedDoubleSizeVal = typename SignedDoubleSize<T>::T;

/** A group of values stored as one.
*
* This allows faster processing, as the values can be processed as one.
Expand Down
4 changes: 1 addition & 3 deletions src/fec_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@

#ifdef QUADIRON_USE_SIMD

#include "simd.h"
#include "simd/simd.h"

#endif // #ifdef QUADIRON_USE_SIMD

Expand All @@ -74,8 +74,6 @@ static inline uint64_t hrtime_usec(timeval begin)
return 1000000 * (tv.tv_sec - begin.tv_sec) + tv.tv_usec - begin.tv_usec;
}

#define OOR_MARK 1

enum class FecType {
/** Systematic code
*
Expand Down
11 changes: 11 additions & 0 deletions src/fec_rs_fnt.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ class RsFnt : public FecCode<T> {
// decoding context used in encoding of systematic FNT
std::unique_ptr<DecodeContext<T>> enc_context;

// Indices used for accelerated functions
size_t simd_vec_len;
size_t simd_trailing_len;
size_t simd_offset;

public:
RsFnt(
FecType type,
Expand All @@ -70,6 +75,12 @@ class RsFnt : public FecCode<T> {
: FecCode<T>(type, word_size, n_data, n_parities, pkt_size)
{
this->fec_init();

// Indices used for accelerated functions
const unsigned ratio = simd::countof<T>();
simd_vec_len = this->pkt_size / ratio;
simd_trailing_len = this->pkt_size - simd_vec_len * ratio;
simd_offset = simd_vec_len * ratio;
}

inline void check_params() override
Expand Down
29 changes: 7 additions & 22 deletions src/fec_vectorisation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,11 @@
#include "fec_rs_fnt.h"

/*
* The file includes vectorized operations used by FEC classes
* The file includes specialized operations used by FEC classes
*/

#ifdef QUADIRON_USE_SIMD

#include "simd.h"
#include "simd/simd.h"

namespace quadiron {
Expand All @@ -53,20 +52,13 @@ void RsFnt<uint16_t>::encode_post_process(
uint16_t threshold = this->gf->card_minus_one();
unsigned code_len = this->n_outputs;

// number of elements per vector register
unsigned vec_size = simd::countof<uint16_t>();
// number of vector registers per fragment packet
size_t vecs_nb = size / vec_size;
// odd number of elements not vectorized
size_t last_len = size - vecs_nb * vec_size;

simd::encode_post_process(
output, props, offset, code_len, threshold, vecs_nb);
output, props, offset, code_len, threshold, simd_vec_len);

if (last_len > 0) {
if (simd_trailing_len > 0) {
for (unsigned i = 0; i < code_len; ++i) {
uint16_t* chunk = output.get(i);
for (size_t j = vecs_nb * vec_size; j < size; ++j) {
for (size_t j = simd_offset; j < size; ++j) {
if (chunk[j] == threshold) {
props[i].add(offset + j, OOR_MARK);
}
Expand All @@ -85,20 +77,13 @@ void RsFnt<uint32_t>::encode_post_process(
const uint32_t threshold = this->gf->card_minus_one();
const unsigned code_len = this->n_outputs;

// number of elements per vector register
const unsigned vec_size = simd::countof<uint32_t>();
// number of vector registers per fragment packet
const size_t vecs_nb = size / vec_size;
// odd number of elements not vectorized
const size_t last_len = size - vecs_nb * vec_size;

simd::encode_post_process(
output, props, offset, code_len, threshold, vecs_nb);
output, props, offset, code_len, threshold, simd_vec_len);

if (last_len > 0) {
if (simd_trailing_len > 0) {
for (unsigned i = 0; i < code_len; ++i) {
uint32_t* chunk = output.get(i);
for (size_t j = vecs_nb * vec_size; j < size; ++j) {
for (size_t j = simd_offset; j < size; ++j) {
if (chunk[j] == threshold) {
props[i].add(offset + j, OOR_MARK);
}
Expand Down
192 changes: 192 additions & 0 deletions src/fft_2n.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/* -*- mode: c++ -*- */
/*
* Copyright 2017-2018 Scality
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include "fft_2n.h"

/*
 * This file contains the SIMD-specialized butterfly operations of the Radix2
 * class (uint16_t and uint32_t specializations)
*/

#ifdef QUADIRON_USE_SIMD

#include "simd/simd.h"

namespace quadiron {
namespace fft {

template <>
void Radix2<uint16_t>::butterfly_ct_two_layers_step(
    vec::Buffers<uint16_t>& buf,
    unsigned start,
    unsigned m)
{
    // Look up the three coefficients in vec_W: one at the index derived from
    // `start` and `m`, and two derived from half of that index.
    const unsigned coef_idx = start * this->n / m / 2;
    const unsigned half_idx = coef_idx / 2;
    const uint16_t coef1 = vec_W[coef_idx];
    const uint16_t coef2 = vec_W[half_idx];
    const uint16_t coef3 = vec_W[half_idx + this->n / 4];

    // Vectorized pass, bounded by simd_vec_len.
    simd::butterfly_ct_two_layers_step(
        buf, coef1, coef2, coef3, start, m, simd_vec_len, card);

    // Nothing left over: the vectorized pass was enough.
    const bool has_trailing = simd_trailing_len > 0;
    if (!has_trailing) {
        return;
    }

    // Trailing elements go through the non-SIMD routine, from simd_offset.
    butterfly_ct_two_layers_step_slow(buf, start, m, simd_offset);
}

template <>
void Radix2<uint16_t>::butterfly_ct_step(
    vec::Buffers<uint16_t>& buf,
    uint16_t r,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized pass, bounded by simd_vec_len.
    simd::butterfly_ct_step(buf, r, start, m, step, simd_vec_len, card);

    // Nothing left over: the vectorized pass was enough.
    const bool has_trailing = simd_trailing_len > 0;
    if (!has_trailing) {
        return;
    }

    // Trailing elements go through the non-SIMD routine, from simd_offset.
    butterfly_ct_step_slow(buf, r, start, m, step, simd_offset);
}

template <>
void Radix2<uint16_t>::butterfly_gs_step(
    vec::Buffers<uint16_t>& buf,
    uint16_t coef,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized pass, bounded by simd_vec_len. `step` is not forwarded here;
    // only the non-SIMD routine below receives it.
    simd::butterfly_gs_step(buf, coef, start, m, simd_vec_len, card);

    // Nothing left over: the vectorized pass was enough.
    const bool has_trailing = simd_trailing_len > 0;
    if (!has_trailing) {
        return;
    }

    // Trailing elements go through the non-SIMD routine, from simd_offset.
    butterfly_gs_step_slow(buf, coef, start, m, step, simd_offset);
}

template <>
void Radix2<uint16_t>::butterfly_gs_step_simple(
    vec::Buffers<uint16_t>& buf,
    uint16_t coef,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized pass, bounded by simd_vec_len. `step` is not forwarded here;
    // only the non-SIMD routine below receives it.
    simd::butterfly_gs_step_simple(buf, coef, start, m, simd_vec_len, card);

    // Nothing left over: the vectorized pass was enough.
    const bool has_trailing = simd_trailing_len > 0;
    if (!has_trailing) {
        return;
    }

    // Trailing elements go through the non-SIMD routine, from simd_offset.
    butterfly_gs_step_simple_slow(buf, coef, start, m, step, simd_offset);
}

template <>
void Radix2<uint32_t>::butterfly_ct_two_layers_step(
    vec::Buffers<uint32_t>& buf,
    unsigned start,
    unsigned m)
{
    // Look up the three coefficients in vec_W: one at the index derived from
    // `start` and `m`, and two derived from half of that index.
    const unsigned coef_idx = start * this->n / m / 2;
    const unsigned half_idx = coef_idx / 2;
    const uint32_t coef1 = vec_W[coef_idx];
    const uint32_t coef2 = vec_W[half_idx];
    const uint32_t coef3 = vec_W[half_idx + this->n / 4];

    // Vectorized pass, bounded by simd_vec_len.
    simd::butterfly_ct_two_layers_step(
        buf, coef1, coef2, coef3, start, m, simd_vec_len, card);

    // Nothing left over: the vectorized pass was enough.
    const bool has_trailing = simd_trailing_len > 0;
    if (!has_trailing) {
        return;
    }

    // Trailing elements go through the non-SIMD routine, from simd_offset.
    butterfly_ct_two_layers_step_slow(buf, start, m, simd_offset);
}

template <>
void Radix2<uint32_t>::butterfly_ct_step(
    vec::Buffers<uint32_t>& buf,
    uint32_t r,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized pass, bounded by simd_vec_len.
    simd::butterfly_ct_step(buf, r, start, m, step, simd_vec_len, card);

    // Nothing left over: the vectorized pass was enough.
    const bool has_trailing = simd_trailing_len > 0;
    if (!has_trailing) {
        return;
    }

    // Trailing elements go through the non-SIMD routine, from simd_offset.
    butterfly_ct_step_slow(buf, r, start, m, step, simd_offset);
}

template <>
void Radix2<uint32_t>::butterfly_gs_step(
    vec::Buffers<uint32_t>& buf,
    uint32_t coef,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized pass, bounded by simd_vec_len. `step` is not forwarded here;
    // only the non-SIMD routine below receives it.
    simd::butterfly_gs_step(buf, coef, start, m, simd_vec_len, card);

    // Nothing left over: the vectorized pass was enough.
    const bool has_trailing = simd_trailing_len > 0;
    if (!has_trailing) {
        return;
    }

    // Trailing elements go through the non-SIMD routine, from simd_offset.
    butterfly_gs_step_slow(buf, coef, start, m, step, simd_offset);
}

template <>
void Radix2<uint32_t>::butterfly_gs_step_simple(
    vec::Buffers<uint32_t>& buf,
    uint32_t coef,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized pass, bounded by simd_vec_len. `step` is not forwarded here;
    // only the non-SIMD routine below receives it.
    simd::butterfly_gs_step_simple(buf, coef, start, m, simd_vec_len, card);

    // Nothing left over: the vectorized pass was enough.
    const bool has_trailing = simd_trailing_len > 0;
    if (!has_trailing) {
        return;
    }

    // Trailing elements go through the non-SIMD routine, from simd_offset.
    butterfly_gs_step_simple_slow(buf, coef, start, m, step, simd_offset);
}

} // namespace fft
} // namespace quadiron

#endif // #ifdef QUADIRON_USE_SIMD
Loading