Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
355 changes: 176 additions & 179 deletions NAM/conv1d.cpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions NAM/convnet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,8 @@ nam::convnet::ConvNetConfig nam::convnet::parse_config_json(const nlohmann::json
// ConvNetConfig::create()
/// \brief Construct a ConvNet from this configuration
///
/// Forwards in_channels, out_channels, channels, dilations, batchnorm, activation and groups, together with the
/// supplied weights and sample rate, to the nam::convnet::ConvNet constructor.
/// \param weights Model weights passed through to the ConvNet constructor
/// \param sampleRate Sample rate passed through to the ConvNet constructor
/// \return The newly constructed ConvNet, owned through a unique_ptr to nam::DSP
std::unique_ptr<nam::DSP> nam::convnet::ConvNetConfig::create(std::vector<float> weights, double sampleRate)
{
  // Exactly one return: the same statement appeared twice, which left the second copy unreachable.
  return std::make_unique<nam::convnet::ConvNet>(
    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, sampleRate, groups);
}

// Config parser for ConfigParserRegistry
Expand Down
115 changes: 58 additions & 57 deletions NAM/dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
for (int f = 0; f < num_frames; f++)
{
const float in_val = input_ptr[f * in_stride];
output_ptr[f * 2] = w0 * in_val;
output_ptr[f * 2] = w0 * in_val;
output_ptr[f * 2 + 1] = w1 * in_val;
}
}
Expand All @@ -508,7 +508,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
for (int f = 0; f < num_frames; f++)
{
const float in_val = input_ptr[f * in_stride];
output_ptr[f * 3] = w0 * in_val;
output_ptr[f * 3] = w0 * in_val;
output_ptr[f * 3 + 1] = w1 * in_val;
output_ptr[f * 3 + 2] = w2 * in_val;
}
Expand All @@ -520,7 +520,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
for (int f = 0; f < num_frames; f++)
{
const float in_val = input_ptr[f * in_stride];
output_ptr[f * 4] = w0 * in_val;
output_ptr[f * 4] = w0 * in_val;
output_ptr[f * 4 + 1] = w1 * in_val;
output_ptr[f * 4 + 2] = w2 * in_val;
output_ptr[f * 4 + 3] = w3 * in_val;
Expand Down Expand Up @@ -567,7 +567,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
const float* __restrict__ in_col = input_ptr + f * in_stride;
const float i0 = in_col[0];
const float i1 = in_col[1];
output_ptr[f * 2] = w00 * i0 + w01 * i1;
output_ptr[f * 2] = w00 * i0 + w01 * i1;
output_ptr[f * 2 + 1] = w10 * i0 + w11 * i1;
}
}
Expand All @@ -584,7 +584,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
const float i1 = in_col[1];
const float i2 = in_col[2];
const float i3 = in_col[3];
output_ptr[f * 2] = w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
output_ptr[f * 2] = w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
output_ptr[f * 2 + 1] = w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
}
}
Expand All @@ -595,8 +595,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
for (int f = 0; f < num_frames; f++)
{
const float* __restrict__ in_col = input_ptr + f * in_stride;
output_ptr[f] = w0 * in_col[0] + w1 * in_col[1]
+ w2 * in_col[2] + w3 * in_col[3];
output_ptr[f] = w0 * in_col[0] + w1 * in_col[1] + w2 * in_col[2] + w3 * in_col[3];
}
}
else if (out_ch == 4 && in_ch == 2)
Expand All @@ -608,7 +607,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
const float* __restrict__ in_col = input_ptr + f * in_stride;
const float i0 = in_col[0];
const float i1 = in_col[1];
output_ptr[f * 4] = w00 * i0 + w01 * i1;
output_ptr[f * 4] = w00 * i0 + w01 * i1;
output_ptr[f * 4 + 1] = w10 * i0 + w11 * i1;
output_ptr[f * 4 + 2] = w20 * i0 + w21 * i1;
output_ptr[f * 4 + 3] = w30 * i0 + w31 * i1;
Expand All @@ -628,7 +627,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
const float i0 = in_col[0];
const float i1 = in_col[1];
const float i2 = in_col[2];
output_ptr[f * 3] = w00 * i0 + w01 * i1 + w02 * i2 + b0;
output_ptr[f * 3] = w00 * i0 + w01 * i1 + w02 * i2 + b0;
output_ptr[f * 3 + 1] = w10 * i0 + w11 * i1 + w12 * i2 + b1;
output_ptr[f * 3 + 2] = w20 * i0 + w21 * i1 + w22 * i2 + b2;
}
Expand All @@ -642,17 +641,17 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
const float i0 = in_col[0];
const float i1 = in_col[1];
const float i2 = in_col[2];
output_ptr[f * 3] = w00 * i0 + w01 * i1 + w02 * i2;
output_ptr[f * 3] = w00 * i0 + w01 * i1 + w02 * i2;
output_ptr[f * 3 + 1] = w10 * i0 + w11 * i1 + w12 * i2;
output_ptr[f * 3 + 2] = w20 * i0 + w21 * i1 + w22 * i2;
}
}
}
else if (out_ch == 4 && in_ch == 4)
{
const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2], w30 = weight_ptr[3];
const float w01 = weight_ptr[4], w11 = weight_ptr[5], w21 = weight_ptr[6], w31 = weight_ptr[7];
const float w02 = weight_ptr[8], w12 = weight_ptr[9], w22 = weight_ptr[10], w32 = weight_ptr[11];
const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2], w30 = weight_ptr[3];
const float w01 = weight_ptr[4], w11 = weight_ptr[5], w21 = weight_ptr[6], w31 = weight_ptr[7];
const float w02 = weight_ptr[8], w12 = weight_ptr[9], w22 = weight_ptr[10], w32 = weight_ptr[11];
const float w03 = weight_ptr[12], w13 = weight_ptr[13], w23 = weight_ptr[14], w33 = weight_ptr[15];
for (int f = 0; f < num_frames; f++)
{
Expand All @@ -661,7 +660,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
const float i1 = in_col[1];
const float i2 = in_col[2];
const float i3 = in_col[3];
output_ptr[f * 4] = w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
output_ptr[f * 4] = w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
output_ptr[f * 4 + 1] = w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
output_ptr[f * 4 + 2] = w20 * i0 + w21 * i1 + w22 * i2 + w23 * i3;
output_ptr[f * 4 + 3] = w30 * i0 + w31 * i1 + w32 * i2 + w33 * i3;
Expand All @@ -677,8 +676,8 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
const float i3 = in_col[3], i4 = in_col[4], i5 = in_col[5];
for (int o = 0; o < 6; o++)
{
out_col[o] = weight_ptr[o] * i0 + weight_ptr[6 + o] * i1 + weight_ptr[12 + o] * i2
+ weight_ptr[18 + o] * i3 + weight_ptr[24 + o] * i4 + weight_ptr[30 + o] * i5;
out_col[o] = weight_ptr[o] * i0 + weight_ptr[6 + o] * i1 + weight_ptr[12 + o] * i2 + weight_ptr[18 + o] * i3
+ weight_ptr[24 + o] * i4 + weight_ptr[30 + o] * i5;
}
}
}
Expand All @@ -693,7 +692,8 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
for (int o = 0; o < 8; o++)
{
out_col[o] = weight_ptr[o] * i0 + weight_ptr[8 + o] * i1 + weight_ptr[16 + o] * i2 + weight_ptr[24 + o] * i3
+ weight_ptr[32 + o] * i4 + weight_ptr[40 + o] * i5 + weight_ptr[48 + o] * i6 + weight_ptr[56 + o] * i7;
+ weight_ptr[32 + o] * i4 + weight_ptr[40 + o] * i5 + weight_ptr[48 + o] * i6
+ weight_ptr[56 + o] * i7;
}
}
}
Expand All @@ -708,7 +708,8 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
for (int o = 0; o < 4; o++)
{
out_col[o] = weight_ptr[o] * i0 + weight_ptr[4 + o] * i1 + weight_ptr[8 + o] * i2 + weight_ptr[12 + o] * i3
+ weight_ptr[16 + o] * i4 + weight_ptr[20 + o] * i5 + weight_ptr[24 + o] * i6 + weight_ptr[28 + o] * i7;
+ weight_ptr[16 + o] * i4 + weight_ptr[20 + o] * i5 + weight_ptr[24 + o] * i6
+ weight_ptr[28 + o] * i7;
}
}
}
Expand Down Expand Up @@ -754,56 +755,56 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
#ifdef NAM_USE_INLINE_GEMM
if (!bias_fused)
{
const int out_ch = (int)get_out_channels();
float* __restrict__ output_ptr = _output.data();
const float* __restrict__ bias_ptr = this->_bias.data();
const int out_ch = (int)get_out_channels();
float* __restrict__ output_ptr = _output.data();
const float* __restrict__ bias_ptr = this->_bias.data();

// Specialized paths for common small channel counts
if (out_ch == 2)
{
const float b0 = bias_ptr[0], b1 = bias_ptr[1];
for (int f = 0; f < num_frames; f++)
// Specialized paths for common small channel counts
if (out_ch == 2)
{
const int off = f * 2;
output_ptr[off] += b0;
output_ptr[off + 1] += b1;
const float b0 = bias_ptr[0], b1 = bias_ptr[1];
for (int f = 0; f < num_frames; f++)
{
const int off = f * 2;
output_ptr[off] += b0;
output_ptr[off + 1] += b1;
}
}
}
else if (out_ch == 3)
{
const float b0 = bias_ptr[0], b1 = bias_ptr[1], b2 = bias_ptr[2];
for (int f = 0; f < num_frames; f++)
else if (out_ch == 3)
{
const int off = f * 3;
output_ptr[off] += b0;
output_ptr[off + 1] += b1;
output_ptr[off + 2] += b2;
const float b0 = bias_ptr[0], b1 = bias_ptr[1], b2 = bias_ptr[2];
for (int f = 0; f < num_frames; f++)
{
const int off = f * 3;
output_ptr[off] += b0;
output_ptr[off + 1] += b1;
output_ptr[off + 2] += b2;
}
}
}
else if (out_ch == 4)
{
const float b0 = bias_ptr[0], b1 = bias_ptr[1];
const float b2 = bias_ptr[2], b3 = bias_ptr[3];
for (int f = 0; f < num_frames; f++)
else if (out_ch == 4)
{
const int off = f * 4;
output_ptr[off] += b0;
output_ptr[off + 1] += b1;
output_ptr[off + 2] += b2;
output_ptr[off + 3] += b3;
const float b0 = bias_ptr[0], b1 = bias_ptr[1];
const float b2 = bias_ptr[2], b3 = bias_ptr[3];
for (int f = 0; f < num_frames; f++)
{
const int off = f * 4;
output_ptr[off] += b0;
output_ptr[off + 1] += b1;
output_ptr[off + 2] += b2;
output_ptr[off + 3] += b3;
}
}
}
else
{
for (int f = 0; f < num_frames; f++)
else
{
float* __restrict__ out_col = output_ptr + f * out_ch;
for (int o = 0; o < out_ch; o++)
for (int f = 0; f < num_frames; f++)
{
out_col[o] += bias_ptr[o];
float* __restrict__ out_col = output_ptr + f * out_ch;
for (int o = 0; o < out_ch; o++)
{
out_col[o] += bias_ptr[o];
}
}
}
}
} // !bias_fused
#else
_output.leftCols(num_frames).colwise() += this->_bias;
Expand Down
4 changes: 2 additions & 2 deletions NAM/dsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
/// \brief Use a sample rate of -1 if we don't know what the model expects to be run at
#define NAM_UNKNOWN_EXPECTED_SAMPLE_RATE -1.0

// Under MSVC (when the compiler is not LLVM-based) map the `__restrict__` spelling used throughout the code
// onto MSVC's `__restrict` keyword. The conditional is opened once and closed once so it stays balanced.
#if defined(_MSC_VER) && !defined(__llvm__)
  #define __restrict__ __restrict
#endif

namespace nam
Expand Down
4 changes: 2 additions & 2 deletions NAM/film.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class FiLM
int i = 0;
for (; i + 3 < input_dim; i += 4)
{
out_col[i] = in_col[i] * scale_col[i] + shift_col[i];
out_col[i] = in_col[i] * scale_col[i] + shift_col[i];
out_col[i + 1] = in_col[i + 1] * scale_col[i + 1] + shift_col[i + 1];
out_col[i + 2] = in_col[i + 2] * scale_col[i + 2] + shift_col[i + 2];
out_col[i + 3] = in_col[i + 3] * scale_col[i + 3] + shift_col[i + 3];
Expand Down Expand Up @@ -161,7 +161,7 @@ class FiLM
int i = 0;
for (; i + 3 < input_dim; i += 4)
{
out_col[i] = in_col[i] * scale_col[i];
out_col[i] = in_col[i] * scale_col[i];
out_col[i + 1] = in_col[i + 1] * scale_col[i + 1];
out_col[i + 2] = in_col[i + 2] * scale_col[i + 2];
out_col[i + 3] = in_col[i + 3] * scale_col[i + 3];
Expand Down
4 changes: 2 additions & 2 deletions NAM/gating_activations.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class GatingActivation
const int input_stride = (int)input.outerStride();
const float* __restrict__ input_ptr = input.derived().data();
float* __restrict__ output_ptr = output.derived().data();
const int output_stride = (int)output.outerStride(); // Column stride for output
const int output_stride = (int)output.outerStride(); // Column stride for output

for (int f = 0; f < num_samples; f++)
{
Expand Down Expand Up @@ -178,7 +178,7 @@ class BlendingActivation
const int input_stride = (int)input.outerStride();
const float* __restrict__ input_ptr = input.derived().data();
float* __restrict__ output_ptr = output.derived().data();
const int output_stride = (int)output.outerStride(); // Column stride for output
const int output_stride = (int)output.outerStride(); // Column stride for output

for (int f = 0; f < num_samples; f++)
{
Expand Down
4 changes: 2 additions & 2 deletions NAM/lstm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ nam::lstm::LSTMConfig nam::lstm::parse_config_json(const nlohmann::json& config)
// LSTMConfig::create()
/// \brief Construct an LSTM from this configuration
///
/// Forwards in_channels, out_channels, num_layers, input_size and hidden_size, together with the supplied
/// weights and sample rate, to the nam::lstm::LSTM constructor.
/// \param weights Model weights passed through to the LSTM constructor
/// \param sampleRate Sample rate passed through to the LSTM constructor
/// \return The newly constructed LSTM, owned through a unique_ptr to nam::DSP
std::unique_ptr<nam::DSP> nam::lstm::LSTMConfig::create(std::vector<float> weights, double sampleRate)
{
  // Exactly one return: the same statement appeared twice, which left the second copy unreachable.
  return std::make_unique<nam::lstm::LSTM>(
    in_channels, out_channels, num_layers, input_size, hidden_size, weights, sampleRate);
}

// Config parser for ConfigParserRegistry
Expand Down
8 changes: 4 additions & 4 deletions tools/render.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ bool SaveWavFloat32(const char* fileName, const float* samples, size_t numSample
const uint32_t fmtSize = 16;
out.write("fmt ", 4);
out.write(reinterpret_cast<const char*>(&fmtSize), 4);
const uint16_t audioFormat = 3; // IEEE float
const uint16_t audioFormat = 3; // IEEE float
out.write(reinterpret_cast<const char*>(&audioFormat), 2);
const uint16_t numChannels = 1;
out.write(reinterpret_cast<const char*>(&numChannels), 2);
Expand All @@ -59,7 +59,7 @@ bool SaveWavFloat32(const char* fileName, const float* samples, size_t numSample
return out.good();
}

} // namespace
} // namespace

int main(int argc, char* argv[])
{
Expand Down Expand Up @@ -138,8 +138,8 @@ int main(int argc, char* argv[])
const double expectedRate = model->GetExpectedSampleRate();
if (expectedRate > 0 && std::abs(inputSampleRate - expectedRate) > 0.5)
{
std::cerr << "Error: Input WAV sample rate (" << inputSampleRate
<< " Hz) does not match model expected rate (" << expectedRate << " Hz)\n";
std::cerr << "Error: Input WAV sample rate (" << inputSampleRate << " Hz) does not match model expected rate ("
<< expectedRate << " Hz)\n";
return 1;
}

Expand Down
Loading
Loading