Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,9 @@ src/x64
.vs/
.vscode/
CMakeSettings.json

# Downloaded model files (autogen.sh / dnn/download_model.sh)
dnn/*_data.c
dnn/*_data.h
dnn/dred_rdovae_constants.h
*.pth
169 changes: 168 additions & 1 deletion celt/bands.c
Original file line number Diff line number Diff line change
Expand Up @@ -1400,7 +1400,7 @@ static void oaci_special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt
OAC_COPY(&norm2[n1], &norm2[2*n1 - n2], n2 - n1);
}

void oaci_quant_all_bands(int encode, const CELTMode *m, int start, int end,
static void quant_all_bands_twoch(int encode, const CELTMode *m, int start, int end,
celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, oac_int32 total_bits,
Expand Down Expand Up @@ -1664,3 +1664,170 @@ void oaci_quant_all_bands(int encode, const CELTMode *m, int start, int end,

RESTORE_STACK;
}
/* Multi-channel multi-mono: each channel encoded independently per band.
   collapse_masks are stored interleaved as [band*C + channel] so that
   oaci_anti_collapse can read them correctly.

   Layout expected by this function (as used by the indexing below):
     - X_             C channels back to back, channel stride M*shortMdctSize
     - bandE          channel stride m->nbEBands
     - collapse_masks one byte per (band, channel), index band*C + channel
     - pulses[i]      allocation for band i shared by all channels; each
                      channel is given b / C of it
     - seed           in/out: copied into the band context on entry and
                      written back on exit

   No stereo coupling is used on this path: ctx.intensity is fixed to 0 and
   every oaci_quant_band() call is a single-channel call.

   NOTE(review): the per-band cap of 16383 and the update_lowband test are
   applied to the all-channel budget b, not to the per-channel share b / C.
   Presumably intentional (encoder and decoder run the same code) - confirm. */
static void quant_all_bands_multi(int encode, const CELTMode *m, int start, int end,
    celt_norm *X_, int C, unsigned char *collapse_masks,
    const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
    int *tf_res, oac_int32 total_bits,
    oac_int32 balance, ec_ctx *ec, int LM, int codedBands,
    oac_uint32 *seed, int arch, int disable_inv) {
    int i, c;
    oac_int32 remaining_bits;
    const oac_int16 * OAC_RESTRICT eBands = m->eBands;
    int B;
    int M;
    int frame_size;
    int norm_offset;
    int norm_size;
    int lowband_offset;
    int update_lowband = 1;
    int resynth_alloc;
    struct band_ctx ctx;
    VARDECL(celt_norm, _norm);
    VARDECL(celt_norm, _lowband_scratch);
    VARDECL(int, split_mem);
#ifdef RESYNTH
    int resynth = 1;
#else
    int resynth = !encode;
#endif
    SAVE_STACK;
    M = 1<<LM;
    B = shortBlocks ? M : 1;
    frame_size = M*m->shortMdctSize;
    norm_offset = M*eBands[start];
    norm_size = M*eBands[m->nbEBands - 1] - norm_offset;
    /* One norm array per channel for spectral folding */
    ALLOC(_norm, C*norm_size, celt_norm);
    /* The scratch buffer is only needed when encoding with resynthesis;
       otherwise part of X_ is reused as scratch (see lb_scratch below). */
    if (encode && resynth)
        resynth_alloc = M*(eBands[m->nbEBands] - eBands[m->nbEBands - 1]);
    else
        resynth_alloc = ALLOC_NONE;
    ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
    /* 15 ints of zero-initialised split memory per channel, handed to
       oaci_quant_band() as split_mem + c*15. */
    ALLOC(split_mem, C*15, int);
    OAC_CLEAR(split_mem, C*15);
    ctx.ec = ec;
    ctx.encode = encode;
    ctx.intensity = 0;
    ctx.m = m;
    ctx.seed = *seed;
    ctx.spread = spread;
    ctx.arch = arch;
    ctx.disable_inv = disable_inv;
    ctx.resynth = resynth;
    ctx.theta_round = 0;
    /* Only set for the first coded band; cleared at the end of each iteration. */
    ctx.avoid_split_noise = B > 1;
    lowband_offset = 0;
    for (i = start; i < end; i++) {
        oac_int32 tell;
        int b;
        int band_N;
        oac_int32 curr_balance;
        int effective_lowband = -1;
        int tf_change;
        int last = (i == end - 1);
        ctx.i = i;
        band_N = M*eBands[i + 1] - M*eBands[i];
        celt_assert(band_N > 0);
        tell = oaci_ec_tell_frac(ec);
        /* Compute how many bits we want to allocate to this band */
        if (i != start)
            balance -= tell;
        remaining_bits = total_bits - tell;
        if (i <= codedBands - 1) {
            curr_balance = oaci_celt_sudiv(balance, IMIN(3, codedBands - i));
            b = IMAX(0, IMIN(16383, IMIN(remaining_bits, pulses[i] + curr_balance)));
        } else {
            b = 0;
        }
        /* Update lowband offset for spectral folding */
        if (resynth && (M*eBands[i] - band_N >= M*eBands[start] || i == start + 1)
            && (update_lowband || lowband_offset == 0))
            lowband_offset = i;
        /* Handle hybrid mode band boundary: duplicate the tail of the first
           band's folding data so the (wider) second band can fold from it.
           Copies nothing when the first two bands have the same width. */
        if (i == start + 1) {
            int n1 = M*(eBands[start + 1] - eBands[start]);
            int n2 = M*(eBands[start + 2] - eBands[start + 1]);
            for (c = 0; c < C; c++) {
                celt_norm *norm_c = _norm + c*norm_size;
                OAC_COPY(&norm_c[n1], &norm_c[2*n1 - n2], n2 - n1);
            }
        }
        tf_change = tf_res[i];
        ctx.tf_change = tf_change;
        /* Encode each channel independently for this band */
        {
            /* Equal share of what is left at the start of the band; it is
               not recomputed as earlier channels consume bits. */
            oac_int32 remaining_per_chan = remaining_bits / C;
            for (c = 0; c < C; c++) {
                celt_norm *X_c;
                celt_norm *norm_c = _norm + c*norm_size;
                celt_norm *lowband_ptr = NULL;
                celt_norm *lowband_out;
                celt_norm *lb_scratch;
                unsigned x_cm;
                int chan_b = b / C;
                /* Each channel's PVQ budget cap: current tell + per-channel share */
                ctx.total_bits = oaci_ec_tell_frac(ec) + remaining_per_chan;
                ctx.bandE = bandE + c*m->nbEBands;
                X_c = X_ + c*frame_size + M*eBands[i];
                /* The last band is never folded from, so its output is not kept. */
                lowband_out = last ? NULL : norm_c + M*eBands[i] - norm_offset;
                /* Setup lowband for folding */
                if (lowband_offset != 0 && (spread != SPREAD_AGGRESSIVE || B > 1 || tf_change < 0)) {
                    int fold_start, fold_end, fold_i;
                    /* This ensures we never repeat spectral content within one band */
                    effective_lowband = IMAX(0, M*eBands[lowband_offset] - norm_offset - band_N);
                    fold_start = lowband_offset;
                    while (M*eBands[--fold_start] > effective_lowband + norm_offset) ;
                    fold_end = lowband_offset - 1;
                    while (++fold_end < i && M*eBands[fold_end] < effective_lowband + norm_offset + band_N) ;
                    /* OR together this channel's masks of every band the fold source overlaps */
                    x_cm = 0;
                    fold_i = fold_start; do {
                        x_cm |= collapse_masks[fold_i*C + c];
                    } while (++fold_i < fold_end);
                    lowband_ptr = norm_c + effective_lowband;
                } else {
                    /* No folding source is passed: start with all B blocks marked. */
                    x_cm = (1<<B) - 1;
                }
                if (i >= m->effEBands) {
                    /* Bands at or above effEBands are coded into the norm buffer instead of X_. */
                    X_c = norm_c;
                    lb_scratch = NULL;
                } else if (encode && resynth) {
                    lb_scratch = _lowband_scratch;
                } else {
                    /* Reuse this channel's last effective band in X_ as scratch space. */
                    lb_scratch = X_ + c*frame_size + M*eBands[m->effEBands - 1];
                }
                if (last)
                    lb_scratch = NULL;
                x_cm = oaci_quant_band(&ctx, X_c, band_N, chan_b, B,
                    lowband_ptr, LM, lowband_out, Q31ONE, lb_scratch, x_cm, split_mem + c*15);
                collapse_masks[i*C + c] = (unsigned char)x_cm;
            }
        }
        balance += pulses[i] + tell;
        /* Update the folding position only as long as we have 1 bit/sample depth. */
        update_lowband = b > (band_N<<BITRES);
        /* We only need to avoid noise on a split for the first band. After that, we
           have folding. */
        ctx.avoid_split_noise = 0;
    }
    *seed = ctx.seed;
    RESTORE_STACK;
}
/* Quantise all bands of a frame, dispatching on the channel count.
   More than two channels take the multi-mono path, where dual_stereo,
   intensity and complexity are not used. One or two channels take the
   mono/stereo path, with the second channel located one frame
   (M*shortMdctSize samples) after the first, or NULL for mono. */
void oaci_quant_all_bands(int encode, const CELTMode *m, int start, int end,
    celt_norm *X_, int C, unsigned char *collapse_masks,
    const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
    int dual_stereo, int intensity, int *tf_res, oac_int32 total_bits,
    oac_int32 balance, ec_ctx *ec, int LM, int codedBands,
    oac_uint32 *seed, int complexity, int arch, int disable_inv) {
    celt_norm *second_chan = NULL;

    if (C > 2) {
        quant_all_bands_multi(encode, m, start, end, X_, C, collapse_masks,
            bandE, pulses, shortBlocks, spread,
            tf_res, total_bits, balance, ec, LM, codedBands,
            seed, arch, disable_inv);
        return;
    }

    if (C == 2) {
        const int chan_stride = (1<<LM)*m->shortMdctSize;
        second_chan = X_ + chan_stride;
    }
    quant_all_bands_twoch(encode, m, start, end, X_, second_chan, collapse_masks,
        bandE, pulses, shortBlocks, spread, dual_stereo, intensity,
        tf_res, total_bits, balance, ec, LM, codedBands,
        seed, complexity, arch, disable_inv);
}
14 changes: 7 additions & 7 deletions celt/bands.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,15 +118,15 @@ void oaci_haar1(celt_norm *X, int N0, int stride);
* @param m Mode data
* @param start First band to process
* @param end Last band to process + 1
* @param X Residual (normalised)
* @param Y Residual (normalised) for second channel (or NULL for mono)
* @param collapse_masks Anti-collapse tracking mask
* @param bandE Square root of the energy for each band
* @param X Residual (normalised), all C channels contiguous (channel stride = M*shortMdctSize)
* @param C Number of channels
* @param collapse_masks Anti-collapse tracking mask (C*nbEBands, interleaved as [band*C + channel])
* @param bandE Square root of the energy for each band (C*nbEBands, stride nbEBands per channel)
* @param pulses Bit allocation (per band) for PVQ
* @param shortBlocks Zero for long blocks, non-zero for short blocks
* @param spread Amount of spreading to use
* @param dual_stereo Zero for MS stereo, non-zero for dual stereo
* @param intensity First band to use intensity stereo
* @param dual_stereo Zero for MS stereo, non-zero for dual stereo (C<=2 only)
* @param intensity First band to use intensity stereo (C<=2 only)
* @param tf_res Time-frequency resolution change
* @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
* @param balance Number of unallocated bits
Expand All @@ -137,7 +137,7 @@ void oaci_haar1(celt_norm *X, int N0, int stride);
* @param arch Run-time architecture (see oac_select_arch())
*/
void oaci_quant_all_bands(int encode, const CELTMode *m, int start, int end,
celt_norm * X, celt_norm * Y, unsigned char *collapse_masks,
celt_norm * X, int C, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, oac_int32 total_bits,
oac_int32 balance, ec_ctx *ec, int M, int codedBands, oac_uint32 *seed,
Expand Down
4 changes: 3 additions & 1 deletion celt/celt.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,12 @@ const signed char oaci_tf_select_table[4][8] = {

/* Fill cap[0..nbEBands-1] with the per-band cap for the given frame size
   (LM) and channel count (C).
   The cap table row is selected by 2*LM + (table channels - 1). Only C == 2
   selects the second row of a pair; every other channel count, including
   C > 2, uses the first (mono) row. The result is still scaled by the real
   channel count C and by the band width N. */
void oaci_init_caps(const CELTMode *m, int *cap, int LM, int C) {
    int i;
    /* Use mono cap table for C > 2 since channels are coded independently */
    int C_cap = C == 2 ? 2 : 1;
    for (i = 0; i < m->nbEBands; i++) {
        int N;
        N = (m->eBands[i + 1] - m->eBands[i])<<LM;
        /* Index with C_cap, never C: using C directly would select a row
           beyond the mono/stereo pair whenever C > 2. */
        cap[i] = (m->cache.caps[m->nbEBands*(2*LM + C_cap - 1) + i] + 64)*C*N>>2;
    }
}

Expand Down
10 changes: 8 additions & 2 deletions celt/celt.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@
#define CELTDecoder OacCustomDecoder
#define CELTMode OacCustomMode

/** Maximum supported ambisonics order. */
#define OAC_MAX_AMBISONICS_ORDER 5
/** Channel count at the maximum order: (order + 1) squared. */
#define OAC_MAX_AMBISONICS_CHANNELS ((OAC_MAX_AMBISONICS_ORDER+1)*(OAC_MAX_AMBISONICS_ORDER+1))
/** Maximum channel count; must be at least OAC_MAX_AMBISONICS_CHANNELS. */
#define OAC_MAX_CHANNELS 255
/* Check that OAC_MAX_CHANNELS is large enough */
/* Compile-time check: the array size becomes -1, which is a compile error,
   if the condition is false. */
typedef char oac_assert_max_channels_sufficient[(OAC_MAX_CHANNELS >= OAC_MAX_AMBISONICS_CHANNELS) ? 1 : -1];
#define LEAK_BANDS 19

typedef struct {
Expand Down Expand Up @@ -184,7 +190,7 @@ int oaci_celt_encode_with_ec(OacCustomEncoder * OAC_RESTRICT st, const oac_res *
unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);

/* Initialise a CELT encoder state.
   NOTE(review): the meaning and valid values of `format` are not visible
   from this header - confirm against the definition. */
int oaci_celt_encoder_init(CELTEncoder *st, oac_int32 sampling_rate, int channels,
    int arch, int format);



Expand All @@ -193,7 +199,7 @@ int oaci_celt_encoder_init(CELTEncoder *st, oac_int32 sampling_rate, int channel
int oaci_celt_decoder_get_size(int channels);


/* Initialise a CELT decoder state.
   NOTE(review): the meaning and valid values of `format` are not visible
   from this header - confirm against the definition. */
int oaci_celt_decoder_init(CELTDecoder *st, oac_int32 sampling_rate, int channels, int format);

int oaci_celt_decode_with_ec_dred(CELTDecoder * OAC_RESTRICT st, const unsigned char *data,
int len, oac_res * OAC_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
Expand Down
Loading
Loading