Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/avx2.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
mod fdct;
mod ycbcr;

use crate::encoder::Operations;
use crate::encoder::{AlignedBlock, Operations};
pub use fdct::fdct_avx2;
pub use ycbcr::*;

pub(crate) struct AVX2Operations;

impl Operations for AVX2Operations {
#[inline(always)]
fn fdct(data: &mut [i16; 64]) {
fn fdct(data: &mut AlignedBlock) {
fdct_avx2(data);
}
}
8 changes: 6 additions & 2 deletions src/avx2/fdct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ use core::arch::x86_64::{
_mm256_unpacklo_epi16, _mm256_unpacklo_epi32,
};

use crate::encoder::AlignedBlock;

const CONST_BITS: i32 = 13;
const PASS1_BITS: i32 = 2;

Expand Down Expand Up @@ -57,14 +59,14 @@ const DESCALE_P1: i32 = CONST_BITS - PASS1_BITS;
const DESCALE_P2: i32 = CONST_BITS + PASS1_BITS;

#[inline(always)]
pub fn fdct_avx2(data: &mut [i16; 64]) {
pub fn fdct_avx2(data: &mut AlignedBlock) {
unsafe {
fdct_avx2_internal(data);
}
}

#[target_feature(enable = "avx2")]
fn fdct_avx2_internal(data: &mut [i16; 64]) {
fn fdct_avx2_internal(data: &mut AlignedBlock) {
#[target_feature(enable = "avx2")]
#[allow(non_snake_case)]
#[inline]
Expand Down Expand Up @@ -420,6 +422,8 @@ fn fdct_avx2_internal(data: &mut [i16; 64]) {
(t1, t2, t3, t4)
}

let data = &mut data.data;

let ymm4 = avx_load(&data[0..16]);
let ymm5 = avx_load(&data[16..32]);
let ymm6 = avx_load(&data[32..48]);
Expand Down
52 changes: 35 additions & 17 deletions src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,24 @@ pub enum JpegColorType {
Ycck,
}

/// A block of 64 `i16` values stored with 32-byte alignment.
///
/// `#[repr(C, align(32))]` forces every instance (including elements of a
/// `Vec<AlignedBlock>`) to start on a 32-byte boundary.
/// NOTE(review): presumably chosen so the AVX2 code path can load 256-bit
/// lanes from aligned memory - confirm against the AVX2 fdct implementation.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(C, align(32))]
pub(crate) struct AlignedBlock {
    /// The 64 values held by this block.
    pub data: [i16; 64],
}

impl AlignedBlock {
    /// Wraps an existing array of 64 values in an aligned block.
    pub const fn new(data: [i16; 64]) -> Self {
        AlignedBlock { data }
    }
}

impl Default for AlignedBlock {
    /// Returns a block whose 64 values are all zero.
    ///
    /// Implemented by hand because `Default` cannot be derived for a
    /// `[i16; 64]` field (std only provides it for arrays up to length 32).
    fn default() -> Self {
        Self::new([0i16; 64])
    }
}

impl JpegColorType {
pub(crate) fn get_num_components(self) -> usize {
use JpegColorType::*;
Expand Down Expand Up @@ -752,7 +770,7 @@ impl<W: JfifWrite> Encoder<W> {

OP::fdct(&mut block);

let mut q_block = [0i16; 64];
let mut q_block = AlignedBlock::default();

OP::quantize_block(
&block,
Expand All @@ -767,7 +785,7 @@ impl<W: JfifWrite> Encoder<W> {
&self.huffman_tables[component.ac_huffman_table as usize].1,
)?;

prev_dc[i] = q_block[0];
prev_dc[i] = q_block.data[0];
}
}
}
Expand Down Expand Up @@ -827,7 +845,7 @@ impl<W: JfifWrite> Encoder<W> {
&self.huffman_tables[component.ac_huffman_table as usize].1,
)?;

prev_dc = block[0];
prev_dc = block.data[0];

if restart_interval > 0 {
if restarts_to_go == 0 {
Expand Down Expand Up @@ -883,12 +901,12 @@ impl<W: JfifWrite> Encoder<W> {
}

self.writer.write_dc(
block[0],
block.data[0],
prev_dc,
&self.huffman_tables[component.dc_huffman_table as usize].0,
)?;

prev_dc = block[0];
prev_dc = block.data[0];

if restart_interval > 0 {
if restarts_to_go == 0 {
Expand Down Expand Up @@ -960,7 +978,7 @@ impl<W: JfifWrite> Encoder<W> {
&mut self,
image: &I,
q_tables: &[QuantizationTable; 2],
) -> [Vec<[i16; 64]>; 4] {
) -> [Vec<AlignedBlock>; 4] {
let width = image.width();
let height = image.height();

Expand Down Expand Up @@ -1022,7 +1040,7 @@ impl<W: JfifWrite> Encoder<W> {

OP::fdct(&mut block);

let mut q_block = [0i16; 64];
let mut q_block = AlignedBlock::default();

OP::quantize_block(
&block,
Expand All @@ -1037,7 +1055,7 @@ impl<W: JfifWrite> Encoder<W> {
blocks
}

fn init_block_buffers(&mut self, buffer_size: usize) -> [Vec<[i16; 64]>; 4] {
fn init_block_buffers(&mut self, buffer_size: usize) -> [Vec<AlignedBlock>; 4] {
// To simplify the code and give the compiler more information to optimize, we always initialize 4 components.
// Resource overhead should be minimal because an empty Vec doesn't allocate.

Expand Down Expand Up @@ -1065,7 +1083,7 @@ impl<W: JfifWrite> Encoder<W> {
}

// Create new huffman tables optimized for this image
fn optimize_huffman_table(&mut self, blocks: &[Vec<[i16; 64]>; 4]) {
fn optimize_huffman_table(&mut self, blocks: &[Vec<AlignedBlock>; 4]) {
// TODO: Find out if it's possible to reuse some code from the writer

let max_tables = self.components.len().min(2) as u8;
Expand All @@ -1088,7 +1106,7 @@ impl<W: JfifWrite> Encoder<W> {
debug_assert!(!blocks[i].is_empty());

for block in &blocks[i] {
let value = block[0];
let value = block.data[0];
let diff = value - prev_dc;
let num_bits = get_num_bits(diff);

Expand Down Expand Up @@ -1120,7 +1138,7 @@ impl<W: JfifWrite> Encoder<W> {
for block in &blocks[i] {
let mut zero_run = 0;

for &value in &block[start..end] {
for &value in &block.data[start..end] {
if value == 0 {
zero_run += 1;
} else {
Expand All @@ -1146,7 +1164,7 @@ impl<W: JfifWrite> Encoder<W> {
for block in &blocks[i] {
let mut zero_run = 0;

for &value in &block[1..] {
for &value in &block.data[1..] {
if value == 0 {
zero_run += 1;
} else {
Expand Down Expand Up @@ -1208,7 +1226,7 @@ fn get_block(
col_stride: usize,
row_stride: usize,
width: usize,
) -> [i16; 64] {
) -> AlignedBlock {
let mut block = [0i16; 64];

for y in 0..8 {
Expand All @@ -1220,7 +1238,7 @@ fn get_block(
}
}

block
AlignedBlock::new(block)
}

fn get_num_bits(mut value: i16) -> u8 {
Expand All @@ -1240,15 +1258,15 @@ fn get_num_bits(mut value: i16) -> u8 {

pub(crate) trait Operations {
#[inline(always)]
fn fdct(data: &mut [i16; 64]) {
fn fdct(data: &mut AlignedBlock) {
fdct(data);
}

#[inline(always)]
fn quantize_block(block: &[i16; 64], q_block: &mut [i16; 64], table: &QuantizationTable) {
fn quantize_block(block: &AlignedBlock, q_block: &mut AlignedBlock, table: &QuantizationTable) {
for i in 0..64 {
let z = ZIGZAG[i] as usize & 0x3f;
q_block[i] = table.quantize(block[z], z);
q_block.data[i] = table.quantize(block.data[z], z);
}
}
}
Expand Down
16 changes: 11 additions & 5 deletions src/fdct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
* scaled fixed-point arithmetic, with a minimal number of shifts.
*/

use crate::encoder::AlignedBlock;

const CONST_BITS: i32 = 13;
const PASS1_BITS: i32 = 2;

Expand Down Expand Up @@ -102,7 +104,9 @@ fn into_el(v: i32) -> i16 {

#[allow(clippy::erasing_op)]
#[allow(clippy::identity_op)]
pub fn fdct(data: &mut [i16; 64]) {
pub fn fdct(data: &mut AlignedBlock) {
let data = &mut data.data;

/* Pass 1: process rows. */
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
/* furthermore, we scale the results by 2**PASS1_BITS. */
Expand Down Expand Up @@ -238,6 +242,8 @@ mod tests {

// Inputs and outputs are taken from libjpeg's jpeg_fdct_islow for a typical image

use crate::encoder::AlignedBlock;

use super::fdct;

const INPUT1: [i16; 64] = [
Expand Down Expand Up @@ -269,12 +275,12 @@ mod tests {

#[test]
pub fn test_fdct_libjpeg() {
let mut i1 = INPUT1.clone();
let mut i1 = AlignedBlock::new(INPUT1.clone());
fdct(&mut i1);
assert_eq!(i1, OUTPUT1);
assert_eq!(i1.data, OUTPUT1);

let mut i2 = INPUT2.clone();
let mut i2 = AlignedBlock::new(INPUT2.clone());
fdct(&mut i2);
assert_eq!(i2, OUTPUT2);
assert_eq!(i2.data, OUTPUT2);
}
}
10 changes: 5 additions & 5 deletions src/writer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::EncodingError;
use crate::encoder::Component;
use crate::encoder::{AlignedBlock, Component};
use crate::huffman::{CodingClass, HuffmanTable};
use crate::marker::{Marker, SOFType};
use crate::quantization::QuantizationTable;
Expand Down Expand Up @@ -330,12 +330,12 @@ impl<W: JfifWrite> JfifWriter<W> {

pub fn write_block(
&mut self,
block: &[i16; 64],
block: &AlignedBlock,
prev_dc: i16,
dc_table: &HuffmanTable,
ac_table: &HuffmanTable,
) -> Result<(), EncodingError> {
self.write_dc(block[0], prev_dc, dc_table)?;
self.write_dc(block.data[0], prev_dc, dc_table)?;
self.write_ac_block(block, 1, 64, ac_table)
}

Expand All @@ -355,14 +355,14 @@ impl<W: JfifWrite> JfifWriter<W> {

pub fn write_ac_block(
&mut self,
block: &[i16; 64],
block: &AlignedBlock,
start: usize,
end: usize,
ac_table: &HuffmanTable,
) -> Result<(), EncodingError> {
let mut zero_run = 0;

for &value in &block[start..end] {
for &value in &block.data[start..end] {
if value == 0 {
zero_run += 1;
} else {
Expand Down
Loading