Skip to content

Commit f001490

Browse files
CopilotVelli20
andauthored
pdf-object: address review feedback on CCITTFaxDecode
- Fix `from_dictionary` doc to note caller must resolve indirect refs - Add Group 3 RTC (6-EOL end-of-block) detection for K=0 and K>0 - Fix K>0: decode_2d_row None now breaks instead of emitting all-white - Fix encoded_g4_one_white_row fixture bytes (was off by one bit) - Add test_1d_rtc_with_eol and test_1d_rtc_no_eol to cover RTC paths Agent-Logs-Url: https://github.com/Velli20/safe-pdf/sessions/99845e2b-7343-4c89-ac0c-50a3dffa3002 Co-authored-by: Velli20 <25138671+Velli20@users.noreply.github.com>
1 parent 56fd2a7 commit f001490

1 file changed

Lines changed: 98 additions & 11 deletions

File tree

crates/pdf-object/src/ccitt.rs

Lines changed: 98 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,13 @@ impl Default for CCITTFaxParams {
5454
}
5555

5656
impl CCITTFaxParams {
57-
/// Parse decode parameters directly from a PDF `/DecodeParms` dictionary.
57+
/// Parse decode parameters from a PDF `/DecodeParms` dictionary.
5858
///
59-
/// Values are extracted by directly matching [`ObjectVariant`] variants;
60-
/// no [`ObjectResolver`][crate::object_resolver::ObjectResolver] is needed
61-
/// because decode-parameter dictionaries are always inline in the stream
62-
/// dictionary and never indirect references.
59+
/// Values are extracted by directly matching [`ObjectVariant`] variants in the
60+
/// provided [`Dictionary`]. This function does not resolve indirect references
61+
/// on its own; if the `/DecodeParms` entry is an indirect reference (as allowed
62+
/// by the PDF specification), the caller must resolve it to a dictionary before
63+
/// calling this method.
6364
pub fn from_dictionary(dict: &Dictionary) -> Self {
6465
let mut p = Self::default();
6566

@@ -793,6 +794,15 @@ pub fn decode(data: &[u8], params: &CCITTFaxParams) -> Result<Vec<u8>, ObjectErr
793794
if params.end_of_line && read_eol(&mut reader, params.encoded_byte_align).is_err() {
794795
break;
795796
}
797+
// With `end_of_line=true` the row EOL has just been consumed; with
798+
// `end_of_line=false` we are at the start of the next row. In both
799+
// cases, an EOL bit pattern here is the beginning of the
800+
// Return-to-Control (RTC) end-of-block sequence (T.4: six consecutive
801+
// EOLs). Stop rather than passing the remaining EOLs into the Huffman
802+
// row decoder.
803+
if params.end_of_block && reader.peek_bits(EOL_LEN) == Some(EOL_BITS) {
804+
break;
805+
}
796806

797807
let row = decode_1d_row(&mut reader, columns)?;
798808
output.extend_from_slice(&pack_row(&row, params.black_is1));
@@ -803,15 +813,27 @@ pub fn decode(data: &[u8], params: &CCITTFaxParams) -> Result<Vec<u8>, ObjectErr
803813
break;
804814
}
805815

816+
// After the row EOL, a second consecutive EOL signals the start of
817+
// the RTC end-of-block sequence. Stop rather than mis-reading the
818+
// second EOL as a row-encoding tag bit.
819+
if params.end_of_block && reader.peek_bits(EOL_LEN) == Some(EOL_BITS) {
820+
break;
821+
}
822+
806823
// The bit after the EOL selects the row encoding:
807824
// 1 = 1D (Modified Huffman), 0 = 2D.
808825
let is_1d = reader.read_bit().unwrap_or(true);
809826

810827
let row = if is_1d {
811828
decode_1d_row(&mut reader, columns)?
812829
} else {
813-
decode_2d_row(&mut reader, &ref_row, columns)?
814-
.unwrap_or_else(|| vec![false; columns])
830+
match decode_2d_row(&mut reader, &ref_row, columns)? {
831+
Some(row) => row,
832+
// `decode_2d_row` returns `None` when it encounters an EOL or
833+
// EOFB marker before completing the row, signalling the end of
834+
// the encoded data.
835+
None => break,
836+
}
815837
};
816838

817839
output.extend_from_slice(&pack_row(&row, params.black_is1));
@@ -977,6 +999,71 @@ mod tests {
977999
assert_eq!(output, vec![0xCC]); // W W B B W W B B → 1 1 0 0 1 1 0 0
9781000
}
9791001

1002+
// ── Group 3 1D RTC (end-of-block) ────────────────────────────────────────
1003+
1004+
/// Group 3 1D, end_of_line=true, end_of_block=true: one all-white row
1005+
/// followed by two consecutive EOLs (start of the 6-EOL RTC sequence).
1006+
///
1007+
/// Bit layout:
1008+
/// Row EOL: 000000000001 (12 bits, pos 0-11, '1' at pos 11)
1009+
/// white-8: 10011 ( 5 bits, pos 12-16, '1' at pos 12, 15, 16)
1010+
/// RTC EOL1: 000000000001 (12 bits, pos 17-28, '1' at pos 28)
1011+
/// RTC EOL2: 000000000001 (12 bits, pos 29-40, '1' at pos 40)
1012+
/// Padding: 0000000 ( 7 bits)
1013+
/// Total: 48 bits = 6 bytes
1014+
///
1015+
/// byte 0 (pos 0-7): 0 0 0 0 0 0 0 0 = 0x00
1016+
/// byte 1 (pos 8-15): 0 0 0 1 1 0 0 1 = 0x19
1017+
/// byte 2 (pos 16-23): 1 0 0 0 0 0 0 0 = 0x80
1018+
/// byte 3 (pos 24-31): 0 0 0 0 1 0 0 0 = 0x08
1019+
/// byte 4 (pos 32-39): 0 0 0 0 0 0 0 0 = 0x00
1020+
/// byte 5 (pos 40-47): 1 0 0 0 0 0 0 0 = 0x80
1021+
#[test]
1022+
fn test_1d_rtc_with_eol() {
1023+
let data = vec![0x00, 0x19, 0x80, 0x08, 0x00, 0x80];
1024+
let params = CCITTFaxParams {
1025+
k: 0,
1026+
columns: 8,
1027+
rows: 0,
1028+
end_of_line: true,
1029+
end_of_block: true,
1030+
..Default::default()
1031+
};
1032+
// Decoder must stop cleanly at RTC without error and emit exactly one row.
1033+
let output = decode(&data, &params).expect("RTC should terminate cleanly");
1034+
assert_eq!(output, vec![0xFF]); // all-white row
1035+
}
1036+
1037+
/// Group 3 1D, end_of_line=false, end_of_block=true: one all-white row
1038+
/// followed by two consecutive EOLs (start of RTC) that trigger termination.
1039+
///
1040+
/// Bit layout:
1041+
/// white-8: 10011 ( 5 bits, pos 0-4, '1' at pos 0, 3, 4)
1042+
/// RTC EOL1: 000000000001 (12 bits, pos 5-16, '1' at pos 16)
1043+
/// RTC EOL2: 000000000001 (12 bits, pos 17-28, '1' at pos 28)
1044+
/// Padding: 000 ( 3 bits)
1045+
/// Total: 32 bits = 4 bytes
1046+
///
1047+
/// byte 0 (pos 0-7): 1 0 0 1 1 0 0 0 = 0x98
1048+
/// byte 1 (pos 8-15): 0 0 0 0 0 0 0 0 = 0x00
1049+
/// byte 2 (pos 16-23): 1 0 0 0 0 0 0 0 = 0x80
1050+
/// byte 3 (pos 24-31): 0 0 0 0 1 0 0 0 = 0x08
1051+
#[test]
1052+
fn test_1d_rtc_no_eol() {
1053+
let data = vec![0x98, 0x00, 0x80, 0x08];
1054+
let params = CCITTFaxParams {
1055+
k: 0,
1056+
columns: 8,
1057+
rows: 0,
1058+
end_of_line: false,
1059+
end_of_block: true,
1060+
..Default::default()
1061+
};
1062+
// Decoder must stop cleanly at RTC without error and emit exactly one row.
1063+
let output = decode(&data, &params).expect("RTC should terminate cleanly");
1064+
assert_eq!(output, vec![0xFF]); // all-white row
1065+
}
1066+
9801067
// ── Group 4 (K < 0) ──────────────────────────────────────────────────────
9811068

9821069
/// Build a Group 4 stream for one all-white 8-pixel row followed by EOFB.
@@ -992,11 +1079,11 @@ mod tests {
9921079
///
9931080
/// bits: 1 000000000001 000000000001 0000000 (padded to 32 bits)
9941081
/// byte0: 10000000 = 0x80
995-
/// byte1: 00000100 = 0x04 (bits 8..15: 0 0 0 0 0 1 0 0)
996-
/// byte2: 00000001 = 0x01 (bits 16..23: 0 0 0 0 0 0 0 1)
997-
/// byte3: 00000000 = 0x00 (padding)
1082+
/// byte1: 00001000 = 0x08 (bits 8..15: 0 0 0 0 1 0 0 0, EOL1's '1' at pos 12)
1083+
/// byte2: 00000000 = 0x00 (bits 16..23: all zero)
1084+
/// byte3: 10000000 = 0x80 (bit 24 = EOL2's '1', bits 25..31 padding)
9981085
fn encoded_g4_one_white_row() -> Vec<u8> {
999-
vec![0x80, 0x08, 0x01, 0x00]
1086+
vec![0x80, 0x08, 0x00, 0x80]
10001087
}
10011088

10021089
#[test]

0 commit comments

Comments
 (0)