Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions crates/pdf-canvas/src/canvas_text_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use pdf_content_stream::TextElement;
use pdf_content_stream::pdf_operator_backend::{
TextObjectOps, TextPositioningOps, TextShowingOps, TextStateOps,
};
use pdf_font::flags::FontFlags;
use pdf_font::font::Font;
use pdf_font::type0_font::CidFontSubType;
use pdf_graphics::TextRenderingMode;
Expand Down Expand Up @@ -171,8 +172,9 @@ impl<B: CanvasBackend> TextShowingOps for PdfCanvas<'_, B> {
}
Font::TrueType(font) => {
let iter = to_char_iter(text);

let mut renderer = TrueTypeFontRenderer::new(self, &font.font_file, false)?;
let is_symbolic = font.flags.contains(FontFlags::SYMBOLIC);
let mut renderer =
TrueTypeFontRenderer::new(self, &font.font_file, false, is_symbolic)?;
renderer.render_text(iter)
}
Font::Type0(font) => {
Expand All @@ -185,7 +187,9 @@ impl<B: CanvasBackend> TextShowingOps for PdfCanvas<'_, B> {
renderer.render_text(iter)
}
CidFontSubType::Type2 => {
let mut renderer = TrueTypeFontRenderer::new(self, &font.font_file, true)?;
// CID TrueType fonts use glyph IDs directly; symbolic flag is irrelevant.
let mut renderer =
TrueTypeFontRenderer::new(self, &font.font_file, true, false)?;
renderer.render_text(iter)
}
}
Expand Down
84 changes: 42 additions & 42 deletions crates/pdf-canvas/src/truetype_font_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,52 +41,42 @@ pub(crate) struct TrueTypeFontRenderer<'a, 'b, B: CanvasBackend> {
units_per_em: u16,
/// Whether this font uses CID (Character Identifier) encoding.
is_cid: bool,
/// Whether the font is flagged as symbolic (FontFlags::SYMBOLIC).
is_symbolic: bool,
}

/// Resolve a TrueType `GlyphId` for a given encoded character code.
///
/// Implements the glyph-mapping algorithm described in ISO 32000-1 §9.6.6.4
/// for non-CID TrueType fonts embedded in PDF.
///
/// # Resolution order
///
/// 1. **Encoding → AGL → cmap**: If a glyph name is available from the PDF
/// font's `/Encoding`, map it to a Unicode codepoint via the Adobe Glyph
/// List, then look up the codepoint in the font's best cmap subtable.
/// 2. **Direct Unicode**: Treat `char_code` as a Unicode scalar value and
/// probe the cmap (works for WinAnsiEncoding where codes ≈ Unicode).
/// 3. **Raw glyph index**: Use `char_code` as a raw glyph ID (last resort).
///
/// # Parameters
///
/// - `charmap`: Pre-computed skrifa `Charmap` (selects the best cmap subtable).
/// - `char_code`: The PDF text stream's 1-byte character code (widened to `u16`).
/// - `glyph_name`: Optional glyph name from the font's `/Encoding` dictionary.
fn resolve_glyph_id(charmap: &Charmap<'_>, char_code: u16, glyph_name: Option<&str>) -> GlyphId {
// Step 1: Encoding -> glyph name -> Unicode (via AGL) -> cmap
if let Some(name) = glyph_name
&& let Some(unicode_char) = glyph_name_to_unicode(name)
&& let Some(gid) = charmap.map(unicode_char)
{
return gid;
}
impl<'a, 'b, B: CanvasBackend> TrueTypeFontRenderer<'a, 'b, B> {
fn resolve_simple_gid(&self, code: u16, glyph_name: Option<&str>) -> GlyphId {
// Non-symbolic path: encoding → glyph name → AGL → Unicode → cmap.
if let Some(name) = glyph_name
&& let Some(unicode_char) = glyph_name_to_unicode(name)
&& let Some(gid) = self.charmap.map(unicode_char)
{
return gid;
}

// Step 2: treat char_code as a Unicode codepoint directly
if let Some(unicode_char) = char::from_u32(u32::from(char_code))
&& let Some(gid) = charmap.map(unicode_char)
{
return gid;
}
if let Ok(cmap) = self.font_ref.cmap()
&& let Some((_, _, subtable)) = cmap.best_subtable()
&& let Some(id) = subtable.map_codepoint(code)
{
return id;
}

// Step 3: use the character code as a raw glyph index
GlyphId::new(u32::from(char_code))
}
if self.is_symbolic {
// Symbolic fallback: none of the lookups above produced a glyph, so use
// the character code itself as a raw glyph index.
GlyphId::new(u32::from(code))
} else {
// No mapping found.
GlyphId::NOTDEF
}
}

impl<'a, 'b, B: CanvasBackend> TrueTypeFontRenderer<'a, 'b, B> {
pub fn new(
canvas: &'b mut PdfCanvas<'a, B>,
stream_object: &'a [u8],
is_cid: bool,
is_symbolic: bool,
) -> Result<Self, PdfCanvasError> {
let font_ref = FontRef::new(stream_object)
.map_err(|e| TrueTypeFontRendererError::FontParseError(e.to_string()))?;
Expand Down Expand Up @@ -117,6 +107,7 @@ impl<'a, 'b, B: CanvasBackend> TrueTypeFontRenderer<'a, 'b, B> {
glyph_base_transform,
units_per_em,
is_cid,
is_symbolic,
})
}
}
Expand All @@ -132,22 +123,31 @@ impl<B: CanvasBackend> TextRenderer for TrueTypeFontRenderer<'_, '_, B> {
.text_state
.compose_glyph_matrix(self.glyph_base_transform, &state.transform);

let glyph_id = if !self.is_cid {
// For CID fonts the char_code IS the glyph index by definition.
// For non-CID fonts use the §9.6.6.4 resolver; `GlyphId::NOTDEF` means no
// mapping was found for this code — draw nothing but still advance.
let resolved_glyph_id = if !self.is_cid {
let glyph_name = state.text_state.glyph_name(char_code);
resolve_glyph_id(&self.charmap, char_code, glyph_name)
self.resolve_simple_gid(char_code, glyph_name)
} else {
GlyphId::new(u32::from(char_code))
};

if let Some(outline_glyph) = self.outlines.get(glyph_id) {
if resolved_glyph_id != GlyphId::NOTDEF
&& let Some(outline_glyph) = self.outlines.get(resolved_glyph_id)
{
self.canvas
.draw_outline_glyph(&outline_glyph, &glyph_matrix_for_char)?;
}

self.canvas
.current_state_mut()?
.text_state
.advance_horizontal_glyph(char_code, &self.font_ref, glyph_id, self.units_per_em)?;
.advance_horizontal_glyph(
char_code,
&self.font_ref,
resolved_glyph_id,
self.units_per_em,
)?;
}
Ok(())
}
Expand Down
7 changes: 6 additions & 1 deletion crates/pdf-font/src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ mod tests {
use std::borrow::Cow;

use super::*;
use crate::{encoding::Encoding, to_unicode_cmap::ToUnicodeCMap, true_type_font::TrueTypeFont};
use crate::{
encoding::Encoding, flags::FontFlags, to_unicode_cmap::ToUnicodeCMap,
true_type_font::TrueTypeFont,
};

#[test]
fn test_truetype_encoding_fallback() {
Expand All @@ -235,6 +238,7 @@ mod tests {
encoding: Some(enc),
to_unicode: None,
standard14: None,
flags: FontFlags::empty(),
});
assert_eq!(font.char_to_unicode(65), Some('A'));
assert_eq!(&*font.chars_to_unicode(65), ['A'].as_slice());
Expand All @@ -251,6 +255,7 @@ mod tests {
encoding: None,
to_unicode: Some(cmap),
standard14: None,
flags: FontFlags::empty(),
});
assert_eq!(
&*font.chars_to_unicode(1),
Expand Down
55 changes: 42 additions & 13 deletions crates/pdf-font/src/true_type_font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ pub struct TrueTypeFont {
/// Standard 14 identity when this font is a synthetic fallback selected
/// from a Standard 14 `/BaseFont` name.
pub standard14: Option<Standard14Font>,
/// Font flags from the PDF font descriptor, if available. Used to determine
/// whether to apply the symbolic font fallback behavior for unmapped char codes.
pub flags: FontFlags,
}

impl TrueTypeFont {
Expand All @@ -42,7 +45,7 @@ impl TrueTypeFont {
dictionary: &Dictionary,
objects: &dyn ObjectResolver,
) -> Result<Self, FontError> {
let font_file = Self::read_font_file(dictionary, objects)?;
let (font_file, flags) = Self::read_font_file(dictionary, objects)?;
// Read the `/Widths` entry.
let widths = SimpleFontGlyphWidthsMap::from_dictionary(dictionary, objects)?;

Expand Down Expand Up @@ -74,21 +77,25 @@ impl TrueTypeFont {
encoding,
to_unicode,
standard14: None,
flags,
})
}

/// Creates a minimal `TrueTypeFont` from raw font bytes with no
/// widths, encoding, or ToUnicode map.
/// widths or ToUnicode map.
///
/// Used for Standard 14 fallback fonts where the bundled bytes are
/// `Cow::Borrowed` (zero-copy from `include_bytes!`).
/// `Cow::Borrowed` (zero-copy from `include_bytes!`). Those fallback fonts
/// behave like simple Type 1 fonts, so they default to StandardEncoding
/// when the PDF omitted an explicit `/Encoding`.
pub fn from_bytes(font_file: Cow<'static, [u8]>, standard14: Option<Standard14Font>) -> Self {
Self {
font_file,
widths: None,
encoding: None,
encoding: standard14.map(|_| Encoding::default()),
to_unicode: None,
standard14,
flags: FontFlags::empty(),
}
}
}
Expand All @@ -114,30 +121,52 @@ impl TrueTypeFont {
pub(crate) fn read_font_file(
dictionary: &Dictionary,
objects: &dyn ObjectResolver,
) -> Result<Cow<'static, [u8]>, ObjectError> {
) -> Result<(Cow<'static, [u8]>, FontFlags), ObjectError> {
let flags = if let Some(descriptor) = dictionary
.get("FontDescriptor")
.map(|obj| obj.try_dictionary(objects))
.transpose()?
{
let flags = descriptor
.get("Flags")
.and_then(|obj| obj.try_number::<u32>(objects).ok())
.map(FontFlags::from_bits_truncate)
.unwrap_or_default();

if let Some(stream) = descriptor
.get("FontFile2")
.map(|obj| obj.try_stream(objects))
.transpose()?
{
return Ok(Cow::Owned(stream.data()?.to_vec()));
return Ok((Cow::Owned(stream.data()?.to_vec()), flags));
}

// Read Flags to determine fallback font style
descriptor
.get("Flags")
.and_then(|obj| obj.try_number::<u32>(objects).ok())
.map(FontFlags::from_bits_truncate)
.unwrap_or_default()
flags
} else {
FontFlags::empty()
};

Ok(Standard14Font::from(flags).fallback_font_bytes())
Ok((Standard14Font::from(flags).fallback_font_bytes(), flags))
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A Standard 14 fallback font built through `TrueTypeFont::from_bytes`
    /// must carry an encoding whose entry for code 65 is the glyph name "A".
    #[test]
    fn standard14_fallback_fonts_default_to_standard_encoding() {
        // Build the font the same way the Standard 14 fallback path does:
        // bundled bytes plus the matching Standard 14 identity.
        let fallback_bytes = Standard14Font::Helvetica.fallback_font_bytes();
        let font = TrueTypeFont::from_bytes(fallback_bytes, Some(Standard14Font::Helvetica));

        // Look up the glyph name for code 65; stays `None` when the font has
        // no encoding at all, which makes the assertion below fail.
        let glyph_name_at_65: Option<&str> = match font.encoding.as_ref() {
            Some(encoding) => encoding.names.get(65).map(std::borrow::Cow::as_ref),
            None => None,
        };

        assert_eq!(glyph_name_at_65, Some("A"));
    }
}
4 changes: 3 additions & 1 deletion crates/pdf-font/src/type0_font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ impl Type0Font {
// - Type2 (TrueType): Use the raw TrueType data directly.
let font_file = match subtype {
CidFontSubType::Type0 => Type1Font::read_font_file(dictionary, objects)?,
CidFontSubType::Type2 => TrueTypeFont::read_font_file(dictionary, objects)?.to_vec(),
CidFontSubType::Type2 => TrueTypeFont::read_font_file(dictionary, objects)?
.0
.to_vec(),
};

// Build reverse glyph→Unicode map from the embedded font's cmap when
Expand Down
Loading