From 54ea35cd65e9a425d69debd2484726d7ad1ee97e Mon Sep 17 00:00:00 2001 From: obo Date: Mon, 16 Feb 2026 22:21:28 +0800 Subject: [PATCH] feat: add support for Ideographic Variation Sequences (IVS) in TrueType fonts --- .../com/lowagie/text/pdf/FontDetails.java | 90 ++++++++++++++++--- .../com/lowagie/text/pdf/TrueTypeFont.java | 85 ++++++++++++++++++ .../lowagie/text/pdf/TrueTypeFontUnicode.java | 27 +++++- 3 files changed, 187 insertions(+), 15 deletions(-) diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java index 4e8e0d68e..921da81dd 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java @@ -243,31 +243,79 @@ byte[] convertToBytes(String text, TextRenderingOptions options) { private byte[] convertToBytesWithGlyphs(String text) throws UnsupportedEncodingException { int len = text.length(); - int[] metrics = null; - int[] glyph = new int[len]; + int[] glyph = new int[len * 2]; int i = 0; - for (int k = 0; k < len; ++k) { - int val; + int k = 0; + + while (k < len) { + int baseCp; + int charCount; + if (Utilities.isSurrogatePair(text, k)) { - val = Utilities.convertToUtf32(text, k); - k++; + baseCp = Utilities.convertToUtf32(text, k); + charCount = 2; } else { - val = text.charAt(k); + baseCp = text.charAt(k); + charCount = 1; } - metrics = ttu.getMetricsTT(val); - if (metrics == null) { + + // First try to consume the base code point plus a following variation selector as one IVS glyph + IVSResult ivsResult = tryProcessIVS(text, k + charCount, baseCp); + if (ivsResult.found) { + glyph[i++] = ivsResult.glyphCode; + k += charCount + ivsResult.vsCharCount; continue; } - int m0 = metrics[0]; - Integer gl = m0; - if (!longTag.containsKey(gl)) { - longTag.put(gl, new int[]{m0, metrics[1], val}); + // No IVS match: fall back to the regular glyph lookup for the base code point alone + int[] metrics = ttu.getMetricsTT(baseCp); + if (metrics != null) { + int m0 = metrics[0]; + longTag.computeIfAbsent(m0, key -> new int[]{m0, metrics[1], 
baseCp}); + glyph[i++] = m0; } - glyph[i++] = m0; + + k += charCount; } + return getCJKEncodingBytes(glyph, i); } + private IVSResult tryProcessIVS(String text, int vsStartIndex, int baseCp) { + if (vsStartIndex >= text.length()) { + return IVSResult.NOT_FOUND; + } + + int vsCp; + int vsCharCount; + + if (Utilities.isSurrogatePair(text, vsStartIndex)) { + vsCp = Utilities.convertToUtf32(text, vsStartIndex); + vsCharCount = 2; + } else { + vsCp = text.charAt(vsStartIndex); + vsCharCount = 1; + } + + if (!isVariationSelector(vsCp)) { + return IVSResult.NOT_FOUND; + } + + int[] format14Metrics = ttu.getFormat14MetricsTT(baseCp, vsCp); + if (format14Metrics == null) { + return IVSResult.NOT_FOUND; + } + + int glyphCode = format14Metrics[0]; + Integer gl = glyphCode; + longTag.computeIfAbsent(gl, k -> new int[]{glyphCode, format14Metrics[1], baseCp, vsCp}); + return new IVSResult(true, glyphCode, vsCharCount); + } + + private static boolean isVariationSelector(int codePoint) { + return (codePoint >= 0xFE00 && codePoint <= 0xFE0F) || + (codePoint >= 0xE0100 && codePoint <= 0xE01EF); + } + private byte[] getCJKEncodingBytes(int[] glyph, int size) { byte[] result = new byte[size * 2]; for (int i = 0; i < size; i++) { @@ -380,4 +428,18 @@ public boolean isSubset() { public void setSubset(boolean subset) { this.subset = subset; } + + private static class IVSResult { + static final IVSResult NOT_FOUND = new IVSResult(false, 0, 0); + + final boolean found; + final int glyphCode; + final int vsCharCount; + + IVSResult(boolean found, int glyphCode, int vsCharCount) { + this.found = found; + this.glyphCode = glyphCode; + this.vsCharCount = vsCharCount; + } + } } diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java index 64100b556..3b05ee7c1 100644 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java @@ -206,6 +206,8 
@@ class TrueTypeFont extends BaseFont { protected HashMap cmapExt; + protected HashMap cmap05; + /** * The map containing the kerning information. It represents the content of table 'kern'. The key is an * Integer where the top 16 bits are the glyph number for the first character and the lower 16 bits @@ -797,6 +799,7 @@ void readCMaps() throws DocumentException, IOException { int map31 = 0; int map30 = 0; int mapExt = 0; + int map05 = 0; for (int k = 0; k < num_tables; ++k) { int platId = rf.readUnsignedShort(); int platSpecId = rf.readUnsignedShort(); @@ -808,6 +811,8 @@ void readCMaps() throws DocumentException, IOException { map31 = offset; } else if (platId == 3 && platSpecId == 10) { mapExt = offset; + } else if (platId == 0 && platSpecId == 5) { + map05 = offset; } if (platId == 1 && platSpecId == 0) { map10 = offset; @@ -860,6 +865,83 @@ void readCMaps() throws DocumentException, IOException { break; } } + if (map05 > 0) { + int format14Location = table_location[0] + map05; + this.rf.seek((long) format14Location); + int format = this.rf.readUnsignedShort(); + if (format == 14) { + this.cmap05 = this.readFormat14(format14Location); + } + } + } + + HashMap readFormat14(int format14Location) throws IOException { + HashMap result = new HashMap<>(); + this.rf.getFilePointer(); // current file position; the returned value is not used + + this.rf.readInt(); // byte length of the subtable; not used + int numVarSelectorRecords = this.rf.readInt(); + + if (numVarSelectorRecords < 0 || numVarSelectorRecords > 10000) { + throw new IOException("Invalid numVarSelectorRecords: " + numVarSelectorRecords); + } + + Map nonDefaultOffsetMap = new HashMap<>(); + + for (int i = 0; i < numVarSelectorRecords; ++i) { + byte[] input = new byte[3]; + this.rf.readFully(input); + int selectorUnicodeValue = this.byte2int(input, 3); + this.rf.readInt(); // defaultUVSOffset is skipped; only non-default mappings are loaded + int nonDefaultUVSOffset = this.rf.readInt(); + + if (nonDefaultUVSOffset > 0) { + nonDefaultOffsetMap.put(selectorUnicodeValue, nonDefaultUVSOffset); + } + } + + for 
(Map.Entry entry : nonDefaultOffsetMap.entrySet()) { + Integer selectorUnicodeValue = entry.getKey(); + int nonDefaultUVSOffset = entry.getValue(); + + this.rf.seek((long) (format14Location + nonDefaultUVSOffset)); + int mappingNums = this.rf.readInt(); + + if (mappingNums < 0 || mappingNums > 10000) { + continue; + } + + for (int i = 0; i < mappingNums; ++i) { + byte[] input = new byte[3]; + this.rf.readFully(input); + int unicodeValue = this.byte2int(input, 3); + int glyphId = this.rf.readUnsignedShort(); + result.put(unicodeValue + "_" + selectorUnicodeValue, + new int[]{glyphId, this.getGlyphWidth(glyphId)}); + } + } + return result; + } + + /** + * Combines the first n (1 to 4) bytes of data, big-endian, into an int; returns 0 for invalid arguments. + */ + public int byte2int(byte[] data, int n) { + if (data == null || n <= 0 || n > 4 || data.length < n) { + return 0; + } + int result = 0; + for (int i = 0; i < n; i++) { + result = (result << 8) | (data[i] & 0xFF); // & 0xFF treats each byte as unsigned + } + return result; + } + + public int[] getFormat14MetricsTT(int char1, int char2) { + if (this.cmap05 != null) { + return this.cmap05.get(char1 + "_" + char2); + } + return null; } HashMap readFormat12() throws IOException { @@ -1419,6 +1501,9 @@ public int[] getMetricsTT(int c) { if (cmap10 != null) { return cmap10.get(c); } + if (cmap05 != null) { + return cmap05.get(c); + } return null; } diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java index 125133939..834527a0f 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java @@ -268,7 +268,9 @@ private PdfStream getToUnicode(int[][] metrics) { --size; int[] metric = metrics[k]; String fromTo = toHex(metric[0]); - buf.append(fromTo).append(fromTo).append(toHex(metric[2])).append('\n'); + String hexString; + hexString = metric.length == 4 ? 
toHex(metric[2], metric[3]) : toHex(metric[2]); + buf.append(fromTo).append(fromTo).append(hexString).append('\n'); } buf.append( "endbfrange\n" + @@ -585,4 +587,27 @@ public int[] getCharBBox(int c) { return bboxes[m[0]]; } + private String toHex(int char1, int char2) { + String hex1; + int high; + int low; + if (char1 < 65536) { + hex1 = toHex4(char1); + } else { + char1 -= 65536; + high = char1 / 1024 + '\ud800'; + low = char1 % 1024 + '\udc00'; + hex1 = toHex4(high) + toHex4(low); + } + String hex2; + if (char2 < 65536) { + hex2 = toHex4(char2); + } else { + char2 -= 65536; + high = char2 / 1024 + '\ud800'; + low = char2 % 1024 + '\udc00'; + hex2 = toHex4(high) + toHex4(low); + } + return "[<" + hex1 + hex2 + ">]"; + } }