From d396e94756537891ae6ee6cc44fc0c2fb231283e Mon Sep 17 00:00:00 2001 From: Sindre Sorhus Date: Tue, 23 Dec 2025 14:31:36 +0100 Subject: [PATCH] Fix width calculation for minimally-qualified emoji sequences Fixes #67 Simplify emoji cluster detection --- index.js | 28 ++++++++++++++++++++++++++-- test.js | 20 ++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 1140bf6..87d2c94 100644 --- a/index.js +++ b/index.js @@ -7,7 +7,8 @@ Logic: - Width rules: 1. Skip non-printing clusters (Default_Ignorable, Control, pure Mark, lone Surrogates). Tabs are ignored by design. 2. RGI emoji clusters (\p{RGI_Emoji}) are double-width. - 3. Otherwise use East Asian Width of the cluster’s first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark). + 3. Minimally-qualified/unqualified emoji clusters (ZWJ sequences with 2+ Extended_Pictographic, or keycap sequences) are double-width. + 4. Otherwise use East Asian Width of the cluster's first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark). 
*/ const segmenter = new Intl.Segmenter(); @@ -21,6 +22,29 @@ const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p // RGI emoji sequences const rgiEmojiRegex = /^\p{RGI_Emoji}$/v; +// Detect minimally-qualified/unqualified emoji sequences (missing VS16 but still render as double-width) +const unqualifiedKeycapRegex = /^[\d#*]\u20E3$/; +const extendedPictographicRegex = /\p{Extended_Pictographic}/gu; + +function isDoubleWidthNonRgiEmojiSequence(segment) { + // Real emoji clusters are < 30 chars; guard against pathological input + if (segment.length > 50) { + return false; + } + + if (unqualifiedKeycapRegex.test(segment)) { + return true; + } + + // ZWJ sequences with 2+ Extended_Pictographic + if (segment.includes('\u200D')) { + const pictographics = segment.match(extendedPictographicRegex); + return pictographics !== null && pictographics.length >= 2; + } + + return false; +} + function baseVisible(segment) { return segment.replace(leadingNonPrintingRegex, ''); } @@ -72,7 +96,7 @@ export default function stringWidth(input, options = {}) { } // Emoji width logic - if (rgiEmojiRegex.test(segment)) { + if (rgiEmojiRegex.test(segment) || isDoubleWidthNonRgiEmojiSequence(segment)) { width += 2; continue; } diff --git a/test.js b/test.js index e503232..8ac4da9 100644 --- a/test.js +++ b/test.js @@ -254,3 +254,23 @@ test('hash as plain text', macro, '#', 1); test('Arabic number sign U+0600', macro, '\u0600', 0); test('Arabic end of ayah U+06DD', macro, '\u06DD', 0); test('Syriac abbreviation mark U+070F', macro, '\u070F', 0); + +// Minimally-qualified/unqualified emoji sequences +// These are emoji sequences missing VS16 but should still be width 2 +test('heart on fire (MQ)', macro, '\u2764\u200D\u{1F525}', 2); // ❤‍🔥 +test('rainbow flag (MQ)', macro, '\u{1F3F3}\u200D\u{1F308}', 2); // 🏳‍🌈 +test('transgender flag (MQ)', macro, '\u{1F3F3}\u200D\u26A7', 2); // 🏳‍⚧ +test('broken chain (MQ)', macro, '\u26D3\u200D\u{1F4A5}', 2); // 
⛓‍💥 +test('eye in speech bubble (MQ)', macro, '\u{1F441}\u200D\u{1F5E8}', 2); // 👁‍🗨 +test('man bouncing ball (MQ)', macro, '\u26F9\u200D\u2642', 2); // ⛹‍♂ +test('woman bouncing ball (MQ)', macro, '\u26F9\u200D\u2640', 2); // ⛹‍♀ +test('man detective (MQ)', macro, '\u{1F575}\u200D\u2642', 2); // 🕵‍♂ +test('woman detective (MQ)', macro, '\u{1F575}\u200D\u2640', 2); // 🕵‍♀ + +// Unqualified keycap sequences (missing VS16) +test('keycap # (UQ)', macro, '#\u20E3', 2); // #⃣ +test('keycap 0 (UQ)', macro, '0\u20E3', 2); // 0⃣ +test('keycap * (UQ)', macro, '*\u20E3', 2); // *⃣ + +// Ensure invalid keycap sequences don't match +test('phone + keycap (invalid)', macro, '\u260E\uFE0F\u20E3', 1); // Not a valid keycap base