From b6f41a8af47e790bb8517091f7f58e3a04436cd7 Mon Sep 17 00:00:00 2001 From: flan Date: Wed, 20 Aug 2025 23:59:52 +0200 Subject: [PATCH] Make constrainByteLength work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should hopefully prevent overly long non-ASCII messages from getting eaten during send attempts. It seems the current implementation could have worked with a bit more space in `buf` (enough to fit one more UTF-8 character of maximum size, up to 4 bytes) and by comparing `written` instead of `read`, since `read` is counted in UTF-16 code units rather than bytes. Even then, it would still behave oddly when the caret is not at the end of the string: on regular input the caret is forced to the end, and if you paste something, the existing text at the end gets cut off when the entire string is too long. The behavior of the built-in `maxlength` attribute is not very consistent across browsers: if the user attempts to replace the currently selected text and not even one character of the replacement string fits, Firefox preserves the selection while Chromium discards it. This implementation discards the selection. Shortcoming: hitting the length limit breaks undo (undo then does nothing). (This is a problem with the current implementation as well; it is just a bit more hidden because ASCII input gets properly constrained via the HTML `maxlength` attribute.) 
Test code: // "|" is the caret const tests = [ // below the byte limit, unchanged "🐱|", "🐱", "あい|", "あい", "abc🐱|", "abc🐱", "abcdあ|", "abcdあ", "abcdefg|", "abcdefg", // above the byte limit, truncated "abcdefgh|", "abcdefg", "あabcde|", "あabcd", "abcdeあ|", "abcde", "abcd🐱|", "abcd", "あいう|", "あい", "🐱🦈|", "🐱", // above the byte limit, caret in the middle of the string "abcd|efgh", "abcefgh", "あb|cdef", "あcdef", "abc|deあ", "abdeあ", "abc|d🐱", "abd🐱", "あい|う", "あう", "🐱|🦈", "🦈", ]; const cbl = constrainByteLength(7); for (let i = 0; i < tests.length; i += 2) { const sel = tests[i].indexOf('|'); console.assert(sel >= 0, `no caret in ${tests[i]}`); const inVal = tests[i].substring(0, sel) + tests[i].substring(sel+1); const event = {target: {value: inVal, selectionStart: sel, selectionEnd: sel}}; cbl(event); const actual = event.target.value, expected = tests[i+1]; console.assert(expected === actual, `expected ${expected}, got ${actual}`); } --- chat.js | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/chat.js b/chat.js index df281905..5a3834ea 100644 --- a/chat.js +++ b/chat.js @@ -479,17 +479,26 @@ function chatInputActionFired() { /** @param {number} length */ function constrainByteLength(length) { - const buf = new Uint8Array(length + 1); + const buf = new Uint8Array(length); const enc = new TextEncoder(); - const dec = new TextDecoder(undefined, { fatal: false }); return event => { const target = event?.target; if (!target) return; - if (enc.encodeInto(target.value, buf).read <= length) return; - let recovered = dec.decode(buf.slice(0, length)); - const invalid = recovered.indexOf('�'); - if (invalid > -1) recovered = recovered.slice(0, invalid); - target.value = recovered; + // modifying input contents during composition would interrupt it + // and prevent the user from finishing + if (event.isComposing) return; + + const sel = target.selectionStart; + const v = target.value; + + // length is implicitly constrained by encodeInto + 
// encode the substring after the caret first so it doesn't get + // overwritten if the caret is in the middle of the string + const e2 = enc.encodeInto(v.substring(sel), buf); + // then do the one before the caret + const e1 = enc.encodeInto(v.substring(0, sel), buf.subarray(e2.written)); + target.value = v.substring(0, e1.read) + v.substring(sel, sel+e2.read); + target.selectionEnd = e1.read; }; }