From b6f41a8af47e790bb8517091f7f58e3a04436cd7 Mon Sep 17 00:00:00 2001
From: flan <flan@flande.re>
Date: Wed, 20 Aug 2025 23:59:52 +0200
Subject: [PATCH] Make constrainByteLength work
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Should hopefully prevent overly long non-ASCII messages from
getting eaten during send attempts.

The current implementation could probably have been made to
work by giving buf a bit more space (enough to fit one more
UTF-8 character of the largest size, i.e. up to 4 bytes) and
by comparing `written` instead of `read`, since `read` counts
UTF-16 code units rather than bytes. It would still behave
oddly when the caret is not at the end of the string, though:
on regular input the caret is forced to the end, and if you
paste something while the entire string is too long, the
existing text at the end gets cut off.

The behavior of the built-in `maxlength` attribute is not very
consistent across browsers: if the user attempts to replace
currently selected text and not even one character from the
replacement string fits, Firefox preserves the selection while
Chromium discards it instead. This implementation discards the
selection.

Shortcoming: hitting the length limit breaks undo (undo then
does nothing). This is a problem with the current implementation
as well; it's just a bit more hidden there, as ASCII inputs get
properly constrained via HTML `maxlength`.

Test code:

// Flat table of (input, expected) pairs checked against a 7-byte limit.
// In each input, "|" marks where the caret sits; it is stripped before
// the value is handed to the handler.
const tests = [
  // below the byte limit, unchanged
  "🐱|",       "🐱",
  "あい|",     "あい",
  "abc🐱|",    "abc🐱",
  "abcdあ|",   "abcdあ",
  "abcdefg|",  "abcdefg",
  // above the byte limit, truncated
  "abcdefgh|", "abcdefg",
  "あabcde|",  "あabcd",
  "abcdeあ|",  "abcde",
  "abcd🐱|",   "abcd",
  "あいう|",   "あい",
  "🐱🦈|",     "🐱",
  // above the byte limit, caret in the middle of the string
  "abcd|efgh", "abcefgh",
  "あb|cdef",  "あcdef",
  "abc|deあ",  "abdeあ",
  "abc|d🐱",   "abd🐱",
  "あい|う",   "あう",
  "🐱|🦈",     "🦈",
];
const cbl = constrainByteLength(7);
for (let idx = 0; idx < tests.length; idx += 2) {
  const input = tests[idx];
  const expected = tests[idx + 1];

  // the caret index is in UTF-16 code units, like selectionStart
  const caret = input.indexOf('|');
  console.assert(caret >= 0, `no caret in ${input}`);

  // minimal stand-in for an input event: only the fields the handler uses
  const fakeTarget = {
    value: input.substring(0, caret) + input.substring(caret + 1),
    selectionStart: caret,
    selectionEnd: caret,
  };
  const event = { target: fakeTarget };
  cbl(event);

  const actual = event.target.value;
  console.assert(expected === actual, `expected ${expected}, got ${actual}`);
}
---
 chat.js | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/chat.js b/chat.js
index df281905..5a3834ea 100644
--- a/chat.js
+++ b/chat.js
@@ -479,17 +479,26 @@ function chatInputActionFired() {
 
 /** @param {number} length */
 function constrainByteLength(length) {
-  const buf = new Uint8Array(length + 1);
+  const buf = new Uint8Array(length);
   const enc = new TextEncoder();
-  const dec = new TextDecoder(undefined, { fatal: false });
   return event => {
     const target = event?.target;
     if (!target) return;
-    if (enc.encodeInto(target.value, buf).read <= length) return;
-    let recovered = dec.decode(buf.slice(0, length));
-    const invalid = recovered.indexOf('�');
-    if (invalid > -1) recovered = recovered.slice(0, invalid);
-    target.value = recovered;
+    // modifying input contents during composition would interrupt it
+    // and prevent the user from finishing
+    if (event.isComposing) return;
+
+    const sel = target.selectionStart;
+    const v = target.value;
+
+    // length is implicitly constrained by encodeInto
+    // encode the substring after the caret first so it doesn't get
+    // overwritten if the caret is in the middle of the string
+    const e2 = enc.encodeInto(v.substring(sel), buf);
+    // then do the one before the caret
+    const e1 = enc.encodeInto(v.substring(0, sel), buf.subarray(e2.written));
+    target.value = v.substring(0, e1.read) + v.substring(sel, sel+e2.read);
+    target.selectionEnd = e1.read;
   };
 }