From 72790ba64577ecd34ce6cda568664a3d6065384a Mon Sep 17 00:00:00 2001 From: Varun Chawla Date: Fri, 13 Feb 2026 00:52:26 -0800 Subject: [PATCH] Fix IndexError in scan_tag(), check_key() and check_value() on empty input Calling peek(1) without checking buffer bounds causes IndexError when the buffer doesn't have enough characters available. This occurs with empty or very short input where peek(1) tries to access buffer[pointer+1] but only buffer[pointer] exists (the EOF marker '\0'). Fixed by adding bounds checking before peek() calls in: - check_key(): Check if pointer+1 < len(buffer) before peek(1) - check_value(): Check if pointer+1 < len(buffer) before peek(1) - scan_tag(): Check buffer bounds before both peek(1) and peek() calls When buffer bounds are insufficient, fall back to peek() (or, for the final check in scan_tag(), directly to the EOF marker '\0'), ensuring proper handling of end-of-stream cases. Fixes #906 --- lib/yaml/scanner.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py index de925b07f..808542e6a 100644 --- a/lib/yaml/scanner.py +++ b/lib/yaml/scanner.py @@ -716,7 +716,10 @@ def check_key(self): # KEY(block context): '?' (' '|'\n') else: - return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + if self.pointer + 1 < len(self.buffer): + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + else: + return self.peek() == '\0' def check_value(self): @@ -726,7 +729,10 @@ def check_value(self): # VALUE(block context): ':' (' '|'\n') else: - return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + if self.pointer + 1 < len(self.buffer): + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + else: + return self.peek() == '\0' def check_plain(self): @@ -935,7 +941,10 @@ def scan_anchor(self, TokenClass): def scan_tag(self): # See the specification for details. 
start_mark = self.get_mark() - ch = self.peek(1) + if self.pointer + 1 < len(self.buffer): + ch = self.peek(1) + else: + ch = self.peek() if ch == '<': handle = None self.forward(2) @@ -965,7 +974,10 @@ def scan_tag(self): handle = '!' self.forward() suffix = self.scan_tag_uri('tag', start_mark) - ch = self.peek() + if self.pointer < len(self.buffer): + ch = self.peek() + else: + ch = '\0' if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a tag", start_mark, "expected ' ', but found %r" % ch, self.get_mark())