Skip to content

Commit 72cad14

Browse files
marcelmblurb-it[bot]erlend-aaslandvstinneremmatyping
authored
gh-90533: Implement BytesIO.peek() (#150917)
Add io.BytesIO.peek() method to read without advancing position. Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> Co-authored-by: Victor Stinner <vstinner@python.org> Co-authored-by: Emma Smith <emma@emmatyping.dev> Co-authored-by: Stan Ulbrych <stan@python.org> Co-authored-by: Cody Maloney <cmaloney@users.noreply.github.com>
1 parent 3ad66bf commit 72cad14

8 files changed

Lines changed: 198 additions & 4 deletions

File tree

Doc/library/io.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,17 @@ than raw I/O does.
771771

772772
Return :class:`bytes` containing the entire contents of the buffer.
773773

774+
.. method:: peek(size=0, /)
775+
776+
Return a copy of the buffer from the current position onwards without
777+
advancing the position.
778+
779+
If *size* is less than one or omitted, at most
780+
:data:`DEFAULT_BUFFER_SIZE` bytes are returned.
781+
Otherwise, at most *size* bytes are returned.
782+
Return an empty :class:`bytes` object at EOF.
783+
784+
.. versionadded:: next
774785

775786
.. method:: read1(size=-1, /)
776787

Doc/whatsnew/3.16.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,12 @@ gzip
163163
which is passed on to the constructor of the :class:`~gzip.GzipFile` class.
164164
(Contributed by Marin Misur in :gh:`91372`.)
165165

166+
io
167+
--
168+
169+
* Add :meth:`io.BytesIO.peek` method to read without advancing position.
170+
(Contributed by Marcel Martin in :gh:`90533`.)
171+
166172

167173
logging
168174
-------

Lib/_pyio.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,13 @@ def tell(self):
10001000
raise ValueError("tell on closed file")
10011001
return self._pos
10021002

1003+
def peek(self, size=0):
    """Return bytes from the current position without advancing it.

    If *size* is less than one, at most io.DEFAULT_BUFFER_SIZE bytes are
    returned; otherwise at most *size* bytes are returned.  Fewer bytes
    are returned when the buffer ends first, and an empty bytes object
    is returned when the position is at or past the end of the buffer.

    Raises ValueError if the file is closed.
    """
    if self.closed:
        raise ValueError("peek on closed file")
    if size < 1:
        size = io.DEFAULT_BUFFER_SIZE
    # Slicing returns the buffer's own type; wrap it in bytes() so the
    # result is always an immutable bytes object, as documented, and can
    # never alias or expose a mutable internal buffer.
    return bytes(self._buffer[self._pos:self._pos + size])
1009+
10031010
def truncate(self, pos=None):
10041011
if self.closed:
10051012
raise ValueError("truncate on closed file")

Lib/test/test_free_threading/test_io.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ def readinto(barrier, b, into, *ignore):
6767
barrier.wait()
6868
b.readinto(into)
6969

70+
def peek(barrier, b, *ignore):
71+
barrier.wait()
72+
b.peek()
73+
7074
def close(barrier, b, *ignore):
7175
barrier.wait()
7276
b.close()
@@ -103,6 +107,7 @@ def sizeof(barrier, b, *ignore):
103107
self.check([truncate] + [readline] * 10, self.ioclass(b'0\n'*20480))
104108
self.check([truncate] + [readlines] * 10, self.ioclass(b'0\n'*20480))
105109
self.check([truncate] + [readinto] * 10, self.ioclass(b'0\n'*204800), bytearray(b'0\n'*204800))
110+
self.check([truncate] + [peek] * 10, self.ioclass(b'0\n'*204800))
106111
self.check([close] + [write] * 10, self.ioclass())
107112
self.check([truncate] + [getvalue] * 10, self.ioclass(b'0\n'*204800))
108113
self.check([truncate] + [getbuffer] * 10, self.ioclass(b'0\n'*204800))

Lib/test/test_io/test_memoryio.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,77 @@ def test_issue141311(self):
566566
buf = bytearray(2)
567567
self.assertEqual(0, memio.readinto(buf))
568568

569+
def test_peek(self):
    # peek() must return data from the current position without moving
    # it; a size below one means "up to io.DEFAULT_BUFFER_SIZE bytes".
    buf = self.buftype("1234567890")
    with self.ioclass(buf) as memio:
        # At the start of the stream.
        self.assertEqual(memio.tell(), 0)
        self.assertEqual(memio.peek(1), buf[:1])
        self.assertEqual(memio.peek(1), buf[:1])
        self.assertEqual(memio.peek(), buf)
        self.assertEqual(memio.peek(3), buf[:3])
        self.assertEqual(memio.peek(5), buf[:5])
        self.assertEqual(memio.peek(0), buf)
        self.assertEqual(memio.peek(len(buf) + 100), buf)
        self.assertEqual(memio.peek(-1), buf)
        self.assertEqual(memio.tell(), 0)

        # After consuming one byte.
        memio.read(1)
        self.assertEqual(memio.tell(), 1)
        self.assertEqual(memio.peek(1), buf[1:2])
        self.assertEqual(memio.peek(), buf[1:])
        self.assertEqual(memio.peek(3), buf[1:4])
        self.assertEqual(memio.peek(5), buf[1:6])
        self.assertEqual(memio.peek(0), buf[1:])
        self.assertEqual(memio.peek(len(buf) + 100), buf[1:])
        self.assertEqual(memio.peek(-1), buf[1:])
        self.assertEqual(memio.tell(), 1)

        # At EOF every size yields the empty value.
        memio.read()
        self.assertEqual(memio.tell(), len(buf))
        self.assertEqual(memio.peek(1), self.EOF)
        self.assertEqual(memio.peek(3), self.EOF)
        self.assertEqual(memio.peek(5), self.EOF)
        self.assertEqual(memio.peek(0), self.EOF)
        self.assertEqual(memio.tell(), len(buf))

        # Peeking works after writing
        abc = self.buftype("abc")
        memio.write(abc)
        self.assertEqual(memio.peek(), self.EOF)
        memio.seek(len(buf))
        self.assertEqual(memio.peek(), abc)
        self.assertEqual(memio.peek(-1), abc)
        self.assertEqual(memio.peek(len(abc) + 100), abc)
        self.assertEqual(memio.tell(), len(buf))

    with self.ioclass(buf) as memio:
        memio.seek(len(buf))
        self.assertEqual(memio.peek(), self.EOF)

    # Length greater than DEFAULT_BUFFER_SIZE
    buf = self.buftype("1234567890" * io.DEFAULT_BUFFER_SIZE)
    with self.ioclass(buf) as memio:
        self.assertEqual(memio.peek(), buf[:io.DEFAULT_BUFFER_SIZE])
        self.assertEqual(memio.peek(0), buf[:io.DEFAULT_BUFFER_SIZE])
        self.assertEqual(memio.peek(-1), buf[:io.DEFAULT_BUFFER_SIZE])
        self.assertEqual(memio.peek(io.DEFAULT_BUFFER_SIZE + 100),
                         buf[:io.DEFAULT_BUFFER_SIZE + 100])
        self.assertEqual(memio.peek(io.DEFAULT_BUFFER_SIZE * 100), buf)

    # Current position beyond buffer end
    with self.ioclass(buf) as memio:
        memio.seek(len(buf) + 100)
        self.assertEqual(memio.peek(), self.EOF)
    with self.ioclass(buf) as memio:
        memio.read()
        memio.truncate(0)
        self.assertEqual(memio.tell(), len(buf))
        self.assertEqual(memio.peek(), self.EOF)

    # Peek after close raises
    self.assertRaises(ValueError, memio.peek)
639+
569640
def test_unicode(self):
570641
memio = self.ioclass()
571642

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add :meth:`io.BytesIO.peek` method to read without advancing position.

Modules/_io/bytesio.c

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,9 @@ _io_BytesIO_tell_impl(bytesio *self)
420420
return PyLong_FromSsize_t(self->pos);
421421
}
422422

423+
/* Read without advancing position. */
423424
static PyObject *
424-
read_bytes_lock_held(bytesio *self, Py_ssize_t size)
425+
peek_bytes_lock_held(bytesio *self, Py_ssize_t size)
425426
{
426427
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
427428

@@ -432,7 +433,6 @@ read_bytes_lock_held(bytesio *self, Py_ssize_t size)
432433
if (size > 1 &&
433434
self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) &&
434435
FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) == 0) {
435-
self->pos += size;
436436
return Py_NewRef(self->buf);
437437
}
438438

@@ -444,10 +444,20 @@ read_bytes_lock_held(bytesio *self, Py_ssize_t size)
444444
}
445445

446446
output = PyBytes_AS_STRING(self->buf) + self->pos;
447-
self->pos += size;
448447
return PyBytes_FromStringAndSize(output, size);
449448
}
450449

450+
static PyObject *
451+
read_bytes_lock_held(bytesio *self, Py_ssize_t size)
452+
{
453+
PyObject *bytes = peek_bytes_lock_held(self, size);
454+
if (bytes != NULL) {
455+
assert(PyBytes_GET_SIZE(bytes) == size);
456+
self->pos += size;
457+
}
458+
return bytes;
459+
}
460+
451461
/*[clinic input]
452462
@critical_section
453463
_io.BytesIO.read
@@ -499,6 +509,41 @@ _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size)
499509
return _io_BytesIO_read_impl(self, size);
500510
}
501511

512+
513+
/*[clinic input]
514+
@critical_section
515+
_io.BytesIO.peek
516+
size: Py_ssize_t = 0
517+
/
518+
519+
Return bytes from the stream without advancing the position.
520+
521+
Return an empty bytes object at EOF.
522+
[clinic start generated code]*/
523+
524+
static PyObject *
525+
_io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size)
526+
/*[clinic end generated code: output=fa4d8ce28b35db9b input=2ce74234b10aec3e]*/
527+
{
528+
CHECK_CLOSED(self);
529+
530+
if (size < 1) {
531+
size = DEFAULT_BUFFER_SIZE;
532+
}
533+
534+
/* adjust invalid sizes */
535+
Py_ssize_t n = self->string_size - self->pos;
536+
if (size > n) {
537+
size = n;
538+
/* n can be negative after truncate() or seek() */
539+
if (size < 0) {
540+
size = 0;
541+
}
542+
}
543+
return peek_bytes_lock_held(self, size);
544+
}
545+
546+
502547
/*[clinic input]
503548
@critical_section
504549
_io.BytesIO.readline
@@ -1135,6 +1180,7 @@ static struct PyMethodDef bytesio_methods[] = {
11351180
_IO_BYTESIO_READLINE_METHODDEF
11361181
_IO_BYTESIO_READLINES_METHODDEF
11371182
_IO_BYTESIO_READ_METHODDEF
1183+
_IO_BYTESIO_PEEK_METHODDEF
11381184
_IO_BYTESIO_GETBUFFER_METHODDEF
11391185
_IO_BYTESIO_GETVALUE_METHODDEF
11401186
_IO_BYTESIO_SEEK_METHODDEF

Modules/_io/clinic/bytesio.c.h

Lines changed: 48 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)