From 8f59efa0120ecab259d8d0ebf9384326a39ec630 Mon Sep 17 00:00:00 2001 From: jpco Date: Wed, 14 Aug 2024 20:09:48 -0700 Subject: [PATCH 1/4] read in chunks for lseek()able files. --- configure.ac | 4 ++-- prim-io.c | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 617decb9..8979c71a 100644 --- a/configure.ac +++ b/configure.ac @@ -94,8 +94,8 @@ AC_PROG_GCC_TRADITIONAL AC_FUNC_MMAP AC_FUNC_WAIT3 -AC_CHECK_FUNCS(strerror strtol lstat setrlimit sigrelse sighold sigaction \ -sysconf setsid sigsetjmp) +AC_CHECK_FUNCS(strerror strtol lseek lstat setrlimit sigrelse sighold \ +sigaction sysconf setsid sigsetjmp) AC_CACHE_CHECK(for an abused getenv, es_cv_abused_getenv, AC_RUN_IFELSE([AC_LANG_SOURCE([[ diff --git a/prim-io.c b/prim-io.c index d2061d26..7cd7e4f4 100644 --- a/prim-io.c +++ b/prim-io.c @@ -405,8 +405,29 @@ PRIM(read) { freebuffer(buffer); buffer = openbuffer(0); - while ((c = read1(fd)) != EOF && c != '\n') - buffer = bufputc(buffer, c); +#if HAVE_LSEEK + if (lseek(fd, 0, SEEK_CUR) < 0) { +#endif + while ((c = read1(fd)) != EOF && c != '\n') + buffer = bufputc(buffer, c); +#if HAVE_LSEEK + } else { + int n; + char *p; + char s[BUFSIZE]; + c = EOF; + while ((n = eread(fd, s, BUFSIZE)) > 0) { + c = 0; + if ((p = strchr(s, '\n')) == NULL) + buffer = bufncat(buffer, s, n); + else { + buffer = bufncat(buffer, s, (p - s)); + lseek(fd, 1 + ((p - s) - n), SEEK_CUR); + break; + } + } + } +#endif if (c == EOF && buffer->current == 0) { freebuffer(buffer); From 69a38d218f9a3e64c349d7472aa8d33398c75ddd Mon Sep 17 00:00:00 2001 From: Jack Conger Date: Mon, 23 Jun 2025 22:04:37 -0700 Subject: [PATCH 2/4] Add NUL awareness for seeking %read Still incomplete; failing test case illustrates the remaining bug --- prim-io.c | 9 ++++++--- test/tests/read.es | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 test/tests/read.es diff --git a/prim-io.c b/prim-io.c index 78bfb2cc..9a79442c 100644 --- a/prim-io.c +++ b/prim-io.c @@ -450,12 +450,15 @@ PRIM(read) { c = EOF; while ((n = eread(fd, s, BUFSIZE)) > 0) { c = 0; - if ((p = strchr(s, '\n')) == NULL) - buffer = bufncat(buffer, s, n); - else { + if ((p = memchr(s, '\0', n)) != NULL) { + lseek(fd, 1 + ((p - s) - n), SEEK_CUR); + fail("$&read", "%%read: null character encountered"); + } else if ((p = strchr(s, '\n')) != NULL) { buffer = bufncat(buffer, s, (p - s)); lseek(fd, 1 + ((p - s) - n), SEEK_CUR); break; + } else { + buffer = bufncat(buffer, s, n); } } } diff --git a/test/tests/read.es b/test/tests/read.es new file mode 100644 index 00000000..e4e15983 --- /dev/null +++ b/test/tests/read.es @@ -0,0 +1,38 @@ +#!/usr/local/bin/es + +test 'null reading' { + let (tmp = `{mktemp test-nul.XXXXXX}) + unwind-protect { + echo first line > $tmp + ./testrun 0 >> $tmp + + let (fl = (); ex = (); remainder = ()) { + catch @ e { + ex = $e + remainder = <=%read + } { + fl = <=%read + %read + } < $tmp + assert {~ $fl 'first line'} 'seeking read reads valid line' + assert {~ $ex(3) *'null character encountered'*} 'seeking read throws exception correctly' + assert {~ $remainder 'sult 6'} 'seeking read leaves file in correct state:' $remainder + } + + let ((fl ex remainder) = `` \n { + let (fl = ()) + cat $tmp | catch @ e { + echo $fl\n$e(3)\n^<=%read + } { + fl = <=%read + %read + } + }) { + assert {~ $fl 'first line'} 'non-seeking read reads valid line' + assert {~ $ex *'null character encountered'*} 'non-seeking read throws exception correctly' + assert {~ $remainder 'sult 6'} 'non-seeking read leaves file in correct state' + } + } { + rm -f $tmp + } +} From 8d4345e4069c78302aa745cd014323b7664a6c02 Mon Sep 17 00:00:00 2001 From: Jack Conger Date: Wed, 11 Mar 2026 09:03:46 -0700 Subject: [PATCH 3/4] Make seeking read work with EINTR and NUL bytes --- prim-io.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/prim-io.c b/prim-io.c index 94da5fdd..eb3886ef 100644 --- a/prim-io.c +++ b/prim-io.c @@ -413,7 +413,7 @@ PRIM(newfd) { return mklist(mkstr(str("%d", newfd())), NULL); } -/* read1 -- read one byte */ +/* read1 -- read one byte, return the byte */ static int read1(int fd) { int nread; unsigned char buf; @@ -426,6 +426,18 @@ static int read1(int fd) { return nread == 0 ? EOF : buf; } +/* readn -- read up to n bytes, return the number read */ +static int readn(int fd, char *s, size_t n) { + int nread; + do { + nread = read(fd, s, n); + SIGCHK(); + } while (nread == -1 && errno == EINTR); + if (nread == -1) + fail("$&read", "%s", esstrerror(errno)); + return nread; +} + PRIM(read) { int c; int fd = fdmap(0); @@ -446,21 +458,22 @@ PRIM(read) { #if HAVE_LSEEK } else { int n; - char *p; + char *np, *zp; char s[BUFSIZE]; c = EOF; - while ((n = eread(fd, s, BUFSIZE)) > 0) { + while ((n = readn(fd, s, BUFSIZE)) > 0) { c = 0; - if ((p = memchr(s, '\0', n)) != NULL) { - lseek(fd, 1 + ((p - s) - n), SEEK_CUR); + if ((np = strchr(s, '\n')) != NULL) { + lseek(fd, 1 + ((np - s) - n), SEEK_CUR); + n = np - s; + } + if ((zp = memchr(s, '\0', n)) != NULL) { + lseek(fd, 1 + ((zp - s) - n), SEEK_CUR); fail("$&read", "%%read: null character encountered"); - } else if ((p = strchr(s, '\n')) != NULL) { - buffer = bufncat(buffer, s, (p - s)); - lseek(fd, 1 + ((p - s) - n), SEEK_CUR); - break; - } else { - buffer = bufncat(buffer, s, n); } + buffer = bufncat(buffer, s, n); + if (np != NULL && *np == '\n') + break; } } #endif From f243b9bb986f75cbef1b59ca0e2787994b8ae859 Mon Sep 17 00:00:00 2001 From: Jack Conger Date: Wed, 11 Mar 2026 09:06:37 -0700 Subject: [PATCH 4/4] Re-add lost backslash in configure.ac --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 9705f9fc..69b7b9f2 100644 --- a/configure.ac +++ b/configure.ac @@ -77,7 +77,7 @@ dnl Checks for library functions. AC_TYPE_GETGROUPS AC_FUNC_MMAP -AC_CHECK_FUNCS(strerror strtol lseek lstat setrlimit sigrelse sighold +AC_CHECK_FUNCS(strerror strtol lseek lstat setrlimit sigrelse sighold \ sigaction sysconf sigsetjmp getrusage gettimeofday mmap mprotect) AC_CACHE_CHECK(whether getenv can be redefined, es_cv_local_getenv,