From 00c672f1ae1ba8585d1e2f06974be36744700b85 Mon Sep 17 00:00:00 2001 From: sachintu47 Date: Mon, 11 May 2026 02:50:26 -0400 Subject: [PATCH 1/3] fix trailing space stripping for different encodings --- src/libdio.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/libdio.c b/src/libdio.c index 214edc8..9a6266b 100644 --- a/src/libdio.c +++ b/src/libdio.c @@ -17,6 +17,7 @@ #include <_Nascii.h> #include #include +#include <_Ccsid.h> #define _OPEN_SYS_EXT #include @@ -28,6 +29,17 @@ const struct s99_rbx s99rbxtemplate = {"S99RBX",S99RBXVR,{0,1,0,0,0,0,0},0,0,0}; +static int get_space_char(int ccsid) { + if (ccsid > 0) { + __csType cs = __CcsidType(ccsid); + if (cs == _CSTYPE_ASCII || cs == _CSTYPE_UTF8 || cs == _CSTYPE_UTF16 || + cs == _CSTYPE_UTF32) { + return 0x20; // ASCII space + } + } + return 0x40; // Default EBCDIC space +} + void dbgmsg(struct DFILE* dfile, const char* format, ...) { if (!dfile->debug) { @@ -704,8 +716,8 @@ static enum DIOERR read_dataset_internal(struct DFILE* dfile) difile->cur_read_offset += sizeof(reclen); } memcpy(&dfile->buffer[difile->cur_read_offset], record, bytes_to_copy); - if (!is_binary) - isbinary = is_binary(&dfile->buffer[difile->cur_read_offset], bytes_to_copy); + if (!isbinary) + isbinary = is_binary(&dfile->buffer[difile->cur_read_offset], bytes_to_copy); #ifdef DEBUG printf("%5.5u <%*.*s>\n", reclen, reclen, reclen, record); #endif @@ -1144,6 +1156,7 @@ int write_dataset_to_temp_file(struct DFILE *dfile, char *tempname, struct f_cnvrt req = {SETCVTOFF, 0, 0}; fcntl(temp_fd, F_CONTROL_CVT, &req); + int space_char = get_space_char(dfile->ccsid); int length_prefix = has_length_prefix(dfile->recfm); int i = 0; char *data = dfile->buffer; @@ -1152,7 +1165,13 @@ int write_dataset_to_temp_file(struct DFILE *dfile, char *tempname, while (i < dfile->bufflen) { memcpy(&reclen, &data[i], sizeof(reclen)); i += sizeof(reclen); - if (write(temp_fd, &data[i], reclen) != reclen) { + int actual_len = reclen; + if (!force_binary) { + while (actual_len > 0 && data[i + actual_len - 1] == space_char) { + actual_len--; + } + } + if (write(temp_fd, &data[i], actual_len) != actual_len) { close_dataset(dfile); close(temp_fd); return 1; @@ -1167,7 +1186,13 @@ int write_dataset_to_temp_file(struct DFILE *dfile, char *tempname, } } else { while (i < dfile->bufflen) { - if (write(temp_fd, &data[i], dfile->reclen) != dfile->reclen) { + int actual_len = dfile->reclen; + if (!force_binary) { + while (actual_len > 0 && data[i + actual_len - 1] == space_char) { + actual_len--; + } + } + if (write(temp_fd, &data[i], actual_len) != actual_len) { close_dataset(dfile); close(temp_fd); return 1; From ec245f2ba0e5b9df825cd3dee7f594070c2246ee Mon Sep 17 00:00:00 2001 From: sachintu47 Date: Mon, 11 May 2026 03:28:06 -0400 Subject: [PATCH 2/3] disable stripping for multi-byte encodings and restrict binary scan to data bytes. --- src/libdio.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/libdio.c b/src/libdio.c index 9a6266b..2bfe2d7 100644 --- a/src/libdio.c +++ b/src/libdio.c @@ -32,12 +32,14 @@ const struct s99_rbx s99rbxtemplate = {"S99RBX",S99RBXVR,{0,1,0,0,0,0,0},0,0,0}; static int get_space_char(int ccsid) { if (ccsid > 0) { __csType cs = __CcsidType(ccsid); - if (cs == _CSTYPE_ASCII || cs == _CSTYPE_UTF8 || cs == _CSTYPE_UTF16 || - cs == _CSTYPE_UTF32) { + if (cs == _CSTYPE_ASCII || cs == _CSTYPE_UTF8) { return 0x20; // ASCII space } + if (cs == _CSTYPE_EBCDIC) { + return 0x40; // EBCDIC space + } } - return 0x40; // Default EBCDIC space + return -1; // Default to disable trimming for unknown/multi-byte encodings } void dbgmsg(struct DFILE* dfile, const char* format, ...) @@ -715,9 +717,9 @@ static enum DIOERR read_dataset_internal(struct DFILE* dfile) memcpy(&dfile->buffer[difile->cur_read_offset], &reclen, sizeof(reclen)); difile->cur_read_offset += sizeof(reclen); } - memcpy(&dfile->buffer[difile->cur_read_offset], record, bytes_to_copy); + memcpy(&dfile->buffer[difile->cur_read_offset], record, rc); if (!isbinary) - isbinary = is_binary(&dfile->buffer[difile->cur_read_offset], bytes_to_copy); + isbinary = is_binary(&dfile->buffer[difile->cur_read_offset], rc); #ifdef DEBUG printf("%5.5u <%*.*s>\n", reclen, reclen, reclen, record); #endif @@ -1163,10 +1165,16 @@ int write_dataset_to_temp_file(struct DFILE *dfile, char *tempname, if (length_prefix) { uint16_t reclen; while (i < dfile->bufflen) { + if (i + sizeof(reclen) > dfile->bufflen) { + break; // Corrupt buffer + } memcpy(&reclen, &data[i], sizeof(reclen)); i += sizeof(reclen); int actual_len = reclen; - if (!force_binary) { + if (i + actual_len > dfile->bufflen) { + actual_len = dfile->bufflen - i; // Limit to remaining buffer + } + if (!force_binary && space_char != -1) { while (actual_len > 0 && data[i + actual_len - 1] == space_char) { actual_len--; } @@ -1187,7 +1195,7 @@ int write_dataset_to_temp_file(struct DFILE *dfile, char *tempname, } else { while (i < dfile->bufflen) { int actual_len = dfile->reclen; - if (!force_binary) { + if (!force_binary && space_char != -1) { while (actual_len > 0 && data[i + actual_len - 1] == space_char) { actual_len--; } From 588f405de5d630af6288aeeda6413bd2cf87a92f Mon Sep 17 00:00:00 2001 From: sachintu47 Date: Mon, 11 May 2026 06:43:35 -0400 Subject: [PATCH 3/3] augment review fixes --- src/libdio.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/libdio.c b/src/libdio.c index 2bfe2d7..c7f17ca 100644 --- a/src/libdio.c +++ b/src/libdio.c @@ -30,6 +30,9 @@ const struct s99_rbx s99rbxtemplate = {"S99RBX",S99RBXVR,{0,1,0,0,0,0,0},0,0,0}; static int get_space_char(int ccsid) { + if (ccsid == 0) { + return 0x40; // Default to EBCDIC for untagged datasets + } if (ccsid > 0) { __csType cs = __CcsidType(ccsid); if (cs == _CSTYPE_ASCII || cs == _CSTYPE_UTF8) { @@ -1166,13 +1169,17 @@ int write_dataset_to_temp_file(struct DFILE *dfile, char *tempname, uint16_t reclen; while (i < dfile->bufflen) { if (i + sizeof(reclen) > dfile->bufflen) { - break; // Corrupt buffer + close_dataset(dfile); + close(temp_fd); + return 1; // Corrupt buffer: header truncated } memcpy(&reclen, &data[i], sizeof(reclen)); i += sizeof(reclen); int actual_len = reclen; if (i + actual_len > dfile->bufflen) { - actual_len = dfile->bufflen - i; // Limit to remaining buffer + close_dataset(dfile); + close(temp_fd); + return 1; // Corrupt buffer: record truncated } if (!force_binary && space_char != -1) { while (actual_len > 0 && data[i + actual_len - 1] == space_char) { @@ -1195,6 +1202,9 @@ int write_dataset_to_temp_file(struct DFILE *dfile, char *tempname, } else { while (i < dfile->bufflen) { int actual_len = dfile->reclen; + if (i + actual_len > dfile->bufflen) { + actual_len = dfile->bufflen - i; // Clamp to remaining buffer + } if (!force_binary && space_char != -1) { while (actual_len > 0 && data[i + actual_len - 1] == space_char) { actual_len--;