From 2b1fc02982aecd0fce2dc884e54fa83f30b5b6d9 Mon Sep 17 00:00:00 2001 From: sabi789 Date: Tue, 12 May 2026 11:13:07 -0400 Subject: [PATCH] Add _ENCODE_FILE_EXISTING variable Signed-off-by: sabi789 --- man/zoslib.1 | 22 ++++++++++++++ src/zos-io.cc | 18 ++++++++++++ test/test-clib-override.cc | 60 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+) diff --git a/man/zoslib.1 b/man/zoslib.1 index b7b1b4f2..e05ca1f3 100644 --- a/man/zoslib.1 +++ b/man/zoslib.1 @@ -21,8 +21,30 @@ New files are created with encoding IBM-1047 and tagged IBM-1047. .TP .B _ENCODE_FILE_NEW=BINARY New files are created without translation and are tagged as BINARY. + +.TP +.B _ENCODE_FILE_NEW=UTF-8 +New files are created with encoding UTF-8 and tagged UTF-8. + + +.TP +.B _ENCODE_FILE_EXISTING=ISO8859-1 +(Default) Existing files are opened with encoding ISO8859-1. + +.TP +.B _ENCODE_FILE_EXISTING=IBM-1047 +Existing files are opened with encoding IBM-1047. + +.TP +.B _ENCODE_FILE_EXISTING=BINARY +Existing files are opened without translation as BINARY. + +.TP +.B _ENCODE_FILE_EXISTING=UTF-8 +Existing files are opened with encoding UTF-8. 
+ .TP .B __UNTAGGED_READ_MODE=AUTO (default) for handling of reading untagged files or files tagged with CCSID 1047 and txtflag turned off, up to 4k of datawill be read and checked, if it is found to be in CCSID 1047, data is converted diff --git a/src/zos-io.cc b/src/zos-io.cc index 100cf206..d081c67b 100644 --- a/src/zos-io.cc +++ b/src/zos-io.cc @@ -761,6 +761,20 @@ int __tag_new_file(int fd) { return __chgfdccsid(fd, ccsid); } +int __tag_existing_file(int fd) { + // Applies _ENCODE_FILE_EXISTING to fd; returns 0 and leaves fd alone when it is unset. + char* requested = getenv("_ENCODE_FILE_EXISTING"); + + if (requested) { + // BINARY goes to __setfdbinary; any other value is passed to + // __chgfdcodeset as a codeset name. + const int is_binary = strcmp(requested, "BINARY") == 0; + return is_binary ? __setfdbinary(fd) : __chgfdcodeset(fd, requested); + } + + return 0; +} + int __chgfdcodeset(int fd, char* codeset) { unsigned short ccsid = __toCcsid(codeset); if (!ccsid) @@ -877,6 +891,8 @@ int __open_ascii(const char *filename, int opts, ...) { } // Enable auto-conversion of untagged files else if (S_ISREG(sb.st_mode)) { + __tag_existing_file(fd); + errno = old_errno; struct file_tag *t = &sb.st_tag; if (t->ft_txtflag == 0 && (t->ft_ccsid == 0 || t->ft_ccsid == 1047) && (opts & O_RDONLY) != 0) { @@ -920,6 +936,8 @@ FILE *__fopen_ascii(const char *filename, const char *mode) { } // Enable auto-conversion of untagged files else if (S_ISREG(sb.st_mode)) { + __tag_existing_file(fd); + errno = old_errno; struct file_tag *t = &sb.st_tag; if (t->ft_txtflag == 0 && (t->ft_ccsid == 0 || t->ft_ccsid == 1047) && strcmp(mode, "r") == 0) { diff --git a/test/test-clib-override.cc b/test/test-clib-override.cc index 46148bd9..50692e36 100644 --- a/test/test-clib-override.cc +++ b/test/test-clib-override.cc @@ -179,6 +179,66 @@ TEST_F(CLIBOverrides, open) { EXPECT_EQ(__getfdccsid(fd), 0x10000 + 819); memset(buff2, 1, sizeof(buff)); read(fd, buff2, sizeof(buff)); + + // Delete and re-open temp_path _ENCODE_FILE_NEW=UTF-8 + setenv("_ENCODE_FILE_NEW", "UTF-8", 1); + remove(temp_path); + fd = open(temp_path, O_CREAT | O_WRONLY, 0777); 
EXPECT_EQ(__getfdccsid(fd), 0x10000 + 1208); + write(fd, buff, sizeof(buff)); + close(fd); + + fd = open(temp_path, O_RDONLY); + EXPECT_EQ(__getfdccsid(fd), 0x10000 + 1208); + memset(buff2, 1, sizeof(buff)); + read(fd, buff2, sizeof(buff)); + EXPECT_EQ(strcmp(buff, buff2), 0); + + // Test _ENCODE_FILE_EXISTING with IBM-1047 + unsetenv("_ENCODE_FILE_NEW"); + remove(temp_path); + fd = open(temp_path, O_CREAT | O_WRONLY, 0777); + write(fd, buff, sizeof(buff)); + close(fd); + + setenv("_ENCODE_FILE_EXISTING", "IBM-1047", 1); + fd = open(temp_path, O_RDONLY); + EXPECT_EQ(__getfdccsid(fd), 0x10000 + 1047); + memset(buff2, 1, sizeof(buff)); + read(fd, buff2, sizeof(buff)); + EXPECT_EQ(strcmp(buff, buff2), 0); + close(fd); + + // Test _ENCODE_FILE_EXISTING with BINARY + setenv("_ENCODE_FILE_EXISTING", "BINARY", 1); + fd = open(temp_path, O_RDONLY); + EXPECT_EQ(__getfdccsid(fd), 65535); + memset(buff2, 1, sizeof(buff)); + read(fd, buff2, sizeof(buff)); + EXPECT_EQ(strcmp(buff, buff2), 0); + close(fd); + + // Test _ENCODE_FILE_EXISTING with ISO8859-1 + setenv("_ENCODE_FILE_EXISTING", "ISO8859-1", 1); + fd = open(temp_path, O_RDONLY); + EXPECT_EQ(__getfdccsid(fd), 0x10000 + 819); + memset(buff2, 1, sizeof(buff)); + read(fd, buff2, sizeof(buff)); + EXPECT_EQ(strcmp(buff, buff2), 0); + close(fd); + + // Test _ENCODE_FILE_EXISTING with UTF-8 + setenv("_ENCODE_FILE_EXISTING", "UTF-8", 1); + fd = open(temp_path, O_RDONLY); + EXPECT_EQ(__getfdccsid(fd), 0x10000 + 1208); + memset(buff2, 1, sizeof(buff)); + read(fd, buff2, sizeof(buff)); + EXPECT_EQ(strcmp(buff, buff2), 0); + close(fd); + + unsetenv("_ENCODE_FILE_EXISTING"); + + EXPECT_EQ(strcmp(buff, buff2), 0); free(buff2); close(fd);