Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions man/zoslib.1
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,30 @@ New files are created with encoding IBM-1047 and tagged IBM-1047.

.TP
.B _ENCODE_FILE_NEW=BINARY

.TP
.B _ENCODE_FILE_NEW=UTF-8
Copy link
Copy Markdown

@augmentcode augmentcode Bot May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

man/zoslib.1:26: The BINARY description line ("New files are created without translation and are tagged as BINARY.") now appears after the UTF-8 stanza, so it likely renders as the UTF-8 description and leaves the BINARY option undocumented.

Severity: low

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

New files are created with encoding UTF-8 and tagged UTF-8.


New files are created without translation and are tagged as BINARY.

.TP
.B _ENCODE_FILE_EXISTING=ISO8859-1
Copy link
Copy Markdown

@augmentcode augmentcode Bot May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

man/zoslib.1:33: Marking _ENCODE_FILE_EXISTING=ISO8859-1 as "(Default)" seems inconsistent with the implementation (when _ENCODE_FILE_EXISTING is unset, __tag_existing_file() is a no-op and existing file behavior depends on the file’s current tag/heuristics).

Severity: low

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

(Default) Existing files are opened with encoding ISO8859-1.

.TP
.B _ENCODE_FILE_EXISTING=IBM-1047
Existing files are opened with encoding IBM-1047.

.TP
.B _ENCODE_FILE_EXISTING=BINARY
Existing files are opened without translation as BINARY.

.TP
.B _ENCODE_FILE_EXISTING=UTF-8
Existing files are opened with encoding UTF-8.

.TP
.B __UNTAGGED_READ_MODE=AUTO
(default) for handling of reading untagged files or files tagged with CCSID 1047 and txtflag turned off, up to 4k of data will be read and checked, if it is found to be in CCSID 1047, data is converted
Expand Down
18 changes: 18 additions & 0 deletions src/zos-io.cc
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,20 @@ int __tag_new_file(int fd) {
return __chgfdccsid(fd, ccsid);
}

// Applies the encoding named by the _ENCODE_FILE_EXISTING environment
// variable to the already-open file descriptor `fd`.
//
// - Variable unset: nothing is done and 0 is returned.
// - Variable equal to "BINARY": the result of __setfdbinary(fd) is returned.
// - Any other value: the value is handed to __chgfdcodeset() as the codeset
//   name and that call's result is returned.
int __tag_existing_file(int fd) {
  char* codeset = getenv("_ENCODE_FILE_EXISTING");

  // No override requested; leave the descriptor untouched.
  if (codeset == NULL)
    return 0;

  // "BINARY" is dispatched to the dedicated binary helper; every other value
  // is treated as a codeset name.
  return strcmp(codeset, "BINARY") == 0 ? __setfdbinary(fd)
                                        : __chgfdcodeset(fd, codeset);
}

int __chgfdcodeset(int fd, char* codeset) {
unsigned short ccsid = __toCcsid(codeset);
if (!ccsid)
Expand Down Expand Up @@ -877,6 +891,8 @@ int __open_ascii(const char *filename, int opts, ...) {
}
// Enable auto-conversion of untagged files
else if (S_ISREG(sb.st_mode)) {
__tag_existing_file(fd);
Copy link
Copy Markdown

@augmentcode augmentcode Bot May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

src/zos-io.cc:894: After retagging the FD via _ENCODE_FILE_EXISTING, the auto-conversion decision still consults sb.st_tag from the pre-open stat(), so previously-untagged files can still trigger the heuristic and enable conversion (potentially overriding _ENCODE_FILE_EXISTING, especially BINARY).

Severity: high

Other Locations
  • src/zos-io.cc:939

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

errno = old_errno;
struct file_tag *t = &sb.st_tag;
if (t->ft_txtflag == 0 && (t->ft_ccsid == 0 || t->ft_ccsid == 1047) &&
(opts & O_RDONLY) != 0) {
Expand Down Expand Up @@ -920,6 +936,8 @@ FILE *__fopen_ascii(const char *filename, const char *mode) {
}
// Enable auto-conversion of untagged files
else if (S_ISREG(sb.st_mode)) {
__tag_existing_file(fd);
errno = old_errno;
struct file_tag *t = &sb.st_tag;
if (t->ft_txtflag == 0 && (t->ft_ccsid == 0 || t->ft_ccsid == 1047) &&
strcmp(mode, "r") == 0) {
Expand Down
60 changes: 60 additions & 0 deletions test/test-clib-override.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,66 @@ TEST_F(CLIBOverrides, open) {
EXPECT_EQ(__getfdccsid(fd), 0x10000 + 819);
memset(buff2, 1, sizeof(buff));
read(fd, buff2, sizeof(buff));

// Delete and re-open temp_path _ENCODE_FILE_NEW=UTF-8
setenv("_ENCODE_FILE_NEW", "UTF-8", 1);
Copy link
Copy Markdown

@augmentcode augmentcode Bot May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test/test-clib-override.cc:184: The fd from the preceding ISO8859-1 open(temp_path, O_RDONLY) isn’t closed before remove()/reusing fd, and the later UTF-8 open(temp_path, O_RDONLY) also isn’t closed; this can leak descriptors and make the test flaky.

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

remove(temp_path);
fd = open(temp_path, O_CREAT | O_WRONLY, 0777);
EXPECT_EQ(__getfdccsid(fd), 0x10000 + 1208);
write(fd, buff, sizeof(buff));
close(fd);

fd = open(temp_path, O_RDONLY);
EXPECT_EQ(__getfdccsid(fd), 0x10000 + 1208);
memset(buff2, 1, sizeof(buff));
read(fd, buff2, sizeof(buff));
EXPECT_EQ(strcmp(buff, buff2), 0);

// Test _ENCODE_FILE_EXISTING with IBM-1047
unsetenv("_ENCODE_FILE_NEW");
remove(temp_path);
fd = open(temp_path, O_CREAT | O_WRONLY, 0777);
write(fd, buff, sizeof(buff));
close(fd);

setenv("_ENCODE_FILE_EXISTING", "IBM-1047", 1);
fd = open(temp_path, O_RDONLY);
EXPECT_EQ(__getfdccsid(fd), 0x10000 + 1047);
memset(buff2, 1, sizeof(buff));
read(fd, buff2, sizeof(buff));
Copy link
Copy Markdown

@augmentcode augmentcode Bot May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test/test-clib-override.cc:208: These tests don’t check the return value from read() and then use strcmp(buff, buff2); if a short read occurs, buff2 may not be NUL-terminated and strcmp can read past the buffer (undefined behavior).

Severity: medium

Other Locations
  • test/test-clib-override.cc:194
  • test/test-clib-override.cc:217
  • test/test-clib-override.cc:226
  • test/test-clib-override.cc:235

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

EXPECT_EQ(strcmp(buff, buff2), 0);
close(fd);

// Test _ENCODE_FILE_EXISTING with BINARY
setenv("_ENCODE_FILE_EXISTING", "BINARY", 1);
fd = open(temp_path, O_RDONLY);
EXPECT_EQ(__getfdccsid(fd), 65535);
memset(buff2, 1, sizeof(buff));
read(fd, buff2, sizeof(buff));
EXPECT_EQ(strcmp(buff, buff2), 0);
close(fd);

// Test _ENCODE_FILE_EXISTING with ISO8859-1
setenv("_ENCODE_FILE_EXISTING", "ISO8859-1", 1);
fd = open(temp_path, O_RDONLY);
EXPECT_EQ(__getfdccsid(fd), 0x10000 + 819);
memset(buff2, 1, sizeof(buff));
read(fd, buff2, sizeof(buff));
EXPECT_EQ(strcmp(buff, buff2), 0);
close(fd);

// Test _ENCODE_FILE_EXISTING with UTF-8
setenv("_ENCODE_FILE_EXISTING", "UTF-8", 1);
fd = open(temp_path, O_RDONLY);
EXPECT_EQ(__getfdccsid(fd), 0x10000 + 1208);
memset(buff2, 1, sizeof(buff));
read(fd, buff2, sizeof(buff));
EXPECT_EQ(strcmp(buff, buff2), 0);
close(fd);

unsetenv("_ENCODE_FILE_EXISTING");


EXPECT_EQ(strcmp(buff, buff2), 0);
free(buff2);
close(fd);
Expand Down
Loading