Skip to content

Commit 5f8024a

Browse files
committed
init/aws-nitro: support partial read/write ethernet packet forwarding
Ensure that the full requested number of bytes is forwarded between the host vsock, which provides the network access, and the guest TAP device, which routes the application network traffic. To write this code I used Cursor and the claude-4.6-opus-high model. Signed-off-by: Jake Correnti <jakecorrenti+github@proton.me>
1 parent 7c5292c commit 5f8024a

2 files changed

Lines changed: 145 additions & 23 deletions

File tree

init/aws-nitro/device/net_tap_afvsock.c

Lines changed: 142 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,64 @@
3131
#define TUN_DEV_MAJOR 10
3232
#define TUN_DEV_MINOR 200
3333

34+
/*
35+
* The Extended Ethernet Frame header is 14 bytes, representing the Destination
36+
* Address (6 bytes), Source Address (6 bytes) and the Ethertype (2 bytes).
37+
*/
38+
#define ETH_HEADER_LEN 14
39+
40+
/*
 * Size in bytes of the length prefix that is exchanged over the vsock ahead
 * of each ethernet frame.
 */
#define PROXY_HEADER_LEN 4
41+
42+
/*
 * Fill buf with exactly n bytes from fd, looping over short reads and
 * restarting any read that was interrupted by a signal (EINTR).
 *
 * Returns n once every requested byte has arrived, 0 if EOF is hit before a
 * single byte was read, or -1 on error. EOF after only part of the data was
 * read is reported as -1 with errno set to EIO.
 */
static ssize_t read_exact(int fd, void *buf, size_t n)
{
    char *cursor = buf;
    size_t remaining = n;

    while (remaining != 0) {
        ssize_t got = read(fd, cursor, remaining);

        if (got > 0) {
            cursor += got;
            remaining -= (size_t)got;
            continue;
        }

        if (got == 0) {
            /* EOF is only "clean" when nothing has been consumed yet. */
            if (remaining == n)
                return 0;

            errno = EIO;
            return -1;
        }

        /* got < 0: an interrupted syscall is the only retryable failure. */
        if (errno != EINTR)
            return -1;
    }

    return (ssize_t)n;
}
}
67+
68+
/*
 * Push all n bytes of buf out to fd, looping over short writes and
 * restarting any write that was interrupted by a signal (EINTR).
 *
 * Returns n once everything has been written, or -1 on error. A write that
 * reports zero bytes of progress is treated as an error with errno set to
 * EIO.
 */
static ssize_t write_all(int fd, const void *buf, size_t n)
{
    const char *cursor = buf;
    size_t remaining = n;

    while (remaining != 0) {
        ssize_t put = write(fd, cursor, remaining);

        if (put > 0) {
            cursor += put;
            remaining -= (size_t)put;
            continue;
        }

        if (put == 0) {
            /* No progress and no error code: surface it as an I/O error. */
            errno = EIO;
            return -1;
        }

        /* put < 0: an interrupted syscall is the only retryable failure. */
        if (errno != EINTR)
            return -1;
    }

    return (ssize_t)n;
}
}
91+
3492
/*
3593
* Forward ethernet packets to/from the host vsock providing network access and
3694
* the guest TAP device routing application network traffic.
@@ -53,7 +111,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
53111
sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
54112
if (sock_fd < 0) {
55113
perror("creating INET socket to get TAP MTU");
56-
return -errno;
114+
return -1;
57115
}
58116

59117
memset(&ifr, 0, sizeof(struct ifreq));
@@ -63,22 +121,29 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
63121
if (ret < 0) {
64122
close(sock_fd);
65123
perror("fetch MTU of TAP device");
66-
exit(-errno);
124+
return -1;
67125
}
68126

69127
close(sock_fd);
70128

71-
buf = (unsigned char *)malloc(ifr.ifr_mtu);
129+
uint32_t eth_frame_size = ifr.ifr_mtu + ETH_HEADER_LEN;
130+
buf = (unsigned char *)malloc(eth_frame_size);
72131
if (buf == NULL) {
73132
perror("allocate buffer for TAP/vsock communication");
74-
exit(-1);
133+
return -1;
75134
}
76135

77-
// Forward the MTU to the host for it to allocate a corresponding buffer.
78-
ret = write(vsock_fd, (void *)&ifr.ifr_mtu, sizeof(int));
79-
if (ret < sizeof(int)) {
80-
perror("write TAP device MTU to host");
81-
exit(-errno);
136+
// Forward the max ethernet frame size to the host for it to allocate a
137+
// corresponding buffer.
138+
139+
// To avoid issues where the host endianness and the enclave endianness is
140+
// different, convert to big endian to pass the max ethernet frame size to
141+
// the host.
142+
uint32_t eth_frame_size_be = htonl(eth_frame_size);
143+
if (write_all(vsock_fd, &eth_frame_size_be, sizeof(eth_frame_size)) < 0) {
144+
perror("write max ethernet frame size to host");
145+
free(buf);
146+
return -1;
82147
}
83148

84149
pfds[0].fd = vsock_fd;
@@ -97,25 +162,81 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
97162
event_found = false;
98163
// Event on vsock. Read the frame and write it to the TAP device.
99164
if (pfds[0].revents & POLLIN) {
100-
nread = read(vsock_fd, &sz, 4);
101-
if (nread != 4)
102-
exit(0);
165+
nread = read_exact(vsock_fd, &sz, PROXY_HEADER_LEN);
166+
if (nread == 0) {
167+
// vsock connection closed cleanly
168+
break;
169+
} else if (nread < 0) {
170+
perror("unable to read the proxy header from vsock");
171+
free(buf);
172+
return -1;
173+
}
103174

104-
unsigned int len = htonl(sz);
175+
unsigned int len = ntohl(sz);
176+
if (len > eth_frame_size) {
177+
fprintf(stderr,
178+
"ethernet frame size %u exceeds MTU + header size %u\n",
179+
len, eth_frame_size);
180+
free(buf);
181+
return -1;
182+
}
183+
184+
nread = read_exact(vsock_fd, buf, len);
185+
if (nread != (ssize_t)len) {
186+
if (nread == 0)
187+
errno = EIO;
188+
189+
perror("failed to read the ethernet frame from vsock");
190+
free(buf);
191+
return -1;
192+
}
105193

106-
nread = read(vsock_fd, buf, len);
107-
write(tun_fd, buf, nread);
194+
// TAP devices are expected to write an entire frame at once and not
195+
// do partial writes. Only retry if the syscall is interrupted.
196+
ssize_t bytes_written = 0;
197+
do {
198+
bytes_written = write(tun_fd, buf, nread);
199+
} while (bytes_written < 0 && errno == EINTR);
200+
201+
if (bytes_written != nread) {
202+
// the entire frame wasn't written
203+
if (bytes_written >= 0)
204+
errno = EIO;
205+
206+
perror("unable to write the ethernet frame to the TAP device");
207+
free(buf);
208+
return -1;
209+
}
108210

109211
event_found = true;
110212
}
111213

112214
// Event on the TAP device. Read the frame and write it to the vsock.
113215
if (pfds[1].revents & POLLIN) {
114-
nread = read(tun_fd, buf, ifr.ifr_mtu);
115-
if (nread > 0) {
116-
sz = htonl(nread);
117-
write(vsock_fd, (void *)&sz, 4);
118-
write(vsock_fd, buf, nread);
216+
// TAP devices are expected to read an entire frame at once and not
217+
// do partial reads. Only retry if the syscall is interrupted.
218+
do {
219+
nread = read(tun_fd, buf, eth_frame_size);
220+
} while (nread < 0 && errno == EINTR);
221+
if (nread <= 0) {
222+
if (nread == 0)
223+
errno = EIO;
224+
225+
perror("failed to read the ethernet frame from the TAP device");
226+
free(buf);
227+
return -1;
228+
}
229+
230+
sz = htonl((uint32_t)nread);
231+
if (write_all(vsock_fd, (void *)&sz, PROXY_HEADER_LEN) < 0) {
232+
perror("unable to write the proxy header to vsock");
233+
free(buf);
234+
return -1;
235+
}
236+
if (write_all(vsock_fd, buf, nread) < 0) {
237+
perror("unable to write the ethernet frame to vsock");
238+
free(buf);
239+
return -1;
119240
}
120241

121242
event_found = true;
@@ -132,6 +253,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
132253
break;
133254
}
134255

256+
free(buf);
135257
close(vsock_fd);
136258
close(tun_fd);
137259

src/aws_nitro/src/enclave/proxy/proxies/net.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,14 @@ impl DeviceProxy for NetProxy {
9595
let (mut vsock, _) = listener.accept().map_err(Error::VsockAccept)?;
9696

9797
/*
98-
* Upon initial connection, read the MTU size from the enclave and allocate the buffer
98+
* Upon initial connection, read the MTU size + ethernet frame header from the enclave and allocate the buffer
9999
* accordingly.
100100
*/
101101
let size = {
102102
let mut size_buf = [0u8; size_of::<u32>()];
103-
let _ = vsock.read(&mut size_buf).map_err(Error::VsockRead)?;
103+
vsock.read_exact(&mut size_buf).map_err(Error::VsockRead)?;
104104

105-
u32::from_ne_bytes(size_buf)
105+
u32::from_be_bytes(size_buf)
106106
};
107107

108108
self.buf

0 commit comments

Comments
 (0)