diff --git a/README.md b/README.md index ca0a254e..97cc8114 100644 --- a/README.md +++ b/README.md @@ -77,8 +77,21 @@ The Raspberry Pi emulation has the following limitations: ## Networking -The system provides medium level networking support. At startup, it scans for available network cards, loads the matching driver if present and creates a logical L2 interface for each device; a loopback interface is also added for both IPv4 and IPv6, though the current loopback implementation is partial and rejects all localhost packets. IPv4 networking is fully implemented, while IPv6 support is limited to interface structures and it cannot perform real communication. -Once interfaces and dispatcher are initialized, the system starts ARP, DHCP, DNS and SNTP processes. At the moment, SNTP sets the unix time and the time zone is configured manually since DHCP options 100 and 101 are not always included. -The stack supports UDP and TCP sockets, and an HTTP socket is implemented on top of TCP. ICMP is also supported with the system able to respond to ping requests. DNS resolution can be called directly by applications and is also used by sockets. +The system provides medium level networking support. At startup, it scans for available network cards, loads the matching driver if present and creates a logical L2 interface for each device; a loopback interface is also added for both IPv4 and IPv6. + +The stack follows a layered design. At link level it handles basic neighbor resolution (ARP for IPv4 and NDP for IPv6). + +On top of that, the stack provides full IPv4 support and a solid IPv6 implementation. Both protocols include routing support and the related control via ICMP and ICMPv6. 
IPv6 is already usable in practice and covers the core features needed for normal operation; some more advanced pieces are missing (such as full SLAAC support, router preference handling, the complete set of extension header features, and ULA). + +Multicast is supported and traffic is filtered at the NIC level to avoid unnecessary delivery; group membership announcements are made through IGMP for IPv4 and MLD for IPv6, keeping multicast use clean and controlled. + +For address configuration the system provides small APIs based on DHCP for IPv4 and on DHCPv6 (stateful and stateless) and SLAAC for IPv6, allowing interfaces to be configured automatically without special handling in applications. + +For name resolution a DNS resolver is available, backed by an internal cache to reduce latency and unnecessary network interrupts. Service discovery on the local network is also supported in a lightweight form via DNS-SD/mDNS responders and SSDP utilities (currently disabled). + +Time sync is provided through an NTP-based service. The current implementation is not fully standards compliant but it is designed to be as accurate as possible for the time being, focusing on correct offset calculation and stable clock adjustments rather than full protocol coverage. SNTP is also present for compatibility, but it is considered deprecated and kept for legacy use. + +Apps can interact with the network through a socket-oriented interface. UDP and TCP sockets are supported and a small HTTP layer is available both as a client and as a server. + An embedded HTTP server is included and listens on port 80, serving a minimal page. Also discovery mechanism is available: the system probes the local network via UDP broadcast on port 8080, and when a responder replies it connects via HTTP on port 80, though it currently does nothing noteworthy. 
An implementation of the server can be found at the [RedactedOS Firmware Server Repository](https://github.com/differrari/RedactedOS_firmware_server/tree/main) diff --git a/kernel/bin/bin_mod.c b/kernel/bin/bin_mod.c index bf33fe29..72475c18 100644 --- a/kernel/bin/bin_mod.c +++ b/kernel/bin/bin_mod.c @@ -1,5 +1,6 @@ #include "bin_mod.h" #include "ping.h" +#include "shutdown.h" #include "tracert.h" #include "monitor_processes.h" #include "kernel_processes/kprocess_loader.h" @@ -22,6 +23,7 @@ typedef struct open_bin_ref { open_bin_ref available_cmds[] = { { "ping", run_ping }, + { "shutdown", run_shutdown }, { "tracert", run_tracert }, { "monitor", monitor_procs }, }; diff --git a/kernel/bin/ping.c b/kernel/bin/ping.c index 23b4a243..5b05b5df 100644 --- a/kernel/bin/ping.c +++ b/kernel/bin/ping.c @@ -10,8 +10,10 @@ #include "syscalls/syscalls.h" #include "networking/internet_layer/ipv4.h" #include "networking/internet_layer/ipv4_route.h" -#include "networking/application_layer/dns.h" +#include "networking/application_layer/dns/dns.h" #include "networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/internet_layer/icmpv6.h" typedef struct { ip_version_t ver; @@ -75,6 +77,23 @@ static bool parse_args(int argc, char *argv[], ping_opts_t *o) { return true; } +static const char *status_to_msg(uint8_t st) { + switch (st) { + case PING_TIMEOUT: return "Request timed out."; + case PING_NET_UNREACH: return "Destination Net Unreachable."; + case PING_HOST_UNREACH: return "Destination Host Unreachable."; + case PING_PROTO_UNREACH: return "Protocol Unreachable."; + case PING_PORT_UNREACH: return "Port Unreachable."; + case PING_FRAG_NEEDED: return "Fragmentation Needed."; + case PING_SRC_ROUTE_FAILED: return "Source Route Failed."; + case PING_ADMIN_PROHIBITED: return "Administratively Prohibited."; + case PING_TTL_EXPIRED: return "Time To Live exceeded."; + case PING_PARAM_PROBLEM: return "Parameter Problem."; + case 
PING_REDIRECT: return "Redirect received."; + default: return "No reply (unknown error)."; + } +} + static int ping_v4(file *fd, const ping_opts_t *o) { const char *host = o->host; @@ -100,32 +119,25 @@ static int ping_v4(file *fd, const ping_opts_t *o) { write_file(fd, host, strlen_max(host, STRING_MAX_LEN)); write_file(fd, " (", 2); write_file(fd, ipstr, strlen_max(ipstr, STRING_MAX_LEN)); - write_file(fd, ") with 32 bytes of data:", 25); - write_file(fd, "\n", 1); + write_file(fd, ") with 32 bytes of data:\n", 26); uint32_t sent = 0, received = 0, min_ms = UINT32_MAX, max_ms = 0; uint64_t sum_ms = 0; uint16_t id = (uint16_t)(get_current_proc_pid() & 0xFFFF); uint16_t seq_base = (uint16_t)(get_time() & 0xFFFF); - int8_t bound_index = -1; ipv4_tx_opts_t txo = {0}; const ipv4_tx_opts_t *txop = NULL; if (o->src_set) { l3_ipv4_interface_t *l3 = l3_ipv4_find_by_ip(o->src_ip); - //kprintf("%d", l3); if (!l3) { const char *em = "ping: invalid source (no local ip match)\n"; write_file(fd, em, strlen_max(em, STRING_MAX_LEN)); return 2; } - - bound_index = (int8_t)l3->l3_id; - txo.index = (uint8_t)bound_index; - txo.scope = IPV4_TX_BOUND_L3; + txo.index = (uint8_t)l3->l3_id; + txo.scope = IP_TX_BOUND_L3; txop = &txo; - char ssrc[16]; - ipv4_to_string(o->src_ip, ssrc); } for (uint32_t i = 0; i < o->count; i++) { @@ -133,7 +145,7 @@ static int ping_v4(file *fd, const ping_opts_t *o) { uint16_t seq = (uint16_t)(seq_base + i); ping_result_t res = {0}; - bool ok = icmp_ping(dst_ip_be, id, seq, o->timeout_ms, txop, o->ttl, &res); + bool ok = icmp_ping(dst_ip_be, id, seq, o->timeout_ms, txop, (uint8_t)o->ttl, &res); if (ok) { ++received; @@ -146,46 +158,93 @@ static int ping_v4(file *fd, const ping_opts_t *o) { write_file(fd, "\n", 1); free_sized(ln.data, ln.mem_length); } else { - const char *msg = NULL; - if (res.status == PING_TIMEOUT) msg = "Request timed out."; - else { - switch (res.status) { - case PING_NET_UNREACH: - msg = "Destination Net Unreachable."; - break; - case 
PING_HOST_UNREACH: - msg = "Destination Host Unreachable."; - break; - case PING_PROTO_UNREACH: - msg = "Protocol Unreachable."; - break; - case PING_PORT_UNREACH: - msg = "Port Unreachable."; - break; - case PING_FRAG_NEEDED: - msg = "Fragmentation Needed."; - break; - case PING_SRC_ROUTE_FAILED: - msg = "Source Route Failed."; - break; - case PING_ADMIN_PROHIBITED: - msg = "Administratively Prohibited."; - break; - case PING_TTL_EXPIRED: - msg = "Time To Live exceeded."; - break; - case PING_PARAM_PROBLEM: - msg = "Parameter Problem."; - break; - case PING_REDIRECT: - msg = "Redirect received."; - break; - default: - msg = "No reply (unknown error)."; - break; - } - } - write_file(fd, msg, strlen_max(msg, STRING_MAX_LEN)); + const char *msg = status_to_msg(res.status); + write_file(fd, msg, strlen(msg)); + write_file(fd, "\n", 1); + } + + if (i + 1 < o->count) msleep(o->interval_ms); + } + + write_file(fd, "\n", 1); + + string h = string_format("--- %s ping statistics ---", host); + write_file(fd, h.data, h.length); + write_file(fd, "\n", 1); + free_sized(h.data, h.mem_length); + + uint32_t loss = (sent == 0) ? 0 : (uint32_t)((((uint64_t)(sent - received)) * 100) / sent); + uint32_t total_time = (o->count > 0) ? (o->count - 1) * o->interval_ms : 0; + + string s = string_format("%u packets transmitted, %u received, %u%% packet loss, time %ums", sent, received, loss, total_time); + write_file(fd, s.data, s.length); + write_file(fd, "\n", 1); + free_sized(s.data, s.mem_length); + + if (received > 0) { + uint32_t avg = (uint32_t)(sum_ms / received); + if (min_ms == UINT32_MAX) min_ms = avg; + string r = string_format("rtt min/avg/max = %u/%u/%u ms", min_ms, avg, max_ms); + write_file(fd, r.data, r.length); + write_file(fd, "\n", 1); + free_sized(r.data, r.mem_length); + } + + return (received > 0) ? 
0 : 1; +} + +static int ping_v6(file *fd, const ping_opts_t *o) { + const char *host = o->host; + + uint8_t dst6[16] ={0}; + bool is_lit = ipv6_parse(host, dst6); + if (!is_lit) { + dns_result_t dr = dns_resolve_aaaa(host, dst6, DNS_USE_BOTH, o->timeout_ms); + if (dr != DNS_OK) { + string m = string_format("ping: dns lookup failed (%d) for '%s'",(int)dr, host); + write_file(fd, m.data, m.length); + write_file(fd, "\n", 1); + free_sized(m.data, m.mem_length); + return 2; + } + } + + char ipstr[64]; + ipv6_to_string(dst6, ipstr, (int)sizeof(ipstr)); + + write_file(fd, "PING ", 5); + write_file(fd, host, strlen(host)); + write_file(fd, " (", 2); + write_file(fd, ipstr, strlen(ipstr)); + write_file(fd, ") with 32 bytes of data:", 25); + write_file(fd, "\n", 1); + + uint32_t sent = 0, received = 0, min_ms = UINT32_MAX, max_ms = 0; + uint64_t sum_ms = 0; + uint16_t id = (uint16_t)(get_current_proc_pid() & 0xFFFF); + uint16_t seq_base = (uint16_t)(get_time() & 0xFFFF); + + for (uint32_t i = 0; i < o->count; i++) { + ++sent; + uint16_t seq = (uint16_t)(seq_base + i); + + ping6_result_t res = {0}; + bool ok = icmpv6_ping(dst6, id, seq, o->timeout_ms, NULL, (uint8_t)o->ttl, &res); + + if (ok) { + ++received; + uint32_t rtt = res.rtt_ms; + if (rtt < min_ms) min_ms = rtt; + if (rtt > max_ms) max_ms = rtt; + sum_ms += rtt; + + string ln = string_format("Reply from %s: bytes=32 time=%ums", ipstr, (uint32_t)rtt); + write_file(fd, ln.data, ln.length); + write_file(fd, "\n", 1); + free_sized(ln.data, ln.mem_length); + } else { + const char *msg = status_to_msg(res.status); + write_file(fd, msg, strlen(msg)); write_file(fd, "\n", 1); } @@ -234,6 +293,13 @@ int run_ping(int argc, char *argv[]) { return 2; } + if (opts.ver == IP_VER6 && opts.src_set) { + const char *em = "ping: -s is only supported for IPv4\n"; + write_file(&fd, em, strlen(em)); + close_file(&fd); + return 2; + } + if (opts.ver == IP_VER4 && opts.src_set) { l3_ipv4_interface_t *l3 = l3_ipv4_find_by_ip(opts.src_ip); if 
(!l3) { @@ -250,7 +316,7 @@ int run_ping(int argc, char *argv[]) { int rc = 0; if (opts.ver == IP_VER4) rc = ping_v4(&fd, &opts); - else if (opts.ver == IP_VER6) rc = 3; //unimplemented + else if (opts.ver == IP_VER6) rc = ping_v6(&fd, &opts); else { help(&fd); rc = 2; } close_file(&fd); diff --git a/kernel/bin/shutdown.c b/kernel/bin/shutdown.c new file mode 100644 index 00000000..21fd8ad4 --- /dev/null +++ b/kernel/bin/shutdown.c @@ -0,0 +1,58 @@ +#include "shutdown.h" + +#include "kernel_processes/kprocess_loader.h" +#include "filesystem/filesystem.h" +#include "process/scheduler.h" +#include "std/string.h" +#include "hw/power.h" +#include "syscalls/syscalls.h" + +int run_shutdown(int argc, char* argv[]){ + const char *u = "usage: shutdown [-r|-p]\n -r reboot\n -p power off\n"; + + + uint16_t pid = get_current_proc_pid(); + string p = string_format("/proc/%i/out", pid); + file out; + FS_RESULT r = open_file(p.data, &out); + free_sized(p.data, p.mem_length); + + if (r != FS_RESULT_SUCCESS){ + return 2; + } + if (argc <= 0){ + write_file(&out, u,strlen(u)); + return 0; + } + + int mode = -1; + + for (int i = 0; i < argc; ++i){ + const char *a = argv[i]; + if (!a || a[0] == 0) continue; + + if (strcmp(a, "-r") == 0) mode = SHUTDOWN_REBOOT; + else if (strcmp(a, "-p") == 0) mode = SHUTDOWN_POWEROFF; + else{ + write_file(&out, u,strlen(u)); + msleep(100); + close_file(&out); + return 2; + } + } + + if (mode == -1){ + write_file(&out, u,strlen(u)); + msleep(100); + close_file(&out); + return 2; + } + + if (mode == SHUTDOWN_REBOOT) write_file(&out, "Rebooting...\n", 13); + else write_file(&out, "Powering off...\n", 16); + + msleep(100); + close_file(&out); + hw_shutdown(mode); + return 0; +} \ No newline at end of file diff --git a/kernel/bin/shutdown.h b/kernel/bin/shutdown.h new file mode 100644 index 00000000..b69470ec --- /dev/null +++ b/kernel/bin/shutdown.h @@ -0,0 +1,3 @@ +#pragma once + +int run_shutdown(int argc, char* argv[]); \ No newline at end of file diff 
--git a/kernel/bin/tracert.c b/kernel/bin/tracert.c index 8e80f090..addbea48 100644 --- a/kernel/bin/tracert.c +++ b/kernel/bin/tracert.c @@ -9,8 +9,10 @@ #include "syscalls/syscalls.h" #include "networking/internet_layer/ipv4.h" #include "networking/internet_layer/ipv4_route.h" -#include "networking/application_layer/dns.h" +#include "networking/application_layer/dns/dns.h" #include "networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/internet_layer/icmpv6.h" typedef struct { ip_version_t ver; @@ -137,7 +139,7 @@ static int tracert_v4(file *fd, const tr_opts_t *o) { return 2; } txo.index = l3->l3_id; - txo.scope = IPV4_TX_BOUND_L3; + txo.scope = IP_TX_BOUND_L3; txop = &txo; } @@ -191,9 +193,9 @@ static int tracert_v4(file *fd, const tr_opts_t *o) { } } else { dead_streak++; - write_file(fd, "Request timed out.", 19); + write_file(fd, "Request timed out. ", 20); } - write_file(fd, "\n", 1); + write_file(fd, " \n", 2); if (hop_ip == dst) break; if (dead_streak >= o->timeout_streak_limit) { @@ -208,6 +210,106 @@ static int tracert_v4(file *fd, const tr_opts_t *o) { return 0; } +static int tracert_v6(file *fd, const tr_opts_t *o) { + uint8_t dst[16]; + bool lit = ipv6_parse(o->host, dst); + if (!lit) { + dns_result_t dr = dns_resolve_aaaa(o->host, dst, DNS_USE_BOTH, o->timeout_ms); + if (dr != DNS_OK) { + string m = string_format("tracert: dns lookup failed (%d) for '%s'", (int)dr, o->host); + write_file(fd, m.data, m.length); + write_file(fd, "\n", 1); + free_sized(m.data, m.mem_length); + return 2; + } + } + + char dip[64]; + ipv6_to_string(dst, dip, (int)sizeof(dip)); + write_file(fd, "Tracing route to ", 17); + write_file(fd, o->host, strlen_max(o->host, STRING_MAX_LEN)); + write_file(fd, " [", 2); + write_file(fd, dip, strlen_max(dip, STRING_MAX_LEN)); + write_file(fd, "]\n", 2); + + write_file(fd, "hop ", 5); + for (uint32_t p = 0; p < o->count; p++) { + string col = string_format("rtt%u ", p + 1); + 
write_file(fd, col.data, col.length); + free_sized(col.data, col.mem_length); + } + write_file(fd, "address\n", 8); + + uint16_t id = (uint16_t)(get_current_proc_pid() & 0xFFFF); + uint16_t seq0 = (uint16_t)(get_time() & 0xFFFF); + uint32_t dead_streak = 0; + + for (uint32_t hl = 1; hl <= o->max_ttl; hl++) { + string h = string_format("%2u ", hl); + write_file(fd, h.data, h.length); + free_sized(h.data, h.mem_length); + + uint8_t hop_ip[16]; + for (int i = 0; i < 16; i++) hop_ip[i] = 0; + bool any = false; + + for (uint32_t p = 0; p < o->count; p++) { + uint16_t seq = (uint16_t)(seq0 + (hl << 6) + p); + ping6_result_t r = (ping6_result_t){0}; + bool ok = icmpv6_ping(dst, id, seq, o->timeout_ms, NULL, (uint8_t)hl, &r); + + if (!ipv6_is_unspecified(r.responder_ip) && ipv6_is_unspecified(hop_ip)) ipv6_cpy(hop_ip, r.responder_ip); + + if (ok) { + any = true; + string ms = string_format("%ums ", r.rtt_ms); + write_file(fd, ms.data, ms.length); + free_sized(ms.data, ms.mem_length); + } else { + if (r.status == PING_TTL_EXPIRED || r.status == PING_REDIRECT || + r.status == PING_PARAM_PROBLEM || r.status == PING_NET_UNREACH || + r.status == PING_HOST_UNREACH || r.status == PING_ADMIN_PROHIBITED || + r.status == PING_FRAG_NEEDED || r.status == PING_SRC_ROUTE_FAILED) { + any = true; + string ms = string_format("%ums ", r.rtt_ms); + write_file(fd, ms.data, ms.length); + free_sized(ms.data, ms.mem_length); + } else { + write_file(fd, "* ", 3); + } + } + + if (p + 1 < o->count) msleep(o->interval_ms); + } + + if (any) { + dead_streak = 0; + if (!ipv6_is_unspecified(hop_ip)) { + char hip[64]; + ipv6_to_string(hop_ip, hip, (int)sizeof(hip)); + write_file(fd, hip, strlen_max(hip, STRING_MAX_LEN)); + } else { + write_file(fd, "???", 3); + } + } else { + dead_streak++; + write_file(fd, "Request timed out. 
", 19); + } + write_file(fd, "\n", 1); + + if (ipv6_cmp(hop_ip, dst) == 0) break; + if (dead_streak >= o->timeout_streak_limit) { + string note = string_format("stopping after %u consecutive timeout hops", dead_streak); + write_file(fd, note.data, note.length); + write_file(fd, "\n", 1); + free_sized(note.data, note.mem_length); + break; + } + } + + return 0; +} + int run_tracert(int argc, char *argv[]) { uint16_t pid = get_current_proc_pid(); string p = string_format("/proc/%u/out", pid); @@ -222,8 +324,10 @@ int run_tracert(int argc, char *argv[]) { return 2; } - if (o.ver == IP_VER6) {//unimplemented - return 3; + if (o.ver == IP_VER6) { + int rc = tracert_v6(&fd, &o); + close_file(&fd); + return rc; } int rc = tracert_v4(&fd, &o); diff --git a/kernel/exceptions/timer.c b/kernel/exceptions/timer.c index d94bd1a7..e940c90f 100644 --- a/kernel/exceptions/timer.c +++ b/kernel/exceptions/timer.c @@ -1,8 +1,16 @@ #include "timer.h" +#include "math/math.h" + +#define TIMER_SLEW_MAX_PPM 500 +#define TIMER_FREQ_MAX_PPM 500 -static int64_t g_wall_offset_us = 0; static int g_sync = 0; +static uint64_t g_wall_base_mono_us = 0; +static int64_t g_wall_base_unix_us = 0; +static int32_t g_freq_ppm = 0; +static int64_t g_slew_rem_us = 0; + static int32_t g_tz_offset_min = 0; static inline uint64_t rd_cntfrq_el0(void) { @@ -31,6 +39,12 @@ void permanent_disable_timer(){ void timer_init(uint64_t msecs) { timer_reset(msecs); timer_enable(); + + g_wall_base_mono_us = timer_now_usec(); + g_wall_base_unix_us = 0; + g_freq_ppm = 0; + g_slew_rem_us = 0; + g_sync = 0; } void virtual_timer_reset(uint64_t smsecs) { @@ -75,26 +89,79 @@ uint64_t timer_now_usec(void) { return us; } -void timer_apply_sntp_sample_us(uint64_t server_unix_us) { - int64_t mono_us = (int64_t)timer_now_usec(); - int64_t off = (int64_t)server_unix_us - mono_us; +static int64_t wall_advance_to(uint64_t mono_now_us) { + if (!g_wall_base_mono_us) g_wall_base_mono_us = mono_now_us; + + uint64_t dt_u = 
mono_now_us-g_wall_base_mono_us; + if (dt_u) { + int64_t dt = (int64_t)dt_u; + + int64_t base = g_wall_base_unix_us; + int64_t adj = dt +(dt * (int64_t)g_freq_ppm)/1000000LL; + base += adj; + + int64_t max_slew = (dt * (int64_t)TIMER_SLEW_MAX_PPM) / 1000000LL; + if (max_slew < 1)max_slew = 1; + + if (g_slew_rem_us) { + int64_t apply = clamp_i64(g_slew_rem_us, -max_slew, max_slew); + g_slew_rem_us -= apply; + base += apply; + } - g_wall_offset_us = off; + g_wall_base_mono_us = mono_now_us; + g_wall_base_unix_us = base; + return base; + } + + return g_wall_base_unix_us; +} + +uint64_t timer_wall_time_us(void) { + return (uint64_t)wall_advance_to(timer_now_usec()); +} + +uint64_t timer_unix_time_us(void) { + if (!g_sync) return 0; + int64_t u = wall_advance_to( timer_now_usec()); + if (u < 0) return 0; + return (uint64_t)u; +} + +void timer_sync_set_unix_us(uint64_t unix_us) { + uint64_t now_us = timer_now_usec(); + g_wall_base_mono_us = now_us; + g_wall_base_unix_us= (int64_t)unix_us; + g_slew_rem_us = 0; g_sync = 1; } +void timer_sync_slew_us(int64_t delta_us){ + const int64_t cap = 60LL * 1000000LL; + int64_t v = g_slew_rem_us + delta_us; + g_slew_rem_us = clamp_i64(v, -cap, cap); +} + +void timer_sync_set_freq_ppm(int32_t ppm) { + g_freq_ppm = clamp_i64((int32_t)ppm, -TIMER_FREQ_MAX_PPM, TIMER_FREQ_MAX_PPM); +} + +int32_t timer_sync_get_freq_ppm(void) { + return g_freq_ppm; +} + +void timer_apply_sntp_sample_us(uint64_t server_unix_us) { + timer_sync_set_unix_us(server_unix_us); +} + int timer_is_synchronised(void) { return g_sync; } uint64_t timer_unix_time_ms(void) { - if (!g_sync) return 0; - uint64_t now_us = timer_now_usec(); - int64_t off = g_wall_offset_us; - - int64_t unix_us = (int64_t)now_us + off; - if (unix_us < 0) return 0; - return (uint64_t)unix_us / 1000ULL; + uint64_t us = timer_unix_time_us(); + if (us ==0) return 0; + return us / 1000ULL; } void timer_set_timezone_minutes(int32_t minutes){ @@ -116,8 +183,8 @@ uint64_t timer_local_time_ms(void){ 
int timer_set_manual_unix_time_ms(uint64_t unix_ms){ if (g_sync) return -1; uint64_t now_us = timer_now_usec(); - int64_t off = (int64_t)(unix_ms * 1000ULL) - (int64_t)now_us; - g_wall_offset_us = off; + g_wall_base_mono_us = now_us; + g_wall_base_unix_us = (int64_t)(unix_ms * 1000ULL); return 0; } diff --git a/kernel/exceptions/timer.h b/kernel/exceptions/timer.h index d9d1d2c5..86e5552f 100644 --- a/kernel/exceptions/timer.h +++ b/kernel/exceptions/timer.h @@ -17,7 +17,15 @@ uint64_t timer_now(); uint64_t timer_now_msec(); uint64_t timer_now_usec(void); +uint64_t timer_wall_time_us(void); +uint64_t timer_unix_time_us(void); + void timer_apply_sntp_sample_us(uint64_t server_unix_us); + +void timer_sync_set_unix_us(uint64_t unix_us); +void timer_sync_slew_us(int64_t delta_us); +void timer_sync_set_freq_ppm(int32_t ppm); +int32_t timer_sync_get_freq_ppm(void); int timer_is_synchronised(void); uint64_t timer_unix_time_ms(void); diff --git a/kernel/filesystem/virtio_9p_pci.cpp b/kernel/filesystem/virtio_9p_pci.cpp index 704a6ae1..852d1de5 100644 --- a/kernel/filesystem/virtio_9p_pci.cpp +++ b/kernel/filesystem/virtio_9p_pci.cpp @@ -215,8 +215,9 @@ size_t Virtio9PDriver::choose_version(){ cmd->str_size = 8; memcpy(cmd->buffer,"9P2000.L",8); - virtio_send_2d(&np_dev, (uintptr_t)cmd, sizeof(p9_version_packet), (uintptr_t)resp, sizeof(p9_version_packet),VIRTQ_DESC_F_NEXT); - + virtio_buf b[2]={VBUF(cmd, sizeof(p9_version_packet), 0), VBUF(resp, sizeof(p9_version_packet), VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&np_dev, b, 2); + uint64_t msize = resp->msize; kfree(cmd, sizeof(p9_packet_header)); @@ -255,7 +256,8 @@ uint32_t Virtio9PDriver::attach(){ cmd->n_uname = 12345;//TODO: hash (name+timestamp) or random memcpy(cmd->uname,"REDACTED",8); - virtio_send_2d(&np_dev, (uintptr_t)cmd, sizeof(t_attach), (uintptr_t)resp, sizeof(r_attach),VIRTQ_DESC_F_NEXT); + virtio_buf b[2]= {VBUF(cmd, sizeof(t_attach), 0), VBUF(resp, sizeof(r_attach), VIRTQ_DESC_F_WRITE)}; + 
virtio_send_nd(&np_dev, b, 2); uint32_t rid = resp->header.id == P9_RLERROR ? INVALID_FID : fid; @@ -292,8 +294,8 @@ uint32_t Virtio9PDriver::open(uint32_t fid){ cmd->fid = fid; cmd->flags = O_RDONLY; - virtio_send_2d(&np_dev, (uintptr_t)cmd, sizeof(t_lopen), (uintptr_t)resp, sizeof(r_lopen),VIRTQ_DESC_F_NEXT); - + virtio_buf b[2] = {VBUF(cmd, sizeof(t_lopen), 0), VBUF(resp, sizeof(r_lopen), VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&np_dev, b, 2); uint32_t rid = resp->header.id == P9_RLERROR ? INVALID_FID : fid; kfree(cmd, sizeof(t_lopen)); @@ -335,7 +337,8 @@ size_t Virtio9PDriver::list_contents(uint32_t fid, void *buf, size_t size, uint6 cmd->count = size; cmd->offset = offset ? *offset : 0; - virtio_send_2d(&np_dev, (uintptr_t)cmd, sizeof(t_readdir), resp, sizeof(r_readdir) + cmd->count,VIRTQ_DESC_F_NEXT); + virtio_buf b[2]={VBUF(cmd, sizeof(t_readdir) ,0), VBUF((void*)resp, sizeof(r_readdir) + cmd->count, VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&np_dev, b, 2); kfree(cmd, sizeof(t_readdir)); @@ -413,7 +416,8 @@ uint32_t Virtio9PDriver::walk_dir(uint32_t fid, char *path){ cmd->header.size = p-(uintptr_t)cmd; - virtio_send_2d(&np_dev, (uintptr_t)cmd, cmd->header.size, resp, amount,VIRTQ_DESC_F_NEXT); + virtio_buf b[2] = { VBUF(cmd, cmd->header.size, 0), VBUF((void*)resp, amount, VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&np_dev, b, 2); uint32_t rid = ((p9_packet_header*)resp)->id == P9_RLERROR ? INVALID_FID : nfid; kfree((void*)cmd, sizeof(t_walk) + amount); @@ -455,8 +459,8 @@ uint64_t Virtio9PDriver::get_attribute(uint32_t fid, uint64_t mask){ cmd->fid = fid; cmd->mask = mask; - virtio_send_2d(&np_dev, (uintptr_t)cmd, cmd->header.size, (uintptr_t)resp, sizeof(r_getattr), VIRTQ_DESC_F_NEXT); - + virtio_buf b[2] = {VBUF(cmd, cmd->header.size, 0), VBUF(resp, sizeof(r_getattr), VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&np_dev, b, 2); uint64_t attr = resp->header.id == P9_RLERROR ? 
0 : resp->size; kfree((void*)cmd, sizeof(t_getattr)); @@ -486,7 +490,8 @@ uint64_t Virtio9PDriver::read(uint32_t fid, uint64_t offset, void *file){ cmd->offset = offset; cmd->count = amount - sizeof(p9_packet_header) - sizeof(uint32_t); - virtio_send_2d(&np_dev, (uintptr_t)cmd, sizeof(t_read), resp, amount,VIRTQ_DESC_F_NEXT); + virtio_buf b[2] = {VBUF(cmd, sizeof(t_read), 0) ,VBUF((void*)resp, amount, VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&np_dev, b, 2); if (((p9_packet_header*)resp)->id == P9_RLERROR) return 0; diff --git a/kernel/hw/hw.c b/kernel/hw/hw.c index 1954411c..8ebda4bb 100644 --- a/kernel/hw/hw.c +++ b/kernel/hw/hw.c @@ -23,6 +23,7 @@ uintptr_t GPIO_PIN_BASE; uintptr_t DWC2_BASE; uint32_t MSI_OFFSET; uintptr_t LOWEST_ADDR; +uintptr_t PM_BASE; void detect_hardware(){ if (BOARD_TYPE == 1){ @@ -85,6 +86,8 @@ void detect_hardware(){ CRAM_START = 0x13600000; MSI_OFFSET = 0; LOWEST_ADDR = MMIO_BASE; + PM_BASE = MMIO_BASE + 0x100000u; + if (RPI_BOARD != 5) reset_gpio(); } } diff --git a/kernel/hw/hw.h b/kernel/hw/hw.h index 3b03557a..dfad19c0 100644 --- a/kernel/hw/hw.h +++ b/kernel/hw/hw.h @@ -36,6 +36,7 @@ extern uintptr_t DWC2_BASE; extern uint32_t MSI_OFFSET; extern uintptr_t LOWEST_ADDR; +extern uintptr_t PM_BASE; void detect_hardware(); void print_hardware(); diff --git a/kernel/hw/power.c b/kernel/hw/power.c new file mode 100644 index 00000000..1dfaacee --- /dev/null +++ b/kernel/hw/power.c @@ -0,0 +1,68 @@ +#include "power.h" + +#include "hw/hw.h" +#include "std/memory_access.h" +#include "exceptions/irq.h" +#include "types.h" + +static inline uint64_t hvc_call(uint64_t fid, uint64_t x1, uint64_t x2, uint64_t x3){ + register uint64_t r0 asm("x0") = fid; + register uint64_t r1 asm("x1") = x1; + register uint64_t r2 asm("x2") = x2; + register uint64_t r3 asm("x3") = x3; + asm volatile("hvc #0" : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) :: "x4", "x5", "x6", "x7", "memory"); + return r0; +} + +static inline void rpi_full_reset(){ + uintptr_t base = PM_BASE; 
+ uintptr_t wdog = base + 0x24u; + uintptr_t rstc = base + 0x1cu; + uint32_t pass = 0x5a000000u; + write32(wdog, pass | (10u & 0xffffu)); + uint32_t val = read32(rstc); + val &= ~0x30u; + val |= pass | 0x20u; + write32(rstc, val); + while (1) asm volatile("wfi"); +} + +static inline void rpi_mark_halt(){ + uintptr_t base = PM_BASE; + uintptr_t rsts = base + 0x20u; + uint32_t pass = 0x5a000000u; + uint32_t val = read32(rsts); + val |= pass | 0x00000555u; + write32(rsts, val); +} + +static inline void psci_reset(){ + (void)hvc_call(0x84000009u, 0, 0, 0); + while (1) asm volatile("wfi"); +} + +static inline void psci_off(){ + (void)hvc_call(0x84000008u, 0, 0, 0); + while (1) asm volatile("wfi"); +} + +void hw_shutdown(shutdown_mode mode){ + disable_interrupt(); + + if (RPI_BOARD != 0){ + //TODO raspi shutdown isn't tested + //(im not fully confident raspi is correct) + if (mode == SHUTDOWN_REBOOT) rpi_full_reset(); + if (mode == SHUTDOWN_POWEROFF){ + rpi_mark_halt(); + rpi_full_reset(); + } + } + + if (BOARD_TYPE == 1){ + if (mode == SHUTDOWN_REBOOT) psci_reset(); + if (mode == SHUTDOWN_POWEROFF) psci_off(); + } + + while (1) asm volatile("wfi"); +} diff --git a/kernel/hw/power.h b/kernel/hw/power.h new file mode 100644 index 00000000..067aeb39 --- /dev/null +++ b/kernel/hw/power.h @@ -0,0 +1,8 @@ +#pragma once + +typedef enum { + SHUTDOWN_REBOOT = 0, + SHUTDOWN_POWEROFF = 1, +} shutdown_mode; + +void hw_shutdown(shutdown_mode mode); \ No newline at end of file diff --git a/kernel/kernel_processes/kprocess_loader.c b/kernel/kernel_processes/kprocess_loader.c index 13fe6b81..e16ed2b5 100644 --- a/kernel/kernel_processes/kprocess_loader.c +++ b/kernel/kernel_processes/kprocess_loader.c @@ -4,6 +4,7 @@ #include "memory/page_allocator.h" #include "exceptions/irq.h" #include "sysregs.h" +#include "std/std.h" process_t *create_kernel_process(const char *name, int (*func)(int argc, char* argv[]), int argc, const char* argv[]){ @@ -15,7 +16,7 @@ process_t 
*create_kernel_process(const char *name, int (*func)(int argc, char* a uint64_t stack_size = 0x10000; - uintptr_t stack = (uintptr_t)palloc(stack_size, MEM_PRIV_KERNEL, MEM_RW, false); + uintptr_t stack = (uintptr_t)palloc(stack_size, MEM_PRIV_KERNEL, MEM_RW, true); kprintf("Stack size %llx. Start %llx", stack_size,stack); if (!stack) return 0; @@ -34,8 +35,54 @@ process_t *create_kernel_process(const char *name, int (*func)(int argc, char* a kprintf("Kernel process %s (%i) allocated with address at %llx, stack at %llx, heap at %llx. %i argument(s)", (uintptr_t)name, proc->id, proc->pc, proc->sp, proc->heap, argc); proc->spsr = 0x205; proc->state = READY; - proc->PROC_X0 = argc; - proc->PROC_X1 = (uintptr_t)argv; + + proc->PROC_X0 = 0; + proc->PROC_X1 = 0; + + if (argc > 0 && argv) { + + uint64_t argvs = (uint64_t)(argc + 1) * sizeof(char*); + uint64_t str_total = 0; + + for (int i = 0; i < argc; i++) { + if (!argv[i]) continue; + str_total += (uint64_t)strlen(argv[i]) + 1; + } + + uint64_t need = argvs + str_total; + need = (need + 0xF) & ~0xFULL; + + if (need + 0x20 < stack_size) { + + uintptr_t top = proc->stack; + uintptr_t base = (top - need) & ~0xFULL; + + char **kargv = (char**)base; + char *kstr = (char*)(base + argvs); + + uint64_t off = 0; + for (int i = 0; i < argc; i++) { + + if (!argv[i]) { + kargv[i] = 0; + continue; + } + + uint64_t len = (uint64_t)strlen(argv[i]); + memcpy(kstr + off, argv[i], len); + kstr[off + len] = 0; + + kargv[i] = kstr + off; + off += len + 1; + } + + kargv[argc] = 0; + + proc->sp = base; + proc->PROC_X0 = argc; + proc->PROC_X1 = (uintptr_t)kargv; + } + } proc->output = PHYS_TO_VIRT((uintptr_t)palloc(0x1000, MEM_PRIV_KERNEL, MEM_RW, true)); diff --git a/kernel/memory/page_allocator.c b/kernel/memory/page_allocator.c index 89478415..bb558866 100644 --- a/kernel/memory/page_allocator.c +++ b/kernel/memory/page_allocator.c @@ -32,7 +32,7 @@ void page_alloc_enable_verbose(){ }\ }) -int count_pages(uint64_t i1,uint64_t i2){ 
+uint64_t count_pages(uint64_t i1,uint64_t i2){ return (i1/i2) + (i1 % i2 > 0); } @@ -187,7 +187,9 @@ void* palloc_inner(uint64_t size, uint8_t level, uint8_t attributes, bool full, } void* palloc(uint64_t size, uint8_t level, uint8_t attributes, bool full){ - return PHYS_TO_VIRT_P(palloc_inner(size, level, attributes, full, true)); + void* phys = palloc_inner(size, level, attributes, full, true); + if(!phys) return 0; + return PHYS_TO_VIRT_P(phys); } bool page_used(uintptr_t ptr){ @@ -230,7 +232,14 @@ void* kalloc_inner(void *page, size_t size, uint16_t alignment, uint8_t level, u kprintfv("[in_page_alloc] Requested size: %x", size); mem_page *info = (mem_page*)PHYS_TO_VIRT_P(page); + if (!info->next_free_mem_ptr){ + uintptr_t page_phys = (uintptr_t)page; + if ((page_phys & HIGH_VA) == HIGH_VA) page_phys = VIRT_TO_PHYS(page_phys); + setup_page(page_phys, info->attributes); + info = (mem_page*)PHYS_TO_VIRT_P((void*)page_phys); + } + if (size >= PAGE_SIZE){ void* ptr = palloc(size, level, info->attributes, true); page_index *index = info->page_alloc; @@ -265,7 +274,8 @@ void* kalloc_inner(void *page, size_t size, uint16_t alignment, uint8_t level, u kprintfv("[in_page_alloc] Reusing free block at %x",(uintptr_t)*curr); uint64_t result = (uint64_t)cblock; - *curr = VIRT_TO_PHYS_P(cblock->next); + //*curr = VIRT_TO_PHYS_P(cblock->next); + *curr = cblock->next; memset((void*)PHYS_TO_VIRT(result), 0, size); info->size += size; if (page_va){ @@ -318,18 +328,22 @@ void* kalloc(void *page, size_t size, uint16_t alignment, uint8_t level){ } void kfree(void* ptr, size_t size) { + if(!ptr || size == 0) return; kprintfv("[page_alloc_free] Freeing block at %x size %x",(uintptr_t)ptr, size); - memset32((void*)ptr,0xDEADBEEF,size); + if(size & 0xF) size = (size + 15) & ~0xFULL; - mem_page *page = (mem_page *)(((uintptr_t)ptr) & ~0xFFF); + memset32((void*)ptr,0xDEADBEEF,size); - uintptr_t phys_page = PHYS_TO_VIRT(mmu_translate((uintptr_t)page)); + mem_page *page = (mem_page 
*)(((uintptr_t)ptr) & ~0xFFFULL); + uintptr_t phys_page = mmu_translate((uintptr_t)page); + uintptr_t off = (uintptr_t)ptr & 0xFFFULL; + uintptr_t block_phys = phys_page | off; - FreeBlock* block = (FreeBlock*)((uintptr_t)phys_page | ((uintptr_t)ptr & 0xFFF)); + FreeBlock* block = (FreeBlock*)PHYS_TO_VIRT(block_phys); block->size = size; block->next = page->free_list; - page->free_list = block; + page->free_list = (FreeBlock*)block_phys; page->size -= size; } diff --git a/kernel/memory/page_allocator.h b/kernel/memory/page_allocator.h index cd72f79d..2442fd2e 100644 --- a/kernel/memory/page_allocator.h +++ b/kernel/memory/page_allocator.h @@ -35,7 +35,7 @@ void* kalloc_inner(void *page, size_t size, uint16_t alignment, uint8_t level, u void* kalloc(void *page, size_t size, uint16_t alignment, uint8_t level); void kfree(void* ptr, size_t size); -int count_pages(uint64_t i1,uint64_t i2); +uint64_t count_pages(uint64_t i1,uint64_t i2); void free_sizedptr(sizedptr ptr); diff --git a/kernel/memory/talloc.c b/kernel/memory/talloc.c index 5368050c..8234bc83 100644 --- a/kernel/memory/talloc.c +++ b/kernel/memory/talloc.c @@ -20,18 +20,21 @@ static uint64_t calculated_ram_end = 0; FreeBlock* temp_free_list = 0; -#define PCI_MMIO_LIMIT MMIO_BASE + 0xFFFFFFF +#define PCI_MMIO_LIMIT (MMIO_BASE + 0xFFFFFFF) static uint64_t next_mmio_base; //TODO: come up with a fix for hardcoded MMIO for some devices -uint64_t alloc_mmio_region(uint64_t size) { +uint64_t alloc_mmio_region(uint64_t size){ if (next_mmio_base == 0) next_mmio_base = MMIO_BASE; + size = (size + 0xFFF) & ~0xFFF; + if (next_mmio_base + size > PCI_MMIO_LIMIT){ - panic("MMIO alloc overflow",next_mmio_base+size); + panic("MMIO alloc overflow", next_mmio_base + size); return 0; } + uint64_t addr = next_mmio_base; next_mmio_base += size; return addr; @@ -43,6 +46,7 @@ bool is_mmio_allocated(uint64_t addr){ extern uint64_t kernel_start; extern uint64_t kcode_end; + static bool talloc_verbose = false; uint64_t 
next_free_temp_memory; @@ -57,16 +61,17 @@ bool can_automap = false; void pre_talloc(){ pre_talloc_ptr = palloc_inner(GRANULE_2MB, MEM_PRIV_KERNEL, MEM_DEV | MEM_RW, true, can_automap); pre_talloc_mem_limit = (uintptr_t)pre_talloc_ptr + GRANULE_2MB; + if (!can_automap){ can_automap = true; next_free_temp_memory = (uintptr_t)pre_talloc_ptr; talloc_mem_limit = pre_talloc_mem_limit; } + mmu_map_all((uintptr_t)pre_talloc_ptr); } -uint64_t talloc(uint64_t size) { - +uint64_t talloc(uint64_t size){ size = (size + 0xFFF) & ~0xFFF; if (talloc_verbose){ @@ -76,8 +81,9 @@ uint64_t talloc(uint64_t size) { } FreeBlock** curr = &temp_free_list; - while (*curr) { - if ((*curr)->size >= size) { + + while (*curr){ + if ((*curr)->size >= size){ if (talloc_verbose){ uart_raw_puts("[talloc] Reusing free block at "); uart_puthex((uint64_t)*curr); @@ -89,12 +95,14 @@ uint64_t talloc(uint64_t size) { memset((void*)result, 0, size); return result; } + curr = &(*curr)->next; } - if (next_free_temp_memory + size > talloc_mem_limit) { + if (next_free_temp_memory + size > talloc_mem_limit){ if (!pre_talloc_ptr) panic("Kernel allocator overflow", next_free_temp_memory); + next_free_temp_memory = (uintptr_t)pre_talloc_ptr; talloc_mem_limit = pre_talloc_mem_limit; pre_talloc(); @@ -113,8 +121,9 @@ uint64_t talloc(uint64_t size) { return result; } -void temp_free(void* ptr, uint64_t size) { +void temp_free(void* ptr, uint64_t size){ size = (size + 0xFFF) & ~0xFFF; + if (talloc_verbose){ uart_raw_puts("[temp_free] Freeing block at "); uart_puthex((uint64_t)ptr); @@ -123,7 +132,7 @@ void temp_free(void* ptr, uint64_t size) { uart_raw_putc('\n'); } - memset(PHYS_TO_VIRT_P(ptr),0,size); + memset(PHYS_TO_VIRT_P(ptr), 0, size); FreeBlock* block = VIRT_TO_PHYS_P(ptr); block->size = size; @@ -143,48 +152,54 @@ uint64_t mem_get_kmem_end(){ return (uint64_t)&kcode_end; } -int handle_mem_node(const char *propname, const void *prop, uint32_t len, dtb_match_t *match) { - if (strcmp(propname, "reg") == 0 && 
len >= 16) { +int handle_mem_node(const char *propname, const void *prop, uint32_t len, dtb_match_t *match){ + if (strcmp(propname, "reg") == 0 && len >= 16){ uint32_t *p = (uint32_t *)prop; match->reg_base = ((uint64_t)__builtin_bswap32(p[0]) << 32) | __builtin_bswap32(p[1]); match->reg_size = ((uint64_t)__builtin_bswap32(p[2]) << 32) | __builtin_bswap32(p[3]); - return 1; } - if (strcmp(propname, "device_type") == 0 && strcmp(prop,"memory") == 0){ + + if (strcmp(propname, "device_type") == 0 && strcmp(prop, "memory") == 0){ match->found = true; } + return 0; } -int get_memory_region(uint64_t *out_base, uint64_t *out_size) { +int get_memory_region(uint64_t *out_base, uint64_t *out_size){ dtb_match_t match = {0}; - if (dtb_scan("memory",handle_mem_node, &match)) { + + if (dtb_scan("memory", handle_mem_node, &match)){ *out_base = match.reg_base; *out_size = match.reg_size; return 1; } + return 0; } void calc_ram(){ - if (USE_DTB && get_memory_region(&total_ram_start, &total_ram_size)) { - calculated_ram_end = total_ram_start + total_ram_size; - calculated_ram_start = ((uint64_t)&kcode_end) + 0x1; - calculated_ram_start = ((calculated_ram_start) & ~((1ULL << 21) - 1)); - calculated_ram_end = ((calculated_ram_end) & ~((1ULL << 21) - 1)); + if (USE_DTB && get_memory_region(&total_ram_start, &total_ram_size)){ + calculated_ram_end = total_ram_start + total_ram_size; + + calculated_ram_start = ((uint64_t)&kcode_end) + 0x1; + calculated_ram_start = (calculated_ram_start + ((1ULL << 21) - 1)) & ~((1ULL << 21) - 1); + + calculated_ram_end = calculated_ram_end & ~((1ULL << 21) - 1); } else { total_ram_start = RAM_START; total_ram_size = RAM_SIZE; calculated_ram_end = CRAM_END; calculated_ram_start = CRAM_START; } + calculated_ram_size = calculated_ram_end - calculated_ram_start; } -#define calcvar(var)\ - if (var == 0)\ - calc_ram();\ +#define calcvar(var) \ + if (var == 0) \ + calc_ram(); \ return var; uint64_t get_total_ram(){ @@ -201,4 +216,4 @@ uint64_t 
get_user_ram_start(){ uint64_t get_user_ram_end(){ calcvar(calculated_ram_end) -} \ No newline at end of file +} diff --git a/kernel/networking/application_layer/csocket_http_client.cpp b/kernel/networking/application_layer/csocket_http_client.cpp index 7085aded..2fe47744 100644 --- a/kernel/networking/application_layer/csocket_http_client.cpp +++ b/kernel/networking/application_layer/csocket_http_client.cpp @@ -6,8 +6,8 @@ extern "C" { -http_client_handle_t http_client_create(uint16_t pid) { - HTTPClient* cli = new HTTPClient(pid); +http_client_handle_t http_client_create(uint16_t pid, const SocketExtraOptions* extra) { + HTTPClient* cli = new HTTPClient(pid, extra); if (!cli) return nullptr; return reinterpret_cast(cli); } diff --git a/kernel/networking/application_layer/csocket_http_client.h b/kernel/networking/application_layer/csocket_http_client.h index cb89ae86..30bd1088 100644 --- a/kernel/networking/application_layer/csocket_http_client.h +++ b/kernel/networking/application_layer/csocket_http_client.h @@ -8,7 +8,7 @@ extern "C" { typedef void* http_client_handle_t; -http_client_handle_t http_client_create(uint16_t pid); +http_client_handle_t http_client_create(uint16_t pid, const SocketExtraOptions* extra); void http_client_destroy(http_client_handle_t h); int32_t http_client_connect_ex(http_client_handle_t h, uint8_t dst_kind, const void *dst, uint16_t port); diff --git a/kernel/networking/application_layer/csocket_http_server.cpp b/kernel/networking/application_layer/csocket_http_server.cpp index 3cb3713a..afe75eba 100644 --- a/kernel/networking/application_layer/csocket_http_server.cpp +++ b/kernel/networking/application_layer/csocket_http_server.cpp @@ -6,8 +6,8 @@ extern "C" { -http_server_handle_t http_server_create(uint16_t pid) { - HTTPServer* srv = new HTTPServer(pid); +http_server_handle_t http_server_create(uint16_t pid, const SocketExtraOptions* extra) { + HTTPServer* srv = new HTTPServer(pid, extra); if (!srv) return nullptr; return 
reinterpret_cast(srv); } @@ -18,7 +18,7 @@ void http_server_destroy(http_server_handle_t h) { delete srv; } -int32_t http_server_bind_ex(http_server_handle_t h, const SockBindSpec *spec, uint16_t port) { +int32_t http_server_bind(http_server_handle_t h, const SockBindSpec *spec, uint16_t port) { if (!h || !spec) return (int32_t)SOCK_ERR_INVAL; HTTPServer* srv = reinterpret_cast(h); return srv->bind(*spec, port); @@ -55,9 +55,8 @@ int32_t http_server_send_response(http_server_handle_t h, http_connection_handle int32_t http_connection_close(http_connection_handle_t c) { if (!c) return (int32_t)SOCK_ERR_INVAL; TCPSocket* conn = reinterpret_cast(c); - int32_t r = conn->close(); delete conn; - return r; + return (int32_t)SOCK_OK; } int32_t http_server_close(http_server_handle_t h) { diff --git a/kernel/networking/application_layer/csocket_http_server.h b/kernel/networking/application_layer/csocket_http_server.h index 0fda64a7..0b673473 100644 --- a/kernel/networking/application_layer/csocket_http_server.h +++ b/kernel/networking/application_layer/csocket_http_server.h @@ -10,10 +10,10 @@ extern "C" { typedef void* http_server_handle_t; typedef void* http_connection_handle_t; -http_server_handle_t http_server_create(uint16_t pid); +http_server_handle_t http_server_create(uint16_t pid, const SocketExtraOptions* extra); void http_server_destroy(http_server_handle_t srv); -int32_t http_server_bind_ex(http_server_handle_t srv, const struct SockBindSpec *spec, uint16_t port); +int32_t http_server_bind(http_server_handle_t srv, const struct SockBindSpec *spec, uint16_t port); int32_t http_server_listen(http_server_handle_t srv, int backlog); diff --git a/kernel/networking/application_layer/dhcp_daemon.c b/kernel/networking/application_layer/dhcp_daemon.c index 94986b39..5b4d6ece 100644 --- a/kernel/networking/application_layer/dhcp_daemon.c +++ b/kernel/networking/application_layer/dhcp_daemon.c @@ -12,6 +12,7 @@ #include "networking/transport_layer/udp.h" #include 
"networking/transport_layer/csocket_udp.h" +#include "networking/transport_layer/trans_utils.h" #include "types.h" #include "networking/interface_manager.h" @@ -123,7 +124,7 @@ static void ensure_inventory() { const uint8_t* m = network_get_mac(st.ifindex); if (m) { memcpy(st.mac, m, 6); st.mac_ok = true; } st.needs_inform = (v4->mode == IPV4_CFG_STATIC && v4->ip != 0); - st.sock = udp_socket_create(SOCK_ROLE_SERVER, g_pid_dhcpd); + st.sock = udp_socket_create(SOCK_ROLE_SERVER, g_pid_dhcpd, NULL); SockBindSpec spec; memset(&spec, 0, sizeof(spec)); spec.kind = BIND_L3; @@ -339,24 +340,18 @@ static void dhcp_send_discover_for(dhcp_if_state_t* st) { memset(&req, 0, sizeof(req)); if (st->mac_ok) memcpy(req.mac, st->mac, 6); sizedptr pkt = dhcp_build_packet(&req, DHCPDISCOVER, xid, DHCPK_DISCOVER, true); - net_l4_endpoint dst; - memset(&dst, 0, sizeof(dst)); - dst.ver = IP_VER4; uint32_t bcast = 0xFFFFFFFFu; - memcpy(dst.ip, &bcast, 4); - dst.port = 67; + net_l4_endpoint dst; + make_ep(bcast, 67, IP_VER4, &dst); socket_sendto_udp_ex(st->sock, 0, &dst, 0, (const void*)pkt.ptr, pkt.size); free_sized((void*)pkt.ptr, pkt.size); } static void dhcp_send_request_select_for(dhcp_if_state_t* st, const dhcp_request* base) { sizedptr pkt = dhcp_build_packet(base, DHCPREQUEST, st->trans_xid, DHCPK_SELECT, true); - net_l4_endpoint dst; - memset(&dst, 0, sizeof(dst)); - dst.ver = IP_VER4; uint32_t dip = 0xFFFFFFFFu; - memcpy(dst.ip, &dip, 4); - dst.port = 67; + net_l4_endpoint dst; + make_ep(dip, 67, IP_VER4, &dst); socket_sendto_udp_ex(st->sock, 0, &dst, 0, (const void*)pkt.ptr, pkt.size); free_sized((void*)pkt.ptr, pkt.size); } @@ -374,12 +369,9 @@ static void dhcp_send_renew_for(dhcp_if_state_t* st) { rng_init_random(&rng); st->trans_xid = rng_next32(&rng); sizedptr pkt = dhcp_build_packet(&req, DHCPREQUEST, st->trans_xid, DHCPK_RENEW, st->server_ip_net == 0); - net_l4_endpoint dst; - memset(&dst, 0, sizeof(dst)); - dst.ver = IP_VER4; uint32_t dip = st->server_ip_net ? 
st->server_ip_net : 0xFFFFFFFFu; - memcpy(dst.ip, &dip, 4); - dst.port = 67; + net_l4_endpoint dst; + make_ep(dip, 67, IP_VER4, &dst); socket_sendto_udp_ex(st->sock, 0, &dst, 0, (const void*)pkt.ptr, pkt.size); free_sized((void*)pkt.ptr, pkt.size); } @@ -397,12 +389,9 @@ static void dhcp_send_rebind_for(dhcp_if_state_t* st) { rng_init_random(&rng); st->trans_xid = rng_next32(&rng); sizedptr pkt = dhcp_build_packet(&req, DHCPREQUEST, st->trans_xid, DHCPK_REBIND, true); - net_l4_endpoint dst; - memset(&dst, 0, sizeof(dst)); - dst.ver = IP_VER4; uint32_t dip = 0xFFFFFFFFu; - memcpy(dst.ip, &dip, 4); - dst.port = 67; + net_l4_endpoint dst; + make_ep(dip, 67, IP_VER4, &dst); socket_sendto_udp_ex(st->sock, 0, &dst, 0, (const void*)pkt.ptr, pkt.size); free_sized((void*)pkt.ptr, pkt.size); } @@ -420,12 +409,9 @@ static void dhcp_send_inform_for(dhcp_if_state_t* st) { rng_init_random(&rng); uint32_t xid = rng_next32(&rng); sizedptr pkt = dhcp_build_packet(&req, DHCPINFORM, xid, DHCPK_INFORM, true); - net_l4_endpoint dst; - memset(&dst, 0, sizeof(dst)); - dst.ver = IP_VER4; uint32_t dip = 0xFFFFFFFFu; - memcpy(dst.ip, &dip, 4); - dst.port = 67; + net_l4_endpoint dst; + make_ep(dip, 67, IP_VER4, &dst); socket_sendto_udp_ex(st->sock, 0, &dst, 0, (const void*)pkt.ptr, pkt.size); free_sized((void*)pkt.ptr, pkt.size); } diff --git a/kernel/networking/application_layer/dhcpv6.c b/kernel/networking/application_layer/dhcpv6.c new file mode 100644 index 00000000..339ad12e --- /dev/null +++ b/kernel/networking/application_layer/dhcpv6.c @@ -0,0 +1,331 @@ +#include "networking/application_layer/dhcpv6.h" + +#include "std/memory.h" +#include "std/string.h" + +static void opt_append(uint8_t*b, uint32_t cap, uint32_t*off, uint16_t code, const void*data, uint16_t len){ + if (!b || !off) return; + if (*off + 4u + len > cap) return; + + uint16_t c = bswap16(code); + uint16_t l = bswap16(len); + + memcpy(b + *off + 0, &c, 2); + memcpy(b + *off + 2, &l, 2); + + if (len && data) memcpy(b + *off 
+ 4, data, len); + + *off += 4u + len; +} + +uint32_t dhcpv6_make_xid24(uint32_t r32){ + uint32_t x = r32 & 0x00FFFFFFu; + if (!x) x = 1; + return x; +} + +uint32_t dhcpv6_iaid_from_mac(const uint8_t mac[6]){ + if (!mac) return 0; + return ((uint32_t)mac[2] << 24) | ((uint32_t)mac[3] << 16) | ((uint32_t)mac[4] << 8) | (uint32_t)mac[5]; +} + +bool dhcpv6_build_message(uint8_t*out, uint32_t out_cap, uint32_t*out_len, const net_runtime_opts_v6_t*rt, const uint8_t mac[6], uint8_t msg_type, dhcpv6_req_kind kind, uint32_t xid24, bool want_address) { + if (!out || !out_len) return false; + if (out_cap < 4) return false; + + xid24 &= 0x00FFFFFFu; + if (!xid24) xid24 = 1; + + out[0] = msg_type; + out[1] = (uint8_t)((xid24 >> 16) & 0xFF); + out[2] = (uint8_t)((xid24 >> 8) & 0xFF); + out[3] = (uint8_t)(xid24 & 0xFF); + + uint32_t off = 4; + + uint8_t duid[10]; + uint16_t duid_type = bswap16(3); + uint16_t hw_type = bswap16(1); + + memcpy(duid + 0, &duid_type, 2); + memcpy(duid + 2, &hw_type, 2); + + if (mac) memcpy(duid + 4, mac, 6); + else memset(duid + 4, 0, 6); + + opt_append(out, out_cap, &off, DHCPV6_OPT_CLIENTID, duid, 10); + + if (rt && rt->server_id_len && rt->server_id_len <= DHCPV6_MAX_SERVER_ID) opt_append(out, out_cap, &off, DHCPV6_OPT_SERVERID, rt->server_id, rt->server_id_len); + + uint8_t elapsed[2] = {0, 0}; + opt_append(out, out_cap, &off, DHCPV6_OPT_ELAPSED, elapsed, 2); + + if (msg_type == DHCPV6_MSG_SOLICIT || msg_type == DHCPV6_MSG_REQUEST || msg_type == DHCPV6_MSG_RENEW || + msg_type == DHCPV6_MSG_REBIND || msg_type == DHCPV6_MSG_CONFIRM || msg_type == DHCPV6_MSG_RELEASE || + msg_type == DHCPV6_MSG_INFORMATION_REQUEST || msg_type == DHCPV6_MSG_DECLINE){ + + uint16_t oro[2]; + uint16_t n = 0; + + oro[n++] = bswap16(DHCPV6_OPT_DNS_SERVERS); + oro[n++] = bswap16(DHCPV6_OPT_NTP_SERVER); + + opt_append(out, out_cap, &off, DHCPV6_OPT_ORO, oro, (uint16_t)(n * 2)); + } + + if (want_address) { + uint32_t iaid = rt ? rt->iaid : 0; + uint32_t iaid_pd = iaid ? 
(iaid ^ 0xA5A5A5A5u) : 0; + + if (kind == DHCPV6K_SELECT){ + uint8_t iana[12]; + + uint32_t iaid_net = bswap32(iaid); + uint32_t t1_net = 0; + uint32_t t2_net = 0; + + memcpy(iana + 0, &iaid_net, 4); + memcpy(iana + 4, &t1_net, 4); + memcpy(iana + 8, &t2_net, 4); + + opt_append(out, out_cap, &off, DHCPV6_OPT_IA_NA, iana, 12); + + if (rt && rt->pd_prefix_len){ + uint8_t iapd[12]; + + uint32_t pd_iaid_net = bswap32(iaid_pd); + + memcpy(iapd + 0, &pd_iaid_net, 4); + memcpy(iapd + 4, &t1_net, 4); + memcpy(iapd + 8, &t2_net, 4); + + opt_append(out, out_cap, &off, DHCPV6_OPT_IA_PD, iapd, 12); + } + } else { + if (rt && rt->lease){ + uint8_t payload[40]; + uint8_t addr[16]; + + memset(addr, 0, 16); + + uint32_t iaid_net = bswap32(iaid); + uint32_t t1_net = 0; + uint32_t t2_net = 0; + + memcpy(payload + 0, &iaid_net, 4); + memcpy(payload + 4, &t1_net, 4); + memcpy(payload + 8, &t2_net, 4); + + uint16_t code_net = bswap16(DHCPV6_OPT_IAADDR); + uint16_t len_net = bswap16(24); + + memcpy(payload + 12, &code_net, 2); + memcpy(payload + 14, &len_net, 2); + memcpy(payload + 16, addr, 16); + + uint32_t pref_net = bswap32(rt->t1); + uint32_t valid_net = bswap32(rt->lease); + + memcpy(payload + 32, &pref_net, 4); + memcpy(payload + 36, &valid_net, 4); + + opt_append(out, out_cap, &off, DHCPV6_OPT_IA_NA, payload, 40); + } else { + uint8_t iana[12]; + + uint32_t iaid_net = bswap32(iaid); + uint32_t t1_net = 0; + uint32_t t2_net = 0; + + memcpy(iana + 0, &iaid_net, 4); + memcpy(iana + 4, &t1_net, 4); + memcpy(iana + 8, &t2_net, 4); + + opt_append(out, out_cap, &off, DHCPV6_OPT_IA_NA, iana, 12); + } + + if (rt && rt->pd_prefix_len){ + uint8_t payload[41]; + + uint32_t iaid_net = bswap32(iaid_pd); + uint32_t t1_net = 0; + uint32_t t2_net = 0; + + memcpy(payload + 0, &iaid_net, 4); + memcpy(payload + 4, &t1_net, 4); + memcpy(payload + 8, &t2_net, 4); + + uint16_t code_net = bswap16(DHCPV6_OPT_IAPREFIX); + uint16_t len_net = bswap16(25); + + memcpy(payload + 12, &code_net, 2); + 
memcpy(payload + 14, &len_net, 2); + + uint32_t pref_net = bswap32(rt->pd_preferred_lft); + uint32_t valid_net = bswap32(rt->pd_valid_lft); + + memcpy(payload + 16, &pref_net, 4); + memcpy(payload + 20, &valid_net, 4); + + payload[24] = rt->pd_prefix_len; + memcpy(payload + 25, rt->pd_prefix, 16); + + opt_append(out, out_cap, &off, DHCPV6_OPT_IA_PD, payload, 41); + } + } + } + *out_len = off; + return true; +} + +static bool parse_opts(const uint8_t*opt, uint32_t opt_len, uint32_t expect_iaid, dhcpv6_parsed_t*out){ + uint32_t off = 0; + bool got_addr = false; + + while (off + 4 <= opt_len) { + uint16_t code_net; + uint16_t len_net; + + memcpy(&code_net, opt + off + 0, 2); + memcpy(&len_net, opt + off + 2, 2); + + uint16_t code = bswap16(code_net); + uint16_t len = bswap16(len_net); + + off += 4; + if (off + len >opt_len) break; + + if (code == DHCPV6_OPT_SERVERID && len && len <= DHCPV6_MAX_SERVER_ID){ + memcpy(out->server_id, opt + off, len); + out->server_id_len = len; + out->has_server_id = true; + } + + if (code == DHCPV6_OPT_DNS_SERVERS && len >= 16){ + int n = (int)(len / 16); + if (n > 2) n = 2; + + for (int i = 0; i < n; i++) + memcpy(out->dns[i], opt + off + (uint32_t)i * 16u, 16); + + out->has_dns = true; + } + + if (code == DHCPV6_OPT_NTP_SERVER && len >= 16){ + int n = (int)(len / 16); + if (n > 2) n = 2; + + for (int i = 0; i < n; i++) + memcpy(out->ntp[i], opt + off + (uint32_t)i * 16u, 16); + + out->has_ntp = true; + } + + if (code == DHCPV6_OPT_IA_NA && len >= 12){ + uint32_t iaid_net; + uint32_t t1_net; + uint32_t t2_net; + + memcpy(&iaid_net, opt + off + 0, 4); + memcpy(&t1_net, opt + off + 4, 4); + memcpy(&t2_net, opt + off + 8, 4); + + uint32_t iaid = bswap32(iaid_net); + if (expect_iaid && iaid != expect_iaid){ off += len; continue; } + + out->t1 = bswap32(t1_net); + out->t2 = bswap32(t2_net); + + uint32_t sub = off + 12; + uint32_t end = off + len; + + while (sub + 4 <= end){ + uint16_t sc_net; + uint16_t sl_net; + + memcpy(&sc_net, opt + sub 
+ 0, 2); + memcpy(&sl_net, opt + sub + 2, 2); + + uint16_t sc = bswap16(sc_net); + uint16_t sl = bswap16(sl_net); + + sub += 4; + if (sub + sl > end) break; + + if (sc == DHCPV6_OPT_IAADDR && sl >= 24){ + memcpy(out->addr, opt + sub + 0, 16); + + uint32_t pref_net; + uint32_t valid_net; + + memcpy(&pref_net, opt + sub + 16, 4); + memcpy(&valid_net, opt + sub + 20, 4); + + out->preferred_lft = bswap32(pref_net); + out->valid_lft = bswap32(valid_net); + out->has_addr = true; + got_addr = true; + break; + } + + sub += sl; + } + } + + if (code == DHCPV6_OPT_IA_PD && len >= 12){ + uint32_t sub = off + 12; + uint32_t end = off + len; + + while (sub + 4 <= end){ + uint16_t sc_net; + uint16_t sl_net; + + memcpy(&sc_net, opt + sub + 0, 2); + memcpy(&sl_net, opt + sub + 2, 2); + + uint16_t sc = bswap16(sc_net); + uint16_t sl = bswap16(sl_net); + + sub += 4; + if (sub + sl > end) break; + + if (sc == DHCPV6_OPT_IAPREFIX && sl >= 25){ + uint32_t pref_net; + uint32_t valid_net; + + memcpy(&pref_net, opt + sub + 0, 4); + memcpy(&valid_net, opt + sub + 4, 4); + + out->pd_preferred_lft = bswap32(pref_net); + out->pd_valid_lft = bswap32(valid_net); + out->pd_prefix_len = opt[sub + 8]; + + memcpy(out->pd_prefix, opt + sub + 9, 16); + + out->has_pd = true; + break; + } + + sub += sl; + } + } + + off += len; + } + + return got_addr || out->has_pd || out->has_server_id; +} + +bool dhcpv6_parse_message(const uint8_t*msg, uint32_t msg_len, uint32_t expect_xid24, uint32_t expect_iaid, dhcpv6_parsed_t*out){ + if (!msg || !out) return false; + if (msg_len < 4) return false; + + memset(out, 0, sizeof(*out)); + + out->msg_type = msg[0]; + out->xid24 = ((uint32_t)msg[1] << 16) | ((uint32_t)msg[2] << 8) | (uint32_t)msg[3]; + + if (expect_xid24 && out->xid24 != (expect_xid24 & 0x00FFFFFFu)) return false; + + return parse_opts(msg + 4, msg_len - 4, expect_iaid, out); +} \ No newline at end of file diff --git a/kernel/networking/application_layer/dhcpv6.h 
b/kernel/networking/application_layer/dhcpv6.h new file mode 100644 index 00000000..9fb690a1 --- /dev/null +++ b/kernel/networking/application_layer/dhcpv6.h @@ -0,0 +1,90 @@ +#pragma once + +#include "types.h" +#include "networking/interface_manager.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + DHCPV6_MSG_SOLICIT = 1, + DHCPV6_MSG_ADVERTISE = 2, + DHCPV6_MSG_REQUEST = 3, + DHCPV6_MSG_CONFIRM = 4, + DHCPV6_MSG_RENEW = 5, + DHCPV6_MSG_REBIND = 6, + DHCPV6_MSG_REPLY = 7, + DHCPV6_MSG_RELEASE = 8, + DHCPV6_MSG_DECLINE = 9, + DHCPV6_MSG_INFORMATION_REQUEST = 11 +}; + +#define DHCPV6_CLIENT_PORT 546 +#define DHCPV6_SERVER_PORT 547 + +#define DHCPV6_OPT_CLIENTID 1 +#define DHCPV6_OPT_SERVERID 2 +#define DHCPV6_OPT_IA_NA 3 +#define DHCPV6_OPT_IAADDR 5 +#define DHCPV6_OPT_ORO 6 +#define DHCPV6_OPT_ELAPSED 8 +#define DHCPV6_OPT_STATUS_CODE 13 +#define DHCPV6_OPT_DNS_SERVERS 23 +#define DHCPV6_OPT_NTP_SERVER 56 +#define DHCPV6_OPT_IA_PD 25 +#define DHCPV6_OPT_IAPREFIX 26 + +#define DHCPV6_MAX_SERVER_ID 128 +#define DHCPV6_MAX_MSG 512 + +typedef enum { + DHCPV6K_SELECT = 0, + DHCPV6K_RENEW = 1, + DHCPV6K_REBIND = 2, + DHCPV6K_CONFIRM = 3, + DHCPV6K_RELEASE = 4, + DHCPV6K_DECLINE = 5 +} dhcpv6_req_kind; + +typedef struct { + uint8_t msg_type; + uint32_t xid24; + + bool has_server_id; + uint16_t server_id_len; + uint8_t server_id[DHCPV6_MAX_SERVER_ID]; + + bool has_addr; + uint8_t addr[16]; + uint32_t preferred_lft; + uint32_t valid_lft; + + uint32_t t1; + uint32_t t2; + + bool has_dns; + uint8_t dns[2][16]; + + bool has_ntp; + uint8_t ntp[2][16]; + + bool has_pd; + uint8_t pd_prefix[16]; + uint8_t pd_prefix_len; + uint32_t pd_preferred_lft; + uint32_t pd_valid_lft; +} dhcpv6_parsed_t; + +uint32_t dhcpv6_make_xid24(uint32_t r32); + +void dhcpv6_duid_ll_from_mac(uint8_t out_duid[10], const uint8_t mac[6]); +uint32_t dhcpv6_iaid_from_mac(const uint8_t mac[6]); + +bool dhcpv6_build_message(uint8_t* out, uint32_t out_cap, uint32_t* out_len, const net_runtime_opts_v6_t* 
rt, const uint8_t mac[6], uint8_t type, dhcpv6_req_kind kind, uint32_t xid24, bool want_address); + +bool dhcpv6_parse_message(const uint8_t *msg, uint32_t msg_len, uint32_t expect_xid24, uint32_t expect_iaid, dhcpv6_parsed_t *out); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/kernel/networking/application_layer/dhcpv6_daemon.c b/kernel/networking/application_layer/dhcpv6_daemon.c new file mode 100644 index 00000000..5032514b --- /dev/null +++ b/kernel/networking/application_layer/dhcpv6_daemon.c @@ -0,0 +1,814 @@ +#include "dhcpv6_daemon.h" + +#include "std/memory.h" +#include "std/string.h" +#include "syscalls/syscalls.h" +#include "process/scheduler.h" +#include "math/rng.h" + +#include "data_struct/linked_list.h" + +#include "networking/interface_manager.h" +#include "networking/network.h" + +#include "networking/application_layer/dhcpv6.h" +#include "networking/internet_layer/ipv6.h" +#include "networking/internet_layer/ipv6_utils.h" + +#include "networking/transport_layer/csocket_udp.h" + +enum { + DHCPV6_S_INIT = 0, + DHCPV6_S_SOLICIT = 1, + DHCPV6_S_REQUEST = 2, + DHCPV6_S_BOUND = 3, + DHCPV6_S_RENEWING = 4, + DHCPV6_S_REBINDING = 5, + DHCPV6_S_CONFIRMING = 6, + DHCPV6_S_RELEASING = 7, + DHCPV6_S_DECLINING = 8 +}; + +typedef struct { + uint8_t ifindex; + uint8_t target_l3_id; + uint8_t bound_linklocal_l3_id; + + uint8_t last_gateway[16]; + uint8_t last_gateway_ok; + + uint8_t mac[6]; + uint8_t mac_ok; + + socket_handle_t sock; + + uint32_t xid24; + + uint32_t retry_left_ms; + uint32_t backoff_ms; + + uint32_t t1_left_ms; + uint32_t t2_left_ms; + uint32_t lease_left_ms; + uint8_t last_state; + uint8_t tx_tries; + uint8_t done; +} dhcpv6_bind_t; + +#define DHCPV6_MAX_INFOREQ_TX 3 +#define DHCPV6_MAX_REQUEST_TX 3 +#define DHCPV6_MAX_OTHER_TX 5 + +static uint16_t g_dhcpv6_pid = 0xFFFF; +static rng_t g_dhcpv6_rng; +static clinkedlist_t* g_dhcpv6_binds = NULL; + +static volatile bool g_force_renew_all = false; +static volatile bool 
g_force_rebind_all = false; +static volatile bool g_force_confirm_all = false; + +static uint64_t g_force_release_mask = 0; +static uint64_t g_force_decline_mask = 0; + +uint16_t dhcpv6_get_pid() { return g_dhcpv6_pid; } +bool dhcpv6_is_running() { return g_dhcpv6_pid != 0xFFFF; } +void dhcpv6_set_pid(uint16_t pid) { g_dhcpv6_pid = pid; } + +void dhcpv6_force_renew_all() { g_force_renew_all = true; } +void dhcpv6_force_rebind_all() { g_force_rebind_all = true; } +void dhcpv6_force_confirm_all() { g_force_confirm_all = true; } + +static int l3id_to_bit(uint8_t l3_id) { + if (!l3_id) return -1; + if ((l3_id & 0x08) == 0) return -1; + + uint8_t ifx = (uint8_t)((l3_id >> 4) & 0x0F); + uint8_t sl = (uint8_t)(l3_id & 0x03); + int idx = ((int)(ifx - 1) * MAX_IPV6_PER_INTERFACE) + (int)sl; + + if (idx < 0 || idx >= 64) return -1; + return idx; +} + +void dhcpv6_force_release_l3(uint8_t l3_id) { + int b = l3id_to_bit(l3_id); + if (b < 0) return; + g_force_release_mask |= (1ull << (uint64_t)b); +} + +void dhcpv6_force_decline_l3(uint8_t l3_id) { + int b = l3id_to_bit(l3_id); + if (b < 0) return; + g_force_decline_mask |= (1ull << (uint64_t)b); +} + +static void mcast_servers(uint8_t out_ip[16]) { + memset(out_ip, 0, 16); + out_ip[0] = 0xFF; + out_ip[1] = 0x02; + out_ip[14] = 0x01; + out_ip[15] = 0x02; +} + +static uint32_t next_backoff_ms(dhcpv6_bind_t* b) { + if (!b) return 4000; + + if (!b->backoff_ms) b->backoff_ms = 4000; + else { + uint64_t d = (uint64_t)b->backoff_ms * 2u; + if (d > 64000u) d = 64000u; + b->backoff_ms = (uint32_t)d; + } + + uint32_t j = (uint32_t)(rng_next32(&g_dhcpv6_rng) % 2000u); + int64_t v = (int64_t)b->backoff_ms + (int64_t)((int32_t)j - 1000); + if (v < 1000) v = 1000; + + return (uint32_t)v; +} + +static void reset_backoff(dhcpv6_bind_t* b) { + if (!b) return; + b->backoff_ms = 0; + b->retry_left_ms = 0; +} + +static void reset_lease_state(l3_ipv6_interface_t* v6, dhcpv6_bind_t* b) { + if (v6) { + v6->dhcpv6_state = DHCPV6_S_INIT; + 
v6->runtime_opts_v6.server_id_len = 0; + v6->runtime_opts_v6.lease = 0; + v6->runtime_opts_v6.lease_start_time = 0; + } + if (b) { + b->t1_left_ms = 0; + b->t2_left_ms = 0; + b->lease_left_ms = 0; + b->xid24 = 0; + b->last_state = 0xFF; + b->tx_tries = 0; + reset_backoff(b); + } +} + +static void ensure_binds() { + if (!g_dhcpv6_binds) g_dhcpv6_binds = clinkedlist_create(); + if (!g_dhcpv6_binds) return; + + clinkedlist_node_t* it = g_dhcpv6_binds->head; + while (it) { + clinkedlist_node_t* nxt = it->next; + dhcpv6_bind_t* b = (dhcpv6_bind_t*)it->data; + + bool keep = true; + + if (!b) keep = false; + + l2_interface_t* l2 = NULL; + if (keep) { + l2 = l2_interface_find_by_index(b->ifindex); + if (!l2 || !l2->is_up) keep = false; + } + + l3_ipv6_interface_t* t = NULL; + if (keep) { + t = l3_ipv6_find_by_id(b->target_l3_id); + if (!t) keep = false; + } + + if (keep) { + bool stateful = (t->cfg == IPV6_CFG_DHCPV6); + bool stateless = (t->cfg == IPV6_CFG_SLAAC && t->dhcpv6_stateless); + if (!stateful && !stateless) keep = false; + } + if (keep) if (!t->l2 || !t->l2->is_up) keep = false; + + l3_ipv6_interface_t* llv6 = NULL; + if (keep) { + llv6 = l3_ipv6_find_by_id(b->bound_linklocal_l3_id); + if (!llv6) keep = false; + } + + if (keep) if (llv6->cfg == IPV6_CFG_DISABLE) keep = false; + if (keep) if (llv6->dad_state != IPV6_DAD_OK) keep = false; + if (keep) if (!ipv6_is_linklocal(llv6->ip)) keep = false; + + if (!keep) { + if (t) reset_lease_state(t, b); + + dhcpv6_bind_t* rb = (dhcpv6_bind_t*)clinkedlist_remove(g_dhcpv6_binds, it); + if (rb) { + if (rb->sock) { + socket_close_udp(rb->sock); + socket_destroy_udp(rb->sock); + rb->sock = 0; + } + free_sized(rb, sizeof(*rb)); + } + } + + it = nxt; + } + + uint8_t n = l2_interface_count(); + for (uint8_t ix = 0; ix < n; ix++) { + l2_interface_t* l2 = l2_interface_at(ix); + if (!l2 || !l2->is_up) continue; + + bool already = false; + for (clinkedlist_node_t* it2 = g_dhcpv6_binds->head; it2; it2 = it2->next) { + dhcpv6_bind_t* 
b = (dhcpv6_bind_t*)it2->data; + if (b && b->ifindex == l2->ifindex) { + already = true; + break; + } + } + if (already) continue; + + l3_ipv6_interface_t* target = NULL; + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; s++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!v6) continue; + if (!(v6->kind & IPV6_ADDRK_GLOBAL)) continue; + + if (v6->cfg == IPV6_CFG_DHCPV6) { + target = v6; + break; + } + if (v6->cfg == IPV6_CFG_SLAAC && v6->dhcpv6_stateless) { + target = v6; + break; + } + } + if (!target) continue; + + uint8_t ll_l3 = 0; + bool ll_ok = false; + + for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[i]; + if (!v6) continue; + if (v6->cfg == IPV6_CFG_DISABLE) continue; + if (v6->dad_state != IPV6_DAD_OK) continue; + if (!ipv6_is_linklocal(v6->ip)) continue; + ll_l3 = v6->l3_id; + ll_ok = true; + break; + } + if (!ll_ok) continue; + + dhcpv6_bind_t* b = (dhcpv6_bind_t*)malloc(sizeof(*b)); + if (!b) continue; + + memset(b, 0, sizeof(*b)); + + b->ifindex = l2->ifindex; + b->target_l3_id = target->l3_id; + b->bound_linklocal_l3_id = ll_l3; + + const uint8_t* mac = network_get_mac(b->ifindex); + if (mac) { memcpy(b->mac, mac, 6); b->mac_ok = 1; } + + b->sock = udp_socket_create(SOCKET_SERVER, g_dhcpv6_pid, NULL); + if (!b->sock) { + free_sized(b, sizeof(*b)); + continue; + } + + SockBindSpec spec; + memset(&spec, 0, sizeof(spec)); + spec.kind = BIND_L3; + spec.ver = IP_VER6; + spec.l3_id = b->bound_linklocal_l3_id; + + if (socket_bind_udp_ex(b->sock, &spec, DHCPV6_CLIENT_PORT) != SOCK_OK) { + socket_destroy_udp(b->sock); + b->sock = 0; + free_sized(b, sizeof(*b)); + continue; + } + + uint8_t m[16]; + mcast_servers(m); + (void)l2_ipv6_mcast_join(b->ifindex, m); + + clinkedlist_push_front(g_dhcpv6_binds, b); + } +} + +static void fsm_once(dhcpv6_bind_t* b, uint32_t tick_ms) { + if (!b || !b->mac_ok || !b->sock) return; + if (b->done) return; + + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(b->target_l3_id); + if (!v6) 
return; + bool stateful = (v6->cfg == IPV6_CFG_DHCPV6); + bool stateless = (v6->cfg == IPV6_CFG_SLAAC && v6->dhcpv6_stateless); + + if (!stateful && !stateless) return; + if (!(v6->kind & IPV6_ADDRK_GLOBAL)) return; + if (stateless && v6->dhcpv6_stateless_done) return; + + if (v6->runtime_opts_v6.lease != 0 && v6->runtime_opts_v6.lease_start_time != 0 && !ipv6_is_unspecified(v6->ip) && v6->dhcpv6_state == DHCPV6_S_INIT) { + uint32_t now_s = get_time(); + uint32_t start_s = v6->runtime_opts_v6.lease_start_time; + uint32_t elapsed_s = (now_s >= start_s) ? (now_s - start_s) : 0; + + uint32_t lease_s = v6->runtime_opts_v6.lease; + if (elapsed_s >= lease_s) { + v6->runtime_opts_v6.lease = 0; + v6->runtime_opts_v6.lease_start_time = 0; + v6->dhcpv6_state = DHCPV6_S_INIT; + } else { + uint32_t left_s = lease_s - elapsed_s; + b->lease_left_ms = left_s * 1000u; + + uint32_t t1_s = v6->runtime_opts_v6.t1; + uint32_t t2_s = v6->runtime_opts_v6.t2; + + if (!t1_s) t1_s = lease_s / 2; + if (!t2_s) t2_s = (lease_s / 8) * 7; + + if (elapsed_s >= t1_s) b->t1_left_ms = 0; + else b->t1_left_ms = (t1_s - elapsed_s) * 1000u; + + if (elapsed_s >= t2_s) b->t2_left_ms = 0; + else b->t2_left_ms = (t2_s - elapsed_s) * 1000u; + + v6->dhcpv6_state = DHCPV6_S_BOUND; + reset_backoff(b); + } + } + + if (b->retry_left_ms > tick_ms) b->retry_left_ms -= tick_ms; + else b->retry_left_ms = 0; + + if (v6->dhcpv6_state == DHCPV6_S_BOUND) { + if (b->t1_left_ms > tick_ms) b->t1_left_ms -= tick_ms; + else b->t1_left_ms = 0; + if (b->t2_left_ms > tick_ms) b->t2_left_ms -= tick_ms; + else b->t2_left_ms = 0; + if (b->lease_left_ms > tick_ms) b->lease_left_ms -= tick_ms; + else b->lease_left_ms = 0; + } + + if (!v6->runtime_opts_v6.iaid) v6->runtime_opts_v6.iaid = dhcpv6_iaid_from_mac(b->mac); + if (!v6->runtime_opts_v6.iaid) v6->runtime_opts_v6.iaid = rng_next32(&g_dhcpv6_rng); + + int bit = l3id_to_bit(v6->l3_id); + bool do_release = false; + bool do_decline = false; + + if (bit >= 0) { + uint64_t m = (1ull 
<< (uint64_t)bit); + + if (g_force_release_mask & m) { + g_force_release_mask &= ~m; + do_release = true; + } + + if (g_force_decline_mask & m) { + g_force_decline_mask &= ~m; + do_decline = true; + } + } + + if (do_release) { + v6->dhcpv6_state = DHCPV6_S_RELEASING; + b->retry_left_ms = 0; + reset_backoff(b); + } else if (do_decline) { + v6->dhcpv6_state = DHCPV6_S_DECLINING; + b->retry_left_ms = 0; + reset_backoff(b); + } + + if (g_force_confirm_all) { + v6->dhcpv6_state = DHCPV6_S_CONFIRMING; + b->retry_left_ms = 0; + reset_backoff(b); + } else if (g_force_rebind_all) { + v6->dhcpv6_state = DHCPV6_S_REBINDING; + b->retry_left_ms = 0; + reset_backoff(b); + } else if (g_force_renew_all) { + v6->dhcpv6_state = DHCPV6_S_RENEWING; + b->retry_left_ms = 0; + reset_backoff(b); + } + + if (v6->gateway[0] || v6->gateway[1]) { + if (!b->last_gateway_ok) { + ipv6_cpy(b->last_gateway, v6->gateway); + b->last_gateway_ok = 1; + } else if (ipv6_cmp(b->last_gateway, v6->gateway) != 0) { + ipv6_cpy(b->last_gateway, v6->gateway); + v6->dhcpv6_state = DHCPV6_S_CONFIRMING; + b->retry_left_ms = 0; + reset_backoff(b); + } + } + + if (v6->dhcpv6_state == DHCPV6_S_INIT) { + if (stateless) { + uint8_t zero16[16] = {0}; + int has_dns = (memcmp(v6->runtime_opts_v6.dns[0], zero16, 16) != 0) || (memcmp(v6->runtime_opts_v6.dns[1], zero16, 16) != 0); + + if (has_dns) { + v6->dhcpv6_stateless_done = 1; + b->retry_left_ms = 0; + reset_backoff(b); + return; + } + + v6->dhcpv6_state = DHCPV6_S_SOLICIT; + b->retry_left_ms = 0; + reset_backoff(b); + return; + } + + v6->dhcpv6_state = DHCPV6_S_SOLICIT; + b->retry_left_ms = 0; + reset_backoff(b); + return; + } + + if (v6->dhcpv6_state == DHCPV6_S_BOUND) { + if (!b->lease_left_ms && v6->runtime_opts_v6.lease) { + v6->dhcpv6_state = DHCPV6_S_SOLICIT; + v6->runtime_opts_v6.server_id_len = 0; + reset_backoff(b); + return; + } + + if (!b->t2_left_ms && b->lease_left_ms) { + v6->dhcpv6_state = DHCPV6_S_REBINDING; + b->retry_left_ms = 0; + reset_backoff(b); 
+ return; + } + + if (!b->t1_left_ms && b->lease_left_ms) { + v6->dhcpv6_state = DHCPV6_S_RENEWING; + b->retry_left_ms = 0; + reset_backoff(b); + return; + } + + return; + } + + if (b->retry_left_ms) return; + if (b->last_state != (uint8_t)v6->dhcpv6_state) { + b->last_state = (uint8_t)v6->dhcpv6_state; + b->tx_tries = 0; + } + + uint8_t type_peek = DHCPV6_MSG_SOLICIT; + + if (stateless) type_peek = DHCPV6_MSG_INFORMATION_REQUEST; + else if (v6->dhcpv6_state == DHCPV6_S_SOLICIT) type_peek = DHCPV6_MSG_SOLICIT; + else if (v6->dhcpv6_state == DHCPV6_S_REQUEST) type_peek = DHCPV6_MSG_REQUEST; + else if (v6->dhcpv6_state == DHCPV6_S_RENEWING) type_peek = DHCPV6_MSG_RENEW; + else if (v6->dhcpv6_state == DHCPV6_S_REBINDING) type_peek = DHCPV6_MSG_REBIND; + else if (v6->dhcpv6_state == DHCPV6_S_CONFIRMING) type_peek = DHCPV6_MSG_CONFIRM; + else if (v6->dhcpv6_state == DHCPV6_S_RELEASING) type_peek = DHCPV6_MSG_RELEASE; + else if (v6->dhcpv6_state == DHCPV6_S_DECLINING) type_peek = DHCPV6_MSG_DECLINE; + + uint8_t lim = DHCPV6_MAX_OTHER_TX; + if (type_peek == DHCPV6_MSG_INFORMATION_REQUEST) lim = DHCPV6_MAX_INFOREQ_TX; + else if (type_peek == DHCPV6_MSG_REQUEST) lim = DHCPV6_MAX_REQUEST_TX; + + if (b->tx_tries >= lim) { + uint8_t zero16[16] = {0}; + int has_dns =(memcmp(v6->runtime_opts_v6.dns[0], zero16, 16) != 0) || (memcmp(v6->runtime_opts_v6.dns[1], zero16, 16) != 0); + + if (!has_dns) { + if (!ipv6_is_unspecified(v6->gateway) && !ipv6_is_multicast(v6->gateway)) { + memcpy(v6->runtime_opts_v6.dns[0], v6->gateway, 16); + } + } + + if (stateless){ + v6->dhcpv6_stateless_done = 1; + v6->dhcpv6_state = DHCPV6_S_INIT; + b->retry_left_ms = 0; + reset_backoff(b); + return; + } + + b->done = 1; + v6->dhcpv6_state = DHCPV6_S_INIT; + reset_backoff(b); + return; + } + uint8_t msg[DHCPV6_MAX_MSG]; + uint32_t msg_len = 0; + + b->xid24 = dhcpv6_make_xid24(rng_next32(&g_dhcpv6_rng)); + + uint8_t type = DHCPV6_MSG_SOLICIT; + dhcpv6_req_kind kind = DHCPV6K_SELECT; + + if (stateless) { + 
type = DHCPV6_MSG_INFORMATION_REQUEST; + kind = DHCPV6K_SELECT; + } else if (v6->dhcpv6_state == DHCPV6_S_SOLICIT) { + type = DHCPV6_MSG_SOLICIT; + kind = DHCPV6K_SELECT; + } else if (v6->dhcpv6_state == DHCPV6_S_REQUEST) { + type = DHCPV6_MSG_REQUEST; + kind = DHCPV6K_SELECT; + } else if (v6->dhcpv6_state == DHCPV6_S_RENEWING) { + type = DHCPV6_MSG_RENEW; + kind = DHCPV6K_RENEW; + } else if (v6->dhcpv6_state == DHCPV6_S_REBINDING) { + type = DHCPV6_MSG_REBIND; + kind = DHCPV6K_REBIND; + } else if (v6->dhcpv6_state == DHCPV6_S_CONFIRMING) { + type = DHCPV6_MSG_CONFIRM; + kind = DHCPV6K_CONFIRM; + } else if (v6->dhcpv6_state == DHCPV6_S_RELEASING) { + type = DHCPV6_MSG_RELEASE; + kind = DHCPV6K_RELEASE; + } else if (v6->dhcpv6_state == DHCPV6_S_DECLINING) { + type = DHCPV6_MSG_DECLINE; + kind = DHCPV6K_DECLINE; + } else { + type = DHCPV6_MSG_SOLICIT; + kind = DHCPV6K_SELECT; + v6->dhcpv6_state = DHCPV6_S_SOLICIT; + } + bool want_addr = !stateless; + + if (!dhcpv6_build_message(msg, sizeof(msg), &msg_len, &v6->runtime_opts_v6, b->mac, type, kind, b->xid24, want_addr)) { + b->retry_left_ms = next_backoff_ms(b); + return; + } + + net_l4_endpoint dst; + memset(&dst, 0, sizeof(dst)); + dst.ver = IP_VER6; + mcast_servers(dst.ip); + dst.port = DHCPV6_SERVER_PORT; + + (void)socket_sendto_udp_ex(b->sock, DST_ENDPOINT, &dst, 0, (const void*)msg, (uint64_t)msg_len); + b->tx_tries++; + + uint8_t rx[DHCPV6_MAX_MSG]; + uint32_t rx_len = 0; + + net_l4_endpoint src; + memset(&src, 0, sizeof(src)); + + bool got = false; + uint32_t waited = 0; + + while (waited < 250) { + int64_t r = socket_recvfrom_udp_ex(b->sock, rx, sizeof(rx), &src); + if (r > 0) { + if (src.port != DHCPV6_SERVER_PORT) { + msleep(50); + waited += 50; + continue; + } + rx_len = (uint32_t)r; + got = true; + break; + } + msleep(50); + waited += 50; + } + + if (got && rx_len >= 4) { + uint32_t rid24 = ((uint32_t)rx[1] << 16) | ((uint32_t)rx[2] << 8) | (uint32_t)rx[3]; + uint32_t expect = dhcpv6_make_xid24(b->xid24); 
+ + if (rid24 == (expect & 0x00FFFFFFu)) { + dhcpv6_parsed_t p; + + if (dhcpv6_parse_message(rx, rx_len, expect, v6->runtime_opts_v6.iaid, &p)) { + if (p.msg_type == DHCPV6_MSG_ADVERTISE && v6->dhcpv6_state == DHCPV6_S_SOLICIT) { + if (p.has_server_id) { + v6->runtime_opts_v6.server_id_len = p.server_id_len; + if (p.server_id_len) memcpy(v6->runtime_opts_v6.server_id, p.server_id, p.server_id_len); + } + + if (p.has_dns) memcpy(v6->runtime_opts_v6.dns, p.dns, sizeof(v6->runtime_opts_v6.dns)); + if (p.has_ntp) memcpy(v6->runtime_opts_v6.ntp, p.ntp, sizeof(v6->runtime_opts_v6.ntp)); + uint8_t zero16[16] = {0}; + int has_dns = (memcmp(v6->runtime_opts_v6.dns[0], zero16, 16) != 0) || (memcmp(v6->runtime_opts_v6.dns[1], zero16, 16) != 0); + + if (!has_dns) { + if (!ipv6_is_unspecified(v6->gateway) && !ipv6_is_multicast(v6->gateway)) { + memcpy(v6->runtime_opts_v6.dns[0], v6->gateway, 16); + } + } + + if (p.has_pd) { + ipv6_cpy(v6->runtime_opts_v6.pd_prefix, p.pd_prefix); + v6->runtime_opts_v6.pd_prefix_len = p.pd_prefix_len; + v6->runtime_opts_v6.pd_preferred_lft = p.pd_preferred_lft; + v6->runtime_opts_v6.pd_valid_lft = p.pd_valid_lft; + } + + if (p.t1) v6->runtime_opts_v6.t1 = p.t1; + if (p.t2) v6->runtime_opts_v6.t2 = p.t2; + + if (p.has_addr) { + uint8_t gw[16]; + + if (!ipv6_is_unspecified(v6->gateway) && !ipv6_is_multicast(v6->gateway)) ipv6_cpy(gw, v6->gateway); + else memset(gw, 0, 16); + + (void)l3_ipv6_update(v6->l3_id, p.addr, 128, gw, IPV6_CFG_DHCPV6, v6->kind); + + uint32_t lease_s = p.valid_lft; + v6->runtime_opts_v6.lease = lease_s; + v6->runtime_opts_v6.lease_start_time = get_time(); + + uint32_t t1_s = v6->runtime_opts_v6.t1; + uint32_t t2_s = v6->runtime_opts_v6.t2; + + if (!t1_s) t1_s = lease_s / 2; + if (!t2_s) t2_s = (lease_s / 8) * 7; + + b->t1_left_ms = t1_s * 1000u; + b->t2_left_ms = t2_s * 1000u; + b->lease_left_ms = lease_s * 1000u; + } + + v6->dhcpv6_state = DHCPV6_S_REQUEST; + b->retry_left_ms = 0; + reset_backoff(b); + } else if (p.msg_type 
== DHCPV6_MSG_REPLY) { + if (stateless) { + if (p.has_dns) memcpy(v6->runtime_opts_v6.dns, p.dns, sizeof(v6->runtime_opts_v6.dns)); + if (p.has_ntp) memcpy(v6->runtime_opts_v6.ntp, p.ntp, sizeof(v6->runtime_opts_v6.ntp)); + + uint8_t zero16[16] = {0}; + int has_dns = (memcmp(v6->runtime_opts_v6.dns[0], zero16, 16) != 0) || (memcmp(v6->runtime_opts_v6.dns[1], zero16, 16) != 0); + + if (!has_dns) { + if (!ipv6_is_unspecified(v6->gateway) && !ipv6_is_multicast(v6->gateway)) { + memcpy(v6->runtime_opts_v6.dns[0], v6->gateway, 16); + } + } + + v6->dhcpv6_stateless_done = 1; + reset_backoff(b); + return; + } + if (v6->dhcpv6_state == DHCPV6_S_REQUEST || v6->dhcpv6_state == DHCPV6_S_RENEWING || v6->dhcpv6_state == DHCPV6_S_REBINDING || v6->dhcpv6_state == DHCPV6_S_CONFIRMING) { + if (p.has_server_id) { + v6->runtime_opts_v6.server_id_len = p.server_id_len; + if (p.server_id_len) memcpy(v6->runtime_opts_v6.server_id, p.server_id, p.server_id_len); + } + + if (p.has_dns) memcpy(v6->runtime_opts_v6.dns, p.dns, sizeof(v6->runtime_opts_v6.dns)); + if (p.has_ntp) memcpy(v6->runtime_opts_v6.ntp, p.ntp, sizeof(v6->runtime_opts_v6.ntp)); + uint8_t zero16[16] = {0}; + int has_dns = (memcmp(v6->runtime_opts_v6.dns[0], zero16, 16) != 0) || (memcmp(v6->runtime_opts_v6.dns[1], zero16, 16) != 0); + + if (!has_dns) { + if (!ipv6_is_unspecified(v6->gateway) && !ipv6_is_multicast(v6->gateway)) { + memcpy(v6->runtime_opts_v6.dns[0], v6->gateway, 16); + } + } + + if (p.has_pd) { + ipv6_cpy(v6->runtime_opts_v6.pd_prefix, p.pd_prefix); + v6->runtime_opts_v6.pd_prefix_len = p.pd_prefix_len; + v6->runtime_opts_v6.pd_preferred_lft = p.pd_preferred_lft; + v6->runtime_opts_v6.pd_valid_lft = p.pd_valid_lft; + } + + if (p.t1) v6->runtime_opts_v6.t1 = p.t1; + if (p.t2) v6->runtime_opts_v6.t2 = p.t2; + + if (p.has_addr) { + uint8_t gw[16]; + + if (!ipv6_is_unspecified(v6->gateway) && !ipv6_is_multicast(v6->gateway)) ipv6_cpy(gw, v6->gateway); + else memset(gw, 0, 16); + + 
(void)l3_ipv6_update(v6->l3_id, p.addr, 128, gw, IPV6_CFG_DHCPV6, v6->kind); + + uint32_t lease_s = p.valid_lft; + v6->runtime_opts_v6.lease = lease_s; + v6->runtime_opts_v6.lease_start_time = get_time(); + + uint32_t t1_s = v6->runtime_opts_v6.t1; + uint32_t t2_s = v6->runtime_opts_v6.t2; + + if (!t1_s) t1_s = lease_s / 2; + if (!t2_s) t2_s = (lease_s / 8) * 7; + + b->t1_left_ms = t1_s * 1000u; + b->t2_left_ms = t2_s * 1000u; + b->lease_left_ms = lease_s * 1000u; + } + + v6->dhcpv6_state = DHCPV6_S_BOUND; + reset_backoff(b); + } else if (v6->dhcpv6_state == DHCPV6_S_SOLICIT) { + if (p.has_server_id) { + v6->runtime_opts_v6.server_id_len = p.server_id_len; + if (p.server_id_len) memcpy(v6->runtime_opts_v6.server_id, p.server_id, p.server_id_len); + } + + if (p.has_dns) memcpy(v6->runtime_opts_v6.dns, p.dns, sizeof(v6->runtime_opts_v6.dns)); + if (p.has_ntp) memcpy(v6->runtime_opts_v6.ntp, p.ntp, sizeof(v6->runtime_opts_v6.ntp)); + uint8_t zero16[16] = {0}; + int has_dns = (memcmp(v6->runtime_opts_v6.dns[0], zero16, 16) != 0) || (memcmp(v6->runtime_opts_v6.dns[1], zero16, 16) != 0); + + if (!has_dns) { + if (!ipv6_is_unspecified(v6->gateway) && !ipv6_is_multicast(v6->gateway)) { + memcpy(v6->runtime_opts_v6.dns[0], v6->gateway, 16); + } + } + + if (p.has_pd) { + ipv6_cpy(v6->runtime_opts_v6.pd_prefix, p.pd_prefix); + v6->runtime_opts_v6.pd_prefix_len = p.pd_prefix_len; + v6->runtime_opts_v6.pd_preferred_lft = p.pd_preferred_lft; + v6->runtime_opts_v6.pd_valid_lft = p.pd_valid_lft; + } + + if (p.t1) v6->runtime_opts_v6.t1 = p.t1; + if (p.t2) v6->runtime_opts_v6.t2 = p.t2; + + if (p.has_addr) { + uint8_t gw[16]; + + if (!ipv6_is_unspecified(v6->gateway) && !ipv6_is_multicast(v6->gateway)) ipv6_cpy(gw, v6->gateway); + else memset(gw, 0, 16); + + (void)l3_ipv6_update(v6->l3_id, p.addr, 128, gw, IPV6_CFG_DHCPV6, v6->kind); + + uint32_t lease_s = p.valid_lft; + v6->runtime_opts_v6.lease = lease_s; + v6->runtime_opts_v6.lease_start_time = get_time(); + + uint32_t t1_s = 
v6->runtime_opts_v6.t1; + uint32_t t2_s = v6->runtime_opts_v6.t2; + + if (!t1_s) t1_s = lease_s / 2; + if (!t2_s) t2_s = (lease_s / 8) * 7; + + b->t1_left_ms = t1_s * 1000u; + b->t2_left_ms = t2_s * 1000u; + b->lease_left_ms = lease_s * 1000u; + } + + v6->dhcpv6_state = DHCPV6_S_BOUND; + reset_backoff(b); + } else if (v6->dhcpv6_state == DHCPV6_S_RELEASING || v6->dhcpv6_state == DHCPV6_S_DECLINING) { + v6->runtime_opts_v6.lease = 0; + v6->runtime_opts_v6.lease_start_time = 0; + + b->t1_left_ms = 0; + b->t2_left_ms = 0; + b->lease_left_ms = 0; + + v6->dhcpv6_state = DHCPV6_S_INIT; + reset_backoff(b); + } + } + } + } + } + + b->retry_left_ms = next_backoff_ms(b); +} + +int dhcpv6_daemon_entry(int argc, char* argv[]) { + (void)argc; + (void)argv; + + g_dhcpv6_pid = (uint16_t)get_current_proc_pid(); + dhcpv6_set_pid(g_dhcpv6_pid); + + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&g_dhcpv6_rng, virt_timer); + + const uint32_t tick_ms = 250; + + for (;;) { + ensure_binds(); + + if (g_dhcpv6_binds) { + for (clinkedlist_node_t* it = g_dhcpv6_binds->head; it; it = it->next) { + dhcpv6_bind_t* b = (dhcpv6_bind_t*)it->data; + if (b) fsm_once(b, tick_ms); + } + } + + if (g_force_renew_all) g_force_renew_all = false; + if (g_force_rebind_all) g_force_rebind_all = false; + if (g_force_confirm_all) g_force_confirm_all = false; + + msleep(tick_ms); + } +} \ No newline at end of file diff --git a/kernel/networking/application_layer/dhcpv6_daemon.h b/kernel/networking/application_layer/dhcpv6_daemon.h new file mode 100644 index 00000000..6a2c7675 --- /dev/null +++ b/kernel/networking/application_layer/dhcpv6_daemon.h @@ -0,0 +1,24 @@ +#pragma once + +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int dhcpv6_daemon_entry(int argc, char* argv[]); + +uint16_t dhcpv6_get_pid(); +bool dhcpv6_is_running(); +void dhcpv6_set_pid(uint16_t pid); + +void dhcpv6_force_renew_all(); +void dhcpv6_force_rebind_all(); +void 
dhcpv6_force_confirm_all(); + +void dhcpv6_force_release_l3(uint8_t l3_id); +void dhcpv6_force_decline_l3(uint8_t l3_id); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/kernel/networking/application_layer/dns.c b/kernel/networking/application_layer/dns.c deleted file mode 100644 index dd4ac19d..00000000 --- a/kernel/networking/application_layer/dns.c +++ /dev/null @@ -1,265 +0,0 @@ -#include "dns.h" -#include "std/memory.h" -#include "random/random.h" -#include "process/scheduler.h" -#include "types.h" -#include "networking/internet_layer/ipv4.h" -#include "networking/interface_manager.h" -#include "dns_daemon.h" -#include "syscalls/syscalls.h" - - -static uint32_t encode_dns_qname(uint8_t* dst, const char* name){ - uint32_t index = 0; - uint32_t label_len = 0; - uint32_t label_pos = 0; - dst[index++] = 0; - while (*name) { - if (*name == '.') { dst[label_pos] = (uint8_t)label_len; label_len = 0; label_pos = index; dst[index++] = 0; name++; continue; } - dst[index++] = (uint8_t)(*name); label_len++; name++; - } - dst[label_pos] = (uint8_t)label_len; - dst[index++] = 0; - return index; -} - -static uint32_t skip_dns_name(const uint8_t* message, uint32_t message_len, uint32_t offset){ - if (offset >= message_len) return message_len + 1; - uint32_t cursor = offset; - while (cursor < message_len) { - uint8_t len = message[cursor++]; - if (len == 0) break; - if ((len & 0xC0) == 0xC0) { if (cursor >= message_len) return message_len + 1; cursor++; break; } - cursor += len; - if (cursor > message_len) return message_len + 1; - } - return cursor; -} - -static dns_result_t parse_dns_a_record(uint8_t* buffer, uint32_t buffer_len, uint16_t message_id, uint32_t* out_ip){ - if (buffer_len < 12) return DNS_ERR_FORMAT; - if (rd_be16(buffer+0) != message_id) return DNS_ERR_FORMAT; - uint16_t flags = rd_be16(buffer+2); - uint16_t question_count = rd_be16(buffer+4); - uint16_t answer_count = rd_be16(buffer+6); - if ((flags & 0x000F) == 3) return 
DNS_ERR_NXDOMAIN; - uint32_t offset = 12; - for (uint16_t i = 0; i < question_count; ++i){ - offset = skip_dns_name(buffer, buffer_len, offset); - if (offset + 4 > buffer_len) return DNS_ERR_FORMAT; - offset += 4; - } - for (uint16_t i = 0; i < answer_count; ++i){ - offset = skip_dns_name(buffer, buffer_len, offset); - if (offset + 10 > buffer_len) return DNS_ERR_FORMAT; - uint16_t type = rd_be16(buffer+offset+0); - uint16_t klass = rd_be16(buffer+offset+2); - uint16_t rdlength = rd_be16(buffer+offset+8); - offset += 10; - if (offset + rdlength > buffer_len) return DNS_ERR_FORMAT; - if (type == 1 && klass == 1 && rdlength == 4){ - uint32_t ip_host = rd_be32(buffer+offset); - *out_ip = ip_host; - return DNS_OK; - } - offset += rdlength; - } - return DNS_ERR_NO_ANSWER; -} - -static dns_result_t parse_dns_aaaa_record(uint8_t* buffer, uint32_t buffer_len, uint16_t message_id, uint8_t out_ipv6[16]){ - if (buffer_len < 12) return DNS_ERR_FORMAT; - if (rd_be16(buffer+0) != message_id) return DNS_ERR_FORMAT; - uint16_t flags = rd_be16(buffer+2); - uint16_t question_count = rd_be16(buffer+4); - uint16_t answer_count = rd_be16(buffer+6); - if ((flags & 0x000F) == 3) return DNS_ERR_NXDOMAIN; - uint32_t offset = 12; - for (uint16_t i = 0; i < question_count; ++i){ - offset = skip_dns_name(buffer, buffer_len, offset); - if (offset + 4 > buffer_len) return DNS_ERR_FORMAT; - offset += 4; - } - for (uint16_t i = 0; i < answer_count; ++i){ - offset = skip_dns_name(buffer, buffer_len, offset); - if (offset + 10 > buffer_len) return DNS_ERR_FORMAT; - uint16_t type = rd_be16(buffer+offset+0); - uint16_t klass = rd_be16(buffer+offset+2); - uint16_t rdlength = rd_be16(buffer+offset+8); - offset += 10; - if (offset + rdlength > buffer_len) return DNS_ERR_FORMAT; - if (type == 28 && klass == 1 && rdlength == 16){ - memcpy(out_ipv6, buffer+offset, 16); - return DNS_OK; - } - offset += rdlength; - } - return DNS_ERR_NO_ANSWER; -} - -static dns_result_t 
perform_dns_query_once_a(socket_handle_t sock, uint32_t dns_ip_host, const char* name, uint32_t timeout_ms, uint32_t* out_ip){ - uint8_t request_buffer[512]; memset(request_buffer,0,sizeof(request_buffer)); - rng_t rng; rng_init_random(&rng); - uint16_t message_id = (uint16_t)(rng_next32(&rng) & 0xFFFF); - wr_be16(request_buffer+0, message_id); - wr_be16(request_buffer+2, 0x0100); - wr_be16(request_buffer+4, 1); - uint32_t offset = 12; - offset += encode_dns_qname(request_buffer+offset, name); - wr_be16(request_buffer+offset+0, 1); - wr_be16(request_buffer+offset+2, 1); - offset += 4; - - net_l4_endpoint dst = {0}; - dst.ver = IP_VER4; - memcpy(dst.ip, &dns_ip_host, 4); - dst.port = 53; - - int64_t sent = socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, 0, request_buffer, offset); - if (sent < 0) return DNS_ERR_SEND; - - uint32_t waited_ms = 0; - while (waited_ms < timeout_ms){ - uint8_t response_buffer[512]; - net_l4_endpoint source; - int64_t received = socket_recvfrom_udp_ex(sock, response_buffer, sizeof(response_buffer), &source); - if (received > 0 && source.port == 53 && source.ver == IP_VER4 && *(uint32_t*)source.ip == dns_ip_host){ - uint32_t ip_host; - dns_result_t pr = parse_dns_a_record(response_buffer, (uint32_t)received, message_id, &ip_host); - if (pr == DNS_OK){ *out_ip = ip_host; return DNS_OK; } - if (pr == DNS_ERR_NXDOMAIN) return pr; - } else { - msleep(50); - waited_ms += 50; - } - } - return DNS_ERR_TIMEOUT; -} - -static dns_result_t perform_dns_query_once_aaaa(socket_handle_t sock, uint32_t dns_ip_host, const char* name, uint32_t timeout_ms, uint8_t out_ipv6[16]){ - uint8_t request_buffer[512]; memset(request_buffer, 0, sizeof(request_buffer)); - rng_t rng; rng_init_random(&rng); - uint16_t message_id = (uint16_t)(rng_next32(&rng) & 0xFFFF); - wr_be16(request_buffer+0, message_id); - wr_be16(request_buffer+2, 0x0100); - wr_be16(request_buffer+4, 1); - uint32_t offset = 12; - offset += encode_dns_qname(request_buffer+offset, name); - 
wr_be16(request_buffer+offset+0, 28); - wr_be16(request_buffer+offset+2, 1); - offset += 4; - - net_l4_endpoint dst = {0}; - dst.ver = IP_VER4; - memcpy(dst.ip, &dns_ip_host, 4); - dst.port = 53; - - int64_t sent = socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, 0, request_buffer, offset); - if (sent < 0) return DNS_ERR_SEND; - - uint32_t waited_ms = 0; - while (waited_ms < timeout_ms){ - uint8_t response_buffer[512]; - net_l4_endpoint source; - int64_t received = socket_recvfrom_udp_ex(sock, response_buffer, sizeof(response_buffer), &source); - if (received > 0 && source.port == 53 && source.ver == IP_VER4 && *(uint32_t*)source.ip == dns_ip_host){ - dns_result_t pr = parse_dns_aaaa_record(response_buffer, (uint32_t)received, message_id, out_ipv6); - if (pr == DNS_OK) return DNS_OK; - if (pr == DNS_ERR_NXDOMAIN) return pr; - } else { - msleep(50); - waited_ms += 50; - } - } - return DNS_ERR_TIMEOUT; -} - -static bool pick_dns_on_l3(uint8_t l3_id, uint32_t* out_primary, uint32_t* out_secondary){ - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(l3_id); - if (!v4) return false; - uint32_t p = v4->runtime_opts_v4.dns[0]; - uint32_t s = v4->runtime_opts_v4.dns[1]; - if (out_primary) *out_primary = p; - if (out_secondary) *out_secondary = s; - return (p != 0) || (s != 0); -} - -static bool pick_dns_first_iface(uint8_t* out_l3, uint32_t* out_primary, uint32_t* out_secondary){ - uint8_t n = l2_interface_count(); - for (uint8_t i = 0; i < n; ++i){ - l2_interface_t* l2 = l2_interface_at(i); - if (!l2) continue; - for (int s = 0; s < MAX_IPV4_PER_INTERFACE; ++s){ - l3_ipv4_interface_t* v4 = l2->l3_v4[s]; - if (!v4 || v4->mode == IPV4_CFG_DISABLED) continue; - - uint32_t p = v4->runtime_opts_v4.dns[0]; - uint32_t q = v4->runtime_opts_v4.dns[1]; - if (p || q){ - if (out_l3) *out_l3 = v4->l3_id; - if (out_primary) *out_primary = p; - if (out_secondary)*out_secondary= q; - return true; - } - } - } - return false; -} - -static dns_result_t query_with_selection_a(uint32_t primary, 
uint32_t secondary, dns_server_sel_t which, const char* hostname, uint32_t timeout_ms, uint32_t* out_ip){ - if (which == DNS_USE_PRIMARY && primary == 0) return DNS_ERR_NO_DNS; - if (which == DNS_USE_SECONDARY && secondary == 0) return DNS_ERR_NO_DNS; - if (which == DNS_USE_BOTH && primary == 0 && secondary == 0) return DNS_ERR_NO_DNS; - socket_handle_t sock = dns_socket_handle(); - if (sock == 0) return DNS_ERR_SOCKET; - dns_result_t res = DNS_ERR_NO_DNS; - if (which == DNS_USE_PRIMARY) res = perform_dns_query_once_a(sock, primary, hostname, timeout_ms, out_ip); - else if (which == DNS_USE_SECONDARY) res = perform_dns_query_once_a(sock, secondary, hostname, timeout_ms, out_ip); - else { - res = perform_dns_query_once_a(sock, primary ? primary : secondary, hostname, timeout_ms, out_ip); - if (res != DNS_OK && secondary && secondary != primary) res = perform_dns_query_once_a(sock, secondary, hostname, timeout_ms, out_ip); - } - return res; -} - -static dns_result_t query_with_selection_aaaa(uint32_t primary, uint32_t secondary, dns_server_sel_t which, const char* hostname, uint32_t timeout_ms, uint8_t out_ipv6[16]){ - if (which == DNS_USE_PRIMARY && primary == 0) return DNS_ERR_NO_DNS; - if (which == DNS_USE_SECONDARY && secondary == 0) return DNS_ERR_NO_DNS; - if (which == DNS_USE_BOTH && primary == 0 && secondary == 0) return DNS_ERR_NO_DNS; - socket_handle_t sock = dns_socket_handle(); - if (sock == 0) return DNS_ERR_SOCKET; - dns_result_t res = DNS_ERR_NO_DNS; - if (which == DNS_USE_PRIMARY) res = perform_dns_query_once_aaaa(sock, primary, hostname, timeout_ms, out_ipv6); - else if (which == DNS_USE_SECONDARY) res = perform_dns_query_once_aaaa(sock, secondary, hostname, timeout_ms, out_ipv6); - else { - res = perform_dns_query_once_aaaa(sock, primary ? 
primary : secondary, hostname, timeout_ms, out_ipv6); - if (res != DNS_OK && secondary && secondary != primary) res = perform_dns_query_once_aaaa(sock, secondary, hostname, timeout_ms, out_ipv6); - } - return res; -} - -dns_result_t dns_resolve_a(const char* hostname, uint32_t* out_ip, dns_server_sel_t which, uint32_t timeout_ms){ - uint8_t l3 = 0; - uint32_t p = 0, s = 0; - if (!pick_dns_first_iface(&l3, &p, &s)) return DNS_ERR_NO_DNS; - return query_with_selection_a(p, s, which, hostname, timeout_ms, out_ip); -} - -dns_result_t dns_resolve_a_on_l3(uint8_t l3_id, const char* hostname, uint32_t* out_ip, dns_server_sel_t which, uint32_t timeout_ms){ - uint32_t p = 0, s = 0; - if (!pick_dns_on_l3(l3_id, &p, &s)) return DNS_ERR_NO_DNS; - return query_with_selection_a(p, s, which, hostname, timeout_ms, out_ip); -} - -dns_result_t dns_resolve_aaaa(const char* hostname, uint8_t out_ipv6[16], dns_server_sel_t which, uint32_t timeout_ms){ - uint8_t l3 = 0; - uint32_t p = 0, s = 0; - if (!pick_dns_first_iface(&l3, &p, &s)) return DNS_ERR_NO_DNS; - return query_with_selection_aaaa(p, s, which, hostname, timeout_ms, out_ipv6); -} - -dns_result_t dns_resolve_aaaa_on_l3(uint8_t l3_id, const char* hostname, uint8_t out_ipv6[16], dns_server_sel_t which, uint32_t timeout_ms){ - uint32_t p = 0, s = 0; - if (!pick_dns_on_l3(l3_id, &p, &s)) return DNS_ERR_NO_DNS; - return query_with_selection_aaaa(p, s, which, hostname, timeout_ms, out_ipv6); -} diff --git a/kernel/networking/application_layer/dns/dns.c b/kernel/networking/application_layer/dns/dns.c new file mode 100644 index 00000000..d9262a63 --- /dev/null +++ b/kernel/networking/application_layer/dns/dns.c @@ -0,0 +1,545 @@ +#include "dns.h" +#include "dns_mdns.h" +#include "dns_cache.h" +#include "std/std.h" +#include "math/math.h" +#include "process/scheduler.h" +#include "types.h" +#include "networking/internet_layer/ipv4.h" +#include "networking/internet_layer/ipv6_utils.h" + +#include "networking/interface_manager.h" 
+#include "dns_daemon.h" +#include "syscalls/syscalls.h" +#include "networking/transport_layer/trans_utils.h" + +#define MDNS_TIMEOUT_A_MS 500u +#define MDNS_TIMEOUT_AAAA_MS 300u + +static bool dns_is_local_name(const char* hostname) { + if (!hostname) return false; + uint32_t nlen = strlen(hostname); + if (nlen < 6u) return false; + if (strncmp(hostname +(nlen - 6u), ".local", 6) != 0)return false; + return true; +} + +static dns_result_t dns_write_qname(uint8_t* buf, uint32_t buf_len, uint32_t* offset, const char* name) { + if (!buf || !offset || !name) return DNS_ERR_FORMAT; + uint32_t off = *offset; + if (off >= buf_len) return DNS_ERR_FORMAT; + uint32_t label_len = 0; + uint32_t label_pos = off; + buf[off++] = 0; + for (const char* p = name; *p; ++p) { + char c = *p; + if (c =='.') { + if (!label_len || label_len > 63u) return DNS_ERR_FORMAT; + buf[label_pos] = (uint8_t)label_len; + label_len = 0; + label_pos = off; + if (off >= buf_len) return DNS_ERR_FORMAT; + buf[off++] = 0; + continue; + } + if (label_len >= 63u) return DNS_ERR_FORMAT; + if (off >= buf_len) return DNS_ERR_FORMAT; + buf[off++] = (uint8_t)c; + label_len++; + } + if (!label_len || label_len > 63u) return DNS_ERR_FORMAT; + buf[label_pos] = (uint8_t)label_len; + if (off >= buf_len) return DNS_ERR_FORMAT; + buf[off++]= 0; + *offset = off; + return DNS_OK; +} + +static uint32_t skip_dns_name(const uint8_t* message, uint32_t message_len, uint32_t offset){ + if (offset >= message_len) return message_len + 1; + uint32_t cursor = offset; + while (cursor < message_len) { + uint8_t len = message[cursor++]; + if (len == 0) break; + if ((len & 0xC0) == 0xC0) { + if (cursor >= message_len) return message_len + 1; + cursor++; + break; + } + cursor += len; + if (cursor > message_len) return message_len + 1; + } + return cursor; +} + +static dns_result_t parse_dns_a_record(uint8_t* buffer, uint32_t buffer_len, uint16_t message_id, uint32_t* out_ip, uint32_t* out_ttl_s){ + if (buffer_len < 12) return 
DNS_ERR_FORMAT; + if (rd_be16(buffer+0) != message_id) return DNS_ERR_FORMAT; + uint16_t flags = rd_be16(buffer+2); + uint16_t question_count = rd_be16(buffer+4); + uint16_t answer_count = rd_be16(buffer+6); + if ((flags & 0x000F) == 3) return DNS_ERR_NXDOMAIN; + uint32_t offset = 12; + for (uint16_t i = 0; i < question_count; ++i){ + offset = skip_dns_name(buffer, buffer_len, offset); + if (offset + 4 > buffer_len) return DNS_ERR_FORMAT; + offset += 4; + } + for (uint16_t i = 0; i < answer_count; ++i){ + offset = skip_dns_name(buffer, buffer_len, offset); + if (offset + 10 > buffer_len) return DNS_ERR_FORMAT; + uint16_t type = rd_be16(buffer+offset+0); + uint16_t klass = rd_be16(buffer+offset+2); + uint32_t ttl_s = rd_be32(buffer+offset+4); + uint16_t rdlength = rd_be16(buffer+offset+8); + offset += 10; + if (offset + rdlength > buffer_len) return DNS_ERR_FORMAT; + if (type == 1 && klass == 1 && rdlength == 4){ + uint32_t ip_host = rd_be32(buffer+offset); + *out_ip = ip_host; + if (out_ttl_s) *out_ttl_s = ttl_s; + return DNS_OK; + } + offset += rdlength; + } + return DNS_ERR_NO_ANSWER; +} + +static dns_result_t parse_dns_aaaa_record(uint8_t* buffer, uint32_t buffer_len, uint16_t message_id, uint8_t out_ipv6[16], uint32_t* out_ttl_s){ + if (buffer_len < 12) return DNS_ERR_FORMAT; + if (rd_be16(buffer+0) != message_id) return DNS_ERR_FORMAT; + uint16_t flags = rd_be16(buffer+2); + uint16_t question_count = rd_be16(buffer+4); + uint16_t answer_count = rd_be16(buffer+6); + if ((flags & 0x000F) == 3) return DNS_ERR_NXDOMAIN; + uint32_t offset = 12; + for (uint16_t i = 0; i < question_count; ++i){ + offset = skip_dns_name(buffer, buffer_len, offset); + if (offset + 4 > buffer_len) return DNS_ERR_FORMAT; + offset += 4; + } + for (uint16_t i = 0; i < answer_count; ++i){ + offset = skip_dns_name(buffer, buffer_len, offset); + if (offset + 10 > buffer_len) return DNS_ERR_FORMAT; + uint16_t type = rd_be16(buffer+offset+0); + uint16_t klass = rd_be16(buffer+offset+2); + 
uint32_t ttl_s = rd_be32(buffer+offset+4); + uint16_t rdlength = rd_be16(buffer+offset+8); + offset += 10; + if (offset + rdlength > buffer_len) return DNS_ERR_FORMAT; + if (type == 28 && klass == 1 && rdlength == 16){ + memcpy(out_ipv6, buffer+offset, 16); + if (out_ttl_s) *out_ttl_s = ttl_s; + return DNS_OK; + } + offset += rdlength; + } + return DNS_ERR_NO_ANSWER; +} + +static dns_result_t perform_dns_query_once_a(socket_handle_t sock, const net_l4_endpoint* dns_srv, const char* name, uint32_t timeout_ms, uint32_t* out_ip, uint32_t* out_ttl_s){ + uint8_t request_buffer[512]; + memset(request_buffer,0,sizeof(request_buffer)); + rng_t rng; + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&rng, virt_timer); + uint16_t message_id = (uint16_t)(rng_next32(&rng) & 0xFFFF); + wr_be16(request_buffer+0, message_id); + wr_be16(request_buffer+2, 0x0100); + wr_be16(request_buffer+4, 1); + uint32_t offset = 12; + dns_result_t qnr=dns_write_qname(request_buffer, sizeof(request_buffer), &offset, name); + if (qnr != DNS_OK) return qnr; + + if (offset+ 4 > sizeof(request_buffer)) return DNS_ERR_FORMAT; + + wr_be16(request_buffer+offset+0, 1); + wr_be16(request_buffer+offset+2, 1); + offset += 4; + + net_l4_endpoint dst = *dns_srv; + dst.port = 53; + + int64_t sent = socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, 0, request_buffer, offset); + if (sent < 0) return DNS_ERR_SEND; + + uint32_t waited_ms = 0; + while (waited_ms < timeout_ms){ + uint8_t response_buffer[512]; + net_l4_endpoint source; + int64_t received = socket_recvfrom_udp_ex(sock, response_buffer, sizeof(response_buffer), &source); + bool ok_src = false; + if (received > 0 && source.port == 53 && source.ver == dst.ver) { + if (dst.ver == IP_VER4) ok_src = (*(uint32_t*)source.ip == *(uint32_t*)dst.ip); + else if (dst.ver == IP_VER6) ok_src = (memcmp(source.ip, dst.ip, 16) == 0); + } + if (ok_src){ + uint32_t ip_host; + uint32_t ttl_s = 0; + dns_result_t pr = 
parse_dns_a_record(response_buffer, (uint32_t)received, message_id, &ip_host, &ttl_s); + if(pr == DNS_OK){ + *out_ip = ip_host; + if(out_ttl_s) *out_ttl_s = ttl_s; + return DNS_OK; + } + if (pr == DNS_ERR_NXDOMAIN) return pr; + } + msleep(50); + waited_ms += 50; + } + return DNS_ERR_TIMEOUT; +} + +static dns_result_t perform_dns_query_once_aaaa(socket_handle_t sock, const net_l4_endpoint* dns_srv, const char* name, uint32_t timeout_ms, uint8_t out_ipv6[16], uint32_t* out_ttl_s){ + uint8_t request_buffer[512]; + memset(request_buffer, 0, sizeof(request_buffer)); + rng_t rng; + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&rng, virt_timer); + uint16_t message_id = (uint16_t)(rng_next32(&rng) & 0xFFFF); + wr_be16(request_buffer+0, message_id); + wr_be16(request_buffer+2, 0x0100); + wr_be16(request_buffer+4, 1); + uint32_t offset = 12; + dns_result_t qnr=dns_write_qname(request_buffer, sizeof(request_buffer), &offset, name); + if (qnr != DNS_OK) return qnr; + + if (offset+ 4 > sizeof(request_buffer)) return DNS_ERR_FORMAT; + + wr_be16(request_buffer+offset+0, 28); + wr_be16(request_buffer+offset+2, 1); + offset += 4; + + net_l4_endpoint dst = *dns_srv; + dst.port = 53; + + int64_t sent = socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, 0, request_buffer, offset); + if (sent < 0) return DNS_ERR_SEND; + + uint32_t waited_ms = 0; + while (waited_ms < timeout_ms){ + uint8_t response_buffer[512]; + net_l4_endpoint source; + int64_t received = socket_recvfrom_udp_ex(sock, response_buffer, sizeof(response_buffer), &source); + bool ok_src = false; + if (received > 0 && source.port == 53 && source.ver == dst.ver) { + if (dst.ver == IP_VER4) ok_src = (*(uint32_t*)source.ip == *(uint32_t*)dst.ip); + else if (dst.ver == IP_VER6) ok_src = (memcmp(source.ip, dst.ip, 16) == 0); + } + if (ok_src){ + uint32_t ttl_s = 0; + dns_result_t pr = parse_dns_aaaa_record(response_buffer, (uint32_t)received, message_id, out_ipv6, &ttl_s); + if (pr == 
DNS_OK){ + if (out_ttl_s) *out_ttl_s = ttl_s; + return DNS_OK; + } + if (pr == DNS_ERR_NXDOMAIN) return pr; + } else { + msleep(50); + waited_ms += 50; + } + } + return DNS_ERR_TIMEOUT; +} + +static bool pick_dns_on_l3(uint8_t l3_id, net_l4_endpoint* out_primary, net_l4_endpoint* out_secondary){ + if (l3_ipv4_find_by_id(l3_id)) { + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(l3_id); + if (!v4) return false; + uint32_t p = v4->runtime_opts_v4.dns[0]; + uint32_t s = v4->runtime_opts_v4.dns[1]; + if (out_primary) { + memset(out_primary, 0, sizeof(*out_primary)); + out_primary->ver = IP_VER4; + memcpy(out_primary->ip, &p, 4); + } + if (out_secondary) { + memset(out_secondary, 0, sizeof(*out_secondary)); + out_secondary->ver = IP_VER4; + memcpy(out_secondary->ip, &s, 4); + } + return (p != 0) || (s != 0); + } + + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(l3_id); + if (!v6) return false; + static const uint8_t z[16] = {0}; + const uint8_t* p6 = v6->runtime_opts_v6.dns[0]; + const uint8_t* s6 = v6->runtime_opts_v6.dns[1]; + bool hp = memcmp(p6, z, 16) != 0; + bool hs = memcmp(s6, z, 16) != 0; + if (out_primary) { + memset(out_primary, 0, sizeof(*out_primary)); + out_primary->ver = IP_VER6; + if (hp) memcpy(out_primary->ip, p6, 16); + } + if (out_secondary) { + memset(out_secondary, 0, sizeof(*out_secondary)); + out_secondary->ver = IP_VER6; + if (hs) memcpy(out_secondary->ip, s6, 16); + } + return hp || hs; +} + +static bool pick_dns_first_iface(uint8_t* out_l3, net_l4_endpoint* out_primary, net_l4_endpoint* out_secondary){ + uint8_t n = l2_interface_count(); + for (uint8_t i = 0; i < n; ++i){ + l2_interface_t* l2 = l2_interface_at(i); + if (!l2) continue; + for (int s = 0; s < MAX_IPV4_PER_INTERFACE; ++s){ + l3_ipv4_interface_t* v4 = l2->l3_v4[s]; + if (!v4 || v4->mode == IPV4_CFG_DISABLED) continue; + + uint32_t p = v4->runtime_opts_v4.dns[0]; + uint32_t q = v4->runtime_opts_v4.dns[1]; + if (p || q){ + if (out_l3) *out_l3 = v4->l3_id; + if (out_primary) { + 
memset(out_primary, 0, sizeof(*out_primary)); + out_primary->ver = IP_VER4; + memcpy(out_primary->ip, &p, 4); + } + if (out_secondary) { + memset(out_secondary, 0, sizeof(*out_secondary)); + out_secondary->ver = IP_VER4; + memcpy(out_secondary->ip, &q, 4); + } + return true; + } + } + + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; ++s) { + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!v6 || v6->cfg == IPV6_CFG_DISABLE) continue; + static const uint8_t z[16] = {0}; + bool hp = memcmp(v6->runtime_opts_v6.dns[0], z, 16) != 0; + bool hq = memcmp(v6->runtime_opts_v6.dns[1], z, 16) != 0; + if (hp || hq){ + if (out_l3) *out_l3 = v6->l3_id; + if (out_primary) { + memset(out_primary, 0, sizeof(*out_primary)); + out_primary->ver = IP_VER6; + if (hp) memcpy(out_primary->ip, v6->runtime_opts_v6.dns[0], 16); + } + if (out_secondary) { + memset(out_secondary, 0, sizeof(*out_secondary)); + out_secondary->ver = IP_VER6; + if (hq) memcpy(out_secondary->ip, v6->runtime_opts_v6.dns[1], 16); + } + return true; + } + } + } + return false; +} + +static bool dns_srv_is_zero(const net_l4_endpoint* e){ + if (!e) return true; + if (e->ver == IP_VER4) return *(const uint32_t*)e->ip == 0; + if (e->ver == IP_VER6) return ipv6_is_unspecified(e->ip); + return true; +} + +static dns_result_t query_with_selection_a(const net_l4_endpoint* primary, const net_l4_endpoint* secondary, dns_server_sel_t which, const char* hostname, uint32_t timeout_ms, uint32_t* out_ip){ + if (which == DNS_USE_PRIMARY && dns_srv_is_zero(primary)) return DNS_ERR_NO_DNS; + if (which == DNS_USE_SECONDARY && dns_srv_is_zero(secondary)) return DNS_ERR_NO_DNS; + if (which == DNS_USE_BOTH && dns_srv_is_zero(primary) && dns_srv_is_zero(secondary)) return DNS_ERR_NO_DNS; + socket_handle_t sock = dns_socket_handle(); + if (sock == 0) return DNS_ERR_SOCKET; + dns_result_t res = DNS_ERR_NO_DNS; + uint32_t ttl_s = 0; + if (which == DNS_USE_PRIMARY) res = perform_dns_query_once_a(sock, primary, hostname, timeout_ms, out_ip, &ttl_s); + 
else if (which == DNS_USE_SECONDARY) res = perform_dns_query_once_a(sock, secondary, hostname, timeout_ms, out_ip, &ttl_s); + else { + const net_l4_endpoint* first = !dns_srv_is_zero(primary) ? primary : secondary; + const net_l4_endpoint* second = !dns_srv_is_zero(secondary) ? secondary : primary; + res = perform_dns_query_once_a(sock, first, hostname, timeout_ms, out_ip, &ttl_s); + if (res != DNS_OK && second && first != second) res = perform_dns_query_once_a(sock, second, hostname, timeout_ms, out_ip, &ttl_s); + } + if (res == DNS_OK) { + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 0xFFFFFFFFu : ttl_s * 1000u; + uint8_t addr[16]; + memset(addr, 0, 16); + wr_be32(addr, *out_ip); + dns_cache_put_ip(hostname, 1, addr, ttl_ms); + } + return res; +} + +static dns_result_t query_with_selection_aaaa(const net_l4_endpoint* primary, const net_l4_endpoint* secondary, dns_server_sel_t which, const char* hostname, uint32_t timeout_ms, uint8_t out_ipv6[16]){ + if (which == DNS_USE_PRIMARY && dns_srv_is_zero(primary)) return DNS_ERR_NO_DNS; + if (which == DNS_USE_SECONDARY && dns_srv_is_zero(secondary)) return DNS_ERR_NO_DNS; + if (which == DNS_USE_BOTH && dns_srv_is_zero(primary) && dns_srv_is_zero(secondary)) return DNS_ERR_NO_DNS; + socket_handle_t sock = dns_socket_handle(); + if (sock == 0) return DNS_ERR_SOCKET; + dns_result_t res = DNS_ERR_NO_DNS; + uint32_t ttl_s = 0; + if (which == DNS_USE_PRIMARY) res = perform_dns_query_once_aaaa(sock, primary, hostname, timeout_ms, out_ipv6, &ttl_s); + else if (which == DNS_USE_SECONDARY) res = perform_dns_query_once_aaaa(sock, secondary, hostname, timeout_ms, out_ipv6, &ttl_s); + else { + const net_l4_endpoint* first = !dns_srv_is_zero(primary) ? primary : secondary; + const net_l4_endpoint* second = !dns_srv_is_zero(secondary) ? 
secondary : primary; + res = perform_dns_query_once_aaaa(sock, first, hostname, timeout_ms, out_ipv6, &ttl_s); + if (res != DNS_OK && second && first != second) res = perform_dns_query_once_aaaa(sock, second, hostname, timeout_ms, out_ipv6, &ttl_s); + } + if (res == DNS_OK) { + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 0xFFFFFFFFu : ttl_s * 1000u; + dns_cache_put_ip(hostname, 28, out_ipv6, ttl_ms); + } + return res; +} + +dns_result_t dns_resolve_a(const char* hostname, uint32_t* out_ip, dns_server_sel_t which, uint32_t timeout_ms){ + if (!hostname || !out_ip) return DNS_ERR_FORMAT; + uint8_t cached[16]; + if (dns_cache_get_ip(hostname, 1, cached)) { + *out_ip = rd_be32(cached); + return DNS_OK; + } + + bool is_local = dns_is_local_name(hostname); + + if (is_local){ + uint32_t ttl_s = 0; + dns_result_t mr = mdns_resolve_a(hostname, timeout_ms > MDNS_TIMEOUT_A_MS ? MDNS_TIMEOUT_A_MS : timeout_ms, out_ip, &ttl_s); + if (mr == DNS_OK) { + uint8_t a[16]; + memset(a, 0, 16); + wr_be32(a, *out_ip); + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 0xFFFFFFFFu : ttl_s * 1000u; + dns_cache_put_ip(hostname, 1, a, ttl_ms); + } + return mr; + } + + dns_result_t res = DNS_ERR_NO_DNS; + uint8_t l3 = 0; + net_l4_endpoint p, s; + if (pick_dns_first_iface(&l3, &p, &s)) res = query_with_selection_a(&p, &s, which, hostname, timeout_ms, out_ip); + + if (res != DNS_OK && is_local){ + uint32_t ttl_s = 0; + dns_result_t mr = mdns_resolve_a(hostname, timeout_ms > MDNS_TIMEOUT_A_MS ? MDNS_TIMEOUT_A_MS : timeout_ms, out_ip, &ttl_s); + if (mr == DNS_OK) { + uint8_t a[16]; + memset(a,0, 16); + wr_be32(a, *out_ip); + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 
0xFFFFFFFFu : ttl_s * 1000u; + dns_cache_put_ip(hostname, 1, a, ttl_ms); + return DNS_OK; + } + } + + return res; +} + +dns_result_t dns_resolve_a_on_l3(uint8_t l3_id, const char* hostname, uint32_t* out_ip, dns_server_sel_t which, uint32_t timeout_ms){ + if (!hostname || !out_ip) return DNS_ERR_FORMAT; + uint8_t cached[16]; + if (dns_cache_get_ip(hostname, 1, cached)) { + *out_ip = rd_be32(cached); + return DNS_OK; + } + + bool is_local = dns_is_local_name(hostname); + + if (is_local) { + uint32_t ttl_s = 0; + dns_result_t mr = mdns_resolve_a(hostname, timeout_ms > MDNS_TIMEOUT_A_MS ? MDNS_TIMEOUT_A_MS : timeout_ms, out_ip, &ttl_s); + if (mr == DNS_OK) { + uint8_t a[16]; + memset(a, 0, 16); + wr_be32(a, *out_ip); + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 0xFFFFFFFFu : ttl_s * 1000u; + dns_cache_put_ip(hostname, 1, a, ttl_ms); + } + return mr; + } + + dns_result_t res = DNS_ERR_NO_DNS; + net_l4_endpoint p, s; + + if (pick_dns_on_l3(l3_id, &p, &s)) res = query_with_selection_a(&p, &s, which, hostname, timeout_ms, out_ip); + + if (res != DNS_OK && is_local) { + uint32_t ttl_s = 0; + dns_result_t mr = mdns_resolve_a(hostname, timeout_ms > MDNS_TIMEOUT_A_MS ? MDNS_TIMEOUT_A_MS : timeout_ms, out_ip, &ttl_s); + if (mr == DNS_OK) { + uint8_t a[16]; + memset(a, 0, 16); + wr_be32(a, *out_ip); + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 0xFFFFFFFFu : ttl_s * 1000u; + dns_cache_put_ip(hostname, 1, a, ttl_ms); + return DNS_OK; + } + } + + return res; +} + +dns_result_t dns_resolve_aaaa(const char* hostname, uint8_t out_ipv6[16], dns_server_sel_t which, uint32_t timeout_ms){ + if (!hostname || !out_ipv6) return DNS_ERR_FORMAT; + if (dns_cache_get_ip(hostname, 28, out_ipv6)) return DNS_OK; + + bool is_local = dns_is_local_name(hostname); + + if (is_local) { + uint32_t ttl_s = 0; + dns_result_t mr = mdns_resolve_aaaa(hostname, timeout_ms > MDNS_TIMEOUT_AAAA_MS ? 
MDNS_TIMEOUT_AAAA_MS : timeout_ms, out_ipv6, &ttl_s); + if (mr == DNS_OK) { + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 0xFFFFFFFFu : ttl_s * 1000u; + dns_cache_put_ip(hostname, 28, out_ipv6, ttl_ms); + } + return mr; + } + + dns_result_t res = DNS_ERR_NO_DNS; + uint8_t l3 = 0; + net_l4_endpoint p, s; + if (pick_dns_first_iface(&l3, &p, &s)) res = query_with_selection_aaaa(&p, &s, which, hostname, timeout_ms, out_ipv6); + + if (res != DNS_OK && is_local) { + uint32_t ttl_s = 0; + dns_result_t mr = mdns_resolve_aaaa(hostname, timeout_ms > MDNS_TIMEOUT_AAAA_MS ? MDNS_TIMEOUT_AAAA_MS : timeout_ms, out_ipv6, &ttl_s); + if (mr == DNS_OK) { + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 0xFFFFFFFFu : ttl_s * 1000u; + dns_cache_put_ip(hostname, 28, out_ipv6, ttl_ms); + return DNS_OK; + } + } + + return res; +} + +dns_result_t dns_resolve_aaaa_on_l3(uint8_t l3_id, const char* hostname, uint8_t out_ipv6[16], dns_server_sel_t which, uint32_t timeout_ms){ + if (!hostname || !out_ipv6) return DNS_ERR_FORMAT; + if (dns_cache_get_ip(hostname, 28, out_ipv6)) return DNS_OK; + bool is_local = dns_is_local_name(hostname); + if (is_local) { + uint32_t ttl_s = 0; + dns_result_t mr = mdns_resolve_aaaa(hostname, timeout_ms > MDNS_TIMEOUT_AAAA_MS ? MDNS_TIMEOUT_AAAA_MS : timeout_ms, out_ipv6, &ttl_s); + if (mr == DNS_OK) { + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 0xFFFFFFFFu : ttl_s * 1000u; + dns_cache_put_ip(hostname, 28, out_ipv6, ttl_ms); + } + return mr; + } + + dns_result_t res = DNS_ERR_NO_DNS; + net_l4_endpoint p, s; + if (pick_dns_on_l3(l3_id, &p, &s)) res = query_with_selection_aaaa(&p, &s, which, hostname, timeout_ms, out_ipv6); + + if (res != DNS_OK && is_local) { + uint32_t ttl_s = 0; + dns_result_t mr = mdns_resolve_aaaa(hostname, timeout_ms > MDNS_TIMEOUT_AAAA_MS ? MDNS_TIMEOUT_AAAA_MS : timeout_ms, out_ipv6, &ttl_s); + if (mr == DNS_OK) { + uint32_t ttl_ms = ttl_s > (0xFFFFFFFFu / 1000u) ? 
0xFFFFFFFFu : ttl_s * 1000u;
+            dns_cache_put_ip(hostname, 28, out_ipv6, ttl_ms);
+            return DNS_OK;
+        }
+    }
+
+    return res;
+}
diff --git a/kernel/networking/application_layer/dns.h b/kernel/networking/application_layer/dns/dns.h
similarity index 95%
rename from kernel/networking/application_layer/dns.h
rename to kernel/networking/application_layer/dns/dns.h
index d64f8920..f20ddfe3 100644
--- a/kernel/networking/application_layer/dns.h
+++ b/kernel/networking/application_layer/dns/dns.h
@@ -25,6 +25,8 @@ dns_result_t dns_resolve_a_on_l3(uint8_t l3_id, const char* hostname, uint32_t*
 dns_result_t dns_resolve_aaaa(const char* hostname, uint8_t out_ipv6[16], dns_server_sel_t which, uint32_t timeout_ms);
 dns_result_t dns_resolve_aaaa_on_l3(uint8_t l3_id, const char* hostname, uint8_t out_ipv6[16], dns_server_sel_t which, uint32_t timeout_ms);
 
+void dns_cache_tick(uint32_t ms);
+
 #ifdef __cplusplus
 }
-#endif
+#endif
\ No newline at end of file
diff --git a/kernel/networking/application_layer/dns/dns_cache.c b/kernel/networking/application_layer/dns/dns_cache.c
new file mode 100644
index 00000000..f08fca98
--- /dev/null
+++ b/kernel/networking/application_layer/dns/dns_cache.c
@@ -0,0 +1,135 @@
+#include "dns_cache.h"
+#include "std/std.h"
+
+#define DNS_CACHE_SLOTS 32
+#define DNS_CACHE_NAME_CAP 128
+/* ttl_ms value of an entry that dns_cache_tick never ages out. */
+#define DNS_CACHE_TTL_PINNED 0xFFFFFFFFu
+
+/* One cached address record; addr always carries 16 bytes, whatever rr_type is. */
+typedef struct {
+    uint8_t in_use;
+    uint8_t rr_type;
+    uint32_t name_len;
+    char name[DNS_CACHE_NAME_CAP];
+    uint32_t ttl_ms;
+    uint8_t addr[16];
+} dns_cache_entry_t;
+
+static dns_cache_entry_t g_dns_cache[DNS_CACHE_SLOTS];
+static bool g_dns_cache_inited = false;
+
+/* Seeds the table, once, with pinned A and AAAA records for "localhost". */
+static void dns_cache_ensure_init(void) {
+    if (g_dns_cache_inited) return;
+    g_dns_cache_inited = true;
+
+    uint8_t a[16];
+    memset(a, 0, sizeof(a));
+    wr_be32(a, 0x7F000001u);
+    dns_cache_put_ip("localhost", 1, a, DNS_CACHE_TTL_PINNED);
+
+    uint8_t v6[16];
+    memset(v6, 0, sizeof(v6));
+    v6[15] = 1;
+    dns_cache_put_ip("localhost", 28, v6, DNS_CACHE_TTL_PINNED);
+}
+
+/* True when slot e holds the record for (name, rr_type); names are compared byte for byte. */
+static bool dns_cache_entry_matches(const dns_cache_entry_t* e, const char* name, uint32_t nlen, uint8_t rr_type) {
+    if (e->rr_type != rr_type) return false;
+    if (e->name_len != nlen) return false;
+    return strncmp(e->name, name, (int)nlen) == 0;
+}
+
+/*
+ * Stores or refreshes the address cached for (name, rr_type).
+ *
+ * The call is ignored when name or addr is NULL, when name is empty or does not
+ * fit the name buffer, or when ttl_ms is 0. A/AAAA records for "localhost" are
+ * always pinned; any other record asking for the pinned TTL value is stored one
+ * millisecond short of it, so only "localhost" can stay in the table forever.
+ *
+ * When every slot is taken, the unpinned entry closest to expiry is replaced.
+ * Slot 0 is never reused blindly because it may hold a pinned "localhost" record.
+ */
+void dns_cache_put_ip(const char* name, uint8_t rr_type,const uint8_t addr[16], uint32_t ttl_ms) {
+    if (!name || !addr) return;
+    uint32_t nlen = strlen(name);
+    if (!nlen) return;
+    if (nlen >= DNS_CACHE_NAME_CAP) return;
+    if (!ttl_ms) return;
+
+    if (nlen == 9u && strncmp(name, "localhost", 9) == 0 && (rr_type == 1 || rr_type == 28)) ttl_ms = DNS_CACHE_TTL_PINNED;
+    else if (ttl_ms == DNS_CACHE_TTL_PINNED) ttl_ms = DNS_CACHE_TTL_PINNED - 1u;
+
+    int free_i = -1;
+    int victim_i = -1;
+    for (int i = 0; i < DNS_CACHE_SLOTS; i++) {
+        dns_cache_entry_t* e = &g_dns_cache[i];
+        if (!e->in_use) {
+            if (free_i < 0) free_i = i;
+            continue;
+        }
+        if (dns_cache_entry_matches(e, name, nlen, rr_type)) {
+            memcpy(e->addr, addr, 16);
+            e->ttl_ms = ttl_ms;
+            return;
+        }
+        if (e->ttl_ms == DNS_CACHE_TTL_PINNED) continue;
+        if (victim_i < 0 || e->ttl_ms < g_dns_cache[victim_i].ttl_ms) victim_i = i;
+    }
+
+    int idx = free_i >= 0 ? free_i : victim_i;
+    if (idx < 0) return;
+
+    dns_cache_entry_t* slot = &g_dns_cache[idx];
+    memset(slot, 0, sizeof(*slot));
+    slot->in_use = 1;
+    slot->rr_type = rr_type;
+    slot->name_len = nlen;
+    memcpy(slot->name, name, nlen);
+    slot->name[nlen] = 0;
+    slot->ttl_ms = ttl_ms;
+    memcpy(slot->addr, addr, 16);
+}
+
+/*
+ * Copies the cached 16-byte address for (name, rr_type) into out_addr.
+ * Returns false when either pointer is NULL, the name is empty or too long,
+ * or no live entry matches.
+ */
+bool dns_cache_get_ip(const char* name, uint8_t rr_type, uint8_t out_addr[16]) {
+    dns_cache_ensure_init();
+    if (!name || !out_addr) return false;
+    uint32_t nlen = strlen(name);
+    if (!nlen) return false;
+    if (nlen >= DNS_CACHE_NAME_CAP) return false;
+    for (int i = 0; i < DNS_CACHE_SLOTS; i++) {
+        const dns_cache_entry_t* e = &g_dns_cache[i];
+        if (!e->in_use || e->ttl_ms == 0) continue;
+        if (!dns_cache_entry_matches(e, name, nlen, rr_type)) continue;
+        memcpy(out_addr, e->addr, 16);
+        return true;
+    }
+    return false;
+}
+
+/* Ages every unpinned entry by ms milliseconds and frees the ones that ran out. */
+void dns_cache_tick(uint32_t ms) {
+    dns_cache_ensure_init();
+    for (int i = 0; i < DNS_CACHE_SLOTS; i++) {
+        dns_cache_entry_t* e = &g_dns_cache[i];
+        if (!e->in_use) continue;
+        if (!e->ttl_ms) {
+            e->in_use = 0;
+            continue;
+        }
+        if (e->ttl_ms == DNS_CACHE_TTL_PINNED) continue;
+        if (e->ttl_ms <= ms) {
+            memset(e, 0, sizeof(*e));
+        } else {
+            e->ttl_ms -= ms;
+        }
+    }
+}
diff --git
a/kernel/networking/application_layer/dns/dns_cache.h b/kernel/networking/application_layer/dns/dns_cache.h new file mode 100644 index 00000000..7c3c6fef --- /dev/null +++ b/kernel/networking/application_layer/dns/dns_cache.h @@ -0,0 +1,14 @@ +#pragma once +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool dns_cache_get_ip(const char* name, uint8_t rr_type, uint8_t out_addr[16]); +void dns_cache_put_ip(const char* name, uint8_t rr_type,const uint8_t addr[16], uint32_t ttl_ms); +void dns_cache_tick(uint32_t ms); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/application_layer/dns/dns_daemon.c b/kernel/networking/application_layer/dns/dns_daemon.c new file mode 100644 index 00000000..f0e55f19 --- /dev/null +++ b/kernel/networking/application_layer/dns/dns_daemon.c @@ -0,0 +1,85 @@ +#include "dns_daemon.h" +#include "mdns_responder.h" +#include "dns_cache.h" +#include "dns_sd.h" +#include "process/scheduler.h" +#include "syscalls/syscalls.h" +#include "net/socket_types.h" +#include "networking/transport_layer/csocket_udp.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "std/memory.h" +#include "net/socket_types.h" + +static uint16_t g_pid_dnsd = 0xFFFF; +static socket_handle_t g_sock = 0; + +static socket_handle_t g_sock_mdns4 = 0; +static socket_handle_t g_sock_mdns6 = 0; + +uint16_t dns_get_pid(void){ return g_pid_dnsd; } +bool dns_is_running(void){ return g_pid_dnsd != 0xFFFF; } +void dns_set_pid(uint16_t p){ g_pid_dnsd = p; } +socket_handle_t dns_socket_handle(void){ return g_sock; } + +socket_handle_t mdns_socket_handle_v4(void){ return g_sock_mdns4; } +socket_handle_t mdns_socket_handle_v6(void){ return g_sock_mdns6; } + +static socket_handle_t mdns_create_socket(ip_version_t ver, const void* group) { + SocketExtraOptions opt; + memset(&opt, 0, sizeof(opt)); + opt.flags = SOCK_OPT_MCAST_JOIN | SOCK_OPT_TTL; + opt.ttl = 255; + opt.mcast_ver = ver; + if(ver == IP_VER4) memcpy(opt.mcast_group, group, 4); + else 
memcpy(opt.mcast_group, group, 16); + + socket_handle_t s = udp_socket_create(SOCK_ROLE_SERVER, g_pid_dnsd, &opt); + if(!s) return 0; + + SockBindSpec spec; + memset(&spec, 0, sizeof(spec)); + spec.kind = BIND_ANY; + + if(socket_bind_udp_ex(s, &spec, DNS_SD_MDNS_PORT) != SOCK_OK){ + socket_destroy_udp(s); + return 0; + } + + return s; +} + +int dns_deamon_entry(int argc, char* argv[]){ + (void)argc; (void)argv; + dns_set_pid(get_current_proc_pid()); + g_sock = udp_socket_create(SOCK_ROLE_CLIENT, g_pid_dnsd, NULL); + + uint32_t mdns_v4 = IPV4_MCAST_MDNS; + uint8_t mdns_v6[16]; + ipv6_make_multicast(0x02, IPV6_MCAST_MDNS, 0, mdns_v6); + + g_sock_mdns4 = mdns_create_socket(IP_VER4, &mdns_v4); + g_sock_mdns6 = mdns_create_socket(IP_VER6, mdns_v6); + + uint32_t tick_ms = 100; + for(;;) { + dns_cache_tick(tick_ms); + uint8_t buf[900]; + net_l4_endpoint src; + + if (g_sock_mdns4) { + memset(&src, 0, sizeof(src)); + int64_t r4 = socket_recvfrom_udp_ex(g_sock_mdns4, buf, sizeof(buf), &src); + if(r4 > 0) mdns_responder_handle_query(g_sock_mdns4, IP_VER4, (const uint8_t*)&mdns_v4, buf, (uint32_t)r4, &src); + } + + if (g_sock_mdns6) { + memset(&src, 0, sizeof(src)); + int64_t r6 = socket_recvfrom_udp_ex(g_sock_mdns6, buf, sizeof(buf), &src); + if(r6 > 0) mdns_responder_handle_query(g_sock_mdns6, IP_VER6, mdns_v6, buf, (uint32_t)r6, &src); + } + + mdns_responder_tick(g_sock_mdns4,g_sock_mdns6,(const uint8_t*)&mdns_v4,mdns_v6); + msleep(tick_ms); + } + return 1; +} \ No newline at end of file diff --git a/kernel/networking/application_layer/dns/dns_daemon.h b/kernel/networking/application_layer/dns/dns_daemon.h new file mode 100644 index 00000000..ca386c53 --- /dev/null +++ b/kernel/networking/application_layer/dns/dns_daemon.h @@ -0,0 +1,21 @@ +#pragma once +#include "networking/transport_layer/csocket_udp.h" + +#define IPV4_MCAST_MDNS 0xE00000FBu + +#ifdef __cplusplus +extern "C" { +#endif +bool dns_is_running(void); +void dns_set_pid(uint16_t p); +socket_handle_t 
dns_socket_handle(void);
+
+socket_handle_t mdns_socket_handle_v4(void);
+socket_handle_t mdns_socket_handle_v6(void);
+
+uint16_t dns_get_pid(void);
+
+int dns_deamon_entry(int argc, char* argv[]);
+#ifdef __cplusplus
+}
+#endif
diff --git a/kernel/networking/application_layer/dns/dns_mdns.c b/kernel/networking/application_layer/dns/dns_mdns.c
new file mode 100644
index 00000000..d4a29226
--- /dev/null
+++ b/kernel/networking/application_layer/dns/dns_mdns.c
@@ -0,0 +1,293 @@
+#include "dns_mdns.h"
+#include "dns_daemon.h"
+#include "networking/internet_layer/ipv6_utils.h"
+#include "std/std.h"
+#include "networking/transport_layer/trans_utils.h"
+
+#define MDNS_PORT 5353
+/* Size limits from RFC 1035 section 2.3.4. */
+#define DNS_LABEL_MAX 63u
+#define DNS_NAME_WIRE_MAX 255u
+
+/*
+ * Returns the offset just past the encoded name that starts at offset, or
+ * message_len + 1 when the name is truncated, unterminated or uses a reserved
+ * label type. A compression pointer ends the name; its target is not followed.
+ */
+static uint32_t skip_dns_name(const uint8_t* message, uint32_t message_len, uint32_t offset) {
+    uint32_t cursor = offset;
+    while (cursor < message_len) {
+        uint8_t len = message[cursor++];
+        if (len == 0) return cursor;
+        if ((len & 0xC0) == 0xC0) {
+            if (cursor >= message_len) break;
+            return cursor + 1;
+        }
+        if (len > DNS_LABEL_MAX) break;
+        if (len > message_len - cursor) break;
+        cursor += len;
+    }
+    return message_len + 1;
+}
+
+/*
+ * Decodes the name at offset into out as NUL-terminated dotted text, following
+ * compression pointers (at most 16 jumps, which also stops pointer loops).
+ * *consumed, when given, receives the number of bytes the name occupies at
+ * offset itself: up to and including the root label or the first pointer.
+ * Returns false on malformed input or when out is too small.
+ */
+static bool read_dns_name(const uint8_t* message, uint32_t message_len, uint32_t offset, char* out, uint32_t out_cap, uint32_t* consumed) {
+    if (!message || !out || !out_cap) return false;
+    if (offset >= message_len) return false;
+
+    uint32_t cur = offset;
+    uint32_t out_len = 0;
+    uint32_t consumed_local = 0;
+    bool jumped = false;
+    uint32_t jumps = 0;
+
+    for (;;) {
+        if (cur >= message_len) return false;
+
+        uint8_t len = message[cur];
+        if (len == 0) {
+            if (!jumped) consumed_local = cur - offset + 1;
+            if (out_len >= out_cap) return false;
+            out[out_len] = 0;
+            if (consumed) *consumed = consumed_local;
+            return true;
+        }
+
+        if ((len & 0xC0) == 0xC0) {
+            if (cur + 1 >= message_len) return false;
+            uint16_t ptr = (uint16_t)(((uint16_t)(len & 0x3F) << 8) | (uint16_t)message[cur + 1]);
+            if (ptr >= message_len) return false;
+            if (!jumped) consumed_local = cur - offset + 2;
+            cur = ptr;
+            jumped = true;
+            if (++jumps > 16) return false;
+            continue;
+        }
+
+        /* 0x40 and 0x80 prefixes are reserved label types, not lengths. */
+        if (len > DNS_LABEL_MAX) return false;
+
+        cur++;
+        if (cur + len > message_len) return false;
+
+        if (out_len) {
+            if (out_len + 1 >= out_cap) return false;
+            out[out_len++] = '.';
+        }
+
+        if (out_len + len >= out_cap) return false;
+        memcpy(out + out_len, message + cur, len);
+        out_len += len;
+        cur += len;
+    }
+}
+
+/* ASCII case-insensitive comparison of the first len bytes of a and b. */
+static bool mdns_name_equal(const char* a, const char* b, uint32_t len) {
+    for (uint32_t i = 0; i < len; ++i) {
+        char ca = a[i];
+        char cb = b[i];
+        if (ca >= 'A' && ca <= 'Z') ca = (char)(ca - 'A' + 'a');
+        if (cb >= 'A' && cb <= 'Z') cb = (char)(cb - 'A' + 'a');
+        if (ca != cb) return false;
+    }
+    return true;
+}
+
+/*
+ * Scans the answer, authority and additional sections of an mDNS message for
+ * a class-IN record of type qtype owned by name whose RDATA is exactly out_len
+ * bytes, and copies that RDATA to out_rdata (its TTL to *out_ttl_s if given).
+ * Owner names are matched without regard to ASCII case, as DNS requires.
+ * Returns DNS_OK, DNS_ERR_NO_ANSWER, or DNS_ERR_FORMAT for a malformed message.
+ */
+static dns_result_t parse_mdns_ip_record(const uint8_t* buffer, uint32_t buffer_len, const char* name, uint16_t qtype, uint8_t* out_rdata, uint32_t out_len, uint32_t* out_ttl_s) {
+    if (!buffer) return DNS_ERR_FORMAT;
+    if (buffer_len < 12) return DNS_ERR_FORMAT;
+    if (!name) return DNS_ERR_FORMAT;
+    if (!out_rdata) return DNS_ERR_FORMAT;
+    if (!out_len) return DNS_ERR_FORMAT;
+
+    uint16_t qd = rd_be16(buffer + 4);
+    uint16_t an = rd_be16(buffer + 6);
+    uint16_t ns = rd_be16(buffer + 8);
+    uint16_t ar = rd_be16(buffer + 10);
+
+    uint32_t offset = 12;
+    for (uint16_t i = 0; i < qd; ++i) {
+        offset = skip_dns_name(buffer, buffer_len, offset);
+        if (offset+4 > buffer_len) return DNS_ERR_FORMAT;
+        offset += 4;
+    }
+
+    uint32_t total = (uint32_t)an + (uint32_t)ns + (uint32_t)ar;
+    uint32_t name_len = (uint32_t)strlen(name);
+
+    for (uint32_t i = 0; i < total; ++i) {
+        char rrname[256];
+        uint32_t consumed = 0;
+        if (!read_dns_name(buffer, buffer_len, offset, rrname, sizeof(rrname), &consumed)) return DNS_ERR_FORMAT;
+        offset += consumed;
+
+        if (offset + 10 > buffer_len) return DNS_ERR_FORMAT;
+
+        uint16_t type = rd_be16(buffer + offset + 0);
+        uint16_t klass = rd_be16(buffer + offset + 2);
+        uint32_t ttl_s = rd_be32(buffer + offset + 4);
+        uint16_t rdlen = rd_be16(buffer + offset + 8);
+        offset += 10;
+
+        if (offset + rdlen > buffer_len) return DNS_ERR_FORMAT;
+
+        /* The top class bit is the mDNS cache-flush flag, not part of the class. */
+        if (type == qtype && (klass & 0x7FFFu) == 1u && rdlen == out_len) {
+            uint32_t rrname_len = (uint32_t)strlen(rrname);
+            if (rrname_len == name_len && mdns_name_equal(rrname, name, name_len)) {
+                memcpy(out_rdata, buffer + offset, out_len);
+                if (out_ttl_s) *out_ttl_s = ttl_s;
+                return DNS_OK;
+            }
+        }
+
+        offset += rdlen;
+    }
+
+    return DNS_ERR_NO_ANSWER;
+}
+
+/*
+ * Appends name, encoded as DNS labels, at buf[*inout_off] and advances
+ * *inout_off past the terminating root label. Returns false, leaving
+ * *inout_off untouched, for NULL arguments, an empty name, an empty or
+ * over-long label (a leading, trailing or doubled dot), an encoded name above
+ * 255 bytes, or a buffer that is too small.
+ */
+static bool dns_write_qname(uint8_t* buf, uint32_t buf_len, uint32_t*inout_off, const char* name) {
+    if (!buf || !inout_off || !name) return false;
+
+    const uint32_t start = *inout_off;
+    uint32_t off = start;
+    const char* label = name;
+    for (;;) {
+        uint32_t len = 0;
+        while (label[len] && label[len] != '.') len++;
+        if (!len || len > DNS_LABEL_MAX) return false;
+        if (off >= buf_len || len > buf_len - off - 1u) return false;
+        buf[off++] = (uint8_t)len;
+        memcpy(buf + off, label, len);
+        off += len;
+        if (!label[len]) break;
+        label += len + 1u;
+    }
+
+    if (off >= buf_len) return false;
+    buf[off++] = 0;
+    if (off - start > DNS_NAME_WIRE_MAX) return false;
+    *inout_off = off;
+    return true;
+}
+
+/*
+ * Sends one question (qtype, class IN) for name to dst, then polls sock,
+ * sleeping 20 ms between polls, until a datagram from port MDNS_PORT carries a
+ * matching record or timeout_ms has been waited. On success the RDATA (exactly
+ * out_len bytes) is in out_rdata and, if requested, its TTL in *out_ttl_s.
+ */
+static dns_result_t perform_mdns_query_once(socket_handle_t sock, const net_l4_endpoint* dst, const char* name, uint16_t qtype, uint32_t timeout_ms, uint8_t* out_rdata, uint32_t out_len, uint32_t* out_ttl_s) {
+    if (!sock) return DNS_ERR_NO_DNS;
+    if (!dst) return DNS_ERR_NO_DNS;
+    if (!name) return DNS_ERR_FORMAT;
+    if (!out_rdata) return DNS_ERR_FORMAT;
+    if (!out_len) return DNS_ERR_FORMAT;
+
+    uint8_t request_buffer[512];
+    memset(request_buffer, 0, sizeof(request_buffer));
+
+    wr_be16(request_buffer + 0, 0);
+    wr_be16(request_buffer + 2, 0x0000);
+    wr_be16(request_buffer + 4, 1);
+
+    uint32_t offset = 12;
+    if (!dns_write_qname(request_buffer, (uint32_t)sizeof(request_buffer), &offset, name)) return DNS_ERR_FORMAT;
+    if (offset + 4 > (uint32_t)sizeof(request_buffer)) return DNS_ERR_FORMAT;
+    wr_be16(request_buffer + offset + 0, qtype);
+    wr_be16(request_buffer + offset + 2, 0x0001);
+    offset += 4;
+
+    int64_t sent = socket_sendto_udp_ex(sock, DST_ENDPOINT, dst, 0, request_buffer, offset);
+    if (sent < 0) return DNS_ERR_SEND;
+
+    uint32_t waited_ms = 0;
+    while (waited_ms < timeout_ms) {
+        uint8_t response_buffer[512];
+        net_l4_endpoint source;
+        int64_t received = socket_recvfrom_udp_ex(sock, response_buffer, sizeof(response_buffer), &source);
+        if (received > 0 && source.port == MDNS_PORT){
+            uint32_t ttl_s = 0;
+            dns_result_t pr = parse_mdns_ip_record(response_buffer, (uint32_t)received, name, qtype, out_rdata, out_len, &ttl_s);
+            if (pr == DNS_OK){
+                if (out_ttl_s) *out_ttl_s = ttl_s;
+                return DNS_OK;
+            }
+        }
+
+        msleep(20);
+        waited_ms += 20;
+    }
+
+    return DNS_ERR_TIMEOUT;
+}
+
+/*
+ * Resolves name to an IPv4 address by querying the mDNS IPv4 group.
+ * *out_ip receives the address as rd_be32 reads it from the record; *out_ttl_s
+ * receives the record TTL. Both output pointers may be NULL.
+ * Returns DNS_ERR_NO_DNS when the mDNS IPv4 socket is not available.
+ */
+dns_result_t mdns_resolve_a(const char* name, uint32_t timeout_ms, uint32_t* out_ip, uint32_t* out_ttl_s) {
+    socket_handle_t sock = mdns_socket_handle_v4();
+    if (!sock) return DNS_ERR_NO_DNS;
+
+    net_l4_endpoint dst;
+    make_ep(IPV4_MCAST_MDNS, MDNS_PORT, IP_VER4, &dst);
+
+    uint8_t rdata[4];
+    uint32_t ttl_s = 0;
+
+    dns_result_t r = perform_mdns_query_once(sock, &dst, name, 1, timeout_ms, rdata, 4, &ttl_s);
+    if (r != DNS_OK) return r;
+
+    if (out_ip) *out_ip = rd_be32(rdata);
+    if (out_ttl_s) *out_ttl_s = ttl_s;
+    return DNS_OK;
+}
+
+/*
+ * Resolves name to an IPv6 address by querying the mDNS IPv6 group.
+ * out_ipv6 receives the 16 address bytes as they appear in the record;
+ * *out_ttl_s (optional) receives the record TTL.
+ * Returns DNS_ERR_NO_DNS when the mDNS IPv6 socket is not available.
+ */
+dns_result_t mdns_resolve_aaaa(const char* name, uint32_t timeout_ms, uint8_t out_ipv6[16], uint32_t* out_ttl_s) {
+    socket_handle_t sock = mdns_socket_handle_v6();
+    if (!sock) return DNS_ERR_NO_DNS;
+
+    net_l4_endpoint dst;
+    memset(&dst, 0, sizeof(dst));
+    dst.ver = IP_VER6;
+    ipv6_make_multicast(0x02, IPV6_MCAST_MDNS, 0, dst.ip);
+    dst.port = MDNS_PORT;
+
+    uint32_t ttl_s = 0;
+    dns_result_t r = perform_mdns_query_once(sock, &dst, name, 28, timeout_ms, out_ipv6, 16, &ttl_s);
+    if (r != DNS_OK) return r;
+
+    if (out_ttl_s) *out_ttl_s = ttl_s;
+    return DNS_OK;
+}
diff --git a/kernel/networking/application_layer/dns/dns_mdns.h
b/kernel/networking/application_layer/dns/dns_mdns.h
new file mode 100644
index 00000000..e5a84ea5
--- /dev/null
+++ b/kernel/networking/application_layer/dns/dns_mdns.h
@@ -0,0 +1,6 @@
+#pragma once
+#include "dns.h"
+#include "types.h"
+
+dns_result_t mdns_resolve_a(const char* name, uint32_t timeout_ms, uint32_t* out_ip, uint32_t* out_ttl_s);
+dns_result_t mdns_resolve_aaaa(const char* name, uint32_t timeout_ms, uint8_t out_ipv6[16], uint32_t* out_ttl_s);
\ No newline at end of file
diff --git a/kernel/networking/application_layer/dns/dns_sd.c b/kernel/networking/application_layer/dns/dns_sd.c
new file mode 100644
index 00000000..5b53aac2
--- /dev/null
+++ b/kernel/networking/application_layer/dns/dns_sd.c
@@ -0,0 +1,191 @@
+#include "dns_sd.h"
+#include "std/std.h"
+
+/* Size limits from RFC 1035 section 2.3.4. */
+#define DNS_SD_LABEL_MAX 63u
+#define DNS_SD_NAME_WIRE_MAX 255u
+
+/*
+ * Encodes the dotted name as DNS labels at out[off..] and returns the offset
+ * just past the terminating root label, or 0 on failure.
+ *
+ * "" and "." encode the root name, and a single trailing dot is accepted, so
+ * "host.local" and "host.local." produce the same bytes. The call fails for
+ * NULL arguments, off outside the buffer, an empty or over-long label, an
+ * encoded name above 255 bytes, or a buffer that is too small.
+ */
+uint32_t dns_sd_encode_qname(uint8_t* out, uint32_t cap, uint32_t off, const char* name) {
+    if(!out) return 0;
+    if(!cap) return 0;
+    if(off >= cap) return 0;
+    if(!name) return 0;
+
+    uint32_t idx = off;
+    const char* label = name;
+    if(label[0] == '.' && label[1] == 0) label++;
+
+    while (*label) {
+        uint32_t len = 0;
+        while (label[len] && label[len] != '.') len++;
+        /* An empty label would be read back as the end of the name. */
+        if(!len || len > DNS_SD_LABEL_MAX) return 0;
+        if(idx >= cap || len > cap - idx - 1u) return 0;
+        out[idx++] = (uint8_t)len;
+        memcpy(out + idx, label, len);
+        idx += len;
+        label += len;
+        if(*label == '.') label++;
+    }
+
+    if(idx >= cap) return 0;
+    out[idx++] = 0;
+    if(idx - off > DNS_SD_NAME_WIRE_MAX) return 0;
+    return idx;
+}
+
+/* Stores be16(v) at out[off]; returns the offset after it, or 0 if the two bytes do not fit. */
+uint32_t dns_sd_put_u16(uint8_t* out, uint32_t cap, uint32_t off, uint16_t v) {
+    if(!out) return 0;
+    if(cap < 2 || off > cap - 2) return 0;
+    uint16_t t = be16(v);
+    memcpy(out + off, &t, 2);
+    return off + 2;
+}
+
+/* Stores be32(v) at out[off]; returns the offset after it, or 0 if the four bytes do not fit. */
+uint32_t dns_sd_put_u32(uint8_t* out, uint32_t cap, uint32_t off, uint32_t v) {
+    if(!out) return 0;
+    if(cap < 4 || off > cap - 4) return 0;
+    uint32_t t = be32(v);
+    memcpy(out + off, &t, 4);
+    return off + 4;
+}
+
+/* Writes owner name, TYPE, CLASS and TTL of a record; returns the RDLENGTH offset, or 0. */
+static uint32_t dns_sd_put_rr_head(uint8_t* out, uint32_t cap, uint32_t off, const char* name, uint16_t rrtype, uint16_t rrclass, uint32_t ttl_s) {
+    off = dns_sd_encode_qname(out, cap, off, name);
+    if(!off) return 0;
+    off = dns_sd_put_u16(out, cap, off, rrtype);
+    if(!off) return 0;
+    off = dns_sd_put_u16(out, cap, off, rrclass);
+    if(!off) return 0;
+    return dns_sd_put_u32(out, cap, off, ttl_s);
+}
+
+/* Fills in RDLENGTH at rdlen_pos for the RDATA that ends at end; returns end, or 0. */
+static uint32_t dns_sd_close_rdata(uint8_t* out, uint32_t cap, uint32_t rdlen_pos, uint32_t end) {
+    uint32_t rdlen = end - (rdlen_pos + 2);
+    if(rdlen > 0xFFFFu) return 0;
+    if(!dns_sd_put_u16(out, cap, rdlen_pos, (uint16_t)rdlen)) return 0;
+    return end;
+}
+
+/* Appends a PTR record for name pointing at target; returns the new offset, or 0. */
+uint32_t dns_sd_add_rr_ptr(uint8_t* out, uint32_t cap, uint32_t off, const char* name, uint16_t rrclass, uint32_t ttl_s, const char* target) {
+    uint32_t rdlen_pos = dns_sd_put_rr_head(out, cap, off, name, DNS_SD_TYPE_PTR, rrclass, ttl_s);
+    if(!rdlen_pos) return 0;
+
+    off = dns_sd_put_u16(out, cap, rdlen_pos, 0);
+    if(!off) return 0;
+    off = dns_sd_encode_qname(out, cap, off, target);
+    if(!off) return 0;
+
+    return dns_sd_close_rdata(out, cap, rdlen_pos, off);
+}
+
+/* Appends an A record; ip is written most significant byte first. Returns the new offset, or 0. */
+uint32_t dns_sd_add_rr_a(uint8_t* out, uint32_t cap, uint32_t off, const char* name, uint16_t rrclass, uint32_t ttl_s, uint32_t ip) {
+    off = dns_sd_put_rr_head(out, cap, off, name, DNS_SD_TYPE_A, rrclass, ttl_s);
+    if(!off) return 0;
+    off = dns_sd_put_u16(out, cap, off, 4);
+    if(!off) return 0;
+
+    if(cap < 4 || off > cap - 4) return 0;
+    out[off + 0] = (uint8_t)(ip >> 24);
+    out[off + 1] = (uint8_t)(ip >> 16);
+    out[off + 2] = (uint8_t)(ip >> 8);
+    out[off + 3] = (uint8_t)(ip);
+    return off + 4;
+}
+
+/* Appends an AAAA record carrying the 16 bytes at ip6; returns the new offset, or 0. */
+uint32_t dns_sd_add_rr_aaaa(uint8_t* out, uint32_t cap, uint32_t off, const char* name, uint16_t rrclass,uint32_t ttl_s, const uint8_t ip6[16]) {
+    if(!ip6) return 0;
+
+    off = dns_sd_put_rr_head(out, cap, off, name, DNS_SD_TYPE_AAAA, rrclass, ttl_s);
+    if(!off) return 0;
+    off = dns_sd_put_u16(out, cap, off, 16);
+    if(!off) return 0;
+
+    if(cap < 16 || off > cap - 16) return 0;
+    memcpy(out + off, ip6, 16);
+    return off + 16;
+}
+
+/* Appends an SRV record (priority, weight, port, target); returns the new offset, or 0. */
+uint32_t dns_sd_add_rr_srv(uint8_t* out, uint32_t cap, uint32_t off, const char* name, uint16_t rrclass, uint32_t ttl_s, uint16_t priority, uint16_t weight, uint16_t port, const char* target) {
+    if(!target) return 0;
+
+    uint32_t rdlen_pos = dns_sd_put_rr_head(out, cap, off, name, DNS_SD_TYPE_SRV, rrclass, ttl_s);
+    if(!rdlen_pos) return 0;
+
+    off = dns_sd_put_u16(out, cap, rdlen_pos, 0);
+    if(!off) return 0;
+    off = dns_sd_put_u16(out, cap, off, priority);
+    if(!off) return 0;
+    off = dns_sd_put_u16(out, cap, off, weight);
+    if(!off) return 0;
+    off = dns_sd_put_u16(out, cap, off, port);
+    if(!off) return 0;
+    off = dns_sd_encode_qname(out, cap, off, target);
+    if(!off) return 0;
+
+    return dns_sd_close_rdata(out, cap, rdlen_pos, off);
+}
+
+/* True for the characters skipped in front of an entry of the txt argument of dns_sd_add_rr_txt. */
+static bool dns_sd_txt_is_sep(char c) {
+    return c == ' ' || c == '\t' || c == ';' || c == '\n' || c == '\r';
+}
+
+/*
+ * Appends a TXT record for name; returns the new offset, or 0 on failure.
+ *
+ * txt is split into character-strings at ';', '\n' and '\r'; blanks and
+ * separators in front of an entry are dropped and an entry longer than 255
+ * bytes is cut to 255. When txt is NULL or yields no entry, the RDATA is a
+ * single empty string, because a TXT record must hold at least one string
+ * (RFC 6763 section 6.1).
+ */
+uint32_t dns_sd_add_rr_txt(uint8_t *out, uint32_t cap, uint32_t off, const char *name, uint16_t rrclass, uint32_t ttl_s, const char *txt) {
+    uint32_t rdlen_pos = dns_sd_put_rr_head(out, cap, off, name, DNS_SD_TYPE_TXT, rrclass, ttl_s);
+    if(!rdlen_pos) return 0;
+
+    off = dns_sd_put_u16(out, cap, rdlen_pos, 0);
+    if(!off) return 0;
+    uint32_t rdata_start = off;
+
+    const char* p = txt ? txt : "";
+    for (;;) {
+        while(dns_sd_txt_is_sep(*p)) p++;
+        if(!*p) break;
+
+        const char* start = p;
+        while(*p && *p != ';' && *p != '\n' && *p != '\r') p++;
+
+        uint32_t len = (uint32_t)(p - start);
+        if(len > 255) len = 255;
+
+        if(off >= cap || len > cap - off - 1u) return 0;
+        out[off++] = (uint8_t)len;
+        memcpy(out + off, start, len);
+        off += len;
+    }
+
+    if(off == rdata_start) {
+        if(off >= cap) return 0;
+        out[off++] = 0;
+    }
+
+    return dns_sd_close_rdata(out, cap, rdlen_pos, off);
+}
diff --git a/kernel/networking/application_layer/dns/dns_sd.h b/kernel/networking/application_layer/dns/dns_sd.h
new file mode 100644
index 00000000..312a432b
--- /dev/null
+++ b/kernel/networking/application_layer/dns/dns_sd.h
@@ -0,0 +1,37 @@
+#pragma once
+#include "types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DNS_SD_MDNS_PORT 5353
+
+#define DNS_SD_TYPE_A 1
+#define DNS_SD_TYPE_PTR 12
+#define DNS_SD_TYPE_TXT 16
+#define DNS_SD_TYPE_SRV 33
+#define DNS_SD_TYPE_AAAA 28
+#define DNS_SD_TYPE_ANY 255
+
+#define DNS_SD_CLASS_IN 1
+
+#define DNS_SD_FLAG_QR 0x8000
+#define DNS_SD_FLAG_AA 0x0400
+
+#define DNS_SD_DOMAIN_LOCAL "local"
+#define DNS_SD_ENUM_SERVICES "_services._dns-sd._udp.local"
+
+uint32_t dns_sd_encode_qname(uint8_t *out, uint32_t cap, uint32_t off, const char *name);
+uint32_t dns_sd_put_u16(uint8_t *out, uint32_t cap, uint32_t off, uint16_t v);
+uint32_t dns_sd_put_u32(uint8_t *out, uint32_t cap, uint32_t off, uint32_t v);
+
+uint32_t dns_sd_add_rr_ptr(uint8_t *out, uint32_t cap, uint32_t off, const char *name, uint16_t rrclass, uint32_t ttl_s, const char *target);
+uint32_t dns_sd_add_rr_a(uint8_t *out, uint32_t cap, uint32_t off, const char *name, uint16_t rrclass, uint32_t ttl_s, uint32_t ip);
+uint32_t dns_sd_add_rr_aaaa(uint8_t *out, uint32_t cap, uint32_t off, const char *name, uint16_t rrclass, uint32_t ttl_s, const uint8_t ip6[16]);
+uint32_t dns_sd_add_rr_srv(uint8_t *out, uint32_t cap, uint32_t off, const char *name, uint16_t rrclass, uint32_t ttl_s, uint16_t priority, uint16_t weight, uint16_t
port, const char *target); +uint32_t dns_sd_add_rr_txt(uint8_t *out, uint32_t cap, uint32_t off, const char *name, uint16_t rrclass, uint32_t ttl_s, const char *txt); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/application_layer/dns/mdns_responder.c b/kernel/networking/application_layer/dns/mdns_responder.c new file mode 100644 index 00000000..ba57b962 --- /dev/null +++ b/kernel/networking/application_layer/dns/mdns_responder.c @@ -0,0 +1,1000 @@ +#include "mdns_responder.h" + +#include "dns_sd.h" +#include "dns_cache.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/transport_layer/csocket_udp.h" +#include "networking/interface_manager.h" +#include "std/std.h" +#include "std/string.h" +#include "syscalls/syscalls.h" + +#define MDNS_TTL_S 120 +#define MDNS_ANNOUNCE_BURST 3 +#define MDNS_GOODBYE_BURST 3 +#define MDNS_ANNOUNCE_INTERVAL_MS 250 +#define MDNS_KEEPALIVE_MS 60000 +#define MDNS_MAX_SERVICES 8 +#define MDNS_CACHE_MAX 48 + +typedef struct { + bool used; + bool active; + uint8_t announce_left; + uint8_t goodbye_left; + uint64_t last_tx_ms; + char instance[64]; + char service[32]; + char proto[8]; + char txt[128]; + uint16_t port; +} mdns_service_t; + +typedef struct { + uint16_t rrtype; + uint16_t rrclass; + uint32_t ttl_s; + uint16_t rdlen; +} mdns_rr_hdr_t; + +typedef struct { + uint8_t type; + uint16_t rrtype; + uint16_t port; + uint64_t expire_ms; + char name[256]; + char target[256]; + char txt[256]; +} mdns_cache_entry_t; + +typedef struct { + uint8_t *out; + uint32_t cap; + uint32_t off; + uint32_t an_pos; + uint32_t ar_pos; + uint16_t an; + uint16_t ar; +} mdns_pkt_t; + +static uint32_t g_mdns_ipv4 = 0; +static uint8_t g_mdns_ipv6[16]; +static uint8_t g_mdns_ifindex = 0; +static char g_mdns_fqdn[72]; + +static uint64_t g_mdns_last_refresh_ms = 0; +static uint64_t g_mdns_last_keepalive_ms = 0; +static uint8_t g_mdns_host_announce_left = 0; +static uint64_t g_mdns_host_last_tx_ms = 0; + +static 
mdns_service_t g_mdns_services[MDNS_MAX_SERVICES]; +static mdns_cache_entry_t g_mdns_cache[MDNS_CACHE_MAX]; + + +static bool mdns_read_name(const uint8_t *msg, uint32_t msg_len, uint32_t off, char *out, uint32_t out_cap, uint32_t *out_next) { + if (!msg) return false; + if (!msg_len) return false; + if (off >= msg_len) return false; + if (!out) return false; + if (!out_cap) return false; + + uint32_t idx = off; + uint32_t out_idx = 0; + uint32_t jumps = 0; + bool jumped = false; + + while (true) { + if (idx >= msg_len) return false; + + uint8_t c = msg[idx]; + if ((c & 0xC0) == 0xC0) { + if (idx + 1 >= msg_len) return false; + uint16_t ptr = (uint16_t)(((uint16_t)(c & 0x3F) << 8) | msg[idx + 1]); + if (ptr >= msg_len) return false; + if (!jumped) { + if (out_next) *out_next = idx + 2; + jumped = true; + } + idx = ptr; + jumps++; + if (jumps > 16) return false; + continue; + } + + if (c == 0) { + if (!jumped) { + if (out_next) *out_next = idx + 1; + } + if (out_idx == 0) { + if (out_cap < 2) return false; + out[0] = '.'; + out[1] = 0; + return true; + } + if (out_idx >= out_cap) return false; + out[out_idx] = 0; + return true; + } + + uint32_t lab_len = c; + idx++; + if (idx + lab_len > msg_len) return false; + + if (out_idx) { + if (out_idx + 1 >= out_cap) return false; + out[out_idx] = '.'; + out_idx++; + } + + if (out_idx + lab_len >= out_cap) return false; + memcpy(out + out_idx, msg + idx, lab_len); + out_idx += lab_len; + idx += lab_len; + } +} + +static void mdns_send(socket_handle_t sock, const net_l4_endpoint *src, bool unicast, ip_version_t ver, const uint8_t *mcast_ip, const uint8_t *pkt, uint32_t pkt_len) { + if (!sock) return; + if (!pkt) return; + if (!pkt_len) return; + + net_l4_endpoint dst; + memset(&dst, 0, sizeof(dst)); + + if (unicast && src) { + dst = *src; + if (!dst.port) dst.port = DNS_SD_MDNS_PORT; + socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, 0, (void*)pkt, pkt_len); + return; + } + + dst.ver = ver; + if (ver == IP_VER4) memcpy(dst.ip, 
mcast_ip, 4); + else memcpy(dst.ip, mcast_ip, 16); + dst.port = DNS_SD_MDNS_PORT; + socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, 0, (void*)pkt, pkt_len); +} + +static bool mdns_pick_identity(uint32_t *out_v4, uint8_t out_v6[16], uint8_t *out_ifindex, uint8_t out_ifid[8]) { + if (!out_v4) return false; + if (!out_v6) return false; + if (!out_ifindex) return false; + if (!out_ifid) return false; + + uint32_t v4 = 0; + uint8_t v6_best[16]; + uint8_t v6_fallback[16]; + uint8_t ifid_best[8]; + uint8_t ifid_fallback[8]; + uint8_t if_best = 0; + uint8_t if_fallback = 0; + + memset(v6_best, 0, sizeof(v6_best)); + memset(v6_fallback, 0, sizeof(v6_fallback)); + memset(ifid_best, 0, sizeof(ifid_best)); + memset(ifid_fallback, 0, sizeof(ifid_fallback)); + + uint8_t c = l2_interface_count(); + for (uint8_t i = 0; i < c; i++) { + l2_interface_t *l2 = l2_interface_at(i); + if (!l2)continue; + if (!l2->is_up) continue; + + if (!v4) { + for (uint8_t j = 0; j < l2->ipv4_count; j++) { + l3_ipv4_interface_t *a = l2->l3_v4[j]; + if (!a) continue; + if (a->is_localhost) continue; + if (!a->ip) continue; + v4 = a->ip; + break; + } + } + + for (uint8_t j = 0; j < l2->ipv6_count; j++) { + l3_ipv6_interface_t *a = l2->l3_v6[j]; + if (!a) continue; + if (a->is_localhost) continue; + if (!a->ip[0]) continue; + + bool is_lla = (a->ip[0] == 0xFE && (a->ip[1] & 0xC0) == 0x80); + if (!is_lla && !if_best) { + memcpy(v6_best, a->ip, 16); + memcpy(ifid_best, a->interface_id, 8); + if_best = l2->ifindex; + } + + if (!if_fallback) { + memcpy(v6_fallback, a->ip, 16); + memcpy(ifid_fallback,a->interface_id, 8); + if_fallback = l2->ifindex; + } + } + } + + if (if_best) { + *out_v4 = v4; + memcpy(out_v6, v6_best, 16); + *out_ifindex = if_best; + memcpy(out_ifid, ifid_best, 8); + return true; + } + + if (if_fallback) { + *out_v4 = v4; + memcpy(out_v6, v6_fallback, 16); + *out_ifindex = if_fallback; + memcpy(out_ifid, ifid_fallback, 8); + return true; + } + + if (v4) { + *out_v4 = v4; + memset(out_v6, 0, 
16); + *out_ifindex = 0; + memset(out_ifid, 0, 8); + return true; + } + + return false; +} + +static void mdns_refresh_identity(void) { + uint64_t now = get_time(); + if (g_mdns_last_refresh_ms && (now - g_mdns_last_refresh_ms) < 1000) return; + g_mdns_last_refresh_ms = now; + + uint32_t v4 = 0; + uint8_t v6[16]; + uint8_t ifid[8]; + uint8_t ifindex = 0; + memset(v6, 0, sizeof(v6)); + memset(ifid, 0, sizeof(ifid)); + + if (!mdns_pick_identity(&v4, v6, &ifindex, ifid)) return; + + bool changed = false; + if (g_mdns_ipv4 != v4) changed = true; + if (memcmp(g_mdns_ipv6, v6, 16) != 0) changed = true; + if (g_mdns_ifindex != ifindex) changed = true; + + g_mdns_ipv4 = v4; + memcpy(g_mdns_ipv6, v6, 16); + g_mdns_ifindex = ifindex; + + if (!g_mdns_fqdn[0]) { + char host[64]; + string_format_buf(host, sizeof(host),"redactedos-%02x%02x%02x%02x%02x%02x%02x%02x", ifid[0],ifid[1],ifid[2],ifid[3],ifid[4],ifid[5], ifid[6], ifid[7]); + string_format_buf(g_mdns_fqdn, sizeof(g_mdns_fqdn), "%s.local", host); + changed = true; + } + + if (changed) { + g_mdns_host_announce_left = MDNS_ANNOUNCE_BURST; + g_mdns_host_last_tx_ms= 0; + for (uint32_t i = 0; i < MDNS_MAX_SERVICES; i++) { + if (!g_mdns_services[i].used) continue; + if (!g_mdns_services[i].active) continue; + g_mdns_services[i].announce_left = MDNS_ANNOUNCE_BURST; + g_mdns_services[i].last_tx_ms = 0; + } + } +} + + +static void mdns_make_service_type(char *out, uint32_t out_cap, const char *service, const char *proto) { + if (!out) return; + if (!out_cap) return; + if (!service) return; + if (!proto) return; + string_format_buf(out, out_cap, "_%s._%s.local", service, proto); +} + + +static bool mdns_pkt_begin(mdns_pkt_t *p, uint8_t *out, uint32_t cap, uint16_t flags) { + if (!p) return false; + if (!out) return false; + if (cap < 12) return false; + + memset(p, 0, sizeof(*p)); + p->out = out; + p->cap = cap; + p->off = 0; + + p->off = dns_sd_put_u16(out, cap, p->off, 0); + if (!p->off) return false; + p->off = 
dns_sd_put_u16(out, cap, p->off, flags); + if (!p->off) return false; + p->off = dns_sd_put_u16(out, cap, p->off, 0); + if (!p->off) return false; + + p->an_pos = p->off; + p->off = dns_sd_put_u16(out, cap, p->off, 0); + if (!p->off) return false; + + p->off = dns_sd_put_u16(out, cap, p->off, 0); + if (!p->off) return false; + + p->ar_pos = p->off; + p->off = dns_sd_put_u16(out, cap, p->off, 0); + if (!p->off) return false; + + return true; +} + +static void mdns_pkt_commit(mdns_pkt_t *p) { + if (!p) return; + uint16_t anbe = be16(p->an); + uint16_t arbe = be16(p->ar); + memcpy(p->out + p->an_pos, &anbe, 2); + memcpy(p->out + p->ar_pos, &arbe, 2); +} + +static bool mdns_pkt_add_ptr(mdns_pkt_t *p, bool additional, const char *name, uint16_t rrclass, uint32_t ttl_s, const char *target) { + if (!p) return false; + uint32_t n = dns_sd_add_rr_ptr(p->out, p->cap, p->off, name, rrclass, ttl_s, target); + if (!n) return false; + p->off = n; + if (additional) p->ar++; + else p->an++; + return true; +} + +static bool mdns_pkt_add_a(mdns_pkt_t *p, bool additional, const char *name, uint16_t rrclass, uint32_t ttl_s, uint32_t ip) { + if (!p) return false; + uint32_t n = dns_sd_add_rr_a(p->out, p->cap, p->off, name, rrclass, ttl_s, ip); + if (!n) return false; + p->off = n; + if (additional) p->ar++; + else p->an++; + return true; +} + +static bool mdns_pkt_add_aaaa(mdns_pkt_t *p, bool additional, const char *name, uint16_t rrclass, uint32_t ttl_s, const uint8_t ip6[16]) { + if (!p) return false; + uint32_t n = dns_sd_add_rr_aaaa(p->out, p->cap, p->off, name, rrclass, ttl_s, ip6); + if (!n) return false; + p->off = n; + if (additional) p->ar++; + else p->an++; + return true; +} + +static bool mdns_pkt_add_srv(mdns_pkt_t *p, bool additional, const char *name, uint16_t rrclass, uint32_t ttl_s, uint16_t port, const char *target) { + if (!p) return false; + uint32_t n = dns_sd_add_rr_srv(p->out, p->cap, p->off, name, rrclass, ttl_s, 0, 0, port, target); + if (!n) return false; + 
p->off = n; + if (additional) p->ar++; + else p->an++; + return true; +} + +static bool mdns_pkt_add_txt(mdns_pkt_t *p, bool additional, const char *name, uint16_t rrclass, uint32_t ttl_s, const char *txt) { + if (!p) return false; + uint32_t n = dns_sd_add_rr_txt(p->out, p->cap, p->off, name, rrclass, ttl_s, txt); + if (!n) return false; + p->off = n; + if (additional) p->ar++; + else p->an++; + return true; +} + + +static void mdns_cache_put_ptr(const char *name, const char *target, uint32_t ttl_s) { + if (!name) return; + if (!target) return; + + uint64_t now = get_time(); + for (uint32_t i = 0; i < MDNS_CACHE_MAX; i++) { + mdns_cache_entry_t *e = &g_mdns_cache[i]; + if (!e->type) continue; + if (e->rrtype != DNS_SD_TYPE_PTR) continue; + if (strncmp(e->name, name, 256) != 0) continue; + strncpy(e->target, target, sizeof(e->target)); + e->expire_ms = now + (uint64_t)ttl_s * 1000; + return; + } + + for (uint32_t i = 0; i < MDNS_CACHE_MAX; i++) { + mdns_cache_entry_t *e = &g_mdns_cache[i]; + if (e->type) continue; + memset(e, 0, sizeof(*e)); + e->type = 1; + e->rrtype = DNS_SD_TYPE_PTR; + strncpy(e->name, name, sizeof(e->name)); + strncpy(e->target, target, sizeof(e->target)); + e->expire_ms = now + (uint64_t)ttl_s * 1000; + return; + } +} + +static void mdns_cache_put_srv(const char *name, uint16_t port, const char *target, uint32_t ttl_s) { + if (!name) return; + if (!target) return; + + uint64_t now = get_time(); + for (uint32_t i = 0; i < MDNS_CACHE_MAX; i++) { + mdns_cache_entry_t *e = &g_mdns_cache[i]; + if (!e->type) continue; + if (e->rrtype != DNS_SD_TYPE_SRV) continue; + if (strncmp(e->name, name, 256) != 0) continue; + e->port = port; + strncpy(e->target, target, sizeof(e->target)); + e->expire_ms = now + (uint64_t)ttl_s * 1000; + return; + } + + for (uint32_t i = 0; i < MDNS_CACHE_MAX; i++) { + mdns_cache_entry_t *e = &g_mdns_cache[i]; + if (e->type) continue; + memset(e, 0, sizeof(*e)); + e->type = 1; + e->rrtype = DNS_SD_TYPE_SRV; + e->port = port; + 
strncpy(e->name, name, sizeof(e->name)); + strncpy(e->target, target, sizeof(e->target)); + e->expire_ms = now + (uint64_t)ttl_s * 1000; + return; + } +} + +static void mdns_cache_put_txt(const char *name, const char *txt, uint32_t ttl_s) { + if (!name) return; + + uint64_t now = get_time(); + for (uint32_t i = 0; i < MDNS_CACHE_MAX; i++) { + mdns_cache_entry_t *e = &g_mdns_cache[i]; + if (!e->type) continue; + if (e->rrtype != DNS_SD_TYPE_TXT) continue; + if (strncmp(e->name, name, 256) != 0) continue; + if (txt) strncpy(e->txt, txt, sizeof(e->txt)); + else e->txt[0] = 0; + e->expire_ms = now + (uint64_t)ttl_s * 1000; + return; + } + + for (uint32_t i = 0; i < MDNS_CACHE_MAX; i++) { + mdns_cache_entry_t *e = &g_mdns_cache[i]; + if (e->type) continue; + memset(e, 0, sizeof(*e)); + e->type = 1; + e->rrtype = DNS_SD_TYPE_TXT; + strncpy(e->name, name, sizeof(e->name)); + if (txt) strncpy(e->txt, txt, sizeof(e->txt)); + e->expire_ms = now + (uint64_t)ttl_s * 1000; + return; + } +} + +static bool mdns_parse_ipv4_ptr_qname(const char *name, uint32_t *out_ip) { + if (!name) return false; + if (!out_ip) return false; + + uint32_t oct[4]; + memset(oct, 0, sizeof(oct)); + + const char *p = name; + for (int i = 0; i < 4; i++) { + uint32_t v = 0; + uint32_t digits = 0; + while (*p >= '0' && *p <= '9') { + v = v * 10 + (uint32_t)(*p - '0'); + p++; + digits++; + if (digits > 3) return false; + } + if (!digits) return false; + if (v > 255) return false; + oct[i] = v; + if (i < 3) { + if (*p != '.') return false; + p++; + } + } + + if (*p != '.') return false; + p++; + + if (strncmp(p, "in-addr.arpa", 12) != 0) return false; + + uint32_t a = oct[3]; + uint32_t b = oct[2]; + uint32_t c = oct[1]; + uint32_t d = oct[0]; + + *out_ip = (uint32_t)((a & 255u) | ((b & 255u) << 8) | ((c & 255u) << 16) | ((d & 255u) << 24)); + return true; +} + +static void mdns_parse_txt(const uint8_t *rdata, uint16_t rdlen, char *out, uint32_t out_cap) { + if (!out) return; + if (!out_cap) return; + + 
out[0] = 0; + if (!rdata) return; + if (!rdlen) return; + + uint32_t idx = 0; + uint32_t out_idx = 0; + + while (idx < rdlen) { + uint8_t len = rdata[idx]; + idx++; + if (idx + len > rdlen) break; + + if (len) { + if (out_idx) { + if (out_idx + 1 >= out_cap) break; + out[out_idx] = ';'; + out_idx++; + } + uint32_t copy = len; + if (out_idx + copy >= out_cap) copy = out_cap - out_idx - 1; + memcpy(out + out_idx, rdata + idx, copy); + out_idx += copy; + } + + idx += len; + } + + if (out_idx >= out_cap) out_idx = out_cap - 1; + out[out_idx] = 0; +} + +static void mdns_cache_from_packet(const uint8_t *pkt, uint32_t pkt_len) { + if (!pkt) return; + if (pkt_len < 12) return; + + uint16_t flags = be16(*(const uint16_t *)(pkt + 2)); + if (!(flags & DNS_SD_FLAG_QR)) return; + + uint16_t qd = be16(*(const uint16_t *)(pkt + 4)); + uint16_t an = be16(*(const uint16_t *)(pkt + 6)); + uint16_t ns = be16(*(const uint16_t *)(pkt + 8)); + uint16_t ar = be16(*(const uint16_t *)(pkt + 10)); + + uint32_t off = 12; + + for (uint16_t i = 0; i < qd; i++) { + char qname[256]; + uint32_t next = 0; + if (!mdns_read_name(pkt, pkt_len, off, qname, sizeof(qname), &next)) return; + if (next + 4 > pkt_len) return; + off = next + 4; + if (off > pkt_len) return; + } + + uint32_t rr_total = (uint32_t)an + (uint32_t)ns + (uint32_t)ar; + for (uint32_t i = 0; i < rr_total; i++) { + char name[256]; + uint32_t next = 0; + if (!mdns_read_name(pkt, pkt_len, off, name, sizeof(name), &next)) return; + if (next + 10 > pkt_len) return; + + mdns_rr_hdr_t h; + h.rrtype = be16(*(const uint16_t *)(pkt + next)); + h.rrclass = be16(*(const uint16_t *)(pkt + next + 2)); + h.ttl_s = be32(*(const uint32_t *)(pkt + next + 4)); + h.rdlen = be16(*(const uint16_t *)(pkt + next + 8)); + + uint32_t rdata = next + 10; + if (rdata + h.rdlen > pkt_len) return; + + if (h.rrtype == DNS_SD_TYPE_A) { + if (h.rdlen == 4) { + uint8_t ip4[16]; + memset(ip4, 0, sizeof(ip4)); + memcpy(ip4, pkt + rdata, 4); + dns_cache_put_ip(name, 
DNS_SD_TYPE_A, ip4, h.ttl_s * 1000); + } + } else if (h.rrtype == DNS_SD_TYPE_AAAA) { + if (h.rdlen == 16) { + dns_cache_put_ip(name, DNS_SD_TYPE_AAAA, pkt + rdata, h.ttl_s * 1000); + } + } else if (h.rrtype == DNS_SD_TYPE_PTR) { + char target[256]; + uint32_t tnext = 0; + if (mdns_read_name(pkt, pkt_len, rdata, target, sizeof(target), &tnext)) { + mdns_cache_put_ptr(name, target, h.ttl_s); + } + } else if (h.rrtype == DNS_SD_TYPE_SRV) { + if (h.rdlen >= 6) { + uint16_t port = be16(*(const uint16_t *)(pkt + rdata + 4)); + char target[256]; + uint32_t tnext = 0; + if (mdns_read_name(pkt, pkt_len, rdata + 6, target, sizeof(target), &tnext)) { + mdns_cache_put_srv(name, port, target, h.ttl_s); + } + } + } else if (h.rrtype == DNS_SD_TYPE_TXT) { + char txt[256]; + mdns_parse_txt(pkt + rdata, h.rdlen, txt, sizeof(txt)); + mdns_cache_put_txt(name, txt, h.ttl_s); + } + + off = rdata + h.rdlen; + if (off > pkt_len) return; + } +} + +static bool mdns_add_host_additionals(mdns_pkt_t *p) { + if (!p) return false; + + uint16_t rrclass = (uint16_t)(0x8000u | DNS_SD_CLASS_IN); + + if (g_mdns_ipv4) { + if (!mdns_pkt_add_a(p, true, g_mdns_fqdn, rrclass, MDNS_TTL_S, g_mdns_ipv4)) return false; + } + + uint8_t ip6[16]; + memcpy(ip6, g_mdns_ipv6, 16); + if (!ip6[0] && g_mdns_ifindex) ipv6_make_lla_from_mac(g_mdns_ifindex, ip6); + if (ip6[0]) { + if (!mdns_pkt_add_aaaa(p, true, g_mdns_fqdn, rrclass, MDNS_TTL_S, ip6)) return false; + } + + return true; +} + +static bool mdns_add_service_records(mdns_pkt_t *p, const mdns_service_t *s, uint32_t ttl_s, bool goodbye) { + if (!p) return false; + if (!s) return false; + + char type[128]; + char inst[256]; + mdns_make_service_type(type, sizeof(type), s->service, s->proto); + inst[0] = 0; + if (!s->instance[0] || !s->service[0] || !s->proto[0]) return false; + string_format_buf(inst, sizeof(inst), "%s._%s._%s.local", s->instance, s->service, s->proto); + + uint16_t ptr_class = DNS_SD_CLASS_IN; + uint16_t flush_class = (uint16_t)(0x8000u | 
DNS_SD_CLASS_IN); + + uint32_t ttl = goodbye ? 0 : ttl_s; + + if (!mdns_pkt_add_ptr(p, false, DNS_SD_ENUM_SERVICES, ptr_class, ttl, type)) return false; + if (!mdns_pkt_add_ptr(p, false, type, ptr_class, ttl, inst)) return false; + + if (!goodbye) { + if (!mdns_pkt_add_srv(p, true, inst, flush_class, ttl_s, s->port, g_mdns_fqdn)) return false; + if (!mdns_pkt_add_txt(p, true, inst, flush_class, ttl_s, s->txt)) return false; + if (!mdns_add_host_additionals(p)) return false; + } else { + if (!mdns_pkt_add_srv(p, true, inst, flush_class, 0, s->port, g_mdns_fqdn)) return false; + if (!mdns_pkt_add_txt(p, true, inst, flush_class, 0, s->txt)) return false; + } + + return true; +} + +bool mdns_register_service(const char *instance, const char *service, const char *proto, uint16_t port, const char *txt) { + if (!instance) return false; + if (!service) return false; + if (!proto) return false; + + for (uint32_t i = 0; i < MDNS_MAX_SERVICES; i++) { + mdns_service_t *s = &g_mdns_services[i]; + if (!s->used) continue; + if (strncmp(s->instance, instance, (int)sizeof(s->instance)) != 0) continue; + if (strncmp(s->service, service, (int)sizeof(s->service)) != 0) continue; + if (strncmp(s->proto, proto, (int)sizeof(s->proto)) != 0) continue; + + s->active = true; + s->port = port; + if (txt) strncpy(s->txt, txt, sizeof(s->txt)); + else s->txt[0] = 0; + s->announce_left = MDNS_ANNOUNCE_BURST; + s->goodbye_left = 0; + s->last_tx_ms = 0; + return true; + } + + for (uint32_t i = 0; i < MDNS_MAX_SERVICES; i++) { + mdns_service_t *s = &g_mdns_services[i]; + if (s->used) continue; + + memset(s, 0, sizeof(*s)); + s->used = true; + s->active = true; + s->port = port; + + strncpy(s->instance, instance, sizeof(s->instance)); + strncpy(s->service, service, sizeof(s->service)); + strncpy(s->proto, proto, sizeof(s->proto)); + if (txt) strncpy(s->txt, txt, sizeof(s->txt)); + + s->announce_left = MDNS_ANNOUNCE_BURST; + s->goodbye_left = 0; + s->last_tx_ms = 0; + return true; + } + + return 
false; +} + +bool mdns_deregister_service(const char *instance, const char *service, const char *proto) { + if (!instance) return false; + if (!service) return false; + if (!proto) return false; + + for (uint32_t i = 0; i < MDNS_MAX_SERVICES; i++) { + mdns_service_t *s = &g_mdns_services[i]; + if (!s->used) continue; + if (!s->active) continue; + if (strncmp(s->instance, instance, (int)sizeof(s->instance)) != 0) continue; + if (strncmp(s->service, service, (int)sizeof(s->service)) != 0) continue; + if (strncmp(s->proto, proto, (int)sizeof(s->proto)) != 0) continue; + + s->active = false; + s->announce_left = 0; + s->goodbye_left = MDNS_GOODBYE_BURST; + s->last_tx_ms = 0; + return true; + } + + return false; +} + +void mdns_responder_tick(socket_handle_t sock4, socket_handle_t sock6, const uint8_t mcast_v4[4], const uint8_t mcast_v6[16]) { + mdns_refresh_identity(); + uint64_t now = get_time(); + for (uint32_t i = 0; i < MDNS_CACHE_MAX; i++) { + mdns_cache_entry_t *e = &g_mdns_cache[i]; + if (!e->type) continue; + if (now < e->expire_ms) continue; + memset(e, 0, sizeof(*e)); + } + + if (!g_mdns_last_keepalive_ms) g_mdns_last_keepalive_ms = now; + if ((now - g_mdns_last_keepalive_ms) >= MDNS_KEEPALIVE_MS) { + g_mdns_last_keepalive_ms = now; + g_mdns_host_announce_left = 1; + g_mdns_host_last_tx_ms = 0; + for (uint32_t i = 0; i < MDNS_MAX_SERVICES; i++) { + if (!g_mdns_services[i].used) continue; + if (!g_mdns_services[i].active) continue; + g_mdns_services[i].announce_left = 1; + g_mdns_services[i].last_tx_ms = 0; + } + } + + if (g_mdns_host_announce_left) { + if (!g_mdns_host_last_tx_ms || (now - g_mdns_host_last_tx_ms) >= MDNS_ANNOUNCE_INTERVAL_MS) { + g_mdns_host_last_tx_ms = now; + + uint8_t pkt[900]; + mdns_pkt_t p; + if (mdns_pkt_begin(&p, pkt, sizeof(pkt), (uint16_t)(DNS_SD_FLAG_QR | DNS_SD_FLAG_AA))) { + uint16_t flush_class = (uint16_t)(0x8000u | DNS_SD_CLASS_IN); + if (g_mdns_ipv4) mdns_pkt_add_a(&p, false, g_mdns_fqdn, flush_class, MDNS_TTL_S, 
g_mdns_ipv4); + + uint8_t ip6[16]; + memcpy(ip6, g_mdns_ipv6, 16); + if (!ip6[0] && g_mdns_ifindex) ipv6_make_lla_from_mac(g_mdns_ifindex, ip6); + if (ip6[0]) mdns_pkt_add_aaaa(&p, false, g_mdns_fqdn, flush_class, MDNS_TTL_S, ip6); + + mdns_pkt_commit(&p); + + if (sock4 && mcast_v4) mdns_send(sock4, 0, false, IP_VER4, mcast_v4, pkt, p.off); + if (sock6 && mcast_v6) mdns_send(sock6, 0, false, IP_VER6, mcast_v6, pkt, p.off); + + uint32_t ttl_ms = MDNS_TTL_S * 1000; + + if (g_mdns_ipv4) { + uint8_t ip4[16]; + memset(ip4, 0, sizeof(ip4)); + memcpy(ip4, &g_mdns_ipv4, 4); + dns_cache_put_ip(g_mdns_fqdn, DNS_SD_TYPE_A, ip4, ttl_ms); + } + + uint8_t ip6_cache[16]; + memcpy(ip6_cache, g_mdns_ipv6, 16); + if (!ip6_cache[0] && g_mdns_ifindex) ipv6_make_lla_from_mac(g_mdns_ifindex, ip6_cache); + if (ip6_cache[0]) dns_cache_put_ip(g_mdns_fqdn, DNS_SD_TYPE_AAAA, ip6_cache, ttl_ms); + } + + g_mdns_host_announce_left--; + } + } + + for (uint32_t i = 0; i < MDNS_MAX_SERVICES; i++) { + mdns_service_t *s = &g_mdns_services[i]; + if (!s->used) continue; + + bool do_goodbye = false; + if (s->goodbye_left) do_goodbye = true; + if (!do_goodbye && !s->active) continue; + if (!do_goodbye && !s->announce_left) continue; + + uint32_t interval = MDNS_ANNOUNCE_INTERVAL_MS; + if (!s->last_tx_ms || (now - s->last_tx_ms) >= interval) { + s->last_tx_ms = now; + + uint8_t pkt[900]; + mdns_pkt_t p; + if (mdns_pkt_begin(&p, pkt, sizeof(pkt), (uint16_t)(DNS_SD_FLAG_QR | DNS_SD_FLAG_AA))) { + mdns_add_service_records(&p, s, MDNS_TTL_S, do_goodbye); + mdns_pkt_commit(&p); + + if (sock4 && mcast_v4) mdns_send(sock4, 0, false, IP_VER4, mcast_v4, pkt, p.off); + if (sock6 && mcast_v6) mdns_send(sock6, 0, false, IP_VER6, mcast_v6, pkt, p.off); + } + + if (do_goodbye) { + if (s->goodbye_left) s->goodbye_left--; + if (!s->goodbye_left) { + memset(s, 0, sizeof(*s)); + } + } else { + if (s->announce_left) s->announce_left--; + } + } + } +} + +void mdns_responder_handle_query(socket_handle_t sock, ip_version_t 
ver, const uint8_t *mcast_ip, const uint8_t *pkt, uint32_t pkt_len, const net_l4_endpoint *src) { + if (!sock) return; + if (!mcast_ip) return; + if (!pkt) return; + if (pkt_len < 12) return; + + mdns_refresh_identity(); + + uint16_t flags = be16(*(const uint16_t *)(pkt + 2)); + if (flags & DNS_SD_FLAG_QR) { + mdns_cache_from_packet(pkt, pkt_len); + return; + } + + uint16_t qd = be16(*(const uint16_t *)(pkt + 4)); + if (!qd) return; + + bool unicast_any = false; + + uint8_t out[1500]; + mdns_pkt_t p; + if (!mdns_pkt_begin(&p, out, sizeof(out), (uint16_t)(DNS_SD_FLAG_QR | DNS_SD_FLAG_AA))) return; + + uint32_t qoff = 12; + + for (uint16_t qi = 0; qi < qd; qi++) { + char qname[256]; + uint32_t next = 0; + if (!mdns_read_name(pkt, pkt_len, qoff, qname, sizeof(qname), &next)) return; + if (next + 4 > pkt_len) return; + + uint16_t qtype = be16(*(const uint16_t *)(pkt + next)); + uint16_t qclass = be16(*(const uint16_t *)(pkt + next + 2)); + if ((qclass & 0x8000u) != 0) unicast_any = true; + + uint32_t ipq = 0; + if (qtype == DNS_SD_TYPE_PTR && g_mdns_ipv4 && mdns_parse_ipv4_ptr_qname(qname, &ipq) && ipq == g_mdns_ipv4) { + uint16_t ptr_class = DNS_SD_CLASS_IN; + uint16_t flush_class = (uint16_t)(0x8000u | DNS_SD_CLASS_IN); + + if (!mdns_pkt_add_ptr(&p, false, qname, ptr_class, MDNS_TTL_S, g_mdns_fqdn)) return; + if (!mdns_pkt_add_a(&p, true, g_mdns_fqdn, flush_class, MDNS_TTL_S, g_mdns_ipv4)) return; + + uint8_t ip6[16]; + memcpy(ip6, g_mdns_ipv6, 16); + if (!ip6[0] && g_mdns_ifindex) ipv6_make_lla_from_mac(g_mdns_ifindex, ip6); + if (ip6[0]) { + if (!mdns_pkt_add_aaaa(&p, true, g_mdns_fqdn, flush_class, MDNS_TTL_S, ip6)) return; + } + } + + if (qtype == DNS_SD_TYPE_A || qtype == DNS_SD_TYPE_ANY) { + if (strncmp(qname, g_mdns_fqdn, 256) == 0 && g_mdns_ipv4) { + uint16_t flush_class = (uint16_t)(0x8000u | DNS_SD_CLASS_IN); + if (!mdns_pkt_add_a(&p, false, g_mdns_fqdn, flush_class, MDNS_TTL_S, g_mdns_ipv4)) return; + } + } + + if (qtype == DNS_SD_TYPE_AAAA || qtype == 
DNS_SD_TYPE_ANY) { + if (strncmp(qname, g_mdns_fqdn, 256) == 0) { + uint8_t ip6[16]; + memcpy(ip6, g_mdns_ipv6, 16); + if (!ip6[0] && g_mdns_ifindex) ipv6_make_lla_from_mac(g_mdns_ifindex, ip6); + if (ip6[0]) { + uint16_t flush_class = (uint16_t)(0x8000u | DNS_SD_CLASS_IN); + if (!mdns_pkt_add_aaaa(&p, false, g_mdns_fqdn, flush_class, MDNS_TTL_S, ip6)) return; + } + } + } + + if ((qtype == DNS_SD_TYPE_PTR || qtype == DNS_SD_TYPE_ANY) && + strncmp(qname, DNS_SD_ENUM_SERVICES, 256) == 0) { + uint16_t ptr_class = DNS_SD_CLASS_IN; + + for (uint32_t i = 0; i < MDNS_MAX_SERVICES; i++) { + if (!g_mdns_services[i].used) continue; + if (!g_mdns_services[i].active) continue; + + char type[128]; + mdns_make_service_type(type,sizeof(type), g_mdns_services[i].service, g_mdns_services[i].proto); + + bool seen = false; + for (uint32_t j = 0; j < i; j++) { + if (!g_mdns_services[j].used) continue; + if (!g_mdns_services[j].active) continue; + + char type2[128]; + mdns_make_service_type(type2,sizeof(type2),g_mdns_services[j].service, g_mdns_services[j].proto); + if (strncmp(type2, type, 128) == 0) { + seen = true; + break; + } + } + + if (seen) continue; + if (!mdns_pkt_add_ptr(&p, false, DNS_SD_ENUM_SERVICES, ptr_class, MDNS_TTL_S, type)) return; + } + } + + bool need_host_add = false; + for (uint32_t i = 0; i < MDNS_MAX_SERVICES; i++) { + if (!g_mdns_services[i].used) continue; + if (!g_mdns_services[i].active) continue; + + mdns_service_t *s = &g_mdns_services[i]; + + char type[128]; + char inst[256]; + mdns_make_service_type(type, sizeof(type), s->service, s->proto); + inst[0] = 0; + if (!s->instance[0] || !s->service[0] || !s->proto[0]) continue; + string_format_buf(inst, sizeof(inst), "%s._%s._%s.local", s->instance, s->service, s->proto); + + if ((qtype == DNS_SD_TYPE_PTR || qtype == DNS_SD_TYPE_ANY) && + strncmp(qname, type, 256) == 0) { + uint16_t ptr_class = DNS_SD_CLASS_IN; + if (!mdns_pkt_add_ptr(&p, false, type, ptr_class, MDNS_TTL_S, inst)) return; + + uint16_t 
flush_class = (uint16_t)(0x8000u | DNS_SD_CLASS_IN); + if (!mdns_pkt_add_srv(&p, true, inst, flush_class, MDNS_TTL_S, s->port, g_mdns_fqdn)) return; + if (!mdns_pkt_add_txt(&p, true, inst, flush_class, MDNS_TTL_S, s->txt)) return; + need_host_add = true; + } + + if ((qtype == DNS_SD_TYPE_SRV || qtype == DNS_SD_TYPE_ANY) && + strncmp(qname, inst, 256) == 0) { + uint16_t flush_class = (uint16_t)(0x8000u | DNS_SD_CLASS_IN); + if (!mdns_pkt_add_srv(&p, false, inst, flush_class, MDNS_TTL_S, s->port, g_mdns_fqdn)) return; + need_host_add = true; + } + + if ((qtype == DNS_SD_TYPE_TXT || qtype == DNS_SD_TYPE_ANY) && + strncmp(qname, inst, 256) == 0) { + uint16_t flush_class = (uint16_t)(0x8000u | DNS_SD_CLASS_IN); + if (!mdns_pkt_add_txt(&p, false, inst, flush_class, MDNS_TTL_S, s->txt)) return; + } + } + + if (need_host_add) { + if (!mdns_add_host_additionals(&p)) return; + } + + qoff = next + 4; + if (qoff > pkt_len) return; + } + + if (!p.an && !p.ar) return; + + mdns_pkt_commit(&p); + mdns_send(sock, src, unicast_any, ver, mcast_ip, out, p.off); + + uint32_t ttl_ms = MDNS_TTL_S * 1000; + + if (g_mdns_ipv4) { + uint8_t ip4[16]; + memset(ip4, 0, sizeof(ip4)); + memcpy(ip4, &g_mdns_ipv4, 4); + dns_cache_put_ip(g_mdns_fqdn, DNS_SD_TYPE_A, ip4, ttl_ms); + } + + uint8_t ip6[16]; + memcpy(ip6, g_mdns_ipv6, 16); + if (!ip6[0] && g_mdns_ifindex) ipv6_make_lla_from_mac(g_mdns_ifindex, ip6); + if (ip6[0]) dns_cache_put_ip(g_mdns_fqdn, DNS_SD_TYPE_AAAA, ip6, ttl_ms); +} \ No newline at end of file diff --git a/kernel/networking/application_layer/dns/mdns_responder.h b/kernel/networking/application_layer/dns/mdns_responder.h new file mode 100644 index 00000000..be14dd07 --- /dev/null +++ b/kernel/networking/application_layer/dns/mdns_responder.h @@ -0,0 +1,17 @@ +#pragma once +#include "networking/transport_layer/csocket_udp.h" +#include "net/network_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool mdns_register_service(const char *instance, const char *service, const 
char *proto, uint16_t port, const char *txt); +bool mdns_deregister_service(const char *instance, const char *service, const char *proto); + +void mdns_responder_tick(socket_handle_t sock4, socket_handle_t sock6, const uint8_t mcast_v4[4], const uint8_t mcast_v6[16]); +void mdns_responder_handle_query(socket_handle_t sock, ip_version_t ver, const uint8_t *mcast_ip, const uint8_t *pkt, uint32_t pkt_len, const net_l4_endpoint *src); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/application_layer/dns_daemon.c b/kernel/networking/application_layer/dns_daemon.c deleted file mode 100644 index bfb9c472..00000000 --- a/kernel/networking/application_layer/dns_daemon.c +++ /dev/null @@ -1,20 +0,0 @@ -#include "dns_daemon.h" -#include "process/scheduler.h" -#include "syscalls/syscalls.h" -#include "net/socket_types.h" - -static uint16_t g_pid_dnsd = 0xFFFF; -static socket_handle_t g_sock = 0; - -uint16_t dns_get_pid(void){ return g_pid_dnsd; } -bool dns_is_running(void){ return g_pid_dnsd != 0xFFFF; } -void dns_set_pid(uint16_t p){ g_pid_dnsd = p; } -socket_handle_t dns_socket_handle(void){ return g_sock; } - -int dns_deamon_entry(int argc, char* argv[]){ - (void)argc; (void)argv; - dns_set_pid(get_current_proc_pid()); - g_sock = udp_socket_create(SOCK_ROLE_CLIENT, g_pid_dnsd); - for(;;){ msleep(250); } - return 1; -} diff --git a/kernel/networking/application_layer/dns_daemon.h b/kernel/networking/application_layer/dns_daemon.h deleted file mode 100644 index 6653b5f7..00000000 --- a/kernel/networking/application_layer/dns_daemon.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#include "networking/transport_layer/csocket_udp.h" - -#ifdef __cplusplus -extern "C" { -#endif - bool dns_is_running(void); - void dns_set_pid(uint16_t p); - socket_handle_t dns_socket_handle(void); - uint16_t dns_get_pid(void); - int dns_deamon_entry(int argc, char* argv[]); -#ifdef __cplusplus -} -#endif diff --git a/kernel/networking/application_layer/http.c 
b/kernel/networking/application_layer/http.c index c465d512..42daf3af 100644 --- a/kernel/networking/application_layer/http.c +++ b/kernel/networking/application_layer/http.c @@ -3,87 +3,59 @@ #include "std/memory.h" #include "syscalls/syscalls.h" - -static inline bool is_space(char c) { - return c == ' ' || c == '\t'; -} - -static inline uint32_t parse_u32(const char *s, uint32_t len) { - uint32_t r = 0; - for (uint32_t i = 0; i < len; i++) { - char c = s[i]; - if (c >= '0' && c <= '9') { - r = r * 10 + (uint32_t)(c - '0'); - } else { - break; - } - } - return r; -} - -string http_header_builder(const HTTPHeadersCommon *C, - const HTTPHeader *H, uint32_t N) -{ +string http_header_builder(const HTTPHeadersCommon *C, const HTTPHeader *H, uint32_t N){ string out = string_repeat('\0', 0); - if (C->type.length) { + if (C->type.length){ string_append_bytes(&out, "Content-Type: ", 14); - string_append_bytes(&out, - C->type.data, - C->type.length); + string_append_bytes(&out, C->type.data, C->type.length); string_append_bytes(&out, "\r\n", 2); } - if (C->length) { - string tmp = string_format("Content-Length: %i\r\n", - (int)C->length); - string_append_bytes(&out, tmp.data, tmp.length); - free_sized(tmp.data, tmp.mem_length); - } + string tmp = string_format("Content-Length: %i\r\n", (int)C->length); + string_append_bytes(&out, tmp.data, tmp.length); + free_sized(tmp.data, tmp.mem_length); - if (C->date.length) { + if (C->date.length){ string_append_bytes(&out, "Date: ", 6); - string_append_bytes(&out, - C->date.data, - C->date.length); + string_append_bytes(&out, C->date.data, C->date.length); string_append_bytes(&out, "\r\n", 2); } - if (C->host.length) { + if (C->host.length){ string_append_bytes(&out, "Host: ", 6); - string_append_bytes(&out, - C->host.data, - C->host.length); + bool has_colon = str_has_char(C->host.data, C->host.length, ':') >= 0; + bool has_lb = str_has_char(C->host.data, C->host.length, '[') >= 0; + bool has_rb = str_has_char(C->host.data, 
C->host.length, ']') >= 0; + if (has_colon && !has_lb && !has_rb){ + string_append_bytes(&out, "[", 1); + string_append_bytes(&out, C->host.data, C->host.length); + string_append_bytes(&out, "]", 1); + } else { + string_append_bytes(&out, C->host.data, C->host.length); + } string_append_bytes(&out, "\r\n", 2); } else { string_append_bytes(&out, "Host: RedactedOS_0.1\r\n", 22); } - if (C->connection.length) { + if (C->connection.length){ string_append_bytes(&out, "Connection: ", 12); - string_append_bytes(&out, - C->connection.data, - C->connection.length); + string_append_bytes(&out, C->connection.data, C->connection.length); string_append_bytes(&out, "\r\n", 2); } - if (C->keep_alive.length) { + if (C->keep_alive.length){ string_append_bytes(&out, "Keep-Alive: ", 12); - string_append_bytes(&out, - C->keep_alive.data, - C->keep_alive.length); + string_append_bytes(&out, C->keep_alive.data, C->keep_alive.length); string_append_bytes(&out, "\r\n", 2); } - for (uint32_t i = 0; i < N; i++) { + for (uint32_t i = 0; i < N; i++){ const HTTPHeader *hdr = &H[i]; - string_append_bytes(&out, - hdr->key.data, - hdr->key.length); + string_append_bytes(&out, hdr->key.data, hdr->key.length); string_append_bytes(&out, ": ", 2); - string_append_bytes(&out, - hdr->value.data, - hdr->value.length); + string_append_bytes(&out, hdr->value.data, hdr->value.length); string_append_bytes(&out, "\r\n", 2); } @@ -91,6 +63,25 @@ string http_header_builder(const HTTPHeadersCommon *C, return out; } +void http_headers_common_free(HTTPHeadersCommon *C){ + if (!C) return; + if (C->type.mem_length) free_sized(C->type.data, C->type.mem_length); + if (C->date.mem_length) free_sized(C->date.data, C->date.mem_length); + if (C->connection.mem_length) free_sized(C->connection.data, C->connection.mem_length); + if (C->keep_alive.mem_length) free_sized(C->keep_alive.data, C->keep_alive.mem_length); + if (C->host.mem_length) free_sized(C->host.data, C->host.mem_length); + if (C->content_type.mem_length) 
free_sized(C->content_type.data, C->content_type.mem_length); + *C = (HTTPHeadersCommon){0}; +} + +void http_headers_extra_free(HTTPHeader *extra, uint32_t extra_count){ + if (!extra) return; + for (uint32_t i = 0; i < extra_count; i++){ + if (extra[i].key.mem_length) free_sized(extra[i].key.data, extra[i].key.mem_length); + if (extra[i].value.mem_length) free_sized(extra[i].value.data, extra[i].value.mem_length); + } + free_sized(extra, extra_count * sizeof(HTTPHeader)); +} void http_header_parser(const char *buf, uint32_t len, HTTPHeadersCommon *C, @@ -100,89 +91,130 @@ void http_header_parser(const char *buf, uint32_t len, *C = (HTTPHeadersCommon){0}; uint32_t max_lines = 0; - for (uint32_t i = 0; i + 1 < len; i++) { - if (buf[i]=='\r' && buf[i+1]=='\n') - max_lines++; + for (uint32_t i = 0; i + 1 < len; i++){ + if (buf[i]=='\r' && buf[i+1]=='\n') max_lines++; } - HTTPHeader *extras = (HTTPHeader*)(uintptr_t)malloc(sizeof(*extras) * max_lines); - if (!extras) { - *out_extra = NULL; - *out_extra_count = 0; - return; + HTTPHeader *extras = NULL; + if (max_lines){ + extras = (HTTPHeader*)(uintptr_t)malloc(sizeof(*extras) * max_lines); + if (!extras){ + *out_extra = NULL; + *out_extra_count = 0; + return; + } } + uint32_t extra_i = 0; uint32_t pos = 0; char key_tmp[64]; - while (pos + 1 < len) { + while (pos + 1 < len){ uint32_t eol = pos; - while (eol + 1 < len && !(buf[eol]=='\r' && buf[eol+1]=='\n')) - eol++; - if (eol == pos) { + while (eol + 1 < len && !(buf[eol]=='\r' && buf[eol+1]=='\n')) eol++; + + if (eol == pos){ pos += 2; break; } uint32_t sep = pos; while (sep < eol && buf[sep] != ':') sep++; + + if (sep == eol){ + pos = eol + 2; + continue; + } + uint32_t key_len = sep - pos; uint32_t val_start = sep + 1; - while (val_start < eol && is_space((unsigned char)buf[val_start])) - val_start++; + while (val_start < eol && (buf[val_start]==' ' || buf[val_start]=='\t')) val_start++; + uint32_t val_len = eol - val_start; uint32_t copy_len = (key_len < 
sizeof(key_tmp)-1) ? key_len : (sizeof(key_tmp)-1); - for (uint32_t i = 0; i < copy_len; i++) { + for (uint32_t i = 0; i < copy_len; i++){ key_tmp[i] = buf[pos + i]; } key_tmp[copy_len] = '\0'; - if (copy_len == 14 && strcmp_case(key_tmp, "content-length",true) == 0) { - C->length = parse_u32(buf + val_start, val_len); + if (copy_len == 14 && strcmp_case(key_tmp, "content-length", true) == 0){ + C->length = (uint32_t)parse_int_u64(buf + val_start, val_len); } - else if (copy_len == 12 && strcmp_case(key_tmp, "content-type",true) == 0) { + else if (copy_len == 12 && strcmp_case(key_tmp, "content-type", true) == 0){ C->type = string_from_literal_length((char*)(buf + val_start), val_len); } - else if (copy_len == 4 && strcmp_case(key_tmp, "date",true) == 0) { + else if (copy_len == 4 && strcmp_case(key_tmp, "date", true) == 0){ C->date = string_from_literal_length((char*)(buf + val_start), val_len); } - else if (copy_len == 10 && strcmp_case(key_tmp, "connection",true) == 0) { + else if (copy_len == 10 && strcmp_case(key_tmp, "connection", true) == 0){ C->connection = string_from_literal_length((char*)(buf + val_start), val_len); } - else if (copy_len == 10 && strcmp_case(key_tmp, "keep-alive",true) == 0) { + else if (copy_len == 10 && strcmp_case(key_tmp, "keep-alive", true) == 0){ C->keep_alive = string_from_literal_length((char*)(buf + val_start), val_len); } + else if (copy_len == 4 && strcmp_case(key_tmp, "host", true) == 0){ + C->host = string_from_literal_length((char*)(buf + val_start), val_len); + } else { string key = string_from_literal_length((char*)(buf + pos), key_len); string value = string_from_literal_length((char*)(buf + val_start), val_len); - extras[extra_i++] = (HTTPHeader){ key, value }; + + if (extras && extra_i < max_lines){ + extras[extra_i++] = (HTTPHeader){ key, value }; + } else { + if (key.mem_length) free_sized(key.data, key.mem_length); + if (value.mem_length) free_sized(value.data, value.mem_length); + } } pos = eol + 2; } - *out_extra 
= extras; - *out_extra_count = extra_i; + if (!extras || extra_i == 0){ + if (extras) free_sized(extras, sizeof(*extras) * max_lines); + *out_extra = NULL; + *out_extra_count = 0; + return; + } + + if (extra_i == max_lines){ + *out_extra = extras; + *out_extra_count = extra_i; + return; + } + + HTTPHeader *shr = (HTTPHeader*)(uintptr_t)malloc(sizeof(*shr) * extra_i); + if (shr){ + memcpy(shr, extras, sizeof(*shr) * extra_i); + free_sized(extras, sizeof(*extras) * max_lines); + *out_extra = shr; + *out_extra_count = extra_i; + return; + } + + for (uint32_t i = 0; i < extra_i; i++){ + if (extras[i].key.mem_length) free_sized(extras[i].key.data, extras[i].key.mem_length); + if (extras[i].value.mem_length) free_sized(extras[i].value.data, extras[i].value.mem_length); + } + free_sized(extras, sizeof(*extras) * max_lines); + *out_extra = NULL; + *out_extra_count = 0; } -string http_request_builder(const HTTPRequestMsg *R) -{ +string http_request_builder(const HTTPRequestMsg *R){ static const char *Mnames[] = { "GET", "POST", "PUT", "DELETE" }; string out = string_format("%s ", Mnames[R->method]); string_append_bytes(&out, R->path.data, R->path.length); - string_append_bytes(&out, " HTTP/1.1\r\n", 11); - string hdrs = http_header_builder( - &R->headers_common, - R->extra_headers, R->extra_header_count); + string hdrs = http_header_builder(&R->headers_common, R->extra_headers, R->extra_header_count); string_append_bytes(&out, hdrs.data, hdrs.length); free_sized(hdrs.data, hdrs.mem_length); - if (R->body.ptr && R->body.size) { + if (R->body.ptr && R->body.size){ string body = string_from_literal_length((char*)R->body.ptr, R->body.size); string_append_bytes(&out, body.data, body.length); free_sized(body.data, body.mem_length); @@ -191,63 +223,46 @@ string http_request_builder(const HTTPRequestMsg *R) return out; } -string http_response_builder(const HTTPResponseMsg *R) { +string http_response_builder(const HTTPResponseMsg *R){ string out = string_format("HTTP/1.1 %i ", 
(int)R->status_code); - string_append_bytes(&out, - R->reason.data, - R->reason.length); + string_append_bytes(&out, R->reason.data, R->reason.length); string_append_bytes(&out, "\r\n", 2); - string hdrs = http_header_builder( - &R->headers_common, - R->extra_headers, - R->extra_header_count - ); + string hdrs = http_header_builder(&R->headers_common, R->extra_headers, R->extra_header_count); string_append_bytes(&out, hdrs.data, hdrs.length); free_sized(hdrs.data, hdrs.mem_length); - if (R->body.ptr && R->body.size) { - string_append_bytes(&out, - (char*)R->body.ptr, - (uint32_t)R->body.size); + if (R->body.ptr && R->body.size){ + string_append_bytes(&out, (char*)R->body.ptr, (uint32_t)R->body.size); } + return out; } - -int find_crlfcrlf(const char *data, uint32_t len) { - for (uint32_t i = 0; i + 3 < len; i++) { - if (data[i] == '\r' && - data[i+1] == '\n' && - data[i+2] == '\r' && - data[i+3] == '\n') - { - return (int)i; - } +int find_crlfcrlf(const char *data, uint32_t len){ + for (uint32_t i = 0; i + 3 < len; i++){ + if (data[i]=='\r' && data[i+1]=='\n' && data[i+2]=='\r' && data[i+3]=='\n') return (int)i; } return -1; } -sizedptr http_get_payload(sizedptr header) { - if (!header.ptr || header.size < 4) { - return (sizedptr){0}; - } +sizedptr http_get_payload(sizedptr header){ + if (!header.ptr || header.size < 4) return (sizedptr){0}; + int start = find_crlfcrlf((char*)header.ptr, header.size); - if (start < 0) { - return (sizedptr){0}; - } + if (start < 0) return (sizedptr){0}; + return (sizedptr){ header.ptr + (uint32_t)(start + 4), header.size - (uint32_t)(start + 4) }; } -string http_get_chunked_payload(sizedptr chunk) { - if (chunk.ptr && chunk.size > 0) { +string http_get_chunked_payload(sizedptr chunk){ + if (chunk.ptr && chunk.size > 0){ int sizetrm = strindex((char*)chunk.ptr, "\r\n"); uint64_t chunk_size = parse_hex_u64((char*)chunk.ptr, sizetrm); - return string_from_literal_length((char*)(chunk.ptr + sizetrm + 2), - (uint32_t)chunk_size); + 
return string_from_literal_length((char*)(chunk.ptr + sizetrm + 2), (uint32_t)chunk_size); } return (string){0}; -} \ No newline at end of file +} diff --git a/kernel/networking/application_layer/http.h b/kernel/networking/application_layer/http.h index a73cba5e..06163a76 100644 --- a/kernel/networking/application_layer/http.h +++ b/kernel/networking/application_layer/http.h @@ -38,6 +38,7 @@ typedef struct { string connection; string keep_alive; string host; + string content_type; } HTTPHeadersCommon; typedef struct { @@ -67,6 +68,9 @@ void http_header_parser(const char *buf, uint32_t len, HTTPHeader **out_extra, uint32_t *out_extra_count); +void http_headers_common_free(HTTPHeadersCommon *common); +void http_headers_extra_free(HTTPHeader *extra, uint32_t extra_count); + string http_request_builder(const HTTPRequestMsg *req); string http_response_builder(const HTTPResponseMsg *res); diff --git a/kernel/networking/application_layer/ntp.c b/kernel/networking/application_layer/ntp.c new file mode 100644 index 00000000..60406243 --- /dev/null +++ b/kernel/networking/application_layer/ntp.c @@ -0,0 +1,422 @@ +#include "ntp.h" +#include "exceptions/timer.h" +#include "std/memory.h" +#include "networking/internet_layer/ipv4.h" +#include "process/scheduler.h" +#include "console/kio.h" +#include "math/math.h" +#include "networking/transport_layer/csocket_udp.h" +#include "networking/transport_layer/trans_utils.h" + +#include "syscalls/syscalls.h" + +#define NTP_PORT 123 +#define NTP_VN 4 +#define NTP_MODE_CLIENT 3 +#define NTP_MODE_SERVER 4 +#define NTP_UNIX_EPOCH_DELTA 2208988800UL + +#define NTP_STEP_US 128000LL +#define NTP_FREQ_MAX_PPM 500 +#define NTP_PHI_PPM 15 + +#define NTP_REFID_DENY 0x44454E59u //DENY +#define NTP_REFID_RSTR 0x52535452u //RSTR + +typedef struct __attribute__((packed)) { + uint8_t li_vn_mode; + uint8_t stratum; + uint8_t poll; + int8_t precision; + uint32_t rootDelay; + uint32_t rootDispersion; + uint32_t refId; + uint64_t refTs; + uint64_t 
origTs; + uint64_t recvTs; + uint64_t txTs; +} ntp_packet_t; + +typedef struct { + uint32_t ip_host; + uint64_t last_sample_mono_us; + int64_t last_filt_offset_us; + int32_t freq_ppm; + ntp_sample_t filt[NTP_FILTER_N]; + uint8_t count; + uint64_t best_delay_us; + uint64_t best_disp_us; + uint64_t jitter_us; + uint64_t root_distance_us; + int64_t best_offset_us; + uint64_t root_delay_us; + uint64_t root_disp_us; +} ntp_peer_t; + +static ntp_peer_t g_peers[2]; +static uint64_t g_last_ref_unix_us = 0; + +static uint32_t g_best_ip_host = 0; +static uint64_t g_best_delay_us = 0; +static uint64_t g_best_disp_us = 0; +static uint64_t g_best_jitter_us = 0; +static uint64_t g_best_root_distance_us = 0; +static int64_t g_best_offset_us = 0; +static uint8_t g_best_count = 0; +static uint8_t g_max_count = 0; + +static uint64_t unix_us_to_ntp64_be(uint64_t unix_us){ + uint64_t sec = unix_us / 1000000ULL; + uint64_t frac = ((unix_us % 1000000ULL) << 32) / 1000000ULL; + sec += NTP_UNIX_EPOCH_DELTA; + uint64_t ntp = (sec << 32) | (frac & 0xffffffffULL); + return be64(ntp); +} + +static uint64_t ntp64_be_to_unix_us(uint64_t ntp_be){ + uint64_t ntp = be64(ntp_be); + uint64_t sec = ntp >> 32; + uint64_t frac = ntp & 0xffffffffULL; + if (sec < NTP_UNIX_EPOCH_DELTA) return 0; + sec -= NTP_UNIX_EPOCH_DELTA; + return sec * 1000000ULL + ((frac * 1000000ULL) >> 32); +} + +static uint64_t ntp_short_be_to_us_signed(uint32_t v_be){ + int32_t v = be32(v_be); + if (v <= 0) return 0; + uint64_t us = ((uint64_t)v * 1000000ULL) / 65536ULL; + return us; +} + +static inline uint64_t ntp_local_time_us(void){ + uint64_t u = timer_unix_time_us(); + if (u) return u; + return timer_wall_time_us(); +} + +static ntp_result_t ntp_send_query(socket_handle_t sock, uint32_t server_ip_host, uint64_t* t1_us_out, uint64_t* tx_ntp64_be_out){ + ntp_packet_t p; + memset(&p, 0, sizeof(p)); + p.li_vn_mode = (0u << 6) | ((uint8_t)NTP_VN << 3)| (uint8_t)NTP_MODE_CLIENT; + p.poll = 6; + p.precision = -20; + + if 
(g_last_ref_unix_us) p.refTs = unix_us_to_ntp64_be(g_last_ref_unix_us); + + uint64_t t1_us = ntp_local_time_us(); + uint64_t tx_be = unix_us_to_ntp64_be(t1_us); + p.txTs = tx_be; + + net_l4_endpoint dst; + make_ep(server_ip_host, NTP_PORT, IP_VER4, &dst); + int64_t sent = socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, 0, &p, sizeof(p)); + if (sent < 0) return NTP_ERR_SEND; + *t1_us_out = t1_us; + *tx_ntp64_be_out = tx_be; + return NTP_OK; +} + +static int ntp_valid_server_response(const ntp_packet_t* r, uint64_t expected_orig_ntp64_be, ntp_result_t* kod_out){ + if (!r) return 0; + uint8_t li = (uint8_t)(r->li_vn_mode >> 6); + if (li == 3) return 0; + if ((r->li_vn_mode & 0x7) != NTP_MODE_SERVER) return 0; + uint8_t vn = (r->li_vn_mode >> 3) & 0x7; + if (vn < 3 || vn > 4) return 0; + if (r->stratum== 0) { + if (kod_out) { + uint32_t rid = be32(r->refId); + if (rid == NTP_REFID_DENY || rid == NTP_REFID_RSTR) *kod_out = NTP_ERR_KOD; + else *kod_out = NTP_ERR_FORMAT; + } + return 0; + } + + if (r->stratum >= 16) return 0; + if (r->origTs != expected_orig_ntp64_be) return 0; + if (r->recvTs == 0 || r->txTs == 0) return 0; + uint64_t rx = ntp64_be_to_unix_us(r->recvTs); + uint64_t tx = ntp64_be_to_unix_us(r->txTs); + if (tx < rx) return 0; + (void)kod_out; + return 1; +} + +static void discover_servers(uint32_t* s0, uint32_t* s1){ + *s0 = 0; + *s1 = 0; + + uint8_t l2n = l2_interface_count(); + for (uint8_t i = 0; i < l2n && (*s0 == 0 || *s1 == 0); i++) { + l2_interface_t* l2 = l2_interface_at(i); + if (!l2) continue; + for (int s = 0; s < MAX_IPV4_PER_INTERFACE && (*s0 == 0 || *s1 == 0); s++) { + l3_ipv4_interface_t* v4 = l2->l3_v4[s]; + if (!v4) continue; + if (v4->mode == IPV4_CFG_DISABLED) continue; + const net_runtime_opts_t* rt =&v4->runtime_opts_v4; + if (!rt) continue; + uint32_t c0 = rt->ntp[0]; + uint32_t c1 = rt->ntp[1]; + if (c0 && c0 != *s0 && c0 != *s1) { + if (!*s0) *s0 = c0; + else if (!*s1) *s1 = c0; + } + if (c1 && c1 != *s0 && c1 != *s1) { + if (!*s0) 
*s0 = c1; + else if (!*s1) *s1 = c1; + } + } + } +} + +static void discipline_apply(ntp_peer_t* best_peer, uint64_t server_unix_us_at_t4, uint64_t mono_now_us){ + if (!best_peer) return; + + int64_t off = best_peer->best_offset_us; + g_last_ref_unix_us = server_unix_us_at_t4; + + if (!timer_is_synchronised()) { + timer_sync_set_unix_us(server_unix_us_at_t4); + timer_sync_set_freq_ppm(0); + best_peer->last_sample_mono_us = mono_now_us; + best_peer->last_filt_offset_us = off; + best_peer->freq_ppm = 0; + return; + } + + if (abs_i64(off) > NTP_STEP_US) { + timer_sync_set_unix_us(server_unix_us_at_t4); + timer_sync_set_freq_ppm(0); + + best_peer->count = 0; + best_peer->best_delay_us = 0; + best_peer->best_disp_us = 0; + best_peer->jitter_us = 0; + best_peer->root_distance_us = 0; + best_peer->best_offset_us = 0; + best_peer->last_sample_mono_us = mono_now_us; + best_peer->last_filt_offset_us = 0; + best_peer->freq_ppm = 0; + + return; + } + + timer_sync_slew_us(off); + + if (best_peer->last_sample_mono_us != 0) { + uint64_t dt = mono_now_us - best_peer->last_sample_mono_us; + if (dt >= 1000000ULL) { + int64_t thr = (int64_t)best_peer->jitter_us * 4LL + 2000LL; + if (thr < 2000) thr = 2000; + + if (abs_i64(off) <= thr && abs_i64(best_peer->last_filt_offset_us) <= thr) { + int64_t d_off = off - best_peer->last_filt_offset_us; + int64_t ppm_est = (d_off * 1000000LL) / (int64_t)dt; + if (ppm_est > NTP_FREQ_MAX_PPM) ppm_est = NTP_FREQ_MAX_PPM; + if (ppm_est < -NTP_FREQ_MAX_PPM) ppm_est = -NTP_FREQ_MAX_PPM; + + int32_t est = (int32_t)ppm_est; + best_peer->freq_ppm = (int32_t)((best_peer->freq_ppm * 7 + est) / 8); + if (best_peer->freq_ppm > NTP_FREQ_MAX_PPM) best_peer->freq_ppm = NTP_FREQ_MAX_PPM; + if (best_peer->freq_ppm < -NTP_FREQ_MAX_PPM) best_peer->freq_ppm = -NTP_FREQ_MAX_PPM; + timer_sync_set_freq_ppm(best_peer->freq_ppm); + } + } + } + + best_peer->last_sample_mono_us = mono_now_us; + best_peer->last_filt_offset_us = off; +} + +ntp_result_t ntp_poll_once(uint32_t 
timeout_ms){ + uint32_t s0 = 0; + uint32_t s1 = 0; + + discover_servers(&s0, &s1); + if (s0 == 0 && s1 == 0) return NTP_ERR_NO_SERVER; + + socket_handle_t sock = udp_socket_create(0, (uint32_t)get_current_proc_pid(), NULL); + if (sock == 0) return NTP_ERR_SOCKET; + + uint64_t t1_0 = 0, t1_1 = 0; + uint64_t o0 = 0, o1 = 0; + + if (s0) { + if (ntp_send_query(sock, s0, &t1_0, &o0) != NTP_OK) t1_0 = 0; + } + if (s1 && s1 != s0) { + if (ntp_send_query(sock, s1, &t1_1, &o1) != NTP_OK) t1_1 = 0; + } + + uint32_t waited = 0; + ntp_result_t best_err = NTP_ERR_TIMEOUT; + + uint64_t mono_now_us = timer_now_usec(); + + while (waited < timeout_ms) { + uint8_t buf[96]; + net_l4_endpoint src; + int64_t n = socket_recvfrom_udp_ex(sock, buf, sizeof(buf), &src); + + if (n >= (int64_t)sizeof(ntp_packet_t) && src.ver == IP_VER4 && src.port == NTP_PORT) { + uint32_t rip = 0; + memcpy(&rip, src.ip, 4); + if (rip == s0 || rip == s1) { + uint64_t t4_us = ntp_local_time_us(); + uint64_t mono_sample_us = timer_now_usec(); + const ntp_packet_t* r = (const ntp_packet_t*)buf; + uint64_t t1_us = (rip == s0) ? t1_0 : t1_1; + uint64_t orig_be = (rip == s0) ? 
o0 : o1; + if (t1_us == 0) continue; + + ntp_result_t kod = NTP_OK; + if (!ntp_valid_server_response(r, orig_be, &kod)) { + if (kod == NTP_ERR_KOD) { + uint32_t refid = be32(r->refId); + if (refid == NTP_REFID_DENY || refid == NTP_REFID_RSTR) best_err = NTP_ERR_KOD; + } + continue; + } + + uint64_t T2 = ntp64_be_to_unix_us(r->recvTs); + uint64_t T3 = ntp64_be_to_unix_us(r->txTs); + if (T2 == 0 || T3 == 0) continue; + + int64_t rtt = (int64_t)(t4_us - t1_us) - (int64_t)(T3 - T2); + if (rtt < 0) rtt = 0; + int64_t off = ((int64_t)(T2 - t1_us) + (int64_t)(T3 - t4_us)) / 2; + uint64_t server_unix_us = (uint64_t)((int64_t)t4_us + off); + + const uint64_t year2000 = 946684800ULL*1000000ULL; + bool ok_range = false; + if (timer_is_synchronised()) { + uint64_t now_wall_us = timer_unix_time_us(); + uint64_t plus1d = 86400ULL * 1000000ULL; + ok_range = (server_unix_us >= year2000) && (now_wall_us == 0 || server_unix_us <= now_wall_us + plus1d); + } else { + ok_range = (server_unix_us >= year2000); + } + if (!ok_range) continue; + + ntp_peer_t* p = NULL; + for (uint32_t i = 0; i < 2; i++) { + if (g_peers[i].ip_host== rip) { + p = &g_peers[i]; + break; + } + } + if (!p) { + for (uint32_t i = 0; i < 2; i++) { + if (g_peers[i].ip_host == 0) { + memset(&g_peers[i],0, sizeof(g_peers[i])); + g_peers[i].ip_host = rip; + p = &g_peers[i]; + break; + } + } + } + if (!p) p = &g_peers[0]; + + p->root_delay_us = ntp_short_be_to_us_signed(r->rootDelay); + p->root_disp_us = ntp_short_be_to_us_signed(r->rootDispersion); + + ntp_sample_t s; + s.offset_us = off; + s.delay_us = (uint64_t)rtt; + s.dispersion_us = (uint64_t)rtt / 2ULL+1000; + s.mono_time_us = mono_sample_us; + + for (int i = (int)NTP_FILTER_N - 1; i > 0; i--) p->filt[i] = p->filt[i - 1]; + p->filt[0] = s; + if (p->count < NTP_FILTER_N) p->count++; + if (p->count > g_max_count) g_max_count = p->count; + + if (p->count) { + uint8_t best = 0; + uint64_t best_delay = (uint64_t)-1; + uint64_t best_disp = (uint64_t)-1; + + for (uint8_t 
i = 0; i < p->count; i++) { + uint64_t age_us = mono_sample_us -p->filt[i].mono_time_us; + uint64_t grow = (age_us * (uint64_t)NTP_PHI_PPM) / 1000000ULL; + uint64_t disp = p->filt[i].dispersion_us + grow; + uint64_t delay = p->filt[i].delay_us; + + if (delay < best_delay || (delay == best_delay && disp < best_disp)) { + best = i; + best_delay = delay; + best_disp = disp; + } + } + + int64_t best_off = p->filt[best].offset_us; + uint64_t sumsq = 0; + for (uint8_t i = 0; i < p->count; i++) { + int64_t d = p->filt[i].offset_us - best_off; + uint64_t a = (uint64_t)abs_i64(d); + sumsq += a * a; + } + + uint64_t jitter = 0; + jitter = sqrt_u64(sumsq / (uint64_t)p->count); + + p->best_offset_us = best_off; + p->best_delay_us = best_delay; + p->best_disp_us = best_disp; + p->jitter_us = jitter; + + uint64_t root_dist = 0; + root_dist += p->root_disp_us; + root_dist += p->root_delay_us / 2ULL; + root_dist += best_delay / 2ULL; + root_dist += best_disp; + root_dist += jitter; + p->root_distance_us = root_dist; + } + + best_err =NTP_OK; + } + } else { + msleep(50); + waited += 50; + } + + if (best_err == NTP_OK && waited >= (timeout_ms / 2)) break; + } + + socket_destroy_udp(sock); + + ntp_peer_t* best = NULL; + for (uint32_t i = 0; i < 2; i++) { + if (g_peers[i].ip_host == 0) continue; + if (g_peers[i].count == 0) continue; + if (!best) best = &g_peers[i]; + else { + uint64_t da = g_peers[i].root_distance_us; + uint64_t db = best->root_distance_us; + if (da < db || (da == db && g_peers[i].best_delay_us < best->best_delay_us)) best = &g_peers[i]; + } + } + + if (!best) return best_err; + + g_best_ip_host = best->ip_host; + g_best_offset_us = best->best_offset_us; + g_best_delay_us = best->best_delay_us; + g_best_disp_us = best->best_disp_us; + g_best_jitter_us = best->jitter_us; + g_best_root_distance_us = best->root_distance_us; + g_best_count = best->count; + + mono_now_us = timer_now_usec(); + uint64_t t4_us = ntp_local_time_us(); + uint64_t server_unix_us = 
(uint64_t)((int64_t)t4_us + best->best_offset_us); + + discipline_apply(best, server_unix_us, mono_now_us); + + return NTP_OK; +} + +uint8_t ntp_max_filter_count(void){ return g_max_count; } \ No newline at end of file diff --git a/kernel/networking/application_layer/ntp.h b/kernel/networking/application_layer/ntp.h new file mode 100644 index 00000000..3ef25f48 --- /dev/null +++ b/kernel/networking/application_layer/ntp.h @@ -0,0 +1,33 @@ +#pragma once + +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + NTP_OK = 0, + NTP_ERR_NO_SERVER, + NTP_ERR_SOCKET, + NTP_ERR_SEND, + NTP_ERR_TIMEOUT, + NTP_ERR_FORMAT, + NTP_ERR_KOD +} ntp_result_t; + +typedef struct { + int64_t offset_us; + uint64_t delay_us; + uint64_t dispersion_us; + uint64_t mono_time_us; +} ntp_sample_t; + +#define NTP_FILTER_N 8 + +ntp_result_t ntp_poll_once(uint32_t timeout_ms); +uint8_t ntp_max_filter_count(void); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/application_layer/ntp_daemon.c b/kernel/networking/application_layer/ntp_daemon.c new file mode 100644 index 00000000..827bf1e1 --- /dev/null +++ b/kernel/networking/application_layer/ntp_daemon.c @@ -0,0 +1,74 @@ +#include "ntp_daemon.h" +#include "ntp.h" +#include "exceptions/timer.h" +#include "process/scheduler.h" +#include "networking/internet_layer/ipv4.h" +#include "networking/internet_layer/ipv4_utils.h" +#include "networking/interface_manager.h" +#include "syscalls/syscalls.h" + +static uint16_t g_pid_ntp = 0xFFFF; +static socket_handle_t g_sock = 0; + +uint16_t ntp_get_pid(void){ return g_pid_ntp; } +bool ntp_is_running(void){ return g_pid_ntp != 0xFFFF; } +void ntp_set_pid(uint16_t p){ g_pid_ntp = p; } +socket_handle_t ntp_socket_handle(void){ return g_sock; } + +#define NTP_POLL_INTERVAL_MS 60000u +#define NTP_QUERY_TIMEOUT_MS 1200u +#define NTP_BOOTSTRAP_MAX_RETRY 8u +#define NTP_WARMUP_INTERVAL_MS 400u + +static bool any_ipv4_configured_nonlocal(void){ + uint8_t n = 
l2_interface_count(); + for (uint8_t i = 0; i < n; i++) { + l2_interface_t* l2 = l2_interface_at(i); + if (!l2 || !l2->is_up) continue; + for (int s = 0; s < MAX_IPV4_PER_INTERFACE; s++) { + l3_ipv4_interface_t* v4 = l2->l3_v4[s]; + if (!v4) continue; + if (v4->mode == IPV4_CFG_DISABLED) continue; + if (!v4->ip) continue; + if (v4->is_localhost) continue; + if (ipv4_is_loopback(v4->ip)) continue; + return true; + } + } + return false; +} + +int ntp_daemon_entry(int argc, char* argv[]){ + (void)argc; + (void)argv; + + g_pid_ntp = get_current_proc_pid(); + g_sock = udp_socket_create(0, g_pid_ntp, NULL); + ntp_set_pid(get_current_proc_pid()); + + uint32_t attempts = 0; + while (attempts < NTP_BOOTSTRAP_MAX_RETRY) { + if (!any_ipv4_configured_nonlocal()) { + msleep(500); + continue; + } + ntp_result_t r = ntp_poll_once(NTP_QUERY_TIMEOUT_MS); + if (r == NTP_OK) break; + attempts++; + uint32_t backoff_ms = (1<<(attempts <= 4 ? attempts : 4)) * 250u; + uint32_t jitter = backoff_ms / 10u; + msleep(backoff_ms + (jitter / 2u)); + } + + while (any_ipv4_configured_nonlocal() && ntp_max_filter_count() < NTP_FILTER_N) { + ntp_poll_once(NTP_QUERY_TIMEOUT_MS); + msleep(NTP_WARMUP_INTERVAL_MS); + } + + for (;;) { + ntp_poll_once(NTP_QUERY_TIMEOUT_MS); + msleep(NTP_POLL_INTERVAL_MS); + } + + return 1; +} diff --git a/kernel/networking/application_layer/ntp_daemon.h b/kernel/networking/application_layer/ntp_daemon.h new file mode 100644 index 00000000..ffec242e --- /dev/null +++ b/kernel/networking/application_layer/ntp_daemon.h @@ -0,0 +1,19 @@ +#pragma once + +#include "types.h" +#include "networking/transport_layer/csocket_udp.h" + +#ifdef __cplusplus +extern "C" { +#endif + +uint16_t ntp_get_pid(void); +bool ntp_is_running(void); +void ntp_set_pid(uint16_t p); +socket_handle_t ntp_socket_handle(void); + +int ntp_daemon_entry(int argc, char* argv[]); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/application_layer/sntp.c 
b/kernel/networking/application_layer/sntp.c index 9be74867..9f17b3e9 100644 --- a/kernel/networking/application_layer/sntp.c +++ b/kernel/networking/application_layer/sntp.c @@ -1,4 +1,4 @@ -#include "sntp.h" +#include "sntp.h" //deprecated, use ntp #include "exceptions/timer.h" #include "std/memory.h" #include "networking/internet_layer/ipv4.h" @@ -7,6 +7,7 @@ #include "types.h" #include "networking/transport_layer/csocket_udp.h" #include "syscalls/syscalls.h" +#include "networking/transport_layer/trans_utils.h" #define NTP_PORT 123 #define NTP_UNIX_EPOCH_DELTA 2208988800UL @@ -42,22 +43,15 @@ static uint64_t ntp64_be_to_unix_us(uint64_t ntp_be){ return sec * 1000000ULL + ((frac * 1000000ULL) >> 32); } -static void make_v4_ep(uint32_t ip_host, uint16_t port, net_l4_endpoint* ep){ - memset(ep, 0, sizeof(*ep)); - ep->ver = IP_VER4; - memcpy(ep->ip, &ip_host, 4); - ep->port = port; -} - static sntp_result_t sntp_send_query(socket_handle_t sock, uint32_t server_ip_host, uint64_t* t1_us_out) { ntp_packet_t p; memset(&p, 0, sizeof(p)); p.li_vn_mode = (0u<<6) | (4u<<3) | 3u; - uint64_t t1_us = timer_now_usec(); + uint64_t t1_us = timer_wall_time_us(); p.txTs = unix_us_to_ntp64_be(t1_us); net_l4_endpoint dst; - make_v4_ep(server_ip_host, 0, &dst); - int64_t sent = socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, NTP_PORT, &p, sizeof(p)); + make_ep(server_ip_host, NTP_PORT, IP_VER4, &dst); + int64_t sent = socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, 0, &p, sizeof(p)); if (sent < 0) return SNTP_ERR_SEND; *t1_us_out = t1_us; return SNTP_OK; @@ -92,7 +86,7 @@ sntp_result_t sntp_poll_once(uint32_t timeout_ms){ if (s0 == 0 && s1 == 0) return SNTP_ERR_NO_SERVER; - socket_handle_t sock = udp_socket_create(0, (uint32_t)get_current_proc_pid()); + socket_handle_t sock = udp_socket_create(0, (uint32_t)get_current_proc_pid(), NULL); if (sock == 0) return SNTP_ERR_SOCKET; uint64_t t1_0 = 0, t1_1 = 0; @@ -118,7 +112,7 @@ sntp_result_t sntp_poll_once(uint32_t timeout_ms){ memcpy(&rip, 
src.ip, 4); if (rip == s0 || rip == s1){ ntp_packet_t* r = (ntp_packet_t*)buf; - uint64_t t4_us = timer_now_usec(); + uint64_t t4_us = timer_wall_time_us(); uint64_t T1 = ntp64_be_to_unix_us(r->origTs); uint64_t T2 = ntp64_be_to_unix_us(r->recvTs); diff --git a/kernel/networking/application_layer/sntp.h b/kernel/networking/application_layer/sntp.h index 16701a38..1816b682 100644 --- a/kernel/networking/application_layer/sntp.h +++ b/kernel/networking/application_layer/sntp.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once//deprecated, use ntp #include "types.h" #ifdef __cplusplus diff --git a/kernel/networking/application_layer/sntp_daemon.c b/kernel/networking/application_layer/sntp_daemon.c index 1c70da30..21a7adca 100644 --- a/kernel/networking/application_layer/sntp_daemon.c +++ b/kernel/networking/application_layer/sntp_daemon.c @@ -1,8 +1,10 @@ -#include "sntp_daemon.h" +#include "sntp_daemon.h"//deprecated, use ntp #include "sntp.h" #include "exceptions/timer.h" #include "process/scheduler.h" #include "networking/internet_layer/ipv4.h" +#include "networking/internet_layer/ipv4_utils.h" + #include "networking/interface_manager.h" #include "syscalls/syscalls.h" @@ -19,8 +21,6 @@ socket_handle_t sntp_socket_handle(void){ return g_sock; } #define SNTP_QUERY_TIMEOUT_MS 1200u #define SNTP_BOOTSTRAP_MAX_RETRY 5u -static inline int ipv4_is_loopback_u32(uint32_t ip){ return ((ip & 0xFF000000u) == 0x7F000000u); } - static bool any_ipv4_configured_nonlocal(void){ uint8_t n = l2_interface_count(); for (uint8_t i = 0; i < n; i++) { @@ -32,7 +32,7 @@ static bool any_ipv4_configured_nonlocal(void){ if (v4->mode == IPV4_CFG_DISABLED) continue; if (!v4->ip) continue; if (v4->is_localhost) continue; - if (ipv4_is_loopback_u32(v4->ip)) continue; + if (ipv4_is_loopback(v4->ip)) continue; return true; } } @@ -42,7 +42,7 @@ static bool any_ipv4_configured_nonlocal(void){ int sntp_daemon_entry(int argc, char* argv[]){ (void)argc; (void)argv; g_pid_sntp = 
(uint16_t)get_current_proc_pid(); - g_sock = udp_socket_create(0, g_pid_sntp); + g_sock = udp_socket_create(0, g_pid_sntp, NULL); sntp_set_pid(get_current_proc_pid()); uint32_t attempts = 0; while (attempts < SNTP_BOOTSTRAP_MAX_RETRY){ diff --git a/kernel/networking/application_layer/sntp_daemon.h b/kernel/networking/application_layer/sntp_daemon.h index d73b582d..8ea56ac1 100644 --- a/kernel/networking/application_layer/sntp_daemon.h +++ b/kernel/networking/application_layer/sntp_daemon.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once//deprecated, use ntp #include "types.h" #include "networking/transport_layer/csocket_udp.h" diff --git a/kernel/networking/application_layer/socket_http_client.hpp b/kernel/networking/application_layer/socket_http_client.hpp index 9f6ae309..d04b9610 100644 --- a/kernel/networking/application_layer/socket_http_client.hpp +++ b/kernel/networking/application_layer/socket_http_client.hpp @@ -2,26 +2,107 @@ #include "console/kio.h" #include "networking/transport_layer/socket_tcp.hpp" #include "http.h" -#include "std/string.h" -#include "std/memory.h" +#include "std/std.h" #include "net/socket_types.h" class HTTPClient { private: - TCPSocket sock; + uint16_t pid; + TCPSocket* sock; + SocketExtraOptions log_opts; + SocketExtraOptions* tcp_extra; public: - explicit HTTPClient(uint16_t pid) : sock(SOCK_ROLE_CLIENT, pid){} - ~HTTPClient() {sock.close();} + explicit HTTPClient(uint16_t pid_, const SocketExtraOptions* extra) : pid(pid_), sock(nullptr), log_opts{}, tcp_extra(nullptr) { + if (extra) log_opts = *extra; - int32_t connect(SockDstKind kind, const void* dst, uint16_t port) { return sock.connect(kind, dst, port); } + const SocketExtraOptions* tcp_ptr = extra; + if (extra && (log_opts.flags & SOCK_OPT_DEBUG)) { + tcp_extra = (SocketExtraOptions*)malloc(sizeof(SocketExtraOptions)); + if (tcp_extra) { + *tcp_extra = *extra; + tcp_extra->flags &= ~SOCK_OPT_DEBUG; + tcp_ptr = tcp_extra; + } + } + + sock = (TCPSocket*)malloc(sizeof(TCPSocket)); 
+ if (sock) new (sock) TCPSocket(SOCK_ROLE_CLIENT, pid, tcp_ptr); + } + + ~HTTPClient() {close();} + + int32_t connect(SockDstKind kind, const void* dst, uint16_t port) { + uint16_t p = port; + int32_t r = sock ? sock->connect(kind, dst, p) : SOCK_ERR_STATE; + + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_HTTP_CLIENT; + ev.action = NETLOG_ACT_CONNECT; + ev.pid = pid; + ev.dst_kind = kind; + ev.u0 = p; + if (kind == DST_DOMAIN) ev.s0 = (const char*)dst; + if (kind == DST_ENDPOINT && dst) ev.dst_ep = *(const net_l4_endpoint*)dst; + ev.i0 = r; + + if (sock) { + ev.local_port = sock->get_local_port(); + ev.remote_ep = sock->get_remote_ep(); + if (ev.remote_ep.ver) ev.dst_ep = ev.remote_ep; + } + + netlog_socket_event(&log_opts, &ev); + return r; + } - HTTPResponseMsg send_request(const HTTPRequestMsg &req) { + HTTPResponseMsg send_request(const HTTPRequestMsg& req) { + HTTPResponseMsg resp{}; + if (!sock) { + resp.status_code = (HttpError)SOCK_ERR_STATE; + return resp; + } + string out = http_request_builder(&req); - int64_t sent = sock.send(out.data, out.length); + uint32_t out_len = out.length; + + uint32_t off = 0; + int64_t sent = 0; + while (off < out_len) { + int64_t r = sock->send(out.data + off, out_len - off); + if (r == TCP_WOULDBLOCK) { + msleep(5); + continue; + } + if (r < 0) { + sent = r; + break; + } + off += (uint32_t)r; + } + if (sent >= 0) sent = (int64_t)off; + + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_HTTP_CLIENT; + ev.action = NETLOG_ACT_HTTP_SEND_REQUEST; + ev.pid = pid; + ev.u0 = out_len; + ev.i0 = sent; + ev.local_port = sock->get_local_port(); + ev.remote_ep = sock->get_remote_ep(); + + char pathbuf[128]; + if (req.path.length && req.path.data) { + uint32_t n = req.path.length; + if (n > sizeof(pathbuf) - 1) n = sizeof(pathbuf) - 1; + memcpy(pathbuf, req.path.data, n); + pathbuf[n] = 0; + ev.s0 = pathbuf; + } + + netlog_socket_event(&log_opts, &ev); free_sized(out.data, out.mem_length); - HTTPResponseMsg resp{}; if (sent < 
0) { resp.status_code = (HttpError)sent; return resp; @@ -29,42 +110,41 @@ class HTTPClient { string buf = string_repeat('\0', 0); char tmp[512]; - int attempts = 0; int hdr_end = -1; - while (true) { - int64_t r = sock.recv(tmp, sizeof(tmp)); + + while (hdr_end < 0) { + int64_t r = sock->recv(tmp, sizeof(tmp)); + if (r == TCP_WOULDBLOCK) { + msleep(10); + continue; + } if (r < 0) { free_sized(buf.data, buf.mem_length); - resp.status_code = (HttpError)SOCK_ERR_SYS; + resp.status_code = (HttpError)r; return resp; } - if (r > 0) { - string_append_bytes(&buf, tmp, (uint32_t)r); - } - hdr_end = find_crlfcrlf(buf.data, buf.length); - if (hdr_end >= 0) break; - if (++attempts > 50) { + if (r == 0) { free_sized(buf.data, buf.mem_length); resp.status_code = (HttpError)SOCK_ERR_PROTO; return resp; } - msleep(10); + string_append_bytes(&buf, tmp, (uint32_t)r); + hdr_end = find_crlfcrlf(buf.data, buf.length); } - { - uint32_t i = 0; - while (i < (uint32_t)hdr_end && buf.data[i] != ' ') i++; - uint32_t code = 0, j = i+1; - while (j < (uint32_t)hdr_end && buf.data[j] >= '0' && buf.data[j] <= '9') { - code = code*10 + (buf.data[j]-'0'); ++j; - } - resp.status_code = (HttpError)code; - while (j < (uint32_t)hdr_end && buf.data[j]==' ') ++j; - if (j < (uint32_t)hdr_end) { - uint32_t rlen = hdr_end - j; - resp.reason = string_repeat('\0', 0); - string_append_bytes(&resp.reason, buf.data+j, rlen); - } + uint32_t i = 0; + while (i < (uint32_t)hdr_end && buf.data[i] != ' ') i++; + uint32_t code = 0, j = i+1; + while (j < (uint32_t)hdr_end && buf.data[j] >= '0' && buf.data[j] <= '9') { + code = code*10 + (buf.data[j]-'0'); + ++j; + } + resp.status_code = (HttpError)code; + while (j < (uint32_t)hdr_end && buf.data[j]==' ') ++j; + if (j < (uint32_t)hdr_end) { + uint32_t rlen = hdr_end - j; + resp.reason = string_repeat('\0', 0); + string_append_bytes(&resp.reason, buf.data+j, rlen); } HTTPHeader *extras = nullptr; @@ -85,39 +165,62 @@ class HTTPClient { uint32_t need = 
resp.headers_common.length; if (need > 0) { while (have < need) { - int64_t r = sock.recv(tmp, sizeof(tmp)); - if (r <= 0) break; + int64_t r = sock->recv(tmp, sizeof(tmp)); + if (r == TCP_WOULDBLOCK) { msleep(10); continue; } + if (r < 0) break; + if (r == 0) break; string_append_bytes(&buf, tmp, (uint32_t)r); have += (uint32_t)r; } - } else { - int idle = 0; - while (idle < 5) { - int64_t r = sock.recv(tmp, sizeof(tmp)); - if (r > 0) { - string_append_bytes(&buf, tmp, (uint32_t)r); - have += (uint32_t)r; - idle = 0; - } else { - ++idle; - msleep(20); - } - } } if (have > 0) { - char *body_copy = (char*)malloc(have + 1); + char *body_copy = (char*)malloc(have); if (body_copy) { memcpy(body_copy, buf.data + body_start, have); - body_copy[have] = '\0'; - resp.body.ptr = (uintptr_t)body_copy; + resp.body.ptr = (uintptr_t)body_copy; resp.body.size = have; } } + + netlog_socket_event_t ev1{}; + ev1.comp = NETLOG_COMP_HTTP_CLIENT; + ev1.action = NETLOG_ACT_HTTP_RECV_RESPONSE; + ev1.pid = pid; + ev1.u0 = (uint32_t)resp.status_code; + ev1.u1 = (uint32_t)resp.body.size; + ev1.local_port = sock->get_local_port(); + ev1.remote_ep = sock->get_remote_ep(); + netlog_socket_event(&log_opts, &ev1); + free_sized(buf.data, buf.mem_length); return resp; } int32_t close() { - return sock.close(); -} + int32_t r = SOCK_ERR_STATE; + if (sock) r = sock->close(); + + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_HTTP_CLIENT; + ev.action = NETLOG_ACT_CLOSE; + ev.pid = pid; + ev.i0 = r; + + if (sock) { + ev.local_port = sock->get_local_port(); + ev.remote_ep = sock->get_remote_ep(); + } + + netlog_socket_event(&log_opts, &ev); + + if (sock) sock->~TCPSocket(); + if (sock) free_sized(sock, sizeof(TCPSocket)); + sock = nullptr; + + if (tcp_extra) free_sized(tcp_extra, sizeof(SocketExtraOptions)); + tcp_extra = nullptr; + + log_opts.flags &= ~SOCK_OPT_DEBUG; + return r; + } }; diff --git a/kernel/networking/application_layer/socket_http_server.hpp 
b/kernel/networking/application_layer/socket_http_server.hpp index 4d50e04e..fb8ed836 100644 --- a/kernel/networking/application_layer/socket_http_server.hpp +++ b/kernel/networking/application_layer/socket_http_server.hpp @@ -1,22 +1,80 @@ +#pragma once #include "console/kio.h" #include "networking/transport_layer/socket_tcp.hpp" #include "http.h" -#include "std/string.h" -#include "std/memory.h" +#include "std/std.h" #include "net/socket_types.h" + class HTTPServer { private: - TCPSocket sock; + uint16_t pid; + TCPSocket* sock; + SocketExtraOptions log_opts; + SocketExtraOptions* tcp_extra; public: - explicit HTTPServer(uint16_t pid) : sock(SOCK_ROLE_SERVER, pid) {} + explicit HTTPServer(uint16_t pid_, const SocketExtraOptions* extra) : pid(pid_), sock(nullptr), log_opts{}, tcp_extra(nullptr) { + if (extra) log_opts = *extra; + + const SocketExtraOptions* tcp_ptr = extra; + if (extra && (log_opts.flags & SOCK_OPT_DEBUG)) { + tcp_extra = (SocketExtraOptions*)malloc(sizeof(SocketExtraOptions)); + if (tcp_extra) { + *tcp_extra = *extra; + tcp_extra->flags &= ~SOCK_OPT_DEBUG; + tcp_ptr = tcp_extra; + } + } + + sock = (TCPSocket*)malloc(sizeof(TCPSocket)); + if (sock) new (sock) TCPSocket(SOCK_ROLE_SERVER, pid, tcp_ptr); + } ~HTTPServer() { close(); } - int32_t bind(const SockBindSpec& spec, uint16_t port) { return sock.bind(spec, port); } - int32_t listen(int backlog = 4) { return sock.listen(backlog); } - TCPSocket* accept() { return sock.accept(); } + int32_t bind(const SockBindSpec& spec, uint16_t port) { + uint16_t p = port; + int32_t r = sock ? sock->bind(spec, p) : SOCK_ERR_STATE; + + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_HTTP_SERVER; + ev.action = NETLOG_ACT_BIND; + ev.pid = pid; + ev.u0 = p; + ev.i0 = r; + netlog_socket_event(&log_opts, &ev); + return r; + } + + int32_t listen(int backlog = 4) { + int b = backlog; + int32_t r = sock ? 
sock->listen(b) : SOCK_ERR_STATE; + + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_HTTP_SERVER; + ev.action = NETLOG_ACT_LISTEN; + ev.pid = pid; + ev.u0 = (uint32_t)b; + ev.i0 = r; + netlog_socket_event(&log_opts, &ev); + return r; + } + + TCPSocket* accept() { + TCPSocket* c = sock ? sock->accept() : nullptr; + if (c) { + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_HTTP_SERVER; + ev.action = NETLOG_ACT_ACCEPT; + ev.pid = pid; + ev.i0 = (int64_t)(uintptr_t)c; + ev.local_port = c->get_local_port(); + ev.remote_ep = c->get_remote_ep(); + netlog_socket_event(&log_opts, &ev); + } + return c; + } HTTPRequestMsg recv_request(TCPSocket* client) { HTTPRequestMsg req{}; @@ -24,45 +82,75 @@ class HTTPServer { string buf = string_repeat('\0', 0); char tmp[512]; - int attempts = 0; - int hdr_end = -1; + int hdr_end = -1; - while (true) { + while (hdr_end < 0) { int64_t r = client->recv(tmp, sizeof(tmp)); - if (r < 0) return req; - if (r > 0) string_append_bytes(&buf, tmp, (uint32_t)r); + if (r == TCP_WOULDBLOCK) { + msleep(10); + continue; + } + if (r <= 0) { + free_sized(buf.data, buf.mem_length); + return req; + } + string_append_bytes(&buf, tmp, (uint32_t)r); hdr_end = find_crlfcrlf(buf.data, buf.length); - if (hdr_end >= 0) break; - if (++attempts > 100) return req; - msleep(10); } - uint32_t i = 0; - while (i < (uint32_t)hdr_end && buf.data[i] != ' ') ++i; - string method_tok = string_repeat('\0', 0); - string_append_bytes(&method_tok, buf.data, i); - - if (method_tok.length == 3 && memcmp(method_tok.data, "GET", 3) == 0) - req.method = HTTP_METHOD_GET; - else if (method_tok.length == 4 && memcmp(method_tok.data, "POST", 4) == 0) - req.method = HTTP_METHOD_POST; - else if (method_tok.length == 3 && memcmp(method_tok.data, "PUT", 3) == 0) - req.method = HTTP_METHOD_PUT; - else if (method_tok.length == 6 && memcmp(method_tok.data, "DELETE", 6) == 0) - req.method = HTTP_METHOD_DELETE; - else - req.method = HTTP_METHOD_GET; - - uint32_t j = i + 1; + uint32_t 
line_end = 0; + while (line_end + 1u < (uint32_t)hdr_end) { + if (buf.data[line_end] == '\r' && buf.data[line_end + 1u] == '\n') + break; + ++line_end; + } + + uint32_t p = 0; + while (p + 1u < line_end && buf.data[p] == '\r' && buf.data[p + 1u] == '\n') + p += 2; + + uint32_t i = p; + while (i < line_end && buf.data[i] != ' ') ++i; + + const char* method_tok = buf.data + p; + uint32_t mlen = i > p ? (i - p) : 0; + + if (mlen == 3 && memcmp(method_tok, "GET", 3) == 0) req.method = HTTP_METHOD_GET; + else if (mlen == 4 && memcmp(method_tok, "POST", 4) == 0) req.method = HTTP_METHOD_POST; + else if (mlen == 3 && memcmp(method_tok, "PUT", 3) == 0) req.method = HTTP_METHOD_PUT; + else if (mlen == 6 && memcmp(method_tok, "DELETE", 6) == 0) req.method = HTTP_METHOD_DELETE; + else req.method = HTTP_METHOD_GET; + + uint32_t j = (i < line_end) ? (i + 1u) : line_end; uint32_t path_start = j; - while (j < (uint32_t)hdr_end && buf.data[j] != ' ') ++j; + while (j < line_end && buf.data[j] != ' ') ++j; req.path = string_repeat('\0', 0); string_append_bytes(&req.path, buf.data + path_start, j - path_start); - int status_line_end = strindex((char*)buf.data, "\r\n"); + if (req.path.length >= 7 && memcmp(req.path.data, "http://", 7) == 0) { + uint32_t k = 7; + while (k < req.path.length && req.path.data[k] != '/') ++k; + if (k < req.path.length) { + string newp = string_repeat('\0', 0); + string_append_bytes(&newp, req.path.data + k, req.path.length - k); + free_sized(req.path.data, req.path.mem_length); + req.path = newp; + } + } else if (req.path.length >= 8 && memcmp(req.path.data, "https://", 8) == 0) { + uint32_t k = 8; + while (k < req.path.length && req.path.data[k] != '/') ++k; + if (k < req.path.length) { + string newp = string_repeat('\0', 0); + string_append_bytes(&newp, req.path.data + k, req.path.length - k); + free_sized(req.path.data, req.path.mem_length); + req.path = newp; + } + } + + int status_line_end = (int)line_end; http_header_parser( (char*)buf.data + 
status_line_end + 2, - buf.length - (uint32_t)(status_line_end + 2), + (uint32_t)hdr_end - (uint32_t)(status_line_end + 2), &req.headers_common, &req.extra_headers, &req.extra_header_count @@ -75,47 +163,108 @@ class HTTPServer { if (need > 0) { while (have < need) { int64_t r = client->recv(tmp, sizeof(tmp)); - if (r <= 0) break; + if (r == TCP_WOULDBLOCK) { + msleep(10); + continue; + } + if (r < 0) break; + if (r == 0) break; string_append_bytes(&buf, tmp, (uint32_t)r); have += (uint32_t)r; } - } else { - int idle = 0; - while (idle < 5) { - int64_t r = client->recv(tmp, sizeof(tmp)); - if (r > 0) { - string_append_bytes(&buf, tmp, (uint32_t)r); - have += (uint32_t)r; - idle = 0; - } else { - ++idle; - msleep(20); - } - } } if (have > 0) { - char* body_copy = (char*)malloc(have + 1); + char* body_copy = (char*)malloc(have); if (body_copy) { memcpy(body_copy, buf.data + body_start, have); - body_copy[have] = '\0'; - req.body.ptr = (uintptr_t)body_copy; - req.body.size = have; + req.body.ptr = (uintptr_t)body_copy; + req.body.size = have; } } + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_HTTP_SERVER; + ev.action = NETLOG_ACT_HTTP_RECV_REQUEST; + ev.pid = pid; + ev.u0 = (uint32_t)req.method; + ev.u1 = (uint32_t)req.path.length; + ev.i0 = (int64_t)req.body.size; + ev.local_port = client->get_local_port(); + ev.remote_ep = client->get_remote_ep(); + + char pathbuf[128]; + if (req.path.length && req.path.data) { + uint32_t n = req.path.length; + if (n > sizeof(pathbuf) - 1) n = sizeof(pathbuf) - 1; + memcpy(pathbuf, req.path.data, n); + pathbuf[n] = 0; + ev.s0 = pathbuf; + } + + netlog_socket_event(&log_opts, &ev); + free_sized(buf.data, buf.mem_length); return req; } - int32_t send_response(TCPSocket* client, const HTTPResponseMsg& res) { if (!client) return SOCK_ERR_STATE; + uint32_t code = (uint32_t)res.status_code; string out = http_response_builder(&res); - int64_t sent = client->send(out.data, out.length); + uint32_t out_len = out.length; + uint32_t off 
= 0; + int64_t sent = 0; + while (off < out_len) { + int64_t r = client->send(out.data + off, out_len - off); + if (r == TCP_WOULDBLOCK) { + msleep(5); + continue; + } + if (r < 0) { + sent = r; + break; + } + off += (uint32_t)r; + } + if (sent >= 0) sent = (int64_t)off; + + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_HTTP_SERVER; + ev.action = NETLOG_ACT_HTTP_SEND_RESPONSE; + ev.pid = pid; + ev.u0 = code; + ev.u1 = out_len; + ev.i0 = sent; + ev.local_port = client->get_local_port(); + ev.remote_ep = client->get_remote_ep(); + netlog_socket_event(&log_opts, &ev); + free_sized(out.data, out.mem_length); return sent < 0 ? (int32_t)sent : SOCK_OK; } - int32_t close() { return sock.close(); } -}; + int32_t close() { + int32_t r = sock ? SOCK_OK : SOCK_ERR_STATE; + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_HTTP_SERVER; + ev.action = NETLOG_ACT_CLOSE; + ev.pid = pid; + ev.i0 = r; + if (sock) { + ev.local_port = sock->get_local_port(); + ev.remote_ep = sock->get_remote_ep(); + } + netlog_socket_event(&log_opts, &ev); + + if (sock) sock->~TCPSocket(); + if (sock) free_sized(sock, sizeof(TCPSocket)); + sock = nullptr; + + if (tcp_extra) free_sized(tcp_extra, sizeof(SocketExtraOptions)); + tcp_extra = nullptr; + + log_opts.flags &= ~SOCK_OPT_DEBUG; + return r; + } +}; \ No newline at end of file diff --git a/kernel/networking/application_layer/ssdp.c b/kernel/networking/application_layer/ssdp.c new file mode 100644 index 00000000..99e2468e --- /dev/null +++ b/kernel/networking/application_layer/ssdp.c @@ -0,0 +1,69 @@ +#include "ssdp.h" +#include "syscalls/syscalls.h" +#include "networking/application_layer/http.h" +#include "std/std.h" + +uint32_t ssdp_parse_mx_ms(const char* buf, int len){ + const char* p = memmem(buf, len, "\r\nmx:", 5); + if (!p) p = memmem(buf, len, "\nmx:", 4); + if (!p) return 1000; + + int i = (int)(p - buf); + while (i < len && buf[i] != ':') ++i; + if (i >= len) return 1000; + ++i; + + while (i < len && (buf[i] == ' ' || buf[i] == 
'\t')) ++i; + + uint32_t v = 0; + parse_uint32_dec(buf, &v); + + if (v == 0) v = 1; + if (v > 5) v = 5; + return v * 1000; +} + +bool ssdp_is_msearch(const char* buf, int len) { + if (!buf || len < 8) return false; + if (memcmp(buf, "M-SEARCH", 8) != 0) return false; + if (!memmem(buf, len, "ssdp:discover", 13)) return false; + return true; +} + +string ssdp_build_search_response(void) { + HTTPHeader extra[5]; + extra[0] = (HTTPHeader){ string_from_literal("CACHE-CONTROL"), string_from_literal("max-age=60")}; + extra[1] = (HTTPHeader){ string_from_literal("EXT"), string_from_literal("")}; + extra[2] = (HTTPHeader){ string_from_literal("SERVER"), string_from_literal("RedactedOS/1.0 UPnP/1.1")}; + extra[3] = (HTTPHeader){ string_from_literal("ST"), string_from_literal("ssdp:all")}; + extra[4] = (HTTPHeader){ string_from_literal("USN"), string_from_literal("uuid:redacted-os::upnp:rootdevice")}; + + HTTPResponseMsg R = (HTTPResponseMsg){0}; + R.status_code = HTTP_OK; + R.reason = string_from_literal("OK"); + R.extra_headers = extra; + R.extra_header_count = 5; + return http_response_builder(&R); +} + +string ssdp_build_notify(bool alive, bool v6) { + const char* host = v6 ? "[ff02::c]:1900" : "239.255.255.250:1900"; + const char* nts = alive ? 
"ssdp:alive" : "ssdp:byebye"; + + string out = string_from_literal("NOTIFY * HTTP/1.1\r\n"); + + HTTPHeader extra[6]; + extra[0] = (HTTPHeader){ string_from_literal("HOST"), string_from_literal(host)}; + extra[1] = (HTTPHeader){ string_from_literal("NT"), string_from_literal("upnp:rootdevice")}; + extra[2] = (HTTPHeader){ string_from_literal("NTS"), string_from_literal(nts)}; + extra[3] = (HTTPHeader){ string_from_literal("USN"), string_from_literal("uuid:redacted-os::upnp:rootdevice")}; + extra[4] = (HTTPHeader){ string_from_literal("CACHE-CONTROL"), string_from_literal("max-age=1800")}; + extra[5] = (HTTPHeader){ string_from_literal("SERVER"), string_from_literal("RedactedOS/1.0 UPnP/1.1")}; + + HTTPHeadersCommon c = (HTTPHeadersCommon){0}; + string hdrs = http_header_builder(&c, extra, 6); + string_append_bytes(&out, hdrs.data, hdrs.length); + free_sized(hdrs.data, hdrs.mem_length); + string_append_bytes(&out, "\r\n", 2); + return out; +} diff --git a/kernel/networking/application_layer/ssdp.h b/kernel/networking/application_layer/ssdp.h new file mode 100644 index 00000000..6e609ec1 --- /dev/null +++ b/kernel/networking/application_layer/ssdp.h @@ -0,0 +1,20 @@ +#pragma once + +#include "types.h" +#include "std/string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define IPV4_MCAST_SSDP 0xEFFFFFFAu + +bool ssdp_is_msearch(const char* buf, int len); +uint32_t ssdp_parse_mx_ms(const char* buf, int len); + +string ssdp_build_search_response(void); +string ssdp_build_notify(bool alive, bool v6); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/kernel/networking/application_layer/ssdp_daemon.c b/kernel/networking/application_layer/ssdp_daemon.c new file mode 100644 index 00000000..eb3c0812 --- /dev/null +++ b/kernel/networking/application_layer/ssdp_daemon.c @@ -0,0 +1,169 @@ +#include "ssdp_daemon.h" + +#include "process/scheduler.h" +#include "std/memory.h" +#include "std/string.h" +#include "syscalls/syscalls.h" +#include 
"net/network_types.h" +#include "networking/transport_layer/csocket_udp.h" +#include "networking/application_layer/ssdp.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "math/math.h" +#include "networking/transport_layer/trans_utils.h" + +//at the moment it's a very basic version. it's a protocol still in use but only in few cases +//it;s used in some printers, upnp, local video streaming and various other things +//eventually if needed, reactivate the process in net_proc + +typedef struct { + uint8_t used; + uint32_t due_ms; + net_l4_endpoint dst; +} ssdp_pending_t; + +static rng_t ssdp_rng; +static uint32_t ssdp_uptime_ms = 0; + +static uint32_t ssdp_host_v4 = IPV4_MCAST_SSDP; +static uint8_t ssdp_host_v6[16]; + +#define SSDP_MAX_PENDING 64 +#define SSDP_RATE_WINDOW_MS 1000 +#define SSDP_RATE_MAX 20 +#define SSDP_NOTIFY_INTERVAL_MS 300000 + +static ssdp_pending_t ssdp_pending[SSDP_MAX_PENDING]; +static uint32_t ssdp_rate_window_ms = 0; +static uint32_t ssdp_rate_count = 0; + +static void ssdp_schedule_response(const net_l4_endpoint* src, uint32_t mx_ms) { + if (!src) return; + for (int i = 0; i < SSDP_MAX_PENDING; ++i) { + if (!ssdp_pending[i].used) { + ssdp_pending[i].used = 1; + ssdp_pending[i].dst = *src; + ssdp_pending[i].due_ms = ssdp_uptime_ms + rng_between32(&ssdp_rng, 0, mx_ms); + return; + } + } +} + +static void ssdp_send_notify(socket_handle_t s4, socket_handle_t s6, bool alive) { + if (s4) { + string msg = ssdp_build_notify(alive, false); + net_l4_endpoint dst; + make_ep(ssdp_host_v4, 1900, IP_VER4, &dst); + (void)socket_sendto_udp_ex(s4, DST_ENDPOINT, &dst, 0, msg.data, msg.length); + free_sized(msg.data, msg.mem_length); + } + + if (s6) { + string msg = ssdp_build_notify(alive, true); + net_l4_endpoint dst = (net_l4_endpoint){0}; + dst.ver = IP_VER6; + memcpy(dst.ip, ssdp_host_v6, 16); + dst.port = 1900; + (void)socket_sendto_udp_ex(s6, DST_ENDPOINT, &dst, 0, msg.data, msg.length); + free_sized(msg.data, msg.mem_length); + } +} + +int 
ssdp_daemon_entry(int argc, char* argv[]) { + (void)argc; + (void)argv; + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&ssdp_rng, virt_timer); + + ipv6_make_multicast(0x02, IPV6_MCAST_SSDP, NULL, ssdp_host_v6); + + SocketExtraOptions opt4 = (SocketExtraOptions){0}; + opt4.flags = SOCK_OPT_MCAST_JOIN; + opt4.mcast_ver = IP_VER4; + memcpy(opt4.mcast_group, &ssdp_host_v4, 4); + + SocketExtraOptions opt6 = (SocketExtraOptions){0}; + opt6.flags = SOCK_OPT_MCAST_JOIN; + opt6.mcast_ver = IP_VER6; + memcpy(opt6.mcast_group, ssdp_host_v6, 16); + + uint16_t pid = get_current_proc_pid(); + + socket_handle_t s4 = udp_socket_create(SOCK_ROLE_SERVER, pid, &opt4); + socket_handle_t s6 = udp_socket_create(SOCK_ROLE_SERVER, pid, &opt6); + + struct SockBindSpec spec = (struct SockBindSpec){0}; + spec.kind = BIND_ANY; + if (s4 && socket_bind_udp_ex(s4, &spec, 1900) < 0) { + socket_close_udp(s4); + socket_destroy_udp(s4); + s4 = 0; + } + if (s6 && socket_bind_udp_ex(s6, &spec, 1900) < 0) { + socket_close_udp(s6); + socket_destroy_udp(s6); + s6 = 0; + } + + if (!s4 && !s6) return 1; + + ssdp_send_notify(s4, s6, true); + msleep(100); + ssdp_send_notify(s4, s6, true); + msleep(100); + ssdp_send_notify(s4, s6, true); + + uint32_t notify_ms = 0; + const uint32_t tick_ms = 50; + + while (1) { + notify_ms += tick_ms; + if (notify_ms >= SSDP_NOTIFY_INTERVAL_MS) { + notify_ms = 0; + ssdp_send_notify(s4, s6, true); + } + + char buf[2048]; + net_l4_endpoint src = (net_l4_endpoint){0}; + + if (s4) { + int64_t r4 = socket_recvfrom_udp_ex(s4, buf, sizeof(buf) - 1, &src); + if (r4 > 0) { + buf[r4] = 0; + if (ssdp_is_msearch(buf, (int)r4)) ssdp_schedule_response(&src, ssdp_parse_mx_ms(buf, (int)r4)); + } + } + + if (s6) { + int64_t r6 = socket_recvfrom_udp_ex(s6, buf, sizeof(buf) - 1, &src); + if (r6 > 0) { + buf[r6] = 0; + if (ssdp_is_msearch(buf, (int)r6)) ssdp_schedule_response(&src, ssdp_parse_mx_ms(buf, (int)r6)); + } + } + + ssdp_uptime_ms += 
tick_ms; + ssdp_rate_window_ms += tick_ms; + if (ssdp_rate_window_ms >= SSDP_RATE_WINDOW_MS) { + ssdp_rate_window_ms = 0; + ssdp_rate_count = 0; + } + + for (int i = 0; i < SSDP_MAX_PENDING; ++i) { + if (!ssdp_pending[i].used) continue; + if (ssdp_uptime_ms < ssdp_pending[i].due_ms) continue; + if (ssdp_rate_count >= SSDP_RATE_MAX) break; + + ssdp_rate_count += 1; + ssdp_pending[i].used = 0; + + string resp = ssdp_build_search_response(); + socket_handle_t sock = (ssdp_pending[i].dst.ver == IP_VER6) ? s6 : s4; + if (sock) (void)socket_sendto_udp_ex(sock, DST_ENDPOINT, &ssdp_pending[i].dst, 0, resp.data, resp.length); + free_sized(resp.data, resp.mem_length); + break; + } + + msleep(tick_ms); + } +} diff --git a/kernel/networking/application_layer/ssdp_daemon.h b/kernel/networking/application_layer/ssdp_daemon.h new file mode 100644 index 00000000..51956af3 --- /dev/null +++ b/kernel/networking/application_layer/ssdp_daemon.h @@ -0,0 +1,11 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +int ssdp_daemon_entry(int argc, char* argv[]); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/kernel/networking/drivers/loopback/loopback_driver.cpp b/kernel/networking/drivers/loopback/loopback_driver.cpp new file mode 100644 index 00000000..7314e651 --- /dev/null +++ b/kernel/networking/drivers/loopback/loopback_driver.cpp @@ -0,0 +1,64 @@ +#include "loopback_driver.hpp" +#include "std/memory.h" +#include "memory/page_allocator.h" + +LoopbackDriver::LoopbackDriver(){ + memory_page = 0; + rx_head = 0; + rx_tail = 0; + verbose = false; + hw_name[0]='l'; hw_name[1]='o'; hw_name[2]='o'; hw_name[3]='p'; hw_name[4]='b'; hw_name[5]='a'; hw_name[6]='c'; hw_name[7]='k'; hw_name[8]=0; +} + +LoopbackDriver::~LoopbackDriver(){} + +bool LoopbackDriver::init_at(uint64_t pci_addr, uint32_t irq_base_vector){ + (void)pci_addr; + (void)irq_base_vector; + if (!memory_page) { + memory_page = palloc(PAGE_SIZE, MEM_PRIV_KERNEL, MEM_RW, true); + if 
(!memory_page) return false; + } + return true; +} + +sizedptr LoopbackDriver::allocate_packet(size_t size){ + if (!size) return (sizedptr){0,0}; + if (!memory_page && !init_at(0, 0)) return (sizedptr){0,0}; + void* p = kalloc(memory_page, size, ALIGN_16B, MEM_PRIV_KERNEL); + return (sizedptr){(uintptr_t)p, (uint32_t)size}; +} + +sizedptr LoopbackDriver::handle_receive_packet(){ + if (rx_head == rx_tail) return (sizedptr){0,0}; + sizedptr p = rxq[rx_head]; + rx_head = (uint16_t)((rx_head + 1) & 255); + return p; +} + +void LoopbackDriver::handle_sent_packet(){} + +void LoopbackDriver::enable_verbose(){ verbose = true; } + +bool LoopbackDriver::send_packet(sizedptr packet){ + if (!packet.ptr || !packet.size) return false; + uint16_t next = (uint16_t)((rx_tail + 1) & 255); + if (next == rx_head)return false; + rxq[rx_tail] = packet; + rx_tail = next; + return true; +} + +void LoopbackDriver::get_mac(uint8_t out_mac[6]) const{ + if (out_mac) memset(out_mac, 0, 6); +} + +uint16_t LoopbackDriver::get_mtu() const{ return 65535; } + +uint16_t LoopbackDriver::get_header_size() const{ return 0; } + +const char* LoopbackDriver::hw_ifname() const{ return hw_name; } + +uint32_t LoopbackDriver::get_speed_mbps() const{ return 0xFFFFFFFFu; } + +uint8_t LoopbackDriver::get_duplex() const{ return 0xFFu; } \ No newline at end of file diff --git a/kernel/networking/drivers/loopback/loopback_driver.hpp b/kernel/networking/drivers/loopback/loopback_driver.hpp new file mode 100644 index 00000000..c110301b --- /dev/null +++ b/kernel/networking/drivers/loopback/loopback_driver.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include "networking/drivers/net_driver.hpp" +#include "networking/link_layer/nic_types.h" + +class LoopbackDriver : public NetDriver { +public: + LoopbackDriver(); + ~LoopbackDriver() override; + + bool init_at(uint64_t pci_addr, uint32_t irq_base_vector) override; + sizedptr allocate_packet(size_t size) override; + sizedptr handle_receive_packet() override; + void 
handle_sent_packet() override; + void enable_verbose() override; + bool send_packet(sizedptr packet) override; + void get_mac(uint8_t out_mac[6]) const override; + uint16_t get_mtu() const override; + uint16_t get_header_size() const override; + const char* hw_ifname() const override; + uint32_t get_speed_mbps() const override; + uint8_t get_duplex() const override; + +private: + void* memory_page; + sizedptr rxq[256]; + uint16_t rx_head; + uint16_t rx_tail; + bool verbose; + char hw_name[16]; +}; \ No newline at end of file diff --git a/kernel/networking/drivers/net_bus.cpp b/kernel/networking/drivers/net_bus.cpp index cfbc0887..511513fb 100644 --- a/kernel/networking/drivers/net_bus.cpp +++ b/kernel/networking/drivers/net_bus.cpp @@ -30,31 +30,19 @@ static int g_net_next = 0; static bool g_lo_added = false; static bool verbose = true; -static void memzero(void* p, size_t n){ - memset(p,0,n); -} +static void make_ifname(char* dst, size_t cap, const char* prefix){ + if (!dst || cap == 0) return; -static size_t u32_to_dec(char* dst, size_t cap, unsigned v){ - char tmp[16]; - int n=0; - if (cap==0) return 0; - if (v==0){ if (cap>1){ dst[0]='0'; dst[1]=0; return 1; } dst[0]=0; return 0; } - while (v>0 && n<16){ tmp[n++] = (char)('0' + (v%10)); v/=10; } - size_t i=0; - while (i0){ dst[i++] = tmp[--n]; } - dst[i]=0; - return i; -} + int idx = 0; + const char* pfx = prefix; -static void make_ifname(char* dst, size_t cap, const char* prefix){ - if (!dst || cap==0) return; - int idx=0; - if (prefix && prefix[0]=='e'){ idx = g_eth_next++; } - else if (prefix && prefix[0]=='w'){ idx = g_wif_next++; } - else { idx = g_net_next++; } - size_t j=0; - if (prefix){ strncpy(dst, prefix, cap); j = strlen(dst); } else { dst[0]='n'; dst[1]='i'; dst[2]='c'; dst[3]=0; j=3; } - if (j= MAX_L2_INTERFACES) return; net_nic_desc_t* d = &g_nics[g_count++]; d->drv = nullptr; - memzero(d->ifname,sizeof(d->ifname)); - memzero(d->hw_ifname,sizeof(d->hw_ifname)); - d->ifname[0]='l'; 
d->ifname[1]='o'; d->ifname[2]='0'; d->ifname[3]=0; - d->hw_ifname[0]='l'; d->hw_ifname[1]='o'; d->hw_ifname[2]='o'; d->hw_ifname[3]='p'; d->hw_ifname[4]='b'; d->hw_ifname[5]='a'; d->hw_ifname[6]='c'; d->hw_ifname[7]='k'; d->hw_ifname[8]=0; - memzero(d->mac,6); + + memset(d->ifname, 0, sizeof(d->ifname)); + memset(d->hw_ifname, 0, sizeof(d->hw_ifname)); + memset(d->mac, 0, sizeof(d->mac)); + + strncpy(d->ifname, "lo0", sizeof(d->ifname)); + strncpy(d->hw_ifname, "loopback", sizeof(d->hw_ifname)); + d->mtu = 65535; d->header_size = 0; d->kind = NET_IFK_LOCALHOST; d->speed_mbps = 0xFFFFFFFFu; - d->duplex = NET_IFK_LOCALHOST; + d->duplex = LINK_DUPLEX_UNKNOWN; g_lo_added = true; kprintfv("[net-bus] added loopback ifname=%s",d->ifname); } -static bool is_virtio_net(uint16_t vendor, uint16_t device, uint8_t class_code, uint8_t subclass){ +static bool is_virtio_net(uint16_t vendor, uint16_t device, uint8_t class_code, uint8_t subclass) { if (vendor != 0x1AF4) return false; if (class_code == 0x02) return true; if (device == 0x1000) return true; + (void)subclass; return false; } @@ -94,12 +86,15 @@ int net_bus_init(){ pci_device_info infos[64]; size_t n = pci_enumerate(infos, 64); - kprintfv("[net-bus] pci_enumerate=%u",(unsigned)n); + kprintfv("[net-bus] pci_enumerate=%u", (unsigned)n); int nic_ord = 0; - for (size_t i=0;i= MAX_L2_INTERFACES){ kprintfv("[net-bus] cap reached"); break; } + for (size_t i = 0; i < n; i++){ + if (g_count >= MAX_L2_INTERFACES) { + kprintfv("[net-bus] cap reached"); + break; + } const uint16_t ven = infos[i].vendor; const uint16_t dev = infos[i].device; @@ -113,22 +108,30 @@ int net_bus_init(){ const char* if_prefix = "net"; uint8_t kind = NET_IFK_OTHER; - if (sub == 0x00){ if_prefix = "eth"; kind = NET_IFK_ETH; } + if (sub == 0x00) { + if_prefix = "eth"; + kind = NET_IFK_ETH; + } bool matched = false; if (is_virtio_net(ven, dev, cls, sub)){ VirtioNetDriver* d = new VirtioNetDriver(); - if (!d){ kprintf("[net-bus][warn] virtio alloc failed"); 
continue; } + if (!d) { + kprintf("[net-bus][warn] virtio alloc failed"); + continue; + } - uint32_t irq_base = NET_IRQ_BASE + (uint32_t)(2*nic_ord); - if (!d->init_at(infos[i].addr, irq_base)){ + uint32_t irq_base = NET_IRQ_BASE + (uint32_t)(2 * nic_ord); + if (!d->init_at(infos[i].addr, irq_base)) { kprintf("[net-bus][warn] virtio init_at failed"); delete d; continue; } net_nic_desc_t* e = &g_nics[g_count++]; + memset(e, 0, sizeof(*e)); + e->drv = d; d->get_mac(e->mac); @@ -144,7 +147,7 @@ int net_bus_init(){ strncpy(e->hw_ifname, (hw && hw[0]) ? hw : "vnet", sizeof(e->hw_ifname)); kprintfv("[net-bus] added if=%s mac=%x:%x:%x:%x:%x:%x mtu=%u hdr=%u hw=%s irq_base=%u spd=%u dup=%u", - e->ifname, e->mac[0],e->mac[1],e->mac[2],e->mac[3],e->mac[4],e->mac[5], + e->ifname, e->mac[0], e->mac[1], e->mac[2], e->mac[3], e->mac[4], e->mac[5], e->mtu, e->header_size, e->hw_ifname, (unsigned)irq_base, (unsigned)e->speed_mbps, (unsigned)e->duplex); @@ -164,7 +167,6 @@ int net_bus_init(){ return (int)g_count; } - void net_bus_enable_verbose(){ verbose = true; } @@ -188,8 +190,12 @@ const char* net_bus_hw_ifname(int idx){ return g_nics[idx].hw_ifname; } -void net_bus_get_mac(int idx, uint8_t out_mac[6]){ - if (idx < 0 || (size_t)idx >= g_count) { memzero(out_mac,6); return; } +void net_bus_get_mac(int idx, uint8_t out_mac[6]) { + if (!out_mac) return; + if (idx < 0 || (size_t)idx >= g_count) { + memset(out_mac, 0, 6); + return; + } memcpy(out_mac, g_nics[idx].mac, 6); } diff --git a/kernel/networking/drivers/net_bus.hpp b/kernel/networking/drivers/net_bus.hpp index 4d27bfe2..32f45502 100644 --- a/kernel/networking/drivers/net_bus.hpp +++ b/kernel/networking/drivers/net_bus.hpp @@ -1,13 +1,7 @@ #pragma once #include "net_driver.hpp" -enum NetIfKind : uint8_t { - NET_IFK_ETH = 0x00, - NET_IFK_WIFI = 0x01, - NET_IFK_OTHER = 0x02, - NET_IFK_LOCALHOST = 0xFE, - NET_IFK_UNKNOWN = 0xFF -}; +#include "networking/link_layer/nic_types.h" int net_bus_init(); int net_bus_count(); diff --git 
a/kernel/networking/drivers/net_driver.hpp b/kernel/networking/drivers/net_driver.hpp index a1e4778a..2d083da1 100644 --- a/kernel/networking/drivers/net_driver.hpp +++ b/kernel/networking/drivers/net_driver.hpp @@ -11,11 +11,12 @@ class NetDriver { virtual sizedptr handle_receive_packet() = 0; virtual void handle_sent_packet() = 0; virtual void enable_verbose() = 0; - virtual void send_packet(sizedptr packet) = 0; + virtual bool send_packet(sizedptr packet) = 0; virtual void get_mac(uint8_t out_mac[6]) const = 0; virtual uint16_t get_mtu() const = 0; virtual uint16_t get_header_size() const = 0; virtual const char* hw_ifname() const = 0; virtual uint32_t get_speed_mbps() const = 0; virtual uint8_t get_duplex() const = 0; -}; + virtual bool sync_multicast(const uint8_t* macs, uint32_t count) {(void)macs; (void)count; return true; } +}; \ No newline at end of file diff --git a/kernel/networking/drivers/virtio_net_pci/virtio_net_pci.cpp b/kernel/networking/drivers/virtio_net_pci/virtio_net_pci.cpp index d5b5e478..601f9ee0 100644 --- a/kernel/networking/drivers/virtio_net_pci/virtio_net_pci.cpp +++ b/kernel/networking/drivers/virtio_net_pci/virtio_net_pci.cpp @@ -5,11 +5,15 @@ #include "memory/page_allocator.h" #include "std/memory.h" #include "networking/network.h" +#include "sysregs.h" +#include "exceptions/irq.h" #define RECEIVE_QUEUE 0 #define TRANSMIT_QUEUE 1 +#define CONTROL_QUEUE 2 static constexpr uint32_t RX_BUF_SIZE = PAGE_SIZE; +static constexpr uint16_t RX_CHAIN_SEGS = 4; static void* g_rx_pool = nullptr; static uint16_t g_rx_qsz = 0; @@ -20,12 +24,68 @@ static uint16_t g_rx_qsz = 0; kprintf(fmt, ##__VA_ARGS__); \ }\ }) + +typedef struct __attribute__((packed)) virtio_net_ctrl_hdr_t { + uint8_t cls; + uint8_t cmd; +} virtio_net_ctrl_hdr_t; + +typedef struct __attribute__((packed)) virtio_net_ctrl_ack_t { + uint8_t ack; +} virtio_net_ctrl_ack_t; + +#define VIRTIO_NET_CTRL_RX 0 +#define VIRTIO_NET_CTRL_MAC 1 + +#define VIRTIO_NET_CTRL_RX_PROMISC 0 +#define 
VIRTIO_NET_CTRL_RX_ALLMULTI 1 +#define VIRTIO_NET_CTRL_RX_NOMULTI 3 + +#define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + + +static bool virtio_net_ctrl_send(virtio_device* dev, uint8_t cls, uint8_t cmd, const void* payload, uint32_t payload_len) { + if (!dev) return false; + + virtio_net_ctrl_hdr_t hdr; + hdr.cls= cls; + hdr.cmd = cmd; + + uint32_t in_len = (uint32_t)sizeof(hdr) + payload_len; + uint8_t* in = (uint8_t*)kalloc(dev->memory_page, (size_t)in_len, ALIGN_16B, MEM_PRIV_KERNEL); + if (!in) return false; + memcpy(in, &hdr, sizeof(hdr)); + if (payload_len && payload) memcpy(in + sizeof(hdr), payload, payload_len); + + virtio_net_ctrl_ack_t* ack = (virtio_net_ctrl_ack_t*)kalloc(dev->memory_page, sizeof(virtio_net_ctrl_ack_t), ALIGN_16B, MEM_PRIV_KERNEL); + if (!ack) { + kfree(in, in_len); + return false; + } + ack->ack = 1; + + virtio_buf bufs[2]; + bufs[0] = VBUF((uintptr_t)in, in_len, 0); + bufs[1] = VBUF((uintptr_t)ack, sizeof(virtio_net_ctrl_ack_t), VIRTQ_DESC_F_WRITE); + + select_queue(dev, CONTROL_QUEUE); + bool ok = virtio_send_nd(dev, bufs, 2); + bool aok = (ack->ack == 0); + + kfree(ack, sizeof(virtio_net_ctrl_ack_t)); + kfree(in, in_len); + return ok && aok; +} + VirtioNetDriver::VirtioNetDriver() { verbose = false; + mrg_rxbuf = false; + ctrl_vq = false; + ctrl_rx = false; header_size = sizeof(virtio_net_hdr_t); mtu = 1500; speed_mbps = 0xFFFFFFFFu; - duplex = LinkDuplex::Unknown; + duplex = LINK_DUPLEX_UNKNOWN; last_used_receive_idx = 0; last_used_sent_idx = 0; hw_name[0] = 0; @@ -43,7 +103,7 @@ bool VirtioNetDriver::init_at(uint64_t addr, uint32_t irq_base_vector){ if (vnp_net_dev.common_cfg) pci_register(((uintptr_t)vnp_net_dev.common_cfg) & ~(uintptr_t)(PAGE_SIZE-1), PAGE_SIZE); if (vnp_net_dev.device_cfg) pci_register(((uintptr_t)vnp_net_dev.device_cfg) & ~(uintptr_t)(PAGE_SIZE-1), PAGE_SIZE); if (vnp_net_dev.notify_cfg) pci_register(((uintptr_t)vnp_net_dev.notify_cfg) & ~(uintptr_t)(PAGE_SIZE-1), PAGE_SIZE); - if (vnp_net_dev.isr_cfg) 
pci_register(((uintptr_t)vnp_net_dev.isr_cfg) & ~(uintptr_t)(PAGE_SIZE-1), PAGE_SIZE); + if (vnp_net_dev.isr_cfg) pci_register(((uintptr_t)vnp_net_dev.isr_cfg) & ~(uintptr_t)(PAGE_SIZE-1), PAGE_SIZE); uint8_t interrupts_ok = pci_setup_interrupts(addr, irq_base_vector, 2); if (!interrupts_ok){ @@ -58,12 +118,39 @@ bool VirtioNetDriver::init_at(uint64_t addr, uint32_t irq_base_vector){ pci_enable_device(addr); kprintfv("[virtio-net] device enabled"); + uint64_t net_feature_mask = 0; + net_feature_mask |= (1ULL << VIRTIO_F_VERSION_1); + net_feature_mask |= (1ULL << VIRTIO_NET_F_MAC); + net_feature_mask |= (1ULL << VIRTIO_NET_F_STATUS); + net_feature_mask |= (1ULL << VIRTIO_NET_F_MTU); + net_feature_mask |= (1ULL << VIRTIO_NET_F_MRG_RXBUF); + net_feature_mask |= (1ULL << VIRTIO_NET_F_CTRL_VQ); + net_feature_mask |= (1ULL << VIRTIO_NET_F_CTRL_RX); + virtio_set_feature_mask(net_feature_mask); + if (!virtio_init_device(&vnp_net_dev)){ kprintf("[virtio-net][err] virtio_init_device failed"); return false; } - kprintfv("[virtio-net] common_cfg=%x device_cfg=%x", - (uintptr_t)vnp_net_dev.common_cfg,(uintptr_t)vnp_net_dev.device_cfg); + kprintfv("[virtio-net] common_cfg=%x device_cfg=%x", (uintptr_t)vnp_net_dev.common_cfg,(uintptr_t)vnp_net_dev.device_cfg); + + mrg_rxbuf = (vnp_net_dev.negotiated_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) != 0; + header_size = mrg_rxbuf ? 
sizeof(virtio_net_hdr_mrg_rxbuf_t) : sizeof(virtio_net_hdr_t); + + ctrl_vq = (vnp_net_dev.negotiated_features & (1ULL << VIRTIO_NET_F_CTRL_VQ)) != 0; + ctrl_rx = (vnp_net_dev.negotiated_features & (1ULL << VIRTIO_NET_F_CTRL_RX)) != 0; + if (ctrl_vq) { + select_queue(&vnp_net_dev, CONTROL_QUEUE); + if (!vnp_net_dev.common_cfg->queue_size) { + ctrl_vq = false; + ctrl_rx = false; + } else { + vnp_net_dev.common_cfg->queue_msix_vector = 0xFFFF; + } + } + + if (ctrl_vq && ctrl_rx) (void)sync_multicast((const uint8_t*)0, 0); + kprintfv("[virtio-net] negotiated ctrl_vq=%u ctrl_rx=%u", (unsigned)ctrl_vq, (unsigned)ctrl_rx); select_queue(&vnp_net_dev, RECEIVE_QUEUE); uint16_t rx_qsz = vnp_net_dev.common_cfg->queue_size; @@ -76,11 +163,36 @@ bool VirtioNetDriver::init_at(uint64_t addr, uint32_t irq_base_vector){ kprintfv("[virtio-net] rx_pool=%x",(uintptr_t)g_rx_pool); if (!g_rx_pool) return false; } - for (uint16_t i=0;iqueue_desc); + volatile virtq_avail* rx_avail = (volatile virtq_avail*)PHYS_TO_VIRT_P((virtq_avail*)vnp_net_dev.common_cfg->queue_driver); + + uint16_t chain_count = (uint16_t)(rx_qsz / RX_CHAIN_SEGS); + if (!chain_count) return false; + + rx_avail->idx = 0; + + for (uint16_t c = 0; c < chain_count; c++) { + uint16_t head = (uint16_t)(c * RX_CHAIN_SEGS); + + for (uint16_t s = 0; s < RX_CHAIN_SEGS; s++){ + uint16_t di = (uint16_t)(head + s); + void* buf = (void*)((uintptr_t)g_rx_pool + (uintptr_t)di * (uintptr_t)RX_BUF_SIZE); + + rx_desc[di].addr = VIRT_TO_PHYS((uintptr_t)buf); + rx_desc[di].len = RX_BUF_SIZE; + rx_desc[di].flags = (uint16_t)(VIRTQ_DESC_F_WRITE | ((s + 1 < RX_CHAIN_SEGS) ? 
VIRTQ_DESC_F_NEXT : 0)); + rx_desc[di].next = (uint16_t)(di + 1); + } + + rx_desc[head + (RX_CHAIN_SEGS - 1)].next = 0; + + rx_avail->ring[rx_avail->idx % rx_qsz] = head; + rx_avail->idx++; } + asm volatile ("dmb ishst" ::: "memory"); + virtio_notify(&vnp_net_dev); + vnp_net_dev.common_cfg->queue_msix_vector = 0; kprintfv("[virtio-net] RX vector=%u",vnp_net_dev.common_cfg->queue_msix_vector); if (vnp_net_dev.common_cfg->queue_msix_vector != 0) return false; @@ -98,18 +210,17 @@ bool VirtioNetDriver::init_at(uint64_t addr, uint32_t irq_base_vector){ uint16_t dev_mtu = cfg->mtu; if (dev_mtu != 0 && dev_mtu != 0xFFFF && dev_mtu >= 576) mtu = dev_mtu; else mtu = 1500; - header_size = sizeof(virtio_net_hdr_t); speed_mbps = cfg->speed; switch (cfg->duplex) { - case 0: duplex = LinkDuplex::Half; break; - case 1: duplex = LinkDuplex::Full; break; - default: duplex = LinkDuplex::Unknown; break; + case 0: duplex = LINK_DUPLEX_HALF; break; + case 1: duplex = LINK_DUPLEX_FULL; break; + default: duplex = LINK_DUPLEX_UNKNOWN; break; } hw_name[0] = 'v'; hw_name[1] = 'i'; hw_name[2] = 'r'; hw_name[3] = 't'; hw_name[4] = 'i'; hw_name[5] = 'o'; hw_name[6] = 0; - const char* dpx_str = (duplex == LinkDuplex::Full) ? "full" : (duplex == LinkDuplex::Half) ? "half" : "unknown"; + const char* dpx_str = (duplex == LINK_DUPLEX_FULL) ? "full" : (duplex == LINK_DUPLEX_HALF) ? 
"half" : "unknown"; if (speed_mbps != 0xFFFFFFFF) { kprintfv("[virtio-net] mac=%x:%x:%x:%x:%x:%x mtu=%u hdr=%u speed=%uMbps duplex=%s", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5], @@ -146,57 +257,125 @@ uint32_t VirtioNetDriver::get_speed_mbps() const { return speed_mbps; } uint8_t VirtioNetDriver::get_duplex() const { switch (duplex) { - case LinkDuplex::Half: return 0; - case LinkDuplex::Full: return 1; + case LINK_DUPLEX_HALF: return 0; + case LINK_DUPLEX_FULL: return 1; default: return 0xFF; } } sizedptr VirtioNetDriver::allocate_packet(size_t size){ - return (sizedptr){(uintptr_t)kalloc(vnp_net_dev.memory_page, size + header_size, ALIGN_64B, MEM_PRIV_KERNEL), size + header_size}; + size_t total = size + (size_t)header_size; + return (sizedptr){(uintptr_t)kalloc(vnp_net_dev.memory_page, total, ALIGN_64B, MEM_PRIV_KERNEL), total}; } sizedptr VirtioNetDriver::handle_receive_packet(){ + uint32_t desc_index = 0; + uint32_t total_len = 0; + uint16_t num_buffers = 1; + + disable_interrupt(); select_queue(&vnp_net_dev, RECEIVE_QUEUE); - volatile virtq_used* used = (virtq_used*)vnp_net_dev.common_cfg->queue_device; - volatile virtq_desc* desc = (virtq_desc*)vnp_net_dev.common_cfg->queue_desc; - volatile virtq_avail* avail = (virtq_avail*)vnp_net_dev.common_cfg->queue_driver; + volatile virtq_used* used = (volatile virtq_used*)PHYS_TO_VIRT_P((void*)(uintptr_t)vnp_net_dev.common_cfg->queue_device); + volatile virtq_desc* desc = (volatile virtq_desc*)PHYS_TO_VIRT_P((void*)(uintptr_t)vnp_net_dev.common_cfg->queue_desc); + volatile virtq_avail* avail = (volatile virtq_avail*)PHYS_TO_VIRT_P((void*)(uintptr_t)vnp_net_dev.common_cfg->queue_driver); uint16_t qsz = vnp_net_dev.common_cfg->queue_size; + if (!qsz) { + enable_interrupt(); + return (sizedptr){0,0}; + } + asm volatile ("dmb ishld" ::: "memory"); + uint16_t new_idx = used->idx; if (new_idx == last_used_receive_idx) { + enable_interrupt(); return (sizedptr){0,0}; } uint16_t used_ring_index = 
(uint16_t)(last_used_receive_idx % qsz); volatile virtq_used_elem* e = &used->ring[used_ring_index]; last_used_receive_idx++; - - uint32_t desc_index = e->id; - uint32_t len = e->len; - if (desc_index >= qsz || len <= header_size) - { - avail->ring[avail->idx % qsz] = (uint16_t)desc_index; - avail->idx++; - *(volatile uint16_t*)(vnp_net_dev.notify_cfg + vnp_net_dev.notify_off_multiplier * RECEIVE_QUEUE) = 0; + desc_index = e->id; + total_len = e->len; + + if (desc_index >= qsz || total_len <= (uint32_t)header_size){ + uint16_t aidx = avail->idx; + avail->ring[aidx % qsz] = (uint16_t)(desc_index % qsz); + asm volatile ("dmb ishst" ::: "memory"); + avail->idx = (uint16_t)(aidx + 1); + asm volatile ("dmb ishst" ::: "memory"); + virtio_notify(&vnp_net_dev); + enable_interrupt(); return (sizedptr){0,0}; } - uintptr_t packet_addr = desc[desc_index].addr; - uint32_t payload_len = len - header_size; + volatile uint8_t* first_buf = (volatile uint8_t*)PHYS_TO_VIRT_P((void*)(uintptr_t)desc[desc_index].addr); + if (mrg_rxbuf) { + virtio_net_hdr_mrg_rxbuf_t* h = (virtio_net_hdr_mrg_rxbuf_t*)(uintptr_t)first_buf; + num_buffers = h->num_buffers; + if (num_buffers == 0) num_buffers = 1; + if (num_buffers > RX_CHAIN_SEGS) { + uint16_t aidx = avail->idx; + avail->ring[aidx % qsz] = (uint16_t)desc_index; + asm volatile ("dmb ishst" ::: "memory"); + avail->idx = (uint16_t)(aidx + 1); + asm volatile ("dmb ishst" ::: "memory"); + virtio_notify(&vnp_net_dev); + enable_interrupt(); + return (sizedptr){0,0}; + } + } + + enable_interrupt(); + uint32_t payload_len = total_len - (uint32_t)header_size; void* out_buf = kalloc(vnp_net_dev.memory_page, payload_len, ALIGN_64B, MEM_PRIV_KERNEL); if (!out_buf){ - avail->ring[avail->idx % qsz] = (uint16_t)desc_index; - avail->idx++; - *(volatile uint16_t*)(vnp_net_dev.notify_cfg + vnp_net_dev.notify_off_multiplier * RECEIVE_QUEUE) = 0; + disable_interrupt(); + uint16_t aidx = avail->idx; + avail->ring[aidx % qsz] = (uint16_t)desc_index; + asm 
volatile ("dmb ishst" ::: "memory"); + avail->idx = (uint16_t)(aidx + 1); + asm volatile ("dmb ishst" ::: "memory"); + virtio_notify(&vnp_net_dev); + enable_interrupt(); return (sizedptr){0,0}; } - memcpy(out_buf, (void*)(packet_addr + header_size), payload_len); - avail->ring[avail->idx % qsz] = (uint16_t)desc_index; - avail->idx++; - *(volatile uint16_t*)(vnp_net_dev.notify_cfg + vnp_net_dev.notify_off_multiplier * RECEIVE_QUEUE) = 0; + uint32_t written = 0; + uint32_t remaining = payload_len; + uint16_t di = (uint16_t)desc_index; + for (uint16_t bi = 0; bi < num_buffers && remaining; bi++) { + volatile uint8_t* buf = (volatile uint8_t*)PHYS_TO_VIRT_P((void*)(uintptr_t)desc[di].addr); + uint32_t cap = desc[di].len; + uint32_t off = (bi == 0) ? (uint32_t)header_size : 0; + if (cap <= off) break; + + uint32_t chunk = cap - off; + if (chunk > remaining) chunk = remaining; + memcpy((uint8_t*)out_buf + written, (const void*)((uintptr_t)buf + off), chunk); + written += chunk; + remaining -= chunk; + + if (bi + 1 < num_buffers) { + if (!(desc[di].flags & VIRTQ_DESC_F_NEXT)) break; + di = desc[di].next; + } + } + + disable_interrupt(); + uint16_t aidx = avail->idx; + avail->ring[aidx % qsz] = (uint16_t)desc_index; + asm volatile ("dmb ishst" ::: "memory"); + avail->idx = (uint16_t)(aidx + 1); + asm volatile ("dmb ishst" ::: "memory"); + virtio_notify(&vnp_net_dev); + enable_interrupt(); + + if (remaining != 0) { + kfree(out_buf, payload_len); + return (sizedptr){0,0}; + } return (sizedptr){ (uintptr_t)out_buf, payload_len }; } @@ -204,31 +383,66 @@ sizedptr VirtioNetDriver::handle_receive_packet(){ void VirtioNetDriver::handle_sent_packet(){ select_queue(&vnp_net_dev, TRANSMIT_QUEUE); - volatile virtq_used* used = (virtq_used*)vnp_net_dev.common_cfg->queue_device; - volatile virtq_desc* desc = (virtq_desc*)vnp_net_dev.common_cfg->queue_desc; - uint16_t qsz = vnp_net_dev.common_cfg->queue_size; + volatile virtq_used* used = (volatile 
virtq_used*)PHYS_TO_VIRT_P((void*)(uintptr_t)vnp_net_dev.common_cfg->queue_device); + last_used_sent_idx = used->idx; +} - int cleaned = 0; - while (last_used_sent_idx != used->idx && cleaned < 64) { - uint16_t used_ring_index = (uint16_t)(last_used_sent_idx % qsz); - last_used_sent_idx = (uint16_t)(last_used_sent_idx + 1); - volatile virtq_used_elem* e = &used->ring[used_ring_index]; - uint32_t desc_index = e->id; +bool VirtioNetDriver::send_packet(sizedptr packet){ + if (!packet.ptr || !packet.size) return false; - if (desc_index < qsz) { - kfree((void*)desc[desc_index].addr, desc[desc_index].len); - } - cleaned++; - } + disable_interrupt(); + select_queue(&vnp_net_dev, TRANSMIT_QUEUE); + + if ((size_t)header_size <= packet.size) memset((void*)packet.ptr, 0, (size_t)header_size); + if (mrg_rxbuf) ((virtio_net_hdr_mrg_rxbuf_t*)packet.ptr)->num_buffers = 0; + virtio_buf b; + b.addr = packet.ptr; + b.len = (uint32_t)packet.size; + b.flags = 0; + bool ok = virtio_send_nd(&vnp_net_dev, &b, 1); + enable_interrupt(); + + kprintfv("[virtio-net] tx queued len=%u",(unsigned)packet.size); + kfree((void*)packet.ptr, packet.size); + return ok; } -void VirtioNetDriver::send_packet(sizedptr packet){ - select_queue(&vnp_net_dev, TRANSMIT_QUEUE); - if (packet.ptr && packet.size){ - if (header_size <= packet.size) memset((void*)packet.ptr, 0, header_size); - virtio_send_1d(&vnp_net_dev, packet.ptr, packet.size); - kprintfv("[virtio-net] tx queued len=%u",(unsigned)packet.size); +bool VirtioNetDriver::sync_multicast(const uint8_t* macs, uint32_t count) { + if (!ctrl_vq) return true; + if (!ctrl_rx) return true; + if (!macs && count) return false; + + disable_interrupt(); + + bool ok = true; + + uint8_t v0 = 0; + uint8_t v1 = 1; + + ok = ok && virtio_net_ctrl_send(&vnp_net_dev, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC, &v0, 1); + ok = ok && virtio_net_ctrl_send(&vnp_net_dev, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_ALLMULTI, &v0, 1); + + if (count == 0) ok = ok && 
virtio_net_ctrl_send(&vnp_net_dev, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_NOMULTI, &v1, 1); + else ok = ok && virtio_net_ctrl_send(&vnp_net_dev, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_NOMULTI, &v0, 1); + + uint32_t payload_len = 8u + count * 6u; + uint8_t* payload = (uint8_t*)kalloc(vnp_net_dev.memory_page, payload_len, ALIGN_16B, MEM_PRIV_KERNEL); + if (!payload) { + enable_interrupt(); + return false; } + kprintfv("[virtio-net] sync_multicast ctrl_vq=%u ctrl_rx=%u count=%u",(unsigned)ctrl_vq, (unsigned)ctrl_rx, (unsigned)count); + + uint32_t uc = 0; + memcpy(payload + 0, &uc, 4); + memcpy(payload + 4, &count, 4); + for (uint32_t i = 0; i < count; ++i) memcpy(payload + 8u + i * 6u, macs + i * 6u, 6); + + ok = ok && virtio_net_ctrl_send(&vnp_net_dev, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET, payload, payload_len); + + kfree(payload, payload_len); + enable_interrupt(); + return ok; } void VirtioNetDriver::enable_verbose(){ diff --git a/kernel/networking/drivers/virtio_net_pci/virtio_net_pci.hpp b/kernel/networking/drivers/virtio_net_pci/virtio_net_pci.hpp index b683c084..2b6e3501 100644 --- a/kernel/networking/drivers/virtio_net_pci/virtio_net_pci.hpp +++ b/kernel/networking/drivers/virtio_net_pci/virtio_net_pci.hpp @@ -3,6 +3,27 @@ #include "networking/drivers/net_driver.hpp" #include "virtio/virtio_pci.h" #include "std/memory.h" +#include "networking/link_layer/nic_types.h" +#define VIRTIO_F_VERSION_1 32 + +#define VIRTIO_NET_F_CSUM 0 +#define VIRTIO_NET_F_GUEST_CSUM 1 +#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 +#define VIRTIO_NET_F_MTU 3 +#define VIRTIO_NET_F_MAC 5 +#define VIRTIO_NET_F_GSO 6 +#define VIRTIO_NET_F_GUEST_TSO4 7 +#define VIRTIO_NET_F_GUEST_TSO6 8 +#define VIRTIO_NET_F_GUEST_ECN 9 +#define VIRTIO_NET_F_GUEST_UFO 10 +#define VIRTIO_NET_F_HOST_TSO4 11 +#define VIRTIO_NET_F_HOST_TSO6 12 +#define VIRTIO_NET_F_HOST_ECN 13 +#define VIRTIO_NET_F_HOST_UFO 14 +#define VIRTIO_NET_F_MRG_RXBUF 15 +#define VIRTIO_NET_F_STATUS 16 +#define 
VIRTIO_NET_F_CTRL_VQ 17 +#define VIRTIO_NET_F_CTRL_RX 18 typedef struct __attribute__((packed)) virtio_net_hdr_t { uint8_t flags; @@ -13,6 +34,11 @@ typedef struct __attribute__((packed)) virtio_net_hdr_t { uint16_t csum_offset; } virtio_net_hdr_t; +typedef struct __attribute__((packed)) virtio_net_hdr_mrg_rxbuf_t { + virtio_net_hdr_t hdr; + uint16_t num_buffers; +} virtio_net_hdr_mrg_rxbuf_t; + typedef struct __attribute__((packed)) virtio_net_config { uint8_t mac[6]; uint16_t status; @@ -25,12 +51,6 @@ typedef struct __attribute__((packed)) virtio_net_config { uint32_t supported_hash_types; } virtio_net_config; -enum class LinkDuplex : uint8_t { - Unknown = 0xFF, - Half = 0, - Full = 1 -}; - class VirtioNetDriver : public NetDriver { public: VirtioNetDriver(); @@ -45,16 +65,27 @@ class VirtioNetDriver : public NetDriver { uint32_t get_speed_mbps() const override; uint8_t get_duplex() const override; + bool sync_multicast(const uint8_t* macs, uint32_t count) override; sizedptr allocate_packet(size_t size) override; sizedptr handle_receive_packet() override; void handle_sent_packet() override; - void send_packet(sizedptr packet) override; + bool send_packet(sizedptr packet) override; private: virtio_device vnp_net_dev; + volatile virtq_desc* rx_desc; + volatile virtq_avail* rx_avail; + volatile virtq_used* rx_used; + uint16_t rx_qsz; + bool verbose; + bool mrg_rxbuf; + + bool ctrl_vq; + bool ctrl_rx; + uint16_t header_size; uint16_t mtu; uint32_t speed_mbps; diff --git a/kernel/networking/interface_manager.c b/kernel/networking/interface_manager.c index e87ee8ad..3858c095 100644 --- a/kernel/networking/interface_manager.c +++ b/kernel/networking/interface_manager.c @@ -1,71 +1,22 @@ #include "interface_manager.h" #include "std/memory.h" #include "networking/link_layer/arp.h" +#include "networking/link_layer/ndp.h" #include "networking/internet_layer/ipv4_route.h" +#include "networking/internet_layer/ipv6_route.h" #include "networking/port_manager.h" #include 
"process/scheduler.h" #include "memory/page_allocator.h" - -static void* g_kmem_page = NULL; - +#include "networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/internet_layer/igmp.h" +#include "networking/internet_layer/mld.h" +#include "networking/link_layer/nic_types.h" +#include "networking/network.h" + +static void* g_kmem_page_v4 = NULL; +static void* g_kmem_page_v6 = NULL; //TODO: add network settings -static inline void mem_zero(void *p, size_t n){ if (p) memset(p,0,n); } -static inline void mem_copy(void *d, const void *s, size_t n){ if (d && s && n) memcpy(d,s,n); } - -static void copy_name(char dst[16], const char* src) { - int i = 0; - if (!src) { dst[0] = 0; return; } - while (src[i] && i < 15) { dst[i] = src[i]; i += 1; } - dst[i] = 0; -} - -static inline bool is_power2_mask_contiguous(uint32_t mask){ - if (mask == 0) return false; - uint32_t inv = ~mask; - return ((inv & (inv + 1u)) == 0); -} - -static inline bool ipv4_is_unspecified(uint32_t ip){ return ip == 0; } -static inline bool ipv4_is_loopback(uint32_t ip){ return ((ip & 0xFF000000u) == 0x7F000000u); } -static inline bool ipv4_is_multicast(uint32_t ip){ return ((ip & 0xF0000000u) == 0xE0000000u); } -static inline bool ipv4_is_network_address(uint32_t ip, uint32_t mask){ if (mask==0 || mask==0xFFFFFFFFu) return false; return ((ip & mask) == ip); } -static inline bool ipv4_is_broadcast_address(uint32_t ip, uint32_t mask){ if (mask==0 || mask==0xFFFFFFFFu) return false; return (ip == ((ip & mask) | ~mask)); } -static inline bool ipv4_is_reserved_special(uint32_t ip){ - if ((ip & 0xFF000000u) == 0x00000000u) return true; - if ((ip & 0xFFFF0000u) == 0xA9FE0000u) return true; - if ((ip & 0xF0000000u) == 0xF0000000u) return true; - return false; -} - -static inline int prefix_match(const uint8_t a[16], const uint8_t b[16]){ - int bits = 0; - for (int i=0;i<16;i++){ - uint8_t x = (uint8_t)(a[i] ^ b[i]); - if (x==0){ bits += 8; continue; } - 
for (int bpos=7; bpos>=0; --bpos){ - if (x & (1u<> 4) & 0x0F); } -static inline uint8_t l3_local_slot_from_id(uint8_t l3_id){ return (uint8_t)(l3_id & 0x0F); } static bool v4_has_dhcp_on_l2(uint8_t ifindex){ for (int i = 0; i < V4_POOL_SIZE; i++){ @@ -114,19 +58,59 @@ static bool v4_has_dhcp_on_l2(uint8_t ifindex){ return false; } -uint8_t l2_interface_create(const char *name, void *driver_ctx, uint16_t base_metric){ - int slot = find_free_l2_slot(); +static bool l2_has_active_v4(l2_interface_t* itf) { + if (!itf) return false; + for (int s = 0; s < MAX_IPV4_PER_INTERFACE; ++s) { + l3_ipv4_interface_t* v4 = itf->l3_v4[s]; + if (!v4) continue; + if (v4->mode == IPV4_CFG_DISABLED) continue; + if (v4->ip) return true; + } + return false; +} + +static bool l2_has_active_v6(l2_interface_t* itf) { + if (!itf) return false; + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; ++s) { + l3_ipv6_interface_t* v6 = itf->l3_v6[s]; + if (!v6) continue; + if (v6->cfg == IPV6_CFG_DISABLE) continue; + if (!ipv6_is_unspecified(v6->ip)) return true; + } + return false; +} + +uint8_t l2_interface_create(const char *name, void *driver_ctx, uint16_t base_metric, uint8_t kind){ + int slot = -1; + for (int i=0;i<(int)MAX_L2_INTERFACES;i++) if (!g_l2_used[i]) { + slot=i; + break; + } if (slot < 0) return 0; + l2_interface_t* itf = &g_l2[slot]; - mem_zero(itf, sizeof(*itf)); + memset(itf, 0, sizeof(*itf)); itf->ifindex = (uint8_t)(slot + 1); - copy_name(itf->name, name); - itf->driver_context = driver_ctx; - itf->base_metric = base_metric; + int i = 0; + if (name) { + while (name[i] && i < 15) { + itf->name[i] = name[i]; + i++; + } + } + itf->name[i] = 0; - itf->arp_table = arp_table_create(); - itf->nd_table = NULL; + itf->driver_context = driver_ctx; + itf->base_metric = base_metric; + itf->kind = kind; + if (kind != NET_IFK_LOCALHOST) { + itf->arp_table = arp_table_create(); + itf->nd_table = ndp_table_create(); + } else { + itf->arp_table = NULL; + itf->nd_table = NULL; + } g_l2_used[slot] = 
1; g_l2_count += 1; @@ -139,9 +123,16 @@ bool l2_interface_destroy(uint8_t ifindex){ l2_interface_t* itf = &g_l2[slot]; if (itf->ipv4_count || itf->ipv6_count) return false; - if (itf->arp_table) { arp_table_destroy((arp_table_t*)itf->arp_table); itf->arp_table = NULL; } + if (itf->arp_table) { + arp_table_destroy((arp_table_t*)itf->arp_table); + itf->arp_table = NULL; + } + if (itf->nd_table) { + ndp_table_destroy((ndp_table_t*)itf->nd_table); + itf->nd_table = NULL; + } - mem_zero(&g_l2[slot], sizeof(l2_interface_t)); + memset(&g_l2[slot], 0, sizeof(l2_interface_t)); g_l2_used[slot] = 0; if (g_l2_count) g_l2_count -= 1; return true; @@ -172,6 +163,46 @@ bool l2_interface_set_up(uint8_t ifindex, bool up) { return true; } +static bool l2_sync_multicast_filters(l2_interface_t* itf) { + if (!itf) return false; + uint8_t macs[(MAX_IPV4_MCAST_PER_INTERFACE + MAX_IPV6_MCAST_PER_INTERFACE) * 6]; + uint32_t count = 0; + + for (int i = 0; i < (int)itf->ipv4_mcast_count; ++i) { + uint8_t m[6]; + ipv4_mcast_to_mac(itf->ipv4_mcast[i], m); + bool exists = false; + for (uint32_t j = 0; j < count; ++j) { + if (memcmp(&macs[j * 6], m, 6) == 0) { + exists = true; + break; + } + } + if (!exists) { + memcpy(&macs[count * 6], m, 6); + count++; + } + } + + for (int i = 0; i < (int)itf->ipv6_mcast_count; ++i) { + uint8_t m[6]; + ipv6_multicast_mac(itf->ipv6_mcast[i], m); + bool exists = false; + for (uint32_t j = 0; j < count; ++j) { + if (memcmp(&macs[j*6], m, 6) == 0) { + exists = true; + break; + } + } + if (!exists) { + memcpy(&macs[count * 6], m, 6); + count++; + } + } + + return network_sync_multicast(itf->ifindex, macs, count); +} + static int find_ipv4_group_index(l2_interface_t* itf, uint32_t group) { for (int i = 0; i < (int)itf->ipv4_mcast_count; ++i) if (itf->ipv4_mcast[i] == group) return i; return -1; @@ -184,6 +215,8 @@ bool l2_ipv4_mcast_join(uint8_t ifindex, uint32_t group) { if (find_ipv4_group_index(itf, group) >= 0) return true; if (itf->ipv4_mcast_count >= 
MAX_IPV4_MCAST_PER_INTERFACE) return false; itf->ipv4_mcast[itf->ipv4_mcast_count++] = group; + if (itf->kind != NET_IFK_LOCALHOST) (void)l2_sync_multicast_filters(itf); + if (itf->kind != NET_IFK_LOCALHOST && l2_has_active_v4(itf)) (void)igmp_send_join(ifindex, group); return true; } @@ -194,11 +227,13 @@ bool l2_ipv4_mcast_leave(uint8_t ifindex, uint32_t group) { if (idx < 0) return true; for (int i = idx + 1; i < (int)itf->ipv4_mcast_count; ++i) itf->ipv4_mcast[i-1] = itf->ipv4_mcast[i]; if (itf->ipv4_mcast_count) itf->ipv4_mcast_count -= 1; + if (itf->kind != NET_IFK_LOCALHOST) (void)l2_sync_multicast_filters(itf); + if (itf->kind != NET_IFK_LOCALHOST && l2_has_active_v4(itf)) (void)igmp_send_leave(ifindex, group); return true; } static int find_ipv6_group_index(l2_interface_t* itf, const uint8_t group[16]) { - for (int i = 0; i < (int)itf->ipv6_mcast_count; ++i) if (cmp16(itf->ipv6_mcast[i], group) == 0) return i; + for (int i = 0; i < (int)itf->ipv6_mcast_count; ++i) if (ipv6_cmp(itf->ipv6_mcast[i], group) == 0) return i; return -1; } bool l2_ipv6_mcast_join(uint8_t ifindex, const uint8_t group[16]) { @@ -207,8 +242,10 @@ bool l2_ipv6_mcast_join(uint8_t ifindex, const uint8_t group[16]) { if (!ipv6_is_multicast(group)) return false; if (find_ipv6_group_index(itf, group) >= 0) return true; if (itf->ipv6_mcast_count >= MAX_IPV6_MCAST_PER_INTERFACE) return false; - cp16(itf->ipv6_mcast[itf->ipv6_mcast_count], group); + ipv6_cpy(itf->ipv6_mcast[itf->ipv6_mcast_count], group); itf->ipv6_mcast_count += 1; + if (itf->kind != NET_IFK_LOCALHOST) (void)l2_sync_multicast_filters(itf); + if (itf->kind != NET_IFK_LOCALHOST && l2_has_active_v6(itf)) (void)mld_send_join(ifindex, group); return true; } bool l2_ipv6_mcast_leave(uint8_t ifindex, const uint8_t group[16]) { @@ -216,8 +253,10 @@ bool l2_ipv6_mcast_leave(uint8_t ifindex, const uint8_t group[16]) { if (!itf || !group) return false; int idx = find_ipv6_group_index(itf, group); if (idx < 0) return true; - for (int i 
= idx + 1; i < (int)itf->ipv6_mcast_count; ++i) cp16(itf->ipv6_mcast[i-1], itf->ipv6_mcast[i]); + if (itf->kind != NET_IFK_LOCALHOST && l2_has_active_v6(itf)) (void)mld_send_leave(ifindex, group); + for (int i = idx + 1; i < (int)itf->ipv6_mcast_count; ++i) ipv6_cpy(itf->ipv6_mcast[i-1], itf->ipv6_mcast[i]); if (itf->ipv6_mcast_count) itf->ipv6_mcast_count -= 1; + if (itf->kind != NET_IFK_LOCALHOST) (void)l2_sync_multicast_filters(itf); return true; } @@ -227,8 +266,7 @@ static bool v4_ip_exists_anywhere(uint32_t ip){ } static bool v4_overlap_intra_l2(uint8_t ifindex, uint32_t ip, uint32_t mask){ - if (!is_power2_mask_contiguous(mask)) return true; - uint32_t n1 = ipv4_net(ip, mask); + if (!ipv4_mask_is_contiguous(mask)) return true; for (int i=0;imode == IPV4_CFG_DISABLED) continue; uint32_t m = (x->mask==0)?mask:((mask==0)?x->mask:((x->mask < mask)?x->mask:mask)); if (ipv4_net(ip, m) == ipv4_net(x->ip, m)) return true; - (void)n1; } return false; } static bool v6_ip_exists_anywhere(const uint8_t ip[16]){ if (ipv6_is_unspecified(ip)) return false; - for (int i=0;il3_v4[s] == NULL) return s; - return -1; -} - -static int alloc_local_slot_v6(l2_interface_t *l2){ - if (!l2) return -1; - for (int s=0; sl3_v6[s] == NULL) return s; - return -1; -} - -static int alloc_global_v4_slot(void){ for (int i=0;iname[0]=='l' && l2->name[1]=='o')) return 0; + if (!ipv4_mask_is_contiguous(mask)) return 0; + if (ipv4_is_loopback(ip) && (l2->kind != NET_IFK_LOCALHOST)) return 0; if (ipv4_is_multicast(ip)) return 0; - if (ipv4_is_reserved_special(ip)) return 0; + if (ipv4_is_reserved_special(ip)) { + if (!(ipv4_is_loopback(ip) && l2->kind == NET_IFK_LOCALHOST)) return 0; + } if (ipv4_is_network_address(ip, mask)) return 0; if (ipv4_is_broadcast_address(ip, mask)) return 0; if (v4_ip_exists_anywhere(ip)) return 0; @@ -304,15 +330,23 @@ uint8_t l3_ipv4_add_to_interface(uint8_t ifindex, uint32_t ip, uint32_t mask, ui } if (l2->ipv4_count >= MAX_IPV4_PER_INTERFACE) return 0; - int loc = 
alloc_local_slot_v4(l2); - int g = alloc_global_v4_slot(); + int loc = -1; + for (int s=0; sl3_v4[s] == NULL) { + loc = s; + break; + } + int g = -1; + for (int i=0;il2 = l2; n->mode = mode; n->ip = (mode==IPV4_CFG_STATIC) ? ip : 0; @@ -320,37 +354,44 @@ uint8_t l3_ipv4_add_to_interface(uint8_t ifindex, uint32_t ip, uint32_t mask, ui n->gw = (mode==IPV4_CFG_STATIC) ? gw : 0; n->broadcast = (mode==IPV4_CFG_STATIC) ? ipv4_broadcast_calc(ip, mask) : 0; - mem_zero(&n->runtime_opts_v4, sizeof(n->runtime_opts_v4)); - if (runtime_opts) { - n->runtime_opts_v4 = *runtime_opts; - } + memset(&n->runtime_opts_v4, 0, sizeof(n->runtime_opts_v4)); + if (runtime_opts) n->runtime_opts_v4 = *runtime_opts; - n->routing_table = ipv4_rt_create(); - if (!n->routing_table) { - g_v4[g].used = false; - mem_zero(&g_v4[g], sizeof(g_v4[g])); - return 0; + n->routing_table = NULL; + if (l2->kind != NET_IFK_LOCALHOST) { + n->routing_table = ipv4_rt_create(); + if (!n->routing_table) { + g_v4[g].used = false; + memset(&g_v4[g], 0, sizeof(g_v4[g])); + return 0; + } + ipv4_rt_ensure_basics((ipv4_rt_table_t*)n->routing_table, n->ip, n->mask, n->gw, l2->base_metric); } - ipv4_rt_ensure_basics((ipv4_rt_table_t*)n->routing_table, n->ip, n->mask, n->gw, l2->base_metric); - n->is_localhost = (l2->name[0]=='l' && l2->name[1]=='o'); - n->l3_id = make_l3_id(l2->ifindex, (uint8_t)loc); + n->is_localhost = (l2->kind == NET_IFK_LOCALHOST); + n->l3_id = make_l3_id_v4(l2->ifindex, (uint8_t)loc); l2->l3_v4[loc] = n; l2->ipv4_count++; - if (!g_kmem_page) g_kmem_page = palloc(PAGE_SIZE, MEM_PRIV_KERNEL, MEM_RW | MEM_NORM, false); - n->port_manager = (port_manager_t*)kalloc(g_kmem_page, sizeof(port_manager_t), ALIGN_16B, MEM_PRIV_KERNEL); + if (!g_kmem_page_v4) g_kmem_page_v4 = palloc(PAGE_SIZE*1, MEM_PRIV_KERNEL, MEM_RW|MEM_NORM, false); + if (!g_kmem_page_v4) return NULL; + + n->port_manager = (port_manager_t*)kalloc(g_kmem_page_v4, sizeof(port_manager_t), ALIGN_16B, MEM_PRIV_KERNEL); if (!n->port_manager) { 
l2->l3_v4[loc] = NULL; if (l2->ipv4_count) l2->ipv4_count--; - ipv4_rt_destroy((ipv4_rt_table_t*)n->routing_table); - n->routing_table = NULL; + if (n->routing_table) { + ipv4_rt_destroy((ipv4_rt_table_t*)n->routing_table); + n->routing_table = NULL; + } g_v4[g].used = false; - mem_zero(&g_v4[g], sizeof(g_v4[g])); + memset(&g_v4[g], 0, sizeof(g_v4[g])); return 0; } port_manager_init(n->port_manager); + if (n->mode != IPV4_CFG_DISABLED && n->ip && l2->kind != NET_IFK_LOCALHOST) (void)l2_ipv4_mcast_join(ifindex, IPV4_MCAST_ALL_HOSTS); + return n->l3_id; } @@ -364,10 +405,12 @@ bool l3_ipv4_update(uint8_t l3_id, uint32_t ip, uint32_t mask, uint32_t gw, ipv4 } if (mode == IPV4_CFG_STATIC){ if (ipv4_is_unspecified(ip)) return false; - if (!is_power2_mask_contiguous(mask)) return false; - if (ipv4_is_loopback(ip) && !(l2->name[0]=='l' && l2->name[1]=='o')) return false; + if (!ipv4_mask_is_contiguous(mask)) return false; + if (ipv4_is_loopback(ip)&& (l2->kind != NET_IFK_LOCALHOST)) return false; if (ipv4_is_multicast(ip)) return false; - if (ipv4_is_reserved_special(ip)) return false; + if (ipv4_is_reserved_special(ip)) { + if (!(ipv4_is_loopback(ip) && l2->kind == NET_IFK_LOCALHOST)) return false; + } if (ipv4_is_network_address(ip, mask)) return false; if (ipv4_is_broadcast_address(ip, mask)) return false; if (ip != n->ip && v4_ip_exists_anywhere(ip)) return false; @@ -384,15 +427,14 @@ bool l3_ipv4_update(uint8_t l3_id, uint32_t ip, uint32_t mask, uint32_t gw, ipv4 n->mode = mode; - if (runtime_opts) { - n->runtime_opts_v4 = *runtime_opts; - } + if (runtime_opts) n->runtime_opts_v4 = *runtime_opts; if (mode == IPV4_CFG_STATIC || mode == IPV4_CFG_DHCP) { n->ip = ip; n->mask = mask; n->gw = gw; n->broadcast = ipv4_broadcast_calc(ip, mask); + if (n->ip && l2->kind != NET_IFK_LOCALHOST) (void)l2_ipv4_mcast_join(l2->ifindex, IPV4_MCAST_ALL_HOSTS); } else { n->ip = 0; n->mask = 0; @@ -400,8 +442,15 @@ bool l3_ipv4_update(uint8_t l3_id, uint32_t ip, uint32_t mask, uint32_t 
gw, ipv4 n->broadcast = 0; } - if (!n->routing_table) n->routing_table = ipv4_rt_create(); - ipv4_rt_sync_basics((ipv4_rt_table_t*)n->routing_table, n->ip, n->mask, n->gw, l2->base_metric); + if (l2->kind != NET_IFK_LOCALHOST) { + if (!n->routing_table) n->routing_table = ipv4_rt_create(); + if (n->routing_table) ipv4_rt_sync_basics((ipv4_rt_table_t*)n->routing_table, n->ip, n->mask, n->gw, l2->base_metric); + } else { + if (n->routing_table) { + ipv4_rt_destroy((ipv4_rt_table_t*)n->routing_table); + n->routing_table = NULL; + } + } return true; } @@ -423,7 +472,7 @@ bool l3_ipv4_remove_from_interface(uint8_t l3_id){ n->port_manager = NULL; } - uint8_t slot = l3_local_slot_from_id(l3_id); + uint8_t slot = l3_slot_from_id(l3_id); if (slot < MAX_IPV4_PER_INTERFACE && l2->l3_v4[slot] == n){ l2->l3_v4[slot] = NULL; if (l2->ipv4_count) l2->ipv4_count--; @@ -435,13 +484,14 @@ bool l3_ipv4_remove_from_interface(uint8_t l3_id){ } g_v4[g].used = false; - mem_zero(&g_v4[g], sizeof(g_v4[g])); + memset(&g_v4[g], 0, sizeof(g_v4[g])); return true; } l3_ipv4_interface_t* l3_ipv4_find_by_id(uint8_t l3_id){ + if (!l3_id || l3_is_v6_from_id(l3_id)) return NULL; uint8_t ifx = l3_ifindex_from_id(l3_id); - uint8_t loc = l3_local_slot_from_id(l3_id); + uint8_t loc = l3_slot_from_id(l3_id); l2_interface_t *l2 = l2_interface_find_by_index(ifx); if (!l2) return NULL; if (loc >= MAX_IPV4_PER_INTERFACE) return NULL; @@ -457,11 +507,25 @@ uint8_t l3_ipv6_add_to_interface(uint8_t ifindex, const uint8_t ip[16], uint8_t if (!l2) return 0; if (prefix_len > 128) return 0; - int i_; int placeholder_ll = 0; - if (ip[0]==0xFE && ip[1]==0x80){ placeholder_ll = 1; for(i_=2;i_<16;i_++){ if (ip[i_]!=0){ placeholder_ll=0; break; } } } + if (ip[0]==0xFE && ip[1]==0x80) { + placeholder_ll = 1; + for(int i_ = 2; i_ < 16; i_++) { + if (ip[i_] != 0) { + placeholder_ll=0; break; + } + } + } int placeholder_gua = 0; - if (ip[0]==0x20 && ip[1]==0x00){ placeholder_gua = 1; for(i_=2;i_<16;i_++){ if (ip[i_]!=0){ 
placeholder_gua=0; break; } } } + if (ip[0]==0x20 && ip[1]==0x00) { + placeholder_gua = 1; + for(int i_=2;i_<16;i_++) { + if (ip[i_]!=0) { + placeholder_gua=0; + break; + } + } + } if (kind & IPV6_ADDRK_LINK_LOCAL){ if (!(cfg & (IPV6_CFG_SLAAC | IPV6_CFG_DHCPV6))){ @@ -481,7 +545,7 @@ uint8_t l3_ipv6_add_to_interface(uint8_t ifindex, const uint8_t ip[16], uint8_t } if (!ipv6_is_unspecified(ip)){ if (ipv6_is_multicast(ip)) return 0; - if (is_loop && !(l2->name[0]=='l' && l2->name[1]=='o')) return 0; + if (is_loop && (l2->kind != NET_IFK_LOCALHOST)) return 0; if (!is_loop){ if (ipv6_is_ula(ip)) return 0; if (!placeholder_gua){ @@ -505,36 +569,99 @@ uint8_t l3_ipv6_add_to_interface(uint8_t ifindex, const uint8_t ip[16], uint8_t } if (l2->ipv6_count >= MAX_IPV6_PER_INTERFACE) return 0; - int loc = alloc_local_slot_v6(l2); - int g = alloc_global_v6_slot(); + + int loc = -1; + for (int s=0; sl3_v6[s] == NULL) { + loc = s; + break; + } + + int g = -1; + for (int i=0;il2 = l2; n->cfg = cfg; n->kind = kind; - cp16(n->ip, ip); + n->mtu = 1500; + + uint8_t final_ip[16]; + ipv6_cpy(final_ip, ip); + if ((kind & IPV6_ADDRK_LINK_LOCAL) && placeholder_ll){ + ipv6_make_lla_from_mac(ifindex, final_ip); + prefix_len = 64; + } + + ipv6_cpy(n->ip, final_ip); n->prefix_len = prefix_len; - cp16(n->gateway, gw); - n->is_localhost = (l2->name[0]=='l' && l2->name[1]=='o'); - n->l3_id = make_l3_id(l2->ifindex, (uint8_t)loc); + ipv6_cpy(n->gateway, gw); + n->is_localhost = (l2->kind == NET_IFK_LOCALHOST); + n->valid_lifetime = 0; + n->preferred_lifetime = 0; + n->timestamp_created = 0; + memset(n->prefix, 0, sizeof(n->prefix)); + memset(n->interface_id, 0, sizeof(n->interface_id)); + n->dad_probes_sent = 0; + n->dad_timer_ms = 0; + + if (n->is_localhost) { + n->dad_state = IPV6_DAD_OK; + n->dad_requested = 0; + } else { + if (!ipv6_is_unspecified(n->ip) && !ipv6_is_multicast(n->ip) && !ipv6_is_placeholder_gua(n->ip)) { + n->dad_state = IPV6_DAD_NONE; + n->dad_requested = 1; + } else { + 
n->dad_state = IPV6_DAD_NONE; + n->dad_requested = 0; + } + } + + n->l3_id = make_l3_id_v6(l2->ifindex, (uint8_t)loc); l2->l3_v6[loc] = n; l2->ipv6_count++; - if (!g_kmem_page) g_kmem_page = palloc(PAGE_SIZE, MEM_PRIV_KERNEL, MEM_RW | MEM_NORM, false); - n->port_manager = (port_manager_t*)kalloc(g_kmem_page, sizeof(port_manager_t), ALIGN_16B, MEM_PRIV_KERNEL); + if (!g_kmem_page_v6) g_kmem_page_v6 = palloc(PAGE_SIZE*1, MEM_PRIV_KERNEL, MEM_RW|MEM_NORM, false); + if (!g_kmem_page_v6) return NULL; + + n->port_manager = (port_manager_t*)kalloc(g_kmem_page_v6, sizeof(port_manager_t), ALIGN_16B, MEM_PRIV_KERNEL); if (!n->port_manager){ l2->l3_v6[loc] = NULL; if (l2->ipv6_count) l2->ipv6_count--; g_v6[g].used = false; - mem_zero(&g_v6[g], sizeof(g_v6[g])); + memset(&g_v6[g], 0, sizeof(g_v6[g])); return 0; } port_manager_init(n->port_manager); + if (cfg == IPV6_CFG_DHCPV6){ + uint8_t m[16]; + ipv6_make_multicast(2, IPV6_MCAST_DHCPV6_SERVERS, NULL, m); + (void)l2_ipv6_mcast_join(ifindex, m); + } + n->routing_table = NULL; + if (!n->is_localhost) { + n->routing_table = ipv6_rt_create(); + if (n->routing_table){ + ipv6_rt_ensure_basics((ipv6_rt_table_t*)n->routing_table, n->ip, n->prefix_len, n->gateway, l2->base_metric); + } + + uint8_t m[16]; + ipv6_make_multicast(2, IPV6_MCAST_ALL_NODES, NULL, m); + (void)l2_ipv6_mcast_join(ifindex, m); + + if (n->cfg != IPV6_CFG_DISABLE && !ipv6_is_unspecified(n->ip) && !ipv6_is_placeholder_gua(n->ip)) { + ipv6_make_multicast(2, IPV6_MCAST_SOLICITED_NODE, n->ip, m); + (void)l2_ipv6_mcast_join(ifindex, m); + } + } return n->l3_id; } @@ -546,6 +673,8 @@ bool l3_ipv6_update(uint8_t l3_id, const uint8_t ip[16], uint8_t prefix_len, con if (!l2) return false; if (prefix_len > 128) return false; + if (kind == n->kind && cfg == n->cfg && prefix_len == n->prefix_len && ipv6_cmp(ip, n->ip) == 0 && ipv6_cmp(gw, n->gateway) == 0) return true; + if ((n->kind & IPV6_ADDRK_LINK_LOCAL) && cfg == IPV6_CFG_DISABLE){ for (int i=0;iip)!=0 && 
v6_ip_exists_anywhere(ip)) return false; + if (!ipv6_is_unspecified(ip) && ipv6_cmp(ip, n->ip)!=0 && v6_ip_exists_anywhere(ip)) return false; for (int i=0;iip)!=0 && v6_ip_exists_anywhere(ip)) return false; + if (ipv6_is_multicast(ip)) return false; + if (ipv6_is_loopback(ip) && (l2->kind != NET_IFK_LOCALHOST)) return false; + if (ipv6_cmp(ip,n->ip)!=0 && v6_ip_exists_anywhere(ip)) return false; if (v6_overlap_intra_l2(l2->ifindex, ip, prefix_len)){ for (int i=0;iip); + n->cfg = cfg; n->kind = kind; - cp16(n->ip, ip); + uint8_t m[16]; + + ipv6_make_multicast(2, IPV6_MCAST_DHCPV6_SERVERS, NULL, m); + (void)l2_ipv6_mcast_leave(l2->ifindex, m); + + ipv6_make_multicast(2, IPV6_MCAST_ALL_NODES, NULL, m); + (void)l2_ipv6_mcast_leave(l2->ifindex, m); + + ipv6_make_multicast(2, IPV6_MCAST_ALL_ROUTERS, NULL, m); + (void)l2_ipv6_mcast_leave(l2->ifindex, m); + + if (!ipv6_is_unspecified(old_ip) && !ipv6_is_placeholder_gua(old_ip)) { + ipv6_make_multicast(2, IPV6_MCAST_SOLICITED_NODE, old_ip, m); + (void)l2_ipv6_mcast_leave(l2->ifindex, m); + } + + ipv6_cpy(n->ip, ip); n->prefix_len = prefix_len; - cp16(n->gateway, gw); + ipv6_cpy(n->gateway, gw); + + if (!n->is_localhost) { + if (cfg != IPV6_CFG_DISABLE) { + ipv6_make_multicast(2, IPV6_MCAST_ALL_NODES, NULL, m); + (void)l2_ipv6_mcast_join(l2->ifindex, m); + } + + if (cfg == IPV6_CFG_DHCPV6){ + ipv6_make_multicast(2, IPV6_MCAST_DHCPV6_SERVERS, NULL, m); + (void)l2_ipv6_mcast_join(l2->ifindex, m); + } + + if (cfg == IPV6_CFG_SLAAC){ + ipv6_make_multicast(2, IPV6_MCAST_ALL_ROUTERS, NULL, m); + (void)l2_ipv6_mcast_join(l2->ifindex, m); + } + + if (cfg != IPV6_CFG_DISABLE && !ipv6_is_unspecified(n->ip) && !ipv6_is_placeholder_gua(n->ip)) { + ipv6_make_multicast(2, IPV6_MCAST_SOLICITED_NODE, n->ip, m); + (void)l2_ipv6_mcast_join(l2->ifindex, m); + } + } + + if (ipv6_cmp(old_ip, n->ip) != 0) { + n->dad_state = IPV6_DAD_NONE; + n->dad_timer_ms = 0; + n->dad_probes_sent = 0; + + if (n->is_localhost) { + n->dad_requested = 0; + 
n->dad_state = IPV6_DAD_OK; + } else if (ipv6_is_unspecified(n->ip) || ipv6_is_multicast(n->ip) || n->cfg == IPV6_CFG_DISABLE) { + n->dad_requested = 0; + } else { + n->dad_requested = 1; + } + } + + if (!n->is_localhost) { + if (n->dad_requested && !ipv6_is_placeholder_gua(n->ip)) { + uint8_t sn[16]; + ipv6_make_multicast(2, IPV6_MCAST_SOLICITED_NODE, n->ip, sn); + (void)l2_ipv6_mcast_join(l2->ifindex, sn); + } + + if (!n->routing_table) n->routing_table = ipv6_rt_create(); + if (n->routing_table){ + ipv6_rt_sync_basics((ipv6_rt_table_t*)n->routing_table, n->ip, n->prefix_len, n->gateway, l2->base_metric); + } + } else { + if (n->routing_table) { + ipv6_rt_destroy((ipv6_rt_table_t*)n->routing_table); + n->routing_table = NULL; + } + } + return true; } @@ -631,14 +836,19 @@ bool l3_ipv6_remove_from_interface(uint8_t l3_id){ n->port_manager = NULL; } - uint8_t slot = l3_local_slot_from_id(l3_id); + uint8_t slot = l3_slot_from_id(l3_id); if (slot < MAX_IPV6_PER_INTERFACE && l2->l3_v6[slot] == n){ l2->l3_v6[slot] = NULL; if (l2->ipv6_count) l2->ipv6_count--; } + if (n->routing_table){ + ipv6_rt_destroy((ipv6_rt_table_t*)n->routing_table); + n->routing_table = NULL; + } + g_v6[g].used = false; - mem_zero(&g_v6[g], sizeof(g_v6[g])); + memset(&g_v6[g], 0, sizeof(g_v6[g])); return true; } @@ -646,7 +856,6 @@ bool l3_ipv6_set_enabled(uint8_t l3_id, bool enable){ l3_ipv6_interface_t *n = l3_ipv6_find_by_id(l3_id); if (!n) return false; if (enable){ - if (n->cfg == IPV6_CFG_DISABLE) n->cfg = IPV6_CFG_STATIC; return true; } else { if ((n->kind & IPV6_ADDRK_LINK_LOCAL)){ @@ -659,20 +868,26 @@ bool l3_ipv6_set_enabled(uint8_t l3_id, bool enable){ } } n->cfg = IPV6_CFG_DISABLE; + n->dad_state = IPV6_DAD_NONE; + n->dad_probes_sent = 0; + n->dad_timer_ms = 0; return true; } } l3_ipv6_interface_t* l3_ipv6_find_by_id(uint8_t l3_id){ + if (!l3_id || !l3_is_v6_from_id(l3_id)) return NULL; uint8_t ifx = l3_ifindex_from_id(l3_id); - uint8_t loc = l3_local_slot_from_id(l3_id); + uint8_t 
loc = l3_slot_from_id(l3_id); l2_interface_t *l2 = l2_interface_find_by_index(ifx); if (!l2) return NULL; if (loc >= MAX_IPV6_PER_INTERFACE) return NULL; return l2->l3_v6[loc]; } l3_ipv6_interface_t* l3_ipv6_find_by_ip(const uint8_t ip[16]){ - for (int i=0;iifindex, loop6, 128, zero16, IPV6_CFG_STATIC, IPV6_ADDRK_GLOBAL); + + uint8_t multi[16]; + ipv6_make_multicast(2, IPV6_MCAST_ALL_NODES, loop6, multi); + (void)l2_ipv6_mcast_join(lo->ifindex, multi); + ipv6_make_multicast(2, IPV6_MCAST_SOLICITED_NODE, loop6, multi); + (void)l2_ipv6_mcast_join(lo->ifindex, multi); } +//TODO: add autoconfig settings/policy void ifmgr_autoconfig_l2(uint8_t ifindex){ l2_interface_t *l2 = l2_interface_find_by_index(ifindex); if (!l2) return; - if (l2->name[0]=='l' && l2->name[1]=='o' && l2->name[2]=='0' && l2->name[3]==0){ - return; - } + if (l2->kind == NET_IFK_LOCALHOST) return; if (l2->ipv4_count == 0){ (void)l3_ipv4_add_to_interface(ifindex, 0, 0, 0, IPV4_CFG_DHCP, NULL); } bool has_lla=false; + bool has_gua=false; + for (int i=0;il2 || x->l2->ifindex != ifindex) continue; - if (ipv6_is_linklocal(x->ip) && x->cfg != IPV6_CFG_DISABLE){ has_lla=true; break; } + + if (!has_lla) if (ipv6_is_linklocal(x->ip) && x->cfg != IPV6_CFG_DISABLE) has_lla=true; + + if (!has_gua) { + if ((x->kind == IPV6_ADDRK_GLOBAL) && x->cfg != IPV6_CFG_DISABLE) has_gua=true; + else if ((x->kind == IPV6_ADDRK_GLOBAL) && ipv6_is_placeholder_gua(x->ip)) has_gua=true; + } + + if (has_lla && has_gua) break; } if (!has_lla){ - uint8_t fe80_0[16]={0}; fe80_0[0]=0xFE; fe80_0[1]=0x80; - uint8_t zero16[16]={0}; - (void)l3_ipv6_add_to_interface(ifindex, fe80_0, 64, zero16, IPV6_CFG_SLAAC, IPV6_ADDRK_LINK_LOCAL); - } + uint8_t lla[16]; + uint8_t zero16[16] = {0}; - bool has_gua=false; - for (int i=0;il2 || x->l2->ifindex != ifindex) continue; - if (!ipv6_is_linklocal(x->ip) && x->cfg != IPV6_CFG_DISABLE){ has_gua=true; break; } + ipv6_make_lla_from_mac(ifindex, lla); + (void)l3_ipv6_add_to_interface(ifindex, lla, 64, 
zero16, IPV6_CFG_SLAAC, IPV6_ADDRK_LINK_LOCAL); + + uint8_t m[16]; + ipv6_make_multicast(2, IPV6_MCAST_ALL_NODES, lla, m); + (void)l2_ipv6_mcast_join(ifindex, m); } - if (!has_gua){ - uint8_t g2000_0[16]={0}; g2000_0[0]=0x20; g2000_0[1]=0x00; + + if (!has_gua) { + uint8_t ph[16]; uint8_t zero16[16]={0}; - (void)l3_ipv6_add_to_interface(ifindex, g2000_0, 64, zero16, IPV6_CFG_SLAAC, IPV6_ADDRK_GLOBAL); - } - //TODO: add autoconfig settings/policy + ipv6_make_placeholder_gua(ph); + (void)l3_ipv6_add_to_interface(ifindex, ph, 64, zero16, IPV6_CFG_SLAAC, IPV6_ADDRK_GLOBAL); + } } void ifmgr_autoconfig_all_l2(void){ @@ -778,20 +1014,68 @@ ip_resolution_result_t resolve_ipv4_to_interface(uint32_t dst_ip){ return r; } -ip_resolution_result_t resolve_ipv6_to_interface(const uint8_t dst_ip[16]){ - ip_resolution_result_t r; r.found=false; r.ipv4=NULL; r.ipv6=NULL; r.l2=NULL; - int best = -1; - for (int i=0;il2) continue; if (x->cfg == IPV6_CFG_DISABLE) continue; if (ipv6_is_unspecified(x->ip)) continue; - int match = prefix_match(dst_ip, x->ip); - if (match >= x->prefix_len && match > best){ best = match; r.found=true; r.ipv6=x; r.l2=x->l2; } + + int src_is_ll = ipv6_is_linklocal(x->ip); + + if (dst_is_ll != src_is_ll) + continue; + + int pl_conn = -1; + int match = ipv6_common_prefix_len(dst_ip, x->ip); + if (match >= x->prefix_len) pl_conn = x->prefix_len; + + int pl_tab = -1; + uint16_t met_tab = 0x7FFF; + uint8_t via[16] = {0}; + + if (x->routing_table) { + int out_pl = -1; + int out_met = 0x7FFF; + if (ipv6_rt_lookup_in((const ipv6_rt_table_t*)x->routing_table,dst_ip, via, &out_pl, &out_met)) + { + pl_tab = out_pl; + met_tab = out_met; + } + } + + int cand_pl = pl_conn; + uint16_t cand_cost = x->l2->base_metric; + + if (pl_tab > cand_pl || (pl_tab == cand_pl && (x->l2->base_metric + met_tab) < cand_cost)) { + cand_pl = pl_tab; + cand_cost = x->l2->base_metric + met_tab; + } + + if (cand_pl > best_pl || (cand_pl == best_pl && cand_cost < best_cost)) { + best_pl = cand_pl; 
+ best_cost = cand_cost; + r.found = true; + r.ipv6 = x; + r.l2 = x->l2; + } + } + if (best_pl < 0) { + r.found = false; + r.ipv6 = NULL; + r.l2 = NULL; } - return r; -} -bool check_ipv4_overlap(uint32_t new_ip, uint32_t mask, uint8_t ifindex){ return v4_overlap_intra_l2(ifindex, new_ip, mask); } -bool check_ipv6_overlap(const uint8_t new_ip[16], uint8_t prefix_len, uint8_t ifindex){ return v6_overlap_intra_l2(ifindex, new_ip, prefix_len); } + return r; +} \ No newline at end of file diff --git a/kernel/networking/interface_manager.h b/kernel/networking/interface_manager.h index 7247d4c5..7786b16d 100644 --- a/kernel/networking/interface_manager.h +++ b/kernel/networking/interface_manager.h @@ -7,7 +7,7 @@ extern "C" { #endif -#define MAX_L2_INTERFACES 16 +#define MAX_L2_INTERFACES 15 #define MAX_IPV4_PER_INTERFACE 4 #define MAX_IPV6_PER_INTERFACE 4 #define MAX_IPV4_MCAST_PER_INTERFACE 12 @@ -19,6 +19,13 @@ typedef enum { IPV4_CFG_STATIC = 1 } ipv4_cfg_t; +typedef enum { + IPV6_DAD_NONE = 0, + IPV6_DAD_IN_PROGRESS = 1, + IPV6_DAD_FAILED = 2, + IPV6_DAD_OK = 3 +} ipv6_dad_state_t; + typedef enum { IPV6_ADDRK_GLOBAL = 0x01, IPV6_ADDRK_LINK_LOCAL = 0x02 @@ -28,7 +35,7 @@ typedef enum { IPV6_CFG_DISABLE = -1, IPV6_CFG_STATIC = 0x01, IPV6_CFG_SLAAC = 0x02, - IPV6_CFG_DHCPV6 = 0x04 + IPV6_CFG_DHCPV6 = 0x04 } ipv6_cfg_t; struct l2_interface; @@ -52,6 +59,7 @@ typedef struct l2_interface { char name[16]; bool is_up; uint16_t base_metric; + uint8_t kind; void *driver_context; void *arp_table; void *nd_table; @@ -79,8 +87,31 @@ typedef struct l3_ipv4_interface { l2_interface_t *l2; } l3_ipv4_interface_t; +typedef struct net_runtime_opts_v6 { + uint32_t t1; + uint32_t t2; + + uint8_t dns[2][16]; + uint8_t ntp[2][16]; + + uint32_t iaid; + + uint32_t lease; + uint32_t lease_start_time; + + uint16_t server_id_len; + uint8_t server_id[128]; + + uint8_t pd_prefix[16]; + uint8_t pd_prefix_len; + uint32_t pd_preferred_lft; + uint32_t pd_valid_lft; +} net_runtime_opts_v6_t; + typedef 
struct l3_ipv6_interface { uint8_t l3_id; + uint16_t mtu; + uint8_t ip[16]; uint8_t prefix_len; uint8_t gateway[16]; @@ -92,8 +123,23 @@ typedef struct l3_ipv6_interface { uint32_t timestamp_created; uint8_t prefix[16]; uint8_t interface_id[8]; + uint8_t dad_requested; + ipv6_dad_state_t dad_state; + uint8_t dad_probes_sent; + uint32_t dad_timer_ms; + void *routing_table; port_manager_t *port_manager; l2_interface_t *l2; + uint8_t ra_has; + uint8_t ra_autonomous; + uint8_t ra_is_default; + uint8_t ra_flags; + uint8_t dhcpv6_stateless; + uint8_t dhcpv6_stateless_done; + uint32_t ra_last_update_ms; + + net_runtime_opts_v6_t runtime_opts_v6; + uint8_t dhcpv6_state; } l3_ipv6_interface_t; typedef struct ip_resolution_result { @@ -103,7 +149,7 @@ typedef struct ip_resolution_result { l2_interface_t *l2; } ip_resolution_result_t; -uint8_t l2_interface_create(const char *name, void *driver_ctx, uint16_t base_metric); +uint8_t l2_interface_create(const char *name, void *driver_ctx, uint16_t base_metric, uint8_t kind); bool l2_interface_destroy(uint8_t ifindex); l2_interface_t *l2_interface_find_by_index(uint8_t ifindex); uint8_t l2_interface_count(void); @@ -137,13 +183,6 @@ void ifmgr_autoconfig_l2(uint8_t ifindex); ip_resolution_result_t resolve_ipv4_to_interface(uint32_t dst_ip); ip_resolution_result_t resolve_ipv6_to_interface(const uint8_t dst_ip[16]); -bool check_ipv4_overlap(uint32_t new_ip, uint32_t mask, uint8_t ifindex); -bool check_ipv6_overlap(const uint8_t new_ip[16], uint8_t prefix_len, uint8_t ifindex); - -static inline uint32_t ipv4_net(uint32_t ip, uint32_t mask){ return ip & mask; } -static inline uint32_t ipv4_broadcast_calc(uint32_t ip, uint32_t mask){ return (mask==0)?0:((ip & mask) | ~mask); } - - static inline port_manager_t* ifmgr_pm_v4(uint8_t l3_id){ l3_ipv4_interface_t* n = l3_ipv4_find_by_id(l3_id); return n ? n->port_manager : NULL; @@ -153,6 +192,12 @@ static inline port_manager_t* ifmgr_pm_v6(uint8_t l3_id){ return n ? 
n->port_manager : NULL; } +static inline uint8_t make_l3_id_v4(uint8_t ifindex, uint8_t local_slot){ return (uint8_t)((ifindex<<4) | (local_slot & 0x03)); } +static inline uint8_t make_l3_id_v6(uint8_t ifindex, uint8_t local_slot){ return (uint8_t)((ifindex<<4) | 0x08 | (local_slot & 0x03)); } +static inline uint8_t l3_ifindex_from_id(uint8_t l3_id){ return (uint8_t)((l3_id >> 4) & 0x0F); } +static inline uint8_t l3_is_v6_from_id(uint8_t l3_id){ return (uint8_t)((l3_id & 0x08) ? 1 : 0); } +static inline uint8_t l3_slot_from_id(uint8_t l3_id){ return (uint8_t)(l3_id & 0x03); } + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/kernel/networking/internet_layer/icmp.c b/kernel/networking/internet_layer/icmp.c index 9a83503b..fc4a91fc 100644 --- a/kernel/networking/internet_layer/icmp.c +++ b/kernel/networking/internet_layer/icmp.c @@ -68,7 +68,7 @@ static uintptr_t build_echo(uint16_t id, uint16_t seq, const uint8_t* payload, u memset(pkt->payload, 0, 56); if (payload && pay_len) memcpy(pkt->payload, payload, (pay_len > 56 ? 
56 : pay_len)); pkt->checksum = 0; - pkt->checksum = checksum16((uint16_t*)pkt, len); + pkt->checksum = checksum16((uint16_t*)pkt, (len+1)/2); return buf; } @@ -99,8 +99,23 @@ bool icmp_ping(uint32_t dst_ip, uint16_t id, uint16_t seq, uint32_t timeout_ms, return false; } - ipv4_send_packet(dst_ip, 1, (sizedptr){buf, tot_len}, (const ipv4_tx_opts_t*)tx_opts_or_null, (uint8_t)ttl); + uint32_t headroom = (uint32_t)sizeof(eth_hdr_t) + (uint32_t)sizeof(ipv4_hdr_t); + netpkt_t* pkt = netpkt_alloc(tot_len, headroom, 0); + if (!pkt) { + free_sized((void*)buf, 8 + 56); + g_pending[slot].in_use = false; + return false; + } + void* p = netpkt_put(pkt, tot_len); + if (!p) { + netpkt_unref(pkt); + free_sized((void*)buf, 8 + 56); + g_pending[slot].in_use = false; + return false; + } + memcpy(p, (const void*)buf, tot_len); free_sized((void*)buf, 8 + 56); + ipv4_send_packet(dst_ip, 1, pkt, (const ipv4_tx_opts_t*)tx_opts_or_null, (uint8_t)ttl, 0); uint32_t start = (uint32_t)get_time(); for (;;) { @@ -159,7 +174,7 @@ void icmp_input(uintptr_t ptr, uint32_t len, uint32_t src_ip, uint32_t dst_ip) { icmp_packet* pkt = (icmp_packet*)ptr; uint16_t recv_ck = pkt->checksum; pkt->checksum = 0; - uint16_t calc = checksum16((uint16_t*)pkt, len); + uint16_t calc = checksum16((uint16_t*)pkt, (len+1)/2); pkt->checksum = recv_ck; if (calc != recv_ck) return; @@ -182,12 +197,22 @@ void icmp_input(uintptr_t ptr, uint32_t len, uint32_t src_ip, uint32_t dst_ip) { if (pay) memcpy(rp->payload, pkt->payload, pay); rp->checksum = 0; uint32_t rlen = 8 + pay; - rp->checksum = checksum16((uint16_t*)rp, rlen); + rp->checksum = checksum16((uint16_t*)rp, (rlen+1)/2); l3_ipv4_interface_t* l3 = l3_ipv4_find_by_ip(dst_ip); if (l3 && l3->l2) { - ipv4_tx_opts_t o = {.index = l3->l3_id, .scope = IPV4_TX_BOUND_L3}; - ipv4_send_packet(src_ip, 1, (sizedptr){buf, rlen}, &o, IP_TTL_DEFAULT); + ipv4_tx_opts_t o = {.index = l3->l3_id, .scope = IP_TX_BOUND_L3}; + uint32_t headroom = (uint32_t)sizeof(eth_hdr_t) + 
(uint32_t)sizeof(ipv4_hdr_t); + netpkt_t* pkt = netpkt_alloc(rlen, headroom, 0); + if (pkt) { + void* p = netpkt_put(pkt, rlen); + if (p) { + memcpy(p, (const void*)buf, rlen); + ipv4_send_packet(src_ip, 1, pkt, &o, IP_TTL_DEFAULT, 0); + } else { + netpkt_unref(pkt); + } + } } free_sized((void*)buf, 8 + 56); return; diff --git a/kernel/networking/internet_layer/icmpv6.c b/kernel/networking/internet_layer/icmpv6.c new file mode 100644 index 00000000..4b1ca415 --- /dev/null +++ b/kernel/networking/internet_layer/icmpv6.c @@ -0,0 +1,314 @@ +#include "icmpv6.h" +#include "std/memory.h" +#include "net/checksums.h" +#include "networking/internet_layer/ipv6.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/internet_layer/ipv6_route.h" +#include "networking/link_layer/eth.h" +#include "networking/link_layer/ndp.h" +#include "networking/internet_layer/mld.h" +#include "syscalls/syscalls.h" + +#define MAX_PENDING 16 + +typedef struct __attribute__((packed)) { + icmpv6_hdr_t hdr; + uint16_t id; + uint16_t seq; +} icmpv6_echo_t; + +typedef struct { + bool in_use; + uint16_t id; + uint16_t seq; + bool received; + uint8_t rx_type; + uint8_t rx_code; + uint32_t start_ms; + uint32_t end_ms; + uint8_t rx_src_ip[16]; +} ping6_slot_t; + +static ping6_slot_t g_pending[MAX_PENDING] = {0}; + +static int alloc_slot(uint16_t id, uint16_t seq) { + for (int i = 0; i < MAX_PENDING; i++) { + if (!g_pending[i].in_use) { + g_pending[i].in_use = true; + g_pending[i].id = id; + g_pending[i].seq = seq; + g_pending[i].received = false; + g_pending[i].rx_type = 0xFF; + g_pending[i].rx_code = 0xFF; + g_pending[i].start_ms = (uint32_t)get_time(); + g_pending[i].end_ms = 0; + memset(g_pending[i].rx_src_ip, 0, 16); + return i; + } + } + return -1; +} + +static void mark_received(uint16_t id, uint16_t seq, uint8_t type, uint8_t code, const uint8_t src_ip[16]) { + for (int i = 0; i < MAX_PENDING; i++) { + if (g_pending[i].in_use && g_pending[i].id == id && g_pending[i].seq == 
seq) { + g_pending[i].received = true; + g_pending[i].rx_type = type; + g_pending[i].rx_code = code; + g_pending[i].end_ms = (uint32_t)get_time(); + if (src_ip) memcpy(g_pending[i].rx_src_ip, src_ip, 16); + return; + } + } +} + +bool icmpv6_send_on_l2(uint8_t ifindex, const uint8_t dst_ip[16], const uint8_t src_ip[16], const uint8_t dst_mac[6], const void *icmp, uint32_t icmp_len, uint8_t hop_limit) { + if (!ifindex || !dst_ip || !src_ip || !dst_mac || !icmp || !icmp_len) return false; + + uint32_t total = (uint32_t)sizeof(ipv6_hdr_t) + icmp_len; + netpkt_t* pkt = netpkt_alloc(total, (uint32_t)sizeof(eth_hdr_t), 0); + if (!pkt) return false; + void* buf = netpkt_put(pkt, total); + if (!buf) { + netpkt_unref(pkt); + return false; + } + + ipv6_hdr_t *ip6 = (ipv6_hdr_t*)buf; + ip6->ver_tc_fl = bswap32((uint32_t)(6u << 28)); + ip6->payload_len = bswap16((uint16_t)icmp_len); + ip6->next_header = 58; + ip6->hop_limit = hop_limit ? hop_limit : 64; + memcpy(ip6->src, src_ip, 16); + memcpy(ip6->dst, dst_ip, 16); + + memcpy((void*)((uintptr_t)buf + sizeof(ipv6_hdr_t)), icmp, icmp_len); + + return eth_send_frame_on(ifindex, ETHERTYPE_IPV6, dst_mac, pkt); +} + +static bool icmpv6_send_echo_reply(uint16_t ifindex, const uint8_t src_ip[16], const uint8_t dst_ip[16], const uint8_t *icmp, uint32_t icmp_len, const uint8_t src_mac[6], uint8_t hop_limit) { + if (!dst_ip || !icmp || icmp_len < sizeof(icmpv6_echo_t)) return false; + + uintptr_t buf = (uintptr_t)malloc(icmp_len); + if (!buf) return false; + + memcpy((void*)buf, icmp, icmp_len); + + icmpv6_echo_t *e = (icmpv6_echo_t*)buf; + e->hdr.type = ICMPV6_ECHO_REPLY; + e->hdr.code = 0; + e->hdr.checksum = 0; + + ipv6_tx_plan_t plan; + if (!ipv6_build_tx_plan(dst_ip, 0 ,0, 0, &plan)) { + free_sized((void*)buf, icmp_len); + return false; + } + + e->hdr.checksum = bswap16(checksum16_pipv6(dst_ip, src_ip, 58, (const uint8_t*)buf, icmp_len)); + + icmpv6_send_on_l2(ifindex, src_ip, dst_ip, src_mac, (const void*)buf, icmp_len, hop_limit ? 
hop_limit : 64); + + free_sized((void*)buf, icmp_len); + return true; +} + +static bool icmpv6_send_echo_request(const uint8_t dst_ip[16], uint16_t id, uint16_t seq, const void *payload, uint32_t payload_len, const void *tx_opts_or_null, uint8_t hop_limit) { + if (!dst_ip) return false; + + uint32_t len = (uint32_t)sizeof(icmpv6_echo_t) + payload_len; + uint32_t headroom = (uint32_t)sizeof(eth_hdr_t) + (uint32_t)sizeof(ipv6_hdr_t); + netpkt_t* pkt = netpkt_alloc(len, headroom, 0); + if (!pkt) return false; + void* buf = netpkt_put(pkt, len); + if (!buf) { + netpkt_unref(pkt); + return false; + } + + icmpv6_echo_t *e = (icmpv6_echo_t*)buf; + e->hdr.type = ICMPV6_ECHO_REQUEST; + e->hdr.code = 0; + e->hdr.checksum = 0; + e->id = bswap16(id); + e->seq = bswap16(seq); + + if (payload_len) memcpy((void*)((uintptr_t)buf + sizeof(icmpv6_echo_t)), payload, payload_len); + + ipv6_tx_plan_t plan; + if (!ipv6_build_tx_plan(dst_ip, tx_opts_or_null, 0, 0, &plan)) { + netpkt_unref(pkt); + return false; + } + e->hdr.checksum = bswap16(checksum16_pipv6(plan.src_ip, dst_ip, 58, (const uint8_t*)buf, len)); + + ipv6_send_packet(dst_ip, 58, pkt, (const ipv6_tx_opts_t*)tx_opts_or_null, hop_limit ? 
hop_limit : 64, 0); + return true; +} + +bool icmpv6_ping(const uint8_t dst_ip[16], uint16_t id, uint16_t seq, uint32_t timeout_ms, const void *tx_opts_or_null, uint8_t hop_limit, ping6_result_t *out) { + int slot = alloc_slot(id, seq); + if (slot < 0) { + if (out) { + out->rtt_ms = 0; + out->status = PING_UNKNOWN_ERROR; + out->icmp_type = 0xFF; + out->icmp_code = 0xFF; + memset(out->responder_ip, 0, 16); + } + return false; + } + + uint8_t payload[32]; + memset(payload, 0, sizeof(payload)); + + if (!icmpv6_send_echo_request(dst_ip, id, seq, payload, sizeof(payload), tx_opts_or_null, hop_limit)) { + if (out) { + out->rtt_ms = 0; + out->status = PING_UNKNOWN_ERROR; + out->icmp_type = 0xFF; + out->icmp_code = 0xFF; + memset(out->responder_ip, 0, 16); + } + g_pending[slot].in_use =false; + return false; + } + + uint32_t start = (uint32_t)get_time(); + for (;;) { + if (g_pending[slot].received) { + if (out) { + out->icmp_type = g_pending[slot].rx_type; + out->icmp_code = g_pending[slot].rx_code; + memcpy(out->responder_ip, g_pending[slot].rx_src_ip, 16); + + switch (g_pending[slot].rx_type) { + case ICMPV6_ECHO_REPLY: + out->status = PING_OK; + break; + case ICMPV6_DEST_UNREACH: + switch (g_pending[slot].rx_code) { + case 0: out->status = PING_NET_UNREACH; break; + case 1: out->status = PING_ADMIN_PROHIBITED; break; + case 2: out->status = PING_ADMIN_PROHIBITED; break; + case 3: out->status = PING_HOST_UNREACH; break; + case 4: out->status = PING_PORT_UNREACH; break; + default: out->status = PING_UNKNOWN_ERROR; break; + } + break; + case ICMPV6_PACKET_TOO_BIG: + out->status = PING_FRAG_NEEDED; + break; + case ICMPV6_TIME_EXCEEDED: + out->status = PING_TTL_EXPIRED; + break; + case ICMPV6_PARAM_PROBLEM: + out->status = PING_PARAM_PROBLEM; + break; + default: + out->status = PING_UNKNOWN_ERROR; + break; + } + + if (g_pending[slot].end_ms >= g_pending[slot].start_ms) out->rtt_ms = g_pending[slot].end_ms - g_pending[slot].start_ms; + else out->rtt_ms = 0; + } + + bool ok = 
(g_pending[slot].rx_type == ICMPV6_ECHO_REPLY); + g_pending[slot].in_use = false; + return ok; + } + + uint32_t now = (uint32_t)get_time(); + if (now - start >= timeout_ms) break; + msleep(5); + } + + if (out) { + out->rtt_ms = 0; + out->status = PING_TIMEOUT; + out->icmp_type = 0xFF; + out->icmp_code = 0xFF; + memset(out->responder_ip, 0, 16); + } + + g_pending[slot].in_use = false; + return false; +} + +static bool extract_echo_id_seq_from_error(const uint8_t *icmp, uint32_t icmp_len, uint16_t *out_id, uint16_t *out_seq) {//b + if (!icmp || icmp_len < 8u + (uint32_t)sizeof(ipv6_hdr_t) + (uint32_t)sizeof(icmpv6_echo_t)) return false; + + const ipv6_hdr_t *inner = (const ipv6_hdr_t*)(icmp + 8); + uint32_t v = bswap32(inner->ver_tc_fl); + if ((v >>28) != 6) return false; + if (inner->next_header != 58) return false; + + const uint8_t *inner_icmp = (const uint8_t*)(inner + 1); + if ((uintptr_t)inner_icmp + sizeof(icmpv6_echo_t)>(uintptr_t)icmp + icmp_len) return false; + + const icmpv6_echo_t *e = (const icmpv6_echo_t*)inner_icmp; + if (e->hdr.type != ICMPV6_ECHO_REQUEST) return false; + + if (out_id) *out_id = bswap16(e->id); + if (out_seq) *out_seq = bswap16(e->seq); + return true; +} + +void icmpv6_input(uint16_t ifindex, const uint8_t src_ip[16], const uint8_t dst_ip[16], uint8_t hop_limit, const uint8_t src_mac[6], const uint8_t *icmp, uint32_t icmp_len) { + if (!ifindex || !src_ip || !dst_ip || !icmp || icmp_len < sizeof(icmpv6_hdr_t)) return; + + const icmpv6_hdr_t *h = (const icmpv6_hdr_t*)icmp; + if (h->code != 0 && (h->type == ICMPV6_ECHO_REQUEST || h->type == ICMPV6_ECHO_REPLY)) return; + + uint16_t calc = bswap16(checksum16_pipv6(src_ip, dst_ip, 58, icmp, icmp_len)); + if (calc != 0) return; + + if ((h->type == 133 || h->type == 134 || h->type == 135 || h->type == 136 || h->type == 137) && hop_limit != 255) return; + if (h->type == 130 || h->type == 131 || h->type == 132 || h->type == 143) { + mld_input((uint8_t)ifindex, src_ip, dst_ip, icmp, icmp_len); + 
return; + } + + + if (h->type == ICMPV6_ECHO_REQUEST) { + icmpv6_send_echo_reply(ifindex, src_ip, dst_ip, icmp, icmp_len, src_mac, hop_limit); + return; + } + + if (h->type == ICMPV6_ECHO_REPLY) { + if (icmp_len < sizeof(icmpv6_echo_t)) return; + const icmpv6_echo_t *e = (const icmpv6_echo_t*)icmp; + mark_received(bswap16(e->id), bswap16(e->seq), h->type, h->code, src_ip); + return; + } + + if (h->type == 133 || h->type == 134 || h->type == 135 || h->type == 136 || h->type == 137) { + ndp_input(ifindex, src_ip, dst_ip, src_mac, icmp, icmp_len); + return; + } + + if (h->type == ICMPV6_PACKET_TOO_BIG) { + + if (icmp_len >= 8u + (uint32_t)sizeof(ipv6_hdr_t)) { + uint32_t mtu = bswap32(*(const uint32_t *)(icmp + 4)); + const ipv6_hdr_t *inner = (const ipv6_hdr_t *)(icmp + 8); + uint32_t v = bswap32(inner->ver_tc_fl); + + if ((v >> 28) == 6 && mtu >= 1280u && mtu <= 65535u) + ipv6_pmtu_note(inner->dst, (uint16_t)mtu); + + uint16_t id = 0, seq = 0; + if (extract_echo_id_seq_from_error(icmp, icmp_len, &id, &seq)) + mark_received(id, seq, h->type, h->code, src_ip); + } + return; + } + + if (h->type == ICMPV6_DEST_UNREACH || h->type == ICMPV6_TIME_EXCEEDED || h->type == ICMPV6_PARAM_PROBLEM) { + uint16_t id = 0, seq = 0; + if (extract_echo_id_seq_from_error(icmp, icmp_len, &id, &seq)) mark_received(id, seq, h->type, h->code, src_ip); + return; + } +} \ No newline at end of file diff --git a/kernel/networking/internet_layer/icmpv6.h b/kernel/networking/internet_layer/icmpv6.h new file mode 100644 index 00000000..58c54ca4 --- /dev/null +++ b/kernel/networking/internet_layer/icmpv6.h @@ -0,0 +1,39 @@ +#pragma once + +#include "types.h" +#include "net/network_types.h" +#include "networking/internet_layer/icmp.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ICMPV6_DEST_UNREACH 1 +#define ICMPV6_PACKET_TOO_BIG 2 +#define ICMPV6_TIME_EXCEEDED 3 +#define ICMPV6_PARAM_PROBLEM 4 +#define ICMPV6_ECHO_REQUEST 128 +#define ICMPV6_ECHO_REPLY 129 + +typedef struct 
__attribute__((packed)) { + uint8_t type; + uint8_t code; + uint16_t checksum; +} icmpv6_hdr_t; + +typedef struct { + uint32_t rtt_ms; + uint8_t status; + uint8_t icmp_type; + uint8_t icmp_code; + uint8_t _pad; + uint8_t responder_ip[16]; +} ping6_result_t; + +void icmpv6_input(uint16_t ifindex, const uint8_t src_ip[16], const uint8_t dst_ip[16], uint8_t hop_limit, const uint8_t src_mac[6], const uint8_t *icmp, uint32_t icmp_len); +bool icmpv6_ping(const uint8_t dst_ip[16], uint16_t id, uint16_t seq, uint32_t timeout_ms, const void *tx_opts_or_null, uint8_t hop_limit, ping6_result_t *out); +bool icmpv6_send_on_l2(uint8_t ifindex, const uint8_t dst_ip[16], const uint8_t src_ip[16], const uint8_t dst_mac[6], const void *icmp, uint32_t icmp_len, uint8_t hop_limit); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/internet_layer/igmp.c b/kernel/networking/internet_layer/igmp.c new file mode 100644 index 00000000..3e67c1e9 --- /dev/null +++ b/kernel/networking/internet_layer/igmp.c @@ -0,0 +1,234 @@ +#include "igmp.h" +#include "networking/internet_layer/ipv4.h" +#include "networking/internet_layer/ipv4_utils.h" +#include "net/checksums.h" +#include "networking/interface_manager.h" +#include "kernel_processes/kprocess_loader.h" +#include "math/rng.h" +#include "std/memory.h" +#include "std/string.h" +#include "syscalls/syscalls.h" + +#define IGMP_TYPE_QUERY 0x11 +#define IGMP_TYPE_V2_REPORT 0x16 +#define IGMP_TYPE_V2_LEAVE 0x17 + +typedef struct __attribute__((packed)) igmp_hdr_t { + uint8_t type; + uint8_t max_resp_time; + uint16_t checksum; + uint32_t group; +} igmp_hdr_t; + +typedef struct { + uint8_t used; + uint8_t ifindex; + uint32_t group; + uint32_t refresh_ms; + uint32_t query_due_ms; + uint8_t query_pending; +} igmp_state_t; + +static volatile int igmp_daemon_running = 0; +static uint32_t igmp_uptime_ms = 0; +static rng_t igmp_rng; +static int igmp_rng_inited = 0; + +#define IGMP_MAX_TRACK 64 +#define IGMP_REFRESH_PERIOD_MS 60000 + +static 
igmp_state_t igmp_states[IGMP_MAX_TRACK]; + +static bool send_igmp(uint8_t ifindex, uint32_t dst, uint8_t type, uint32_t group) { + uint32_t headroom = (uint32_t)sizeof(eth_hdr_t) + (uint32_t)sizeof(ipv4_hdr_t); + netpkt_t* pkt = netpkt_alloc(sizeof(igmp_hdr_t),headroom, 0); + if (!pkt) return false; + + igmp_hdr_t* h = (igmp_hdr_t*)netpkt_put(pkt, sizeof(igmp_hdr_t)); + if (!h) { + netpkt_unref(pkt); + return false; + } + + h->type = type; + h->max_resp_time = 0; + h->group = bswap32(group); + h->checksum = 0; + h->checksum = checksum16((const uint16_t*)h, sizeof(igmp_hdr_t)/2); + + ipv4_tx_opts_t tx; + tx.scope = IP_TX_BOUND_L2; + tx.index = ifindex; + + ipv4_send_packet(dst, 2, pkt, &tx, 1, 0); + return true; +} + +static igmp_state_t* igmp_find_state(uint8_t ifindex, uint32_t group) { + for (int i = 0; i < IGMP_MAX_TRACK; ++i) { + igmp_state_t* s = &igmp_states[i]; + if (!s->used) continue; + if (s->ifindex == ifindex &&s->group == group) return s; + } + return 0; +} + +static igmp_state_t* igmp_get_state(uint8_t ifindex, uint32_t group) { + igmp_state_t* s = igmp_find_state(ifindex, group); + if (s) return s; + for (int i = 0; i < IGMP_MAX_TRACK; ++i) { + if (!igmp_states[i].used) { + igmp_states[i].used = 1; + igmp_states[i].ifindex = ifindex; + igmp_states[i].group = group; + igmp_states[i].refresh_ms = 0; + igmp_states[i].query_due_ms = 0; + igmp_states[i].query_pending = 0; + return &igmp_states[i]; + } + } + return 0; +} + +static int igmp_has_pending_timers(void) { + for (int i = 0; i < IGMP_MAX_TRACK; ++i) { + igmp_state_t* s = &igmp_states[i]; + if (!s->used) continue; + if (s->query_pending) return 1; + if (s->refresh_ms < IGMP_REFRESH_PERIOD_MS) return 1; + } + return 0; +} + +static int igmp_daemon_entry(int argc, char* argv[]) { + (void)argc; + (void)argv; + + igmp_daemon_running = 1; + + if (!igmp_rng_inited) { + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&igmp_rng, virt_timer); + igmp_rng_inited = 1; 
+ } + + const uint32_t tick_ms = 100; + + while (igmp_has_pending_timers()) { + igmp_uptime_ms += tick_ms; + + for (int i = 0; i < IGMP_MAX_TRACK; ++i) { + igmp_state_t* s = &igmp_states[i]; + if (!s->used) continue; + + l2_interface_t* l2 = l2_interface_find_by_index(s->ifindex); + bool still_joined = false; + if (l2) { + for (int j = 0; j < (int)l2->ipv4_mcast_count; ++j) { + if (l2->ipv4_mcast[j] == s->group) { + still_joined = true; + break; + } + } + } + if (!still_joined) { + s->used = 0; + continue; + } + + s->refresh_ms+= tick_ms; + if (s->refresh_ms>= IGMP_REFRESH_PERIOD_MS) { + s->refresh_ms = 0; + (void)send_igmp(s->ifindex, s->group, IGMP_TYPE_V2_REPORT, s->group); + } + + if (s->query_pending && igmp_uptime_ms >= s->query_due_ms) { + s->query_pending = 0; + (void)send_igmp(s->ifindex, s->group, IGMP_TYPE_V2_REPORT, s->group); + } + } + msleep(tick_ms); + } + + igmp_daemon_running = 0; + return 0; +} + +static void igmp_daemon_kick(void) { + if (igmp_daemon_running) return; + if (!igmp_has_pending_timers()) return; + create_kernel_process("igmp_daemon", igmp_daemon_entry, 0, 0); +} + +bool igmp_send_join(uint8_t ifindex, uint32_t group) { + if (!ipv4_is_multicast(group)) return false; + igmp_state_t* s = igmp_get_state(ifindex, group); + if (s) s->refresh_ms = 0; + igmp_daemon_kick(); + return send_igmp(ifindex, group, IGMP_TYPE_V2_REPORT, group); +} + +bool igmp_send_leave(uint8_t ifindex, uint32_t group) { + if (!ipv4_is_multicast(group)) return false; + igmp_state_t* s = igmp_find_state(ifindex, group); + if (s) s->used = 0; + igmp_daemon_kick(); + return send_igmp(ifindex, IPV4_MCAST_ALL_ROUTERS, IGMP_TYPE_V2_LEAVE, group); +} + +static void schedule_report(uint8_t ifindex, uint32_t group, uint32_t max_resp_ds) { + if (!ipv4_is_multicast(group)) return; + igmp_state_t* s = igmp_get_state(ifindex, group); + + if (!s) return; + uint32_t max_ms = (uint32_t)max_resp_ds * 100; + if (max_ms == 0) max_ms = 100; + uint32_t delay = rng_between32(&igmp_rng, 
0, max_ms); + uint32_t due = igmp_uptime_ms + delay; + if (!s->query_pending || due < s->query_due_ms) { + s->query_pending = 1; + s->query_due_ms = due; + } + igmp_daemon_kick(); +} + +void igmp_input(uint8_t ifindex, uint32_t src, uint32_t dst, const void* l4, uint32_t l4_len) { + if (!l4 || l4_len < sizeof(igmp_hdr_t)) return; + const igmp_hdr_t* h = (const igmp_hdr_t*)l4; + uint16_t saved = h->checksum; + igmp_hdr_t tmp; + memcpy(&tmp, h, sizeof(tmp)); + tmp.checksum = 0; + if (checksum16((const uint16_t*)&tmp, sizeof(tmp) / 2) != saved) return; + + uint8_t type = h->type; + uint32_t group = bswap32(h->group); + + uint32_t max_resp_ds = (uint32_t)h->max_resp_time; + + if (type != IGMP_TYPE_QUERY) return; + + if (group == 0) { + l2_interface_t* l2 = l2_interface_find_by_index(ifindex); + if (!l2) return; + for (int i = 0; i < (int)l2->ipv4_mcast_count; ++i) { + uint32_t g = l2->ipv4_mcast[i]; + if (ipv4_is_multicast(g)) schedule_report(ifindex, g, max_resp_ds); + } + return; + } + + l2_interface_t* l2 = l2_interface_find_by_index(ifindex); + if (!l2) return; + + for (int i = 0; i < (int)l2->ipv4_mcast_count; ++i) { + if (l2->ipv4_mcast[i] == group) { + schedule_report(ifindex, group, max_resp_ds); + return; + } + } + + (void)src; + (void)dst; +} \ No newline at end of file diff --git a/kernel/networking/internet_layer/igmp.h b/kernel/networking/internet_layer/igmp.h new file mode 100644 index 00000000..ae56c209 --- /dev/null +++ b/kernel/networking/internet_layer/igmp.h @@ -0,0 +1,14 @@ +#pragma once +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool igmp_send_join(uint8_t ifindex, uint32_t group); +bool igmp_send_leave(uint8_t ifindex, uint32_t group); +void igmp_input(uint8_t ifindex, uint32_t src, uint32_t dst, const void* l4, uint32_t l4_len); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/internet_layer/ipv4.c b/kernel/networking/internet_layer/ipv4.c index 72df9a01..88aeb058 100644 --- 
a/kernel/networking/internet_layer/ipv4.c +++ b/kernel/networking/internet_layer/ipv4.c @@ -2,43 +2,36 @@ #include "ipv4_route.h" #include "networking/link_layer/arp.h" #include "networking/internet_layer/icmp.h" +#include "networking/internet_layer/igmp.h" #include "std/memory.h" #include "std/string.h" #include "networking/transport_layer/tcp.h" #include "networking/transport_layer/udp.h" #include "console/kio.h" #include "syscalls/syscalls.h" +#include "ipv4_utils.h" +#include "net/network_types.h" +#include "networking/link_layer/nic_types.h" static uint16_t g_ip_ident = 1; -static int mask_prefix_len(uint32_t m) { - int n = 0; - while (m & 0x80000000u) { n++; m <<= 1; } - return n; -} - -static inline bool is_lbcast(uint32_t ip) { return ip == 0xFFFFFFFFu; } -static inline bool is_mcast(uint32_t ip) { return (ip & 0xF0000000u) == 0xE0000000u; } -static inline bool is_directed_bcast_for(const l3_ipv4_interface_t* v4, uint32_t dst) { - if (!v4) return false; - if (!v4->mask) return false; - uint32_t b = ipv4_broadcast_calc(v4->ip, v4->mask); - return b == dst; -} static l3_ipv4_interface_t* best_v4_on_l2_for_dst(l2_interface_t* l2, uint32_t dst) { l3_ipv4_interface_t* best = NULL; - int best_pl = -1; + uint32_t best_mask = 0; for (int s = 0; s < MAX_IPV4_PER_INTERFACE; s++) { l3_ipv4_interface_t* v4 = l2->l3_v4[s]; if (!v4) continue; if (v4->mode == IPV4_CFG_DISABLED) continue; if (!v4->ip) continue; uint32_t m = v4->mask; - if (m && ((dst & m) == (v4->ip & m))) { - int pl = mask_prefix_len(m); - if (pl > best_pl) { best_pl = pl; best = v4; } + if (m && (ipv4_net(dst, m) == ipv4_net(v4->ip, m))) { + if (!best || m > best_mask) { + best = v4; + best_mask = m; + } } else if (!best) { best = v4; + best_mask = m; } } return best; @@ -89,8 +82,7 @@ static bool pick_broadcast_bound_l3(uint8_t l3_id, uint8_t* out_ifx, uint32_t* o if (v4->mode == IPV4_CFG_DISABLED) return false; if (out_ifx) *out_ifx = v4->l2->ifindex; - if (out_src) *out_src = v4->ip; - if (!v4->ip && 
v4->mode == IPV4_CFG_DHCP && out_src) *out_src = 0; + if (out_src) *out_src = v4->ip ? v4->ip : 0; if (out_nh) *out_nh = 0xFFFFFFFFu; return true; } @@ -146,12 +138,12 @@ static bool pick_broadcast_global(uint8_t* out_ifx, uint32_t* out_src, uint32_t* } static bool pick_route_global(uint32_t dst, uint8_t* out_ifx, uint32_t* out_src, uint32_t* out_nh) { - if (is_lbcast(dst)) return pick_broadcast_global(out_ifx, out_src, out_nh); + if (dst == 0xFFFFFFFFu) return pick_broadcast_global(out_ifx, out_src, out_nh); ip_resolution_result_t r = resolve_ipv4_to_interface(dst); if (r.found && r.ipv4 && r.l2) { uint32_t m = r.ipv4->mask; - if (m && ((dst & m) == (r.ipv4->ip & m))) { + if (m && (ipv4_net(dst, m) == ipv4_net(r.ipv4->ip, m))) { if (out_ifx) *out_ifx = r.l2->ifindex; if (out_src) *out_src = r.ipv4->ip; if (out_nh) *out_nh = dst; @@ -177,7 +169,7 @@ static bool pick_route_global(uint32_t dst, uint8_t* out_ifx, uint32_t* out_src, } static bool pick_route_bound_l3(uint8_t l3_id, uint32_t dst, uint8_t* out_ifx, uint32_t* out_src, uint32_t* out_nh) { - if (is_lbcast(dst)) return pick_broadcast_bound_l3(l3_id, out_ifx, out_src, out_nh); + if (dst == 0xFFFFFFFFu) return pick_broadcast_bound_l3(l3_id, out_ifx, out_src, out_nh); l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(l3_id); if (!v4 || !v4->l2) return false; @@ -185,7 +177,7 @@ static bool pick_route_bound_l3(uint8_t l3_id, uint32_t dst, uint8_t* out_ifx, u if (!v4->ip) return false; uint32_t m = v4->mask; - if (m && ((dst & m) == (v4->ip & m))) { + if (m && (ipv4_net(dst, m) == ipv4_net(v4->ip, m))) { if (out_ifx) *out_ifx = v4->l2->ifindex; if (out_src) *out_src = v4->ip; if (out_nh) *out_nh = dst; @@ -210,7 +202,7 @@ static bool pick_route_bound_l3(uint8_t l3_id, uint32_t dst, uint8_t* out_ifx, u } static bool pick_route_bound_l2(uint8_t ifindex, uint32_t dst, uint8_t* out_ifx, uint32_t* out_src, uint32_t* out_nh) { - if (is_lbcast(dst)) return pick_broadcast_bound_l2(ifindex, out_ifx, out_src, out_nh); + if (dst 
== 0xFFFFFFFFu) return pick_broadcast_bound_l2(ifindex, out_ifx, out_src, out_nh); l2_interface_t* l2 = l2_interface_find_by_index(ifindex); if (!l2) return false; @@ -220,7 +212,7 @@ static bool pick_route_bound_l2(uint8_t ifindex, uint32_t dst, uint8_t* out_ifx, if (v4->mode == IPV4_CFG_DISABLED) return false; uint32_t m = v4->mask; - if (m && ((dst & m) == (v4->ip & m))) { + if (m && (ipv4_net(dst, m) == ipv4_net(v4->ip, m))) { if (out_ifx) *out_ifx = l2->ifindex; if (out_src) *out_src = v4->ip; if (out_nh) *out_nh = dst; @@ -246,21 +238,27 @@ static bool pick_route_bound_l2(uint8_t ifindex, uint32_t dst, uint8_t* out_ifx, static bool pick_route(uint32_t dst, const ipv4_tx_opts_t* opts, uint8_t* out_ifx, uint32_t* out_src, uint32_t* out_nh) { if (opts) { - if (opts->scope == IPV4_TX_BOUND_L3) return pick_route_bound_l3(opts->index, dst, out_ifx, out_src, out_nh); - if (opts->scope == IPV4_TX_BOUND_L2) return pick_route_bound_l2(opts->index, dst, out_ifx, out_src, out_nh); + if (opts->scope == IP_TX_BOUND_L3) return pick_route_bound_l3(opts->index, dst, out_ifx, out_src, out_nh); + if (opts->scope == IP_TX_BOUND_L2) return pick_route_bound_l2(opts->index, dst, out_ifx, out_src, out_nh); return pick_route_global(dst, out_ifx, out_src, out_nh); } return pick_route_global(dst, out_ifx, out_src, out_nh); } -void ipv4_send_packet(uint32_t dst_ip, uint8_t proto, sizedptr segment, const ipv4_tx_opts_t* opts, uint8_t ttl) { - if (!segment.ptr || !segment.size) return; +void ipv4_send_packet(uint32_t dst_ip, uint8_t proto, netpkt_t* pkt, const ipv4_tx_opts_t* opts, uint8_t ttl, uint8_t dontfrag) { + if (!pkt || !netpkt_len(pkt)) { + if (pkt) netpkt_unref(pkt); + return; + } uint8_t ifx = 0; uint32_t src_ip = 0; uint32_t nh = 0; - if (!pick_route(dst_ip, opts, &ifx, &src_ip, &nh)) return; + if (!pick_route(dst_ip, opts, &ifx, &src_ip, &nh)) { + netpkt_unref(pkt); + return; + } uint8_t dst_mac[6]; bool is_dbcast = false; @@ -269,44 +267,70 @@ void ipv4_send_packet(uint32_t 
dst_ip, uint8_t proto, sizedptr segment, const ip for (int s = 0; s < MAX_IPV4_PER_INTERFACE; ++s) { l3_ipv4_interface_t* v4 = l2->l3_v4[s]; if (!v4 || v4->mode == IPV4_CFG_DISABLED) continue; - if (is_directed_bcast_for(v4, dst_ip)) { is_dbcast = true; break; } + if (v4->mask && ipv4_broadcast_calc(v4->ip, v4->mask) == dst_ip) { is_dbcast = true; break; } } } if (is_dbcast) { memset(dst_mac, 0xFF, 6); + } else if (ipv4_is_multicast(dst_ip)) { + ipv4_mcast_to_mac(dst_ip, dst_mac); } else { - if (!arp_resolve_on(ifx, nh, dst_mac, 200)) return; + if (l2 && l2->kind == NET_IFK_LOCALHOST) { + memset(dst_mac, 0, 6); + } else if (!arp_resolve_on(ifx, nh, dst_mac, 1000)) { + netpkt_unref(pkt); + return; + } } - uint32_t hdr_len = IP_IHL_NOOPTS * 4; - uint32_t total = hdr_len + (uint32_t)segment.size; + uint16_t mtu = 1500; + if (l2) { + for (int s = 0; s < MAX_IPV4_PER_INTERFACE; ++s) { + l3_ipv4_interface_t* v4 = l2->l3_v4[s]; + if (!v4) continue; + if (v4->mode == IPV4_CFG_DISABLED) continue; + if (v4->ip != src_ip) continue; + if (v4->runtime_opts_v4.mtu) mtu = v4->runtime_opts_v4.mtu; + break; + } + } - uintptr_t buf = (uintptr_t)malloc(total); - if (!buf) return; + uint32_t hdr_len = IP_IHL_NOOPTS * 4; + uint32_t seg_len = netpkt_len(pkt); + void* hdrp = netpkt_push(pkt, hdr_len); + if (!hdrp) { + netpkt_unref(pkt); + return; + } - ipv4_hdr_t* ip = (ipv4_hdr_t*)buf; + uint32_t total = hdr_len + seg_len; + if (dontfrag && total > (uint32_t)mtu) { + netpkt_unref(pkt); + return; + } + ipv4_hdr_t* ip = (ipv4_hdr_t*)hdrp; ip->version_ihl = (uint8_t)((IP_VERSION_4 << 4) | IP_IHL_NOOPTS); ip->dscp_ecn = 0; ip->total_length = bswap16((uint16_t)total); ip->identification = bswap16(g_ip_ident++); - ip->flags_frag_offset = bswap16(0); + uint16_t ff = 0; + if (dontfrag) ff |= 0x4000u; + ip->flags_frag_offset = bswap16(ff); ip->ttl = ttl ? 
ttl : IP_TTL_DEFAULT; ip->protocol = proto; ip->header_checksum = 0; ip->src_ip = bswap32(src_ip); ip->dst_ip = bswap32(dst_ip); - - memcpy((void*)(buf + hdr_len), (const void*)segment.ptr, segment.size); ip->header_checksum = checksum16((const uint16_t*)ip, hdr_len / 2); - sizedptr payload = { buf, total }; - eth_send_frame_on(ifx, ETHERTYPE_IPV4, dst_mac, payload); - - free_sized((void*)buf, total); + eth_send_frame_on(ifx, ETHERTYPE_IPV4, dst_mac, pkt); } -void ipv4_input(uint16_t ifindex, uintptr_t ip_ptr, uint32_t ip_len, const uint8_t src_mac[6]) { +void ipv4_input(uint16_t ifindex, netpkt_t* pkt, const uint8_t src_mac[6]) { + if (!pkt) return; + uint32_t ip_len = netpkt_len(pkt); + uintptr_t ip_ptr = netpkt_data(pkt); if (ip_len < sizeof(ipv4_hdr_t)) return; ipv4_hdr_t* ip = (ipv4_hdr_t*)ip_ptr; @@ -329,6 +353,8 @@ void ipv4_input(uint16_t ifindex, uintptr_t ip_ptr, uint32_t ip_len, const uint8 uint16_t ip_totlen = bswap16(ip->total_length); if (ip_totlen < hdr_len) return; if (ip_len < ip_totlen) return; + (void)netpkt_trim(pkt, ip_totlen); + ip_len = ip_totlen; uintptr_t l4 = ip_ptr + hdr_len; uint32_t l4_len = (uint32_t)ip_totlen - hdr_len; @@ -361,9 +387,27 @@ void ipv4_input(uint16_t ifindex, uintptr_t ip_ptr, uint32_t ip_len, const uint8 } if (ccount == 0) return; - if (is_mcast(dst)) return; + if (ipv4_is_multicast(dst)) { + bool joined = false; + for (int i = 0; i < (int)l2->ipv4_mcast_count; ++i) if (l2->ipv4_mcast[i] == dst) { + joined = true; + break; + } + if (!joined) return; + for (int i = 0; i < ccount; ++i) { + uint8_t l3id = cand[i]->l3_id; + switch (proto) { + case 2: igmp_input((uint8_t)ifindex, src, dst, (const void*)l4, l4_len); break; + case 6: tcp_input(IP_VER4, &src, &dst, l3id, l4, l4_len); break; + case 17: udp_input(IP_VER4, &src, &dst, l3id, l4, l4_len); break; + default: break; + } + } + return; + } + - if (is_lbcast(dst)) { + if (dst == 0xFFFFFFFFu) { if (ccount == 1) { uint8_t l3id = cand[0]->l3_id; switch (proto) { @@ -427,7 
+471,7 @@ void ipv4_input(uint16_t ifindex, uintptr_t ip_ptr, uint32_t ip_len, const uint8 l3_ipv4_interface_t* v4 = l2x->l3_v4[s]; if (!v4) continue; if (v4->mode == IPV4_CFG_DISABLED) continue; - if (is_directed_bcast_for(v4, dst)) { + if (v4->mask && ipv4_broadcast_calc(v4->ip, v4->mask) == dst) { any_dbcast = 1; uint8_t l3id = v4->l3_id; switch (proto) { diff --git a/kernel/networking/internet_layer/ipv4.h b/kernel/networking/internet_layer/ipv4.h index 96e23bfd..54638eac 100644 --- a/kernel/networking/internet_layer/ipv4.h +++ b/kernel/networking/internet_layer/ipv4.h @@ -5,6 +5,7 @@ #include "net/network_types.h" #include "net/checksums.h" #include "networking/interface_manager.h" +#include "networking/netpkt.h" #define IP_IHL_NOOPTS 5 #define IP_VERSION_4 4 @@ -27,19 +28,11 @@ typedef struct __attribute__((packed)) ipv4_hdr_t { uint32_t dst_ip; } ipv4_hdr_t; -typedef enum { - IPV4_TX_AUTO = 0, - IPV4_TX_BOUND_L2 = 1, - IPV4_TX_BOUND_L3 = 2 -} ipv4_tx_scope_t; +typedef ip_tx_scope_t ipv4_tx_scope_t; +typedef ip_tx_opts_t ipv4_tx_opts_t; -typedef struct { - uint8_t index; - ipv4_tx_scope_t scope; -} ipv4_tx_opts_t; - -void ipv4_send_packet(uint32_t dst_ip, uint8_t proto, sizedptr segment, const ipv4_tx_opts_t* opts, uint8_t ttl); -void ipv4_input(uint16_t ifindex, uintptr_t ip_ptr, uint32_t ip_len, const uint8_t src_mac[6]); +void ipv4_send_packet(uint32_t dst_ip, uint8_t proto, netpkt_t* pkt, const ipv4_tx_opts_t* opts, uint8_t ttl, uint8_t dontfrag); +void ipv4_input(uint16_t ifindex, netpkt_t* pkt, const uint8_t src_mac[6]); #ifdef __cplusplus } diff --git a/kernel/networking/internet_layer/ipv4_route.c b/kernel/networking/internet_layer/ipv4_route.c index a3ff1e12..71b67e0a 100644 --- a/kernel/networking/internet_layer/ipv4_route.c +++ b/kernel/networking/internet_layer/ipv4_route.c @@ -3,6 +3,82 @@ #include "networking/interface_manager.h" #include "syscalls/syscalls.h" +static bool v4_l3_ok_for_tx(l3_ipv4_interface_t* v4){ + if (!v4 || !v4->l2) return 
false; + if (!v4->l2->is_up) return false; + if (v4->mode == IPV4_CFG_DISABLED) return false; + if (v4->is_localhost) return false; + if (!v4->ip) return false; + if (!v4->port_manager) return false; + return true; +} + +static bool l3_allowed(uint8_t id, const uint8_t* allowed, int n){ + if (!allowed || n <= 0) return true; + for (int i = 0; i < n; ++i) if (allowed[i] == id) return true; + return false; +} + +bool ipv4_build_tx_plan(uint32_t dst, const ip_tx_opts_t* hint, const uint8_t* allowed_l3, int allowed_n, ipv4_tx_plan_t* out){ + if (!out) return false; + out->l3_id = 0; + out->src_ip = 0; + out->fixed_opts.scope = IP_TX_AUTO; + out->fixed_opts.index = 0; + + if (hint && hint->scope == IP_TX_BOUND_L3) { + uint8_t id = hint->index; + if (!l3_allowed(id, allowed_l3, allowed_n)) return false; + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(id); + if (!v4_l3_ok_for_tx(v4)) return false; + out->l3_id = id; + out->src_ip = v4->ip; + out->fixed_opts.scope = IP_TX_BOUND_L3; + out->fixed_opts.index = id; + return true; + } + + uint8_t cand[64]; + int n = 0; + + if (hint && hint->scope == IP_TX_BOUND_L2) { + l2_interface_t* l2 = l2_interface_find_by_index(hint->index); + if (!l2 || !l2->is_up) return false; + for (int s = 0; s < MAX_IPV4_PER_INTERFACE && n < (int)sizeof(cand); ++s){ + l3_ipv4_interface_t* v4 = l2->l3_v4[s]; + if (!v4_l3_ok_for_tx(v4)) continue; + if (!l3_allowed(v4->l3_id, allowed_l3, allowed_n)) continue; + cand[n++] = v4->l3_id; + } + } else { + uint8_t cnt = l2_interface_count(); + for (uint8_t i = 0; i < cnt && n < (int)sizeof(cand); ++i){ + l2_interface_t* l2 = l2_interface_at(i); + if (!l2 || !l2->is_up) continue; + for (int s = 0; s < MAX_IPV4_PER_INTERFACE && n < (int)sizeof(cand); ++s){ + l3_ipv4_interface_t* v4 = l2->l3_v4[s]; + if (!v4_l3_ok_for_tx(v4)) continue; + if (!l3_allowed(v4->l3_id, allowed_l3, allowed_n)) continue; + cand[n++] = v4->l3_id; + } + } + } + + if (n == 0) return false; + + uint8_t chosen = 0; + if 
(!ipv4_rt_pick_best_l3_in(cand, n, dst, &chosen)) chosen = cand[0]; + + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(chosen); + if (!v4_l3_ok_for_tx(v4)) return false; + + out->l3_id = chosen; + out->src_ip = v4->ip; + out->fixed_opts.scope = IP_TX_BOUND_L3; + out->fixed_opts.index = chosen; + return true; +} + struct ipv4_rt_table { ipv4_rt_entry_t e[IPV4_RT_PER_IF_MAX]; int len; @@ -146,4 +222,4 @@ bool ipv4_rt_pick_best_l3_in(const uint8_t* l3_ids, int n_ids, uint32_t dst, uin if (best_pl < 0) return false; if (out_l3) *out_l3 = best_l3; return true; -} +} \ No newline at end of file diff --git a/kernel/networking/internet_layer/ipv4_route.h b/kernel/networking/internet_layer/ipv4_route.h index 59fd03df..20b3c9c3 100644 --- a/kernel/networking/internet_layer/ipv4_route.h +++ b/kernel/networking/internet_layer/ipv4_route.h @@ -1,5 +1,6 @@ #pragma once #include "types.h" +#include "net/network_types.h" #ifdef __cplusplus extern "C" { @@ -28,8 +29,16 @@ bool ipv4_rt_lookup_in(const ipv4_rt_table_t* t, uint32_t dst, uint32_t *next_ho void ipv4_rt_ensure_basics(ipv4_rt_table_t* t, uint32_t ip, uint32_t mask, uint32_t gw, uint16_t base_metric); void ipv4_rt_sync_basics(ipv4_rt_table_t* t, uint32_t ip, uint32_t mask, uint32_t gw, uint16_t base_metric); +typedef struct { + uint8_t l3_id; + uint32_t src_ip; + ip_tx_opts_t fixed_opts; +} ipv4_tx_plan_t; + +bool ipv4_build_tx_plan(uint32_t dst, const ip_tx_opts_t* hint, const uint8_t* allowed_l3, int allowed_n, ipv4_tx_plan_t* out); + bool ipv4_rt_pick_best_l3_in(const uint8_t* l3_ids, int n_ids, uint32_t dst, uint8_t* out_l3); #ifdef __cplusplus } -#endif +#endif \ No newline at end of file diff --git a/kernel/networking/internet_layer/ipv4_utils.c b/kernel/networking/internet_layer/ipv4_utils.c index 714df187..cc7825da 100644 --- a/kernel/networking/internet_layer/ipv4_utils.c +++ b/kernel/networking/internet_layer/ipv4_utils.c @@ -15,12 +15,94 @@ static char* u8_to_str(uint8_t val, char* out) { return out; } -void 
ipv4_to_string(uint32_t ip, char* buf) { - uint8_t a = (ip >> 24) & 0xFF; - uint8_t b = (ip >> 16) & 0xFF; - uint8_t c = (ip >> 8) & 0xFF; - uint8_t d = ip & 0xFF; +bool ipv4_is_unspecified(uint32_t ip) { return ip == 0; } +bool ipv4_is_loopback(uint32_t ip) { return (ip & 0xFF000000u) == 0x7F000000u; } +bool ipv4_is_multicast(uint32_t ip) { return (ip & 0xF0000000u) == 0xE0000000u; } +bool ipv4_is_link_local(uint32_t ip) { return (ip & 0xFFFF0000u) == 0xA9FE0000u; } +bool ipv4_is_private(uint32_t ip) { + if ((ip & 0xFF000000u) == 0x0A000000u) return true; + if ((ip & 0xFFF00000u) == 0xAC100000u) return true; + if ((ip & 0xFFFF0000u) == 0xC0A80000u) return true; + return false; +} +bool ipv4_is_cgnat(uint32_t ip) { return (ip & 0xFFC00000u) == 0x64400000u; } +bool ipv4_is_documentation(uint32_t ip) { + if ((ip & 0xFFFFFF00u) == 0xC0000200u) return true; + if ((ip & 0xFFFFFF00u) == 0xC6336400u) return true; + if ((ip & 0xFFFFFF00u) == 0xCB007100u) return true; + return false; +} +bool ipv4_is_benchmark(uint32_t ip) { return (ip & 0xFFFE0000u) == 0xC6120000u; } +bool ipv4_is_reserved(uint32_t ip) { + if ((ip & 0xF0000000u) == 0xF0000000u) return true; + if ((ip & 0xFF000000u) == 0xFF000000u) return true; + return false; +} +bool ipv4_is_reserved_special(uint32_t ip) { + if (ipv4_is_unspecified(ip)) return true; + if (ipv4_is_loopback(ip)) return true; + if (ipv4_is_link_local(ip)) return true; + if (ipv4_is_multicast(ip)) return true; + if (ipv4_is_reserved(ip)) return true; + return false; +} +bool ipv4_is_unicast_global(uint32_t ip) { + if (ipv4_is_unspecified(ip)) return false; + if (ipv4_is_loopback(ip)) return false; + if (ipv4_is_multicast(ip)) return false; + if (ipv4_is_link_local(ip)) return false; + if (ipv4_is_private(ip)) return false; + if (ipv4_is_cgnat(ip)) return false; + if (ipv4_is_documentation(ip)) return false; + if (ipv4_is_benchmark(ip)) return false; + if (ipv4_is_reserved(ip)) return false; + return true; +} + +bool 
ipv4_mask_is_contiguous(uint32_t mask) { + if (mask == 0) return true; + return ((mask | (mask - 1u)) == 0xFFFFFFFFu); +} + +int ipv4_prefix_len(uint32_t mask) { + int n = 0; + while (mask & 0x80000000u) { n++; mask <<= 1; } + return n; +} + +uint32_t ipv4_net(uint32_t ip, uint32_t mask) { return ip & mask; } +uint32_t ipv4_broadcast_calc(uint32_t ip, uint32_t mask) { return (mask == 0) ? 0 : ((ip & mask) | ~mask); } +bool ipv4_is_network_address(uint32_t ip, uint32_t mask) { + if (!ipv4_mask_is_contiguous(mask)) return false; + if (mask == 0 || mask == 0xFFFFFFFFu) return false; + return (ip & ~mask) == 0; +} + +bool ipv4_is_broadcast_address(uint32_t ip, uint32_t mask) { + if (!ipv4_mask_is_contiguous(mask)) return false; + if (mask == 0 || mask == 0xFFFFFFFFu) return false; + return (ip & ~mask) == ~mask; +} + +bool ipv4_is_limited_broadcast(uint32_t ip) { return ip == 0xFFFFFFFFu; } + +bool ipv4_is_directed_broadcast(uint32_t ip, uint32_t mask, uint32_t dst) { + if (!ipv4_mask_is_contiguous(mask)) return false; + if (mask == 0 || mask == 0xFFFFFFFFu) return false; + return ipv4_broadcast_calc(ip, mask) == dst; +} + +bool ipv4_same_subnet(uint32_t a, uint32_t b, uint32_t mask) { + if (!ipv4_mask_is_contiguous(mask)) return false; + return (a & mask) == (b & mask); +} + +void ipv4_to_string(uint32_t ip, char* buf) { + uint8_t a = (uint8_t)(ip >> 24); + uint8_t b = (uint8_t)(ip >> 16); + uint8_t c = (uint8_t)(ip >> 8); + uint8_t d = (uint8_t)(ip); char* p = buf; p = u8_to_str(a, p); *p++ = '.'; p = u8_to_str(b, p); *p++ = '.'; @@ -32,23 +114,36 @@ void ipv4_to_string(uint32_t ip, char* buf) { bool ipv4_parse(const char* s, uint32_t* out) { if (!s || !out) return false; uint32_t ip = 0, v = 0; - int oct = 0; + int oct = 0, digits = 0; const char* p = s; while (*p) { if (*p == '.') { - if (v > 255 || oct >= 3) return false; + if (digits == 0 || v > 255 || oct >= 3) return false; ip = (ip << 8) | (v & 0xFF); - v = 0; oct++; + v = 0; + digits = 0; + oct++; } else if 
(*p >= '0' && *p <= '9') { v = v * 10 + (uint32_t)(*p - '0'); if (v > 255) return false; + digits++; } else { return false; } ++p; } - if (oct != 3 || v > 255) return false; + if (oct != 3 || digits == 0 || v > 255) return false; ip = (ip << 8) | (v & 0xFF); *out = ip; return true; } + +void ipv4_mcast_to_mac(uint32_t group, uint8_t out_mac[6]) { + if (!out_mac) return; + out_mac[0] = 0x01; + out_mac[1] = 0x00; + out_mac[2] = 0x5e; + out_mac[3] = (uint8_t)((group >> 16) & 0x7Fu); + out_mac[4] = (uint8_t)((group >> 8) & 0xFFu); + out_mac[5] = (uint8_t)(group & 0xFFu); +} diff --git a/kernel/networking/internet_layer/ipv4_utils.h b/kernel/networking/internet_layer/ipv4_utils.h index 6413217f..6fe4da8c 100644 --- a/kernel/networking/internet_layer/ipv4_utils.h +++ b/kernel/networking/internet_layer/ipv4_utils.h @@ -1,13 +1,41 @@ #pragma once #include "types.h" +#define IPV4_MCAST_ALL_HOSTS 0xE0000001u +#define IPV4_MCAST_ALL_ROUTERS 0xE0000002u #ifdef __cplusplus extern "C" { #endif +bool ipv4_is_unspecified(uint32_t ip); +bool ipv4_is_loopback(uint32_t ip); +bool ipv4_is_multicast(uint32_t ip); +bool ipv4_is_link_local(uint32_t ip); +bool ipv4_is_private(uint32_t ip); +bool ipv4_is_cgnat(uint32_t ip); +bool ipv4_is_documentation(uint32_t ip); +bool ipv4_is_benchmark(uint32_t ip); +bool ipv4_is_reserved(uint32_t ip); +bool ipv4_is_reserved_special(uint32_t ip); +bool ipv4_is_unicast_global(uint32_t ip); + +bool ipv4_mask_is_contiguous(uint32_t mask); +int ipv4_prefix_len(uint32_t mask); + +uint32_t ipv4_net(uint32_t ip, uint32_t mask); +uint32_t ipv4_broadcast_calc(uint32_t ip, uint32_t mask); + +bool ipv4_is_network_address(uint32_t ip, uint32_t mask); +bool ipv4_is_broadcast_address(uint32_t ip, uint32_t mask); +bool ipv4_is_limited_broadcast(uint32_t ip); +bool ipv4_is_directed_broadcast(uint32_t ip, uint32_t mask, uint32_t dst); +bool ipv4_same_subnet(uint32_t a, uint32_t b, uint32_t mask); + void ipv4_to_string(uint32_t ip, char* buf); bool ipv4_parse(const char* 
s, uint32_t* out); +void ipv4_mcast_to_mac(uint32_t group, uint8_t out_mac[6]); + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/kernel/networking/internet_layer/ipv6.c b/kernel/networking/internet_layer/ipv6.c new file mode 100644 index 00000000..05b2cd6a --- /dev/null +++ b/kernel/networking/internet_layer/ipv6.c @@ -0,0 +1,945 @@ +#include "ipv6.h" +#include "ipv6_utils.h" +#include "std/memory.h" +#include "std/string.h" +#include "networking/link_layer/eth.h" +#include "networking/interface_manager.h" +#include "networking/link_layer/ndp.h" +#include "networking/transport_layer/tcp.h" +#include "networking/transport_layer/udp.h" +#include "net/network_types.h" +#include "console/kio.h" +#include "syscalls/syscalls.h" +#include "networking/internet_layer/ipv6_route.h" +#include "networking/internet_layer/icmpv6.h" +#include "math/rng.h" +#include "net/checksums.h" +#include "networking/link_layer/nic_types.h" + +#define IPV6_MIN_MTU 1280u +#define PMTU_CACHE_SIZE 16 +#define REASS_SLOTS 8 + +typedef struct { + uint8_t used; + uint8_t dst[16]; + uint16_t mtu; + uint32_t timestamp_ms; +} pmtu_entry_t; + +typedef struct { + uint8_t used; + uint8_t ifindex; + uint32_t ident; + uint8_t src[16]; + uint8_t dst[16]; + uint8_t next_header; + + uint32_t first_rx_ms; + uint32_t last_update_ms; + + uint32_t total_len; + uint8_t have_last; + uint8_t have_first; + uint8_t first_src_mac[6]; + uint8_t _pad0[1]; + + uint16_t first_pkt_len; + uint8_t _pad1[2]; + uint8_t first_pkt[1280]; + + uint8_t *buf; + uint8_t bitmap[2048]; +} reass_slot_t; + +typedef struct __attribute__((packed)) { + uint8_t next_header; + uint8_t reserved; + uint16_t offset_flags; + uint32_t identification; +} ipv6_frag_hdr_t; + +static pmtu_entry_t g_pmtu[PMTU_CACHE_SIZE] = {0}; +static reass_slot_t g_reass[REASS_SLOTS] = {0}; + +uint16_t ipv6_pmtu_get(const uint8_t dst[16]) { + if (!dst) return 0; + uint32_t now = (uint32_t)get_time(); + for (int i = 0; i < PMTU_CACHE_SIZE; i++) { 
+ if (!g_pmtu[i].used) continue; + if (ipv6_cmp(g_pmtu[i].dst, dst) != 0) continue; + if (now - g_pmtu[i].timestamp_ms > 600000u) { + g_pmtu[i].used = 0; + continue; + } + return g_pmtu[i].mtu; + } + return 0; +} + +void ipv6_pmtu_note(const uint8_t dst[16], uint16_t mtu) { + if (!dst) return; + if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; + uint32_t now = (uint32_t)get_time(); + + int free_i = -1; + int lru_i = 0; + uint32_t lru_t = 0xFFFFFFFFu; + + for (int i = 0; i < PMTU_CACHE_SIZE; i++) { + if (g_pmtu[i].used) { + if (ipv6_cmp(g_pmtu[i].dst, dst) == 0) { + g_pmtu[i].mtu = mtu; + g_pmtu[i].timestamp_ms = now; + return; + } + if (g_pmtu[i].timestamp_ms < lru_t) { + lru_t = g_pmtu[i].timestamp_ms; + lru_i = i; + } + } else if (free_i < 0) free_i = i; + } + + int idx = (free_i >= 0) ? free_i : lru_i; + g_pmtu[idx].used = 1; + ipv6_cpy(g_pmtu[idx].dst, dst); + g_pmtu[idx].mtu = mtu; + g_pmtu[idx].timestamp_ms = now; +} + +static void reass_free(reass_slot_t *s) { + if (!s) return; + if (s->buf) free_sized(s->buf, 2048u * 8u); + memset(s, 0, sizeof(*s)); +} + +static void icmpv6_send_error(uint8_t ifindex, const uint8_t src_ip[16], const uint8_t dst_ip[16], const uint8_t dst_mac[6], uint8_t type, uint8_t code, uint32_t param32, const uint8_t *invoking, uint32_t invoking_len) { + if (!ifindex || !src_ip || !dst_ip || !dst_mac || !invoking || !invoking_len) return; + + uint32_t max_invoke = 1280u; + uint32_t base = (uint32_t)sizeof(icmpv6_hdr_t) + 4u; + if (base >= max_invoke) return; + + uint32_t copy = invoking_len; + if (copy > max_invoke - base) copy = max_invoke - base; + + uint32_t icmp_len = base + copy; + uint8_t *buf = (uint8_t*)malloc(icmp_len); + if (!buf) return; + + icmpv6_hdr_t *h = (icmpv6_hdr_t*)buf; + h->type = type; + h->code = code; + h->checksum = 0; + + *(uint32_t*)(buf + sizeof(icmpv6_hdr_t)) = bswap32(param32); + + memcpy(buf + base, invoking, copy); + + h->checksum =bswap16(checksum16_pipv6(src_ip, dst_ip, 58, buf, icmp_len)); + + 
icmpv6_send_on_l2(ifindex, dst_ip, src_ip, dst_mac, buf, icmp_len, 64); + + free_sized(buf, icmp_len); +} + +static l3_ipv6_interface_t* best_v6_on_l2_for_dst(l2_interface_t* l2, const uint8_t dst[16]) { + l3_ipv6_interface_t* best = NULL; + int best_cmp = -1; + int best_cost = 0x7FFFFFFF; + int dst_is_ll = (ipv6_is_linklocal(dst) || ipv6_is_linkscope_mcast(dst)) ? 1 : 0; + + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; s++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!v6) continue; + if (v6->cfg == IPV6_CFG_DISABLE) continue; + if (ipv6_is_unspecified(v6->ip)) continue; + if (v6->dad_state != IPV6_DAD_OK) continue; + + int v6_is_ll = ipv6_is_linklocal(v6->ip) ? 1 : 0; + if (v6_is_ll != dst_is_ll) continue; + + int cmp = ipv6_common_prefix_len(dst, v6->ip); + int cost = (int)l2->base_metric; + if (cmp > best_cmp || (cmp == best_cmp && cost < best_cost)) { + best_cmp = cmp; + best_cost = cost; + best = v6; + } + } + return best; +} + +static bool pick_route_bound_l3(uint8_t l3_id, const uint8_t dst[16], uint8_t* out_ifx, uint8_t out_src[16], uint8_t out_nh[16]) { + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(l3_id); + if (!v6 || !v6->l2) return false; + if (v6->cfg == IPV6_CFG_DISABLE) return false; + if (ipv6_is_unspecified(v6->ip)) return false; + if (v6->dad_state != IPV6_DAD_OK) return false; + + int dst_is_ll = (ipv6_is_linklocal(dst) || ipv6_is_linkscope_mcast(dst)) ? 1 : 0; + int src_is_ll = ipv6_is_linklocal(v6->ip) ? 
1 : 0; + if (dst_is_ll != src_is_ll) return false; + + if (out_ifx) *out_ifx = v6->l2->ifindex; + if (out_src) ipv6_cpy(out_src, v6->ip); + + uint8_t nh[16]; + ipv6_cpy(nh, dst); + + if (!dst_is_ll && v6->prefix_len && ipv6_common_prefix_len(dst, v6->ip) < v6->prefix_len) { + uint8_t via[16] = {0}; + int pl = -1,met = 0x7FFF; + + if (v6->routing_table && + ipv6_rt_lookup_in((const ipv6_rt_table_t*)v6->routing_table, dst, via, &pl, &met)) + { + if (!ipv6_is_unspecified(via)) ipv6_cpy(nh, via); + } else if (!ipv6_is_unspecified(v6->gateway) && ipv6_is_linklocal(v6->gateway)) { + ipv6_cpy(nh, v6->gateway); + } + } + + if (out_nh) ipv6_cpy(out_nh, nh); + return true; +} + +static bool pick_route_bound_l2(uint8_t ifindex, const uint8_t dst[16], uint8_t* out_ifx, uint8_t out_src[16], uint8_t out_nh[16]) { + l2_interface_t* l2 = l2_interface_find_by_index(ifindex); + if (!l2) return false; + + l3_ipv6_interface_t* v6 = best_v6_on_l2_for_dst(l2, dst); + if (!v6) return false; + + if (out_ifx) *out_ifx = l2->ifindex; + if (out_src) ipv6_cpy(out_src, v6->ip); + + uint8_t nh[16]; + ipv6_cpy(nh, dst); + + if (!ipv6_is_linklocal(dst) && v6->prefix_len && ipv6_common_prefix_len(dst, v6->ip) < v6->prefix_len) { + uint8_t via[16] = {0}; + int pl = -1; + int met =0x7FFF; + + if (v6->routing_table && ipv6_rt_lookup_in((const ipv6_rt_table_t*)v6->routing_table, dst, via, &pl, &met)) { + if (!ipv6_is_unspecified(via)) ipv6_cpy(nh, via); + } else if (!ipv6_is_unspecified(v6->gateway) && ipv6_is_linklocal(v6->gateway))ipv6_cpy(nh, v6->gateway); + } + + if (out_nh) ipv6_cpy(out_nh, nh); + return true; +} + +static bool pick_route_global(const uint8_t dst[16], uint8_t* out_ifx, uint8_t out_src[16], uint8_t out_nh[16]) { + int dst_is_ll = ipv6_is_linklocal(dst) ? 
1 : 0; + + ip_resolution_result_t r = resolve_ipv6_to_interface(dst); + if (r.found && r.ipv6 && r.l2) { + if (r.ipv6->cfg != IPV6_CFG_DISABLE && + !ipv6_is_unspecified(r.ipv6->ip) && + r.ipv6->dad_state == IPV6_DAD_OK) + { + int src_is_ll = ipv6_is_linklocal(r.ipv6->ip) ? 1 : 0; + if (src_is_ll == dst_is_ll) { + if (out_ifx) *out_ifx = r.l2->ifindex; + if (out_src) ipv6_cpy(out_src, r.ipv6->ip); + + uint8_t nh[16]; + ipv6_cpy(nh, dst); + + if (!dst_is_ll && r.ipv6->prefix_len && ipv6_common_prefix_len(dst, r.ipv6->ip) < r.ipv6->prefix_len) { + uint8_t via[16] = {0}; + int pl = -1; + int met = 0x7FFF; + + if (r.ipv6->routing_table && ipv6_rt_lookup_in((const ipv6_rt_table_t*)r.ipv6->routing_table, dst, via, &pl, &met)) { + if (!ipv6_is_unspecified(via)) ipv6_cpy(nh, via); + } else if (!ipv6_is_unspecified(r.ipv6->gateway) && ipv6_is_linklocal(r.ipv6->gateway)) { + ipv6_cpy(nh, r.ipv6->gateway); + } + } + + if (out_nh) ipv6_cpy(out_nh, nh); + return true; + } + } + } + + uint8_t best_ifx = 0; + uint8_t best_src[16] ={0}; + uint8_t best_nh[16] ={0}; + int best_pl = -1; + int best_cost = 0x7FFFFFFF; + + uint8_t n = l2_interface_count(); + for (uint8_t i = 0; i < n; i++) { + l2_interface_t* l2 = l2_interface_at(i); + if (!l2) continue; + + for (int s = 0; s< MAX_IPV6_PER_INTERFACE; s++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!v6) continue; + if (v6->cfg == IPV6_CFG_DISABLE) continue; + if (ipv6_is_unspecified(v6->ip)) continue; + if (v6->dad_state != IPV6_DAD_OK) continue; + + int src_is_ll = ipv6_is_linklocal(v6->ip) ? 
1 : 0; + if (src_is_ll != dst_is_ll) continue; + + int pl_conn = -1; + if (!dst_is_ll && v6->prefix_len && ipv6_common_prefix_len(dst, v6->ip) >= v6->prefix_len) pl_conn = v6->prefix_len; + if (dst_is_ll) pl_conn = 128; + + int pl_tab = -1; + int met_tab = 0x7FFF; + uint8_t via[16] = {0}; + + if (!dst_is_ll && v6->routing_table) { + int out_pl = -1; + int out_met = 0x7FFF; + if(ipv6_rt_lookup_in((const ipv6_rt_table_t*)v6->routing_table, dst, via, &out_pl, &out_met)) { + pl_tab = out_pl; + met_tab = out_met; + } + } + + int cand_pl = pl_conn; + int cand_cost = (int)l2->base_metric; + uint8_t cand_nh[16]; + ipv6_cpy(cand_nh, dst); + + if (pl_tab > cand_pl || (pl_tab == cand_pl && ((int)l2->base_metric + met_tab) < cand_cost)) { + cand_pl =pl_tab; + cand_cost = (int)l2->base_metric + met_tab; + if (!ipv6_is_unspecified(via)) ipv6_cpy(cand_nh, via); + } else if (cand_pl < 0) { + if (!ipv6_is_unspecified(v6->gateway) && ipv6_is_linklocal(v6->gateway)) ipv6_cpy(cand_nh, v6->gateway); + } + + if (cand_pl > best_pl || (cand_pl == best_pl && cand_cost < best_cost)) { + best_pl = cand_pl; + best_cost = cand_cost; + best_ifx = l2->ifindex; + ipv6_cpy(best_src, v6->ip); + ipv6_cpy(best_nh, cand_nh); + } + } + } + + if (best_pl < 0) return false; + if (out_ifx) *out_ifx = best_ifx; + if (out_src) ipv6_cpy(out_src, best_src); + if (out_nh) ipv6_cpy(out_nh, best_nh); + return true; +} + +static bool pick_route(const uint8_t dst[16], const ipv6_tx_opts_t* opts, uint8_t* out_ifx, uint8_t out_src[16], uint8_t out_nh[16]) { + if (opts) { + if (opts->scope == IP_TX_BOUND_L3) return pick_route_bound_l3(opts->index, dst, out_ifx, out_src, out_nh); + if (opts->scope == IP_TX_BOUND_L2) return pick_route_bound_l2(opts->index, dst, out_ifx, out_src, out_nh); + } + return pick_route_global(dst, out_ifx, out_src, out_nh); +} + +void ipv6_send_packet(const uint8_t dst[16], uint8_t next_header, netpkt_t* pkt, const ipv6_tx_opts_t* opts, uint8_t hop_limit, uint8_t dontfrag) { + if (!dst || !pkt 
|| !netpkt_len(pkt)) { + if (pkt) netpkt_unref(pkt); + return; + } + + uint8_t ifx = 0; + uint8_t src[16] = {0}; + uint8_t nh[16] = {0}; + + l3_ipv6_interface_t* src_v6 = NULL; + + if (!pick_route(dst, opts, &ifx, src, nh)) { + netpkt_unref(pkt); + return; + } + + if (!ipv6_is_unspecified(src)) { + l2_interface_t* l2 = l2_interface_find_by_index(ifx); + if (!l2) { + netpkt_unref(pkt); + return; + } + + int ok = 0; + for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[i]; + if (!v6) continue; + if (ipv6_cmp(v6->ip, src) != 0) continue; + if (v6->cfg == IPV6_CFG_DISABLE) { + netpkt_unref(pkt); + return; + } + if (v6->dad_state != IPV6_DAD_OK) { + netpkt_unref(pkt); + return; + } + ok = 1; + src_v6 = v6; + break; + } + if (!ok) { + netpkt_unref(pkt); + return; + } + } + + if (ipv6_is_linklocal(src) && !ipv6_is_linklocal(dst) && !ipv6_is_multicast(dst)) { + netpkt_unref(pkt); + return; + } + + uint8_t dst_mac[6]; + l2_interface_t* l2 = l2_interface_find_by_index(ifx); + if (ipv6_is_multicast(dst)) ipv6_multicast_mac(dst, dst_mac); + else if (l2 && l2->kind == NET_IFK_LOCALHOST) memset(dst_mac, 0, 6); + else if (!ndp_resolve_on(ifx, nh, dst_mac, 200)) { + netpkt_unref(pkt); + return; + } + + uint16_t mtu = 1500; + + if (!src_v6 && opts && opts->scope == IP_TX_BOUND_L3) src_v6 = l3_ipv6_find_by_id(opts->index); + if (src_v6 && src_v6->mtu) mtu = src_v6->mtu; + + uint16_t pmtu = ipv6_pmtu_get(dst); + if (pmtu && pmtu ver_tc_fl = bswap32((uint32_t)(6u << 28)); + ip6->payload_len = bswap16((uint16_t)seg_len); + ip6->next_header = next_header; + ip6->hop_limit = hop_limit ? 
hop_limit : 64; + memcpy(ip6->src, src, 16); + memcpy(ip6->dst, dst, 16); + + eth_send_frame_on(ifx, ETHERTYPE_IPV6, dst_mac, pkt); + return; + } + + if (dontfrag) { + netpkt_unref(pkt); + return; + } + + uint32_t frag_hdr_len = (uint32_t)sizeof(ipv6_frag_hdr_t); + if ((uint32_t)mtu < hdr_len + frag_hdr_len + 8u) { + netpkt_unref(pkt); + return; + } + + uint32_t max_chunk = (uint32_t)mtu - hdr_len - frag_hdr_len; + max_chunk = (max_chunk / 8u) * 8u; + if (max_chunk == 0) { + netpkt_unref(pkt); + return; + } + + rng_t rng; + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&rng, virt_timer); + uint32_t ident = rng_next32(&rng); + + uint32_t off = 0; + const uint8_t* data = (const uint8_t*)netpkt_data(pkt); + uint32_t data_len = seg_len; + + while (off < data_len) { + uint32_t remain = data_len - off; + uint32_t chunk = (remain > max_chunk)? max_chunk : remain; + uint8_t more = (off + chunk < data_len) ? 1u : 0u; + + uint32_t payload_len = frag_hdr_len + chunk; + uint32_t frame_len = hdr_len + payload_len; + + netpkt_t* fpkt = netpkt_alloc(frame_len, (uint32_t)sizeof(eth_hdr_t), 0); + if (!fpkt) break; + void* buf = netpkt_put(fpkt, frame_len); + if (!buf) { + netpkt_unref(fpkt); + break; + } + + ipv6_hdr_t* ip6 = (ipv6_hdr_t*)buf; + ip6->ver_tc_fl = bswap32((uint32_t)(6u << 28)); + ip6->payload_len = bswap16((uint16_t)payload_len); + ip6->next_header = 44; + ip6->hop_limit = hop_limit ? 
hop_limit : 64; + memcpy(ip6->src, src, 16); + memcpy(ip6->dst, dst, 16); + + ipv6_frag_hdr_t* fh = (ipv6_frag_hdr_t*)((uintptr_t)buf + hdr_len); + fh->next_header = next_header; + fh->reserved = 0; + uint16_t off_flags = (uint16_t)(((off / 8u) & 0x1FFFu) << 3); + if (more) off_flags |= 0x0001u; + fh->offset_flags = bswap16(off_flags); + fh->identification = bswap32(ident); + + memcpy((uint8_t*)(fh + 1), data + off, chunk); + + eth_send_frame_on(ifx, ETHERTYPE_IPV6, dst_mac, fpkt); + + off += chunk; + } + + netpkt_unref(pkt); +} + +static bool ipv6_skip_ext_headers(uint8_t* nh, uintptr_t* l4, uint32_t* l4_len) { + if (!nh || !l4 || !l4_len) return false; + + for(;;) { + uint8_t h = *nh; + if (h == 44) return true; + + if (h == 0 ||h == 43 || h == 60) { + if (*l4_len < 2) return false; + const uint8_t* p = (const uint8_t*)(*l4); + uint8_t next = p[0]; + uint8_t hlen = p[1]; + uint32_t bytes = (uint32_t)(hlen + 1u)*8; + if (bytes > *l4_len) return false; + *nh = next; + *l4 += bytes; + *l4_len -= bytes; + continue; + } + + if (h == 51) { + if (*l4_len < 2) return false; + const uint8_t* p = (const uint8_t*)(*l4); + uint8_t next = p[0]; + uint8_t plen = p[1]; + uint32_t bytes = ((uint32_t)plen + 2u)*4; + if (bytes > *l4_len) return false; + *nh = next; + *l4 += bytes; + *l4_len -= bytes; + continue; + } + + return true; + } +} + +void ipv6_input(uint16_t ifindex, netpkt_t* pkt, const uint8_t src_mac[6]) { + if (!pkt) return; + uint32_t ip_len = netpkt_len(pkt); + uintptr_t ip_ptr = netpkt_data(pkt); + if (ip_len < sizeof(ipv6_hdr_t)) return; + + ipv6_hdr_t* ip6 = (ipv6_hdr_t*)ip_ptr; + uint32_t v = bswap32(ip6->ver_tc_fl); + if ((v >> 28) != 6) return; + uint32_t now = (uint32_t)get_time(); + for (int i = 0; i < REASS_SLOTS; i++) { + reass_slot_t *s = &g_reass[i]; + if (!s->used) continue; + + if (now - s->first_rx_ms < 60000u) continue; + + if (s->have_first && s->first_pkt_len) { + icmpv6_send_error(s->ifindex, s->dst, s->src, s->first_src_mac, 3, 1, 0, 
s->first_pkt, s->first_pkt_len); + } + + reass_free(s); + } + + uint16_t payload_len = bswap16(ip6->payload_len); + if ((uint32_t)payload_len + sizeof(ipv6_hdr_t) > ip_len) return; + (void)netpkt_trim(pkt, (uint32_t)payload_len + (uint32_t)sizeof(ipv6_hdr_t)); + ip_len = netpkt_len(pkt); + + if (ipv6_is_linklocal(ip6->src) && + !ipv6_is_linklocal(ip6->dst) && + !ipv6_is_multicast(ip6->dst) && + ip6->next_header != 58){ + bool dst_is_local = false; + + l2_interface_t* l2 = l2_interface_find_by_index((uint8_t)ifindex); + if (l2) { + for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[i]; + if (!v6) continue; + if (v6->cfg == IPV6_CFG_DISABLE) continue; + if (ipv6_cmp(v6->ip, ip6->dst) == 0) { + dst_is_local = true; + break; + } + } + } + + if (!dst_is_local) return; + } + + uintptr_t l4 = ip_ptr + sizeof(ipv6_hdr_t); + uint32_t l4_len = (uint32_t)payload_len; + + if (ipv6_is_linklocal(ip6->dst) && !ipv6_is_unspecified(ip6->src) && !ipv6_is_linklocal(ip6->src)) return; + + if (ifindex && !ipv6_is_unspecified(ip6->src) && src_mac) ndp_table_put_for_l2((uint8_t)ifindex, ip6->src, src_mac, 180000, false); + + uint8_t nh = ip6->next_header; + + if (!ipv6_skip_ext_headers(&nh, &l4, &l4_len)) return; + + if (nh == 44) {//b + if (l4_len < sizeof(ipv6_frag_hdr_t)) return; + + const ipv6_frag_hdr_t* fh = (const ipv6_frag_hdr_t*)l4; + uint8_t inner_nh = fh->next_header; + uint16_t off_flags = bswap16(fh->offset_flags); + uint32_t ident = bswap32(fh->identification); + + uint32_t off = ((uint32_t)(off_flags >> 3) & 0x1FFFu) * 8u; + uint8_t more = (off_flags & 0x0001u) ? 
1u : 0u; + + const uint8_t* frag = (const uint8_t*)(fh + 1); + uint32_t frag_len = l4_len-(uint32_t)sizeof(ipv6_frag_hdr_t); + + if (more && (frag_len & 7u)) { + uint8_t invoke_buf[sizeof(ipv6_hdr_t) + sizeof(ipv6_frag_hdr_t) + 8]; + uint32_t inv_len = (uint32_t)sizeof(ipv6_hdr_t) + l4_len; + const uint8_t *inv =(const uint8_t*) ip6; + if (inv_len > sizeof(invoke_buf)) { + memcpy(invoke_buf, ip6, sizeof(ipv6_hdr_t)); + uint32_t cpy = l4_len; + uint32_t max = (uint32_t)sizeof(ipv6_frag_hdr_t) + 8u; + if (cpy > max) cpy = max; + memcpy(invoke_buf + sizeof(ipv6_hdr_t), (void*)l4, cpy); + inv = invoke_buf; + inv_len = (uint32_t)sizeof(invoke_buf); + } + icmpv6_send_error((uint8_t)ifindex, ip6->dst, ip6->src, src_mac, 4, 0, 4u, inv, inv_len); + return; + } + + if (off + frag_len > 65535u) { + uint8_t invoke_buf[sizeof(ipv6_hdr_t) + sizeof(ipv6_frag_hdr_t) + 8]; + uint32_t inv_len = (uint32_t)sizeof(ipv6_hdr_t) + l4_len; + const uint8_t *inv = (const uint8_t*)ip6; + if (inv_len > sizeof(invoke_buf)) { + memcpy(invoke_buf, ip6, sizeof(ipv6_hdr_t)); + uint32_t cpy = l4_len; + uint32_t max = (uint32_t)sizeof(ipv6_frag_hdr_t) + 8u; + if (cpy > max) cpy = max; + memcpy(invoke_buf + sizeof(ipv6_hdr_t), (void*)l4, cpy); + inv = invoke_buf; + inv_len = (uint32_t)sizeof(invoke_buf); + } + icmpv6_send_error((uint8_t)ifindex, ip6->dst, ip6->src, src_mac, 4, 0, 42u, inv, inv_len); + return; + } + + if (off + frag_len > 2048u * 8u) return; + + reass_slot_t* s = NULL; + uint32_t now = (uint32_t)get_time(); + + for (int i = 0; i < REASS_SLOTS; i++) { + reass_slot_t *t = &g_reass[i]; + if (!t->used) continue; + if (t->ifindex != (uint8_t)ifindex) continue; + if (t->ident != ident) continue; + if (t->next_header != inner_nh) continue; + if (ipv6_cmp(t->src, ip6->src) != 0) continue; + if (ipv6_cmp(t->dst, ip6->dst) != 0) continue; + if (now - t->last_update_ms > 60000u) { + if (t->buf) free_sized(t->buf, 2048u * 8u); + memset(t, 0, sizeof(*t)); + continue; + } + s = t; + break; + } + + 
if (!s) { + for (int i = 0; i < REASS_SLOTS; i++) { + reass_slot_t *t = &g_reass[i]; + if (t->used) continue; + + t->buf = (uint8_t*)malloc(2048u * 8u); + if (!t->buf) return; + + t->used = 1; + t->ifindex = (uint8_t)ifindex; + t->ident = ident; + ipv6_cpy(t->src, ip6->src); + ipv6_cpy(t->dst, ip6->dst); + t->next_header = inner_nh; + t->first_rx_ms = now; + t->last_update_ms = now; + t->total_len = 0; + t->have_last = 0; + t->have_first = 0; + memset(t->first_src_mac, 0, 6); + t->first_pkt_len = 0; + memset(t->first_pkt, 0, sizeof(t->first_pkt)); + memset(t->bitmap, 0, sizeof(t->bitmap)); + s = t; + break; + } + } + + if (!s) return; + int overlap = 0; + uint32_t start = off / 8u; + uint32_t end = (off + frag_len + 7u) / 8u; + if (end > sizeof(s->bitmap)) end = sizeof(s->bitmap); + for (uint32_t i = start; i < end; i++) { + if (s->bitmap[i]) { + overlap = 1; + break; + } + } + if (overlap) { + reass_free(s); + return; + } + + int has_ulh = 0; + if (off == 0) { + uint32_t ulh_off = 0; + uint8_t nh = inner_nh; + int ok = 1; + + while (nh == 0 || nh == 43 || nh == 60 || nh == 51) { + const uint8_t *p = frag + ulh_off; + uint32_t avail = frag_len - ulh_off; + if (avail < 2) { ok = 0; break; } + + uint32_t hlen = 0; + if (nh == 0 || nh == 43 || nh == 60) hlen = ((uint32_t)p[1] + 1u) * 8u; + else hlen = ((uint32_t)p[1] + 2u) * 4u; + + if (hlen > avail) { ok = 0; break; } + + nh = p[0]; + ulh_off += hlen; + } + + if (ok) { + uint32_t need = 1; + if (nh == 6) need = 20; + else if (nh == 17) need = 8; + else if (nh == 58) need = 4; + if (frag_len - ulh_off >= need) has_ulh = 1; + } + } + + if (off == 0 && !has_ulh) { + uint8_t invoke_buf[sizeof(ipv6_hdr_t) + sizeof(ipv6_frag_hdr_t) + 64]; + uint32_t inv_len = (uint32_t)sizeof(ipv6_hdr_t) + l4_len; + const uint8_t *inv = (const uint8_t*)ip6; + if (inv_len > sizeof(invoke_buf)) { + memcpy(invoke_buf, ip6, sizeof(ipv6_hdr_t)); + uint32_t cpy = l4_len; + uint32_t max = (uint32_t)sizeof(ipv6_frag_hdr_t) + 64u; + if (cpy > max) 
cpy = max; + memcpy(invoke_buf + sizeof(ipv6_hdr_t), (void*)l4, cpy); + inv = invoke_buf; + inv_len = (uint32_t)sizeof(invoke_buf); + } + icmpv6_send_error((uint8_t)ifindex, ip6->dst, ip6->src, src_mac, 4, 3, 0u, inv, inv_len); + reass_free(s); + return; + } + + if (off == 0 && !s->have_first) { + uint32_t inv_len = (uint32_t)sizeof(ipv6_hdr_t) + l4_len; + if (inv_len > sizeof(s->first_pkt)) inv_len = sizeof(s->first_pkt); + memcpy(s->first_pkt, ip6, inv_len); + s->first_pkt_len = (uint16_t)inv_len; + memcpy(s->first_src_mac, src_mac, 6); + s->have_first = 1; + } + + memcpy(s->buf + off, frag, frag_len); + + start = off / 8u; + end = (off + frag_len + 7u) / 8u; + if (end > sizeof(s->bitmap)) end = sizeof(s->bitmap); + for (uint32_t i = start; i < end; i++) s->bitmap[i] = 1; + + s->last_update_ms = (uint32_t)get_time(); + + if (!more) { + s->have_last = 1; + s->total_len = off + frag_len; + } + + int complete = 0; + if (s->have_last) { + uint32_t needed = (s->total_len + 7u) / 8u; + if (needed <= sizeof(s->bitmap)) { + complete = 1; + for (uint32_t i = 0; i < needed; i++) if (!s->bitmap[i]) { + complete = 0; + break; + } + } + } + if (!complete) return; + + + uintptr_t payload_ptr = (uintptr_t)s->buf; + uint32_t payload_size = s->total_len; + if (!ipv6_skip_ext_headers(&inner_nh, &payload_ptr, &payload_size)) { + reass_free(s); + return; + } + + if (inner_nh == 58) { + icmpv6_input(ifindex, ip6->src, ip6->dst, ip6->hop_limit, src_mac, (const uint8_t*)payload_ptr, payload_size); + reass_free(s); + return; + } + + l2_interface_t* l2 = l2_interface_find_by_index((uint8_t)ifindex); + if (!l2) { reass_free(s); return; } + + l3_ipv6_interface_t* cand[MAX_IPV6_PER_INTERFACE]; + int ccount = 0; + for (int x = 0; x < MAX_IPV6_PER_INTERFACE; x++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[x]; + if (!v6) continue; + if (v6->cfg == IPV6_CFG_DISABLE) continue; + cand[ccount++] = v6; + } + if (ccount == 0) { + reass_free(s); + return; + } + + if (ipv6_is_multicast(ip6->dst)) { + int 
joined = 0; + for (int m = 0; m < l2->ipv6_mcast_count; m++) { + if (ipv6_cmp(l2->ipv6_mcast[m], ip6->dst) == 0) { + joined = 1; + break; + } + } + if (!joined) { + reass_free(s); + return; + } + + for (int i = 0; i < ccount; i++) { + l3_ipv6_interface_t* v6 = cand[i]; + if (!ipv6_is_linklocal(v6->ip) && ipv6_is_linklocal(ip6->dst)) continue; + if (inner_nh == 17) udp_input(IP_VER6, ip6->src, ip6->dst, v6->l3_id, payload_ptr, payload_size); + else if (inner_nh == 6) tcp_input(IP_VER6, ip6->src, ip6->dst, v6->l3_id, payload_ptr, payload_size); + } + + reass_free(s); + return; + } + + int match_count = 0; + uint8_t match_l3id = 0; + for (int i = 0; i < ccount; i++) { + if (ipv6_cmp(cand[i]->ip, ip6->dst) == 0){ + match_count++; + if (match_count == 1) match_l3id = cand[i]->l3_id; + } + } + + if (match_count >= 1) { + if (inner_nh == 6) tcp_input(IP_VER6, ip6->src, ip6->dst, match_l3id, payload_ptr, payload_size); + else if (inner_nh == 17) udp_input(IP_VER6, ip6->src, ip6->dst, match_l3id, payload_ptr, payload_size); + } + + reass_free(s); + return; + } + + if (nh == 58) { + icmpv6_input(ifindex, ip6->src, ip6->dst, ip6->hop_limit, src_mac, (const uint8_t*)l4, l4_len); + return; + } + + l2_interface_t* l2 = l2_interface_find_by_index((uint8_t)ifindex); + if (!l2) return; + + l3_ipv6_interface_t* cand[MAX_IPV6_PER_INTERFACE]; + int ccount = 0; + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; ++s) { + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!v6) continue; + if (v6->cfg == IPV6_CFG_DISABLE) continue; + cand[ccount++] = v6; + } + if (ccount == 0) return; + + if (ipv6_is_multicast(ip6->dst)) { + int joined = 0; + for (int m = 0; m < l2->ipv6_mcast_count; m++) { + if (ipv6_cmp(l2->ipv6_mcast[m], ip6->dst) == 0) { + joined = 1; + break; + } + } + if (!joined) return; + + for (int i = 0; i < ccount; i++) { + l3_ipv6_interface_t* v6 = cand[i]; + if (!ipv6_is_linklocal(v6->ip) && ipv6_is_linklocal(ip6->dst)) + continue; + + switch (ip6->next_header) { + case 17: + 
udp_input(IP_VER6, ip6->src, ip6->dst, v6->l3_id, l4, l4_len); + break; + case 6: + tcp_input(IP_VER6, ip6->src, ip6->dst, v6->l3_id, l4, l4_len); + break; + default: + break; + } + } + return; + } + + int match_count = 0; + uint8_t match_l3id = 0; + for (int i = 0; i < ccount; ++i) { + if (ipv6_cmp(cand[i]->ip, ip6->dst) == 0) { + match_count++; + if (match_count == 1) match_l3id = cand[i]->l3_id; + } + } + + if (match_count >= 1) { + switch (ip6->next_header) { + case 6: + tcp_input(IP_VER6, ip6->src, ip6->dst, match_l3id, l4, l4_len); + break; + case 17: + udp_input(IP_VER6, ip6->src, ip6->dst, match_l3id, l4, l4_len); + break; + default: + break; + } + return; + } +} \ No newline at end of file diff --git a/kernel/networking/internet_layer/ipv6.h b/kernel/networking/internet_layer/ipv6.h new file mode 100644 index 00000000..a8e8fd36 --- /dev/null +++ b/kernel/networking/internet_layer/ipv6.h @@ -0,0 +1,32 @@ +#pragma once + +#include "types.h" +#include "net/network_types.h" +#include "networking/netpkt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct __attribute__((packed)) { + uint32_t ver_tc_fl; + uint16_t payload_len; + uint8_t next_header; + uint8_t hop_limit; + uint8_t src[16]; + uint8_t dst[16]; +} ipv6_hdr_t; + +typedef ip_tx_scope_t ipv6_tx_scope_t; +typedef ip_tx_opts_t ipv6_tx_opts_t; + +void ipv6_send_packet(const uint8_t dst[16], uint8_t next_header, netpkt_t* pkt, const ipv6_tx_opts_t* opts, uint8_t hop_limit, uint8_t dontfrag); +void ipv6_input(uint16_t ifindex, netpkt_t* pkt, const uint8_t src_mac[6]); + +uint16_t ipv6_pmtu_get(const uint8_t dst[16]); +void ipv6_pmtu_note(const uint8_t dst[16], uint16_t mtu); + + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/kernel/networking/internet_layer/ipv6_route.c b/kernel/networking/internet_layer/ipv6_route.c new file mode 100644 index 00000000..201bcea3 --- /dev/null +++ b/kernel/networking/internet_layer/ipv6_route.c @@ -0,0 +1,313 @@ +#include "ipv6_route.h" 
+#include "std/memory.h" +#include "std/string.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/interface_manager.h" +#include "syscalls/syscalls.h" + +static bool v6_l3_ok_for_tx(l3_ipv6_interface_t* v6, int dst_is_ll, int dst_is_loop) { + if (!v6 || !v6->l2) return false; + if (!v6->l2->is_up) return false; + if (v6->cfg == IPV6_CFG_DISABLE) return false; + if (v6->is_localhost && !dst_is_loop) return false; + if (ipv6_is_unspecified(v6->ip)) return false; + if (v6->dad_state != IPV6_DAD_OK)return false; + if (!v6->port_manager) return false; + + int src_is_ll = ipv6_is_linklocal(v6->ip) ? 1 : 0; + if (src_is_ll != dst_is_ll) return false; + return true; +} + +static bool l3_allowed(uint8_t id, const uint8_t* allowed, int n) { + if (!allowed || n <= 0) return true; + for (int i = 0; i < n; ++i) if (allowed[i] == id) return true; + return false; +} + +bool ipv6_build_tx_plan(const uint8_t dst[16], const ip_tx_opts_t* hint, const uint8_t* allowed_l3, int allowed_n, ipv6_tx_plan_t* out) { + if (!dst || !out) return false; + + memset(out, 0, sizeof(*out)); + out->fixed_opts.scope = IP_TX_AUTO; + out->fixed_opts.index = 0; + + int dst_is_ll = (ipv6_is_linklocal(dst) || ipv6_is_linkscope_mcast(dst)) ? 1 : 0; + int dst_is_loop = ipv6_is_loopback(dst) ? 
1 : 0; + + if (hint && hint->scope == IP_TX_BOUND_L3) { + uint8_t id = hint->index; + if (!l3_allowed(id, allowed_l3, allowed_n)) return false; + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(id); + if (!v6_l3_ok_for_tx(v6, dst_is_ll, dst_is_loop)) return false; + out->l3_id = id; + memcpy(out->src_ip, v6->ip, 16); + out->fixed_opts.scope = IP_TX_BOUND_L3; + out->fixed_opts.index = id; + return true; + } + + uint8_t cand[64]; + int n = 0; + + if (hint && hint->scope == IP_TX_BOUND_L2) { + l2_interface_t* l2 = l2_interface_find_by_index(hint->index); + if (!l2 || !l2->is_up) return false; + for (int s = 0; s < MAX_IPV6_PER_INTERFACE && n < (int)sizeof(cand); ++s){ + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!v6_l3_ok_for_tx(v6, dst_is_ll, dst_is_loop)) continue; + if (!l3_allowed(v6->l3_id, allowed_l3, allowed_n)) continue; + cand[n++] = v6->l3_id; + } + } else { + uint8_t cnt = l2_interface_count(); + for (uint8_t i = 0; i < cnt && n < (int)sizeof(cand); ++i){ + l2_interface_t* l2 = l2_interface_at(i); + if (!l2 || !l2->is_up) continue; + for (int s = 0; s < MAX_IPV6_PER_INTERFACE && n < (int)sizeof(cand); ++s){ + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!v6_l3_ok_for_tx(v6, dst_is_ll, dst_is_loop)) continue; + if (!l3_allowed(v6->l3_id, allowed_l3, allowed_n)) continue; + cand[n++] = v6->l3_id; + } + } + } + + if (n == 0) return false; + + uint8_t chosen = 0; + if (!ipv6_rt_pick_best_l3_in(cand, n, dst, &chosen)) chosen = cand[0]; + + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(chosen); + if (!v6_l3_ok_for_tx(v6, dst_is_ll, dst_is_loop)) return false; + + out->l3_id = chosen; + memcpy(out->src_ip, v6->ip, 16); + out->fixed_opts.scope = IP_TX_BOUND_L3; + out->fixed_opts.index = chosen; + return true; +} + +struct ipv6_rt_table { + ipv6_rt_entry_t e[IPV6_RT_PER_IF_MAX]; + int len; +}; + +ipv6_rt_table_t* ipv6_rt_create(void) { + ipv6_rt_table_t* t = malloc(sizeof(*t)); + if (!t) return 0; + + memset(t, 0, sizeof(*t)); + return t; +} + +void 
ipv6_rt_destroy(ipv6_rt_table_t* t) { + if (!t) return; + + free_sized(t, sizeof(*t)); +} + +void ipv6_rt_clear(ipv6_rt_table_t* t) { + if (!t) return; + + t->len = 0; + memset(t->e, 0, sizeof(t->e)); +} + +bool ipv6_rt_add_in(ipv6_rt_table_t* t, const uint8_t net[16], uint8_t plen, const uint8_t gw[16], uint16_t metric) { + if (!t) return false; + + for (int i = 0; i < t->len; i++) { + if (t->e[i].prefix_len == plen && memcmp(t->e[i].network, net, 16) == 0) { + memcpy(t->e[i].gateway, gw, 16); + t->e[i].metric = metric; + return true; + } + } + + if (t->len >= IPV6_RT_PER_IF_MAX) return false; + + memcpy(t->e[t->len].network, net, 16); + memcpy(t->e[t->len].gateway, gw, 16); + t->e[t->len].prefix_len = plen; + t->e[t->len].metric = metric; + t->len++; + + return true; +} + +bool ipv6_rt_del_in(ipv6_rt_table_t* t, const uint8_t net[16], uint8_t plen) { + if (!t) return false; + + for (int i = 0; i < t->len; i++) { + if (t->e[i].prefix_len == plen && memcmp(t->e[i].network, net, 16) == 0) { + t->e[i] = t->e[--t->len]; + memset(&t->e[t->len], 0, sizeof(t->e[0])); + return true; + } + } + + return false; +} + +bool ipv6_rt_lookup_in(const ipv6_rt_table_t* t, const uint8_t dst[16], uint8_t next_hop[16], int* out_pl, int* out_metric) { + if (!t) return false; + + int best_pl = -1; + int best_metric = 0x7FFF; + uint8_t best_gw[16] = {0}; + + for (int i = 0; i < t->len; i++) { + bool match = false; + + if (t->e[i].prefix_len == 0) { + match = true; + } else { + int plen = t->e[i].prefix_len; + int fb = plen / 8; + int rb = plen % 8; + + match = true; + for (int j = 0; j < fb; j++) { + if (dst[j] != t->e[i].network[j]) { + match = false; + break; + } + } + + if (match && rb) { + uint8_t m = (uint8_t)(0xFF << (8 - rb)); + if ((dst[fb] & m) != (t->e[i].network[fb] & m)) match = false; + } + } + + if (!match) continue; + + int pl = t->e[i].prefix_len; + int met = t->e[i].metric; + + if (pl > best_pl || (pl == best_pl && met < best_metric)) { + best_pl = pl; + best_metric = 
met; + memcpy(best_gw, t->e[i].gateway, 16); + } + } + + if (best_pl < 0) return false; + + if (next_hop) memcpy(next_hop, best_gw, 16); + if (out_pl) *out_pl =best_pl; + if (out_metric) *out_metric = best_metric; + + return true; +} + +void ipv6_rt_ensure_basics(ipv6_rt_table_t* t, const uint8_t ip[16], uint8_t plen, const uint8_t gw[16], uint16_t base_metric) { + if (!t) return; + + if (ip && plen &&!ipv6_is_unspecified(ip)) { + uint8_t net[16]; + ipv6_cpy(net, ip); + + if (plen < 128) { + int fb = plen / 8; + int rb = plen % 8; + + for (int i = fb + (rb > 0); i < 16; i++) net[i] = 0; + + if (rb) { + uint8_t m = (uint8_t)(0xFF <<(8 - rb)); + net[fb] &=m; + } + } + + ipv6_rt_add_in(t, net, plen, (const uint8_t[16]){0}, base_metric); + } + + if (gw && !ipv6_is_unspecified(gw)) { + uint8_t z[16] = {0}; + ipv6_rt_add_in(t, z, 0, gw, (uint16_t)(base_metric + 1)); + } +} + +void ipv6_rt_sync_basics(ipv6_rt_table_t* t, const uint8_t ip[16], uint8_t plen, const uint8_t gw[16], uint16_t base_metric) { + if (!t) return; + + uint8_t z[16] = {0}; + + if (gw && !ipv6_is_unspecified(gw)) ipv6_rt_add_in(t, z, 0,gw, (uint16_t)(base_metric + 1)); + else ipv6_rt_del_in(t, z, 0); + + if (ip && plen && !ipv6_is_unspecified(ip)) { + uint8_t net[16]; + ipv6_cpy(net, ip); + + if (plen < 128) { + int fb = plen / 8; + int rb = plen % 8; + + for (int i = fb + (rb > 0); i < 16; i++)net[i] = 0; + + if (rb) { + uint8_t m = (uint8_t)(0xFF << (8 - rb)); + net[fb] &= m; + } + } + + ipv6_rt_add_in(t, net, plen, (const uint8_t[16]) {0}, base_metric); + } +} + +bool ipv6_rt_pick_best_l3_in(const uint8_t* l3_ids, int n_ids, const uint8_t dst[16], uint8_t* out_l3) { + int best_pl = -1; + int best_cost = 0x7FFFFFFF; + uint8_t best_l3 = 0; + + for (int i = 0; i < n_ids; i++) { + l3_ipv6_interface_t* x = l3_ipv6_find_by_id(l3_ids[i]); + if (!x || !x->l2)continue; + if (x->cfg == IPV6_CFG_DISABLE) continue; + if (ipv6_is_unspecified(x->ip)) continue; + + int l2base = x->l2->base_metric; + + int pl_conn 
= -1; + if (x->prefix_len) { + int pl = ipv6_common_prefix_len(dst, x->ip); + if (pl >= x->prefix_len) pl_conn = x->prefix_len; + } + + int pl_tab = -1; + int met_tab = 0x7FFF; + + if (x->routing_table) { + uint8_t via[16] = {0}; + int out_pl = -1; + int out_met = 0x7FFF; + + if (ipv6_rt_lookup_in((const ipv6_rt_table_t*)x->routing_table, dst, via, &out_pl, &out_met)) { + pl_tab = out_pl; + met_tab = out_met; + } + } + + int cand_pl = pl_conn; + int cand_cost = l2base; + + if (pl_tab > cand_pl || (pl_tab == cand_pl && l2base + met_tab < cand_cost)) { + cand_pl = pl_tab; + cand_cost = l2base + met_tab; + } + + if (cand_pl > best_pl || (cand_pl == best_pl && cand_cost = 0; --bpos) { + if (x & (1u << bpos)) return bits + (7 - bpos); + } + } + return 128; +} + +void ipv6_make_multicast(uint8_t scope, ipv6_mcast_kind_t kind, const uint8_t unicast[16], uint8_t out[16]) { + memset(out, 0, 16); + out[0] = 0xFF; + out[1] = scope & 0x0F; + + switch (kind) { + case IPV6_MCAST_ALL_NODES: + out[15] = 0x01; + break; + case IPV6_MCAST_MDNS: + out[15] = 0xFB; + break; + case IPV6_MCAST_ALL_ROUTERS: + out[15] = 0x02; + break; + case IPV6_MCAST_SSDP: + out[15] = 0x0c; + break; + case IPV6_MCAST_DHCPV6_SERVERS: + out[11] = 0x00; + out[12] = 0x01; + out[13] = 0x00; + out[14] = 0x00; + out[15] = 0x02; + break; + case IPV6_MCAST_MLDV2_ROUTERS: + out[11] = 0x00; + out[12] = 0x00; + out[13] = 0x00; + out[14] = 0x00; + out[15] = 0x16; + break; + case IPV6_MCAST_SOLICITED_NODE: + default: + out[11] = 0x01; + out[12] = 0xFF; + out[13] = unicast ? unicast[13] : 0; + out[14] = unicast ? unicast[14] : 0; + out[15] = unicast ? 
unicast[15] : 0; + break; + } +} + +static int hexval(int c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return -1; +} + +bool ipv6_parse(const char* s, uint8_t out[16]) { + if (!s || !out) return false; + + uint16_t words[8] = { 0 }; + int wi = 0, zpos = -1; + const char* p = s; + + if (p[0] == ':' && p[1] == ':') { + zpos = 0; + p += 2; + } + + while (*p) { + if (wi >= 8) return false; + + int val = 0, cnt = 0, hv; + while ((hv = hexval(*p)) >= 0) { + val = (val << 4) | hv; + cnt++; + if (cnt > 4) return false; + p++; + } + if (cnt == 0) return false; + + words[wi++] = (uint16_t)val; + + if (*p == 0) break; + if (*p != ':') return false; + + if (*(p + 1) == ':') { + if (zpos >= 0) return false; + zpos = wi; + p += 2; + if (*p == 0) break; + } else { + p++; + } + } + + int fill = 8 - wi, o = 0; + if (zpos < 0 && wi != 8) return false; + + for (int i = 0; i < 8; i++) { + uint16_t w; + if (zpos >= 0) { + if (i < zpos) w = words[i]; + else if (i < zpos + fill) w = 0; + else w = words[i - fill]; + } else { + w = words[i]; + } + out[o++] = (uint8_t)(w >> 8); + out[o++] = (uint8_t)(w & 0xFF); + } + + return true; +} + +void ipv6_to_string(const uint8_t ip[16], char* buf, int buflen) { + uint16_t w[8]; + for (int i = 0; i < 8; i++) w[i] = (uint16_t)((ip[2 * i] << 8) | ip[2 * i + 1]); + + int best_s = -1, best_l = 0, cur_s = -1, cur_l = 0; + for (int i = 0; i < 8; i++) { + if (w[i] == 0) { + if (cur_s < 0) { cur_s = i; cur_l = 1; } else cur_l++; + if (cur_l > best_l) { + best_l = cur_l; + best_s = cur_s; + } + } else { + cur_s = -1; + cur_l = 0; + } + } + if (best_l <2) { + best_s = -1; + best_l = 0; + } + + int n = 0; + int need_colon = 0; + + for (int i = 0; i < 8; ) { + if (best_l > 0 && i == best_s) { + if (n < buflen) buf[n++] = ':'; + if (n < buflen) buf[n++] = ':'; + need_colon = 0; + i += best_l; + if (i >= 8) break; + continue; + } + + if (need_colon) { + if (n < 
buflen) buf[n++] = ':'; + } + + int v = w[i]; + int started = 0; + for (int sh = 12; sh >= 0; sh -= 4) { + int d = (v >> sh) & 0xF; + if (!started && d == 0 && sh > 0) continue; + started = 1; + if (n < buflen) buf[n++] = "0123456789abcdef"[d]; + } + if (!started) { + if (n < buflen) buf[n++] = '0'; + } + + need_colon = 1; + i++; + } + + if (n < buflen) buf[n] = 0; + else if (buflen > 0) buf[buflen - 1] = 0; +} + +void ipv6_multicast_mac(const uint8_t ip[16], uint8_t mac[6]) { + mac[0] = 0x33; + mac[1] = 0x33; + mac[2] = ip[12]; + mac[3] = ip[13]; + mac[4] = ip[14]; + mac[5] = ip[15]; +} + +void ipv6_make_lla_from_mac(uint8_t ifindex, uint8_t out[16]) { + const uint8_t* mac = network_get_mac(ifindex); + memset(out, 0, 16); + out[0] = 0xFE; + out[1] = 0x80; + if (!mac) { + out[8] = 0x02 ^ 0x02; + out[9] = ifindex; + out[10] = 0x00; + out[11] = 0xFF; + out[12] = 0xFE; + out[13] = ifindex; + out[14] = 0x00; + out[15] = 0x01; + return; + } + out[8] = mac[0]^ 0x02; + out[9] = mac[1]; + out[10] = mac[2]; + out[11] = 0xFF; + out[12] = 0xFE; + out[13] = mac[3]; + out[14] = mac[4]; + out[15] = mac[5]; +} \ No newline at end of file diff --git a/kernel/networking/internet_layer/ipv6_utils.h b/kernel/networking/internet_layer/ipv6_utils.h new file mode 100644 index 00000000..69ee0724 --- /dev/null +++ b/kernel/networking/internet_layer/ipv6_utils.h @@ -0,0 +1,55 @@ +#pragma once +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + IPV6_MCAST_SOLICITED_NODE = 0, + IPV6_MCAST_ALL_NODES= 1, + IPV6_MCAST_ALL_ROUTERS = 2, + IPV6_MCAST_DHCPV6_SERVERS = 3, + IPV6_MCAST_SSDP = 4, + IPV6_MCAST_MDNS = 5, + IPV6_MCAST_MLDV2_ROUTERS = 6, +} ipv6_mcast_kind_t; + +bool ipv6_is_unspecified(const uint8_t ip[16]); +bool ipv6_is_loopback(const uint8_t ip[16]); +bool ipv6_is_multicast(const uint8_t ip[16]); +bool ipv6_is_ula(const uint8_t ip[16]); +bool ipv6_is_linklocal(const uint8_t ip[16]); +int ipv6_cmp(const uint8_t a[16], const uint8_t b[16]); +void 
ipv6_cpy(uint8_t dst[16], const uint8_t src[16]); +int ipv6_common_prefix_len(const uint8_t a[16], const uint8_t b[16]); +void ipv6_make_multicast(uint8_t scope, ipv6_mcast_kind_t kind, const uint8_t unicast[16], uint8_t out[16]); +void ipv6_to_string(const uint8_t ip[16], char* buf, int buflen); +bool ipv6_parse(const char* s, uint8_t out[16]); +void ipv6_multicast_mac(const uint8_t ip[16], uint8_t mac[6]); +void ipv6_make_lla_from_mac(uint8_t ifindex, uint8_t out[16]); + +static inline int ipv6_is_placeholder_gua(const uint8_t ip[16]) { + if (!ip) return 0; + if (ip[0] != 0x20 || ip[1] != 0x00) return 0; + for (int i = 2; i < 16; i++) if (ip[i] != 0) return 0; + return 1; +} + +static inline void ipv6_make_placeholder_gua(uint8_t out[16]) { + if (!out) return; + for (int i = 0; i < 16; i++) out[i] = 0; + out[0] = 0x20; + out[1] = 0x00; +} + +static inline bool ipv6_is_linkscope_mcast(const uint8_t ip[16]){ + if (!ip) return false; + if (ip[0] != 0xFF) return false; + uint8_t scope = (uint8_t)(ip[1] & 0x0F); + return scope == 0x02; +} + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/internet_layer/mld.c b/kernel/networking/internet_layer/mld.c new file mode 100644 index 00000000..33413473 --- /dev/null +++ b/kernel/networking/internet_layer/mld.c @@ -0,0 +1,393 @@ +#include "networking/internet_layer/mld.h" + +#include "kernel_processes/kprocess_loader.h" +#include "math/rng.h" +#include "networking/interface_manager.h" +#include "net/checksums.h" +#include "networking/internet_layer/ipv6.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/link_layer/eth.h" +#include "std/memory.h" +#include "syscalls/syscalls.h" + +#define MLD_TYPE_QUERY 130 +#define MLD_TYPE_REPORT_V1 131 +#define MLD_TYPE_DONE_V1 132 +#define MLD_TYPE_REPORT_V2 143 + +#define MLDV2_RTYPE_MODE_IS_INCLUDE 1 +#define MLDV2_RTYPE_MODE_IS_EXCLUDE 2 +#define MLDV2_RTYPE_CHANGE_TO_INCLUDE 3 +#define MLDV2_RTYPE_CHANGE_TO_EXCLUDE 4 +#define 
MLDV2_RTYPE_ALLOW_NEW_SOURCES 5 +#define MLDV2_RTYPE_BLOCK_OLD_SOURCES 6 + +typedef struct { + uint8_t used; + uint8_t ifindex; + uint8_t group[16]; + uint32_t refresh_ms; + uint32_t query_due_ms; + uint8_t query_pending; +} mld_state_t; + +static volatile int mld_daemon_running = 0; +static uint32_t mld_uptime_ms = 0; +static rng_t mld_rng; +static int mld_rng_inited = 0; + +#define MLD_MAX_TRACK 64 +#define MLD_REFRESH_PERIOD_MS 60000 + +static mld_state_t mld_states[MLD_MAX_TRACK]; + +static bool mld_pick_src_ip(uint8_t ifindex, uint8_t out_src_ip[16]); +static mld_state_t* mld_find_state(uint8_t ifindex, const uint8_t group[16]); + +static int mld_is_our_src(uint8_t ifindex, const uint8_t src_ip[16]) { + uint8_t my_ip[16]; + if(!mld_pick_src_ip(ifindex, my_ip)) return 0; + return (ipv6_cmp(my_ip, src_ip) == 0); +} + +static void mld_suppress_pending(uint8_t ifindex, const uint8_t src_ip[16], const uint8_t group[16]) { + if(mld_is_our_src(ifindex, src_ip)) return; + + mld_state_t* s = mld_find_state(ifindex, group); + if(!s) return; + if(!s->query_pending) return; + + s->query_pending = 0; + s->query_due_ms = 0; +} + +static bool mld_pick_src_ip(uint8_t ifindex, uint8_t out_src_ip[16]) { + l2_interface_t *l2; + l3_ipv6_interface_t *best; + l3_ipv6_interface_t *v6; + uint8_t i; + + l2 = l2_interface_find_by_index(ifindex); + if(!l2) return false; + + best = NULL; + for(i = 0; i < l2->ipv6_count; i++) { + v6 = l2->l3_v6[i]; + if(!v6) continue; + if(ipv6_is_unspecified(v6->ip)) continue; + if(ipv6_is_linklocal(v6->ip)) { + + + memcpy(out_src_ip, v6->ip, 16); + return true; + } + if(!best) best = v6; + } + + if(!best) return false; + memcpy(out_src_ip, best->ip, 16); + return true; +} + +static bool mld_send_report(uint8_t ifindex, const uint8_t group[16], uint8_t record_type) { + uint8_t src_ip[16]; + uint8_t dst_ip[16]; + uint8_t dst_mac[6]; + uint8_t icmp[28]; + + if(!mld_pick_src_ip(ifindex, src_ip)) return false; + + ipv6_make_multicast(2, 
IPV6_MCAST_MLDV2_ROUTERS, NULL, dst_ip); + ipv6_multicast_mac(dst_ip, dst_mac); + + memset(icmp, 0, sizeof(icmp)); + icmp[0] = MLD_TYPE_REPORT_V2; + icmp[6] = 0; + icmp[7] = 1; + + icmp[8] = record_type; + icmp[9] = 0; + icmp[10] = 0; + icmp[11] = 0; + memcpy(icmp + 12, group, 16); + + uint16_t csum = checksum16_pipv6(src_ip, dst_ip, 58, icmp, sizeof(icmp)); + icmp[2] = (uint8_t)(csum >> 8); + icmp[3] = (uint8_t)(csum & 0xFF); + + uint8_t hbh[8]; + hbh[0] = 58; + hbh[1] = 0; + hbh[2] = 5; + hbh[3] = 2; + hbh[4] = 0; + hbh[5] = 0; + hbh[6] = 0; + hbh[7] = 0; + + uint32_t payload_len = (uint32_t)sizeof(hbh) + (uint32_t)sizeof(icmp); + uint32_t total = (uint32_t)sizeof(ipv6_hdr_t) + payload_len; + uint32_t headroom = (((uint32_t)sizeof(eth_hdr_t) + 7u) & ~7u); + + netpkt_t* pkt = netpkt_alloc(total, headroom, 0); + if(!pkt) return false; + + ipv6_hdr_t* ip6 = (ipv6_hdr_t*)netpkt_put(pkt, (uint32_t)sizeof(ipv6_hdr_t)); + if(!ip6) { + netpkt_unref(pkt); + return false; + } + + ((uint8_t*)&ip6->ver_tc_fl)[0] = 0x60; + ((uint8_t*)&ip6->ver_tc_fl)[1] = 0x00; + ((uint8_t*)&ip6->ver_tc_fl)[2] = 0x00; + ((uint8_t*)&ip6->ver_tc_fl)[3] = 0x00; + + ip6->payload_len = bswap16((uint16_t)payload_len); + ip6->next_header = 0; + ip6->hop_limit = 1; + memcpy(ip6->src, src_ip, 16); + memcpy(ip6->dst, dst_ip, 16); + + uint8_t* hb = (uint8_t*)netpkt_put(pkt, (uint32_t)sizeof(hbh)); + if(!hb) { + netpkt_unref(pkt); + return false; + } + memcpy(hb, hbh, sizeof(hbh)); + + uint8_t* icmp_p = (uint8_t*)netpkt_put(pkt, (uint32_t)sizeof(icmp)); + if(!icmp_p) { + netpkt_unref(pkt); + return false; + } + memcpy(icmp_p, icmp, sizeof(icmp)); + + return eth_send_frame_on(ifindex, ETHERTYPE_IPV6, dst_mac, pkt); +} + +static mld_state_t* mld_find_state(uint8_t ifindex, const uint8_t group[16]) { + for(int i = 0; i < MLD_MAX_TRACK; i++) { + mld_state_t* s = &mld_states[i]; + if(!s->used) continue; + if(s->ifindex != ifindex) continue; + if(ipv6_cmp(s->group, group) == 0) return s; + } + return NULL; +} 
+ +static mld_state_t* mld_get_state(uint8_t ifindex, const uint8_t group[16]) { + mld_state_t* s = mld_find_state(ifindex, group); + if(s) return s; + + for(int i = 0; i < MLD_MAX_TRACK; i++) { + if(!mld_states[i].used) { + mld_states[i].used = 1; + mld_states[i].ifindex = ifindex; + ipv6_cpy(mld_states[i].group, group); + mld_states[i].refresh_ms = 0; + mld_states[i].query_due_ms = 0; + mld_states[i].query_pending = 0; + return &mld_states[i]; + } + } + + return NULL; +} + +static int mld_has_pending_timers(void) { + for(int i = 0; i < MLD_MAX_TRACK; i++) { + mld_state_t* s =&mld_states[i]; + if(!s->used) continue; + if(s->query_pending) return 1; + if(s->refresh_ms < MLD_REFRESH_PERIOD_MS) return 1; + } + return 0; +} + +static int mld_is_still_joined(uint8_t ifindex, const uint8_t group[16]) { + l2_interface_t* l2 = l2_interface_find_by_index(ifindex); + if(!l2) return 0; + + for(int i = 0; i < (int)l2->ipv6_mcast_count; i++) { + if(ipv6_cmp(l2->ipv6_mcast[i], group) == 0) return 1; + } + + return 0; +} + +static int mld_daemon_entry(int argc, char* argv[]) { + (void)argc; + (void)argv; + + mld_daemon_running = 1; + + if(! 
mld_rng_inited) { + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&mld_rng, virt_timer); + mld_rng_inited = 1; + } + + const uint32_t tick_ms = 100; + + while(mld_has_pending_timers()) { + mld_uptime_ms += tick_ms; + + for(int i = 0; i < MLD_MAX_TRACK; i++) { + mld_state_t* s = &mld_states[i]; + if(!s->used) continue; + + if(!mld_is_still_joined(s->ifindex, s->group)) { + s->used = 0; + continue; + } + + s->refresh_ms += tick_ms; + if(s->refresh_ms >= MLD_REFRESH_PERIOD_MS) { + s->refresh_ms = 0; + (void)mld_send_report(s->ifindex, s->group, MLDV2_RTYPE_MODE_IS_EXCLUDE); + } + + if(s->query_pending && mld_uptime_ms >= s->query_due_ms) { + s->query_pending = 0; + (void)mld_send_report(s->ifindex, s->group, MLDV2_RTYPE_MODE_IS_EXCLUDE); + } + } + + msleep(tick_ms); + } + + mld_daemon_running = 0; + return 0; +} + +static void mld_daemon_kick(void) { + if(mld_daemon_running) return; + if(!mld_has_pending_timers()) return; + create_kernel_process("mld_daemon", mld_daemon_entry, 0, 0); +} + +bool mld_send_join(uint8_t ifindex, const uint8_t group[16]) { + if(!ipv6_is_multicast(group)) return false; + + mld_state_t* s = mld_get_state(ifindex, group); + if(s) s->refresh_ms = 0; + mld_daemon_kick(); + + return mld_send_report(ifindex, group, MLDV2_RTYPE_MODE_IS_EXCLUDE); +} + +bool mld_send_leave(uint8_t ifindex, const uint8_t group[16]) { + if(!ipv6_is_multicast(group)) return false; + + mld_state_t* s = mld_find_state(ifindex, group); + if(s) s->used = 0; + mld_daemon_kick(); + + return mld_send_report(ifindex, group, MLDV2_RTYPE_MODE_IS_INCLUDE); +} + +static void schedule_report(uint8_t ifindex, const uint8_t group[16], uint16_t max_resp_ms) { + if(!ipv6_is_multicast(group)) return; + + if(!mld_rng_inited) { + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&mld_rng, virt_timer); + mld_rng_inited = 1; + } + + mld_state_t* s = mld_get_state(ifindex, group); + if(!s) return; + + 
uint32_t max_ms = (uint32_t)max_resp_ms; + if(max_ms == 0) max_ms = 100; + + uint32_t delay = rng_between32(&mld_rng, 0, max_ms); + uint32_t due = mld_uptime_ms + delay; + + if(!s->query_pending || due < s->query_due_ms) { + s->query_pending = 1; + s->query_due_ms = due; + } + + mld_daemon_kick(); +} + +void mld_input(uint8_t ifindex, const uint8_t src_ip[16], const uint8_t dst_ip[16], const void* l4, uint32_t l4_len) { + if(!ifindex || !src_ip || !dst_ip || !l4) return; + if(l4_len < 8) return; + + const uint8_t* p = (const uint8_t*)l4; + uint8_t type = p[0]; + + if(type == MLD_TYPE_REPORT_V2) { + if(l4_len < 8) return; + uint16_t nrec = (uint16_t)((uint16_t)p[6] << 8) | (uint16_t)p[7]; + uint32_t off = 8; + + for(uint16_t i = 0; i < nrec; i++) { + if(off + 20u > l4_len) break; + + uint8_t rtype = p[off + 0]; + uint8_t aux_words = p[off + 1]; + uint16_t nsrc = (uint16_t)((uint16_t)p[off + 2] << 8) | (uint16_t)p[off + 3]; + const uint8_t* group = p + off + 4; + off += 20; + + uint32_t src_bytes = (uint32_t)nsrc * 16u; + if(off + src_bytes > l4_len) break; + off += src_bytes; + + uint32_t aux_bytes = (uint32_t)aux_words * 4u; + if(off + aux_bytes > l4_len) break; + off += aux_bytes; + + int interest = 0; + if(rtype == MLDV2_RTYPE_MODE_IS_EXCLUDE || rtype == MLDV2_RTYPE_CHANGE_TO_EXCLUDE || rtype == MLDV2_RTYPE_ALLOW_NEW_SOURCES) { + interest = 1; + } else if((rtype == MLDV2_RTYPE_MODE_IS_INCLUDE || rtype == MLDV2_RTYPE_CHANGE_TO_INCLUDE) && nsrc) { + interest = 1; + } + if(!interest) continue; + + mld_suppress_pending(ifindex, src_ip, group); + } + return; + } + + if(type == MLD_TYPE_REPORT_V1) { + if(l4_len < 24) return; + uint8_t group[16]; + memcpy(group, p + 8, 16); + if(ipv6_is_multicast(group)) mld_suppress_pending(ifindex, src_ip, group); + return; + } + + if(type != MLD_TYPE_QUERY) return; + if(l4_len < 24) return; + + uint16_t max_resp_ms = (uint16_t)((uint16_t)p[4] << 8) | (uint16_t)p[5]; + + uint8_t group[16]; + memcpy(group, p + 8, 16); + + 
l2_interface_t* l2 = l2_interface_find_by_index(ifindex); + if(!l2) return; + + if(ipv6_is_unspecified(group)) { + for(int i = 0; i < (int)l2->ipv6_mcast_count; i++) { + const uint8_t* g = l2->ipv6_mcast[i]; + if(ipv6_is_multicast(g)) schedule_report(ifindex, g, max_resp_ms); + } + return; + } + + for(int i = 0; i < (int)l2->ipv6_mcast_count; i++) { + if(ipv6_cmp(l2->ipv6_mcast[i], group) == 0) { + schedule_report(ifindex, group, max_resp_ms); + return; + } + } +} diff --git a/kernel/networking/internet_layer/mld.h b/kernel/networking/internet_layer/mld.h new file mode 100644 index 00000000..35269e92 --- /dev/null +++ b/kernel/networking/internet_layer/mld.h @@ -0,0 +1,17 @@ +#pragma once + +#include "types.h" +#include "net/network_types.h" +#include "networking/netpkt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool mld_send_join(uint8_t ifindex, const uint8_t group[16]); +bool mld_send_leave(uint8_t ifindex, const uint8_t group[16]); +void mld_input(uint8_t ifindex, const uint8_t src_ip[16], const uint8_t dst_ip[16], const void* l4, uint32_t l4_len); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/link_layer/arp.c b/kernel/networking/link_layer/arp.c index e329a1d9..a1534b3c 100644 --- a/kernel/networking/link_layer/arp.c +++ b/kernel/networking/link_layer/arp.c @@ -160,12 +160,15 @@ void arp_send_request_on(uint8_t ifindex, uint32_t target_ip){ memcpy(hdr.sender_mac, local_mac, 6); hdr.sender_ip = bswap32(spa); hdr.target_ip = bswap32(target_ip); - uintptr_t buf = (uintptr_t)malloc(sizeof(hdr)); - if (!buf) return; - memcpy((void*)buf, &hdr, sizeof(hdr)); - sizedptr payload = { buf, sizeof(hdr) }; - (void)eth_send_frame_on(ifindex, ETHERTYPE_ARP, dst_mac, payload); - free_sized((void*)buf, sizeof(hdr)); + netpkt_t* pkt = netpkt_alloc((uint32_t)sizeof(hdr), (uint32_t)sizeof(eth_hdr_t), 0); + if (!pkt) return; + void* p = netpkt_put(pkt, (uint32_t)sizeof(hdr)); + if (!p) { + netpkt_unref(pkt); + return; + } + memcpy(p, &hdr, sizeof(hdr)); + 
(void)eth_send_frame_on(ifindex, ETHERTYPE_ARP, dst_mac, pkt); } static bool l2_has_ip(uint8_t ifindex, uint32_t ip){ @@ -191,16 +194,22 @@ static void arp_send_reply_on(uint8_t ifindex, const arp_hdr_t* in_arp, const ui reply.target_ip = in_arp->sender_ip; reply.sender_ip = bswap32(spa); reply.opcode = bswap16(ARP_OPCODE_REPLY); - uintptr_t buf = (uintptr_t)malloc(sizeof(reply)); - if (!buf) return; - memcpy((void*)buf, &reply, sizeof(reply)); - sizedptr payload = { buf, sizeof(reply) }; - (void)eth_send_frame_on(ifindex, ETHERTYPE_ARP, in_src_mac, payload); - free_sized((void*)buf, sizeof(reply)); + netpkt_t* pkt = netpkt_alloc((uint32_t)sizeof(reply), (uint32_t)sizeof(eth_hdr_t), 0); + if (!pkt) return; + void* p = netpkt_put(pkt, (uint32_t)sizeof(reply)); + if (!p) { + netpkt_unref(pkt); + return; + } + memcpy(p, &reply, sizeof(reply)); + (void)eth_send_frame_on(ifindex, ETHERTYPE_ARP, in_src_mac, pkt); } -void arp_input(uint16_t ifindex, uintptr_t frame_ptr, uint32_t frame_len){ - if (frame_len < sizeof(eth_hdr_t) + sizeof(arp_hdr_t)) return; +void arp_input(uint16_t ifindex, netpkt_t* pkt) { + if (!pkt) return; + uint32_t frame_len = netpkt_len(pkt); + uintptr_t frame_ptr = netpkt_data(pkt); + if (frame_len < (uint32_t)sizeof(eth_hdr_t) + (uint32_t)sizeof(arp_hdr_t)) return; const eth_hdr_t* eth = (const eth_hdr_t*)frame_ptr; const uint8_t* src_mac = eth->src_mac; diff --git a/kernel/networking/link_layer/arp.h b/kernel/networking/link_layer/arp.h index 0f3e11ba..badfb6ee 100644 --- a/kernel/networking/link_layer/arp.h +++ b/kernel/networking/link_layer/arp.h @@ -1,6 +1,7 @@ #pragma once #include "types.h" #include "networking/interface_manager.h" +#include "networking/netpkt.h" #ifdef __cplusplus extern "C" { @@ -43,7 +44,7 @@ void arp_tick_all(uint32_t ms); bool arp_resolve_on(uint8_t ifindex, uint32_t ip, uint8_t mac_out[6], uint32_t timeout_ms); void arp_send_request_on(uint8_t ifindex, uint32_t target_ip); -void arp_input(uint16_t ifindex, uintptr_t 
frame_ptr, uint32_t frame_len); +void arp_input(uint16_t ifindex, netpkt_t* pkt); void arp_set_pid(uint16_t pid); uint16_t arp_get_pid(void); diff --git a/kernel/networking/link_layer/eth.c b/kernel/networking/link_layer/eth.c index d18cd5ad..e50400fc 100644 --- a/kernel/networking/link_layer/eth.c +++ b/kernel/networking/link_layer/eth.c @@ -3,23 +3,17 @@ #include "networking/network.h" #include "arp.h" #include "networking/internet_layer/ipv4.h" -//#include "networking/internet_layer/ipv6.h" +#include "networking/internet_layer/ipv6.h" #include "console/kio.h" #include "syscalls/syscalls.h" - - -uintptr_t create_eth_packet(uintptr_t p, - const uint8_t src_mac[6], - const uint8_t dst_mac[6], - uint16_t type) -{ +uintptr_t create_eth_packet(uintptr_t p, const uint8_t src_mac[6], const uint8_t dst_mac[6], uint16_t type) { eth_hdr_t* eth =(eth_hdr_t*)p; memcpy(eth->dst_mac, dst_mac, 6); memcpy(eth->src_mac, src_mac, 6); eth->ethertype = bswap16(type); - return p + sizeof(eth_hdr_t); + return p + (uint32_t)sizeof(eth_hdr_t); } uint16_t eth_parse_type(uintptr_t ptr){ @@ -37,39 +31,47 @@ const uint8_t* eth_dst(uintptr_t ptr){ return eth->dst_mac; } -bool eth_send_frame_on(uint16_t ifindex, uint16_t ethertype, const uint8_t dst_mac[6], sizedptr payload){ +bool eth_send_frame_on(uint16_t ifindex, uint16_t ethertype, const uint8_t dst_mac[6], netpkt_t* pkt){ const uint8_t* src_mac = network_get_mac(ifindex); - if (!src_mac || !dst_mac) return false; - - uint32_t total = (uint32_t)sizeof(eth_hdr_t) + (uint32_t)payload.size; - uintptr_t buf = (uintptr_t)malloc(total); - if (!buf) return false; + if (!src_mac || !dst_mac || !pkt) { + if (pkt) netpkt_unref(pkt); + return false; + } - uintptr_t ptr = create_eth_packet(buf, src_mac, dst_mac, ethertype); + void* hdrp = netpkt_push(pkt, (uint32_t)sizeof(eth_hdr_t)); + if (!hdrp) { + netpkt_unref(pkt); + return false; + } - if (payload.size) memcpy((void*)ptr, (const void*)payload.ptr, payload.size); + 
(void)create_eth_packet((uintptr_t)hdrp, src_mac, dst_mac, ethertype); - bool ok = (net_tx_frame_on(ifindex, buf, total) == 0); - free_sized((void*)buf, total); + bool ok = (net_tx_frame_on(ifindex, netpkt_data(pkt), netpkt_len(pkt)) == 0); + netpkt_unref(pkt); return ok; } -void eth_input(uint16_t ifindex, uintptr_t frame_ptr, uint32_t frame_len){ +void eth_input(uint16_t ifindex, netpkt_t* pkt) { + if (!pkt) return; + uint32_t frame_len = netpkt_len(pkt); + uintptr_t frame_ptr = netpkt_data(pkt); if (frame_len < sizeof(eth_hdr_t)) return; uint16_t type = eth_parse_type(frame_ptr); const uint8_t* src_mac = eth_src(frame_ptr); - uintptr_t payload_ptr = frame_ptr + sizeof(eth_hdr_t); - uint32_t payload_len = frame_len - (uint32_t)sizeof(eth_hdr_t); + switch (type) { case ETHERTYPE_ARP: - arp_input(ifindex, frame_ptr, frame_len); + arp_input(ifindex, pkt); break; case ETHERTYPE_IPV4: - ipv4_input(ifindex, payload_ptr, payload_len, src_mac); + if (!netpkt_pull(pkt, (uint32_t)sizeof(eth_hdr_t))) break; + ipv4_input(ifindex, pkt, src_mac); break; - case ETHERTYPE_IPV6: //TODO IPV6 + case ETHERTYPE_IPV6: + if (!netpkt_pull(pkt, (uint32_t)sizeof(eth_hdr_t))) break; + ipv6_input(ifindex, pkt, src_mac); break; case ETHERTYPE_VLAN1Q: //TODO vlan break; diff --git a/kernel/networking/link_layer/eth.h b/kernel/networking/link_layer/eth.h index 2ffdedd4..38dc465b 100644 --- a/kernel/networking/link_layer/eth.h +++ b/kernel/networking/link_layer/eth.h @@ -1,6 +1,7 @@ #pragma once #include "types.h" #include "net/network_types.h" +#include "networking/netpkt.h" #ifdef __cplusplus extern "C" { @@ -22,9 +23,9 @@ uint16_t eth_parse_type(uintptr_t frame_ptr); const uint8_t* eth_src(uintptr_t frame_ptr); const uint8_t* eth_dst(uintptr_t frame_ptr); -bool eth_send_frame_on(uint16_t ifindex, uint16_t ethertype, const uint8_t dst_mac[6], sizedptr payload); +bool eth_send_frame_on(uint16_t ifindex, uint16_t ethertype, const uint8_t dst_mac[6], netpkt_t* pkt); -void eth_input(uint16_t 
ifindex, uintptr_t frame_ptr, uint32_t frame_len); +void eth_input(uint16_t ifindex, netpkt_t* pkt); #ifdef __cplusplus } diff --git a/kernel/networking/link_layer/link_utils.c b/kernel/networking/link_layer/link_utils.c new file mode 100644 index 00000000..09d6af3e --- /dev/null +++ b/kernel/networking/link_layer/link_utils.c @@ -0,0 +1,13 @@ +#include "link_utils.h" + +void mac_to_string(const uint8_t mac[6], char out[18]){ + static const char HEX[] = "0123456789abcdef"; + int p = 0; + for (int i = 0; i < 6; ++i) { + uint8_t b = mac ? mac[i] : 0; + out[p++] = HEX[b >> 4]; + out[p++] = HEX[b & 0x0F]; + if (i != 5) out[p++] = ':'; + } + out[p] = 0; +} diff --git a/kernel/networking/link_layer/link_utils.h b/kernel/networking/link_layer/link_utils.h new file mode 100644 index 00000000..78d12204 --- /dev/null +++ b/kernel/networking/link_layer/link_utils.h @@ -0,0 +1,13 @@ +#pragma once + +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void mac_to_string(const uint8_t mac[6], char out[18]); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/link_layer/ndp.c b/kernel/networking/link_layer/ndp.c new file mode 100644 index 00000000..dfa878d8 --- /dev/null +++ b/kernel/networking/link_layer/ndp.c @@ -0,0 +1,1228 @@ +#include "ndp.h" +#include "networking/internet_layer/icmpv6.h" +#include "std/memory.h" +#include "std/string.h" +#include "networking/interface_manager.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/internet_layer/ipv6_route.h" +#include "net/checksums.h" +#include "syscalls/syscalls.h" +#include "networking/network.h" +#include "process/scheduler.h" +#include "math/rng.h" + +typedef struct { + ndp_entry_t entries[NDP_TABLE_MAX]; + uint8_t init; +} ndp_table_impl_t; + +static uint32_t g_ndp_reachable_time_ms = 30000; +static uint32_t g_ndp_retrans_timer_ms = 1000; +static uint8_t g_ndp_max_probes = 3; +static volatile uint16_t g_ndp_pid = 0xFFFF; + +static rng_t g_rng; + +typedef struct 
__attribute__((packed)) { + icmpv6_hdr_t hdr; + uint32_t rsv; + uint8_t target[16]; +} icmpv6_ns_t; + +typedef struct __attribute__((packed)) { + icmpv6_hdr_t hdr; + uint32_t flags; + uint8_t target[16]; +} icmpv6_na_t; + +typedef struct __attribute__((packed)) { + uint8_t type; + uint8_t length; + uint8_t mac[6]; +} icmpv6_opt_lladdr_t; + +typedef struct __attribute__((packed)) { + icmpv6_hdr_t hdr; + uint8_t cur_hop_limit; + uint8_t flags; + uint16_t router_lifetime; + uint32_t reachable_time; + uint32_t retrans_timer; +} icmpv6_ra_t; + +typedef struct __attribute__((packed)) { + uint8_t type; + uint8_t length; + uint8_t prefix_length; + uint8_t flags; + uint32_t valid_lifetime; + uint32_t preferred_lifetime; + uint32_t reserved2; + uint8_t prefix[16]; +} ndp_opt_prefix_info_t; + +typedef struct __attribute__((packed)) { + uint8_t type; + uint8_t length; + uint16_t reserved; + uint32_t mtu; +} ndp_opt_mtu_t; + +static uint8_t g_rs_tries[MAX_L2_INTERFACES]; +static uint32_t g_rs_timer_ms[MAX_L2_INTERFACES]; + +static void make_random_iid(uint8_t out_iid[8]) { + uint64_t x = 0; + + do x = rng_next64(&g_rng); + while (x == 0); + + out_iid[0] = (uint8_t)((x >> 56) & 0xFF); + out_iid[1] = (uint8_t)((x >> 48) & 0xFF); + out_iid[2] = (uint8_t)((x >> 40) & 0xFF); + out_iid[3] = (uint8_t)((x >> 32) & 0xFF); + out_iid[4] = (uint8_t)((x >> 24) & 0xFF); + out_iid[5] = (uint8_t)((x >> 16) & 0xFF); + out_iid[6] = (uint8_t)((x >> 8) & 0xFF); + out_iid[7] = (uint8_t)(x & 0xFF); +} + +static void handle_dad_failed(l3_ipv6_interface_t* v6) { + if (!v6) return; + + uint8_t iid[8]; + uint8_t new_ip[16]; + uint8_t zero16[16] = {0}; + + make_random_iid(iid); + + if (ipv6_is_linklocal(v6->ip)) { + new_ip[0] = 0xFE; + new_ip[1] = 0x80; + memset(new_ip + 2, 0, 6); + memcpy(new_ip + 8, iid, 8); + + (void)l3_ipv6_update(v6->l3_id, new_ip, 64, zero16, v6->cfg, v6->kind); + (void)ndp_request_dad_on(v6->l2 ? 
// Age the RA-derived lifetimes of one IPv6 address.
//
// Placeholder, unspecified and link-local addresses are ignored, as are
// addresses that never recorded an RA update. Once the preferred lifetime has
// elapsed it is zeroed; once the valid lifetime has elapsed the address is
// removed from its interface, or disabled if removal fails. A lifetime of
// 0xFFFFFFFF is treated as infinite.
static void handle_lifetimes(uint32_t now_ms, l3_ipv6_interface_t* v6) {
    if (!v6) return;
    if (ipv6_is_placeholder_gua(v6->ip) || ipv6_is_unspecified(v6->ip) || ipv6_is_linklocal(v6->ip)) return;
    if (v6->ra_last_update_ms == 0) return;

    // Time since the last RA refreshed this address; clamped to zero when the
    // stored stamp is ahead of `now_ms`.
    uint32_t age_ms = 0;
    if (now_ms >= v6->ra_last_update_ms) age_ms = now_ms - v6->ra_last_update_ms;

    // Lifetimes are multiplied by 1000 before comparison with milliseconds;
    // 64-bit math keeps that product from overflowing.
    if (v6->preferred_lifetime != 0 && v6->preferred_lifetime != 0xFFFFFFFFu) {
        if ((uint64_t)age_ms >= (uint64_t)v6->preferred_lifetime * 1000ull) v6->preferred_lifetime = 0;
    }

    if (v6->valid_lifetime == 0xFFFFFFFFu) return;
    if ((uint64_t)age_ms < (uint64_t)v6->valid_lifetime * 1000ull) return;

    // Valid lifetime expired: drop the address, falling back to disabling it.
    if (!l3_ipv6_remove_from_interface(v6->l3_id)) {
        (void)l3_ipv6_set_enabled(v6->l3_id, false);
    }
}
(int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[i]; + if (!v6) continue; + if (v6->cfg == IPV6_CFG_DISABLE) continue; + if (!(v6->kind & IPV6_ADDRK_GLOBAL)) continue; + if (!(v6->cfg & (IPV6_CFG_SLAAC | IPV6_CFG_DHCPV6))) continue; + if (!v6->ra_has)continue; + if (memcmp(v6->prefix, zero16, 16) == 0) continue; + uint8_t m = (v6->ra_flags & RA_FLAG_M) ? 1u : 0u; + uint8_t o = (v6->ra_flags & RA_FLAG_O) ? 1u : 0u; + if (!v6->ra_autonomous) { + if (m) { + uint8_t gw[16]; + + if (v6->ra_is_default) ipv6_cpy(gw, v6->gateway); + else memset(gw, 0, 16); + + v6->dhcpv6_stateless = 0; + v6->dhcpv6_stateless_done = 0; + + if (v6->cfg != IPV6_CFG_DHCPV6 || ipv6_is_placeholder_gua(v6->ip)) { + uint8_t z[16] = {0}; + (void)l3_ipv6_update(v6->l3_id, z, 0, gw, IPV6_CFG_DHCPV6, v6->kind); + } else { + (void)l3_ipv6_update(v6->l3_id, v6->ip, v6->prefix_len, gw, IPV6_CFG_DHCPV6, v6->kind); + } + } else { + v6->dhcpv6_stateless = o ? 1 : 0; + v6->dhcpv6_stateless_done = 0; + } + + continue; + } + v6->dhcpv6_stateless = o ? 
// Record one Prefix Information option from a Router Advertisement on the
// matching global IPv6 slot of interface `ifindex`.
//
// Only /64 prefixes that are not unspecified, multicast or link-local are
// accepted. The slot is the first enabled global SLAAC/DHCPv6 address that
// either has a stored prefix sharing at least 64 bits with `prefix`, or has
// no stored prefix and holds the placeholder address or an address inside
// `prefix`. When nothing matches, a new SLAAC placeholder address is added.
//
//   router_ip        source of the RA; may be NULL (no gateway recorded)
//   router_lifetime  0 means the sender is not a default router
//   valid_lft /
//   preferred_lft    stored unchanged on the slot
//   autonomous       non-zero when the option's A flag was set
//   ra_flags         flags byte of the RA header (see RA_FLAG_M / RA_FLAG_O)
static void ndp_on_ra(uint8_t ifindex, const uint8_t router_ip[16], uint16_t router_lifetime, const uint8_t prefix[16], uint8_t prefix_len, uint32_t valid_lft, uint32_t preferred_lft, uint8_t autonomous, uint8_t ra_flags) {
    if (!ifindex) return;
    if (prefix_len != 64) return;
    if (ipv6_is_unspecified(prefix) || ipv6_is_multicast(prefix) || ipv6_is_linklocal(prefix)) return;

    l2_interface_t* l2 = l2_interface_find_by_index(ifindex);
    if (!l2) return;

    uint32_t now_ms = get_time();
    uint8_t zero16[16] = {0};
    l3_ipv6_interface_t* slot = NULL;

    for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) {
        l3_ipv6_interface_t* v6 = l2->l3_v6[i];
        if (!v6) continue;
        if (v6->cfg == IPV6_CFG_DISABLE) continue;
        // Test the GLOBAL bit the same way apply_ra_policy and the RDNSS
        // handling do; an exact compare would skip a slot that they both
        // accept and add a duplicate placeholder on every RA.
        if (!(v6->kind & IPV6_ADDRK_GLOBAL)) continue;
        if (!(v6->cfg & (IPV6_CFG_SLAAC | IPV6_CFG_DHCPV6))) continue;

        if (memcmp(v6->prefix, zero16, 16) != 0) {
            // Slot already bound to a prefix: it must be this one.
            if (ipv6_common_prefix_len(v6->prefix, prefix) >= 64) {
                slot = v6;
                break;
            }
        } else {
            // Unbound slot: take a placeholder, or an address inside the prefix.
            if (ipv6_is_placeholder_gua(v6->ip)) {
                slot = v6;
                break;
            }

            if (!ipv6_is_unspecified(v6->ip) && !ipv6_is_multicast(v6->ip) && !ipv6_is_linklocal(v6->ip)) {
                if (ipv6_common_prefix_len(v6->ip, prefix) >= 64) {
                    slot = v6;
                    break;
                }
            }
        }
    }

    if (!slot) {
        uint8_t ph[16];
        ipv6_make_placeholder_gua(ph);

        uint8_t id = l3_ipv6_add_to_interface(ifindex, ph, 64, zero16, IPV6_CFG_SLAAC, IPV6_ADDRK_GLOBAL);
        if (!id) return;

        slot = l3_ipv6_find_by_id(id);
        if (!slot) return;
    }

    slot->ra_has = 1;
    slot->ra_autonomous = autonomous ? 1 : 0;
    slot->ra_is_default = router_lifetime != 0;
    slot->ra_last_update_ms = now_ms;
    slot->ra_flags = ra_flags;

    ipv6_cpy(slot->prefix, prefix);

    // Only a default router is remembered as the gateway.
    if (slot->ra_is_default && router_ip) ipv6_cpy(slot->gateway, router_ip);
    else ipv6_cpy(slot->gateway, zero16);

    slot->valid_lifetime = valid_lft;
    slot->preferred_lifetime = preferred_lft;

    if (memcmp(slot->ip, zero16, 16) == 0) slot->timestamp_created = now_ms;

    // Keep the cached interface identifier in sync with a real address.
    if (!ipv6_is_placeholder_gua(slot->ip) && !ipv6_is_unspecified(slot->ip)) memcpy(slot->interface_id, slot->ip + 8, 8);
}
// Insert or refresh the neighbor-cache entry for `ip` on interface `ifindex`.
//
//   mac     link-layer address; when non-NULL the entry becomes REACHABLE and
//           its state timer is reset. When NULL, an existing entry for `ip`
//           keeps its MAC and state; a newly taken slot has a zero MAC and
//           stays in state UNUSED.
//   ttl_ms  entry lifetime; 0 selects 4x the current reachable time.
//   router  marks the neighbor as a router whose router lifetime is `ttl_ms`.
//
// Slot choice: the existing entry for `ip`, else a free slot, else the
// non-router entry with the smallest remaining TTL (slot 0 if every entry is
// a live router).
void ndp_table_put_for_l2(uint8_t ifindex, const uint8_t ip[16], const uint8_t mac[6], uint32_t ttl_ms, bool router) {
    if (!ip) return;

    ndp_table_impl_t* t = l2_ndp(ifindex);
    if (!t) return;

    int idx = ndp_find_slot(t, ip);
    bool existing = idx >= 0;
    if (idx < 0) idx = ndp_find_free(t);

    if (idx < 0) {
        uint32_t best_ttl = 0xFFFFFFFFu;
        int best_i = -1;

        for (int i = 0; i < NDP_TABLE_MAX; i++) {
            ndp_entry_t* e = &t->entries[i];
            if (e->state == NDP_STATE_UNUSED || e->ttl_ms == 0) {
                best_i = i;
                break;
            }

            // Never evict a router whose lifetime is still running.
            if (e->is_router && e->router_lifetime_ms) continue;

            if (e->ttl_ms < best_ttl) {
                best_ttl = e->ttl_ms;
                best_i = i;
            }
        }

        if (best_i < 0) best_i = 0;
        idx = best_i;
    }

    ndp_entry_t* e = &t->entries[idx];

    // A slot that did not already belong to `ip` must start clean; otherwise
    // the new address would inherit the previous neighbor's MAC, state and
    // timers whenever `mac` is NULL.
    if (!existing) ndp_entry_clear(e);

    memcpy(e->ip, ip, 16);

    if (mac) {
        memcpy(e->mac, mac, 6);
        e->state = NDP_STATE_REACHABLE;
        e->timer_ms = g_ndp_reachable_time_ms;
    }

    if (ttl_ms == 0) {
        ttl_ms = g_ndp_reachable_time_ms * 4;
        if (ttl_ms == 0) ttl_ms = 1;   // ttl_ms == 0 would mark the slot empty
    }

    e->ttl_ms = ttl_ms;
    e->is_router = router ? 1 : 0;
    e->router_lifetime_ms = router ? ttl_ms : 0;
    e->probes_sent = 0;
}
// Send a Router Solicitation (ICMPv6 type 133) to the all-routers multicast
// group on `ifindex`.
//
// The source is the first enabled link-local address whose DAD has not
// failed; with no such address the unspecified address is used. RFC 4861
// section 4.1 forbids the Source Link-Layer Address option when the source is
// unspecified, so the option is appended only when both a real source address
// and the interface MAC are available (the same rule ndp_send_ns_on applies
// to DAD solicitations).
static void ndp_send_rs_on(uint8_t ifindex) {
    uint8_t src_ip[16] = {0};
    l2_interface_t* l2 = l2_interface_find_by_index(ifindex);

    if (l2) {
        for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) {
            l3_ipv6_interface_t* v6 = l2->l3_v6[i];
            if (!v6) continue;
            if (v6->cfg == IPV6_CFG_DISABLE) continue;
            if (v6->dad_state == IPV6_DAD_FAILED) continue;

            if (ipv6_is_linklocal(v6->ip)) {
                ipv6_cpy(src_ip, v6->ip);
                break;
            }
        }
    }

    uint8_t dst_ip[16];
    ipv6_make_multicast(2, IPV6_MCAST_ALL_ROUTERS, 0, dst_ip);

    typedef struct __attribute__((packed)) {
        icmpv6_hdr_t hdr;
        uint32_t reserved;
    } icmpv6_rs_t;

    const uint8_t* mac = network_get_mac(ifindex);
    bool with_sllao = mac && !ipv6_is_unspecified(src_ip);

    uint32_t plen = (uint32_t)sizeof(icmpv6_rs_t) + (with_sllao ? (uint32_t)sizeof(icmpv6_opt_lladdr_t) : 0u);
    uintptr_t buf = (uintptr_t)malloc(plen);
    if (!buf) return;

    icmpv6_rs_t* rs = (icmpv6_rs_t*)buf;
    rs->hdr.type = 133;
    rs->hdr.code = 0;
    rs->hdr.checksum = 0;   // must be zero while the checksum is computed
    rs->reserved = 0;

    if (with_sllao) {
        icmpv6_opt_lladdr_t* opt = (icmpv6_opt_lladdr_t*)(buf + sizeof(icmpv6_rs_t));
        opt->type = 1;      // Source Link-Layer Address
        opt->length = 1;    // in units of 8 bytes
        memcpy(opt->mac, mac, 6);
    }

    rs->hdr.checksum = bswap16(checksum16_pipv6(src_ip, dst_ip, 58, (const uint8_t*)buf, plen));

    uint8_t dst_mac[6];
    ipv6_multicast_mac(dst_ip, dst_mac);

    icmpv6_send_on_l2(ifindex, dst_ip, src_ip, dst_mac, (const void*)buf, plen, 255);
    free_sized((void*)buf, plen);
}
// Advance the neighbor cache of every L2 interface that is up by `ms`
// milliseconds. Interfaces that are missing or down are skipped.
static void ndp_tick_all(uint32_t ms) {
    const uint8_t count = l2_interface_count();
    uint8_t pos = 0;

    while (pos < count) {
        l2_interface_t* iface = l2_interface_at(pos);
        pos++;

        if (iface && iface->is_up) ndp_table_tick_for_l2(iface->ifindex, ms);
    }
}
// Put a neighbor entry into the state implied by an advertisement:
// REACHABLE with a fresh reachable timer when it was solicited, STALE otherwise.
static void ndp_entry_apply_na_state(ndp_entry_t* e, uint8_t solicited) {
    if (solicited) {
        e->state = NDP_STATE_REACHABLE;
        e->timer_ms = g_ndp_reachable_time_ms;
    } else {
        e->state = NDP_STATE_STALE;
        e->timer_ms = 0;
    }
}

// Neighbor Solicitation (type 135): detect DAD conflicts for our own
// addresses and answer solicitations for addresses that passed DAD.
static void ndp_handle_ns(uint16_t ifindex, const uint8_t src_ip[16], const uint8_t src_mac[6], const uint8_t* icmp, uint32_t icmp_len) {
    if (icmp_len < sizeof(icmpv6_ns_t)) return;

    const icmpv6_ns_t* ns = (const icmpv6_ns_t*)icmp;
    if (ipv6_is_multicast(ns->target)) return;

    l2_interface_t* l2 = l2_interface_find_by_index((uint8_t)ifindex);
    if (!l2) return;

    l3_ipv6_interface_t* self = 0;

    for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) {
        l3_ipv6_interface_t* v6 = l2->l3_v6[i];
        if (!v6) continue;
        if (v6->cfg == IPV6_CFG_DISABLE) continue;

        if (ipv6_cmp(v6->ip, ns->target) == 0) {
            self = v6;
            break;
        }
    }

    if (!self) return;

    // A solicitation from the unspecified address is another node running DAD
    // for the same target: if ours is still tentative, it has failed.
    if (ipv6_is_unspecified(src_ip)) {
        if (self->dad_state == IPV6_DAD_IN_PROGRESS || self->dad_requested) {
            self->dad_state = IPV6_DAD_FAILED;
            self->dad_timer_ms = 0;
            self->dad_probes_sent = 0;
            self->dad_requested = 0;
        }
        return;
    }

    if (self->dad_state != IPV6_DAD_OK) return;

    // Without the sender's MAC there is nothing to learn and no way to
    // address the unicast reply.
    if (!src_mac) return;

    ndp_table_put_for_l2((uint8_t)ifindex, src_ip, src_mac, 180000, false);

    // `self` matched the target and passed DAD, so it is the reply source.
    if (ipv6_is_unspecified(self->ip)) return;

    const uint8_t* my_mac = network_get_mac((uint8_t)ifindex);
    if (!my_mac) return;

    ndp_send_na_on((uint8_t)ifindex, src_ip, self->ip, ns->target, src_mac, my_mac, 1);
}

// Neighbor Advertisement (type 136): detect DAD conflicts, then create or
// update the neighbor-cache entry for the advertised target.
static void ndp_handle_na(uint16_t ifindex, const uint8_t src_ip[16], const uint8_t src_mac[6], const uint8_t* icmp, uint32_t icmp_len) {
    if (icmp_len < sizeof(icmpv6_na_t)) return;

    const icmpv6_na_t* na = (const icmpv6_na_t*)icmp;
    if (ipv6_is_multicast(na->target)) return;

    l2_interface_t* l2 = l2_interface_find_by_index((uint8_t)ifindex);

    if (l2) {
        for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) {
            l3_ipv6_interface_t* v6 = l2->l3_v6[i];
            if (!v6) continue;
            if (ipv6_cmp(v6->ip, na->target) != 0) continue;

            // Someone else advertises an address we are still probing.
            if (v6->dad_state == IPV6_DAD_IN_PROGRESS || v6->dad_requested) {
                v6->dad_state = IPV6_DAD_FAILED;
                v6->dad_requested = 0;
                v6->dad_timer_ms = 0;
                v6->dad_probes_sent = 0;
                return;
            }
        }
    }

    if (ipv6_is_unspecified(src_ip)) return;
    if (!src_mac) return;   // the cache update below copies/compares the MAC

    uint32_t f = bswap32(na->flags);
    uint8_t router = (uint8_t)((f >> 31) & 1u);
    uint8_t solicited = (uint8_t)((f >> 30) & 1u);
    uint8_t override = (uint8_t)((f >> 29) & 1u);

    ndp_table_impl_t* t = l2_ndp((uint8_t)ifindex);
    if (!t) return;

    int idx = ndp_find_slot(t, na->target);
    if (idx < 0) idx = ndp_find_free(t);

    if (idx < 0) {
        // Table full: evict the non-router entry closest to expiry.
        uint32_t best_ttl = 0xFFFFFFFFu;
        int best_i = -1;

        for (int i = 0; i < NDP_TABLE_MAX; i++) {
            ndp_entry_t* cand = &t->entries[i];
            if (cand->state == NDP_STATE_UNUSED || cand->ttl_ms == 0) {
                best_i = i;
                break;
            }

            if (cand->is_router && cand->router_lifetime_ms) continue;

            if (cand->ttl_ms < best_ttl) {
                best_ttl = cand->ttl_ms;
                best_i = i;
            }
        }

        if (best_i < 0) best_i = 0;
        idx = best_i;

        // The victim belongs to a different neighbor. Reset it so it takes the
        // "new entry" path below; otherwise its IP would never be rewritten
        // and this advertisement would update the wrong neighbor.
        ndp_entry_clear(&t->entries[idx]);
    }

    ndp_entry_t* e = &t->entries[idx];

    uint8_t old_mac[6];
    memcpy(old_mac, e->mac, 6);

    if (e->ttl_ms == 0 && e->state == NDP_STATE_UNUSED) {
        // New entry.
        memcpy(e->ip, na->target, 16);
        memcpy(e->mac, src_mac, 6);
        e->ttl_ms = g_ndp_reachable_time_ms * 4;
        e->probes_sent = 0;
        e->is_router = router ? 1 : 0;
        e->router_lifetime_ms = e->is_router ? e->ttl_ms : 0;
        ndp_entry_apply_na_state(e, solicited);
    } else {
        int mac_changed = memcmp(old_mac, src_mac, 6) != 0;

        if (e->state == NDP_STATE_INCOMPLETE) {
            // First answer to our own solicitation: always accept the MAC.
            memcpy(e->mac, src_mac, 6);
            e->ttl_ms = g_ndp_reachable_time_ms * 4;
            ndp_entry_apply_na_state(e, solicited);
        } else if (!mac_changed) {
            if (solicited) {
                e->state = NDP_STATE_REACHABLE;
                e->timer_ms = g_ndp_reachable_time_ms;
            }
        } else if (override) {
            memcpy(e->mac, src_mac, 6);
            e->ttl_ms = g_ndp_reachable_time_ms * 4;
            ndp_entry_apply_na_state(e, solicited);
        } else {
            // Different MAC without the override flag: keep ours, but stop
            // trusting it.
            e->state = NDP_STATE_STALE;
            e->timer_ms = 0;
        }

        if (router) e->is_router = 1;
        if (!e->is_router) e->router_lifetime_ms = 0;
        if (e->is_router && !e->router_lifetime_ms) e->router_lifetime_ms = e->ttl_ms;
    }

    e->probes_sent = 0;
}

// Store up to two DNS server addresses from an RA RDNSS option (type 25) on
// the global slot that matches the advertising router, falling back to the
// first enabled global SLAAC/DHCPv6 slot. `opt_size` is the option length in
// bytes and must be at least 24.
static void ndp_ra_store_rdnss(uint16_t ifindex, const uint8_t src_ip[16], const uint8_t* opt, uint32_t opt_size) {
    l2_interface_t* l2 = l2_interface_find_by_index((uint8_t)ifindex);
    if (!l2) return;

    uint32_t addr_bytes = opt_size - 8u;   // 8-byte option header, then addresses
    uint32_t addr_count = addr_bytes / 16u;

    uint8_t zero16[16] = {0};

    const uint8_t* a0 = (addr_count >= 1) ? (opt + 8) : zero16;
    const uint8_t* a1 = (addr_count >= 2) ? (opt + 24) : zero16;

    l3_ipv6_interface_t* slot = NULL;

    for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) {
        l3_ipv6_interface_t* v6 = l2->l3_v6[i];
        if (!v6) continue;
        if (v6->cfg == IPV6_CFG_DISABLE) continue;
        if (!(v6->kind & IPV6_ADDRK_GLOBAL)) continue;
        if (!(v6->cfg & (IPV6_CFG_SLAAC | IPV6_CFG_DHCPV6))) continue;

        if (memcmp(v6->prefix, zero16, 16) != 0) {
            if (ipv6_common_prefix_len(v6->prefix, src_ip) >= 64) {
                slot = v6;
                break;
            }
        } else {
            if (ipv6_is_placeholder_gua(v6->ip)) {
                slot = v6;
                break;
            }
            if (!ipv6_is_unspecified(v6->ip) && !ipv6_is_multicast(v6->ip) && !ipv6_is_linklocal(v6->ip)) {
                if (ipv6_common_prefix_len(v6->ip, src_ip) >= 64) {
                    slot = v6;
                    break;
                }
            }
        }
    }

    if (!slot) {
        for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) {
            l3_ipv6_interface_t* v6 = l2->l3_v6[i];
            if (!v6) continue;
            if (v6->cfg == IPV6_CFG_DISABLE) continue;
            if (!(v6->kind & IPV6_ADDRK_GLOBAL)) continue;
            if (!(v6->cfg & (IPV6_CFG_SLAAC | IPV6_CFG_DHCPV6))) continue;
            slot = v6;
            break;
        }
    }

    if (!slot) return;

    // a0/a1 already point at zeros when the option carries fewer addresses.
    memcpy(slot->runtime_opts_v6.dns[0], a0, 16);
    memcpy(slot->runtime_opts_v6.dns[1], a1, 16);
}

// Router Advertisement (type 134): learn the router as a neighbor, adopt its
// reachable/retransmit timers, stop further Router Solicitations on this
// interface and process the Prefix Information, MTU and RDNSS options.
static void ndp_handle_ra(uint16_t ifindex, const uint8_t src_ip[16], const uint8_t src_mac[6], const uint8_t* icmp, uint32_t icmp_len) {
    if (icmp_len < sizeof(icmpv6_ra_t)) return;

    const icmpv6_ra_t* ra = (const icmpv6_ra_t*)icmp;

    uint16_t router_lifetime = bswap16(ra->router_lifetime);
    uint32_t reachable_time = bswap32(ra->reachable_time);
    uint32_t retrans_timer = bswap32(ra->retrans_timer);

    uint32_t router_lifetime_ms = (uint32_t)router_lifetime * 1000u;

    if (router_lifetime == 0) ndp_table_put_for_l2((uint8_t)ifindex, src_ip, src_mac, 180000, false);
    else ndp_table_put_for_l2((uint8_t)ifindex, src_ip, src_mac, router_lifetime_ms, true);

    // Zero means "unspecified by this router": keep the current value.
    if (reachable_time) g_ndp_reachable_time_ms = reachable_time;
    if (retrans_timer) g_ndp_retrans_timer_ms = retrans_timer;

    const uint8_t* opt = (const uint8_t*)(ra + 1);
    uint32_t opt_len = icmp_len - (uint32_t)sizeof(icmpv6_ra_t);

    uint8_t idx = (uint8_t)(ifindex - 1);

    if (idx < MAX_L2_INTERFACES) {
        // 3 is the daemon's retry cap, so no more solicitations are sent.
        g_rs_tries[idx] = 3;
        g_rs_timer_ms[idx] = 0;
    }

    while (opt_len >= 2) {
        uint8_t opt_type = opt[0];
        uint8_t opt_units = opt[1];
        if (opt_units == 0) break;   // zero-length option would loop forever

        uint32_t opt_size = (uint32_t)opt_units * 8u;
        if (opt_size > opt_len) break;

        if (opt_type == 3 && opt_size >= (uint32_t)sizeof(ndp_opt_prefix_info_t)) {
            const ndp_opt_prefix_info_t* pio = (const ndp_opt_prefix_info_t*)opt;

            uint8_t pfx_len = pio->prefix_length;
            uint8_t autonomous = (pio->flags & 0x40u) ? 1u : 0u;
            uint32_t valid_lft = bswap32(pio->valid_lifetime);
            uint32_t pref_lft = bswap32(pio->preferred_lifetime);

            uint8_t pfx[16];
            memcpy(pfx, pio->prefix, 16);

            if (pfx_len != 0) ndp_on_ra((uint8_t)ifindex, src_ip, router_lifetime, pfx, pfx_len, valid_lft, pref_lft, autonomous, ra->flags);
        } else if (opt_type == 5 && opt_size >= (uint32_t)sizeof(ndp_opt_mtu_t)) {
            uint32_t mtu32 = 0;
            memcpy(&mtu32, opt + 4, 4);
            mtu32 = bswap32(mtu32);

            if (mtu32 >= 1280u && mtu32 <= 65535u) {
                l2_interface_t* l2 = l2_interface_find_by_index((uint8_t)ifindex);
                if (l2) {
                    for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) {
                        l3_ipv6_interface_t* v6 = l2->l3_v6[i];
                        if (!v6) continue;
                        if (v6->cfg == IPV6_CFG_DISABLE) continue;
                        if (v6->mtu < 1280) continue;
                        v6->mtu = mtu32;
                    }
                }
            }
        } else if (opt_type == 25 && opt_size >= 24u) {
            ndp_ra_store_rdnss(ifindex, src_ip, opt, opt_size);
        }

        opt += opt_size;
        opt_len -= opt_size;
    }
}

// Entry point for received Neighbor Discovery messages.
//
//   ifindex   receiving L2 interface (0 is rejected)
//   src_ip /
//   dst_ip    IPv6 source and destination of the packet
//   src_mac   link-layer source of the frame; may be NULL, in which case
//             solicitations from a specified source and advertisements are
//             dropped once their DAD-conflict checks have run
//   icmp      start of the ICMPv6 header; icmp_len is its length in bytes
//
// Messages with a non-zero code are dropped. Handled types: 135 (NS),
// 136 (NA) and 134 (RA); everything else is ignored.
void ndp_input(uint16_t ifindex, const uint8_t src_ip[16], const uint8_t dst_ip[16], const uint8_t src_mac[6], const uint8_t* icmp, uint32_t icmp_len) {
    if (!ifindex || !src_ip || !dst_ip || !icmp || icmp_len < sizeof(icmpv6_hdr_t)) return;

    const icmpv6_hdr_t* h = (const icmpv6_hdr_t*)icmp;
    if (h->code != 0) return;

    switch (h->type) {
        case 135:
            ndp_handle_ns(ifindex, src_ip, src_mac, icmp, icmp_len);
            break;
        case 136:
            ndp_handle_na(ifindex, src_ip, src_mac, icmp, icmp_len);
            break;
        case 134:
            ndp_handle_ra(ifindex, src_ip, src_mac, icmp, icmp_len);
            break;
        default:
            break;
    }
}
+ g_rs_timer_ms[l2->ifindex - 1] = 0; + } + continue; + } + + int is_v6_local = 0; + + for (int i = 0; i < MAX_IPV6_PER_INTERFACE; i++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[i]; + if (v6 && v6->is_localhost) { + is_v6_local = 1; + break; + } + } + + if (!is_v6_local) apply_ra_policy(now_ms, l2); + int has_lla_ok = 0; + + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; s++) { + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!v6) continue; + if (v6->cfg == IPV6_CFG_DISABLE) continue; + if (ipv6_is_unspecified(v6->ip) || ipv6_is_multicast(v6->ip)) continue; + + if (v6->dad_state == IPV6_DAD_FAILED) { + handle_dad_failed(v6); + continue; + } + + if (v6->dad_requested && v6->dad_state == IPV6_DAD_NONE) { + if (ipv6_is_unspecified(v6->ip) || ipv6_is_multicast(v6->ip) || ipv6_is_placeholder_gua(v6->ip)) { + v6->dad_requested = 0; + continue; + } + + v6->dad_requested = 0; + v6->dad_state = IPV6_DAD_IN_PROGRESS; + v6->dad_probes_sent = 0; + v6->dad_timer_ms = 0; + + uint8_t sn[16]; + ipv6_make_multicast(2, IPV6_MCAST_SOLICITED_NODE, v6->ip, sn); + (void)l2_ipv6_mcast_join(l2->ifindex, sn); + } + + if (v6->dad_state == IPV6_DAD_IN_PROGRESS) { + v6->dad_timer_ms += tick_ms; + + if (v6->dad_probes_sent < g_ndp_max_probes) { + if (v6->dad_timer_ms >= 1000) { + v6->dad_timer_ms = 0; + + uint8_t sn[16]; + uint8_t zero16[16] = {0}; + + ipv6_make_multicast(2, IPV6_MCAST_SOLICITED_NODE, v6->ip, sn); + (void)l2_ipv6_mcast_join(l2->ifindex, sn); + + ndp_send_ns_on(l2->ifindex, v6->ip, zero16); + v6->dad_probes_sent++; + } + } else { + if (v6->dad_timer_ms >= 1000) { + v6->dad_timer_ms = 0; + v6->dad_state = IPV6_DAD_OK; + + uint8_t all_nodes[16]; + uint8_t zero16[16] = {0}; + ipv6_make_multicast(2, IPV6_MCAST_ALL_NODES, zero16, all_nodes); + + const uint8_t* my_mac = network_get_mac(l2->ifindex); + if (my_mac) (void)ndp_send_na_on(l2->ifindex, all_nodes, v6->ip, v6->ip, 0, my_mac, 0); + } + } + } + + if (v6->dad_state == IPV6_DAD_OK && ipv6_is_linklocal(v6->ip)) has_lla_ok = 1; + 
// One neighbor-cache entry: the link-layer address known for an IPv6
// neighbor plus the timers that drive its reachability state.
typedef struct {
    uint8_t ip[16];               // neighbor's IPv6 address (lookup key)
    uint8_t mac[6];               // link-layer address; zeroed while resolution is pending
    uint32_t ttl_ms;              // remaining entry lifetime in ms; 0 means the slot is skipped by lookups
    uint32_t timer_ms;            // countdown for the current state (reachable time or probe retransmit)
    uint8_t state;                // one of ndp_state_t
    uint8_t probes_sent;          // solicitations sent in the current INCOMPLETE/PROBE cycle
    uint8_t is_router;            // non-zero when the neighbor is known to be a router
    uint32_t router_lifetime_ms;  // remaining router lifetime in ms; is_router is cleared when it runs out
} ndp_entry_t;
// Map an HTTP method value (passed as a raw integer) to its upper-case name
// for log output. Unknown values yield an empty string.
static const char* http_method_str(uint32_t m) {
    const HTTPMethod method = (HTTPMethod)m;

    if (method == HTTP_METHOD_GET) return "GET";
    if (method == HTTP_METHOD_POST) return "POST";
    if (method == HTTP_METHOD_PUT) return "PUT";
    if (method == HTTP_METHOD_DELETE) return "DELETE";

    return "";
}
+ case NETLOG_ACT_RECV: return "recv"; + case NETLOG_ACT_RECVFROM: return "recvfrom"; + case NETLOG_ACT_CLOSE: return "close"; + case NETLOG_ACT_HTTP_SEND_REQUEST: return "send_request"; + case NETLOG_ACT_HTTP_RECV_RESPONSE: return "recv_response"; + case NETLOG_ACT_HTTP_RECV_REQUEST: return "recv_request"; + case NETLOG_ACT_HTTP_SEND_RESPONSE: return "send_response"; + default: return "event"; + } +} + +static const char* bind_kind_str(SockBindKind k) { + switch (k) { + case BIND_L3: return "L3"; + case BIND_L2: return "L2"; + case BIND_IP: return "IP"; + case BIND_ANY: return "ANY"; + default: return ""; + } +} + +static const char* dst_kind_str(SockDstKind k) { + switch (k) { + case DST_ENDPOINT: return "EP"; + case DST_DOMAIN: return "DNS"; + default: return ""; + } +} + +void netlog_socket_event(const SocketExtraOptions* extra, const netlog_socket_event_t* e) { + if (!extra) return; + if (!e) return; + + if ((extra->flags & SOCK_OPT_DEBUG) == 0) return; + + SockDebugLevel lvl = extra->debug_level; + if (lvl > SOCK_DBG_ALL) lvl = SOCK_DBG_ALL; + + const char* c = comp_str(e->comp); + const char* a = act_str(e->action); + + if (lvl == SOCK_DBG_LOW) { + if (e->action == NETLOG_ACT_CONNECTED) { + kprintf("[NET][%s] %s lp=%u rp=%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1); + return; + } + if (e->action == NETLOG_ACT_SEND || e->action == NETLOG_ACT_RECV || e->action == NETLOG_ACT_SENDTO || e->action == NETLOG_ACT_RECVFROM) { + kprintf("[NET][%s] %s n=%u", c, a, (uint32_t)e->u0); + return; + } + if (e->action == NETLOG_ACT_HTTP_SEND_REQUEST) { + kprintf("[NET][%s] %s bytes=%u sent=%lld", c, a, (uint32_t)e->u0, (long long)e->i0); + return; + } + if (e->action == NETLOG_ACT_HTTP_RECV_RESPONSE) { + kprintf("[NET][%s] %s code=%u body=%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1); + return; + } + + if (e->action == NETLOG_ACT_CONNECT && (e->comp == NETLOG_COMP_HTTP_CLIENT || e->comp == NETLOG_COMP_HTTP_SERVER)) { + kprintf("[NET][%s] %s port=%u r=%lld", c, a, 
(uint32_t)e->u0, (long long)e->i0); + return; + } + + if (e->action == NETLOG_ACT_HTTP_RECV_REQUEST) { + kprintf("[NET][%s] %s method=%s path_len=%u body=%u", c, a, http_method_str(e->u0), (uint32_t)e->u1, (uint32_t)e->i0); + return; + } + + if (e->action == NETLOG_ACT_HTTP_SEND_RESPONSE) { + kprintf("[NET][%s] %s code=%u bytes=%u sent=%lld", c, a, (uint32_t)e->u0, (uint32_t)e->u1, (long long)e->i0); + return; + } + + kprintf("[NET][%s] %s", c, a); + return; + } + + if (lvl == SOCK_DBG_MEDIUM) { + if (e->action == NETLOG_ACT_BIND) { + kprintf("[NET][%s] %s port=%u kind=%s", c, a, (uint32_t)e->u0, bind_kind_str(e->bind_spec.kind)); + return; + } + + if (e->action == NETLOG_ACT_CONNECT) { + if (e->comp == NETLOG_COMP_HTTP_CLIENT || e->comp == NETLOG_COMP_HTTP_SERVER) { + char dip[80]; + bool dv6 = false; + uint16_t dport = 0; + net_ep_split(&e->dst_ep, dip, (int)sizeof(dip), &dv6, &dport); + + if (e->dst_kind == DST_DOMAIN && e->s0) { + if (dv6) kprintf("[NET][%s] %s host=%s port=%u dst=[%s]:%u r=%lld", c, a, e->s0, (uint32_t)e->u0, dip, (uint32_t)dport, (long long)e->i0); + else kprintf("[NET][%s] %s host=%s port=%u dst=%s:%u r=%lld", c, a, e->s0, (uint32_t)e->u0, dip, (uint32_t)dport, (long long)e->i0); + } else { + if (dv6) kprintf("[NET][%s] %s dst=[%s]:%u r=%lld", c, a, dip, (uint32_t)dport, (long long)e->i0); + else kprintf("[NET][%s] %s dst=%s:%u r=%lld", c, a, dip, (uint32_t)dport, (long long)e->i0); + } + } else { + kprintf("[NET][%s] %s kind=%s port=%u", c, a, dst_kind_str(e->dst_kind), (uint32_t)e->u0); + } + return; + } + + if (e->action == NETLOG_ACT_HTTP_RECV_REQUEST) { + char rip[80]; + bool rv6 = false; + uint16_t rport = 0; + net_ep_split(&e->remote_ep, rip, (int)sizeof(rip), &rv6, &rport); + + if (rv6) kprintf("[NET][%s] %s method=%s path_len=%u body=%u remote=[%s]:%u", c, a, http_method_str(e->u0), (uint32_t)e->u1, (uint32_t)e->i0, rip, (uint32_t)rport); + else kprintf("[NET][%s] %s method=%s path_len=%u body=%u remote=%s:%u", c, a, 
http_method_str(e->u0), (uint32_t)e->u1, (uint32_t)e->i0, rip, (uint32_t)rport); + return; + } + + if (e->action == NETLOG_ACT_HTTP_SEND_RESPONSE) { + char rip[80]; + bool rv6 = false; + uint16_t rport = 0; + net_ep_split(&e->remote_ep, rip, (int)sizeof(rip), &rv6, &rport); + + if (rv6) kprintf("[NET][%s] %s code=%u bytes=%u sent=%lld remote=[%s]:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, (long long)e->i0, rip, (uint32_t)rport); + else kprintf("[NET][%s] %s code=%u bytes=%u sent=%lld remote=%s:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, (long long)e->i0, rip, (uint32_t)rport); + return; + } + + if (e->action == NETLOG_ACT_LISTEN) { + kprintf("[NET][%s] %s backlog=%u", c, a, (uint32_t)e->u0); + return; + } + + if (e->action == NETLOG_ACT_ACCEPT) { + char rip[80]; + bool rv6 = false; + uint16_t rport = 0; + net_ep_split(&e->remote_ep, rip, (int)sizeof(rip), &rv6, &rport); + + if (rv6) kprintf("[NET][%s] %s client=%p remote=[%s]:%u", c, a, (void*)(uintptr_t)e->i0, rip, (uint32_t)rport); + else kprintf("[NET][%s] %s client=%p remote=%s:%u", c, a, (void*)(uintptr_t)e->i0, rip, (uint32_t)rport); + return; + } + + if (e->action == NETLOG_ACT_CONNECTED) { + kprintf("[NET][%s] %s local=%u remote=%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1); + return; + } + + if (e->action == NETLOG_ACT_SEND || e->action == NETLOG_ACT_RECV) { + kprintf("[NET][%s] %s n=%u", c, a, (uint32_t)e->u0); + return; + } + + if (e->action == NETLOG_ACT_SENDTO) { + kprintf("[NET][%s] %s kind=%s port=%u n=%u", c, a, dst_kind_str(e->dst_kind), (uint32_t)e->u0, (uint32_t)e->u1); + return; + } + + if (e->action == NETLOG_ACT_RECVFROM) { + kprintf("[NET][%s] %s cap=%u", c, a, (uint32_t)e->u0); + return; + } + + if (e->action == NETLOG_ACT_HTTP_SEND_REQUEST) { + char rip[80]; + bool rv6 = false; + uint16_t rport = 0; + net_ep_split(&e->remote_ep, rip, (int)sizeof(rip), &rv6, &rport); + + if (rv6) kprintf("[NET][%s] %s bytes=%u sent=%lld remote=[%s]:%u", c, a, (uint32_t)e->u0, (long long)e->i0, rip, 
(uint32_t)rport); + else kprintf("[NET][%s] %s bytes=%u sent=%lld remote=%s:%u", c, a, (uint32_t)e->u0, (long long)e->i0, rip, (uint32_t)rport); + return; + } + + if (e->action == NETLOG_ACT_HTTP_RECV_RESPONSE) { + char rip[80]; + bool rv6 = false; + uint16_t rport = 0; + net_ep_split(&e->remote_ep, rip, (int)sizeof(rip), &rv6, &rport); + + if (rv6) kprintf("[NET][%s] %s code=%u body=%u remote=[%s]:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, rip, (uint32_t)rport); + else kprintf("[NET][%s] %s code=%u body=%u remote=%s:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, rip, (uint32_t)rport); + return; + } + + kprintf("[NET][%s] %s", c, a); + return; + } + + char dst_ip[80]; + char rem_ip[80]; + bool dst_v6 = false; + bool rem_v6 = false; + uint16_t dst_port = 0; + uint16_t rem_port = 0; + net_ep_split(&e->dst_ep, dst_ip, (int)sizeof(dst_ip), &dst_v6, &dst_port); + net_ep_split(&e->remote_ep, rem_ip, (int)sizeof(rem_ip), &rem_v6, &rem_port); + + if (e->action == NETLOG_ACT_BIND) { + kprintf("[NET][%s] %s port=%u kind=%s l3=%u if=%u", c, a, (uint32_t)e->u0, bind_kind_str(e->bind_spec.kind), (uint32_t)e->bind_spec.l3_id, (uint32_t)e->bind_spec.ifindex); + return; + } + + if (e->action == NETLOG_ACT_CONNECT) { + if (e->dst_kind == DST_DOMAIN && e->s0) { + if (dst_v6) kprintf("[NET][%s] %s host=%s port=%u dst=[%s]:%u r=%lld", c, a, e->s0, (uint32_t)e->u0, dst_ip, (uint32_t)dst_port, (long long)e->i0); + else kprintf("[NET][%s] %s host=%s port=%u dst=%s:%u r=%lld", c, a, e->s0, (uint32_t)e->u0, dst_ip, (uint32_t)dst_port, (long long)e->i0); + } else if (dst_v6) { + kprintf("[NET][%s] %s dst=[%s]:%u r=%lld", c, a, dst_ip, (uint32_t)dst_port, (long long)e->i0); + } else { + kprintf("[NET][%s] %s dst=%s:%u r=%lld", c, a, dst_ip, (uint32_t)dst_port, (long long)e->i0); + } + return; + } + + if (e->action == NETLOG_ACT_SENDTO) { + if (e->dst_kind == DST_DOMAIN && e->s0) + kprintf("[NET][%s] %s host=%s port=%u n=%u", c, a, e->s0, (uint32_t)e->u0, (uint32_t)e->u1); + else if (dst_v6) 
+ kprintf("[NET][%s] %s dst=[%s]:%u n=%u", c, a, dst_ip, (uint32_t)dst_port, (uint32_t)e->u1); + else + kprintf("[NET][%s] %s dst=%s:%u n=%u", c, a, dst_ip, (uint32_t)dst_port, (uint32_t)e->u1); + return; + } + + if (e->action == NETLOG_ACT_CONNECTED) { + if (rem_v6) + kprintf("[NET][%s] %s local=%u remote=[%s]:%u", c, a, (uint32_t)e->u0, rem_ip, (uint32_t)rem_port); + else + kprintf("[NET][%s] %s local=%u remote=%s:%u", c, a, (uint32_t)e->u0, rem_ip, (uint32_t)rem_port); + return; + } + + if (e->action == NETLOG_ACT_SEND) { + if (rem_v6) + kprintf("[NET][%s] %s n=%u remote=[%s]:%u", c, a, (uint32_t)e->u0, rem_ip, (uint32_t)rem_port); + else + kprintf("[NET][%s] %s n=%u remote=%s:%u", c, a, (uint32_t)e->u0, rem_ip, (uint32_t)rem_port); + return; + } + + if (e->action == NETLOG_ACT_RECV) { + if (rem_v6) + kprintf("[NET][%s] %s cap=%u remote=[%s]:%u", c, a, (uint32_t)e->u0, rem_ip, (uint32_t)rem_port); + else + kprintf("[NET][%s] %s cap=%u remote=%s:%u", c, a, (uint32_t)e->u0, rem_ip, (uint32_t)rem_port); + return; + } + + if (e->action == NETLOG_ACT_RECVFROM) { + kprintf("[NET][%s] %s cap=%u", c, a, (uint32_t)e->u0); + return; + } + + if (e->action == NETLOG_ACT_CLOSE) { + if (rem_v6) + kprintf("[NET][%s] %s lp=%u remote=[%s]:%u", c, a, (uint32_t)e->local_port, rem_ip, (uint32_t)rem_port); + else + kprintf("[NET][%s] %s lp=%u remote=%s:%u", c, a, (uint32_t)e->local_port, rem_ip, (uint32_t)rem_port); + return; + } + + if (e->action == NETLOG_ACT_ACCEPT) { + if (rem_v6) + kprintf("[NET][%s] %s client=%p remote=[%s]:%u", c, a, (void*)(uintptr_t)e->i0, rem_ip, (uint32_t)rem_port); + else + kprintf("[NET][%s] %s client=%p remote=%s:%u", c, a, (void*)(uintptr_t)e->i0, rem_ip, (uint32_t)rem_port); + return; + } + + if (e->action == NETLOG_ACT_LISTEN) { + kprintf("[NET][%s] %s backlog=%u", c, a, (uint32_t)e->u0); + return; + } + + if (e->action == NETLOG_ACT_HTTP_SEND_REQUEST) { + if (rem_v6) { + if (e->s0) kprintf("[NET][%s] %s path=%s bytes=%u sent=%lld remote=[%s]:%u", 
c, a, e->s0, (uint32_t)e->u0, (long long)e->i0, rem_ip, (uint32_t)rem_port); + else kprintf("[NET][%s] %s bytes=%u sent=%lld remote=[%s]:%u", c, a, (uint32_t)e->u0, (long long)e->i0, rem_ip, (uint32_t)rem_port); + } else { + if (e->s0) kprintf("[NET][%s] %s path=%s bytes=%u sent=%lld remote=%s:%u", c, a, e->s0, (uint32_t)e->u0, (long long)e->i0, rem_ip, (uint32_t)rem_port); + else kprintf("[NET][%s] %s bytes=%u sent=%lld remote=%s:%u", c, a, (uint32_t)e->u0, (long long)e->i0, rem_ip, (uint32_t)rem_port); + } + return; + } + + if (e->action == NETLOG_ACT_HTTP_RECV_RESPONSE) { + if (rem_v6) + kprintf("[NET][%s] %s code=%u body=%u remote=[%s]:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, rem_ip, (uint32_t)rem_port); + else + kprintf("[NET][%s] %s code=%u body=%u remote=%s:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, rem_ip, (uint32_t)rem_port); + return; + } + + if (e->action == NETLOG_ACT_HTTP_RECV_REQUEST) { + if (rem_v6) { + if (e->s0) kprintf("[NET][%s] %s method=%s path=%s body=%u remote=[%s]:%u", c, a, http_method_str(e->u0), e->s0, (uint32_t)e->i0, rem_ip, (uint32_t)rem_port); + else kprintf("[NET][%s] %s method=%u path_len=%u body=%u remote=[%s]:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, (uint32_t)e->i0, rem_ip, (uint32_t)rem_port); + } else { + if (e->s0) kprintf("[NET][%s] %s method=%s path=%s body=%u remote=%s:%u", c, a, http_method_str(e->u0), e->s0, (uint32_t)e->i0, rem_ip, (uint32_t)rem_port); + else kprintf("[NET][%s] %s method=%u path_len=%u body=%u remote=%s:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, (uint32_t)e->i0, rem_ip, (uint32_t)rem_port); + } + return; + } + + if (e->action == NETLOG_ACT_HTTP_SEND_RESPONSE) { + if (rem_v6) + kprintf("[NET][%s] %s code=%u bytes=%u sent=%lld remote=[%s]:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, (long long)e->i0, rem_ip, (uint32_t)rem_port); + else + kprintf("[NET][%s] %s code=%u bytes=%u sent=%lld remote=%s:%u", c, a, (uint32_t)e->u0, (uint32_t)e->u1, (long long)e->i0, rem_ip, (uint32_t)rem_port); + return; 
+ } + + kprintf("[NET][%s] %s", c, a); +} \ No newline at end of file diff --git a/kernel/networking/net_logger/net_logger.h b/kernel/networking/net_logger/net_logger.h new file mode 100644 index 00000000..17dedc27 --- /dev/null +++ b/kernel/networking/net_logger/net_logger.h @@ -0,0 +1,60 @@ +#pragma once +#include "types.h" +#include "net/network_types.h" +#include "net/socket_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + NETLOG_COMP_UDP = 0, + NETLOG_COMP_TCP = 1, + NETLOG_COMP_HTTP_CLIENT = 2, + NETLOG_COMP_HTTP_SERVER = 3 +} netlog_component_t; + +typedef enum { + NETLOG_ACT_BIND = 0, + NETLOG_ACT_CONNECT = 1, + NETLOG_ACT_CONNECTED = 2, + NETLOG_ACT_LISTEN = 3, + NETLOG_ACT_ACCEPT = 4, + NETLOG_ACT_SEND = 5, + NETLOG_ACT_SENDTO = 6, + NETLOG_ACT_RECV = 7, + NETLOG_ACT_RECVFROM = 8, + NETLOG_ACT_CLOSE = 9, + NETLOG_ACT_HTTP_SEND_REQUEST = 10, + NETLOG_ACT_HTTP_RECV_RESPONSE = 11, + NETLOG_ACT_HTTP_RECV_REQUEST = 12, + NETLOG_ACT_HTTP_SEND_RESPONSE = 13 +} netlog_action_t; + +typedef struct netlog_socket_event_t { + netlog_component_t comp; + netlog_action_t action; + + int64_t i0; + int64_t i1; + uint32_t u0; + uint32_t u1; + + uint16_t pid; + uint16_t local_port; + + SockBindSpec bind_spec; + + SockDstKind dst_kind; + net_l4_endpoint dst_ep; + net_l4_endpoint remote_ep; + + const char* s0; + const char* s1; +} netlog_socket_event_t; + +void netlog_socket_event(const SocketExtraOptions*extra, const netlog_socket_event_t* e); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/kernel/networking/netpkt.c b/kernel/networking/netpkt.c new file mode 100644 index 00000000..3f6f1167 --- /dev/null +++ b/kernel/networking/netpkt.c @@ -0,0 +1,160 @@ +#include "netpkt.h" +#include "std/std.h" +#include "syscalls/syscalls.h" + +struct netpkt { + uintptr_t base; + uint32_t alloc; + uint32_t head; + uint32_t len; + uint32_t refs; + netpkt_free_fn free_fn; + void* free_ctx; +}; + +static void netpkt_free_malloc(void* ctx, 
uintptr_t base, uint32_t alloc_size) { + (void)ctx; + if (base && alloc_size) free_sized((void*)base, alloc_size); +} + +static bool netpkt_realloc_to(netpkt_t* p, uint32_t new_head, uint32_t new_alloc) { /* Moves the payload into a fresh malloc'd buffer of new_alloc bytes with new_head bytes of headroom; returns false (packet untouched) on bad sizes or allocation failure. */ + if (!p) return false; + if ((uint64_t)new_alloc < (uint64_t)new_head + p->len) return false; /* 64-bit sum so a wrapped 32-bit total cannot pass the bounds check */ + + uintptr_t nb = (uintptr_t)malloc(new_alloc); + if (!nb) return false; + + if (p->len) memcpy((void*)(nb + new_head), (const void*)(p->base + p->head), p->len); + if (p->free_fn) p->free_fn(p->free_ctx, p->base, p->alloc); /* release the old buffer through its original owner */ + + p->base = nb; + p->alloc = new_alloc; + p->head = new_head; + p->free_fn = netpkt_free_malloc; /* the new buffer came from malloc, so a custom free_fn installed by netpkt_wrap must not be reused for it */ + p->free_ctx = 0; + return true; +} + +netpkt_t* netpkt_alloc(uint32_t data_capacity, uint32_t headroom, uint32_t tailroom) { + uint32_t alloc = headroom+ data_capacity + tailroom; + if (alloc == 0) alloc = 1; + + uintptr_t base = (uintptr_t)malloc(alloc); + if (!base) return 0; + + netpkt_t* p = (netpkt_t*)malloc(sizeof(netpkt_t)); + if (!p) { + free_sized((void*)base, alloc); + return 0; + } + + p->base = base; + p->alloc = alloc; + p->head = headroom; + p->len = 0; + p->refs = 1; + p->free_fn = netpkt_free_malloc; + p->free_ctx = 0; + return p; +} + +netpkt_t* netpkt_wrap(uintptr_t base, uint32_t alloc_size, uint32_t data_len, netpkt_free_fn free_fn, void* ctx) { + if (!base || !alloc_size) return 0; + if (data_len > alloc_size) return 0; + + netpkt_t* p = (netpkt_t*)malloc(sizeof(netpkt_t)); + if (!p) return 0; + + p->base = base; + p->alloc = alloc_size; + p->head = 0; + p->len = data_len; + p->refs = 1; + p->free_fn = free_fn ? 
free_fn : netpkt_free_malloc; + p->free_ctx = ctx; + return p; +} + +void netpkt_ref(netpkt_t* p){ + if (p)p->refs++; +} + +void netpkt_unref(netpkt_t* p) { + if (!p) return; + if (p->refs > 1) { + p->refs--; + return; + } + if (p->free_fn) p->free_fn(p->free_ctx, p->base, p->alloc); + free_sized(p, sizeof(*p)); +} + +uintptr_t netpkt_data(const netpkt_t* p) { + if (!p) return 0; + return p->base + p->head; +} + +uint32_t netpkt_len(const netpkt_t* p) { + return p ? p->len : 0; +} + +uint32_t netpkt_headroom(const netpkt_t* p) { + return p ? p->head : 0; +} + +uint32_t netpkt_tailroom(const netpkt_t* p) { + if (!p) return 0; + uint32_t used = p->head + p->len; + return used >= p->alloc ? 0: (p->alloc - used); +} + +bool netpkt_ensure_headroom(netpkt_t* p, uint32_t need) { + if (!p) return false; + if (p->head >= need) return true; + + uint32_t tail = netpkt_tailroom(p); + uint32_t new_head = need; + uint32_t new_alloc = new_head + p->len + tail; + if (new_alloc < p->alloc + (need - p->head)) new_alloc = p->alloc + (need - p->head); + return netpkt_realloc_to(p, new_head, new_alloc); +} + +bool netpkt_ensure_tailroom(netpkt_t* p, uint32_t need) { + if (!p) return false; + if (netpkt_tailroom(p) >= need) return true; + + uint32_t new_alloc = p->head + p->len + need; + if (new_alloc < p->alloc + (need - netpkt_tailroom(p))) new_alloc = p->alloc + (need - netpkt_tailroom(p)); + return netpkt_realloc_to(p, p->head, new_alloc); +} + +void* netpkt_push(netpkt_t* p, uint32_t bytes) { + if (!p || bytes == 0) return (void*)netpkt_data(p); + if (!netpkt_ensure_headroom(p, bytes)) return 0; + p->head -= bytes; + p->len += bytes; + return (void*)(p->base + p->head); +} + +void* netpkt_put(netpkt_t* p, uint32_t bytes) { /* Appends bytes to the payload tail and returns a pointer to the newly added region; returns 0 for a NULL packet or when tailroom cannot be grown. */ + if (!p || bytes == 0) return p ? (void*)(netpkt_data(p) + p->len) : 0; /* a NULL packet has no tail: return 0 instead of dereferencing p->len */ + if (!netpkt_ensure_tailroom(p, bytes)) return 0; + uintptr_t out = p->base + p->head + p->len; + p->len += bytes; + return (void*)out; +} + +bool netpkt_pull(netpkt_t* p, uint32_t bytes) { + if 
(!p) return false; + if (bytes > p->len) return false; + p->head += bytes; + p->len -= bytes; + return true; +} + +bool netpkt_trim(netpkt_t* p, uint32_t new_len) { + if (!p) return false; + if (new_len > p->len) return false; + p->len = new_len; + return true; +} diff --git a/kernel/networking/netpkt.h b/kernel/networking/netpkt.h new file mode 100644 index 00000000..441005cb --- /dev/null +++ b/kernel/networking/netpkt.h @@ -0,0 +1,36 @@ +#pragma once + +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct netpkt netpkt_t; + +typedef void (*netpkt_free_fn)(void* ctx, uintptr_t base, uint32_t alloc_size); + +netpkt_t* netpkt_alloc(uint32_t data_capacity, uint32_t headroom, uint32_t tailroom); +netpkt_t* netpkt_wrap(uintptr_t base, uint32_t alloc_size, uint32_t data_len, netpkt_free_fn free_fn, void* ctx); + +void netpkt_ref(netpkt_t* p); +void netpkt_unref(netpkt_t* p); + +uintptr_t netpkt_data(const netpkt_t* p); +uint32_t netpkt_len(const netpkt_t* p); + +uint32_t netpkt_headroom(const netpkt_t* p); +uint32_t netpkt_tailroom(const netpkt_t* p); + +bool netpkt_pull(netpkt_t* p, uint32_t bytes); +bool netpkt_trim(netpkt_t* p, uint32_t new_len); + +void* netpkt_push(netpkt_t* p, uint32_t bytes); +void* netpkt_put(netpkt_t* p, uint32_t bytes); + +bool netpkt_ensure_headroom(netpkt_t* p, uint32_t need); +bool netpkt_ensure_tailroom(netpkt_t* p, uint32_t need); + +#ifdef __cplusplus +} +#endif diff --git a/kernel/networking/network.cpp b/kernel/networking/network.cpp index fd949d61..d1d9e755 100644 --- a/kernel/networking/network.cpp +++ b/kernel/networking/network.cpp @@ -23,13 +23,6 @@ int network_net_task_entry(int argc, char* argv[]) { return 0; } -int net_tx_frame(uintptr_t frame_ptr, uint32_t frame_len) { - if (!dispatch || !frame_ptr || !frame_len) return -1; - uint8_t ix = 1; //legacy - if (ix == 0xFF) return -1; - return dispatch->enqueue_frame(ix, {frame_ptr, frame_len}) ? 
0 : -1; -} - int net_tx_frame_on(uint16_t ifindex, uintptr_t frame_ptr, uint32_t frame_len) { if (!dispatch || !frame_ptr || !frame_len) return -1; return dispatch->enqueue_frame(ifindex, {frame_ptr, frame_len}) ? 0 : -1; @@ -42,14 +35,6 @@ int net_rx_frame(sizedptr* out_frame) { return 0; } -const uint8_t* network_get_local_mac() { - static uint8_t dummy[6] = {0,0,0,0,0,0}; - if (!dispatch) return dummy; - if (1 == 0xFF) return dummy; - const uint8_t* m = dispatch->mac(1); //kegacy - return m ? m : dummy; -} - const uint8_t* network_get_mac(uint16_t ifindex) { static uint8_t dummy[6] = {0,0,0,0,0,0}; if (!dispatch) return dummy; @@ -94,6 +79,13 @@ void network_dump_interfaces() { if (dispatch) dispatch->dump_interfaces(); } +bool network_sync_multicast(uint16_t ifindex, const uint8_t* macs, uint32_t count) { + if (!dispatch) return false; + NetDriver* drv = dispatch->driver_at((uint8_t)ifindex); + if (!drv) return false; + return drv->sync_multicast(macs, count); +} + system_module net_module = (system_module){ .name = "net", .mount = "/net", diff --git a/kernel/networking/network.h b/kernel/networking/network.h index a919b8c2..913ca034 100644 --- a/kernel/networking/network.h +++ b/kernel/networking/network.h @@ -35,6 +35,7 @@ size_t network_nic_count(void); void network_update_local_ip(uint32_t ip); void network_dump_interfaces(void); +bool network_sync_multicast(uint16_t ifindex, const uint8_t* macs, uint32_t count); extern system_module net_module; diff --git a/kernel/networking/network_dispatch.cpp b/kernel/networking/network_dispatch.cpp index 05a55229..1a3318fa 100644 --- a/kernel/networking/network_dispatch.cpp +++ b/kernel/networking/network_dispatch.cpp @@ -11,6 +11,10 @@ #include "networking/interface_manager.h" #include "process/scheduler.h" #include "networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/netpkt.h" +#include "networking/link_layer/link_utils.h" +#include 
"networking/drivers/loopback/loopback_driver.hpp" #define RX_INTR_BATCH_LIMIT 64 #define TASK_RX_BATCH_LIMIT 256 @@ -64,20 +68,7 @@ bool NetworkDispatch::init() void NetworkDispatch::handle_rx_irq(size_t nic_id) { if (nic_id >= nic_num) return; - NetDriver* driver = nics[nic_id].drv; - if (!driver) return; - - for (int i = 0; i < RX_INTR_BATCH_LIMIT; ++i) { - sizedptr raw = driver->handle_receive_packet(); - if (!raw.ptr || raw.size == 0) break; - if (raw.size < sizeof(eth_hdr_t)) { free_frame(raw); continue; } - if (!nics[nic_id].rx.push(raw)) { - free_frame(raw); - nics[nic_id].rx_dropped++; - continue; - } - nics[nic_id].rx_produced++; - } + if (!nics[nic_id].drv) return; } void NetworkDispatch::handle_tx_irq(size_t nic_id) @@ -119,13 +110,36 @@ int NetworkDispatch::net_task() bool did_work = false; for (size_t n = 0; n < nic_num; ++n) { + NetDriver* driver = nics[n].drv; + if (driver) { + int lim = nics[n].kind_val == NET_IFK_LOCALHOST ? TASK_RX_BATCH_LIMIT : RX_INTR_BATCH_LIMIT; + for (int i = 0; i < lim; ++i) { + sizedptr raw = driver->handle_receive_packet(); + if (!raw.ptr || raw.size == 0) break; + if (raw.size < sizeof(eth_hdr_t)) { + free_frame(raw); + continue; + } + if (!nics[n].rx.push(raw)) { + free_frame(raw); + nics[n].rx_dropped++; + continue; + } + nics[n].rx_produced++; + } + } int processed = 0; for (int i = 0; i < TASK_RX_BATCH_LIMIT; ++i) { if (nics[n].rx.is_empty()) break; sizedptr pkt{0,0}; if (!nics[n].rx.pop(pkt)) break; - eth_input(nics[n].ifindex, pkt.ptr, pkt.size); - free_frame(pkt); + netpkt_t* np = netpkt_wrap(pkt.ptr, pkt.size, pkt.size, NULL, 0); + if (np) { + eth_input(nics[n].ifindex, np); + netpkt_unref(np); + } else { + free_frame(pkt); + } nics[n].rx_consumed++; processed++; } @@ -140,7 +154,10 @@ int NetworkDispatch::net_task() if (nics[n].tx.is_empty()) break; sizedptr pkt{0,0}; if (!nics[n].tx.pop(pkt)) break; - driver->send_packet(pkt); + if (!driver->send_packet(pkt)) { + free_frame(pkt); + nics[n].tx_dropped++; + } 
nics[n].tx_consumed++; processed++; } @@ -190,7 +207,7 @@ uint16_t NetworkDispatch::mtu(uint8_t ifindex) const return nic_id < 0 ? 0 : nics[nic_id].mtu_val; } -uint16_t NetworkDispatch::header_size(uint8_t ifindex) const +uint16_t NetworkDispatch::header_size(uint8_t ifindex) const { int nic_id = nic_for_ifindex(ifindex); return nic_id < 0 ? 0 : nics[nic_id].hdr_sz; @@ -227,7 +244,7 @@ uint8_t NetworkDispatch::kind(uint8_t ifindex) const void NetworkDispatch::free_frame(const sizedptr &f) { - if (f.ptr) free_sizedptr(f); + if (f.ptr) free_sized((void*)f.ptr, f.size); } bool NetworkDispatch::register_all_from_bus() { @@ -248,11 +265,16 @@ bool NetworkDispatch::register_all_from_bus() { if (!name) continue; if (!drv) { - bool is_lo0 = (name[0]=='l' && name[1]=='o' && name[2]=='0' && name[3]==0); - if (!is_lo0) continue; - uint8_t ix = l2_interface_create(name, nullptr, 0); - l2_interface_set_up(ix, true); - continue; + if (kd != NET_IFK_LOCALHOST) continue; + LoopbackDriver* lo_drv = new LoopbackDriver(); + if (!lo_drv) continue; + if (!lo_drv->init_at(0, 0)) { + delete lo_drv; + continue; + } + drv = lo_drv; + if (!hs) hs = 0; + if (!m) m = 65535; } uint16_t type_cost = @@ -291,7 +313,7 @@ bool NetworkDispatch::register_all_from_bus() { c->duplex_mode = dp; c->kind_val = kd; - uint8_t ix = l2_interface_create(c->ifname_str, (void*)drv, base_metric); + uint8_t ix = l2_interface_create(c->ifname_str, (void*)drv, base_metric, kd); l2_interface_set_up(ix, true); c->ifindex = ix; @@ -315,20 +337,6 @@ void NetworkDispatch::dump_interfaces() { kprintf("[net]interface dump start"); - auto ipv6_to_str = [&](const uint8_t ip[16], char out[41]){ //TODO: move this to ipv6 file - static const char HEX[] = "0123456789abcdef"; - int p = 0; - for (int g = 0; g < 8; ++g) { - uint16_t w = (uint16_t(ip[g*2]) << 8) | uint16_t(ip[g*2 + 1]); - out[p++] = HEX[(w >> 12) & 0xF]; - out[p++] = HEX[(w >> 8) & 0xF]; - out[p++] = HEX[(w >> 4) & 0xF]; - out[p++] = HEX[w & 0xF]; - if (g != 7) 
out[p++] = ':'; - } - out[p] = 0; - }; - for (uint8_t ifx = 1; ifx <= (uint8_t)MAX_L2_INTERFACES; ++ifx){ l2_interface_t* l2 = l2_interface_find_by_index(ifx); if (!l2) continue; @@ -342,20 +350,9 @@ void NetworkDispatch::dump_interfaces() if (nid >= 0){ char macs[18]; - { - static const char HEX[] = "0123456789abcdef"; - int p = 0; - for (int i = 0; i < 6; ++i) { - uint8_t b = nics[nid].mac_addr[i]; - macs[p++] = HEX[b >> 4]; - macs[p++] = HEX[b & 0x0F]; - if (i != 5) macs[p++] = ':'; - } - macs[p] = 0; - } + mac_to_string(nics[nid].mac_addr, macs); - const char* dpx = (nics[nid].duplex_mode == 0) ? "half" : - (nics[nid].duplex_mode == 1) ? "full" : "unknown"; + const char* dpx = (nics[nid].duplex_mode == 0) ? "half" : (nics[nid].duplex_mode == 1) ? "full" : "unknown"; kprintf(" driver: nic_id=%u ifname=%s hw=%s mtu=%u hdr=%u mac=%s drv=%x spd=%u dup=%s kind=%u", (unsigned)nid, @@ -404,12 +401,15 @@ void NetworkDispatch::dump_interfaces() for (int s = 0; s < (int)MAX_IPV6_PER_INTERFACE; ++s){ l3_ipv6_interface_t* v6 = l2->l3_v6[s]; if (!v6) continue; + char ip6[41], gw6[41]; - ipv6_to_str(v6->ip, ip6); - ipv6_to_str(v6->gateway, gw6); + ipv6_to_string(v6->ip, ip6, (int)sizeof(ip6)); + ipv6_to_string(v6->gateway, gw6, (int)sizeof(gw6)); + uint32_t llc = (v6->kind & IPV6_ADDRK_LINK_LOCAL) ? 1u : 0u; uint32_t gua = (v6->kind & IPV6_ADDRK_GLOBAL) ? 1u : 0u; uint32_t en = (v6->cfg != IPV6_CFG_DISABLE) ? 
1u : 0u; + kprintf(" - slot=%u l3_id=%u kind=%u cfg=%i llc=%u gua=%u en=%u ip=%s/%u gw=%s vlft=%u plft=%u tsc=%u localhost=%u", (unsigned)s, (unsigned)v6->l3_id, (unsigned)v6->kind, (int)v6->cfg, llc, gua, en, ip6, (unsigned)v6->prefix_len, gw6, @@ -419,4 +419,4 @@ void NetworkDispatch::dump_interfaces() } kprintf("[net]interface dump end"); -} +} \ No newline at end of file diff --git a/kernel/networking/port_manager.h b/kernel/networking/port_manager.h index ac031f2a..e2cde304 100644 --- a/kernel/networking/port_manager.h +++ b/kernel/networking/port_manager.h @@ -13,7 +13,7 @@ extern "C" { #define PROTO_COUNT 2 -typedef void (*port_recv_handler_t)( +typedef uint32_t (*port_recv_handler_t)( uint8_t ifindex, ip_version_t ipver, const void* src_ip_addr, @@ -21,7 +21,8 @@ typedef void (*port_recv_handler_t)( uintptr_t frame_ptr, uint32_t frame_len, uint16_t src_port, - uint16_t dst_port); + uint16_t dst_port +); typedef struct { uint16_t pid; diff --git a/kernel/networking/processes/net_proc.c b/kernel/networking/processes/net_proc.c index bd71c613..b9d214e8 100644 --- a/kernel/networking/processes/net_proc.c +++ b/kernel/networking/processes/net_proc.c @@ -1,4 +1,4 @@ -#include "net_proc.h" +#include "net_proc.h" #include "kernel_processes/kprocess_loader.h" #include "process/scheduler.h" #include "console/kio.h" @@ -10,41 +10,42 @@ #include "networking/interface_manager.h" #include "networking/link_layer/arp.h" +#include "networking/link_layer/ndp.h" #include "networking/internet_layer/ipv4.h" #include "networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "net/checksums.h" + #include "networking/transport_layer/csocket_udp.h" +#include "networking/transport_layer/trans_utils.h" #include "networking/application_layer/csocket_http_client.h" #include "networking/application_layer/csocket_http_server.h" #include "networking/application_layer/dhcp_daemon.h" -#include 
"networking/application_layer/dns_daemon.h" -#include "networking/application_layer/dns.h" +#include "networking/application_layer/dns/dns_daemon.h" +#include "networking/application_layer/dns/mdns_responder.h" +#include "networking/application_layer/dns/dns.h" #include "networking/application_layer/sntp_daemon.h" +#include "networking/application_layer/ntp.h" +#include "networking/application_layer/ntp_daemon.h" +#include "networking/application_layer/dhcpv6_daemon.h" +#include "networking/application_layer/ssdp_daemon.h" #include "exceptions/timer.h" #include "syscalls/syscalls.h" -static inline int ipv4_is_loopback_u32(uint32_t ip) { - return ((ip & 0xFF000000u) == 0x7F000000u); -} - -static uint32_t pick_probe_ip_v4(const l3_ipv4_interface_t *ifv4) { - if (ifv4->ip && ifv4->mask) return ipv4_broadcast_calc(ifv4->ip, ifv4->mask); - return 0; -} static int udp_probe_server(uint32_t probe_ip, uint16_t probe_port, net_l4_endpoint *out_l4) { - socket_handle_t sock = udp_socket_create(SOCK_ROLE_CLIENT, (uint16_t)get_current_proc_pid()); + socket_handle_t sock = udp_socket_create(SOCK_ROLE_CLIENT, (uint16_t)get_current_proc_pid(), NULL); if (!sock) return 0; - net_l4_endpoint dst = (net_l4_endpoint){0}; - dst.ver = IP_VER4; - memcpy(dst.ip, &probe_ip, 4); - dst.port = probe_port; + net_l4_endpoint dst; + make_ep(probe_ip, probe_port, IP_VER4, &dst); static const char greeting[] = "hello"; if (socket_sendto_udp_ex(sock, DST_ENDPOINT, &dst, 0, greeting, sizeof(greeting)) < 0) { @@ -77,32 +78,35 @@ static int udp_probe_server(uint32_t probe_ip, uint16_t probe_port, net_l4_endpo } static void free_request(HTTPRequestMsg *req) { - if (req->path.mem_length) - free_sized(req->path.data, req->path.mem_length); - for (uint32_t i = 0; i < req->extra_header_count; i++) { - HTTPHeader *h = &req->extra_headers[i]; - if (h->key.mem_length) - free_sized(h->key.data, h->key.mem_length); - if (h->value.mem_length) - free_sized(h->value.data, h->value.mem_length); - } - if 
(req->extra_headers) - free_sized(req->extra_headers, req->extra_header_count * sizeof(HTTPHeader)); - if (req->body.ptr && req->body.size) - free_sized((void*)req->body.ptr, req->body.size); + if (!req) return; + + if (req->path.mem_length) free_sized(req->path.data, req->path.mem_length); + + http_headers_common_free(&req->headers_common); + http_headers_extra_free(req->extra_headers, req->extra_header_count); + req->extra_headers = NULL; + req->extra_header_count = 0; + + if (req->body.ptr && req->body.size) free_sized((void*)req->body.ptr, req->body.size); + + req->path = (string){0}; + req->body = (sizedptr){0}; } + static void run_http_server() { - kprintf("[HTTP] server bootstrap"); uint16_t pid = get_current_proc_pid(); - http_server_handle_t srv = http_server_create(pid); + SocketExtraOptions opt = {0}; + opt.debug_level = SOCK_DBG_ALL; + opt.flags = SOCK_OPT_DEBUG; + http_server_handle_t srv = http_server_create(pid, &opt); if (!srv) { stop_current_process(1); return; } struct SockBindSpec spec = {0}; spec.kind = BIND_ANY; - if (http_server_bind_ex(srv, &spec, 80) < 0) { + if (http_server_bind(srv, &spec, 80) < 0) { http_server_destroy(srv); stop_current_process(2); return; @@ -115,7 +119,7 @@ static void run_http_server() { return; } - kprintf("[HTTP] listening on port 80"); + mdns_register_service("RedactedOS", "http", "tcp", 80, "path=/"); static const char HTML_ROOT[] = "

Hello, world!

\n" @@ -132,18 +136,19 @@ static void run_http_server() { while (1) { http_connection_handle_t conn = http_server_accept(srv); - if (!conn) + if (!conn){ + msleep(10); continue; + } HTTPRequestMsg req = http_server_recv_request(srv, conn); if (req.path.length) { char tmp[128] = {0}; uint32_t n = req.path.length < sizeof(tmp) - 1 ? req.path.length : sizeof(tmp) - 1; memcpy(tmp, req.path.data, n); - kprintf("[HTTP] GET %s", tmp); } HTTPResponseMsg res = (HTTPResponseMsg){0}; - + if (req.path.length == 1 && req.path.data[0] == '/') { res.status_code = HTTP_OK; res.reason = STR_OK; @@ -179,15 +184,20 @@ static void test_http(const net_l4_endpoint* ep) { } uint16_t pid = get_current_proc_pid(); - http_client_handle_t cli = http_client_create(pid); + http_client_handle_t cli = http_client_create(pid, NULL); if (!cli) { kprintf("[HTTP] http_client_create FAIL"); return; } net_l4_endpoint e = {0}; - e.ver = IP_VER4; - memcpy(e.ip, ep->ip, 4); + e.ver = ep->ver; + if (e.ver == IP_VER4) memcpy(e.ip, ep->ip, 4); + else if (e.ver == IP_VER6) memcpy(e.ip, ep->ip, 16); + else { + http_client_destroy(cli); + return; + } e.port = 80; int rc = http_client_connect_ex(cli, DST_ENDPOINT, &e, 0); @@ -203,9 +213,6 @@ static void test_http(const net_l4_endpoint* ep) { HTTPResponseMsg resp = http_client_send_request(cli, &req); - //free(req.path.data, req.path.mem_length); - //free(req.headers_common.connection.data, req.headers_common.connection.mem_length); - if ((int)resp.status_code < 0) { kprintf("[HTTP] request FAIL status=%i", (int)resp.status_code); http_client_close(cli); @@ -227,33 +234,12 @@ static void test_http(const net_l4_endpoint* ep) { http_client_close(cli); http_client_destroy(cli); - if (resp.reason.data && resp.reason.mem_length) - free_sized(resp.reason.data, resp.reason.mem_length); + if (resp.body.ptr && resp.body.size) free_sized((void*)resp.body.ptr, resp.body.size); - for (uint32_t i = 0; i < resp.extra_header_count; i++) { - HTTPHeader *h = 
&resp.extra_headers[i]; - if (h->key.mem_length) - free_sized(h->key.data, h->key.mem_length); - if (h->value.mem_length) - free_sized(h->value.data, h->value.mem_length); - } + http_headers_common_free(&resp.headers_common); - if (resp.extra_headers) - free_sized(resp.extra_headers, resp.extra_header_count * sizeof(HTTPHeader)); -} - - -static void print_info_for_ifv4(const l3_ipv4_interface_t* ifv4) { - if (!ifv4 || !ifv4->ip) return; - if (ifv4->is_localhost) return; - if (ipv4_is_loopback_u32(ifv4->ip)) return; - char ip_str[16]; - char mask_str[16]; - char gw_str[16]; - ipv4_to_string(ifv4->ip, ip_str); - ipv4_to_string(ifv4->mask, mask_str); - ipv4_to_string(ifv4->gw, gw_str); - kprintf("[NET] IF l3_id=%u IP: %s MASK: %s GW: %s", (unsigned)ifv4->l3_id, ip_str, mask_str, gw_str); + if (resp.reason.data && resp.reason.mem_length) free_sized(resp.reason.data, resp.reason.mem_length); + http_headers_extra_free(resp.extra_headers, resp.extra_header_count); } static int ifv4_is_ready_nonlocal(const l3_ipv4_interface_t* ifv4) { @@ -261,7 +247,17 @@ static int ifv4_is_ready_nonlocal(const l3_ipv4_interface_t* ifv4) { if (ifv4->mode == IPV4_CFG_DISABLED) return 0; if (!ifv4->ip) return 0; if (ifv4->is_localhost) return 0; - if (ipv4_is_loopback_u32(ifv4->ip)) return 0; + if ((ifv4->ip & 0xFF000000u) == 0x7F000000u) return 0; + return 1; +} + +static int ifv6_is_ready_nonlocal(const l3_ipv6_interface_t* ifv6) { + if (!ifv6) return 0; + if (ifv6->cfg == IPV6_CFG_DISABLE) return 0; + if (ifv6->is_localhost) return 0; + if (ipv6_is_unspecified(ifv6->ip)) return 0; + if (ifv6->dad_state != IPV6_DAD_OK) return 0; + if (ipv6_is_loopback(ifv6->ip)) return 0; return 1; } @@ -278,43 +274,38 @@ static int any_ipv4_ready(void) { return 0; } -static void print_info() { - network_dump_interfaces(); +static int any_ipv6_ready(void) { uint8_t n_if = l2_interface_count(); for (uint8_t i = 0; i < n_if; i++) { l2_interface_t* l2 = l2_interface_at(i); if (!l2 || !l2->is_up) continue; - 
for (uint8_t j = 0; j < MAX_IPV4_PER_INTERFACE; j++) { - l3_ipv4_interface_t* ifv4 = l2->l3_v4[j]; - if (!ifv4_is_ready_nonlocal(ifv4)) continue; - print_info_for_ifv4(ifv4); + for (uint8_t j = 0; j < MAX_IPV6_PER_INTERFACE; j++) { + l3_ipv6_interface_t* ifv6 = l2->l3_v6[j]; + if (ifv6_is_ready_nonlocal(ifv6)) return 1; } } - if (!sntp_is_running()) { - kprintf("[TIME] starting SNTP..."); - create_kernel_process("sntpd", sntp_daemon_entry, 0, 0); + return 0; +} + +static int ntp(int argc, char* argv[]) { + if (!ntp_is_running()) { + kprintf("[TIME] starting NTP..."); + create_kernel_process("ntpd", ntp_daemon_entry, 0, 0); uint32_t waited = 0; const uint32_t step = 200; const uint32_t timeout = 10000; while (!timer_is_synchronised() && waited < timeout) { - if ((waited % 1000) == 0) kprintf("[TIME] waiting SNTP sync..."); + if ((waited % 1000) == 0) kprintf("[TIME] waiting NTP sync..."); msleep(step); waited += step; } - if (!timer_is_synchronised()) kprintf("[TIME] SNTP sync timeout, continuing"); + if (!timer_is_synchronised()) kprintf("[TIME] NTP sync timeout, continuing"); } - kprintf("[NET] PIDs -- NET: %i ARP: %i DHCP: %i DNS: %i SNTP: %i", - network_net_get_pid(), - arp_get_pid(), - dhcp_get_pid(), - dns_get_pid(), - sntp_get_pid()); - timer_set_timezone_minutes(120); kprintf("[TIME]timezone offset %i minutes", (int32_t)timer_get_timezone_minutes()); - DateTime now_dt_utc, now_dt_loc; + DateTime now_dt_utc, now_dt_loc; if (timer_now_datetime(&now_dt_utc, 0)) { char s[20]; timer_datetime_to_string(&now_dt_utc, s, sizeof s); @@ -325,12 +316,18 @@ static void print_info() { timer_datetime_to_string(&now_dt_loc, s, sizeof s); kprintf("[TIME] LOCAL: %s (TZ %i min)", s, (int32_t)timer_get_timezone_minutes()); } + return 0; } static void test_net_for_interface(l3_ipv4_interface_t* ifv4) { if (!ifv4_is_ready_nonlocal(ifv4)) return; - print_info_for_ifv4(ifv4); - uint32_t probe_ip = pick_probe_ip_v4(ifv4); + char ip_str[16]; + char mask_str[16]; + char gw_str[16]; + 
ipv4_to_string(ifv4->ip, ip_str); + ipv4_to_string(ifv4->mask, mask_str); + ipv4_to_string(ifv4->gw, gw_str); + uint32_t probe_ip = (ifv4->ip && ifv4->mask) ? ipv4_broadcast_calc(ifv4->ip, ifv4->mask) : 0; if (!probe_ip) return; char probe_str[16]; ipv4_to_string(probe_ip, probe_str); @@ -344,7 +341,7 @@ static void test_net_for_interface(l3_ipv4_interface_t* ifv4) { } static void test_net() { - print_info(); + create_kernel_process("ntp", ntp, 0, NULL); uint8_t n_if = l2_interface_count(); int tested_any = 0; for (uint8_t i = 0; i < n_if; i++) { @@ -377,8 +374,8 @@ static int net_test_entry(int argc, char* argv[]) { static int ip_waiter_entry(int argc, char* argv[]) { (void)argc; (void)argv; uint32_t waited = 0; - while (!any_ipv4_ready()) { - if ((waited % 1000) == 0) kprintf("[NET] ip_waiter: waiting for ipv4..."); + while (!any_ipv4_ready() && !any_ipv6_ready()) { + if ((waited % 1000) == 0) kprintf("[NET] ip_waiter: waiting for ip..."); msleep(200); waited += 200; } @@ -389,16 +386,18 @@ static int ip_waiter_entry(int argc, char* argv[]) { process_t* launch_net_process() { create_kernel_process("net_net", network_net_task_entry, 0, 0); create_kernel_process("arp_daemon", arp_daemon_entry, 0, 0); + create_kernel_process("ndp_daemon", ndp_daemon_entry, 0, 0); + //create_kernel_process("ssdp_daemon", ssdp_daemon_entry, 0, 0); create_kernel_process("dhcp_daemon", dhcp_daemon_entry, 0, 0); - + create_kernel_process("dhcpv6_daemon", dhcpv6_daemon_entry, 0, 0); create_kernel_process("dns_daemon", dns_deamon_entry, 0, 0); - - if (any_ipv4_ready()) { - kprintf("[NET] ipv4 ready, starting net_test"); + + if (any_ipv4_ready() || any_ipv6_ready()) { + kprintf("[NET] ip ready, starting net_test"); create_kernel_process("net_test", net_test_entry, 0, 0); return NULL; } create_kernel_process("ip_waiter", ip_waiter_entry, 0, 0); return NULL; -} +} \ No newline at end of file diff --git a/kernel/networking/transport_layer/csocket.c b/kernel/networking/transport_layer/csocket.c 
index b38ed624..46c6a17b 100644 --- a/kernel/networking/transport_layer/csocket.c +++ b/kernel/networking/transport_layer/csocket.c @@ -31,15 +31,15 @@ static inline void check_mem(){ } } -bool create_socket(Socket_Role role, protocol_t protocol, uint16_t pid, SocketHandle *out_handle){ +bool create_socket(Socket_Role role, protocol_t protocol, const SocketExtraOptions* extra, uint16_t pid, SocketHandle *out_handle){ check_mem(); socket_handle_t *in_handle = {}; switch (protocol) { case PROTO_UDP: - in_handle = udp_socket_create(role, pid); + in_handle = udp_socket_create(role, pid, extra); break; case PROTO_TCP: - in_handle = socket_tcp_create(role, pid); + in_handle = socket_tcp_create(role, pid, extra); break; } if (!in_handle){ @@ -75,7 +75,9 @@ int32_t bind_socket(SocketHandle *handle, uint16_t port, ip_version_t ip_version SockBindSpec *spec = kalloc(sock_mem_page, sizeof(SockBindSpec), ALIGN_64B, MEM_PRIV_KERNEL); spec->kind = BIND_IP; spec->ver = ip_version; - memcpy(&spec->ip, handle->connection.ip, 4); + memset(spec->ip, 0, sizeof(spec->ip)); + if (ip_version == IP_VER4) memcpy(spec->ip, handle->connection.ip, 4); + else if (ip_version == IP_VER6) memcpy(spec->ip, handle->connection.ip, 16); int32_t res = -1; switch (protocol) { case PROTO_TCP: @@ -199,4 +201,4 @@ void accept_on_socket(SocketHandle *handle, uint16_t pid){ kprintf("[SOCKET] accept is a TCP-only function and isn't needed in UDP sockets"); break; } -} +} \ No newline at end of file diff --git a/kernel/networking/transport_layer/csocket.h b/kernel/networking/transport_layer/csocket.h index da60fcf3..0434c30b 100644 --- a/kernel/networking/transport_layer/csocket.h +++ b/kernel/networking/transport_layer/csocket.h @@ -2,8 +2,9 @@ #include "types.h" #include "net/network_types.h" +#include "net/socket_types.h" -bool create_socket(Socket_Role role, protocol_t protocol, uint16_t pid, SocketHandle *out_handle); +bool create_socket(Socket_Role role, protocol_t protocol, const SocketExtraOptions* 
extra, uint16_t pid, SocketHandle *out_handle); int32_t bind_socket(SocketHandle *handle, uint16_t port, ip_version_t ip_vers, uint16_t pid); int32_t connect_socket(SocketHandle *handle, uint8_t dst_kind, const void* dst, uint16_t port, uint16_t pid); diff --git a/kernel/networking/transport_layer/csocket_tcp.cpp b/kernel/networking/transport_layer/csocket_tcp.cpp index 4e3bf8c9..63659d87 100644 --- a/kernel/networking/transport_layer/csocket_tcp.cpp +++ b/kernel/networking/transport_layer/csocket_tcp.cpp @@ -4,8 +4,8 @@ extern "C" { -socket_handle_t socket_tcp_create(uint8_t role, uint32_t pid) { - return reinterpret_cast(new TCPSocket(role, pid)); +socket_handle_t socket_tcp_create(uint8_t role, uint32_t pid, const SocketExtraOptions* extra) { + return reinterpret_cast(new TCPSocket(role, pid, extra)); } int32_t socket_bind_tcp_ex(socket_handle_t sh, const SockBindSpec* spec, uint16_t port) { diff --git a/kernel/networking/transport_layer/csocket_tcp.h b/kernel/networking/transport_layer/csocket_tcp.h index f843489f..bbc7deb2 100644 --- a/kernel/networking/transport_layer/csocket_tcp.h +++ b/kernel/networking/transport_layer/csocket_tcp.h @@ -10,7 +10,7 @@ extern "C" { typedef void* socket_handle_t; -socket_handle_t socket_tcp_create(uint8_t role, uint32_t pid); +socket_handle_t socket_tcp_create(uint8_t role, uint32_t pid, const SocketExtraOptions* extra); int32_t socket_bind_tcp_ex(socket_handle_t sh, const SockBindSpec* spec, uint16_t port); int32_t socket_listen_tcp(socket_handle_t sh, int32_t backlog); socket_handle_t socket_accept_tcp(socket_handle_t sh); diff --git a/kernel/networking/transport_layer/csocket_udp.cpp b/kernel/networking/transport_layer/csocket_udp.cpp index b45acab4..3dc04248 100644 --- a/kernel/networking/transport_layer/csocket_udp.cpp +++ b/kernel/networking/transport_layer/csocket_udp.cpp @@ -2,8 +2,8 @@ #include "networking/transport_layer/socket.hpp" #include "csocket_udp.h" -extern "C" socket_handle_t udp_socket_create(uint8_t role, 
uint32_t pid) { - return reinterpret_cast(new UDPSocket(role, pid)); +extern "C" socket_handle_t udp_socket_create(uint8_t role, uint32_t pid, const SocketExtraOptions* extra) { + return reinterpret_cast(new UDPSocket(role, pid, extra)); } extern "C" int32_t socket_bind_udp_ex(socket_handle_t sh, const SockBindSpec* spec, uint16_t port) { diff --git a/kernel/networking/transport_layer/csocket_udp.h b/kernel/networking/transport_layer/csocket_udp.h index 7e6b671a..479fefcf 100644 --- a/kernel/networking/transport_layer/csocket_udp.h +++ b/kernel/networking/transport_layer/csocket_udp.h @@ -10,7 +10,7 @@ extern "C" { typedef void* socket_handle_t; -socket_handle_t udp_socket_create(uint8_t role, uint32_t pid); +socket_handle_t udp_socket_create(uint8_t role, uint32_t pid, const SocketExtraOptions* extra); int32_t socket_bind_udp_ex(socket_handle_t sh, const SockBindSpec* spec, uint16_t port); int64_t socket_sendto_udp_ex(socket_handle_t sh, uint8_t dst_kind, const void* dst, uint16_t port, const void* buf, uint64_t len); int64_t socket_recvfrom_udp_ex(socket_handle_t sh, void* buf, uint64_t len, net_l4_endpoint* out_src); diff --git a/kernel/networking/transport_layer/socket.hpp b/kernel/networking/transport_layer/socket.hpp index c2e3a612..2c7a3030 100644 --- a/kernel/networking/transport_layer/socket.hpp +++ b/kernel/networking/transport_layer/socket.hpp @@ -5,6 +5,8 @@ #include "tcp.h" #include "udp.h" #include "net/socket_types.h" +#include "console/kio.h" +#include "networking/net_logger/net_logger.h" #ifdef __cplusplus extern "C" { @@ -44,10 +46,14 @@ class Socket { bool connected = false; uint16_t pid = 0; + SocketExtraOptions extraOpts = {}; + uint8_t bound_l3[SOCK_MAX_L3] = {0}; int bound_l3_count = 0; - Socket(uint8_t protocol, uint8_t r) : proto(protocol), role(r) {} + Socket(uint8_t protocol, uint8_t r, const SocketExtraOptions* extra) : proto(protocol), role(r) { + if (extra) extraOpts = *extra; + } virtual void do_unbind_one(uint8_t l3_id, uint16_t 
port, uint16_t pid) = 0; diff --git a/kernel/networking/transport_layer/socket_tcp.hpp b/kernel/networking/transport_layer/socket_tcp.hpp old mode 100644 new mode 100755 index 917832ae..bad5d0b4 --- a/kernel/networking/transport_layer/socket_tcp.hpp +++ b/kernel/networking/transport_layer/socket_tcp.hpp @@ -5,24 +5,25 @@ #include "socket.hpp" #include "networking/transport_layer/tcp.h" #include "networking/internet_layer/ipv4.h" -#include "networking/application_layer/dns.h" +#include "networking/application_layer/dns/dns.h" #include "types.h" #include "data_struct/ring_buffer.hpp" #include "net/socket_types.h" #include "networking/internet_layer/ipv4_route.h" +#include "networking/internet_layer/ipv6_route.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/transport_layer/trans_utils.h" #include "syscalls/syscalls.h" - static constexpr int TCP_MAX_BACKLOG = 8; static constexpr dns_server_sel_t TCP_DNS_SEL = DNS_USE_BOTH; static constexpr uint32_t TCP_DNS_TIMEOUT_MS = 3000; class TCPSocket : public Socket { - inline static TCPSocket* s_list_head = nullptr; - static constexpr int TCP_RING_CAP = 1024; - RingBuffer ring; + static constexpr uint32_t TCP_RING_CAP = 256 * 1024; + RingBuffer ring; tcp_data* flow = nullptr; TCPSocket* pending[TCP_MAX_BACKLOG] = { nullptr }; @@ -30,12 +31,7 @@ class TCPSocket : public Socket { int backlogLen = 0; TCPSocket* next = nullptr; - static bool contains_id(const uint8_t* arr, int n, uint8_t id){ - for (int i=0;il2) return false; if (!v4->l2->is_up) return false; if (v4->is_localhost) return false; @@ -45,129 +41,209 @@ class TCPSocket : public Socket { return true; } - static bool socket_matches_dst(TCPSocket* s, uint8_t ifx, ip_version_t ver, const void* dst_ip_addr, uint16_t dst_port){ - if (!s->bound) return false; - if (s->localPort != dst_port) return false; - for (int i=0;ibound_l3_count;++i){ - uint8_t id = s->bound_l3[i]; - if (ver == IP_VER4){ - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(id); - 
if (!is_valid_v4_l3_for_bind(v4)) continue; - if (v4->l2->ifindex != ifx) continue; - if (v4->ip == *(const uint32_t*)dst_ip_addr) return true; - } else if (ver == IP_VER6){ - l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(id); - if (!v6 || !v6->l2 || !v6->l2->is_up || v6->is_localhost || v6->cfg == IPV6_CFG_DISABLE) continue; - if (v6->l2->ifindex != ifx) continue; - if (memcmp(v6->ip, dst_ip_addr, 16) == 0) return true; - } - } - return false; - } - - static uint8_t find_matching_l3(TCPSocket* s, uint8_t ifx, ip_version_t ver, const void* dst_ip_addr){ - for (int i=0;ibound_l3_count;++i){ - uint8_t id = s->bound_l3[i]; - if (ver == IP_VER4){ - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(id); - if (!is_valid_v4_l3_for_bind(v4)) continue; - if (v4->l2->ifindex != ifx) continue; - if (v4->ip == *(const uint32_t*)dst_ip_addr) return id; - } else if (ver == IP_VER6){ - l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(id); - if (!v6 || !v6->l2 || !v6->l2->is_up || v6->is_localhost || v6->cfg == IPV6_CFG_DISABLE) continue; - if (v6->l2->ifindex != ifx) continue; - if (memcmp(v6->ip, dst_ip_addr, 16) == 0) return id; - } - } - if (ver == IP_VER4){ - uint32_t v4dst; memcpy(&v4dst, dst_ip_addr, 4); - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_ip(v4dst); - if (is_valid_v4_l3_for_bind(v4) && v4->l2 && v4->l2->ifindex == ifx) return v4->l3_id; - } - return 0; - } - - static bool socket_matches_flow(TCPSocket* s, uint8_t ifx, ip_version_t ver, const void* dst_ip_addr, uint16_t dst_port, const void* src_ip_addr, uint16_t src_port){ - (void)ifx; - if (!s->connected) return false; - if (s->localPort != dst_port) return false; - if (s->remoteEP.port != src_port) return false; - if (s->remoteEP.ver != ver) return false; - if (ver == IP_VER4){ - if (*(const uint32_t*)s->remoteEP.ip != *(const uint32_t*)src_ip_addr) return false; - } else { - if (memcmp(s->remoteEP.ip, src_ip_addr, 16) != 0) return false; - } + static bool is_valid_v6_l3_for_bind(l3_ipv6_interface_t* v6) { + if (!v6 || 
!v6->l2) return false; + if (!v6->l2->is_up) return false; + if (v6->is_localhost) return false; + if (v6->cfg == IPV6_CFG_DISABLE) return false; + if (ipv6_is_unspecified(v6->ip)) return false; + if (v6->dad_state == IPV6_DAD_FAILED) return false; + if (!(v6->kind & IPV6_ADDRK_LINK_LOCAL) && v6->dad_state != IPV6_DAD_OK) return false; + if (!v6->port_manager) return false; return true; } - static void dispatch(uint8_t ifindex, ip_version_t ipver, const void* src_ip_addr, const void* dst_ip_addr, - uintptr_t frame_ptr, uint32_t frame_len, uint16_t src_port, uint16_t dst_port) - { + static uint32_t dispatch(uint8_t ifindex, ip_version_t ipver, const void* src_ip_addr, const void* dst_ip_addr, uintptr_t frame_ptr, uint32_t frame_len, uint16_t src_port, uint16_t dst_port) { if (frame_len == 0){ for (TCPSocket* srv = s_list_head; srv; srv = srv->next){ if (srv->role != SOCK_ROLE_SERVER) continue; - if (!socket_matches_dst(srv, ifindex, ipver, dst_ip_addr, dst_port)) continue; + if (!srv->bound) continue; + if (srv->localPort != dst_port) continue; + + bool matches_dst = false; + for (int i = 0; i < srv->bound_l3_count; ++i) { + uint8_t id = srv->bound_l3[i]; + + if (ipver == IP_VER4) { + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(id); + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (v4->l2->ifindex != ifindex) continue; + if (v4->ip == *(const uint32_t*)dst_ip_addr) { + matches_dst = true; + break; + } + } else if (ipver == IP_VER6) { + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(id); + if (!is_valid_v6_l3_for_bind(v6)) continue; + if (v6->l2->ifindex != ifindex) continue; + if (memcmp(v6->ip, dst_ip_addr, 16) == 0){ + matches_dst = true; + break; + } + } + } + + if (!matches_dst) continue; if (srv->backlogLen >= srv->backlogCap) break; - TCPSocket* child = new TCPSocket(SOCK_ROLE_CLIENT, srv->pid); + + TCPSocket* child = new TCPSocket(SOCK_ROLE_CLIENT, srv->pid, &srv->extraOpts); + child->localPort = dst_port; child->connected = true; + child->remoteEP.ver = 
ipver; memset(child->remoteEP.ip, 0, 16); if (ipver == IP_VER4) memcpy(child->remoteEP.ip, src_ip_addr, 4); else memcpy(child->remoteEP.ip, src_ip_addr, 16); child->remoteEP.port = src_port; - uint8_t l3id = find_matching_l3(srv, ifindex, ipver, dst_ip_addr); + + uint8_t l3id = 0; + + for (int i = 0; i < srv->bound_l3_count; ++i) { + uint8_t id = srv->bound_l3[i]; + + if (ipver == IP_VER4) { + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(id); + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (v4->l2->ifindex != ifindex) continue; + if (v4->ip == *(const uint32_t*)dst_ip_addr) { + l3id = id; + break; + } + } else if (ipver == IP_VER6) { + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(id); + if (!is_valid_v6_l3_for_bind(v6)) continue; + if (v6->l2->ifindex != ifindex) continue; + if (memcmp(v6->ip, dst_ip_addr, 16) == 0) { + l3id = id; + break; + } + } + } + + if (!l3id) { + if (ipver == IP_VER4) { + uint32_t v4dst = 0; + memcpy(&v4dst, dst_ip_addr, 4); + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_ip(v4dst); + if (is_valid_v4_l3_for_bind(v4) && v4->l2 && v4->l2->ifindex == ifindex) l3id = v4->l3_id; + } else if (ipver == IP_VER6) { + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_ip((const uint8_t*)dst_ip_addr); + if (is_valid_v6_l3_for_bind(v6) && v6->l2 && v6->l2->ifindex == ifindex) l3id = v6->l3_id; + } + } + child->clear_bound_l3(); + if (l3id) { child->add_bound_l3(l3id); } else if (ipver == IP_VER4){ - uint32_t v4dst; memcpy(&v4dst, dst_ip_addr, 4); + uint32_t v4dst = 0; + memcpy(&v4dst, dst_ip_addr, 4); l3_ipv4_interface_t* v4 = l3_ipv4_find_by_ip(v4dst); - if (is_valid_v4_l3_for_bind(v4) && v4->l2 && v4->l2->ifindex == ifindex){ + if (is_valid_v4_l3_for_bind(v4) && v4->l2 && v4->l2->ifindex == ifindex) { child->add_bound_l3(v4->l3_id); } else { - for (int i=0;ibound_l3_count;i++){ + for (int i = 0; i < srv->bound_l3_count; ++i) { l3_ipv4_interface_t* sv4 = l3_ipv4_find_by_id(srv->bound_l3[i]); - if (sv4 && sv4->l2 && sv4->l2->is_up && sv4->l2->ifindex == ifindex){ 
- child->add_bound_l3(sv4->l3_id); - } + if (!sv4 || !sv4->l2) continue; + if (!sv4->l2->is_up) continue; + if (sv4->l2->ifindex != ifindex) continue; + child->add_bound_l3(sv4->l3_id); } } + } else if (ipver == IP_VER6) { + for (int i = 0; i < srv->bound_l3_count; ++i) { + l3_ipv6_interface_t* sv6 = l3_ipv6_find_by_id(srv->bound_l3[i]); + if (!is_valid_v6_l3_for_bind(sv6)) continue; + if (!sv6->l2 || !sv6->l2->is_up) continue; + if (sv6->l2->ifindex != ifindex) continue; + child->add_bound_l3(sv6->l3_id); + } } - child->flow = tcp_get_ctx(dst_port, ipver, child->remoteEP.ip, src_port); + + child->flow = tcp_get_ctx(dst_port, ipver, dst_ip_addr, child->remoteEP.ip, src_port); if (!child->flow){ child->close(); delete child; break; } + + child->insert_in_list(); + srv->pending[srv->backlogLen++] = child; break; } - return; - } - for (TCPSocket* s = s_list_head; s; s = s->next){ - if (!socket_matches_flow(s, ifindex, ipver, dst_ip_addr, dst_port, src_ip_addr, src_port)) continue; - s->on_receive(frame_ptr, frame_len); - return; + return 0; } - if (frame_ptr && frame_len){ - free_sized((void*)frame_ptr, frame_len); + + for (TCPSocket* s = s_list_head; s; s = s->next) { + if (!s->connected) continue; + if (s->localPort != dst_port) continue; + if (s->remoteEP.port != src_port) continue; + if (s->remoteEP.ver != ipver) continue; + + bool matches_dst = (s->bound_l3_count == 0); + for (int i = 0; !matches_dst && i < s->bound_l3_count; ++i) { + uint8_t id = s->bound_l3[i]; + + if (ipver == IP_VER4) { + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(id); + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (v4->l2->ifindex != ifindex) continue; + if (v4->ip == *(const uint32_t*)dst_ip_addr) { + matches_dst = true; + break; + } + } else { + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(id); + if (!is_valid_v6_l3_for_bind(v6)) continue; + if (v6->l2->ifindex != ifindex) continue; + if (memcmp(v6->ip, dst_ip_addr, 16) == 0) { + matches_dst = true; + break; + } + } + } + + if 
(!matches_dst) continue; + + if (ipver == IP_VER4) { + if (*(const uint32_t*)s->remoteEP.ip != *(const uint32_t*)src_ip_addr) continue; + } else { + if (memcmp(s->remoteEP.ip, src_ip_addr, 16) != 0) continue; + } + + return s->on_receive(frame_ptr, frame_len); } + + return 0; } + + uint32_t on_receive(uintptr_t ptr, uint32_t len) { + if(!ptr || !len) return 0; + + uint64_t limit = ring.capacity(); + if ((extraOpts.flags & SOCK_OPT_BUF_SIZE) && extraOpts.buf_size) { + uint64_t m = extraOpts.buf_size; + if (m < limit) limit = m; + } + if (!limit) return 0; + + const uint8_t* src = (const uint8_t*)ptr; + uint32_t pushed = 0; + + uint64_t sz = ring.size(); + if (sz < limit) { + uint64_t free = limit - sz; - void on_receive(uintptr_t ptr, uint32_t len) { - uint8_t* data = (uint8_t*)malloc(len); - if (!data) return; - memcpy(data, (void*)ptr, len); - sizedptr packet { (uintptr_t)data, len }; - if (!ring.push(packet)) { - sizedptr dropped; - ring.pop(dropped); - free_sized((void*)dropped.ptr, dropped.size); - ring.push(packet); + uint32_t accept = len; + if((uint64_t)accept > free) accept = (uint32_t)free; + + pushed = (uint32_t)ring.push_buf(src, accept); } + + return pushed; } void insert_in_list() { @@ -183,7 +259,10 @@ class TCPSocket : public Socket { void remove_from_list() { TCPSocket** cur = &s_list_head; while (*cur) { - if (*cur == this) { *cur = (*cur)->next; break; } + if (*cur == this) { + *cur = (*cur)->next; + break; + } cur = &((*cur)->next); } next = nullptr; @@ -195,37 +274,74 @@ class TCPSocket : public Socket { (void)tcp_unbind_l3(l3_id, port, pid); } - bool add_all_l3_on_l2(uint8_t ifindex, uint8_t* tmp_ids, int& n){ + bool add_all_l3_on_l2(uint8_t ifindex, uint8_t* tmp_ids, ip_version_t* tmp_ver, int& n) { l2_interface_t* l2 = l2_interface_find_by_index(ifindex); if (!l2 || !l2->is_up) return false; - for (int s=0; sl3_v4[s]; - if (is_valid_v4_l3_for_bind(v4)){ if (n < SOCK_MAX_L3) tmp_ids[n++] = v4->l3_id; } + if (!is_valid_v4_l3_for_bind(v4)) 
continue; + if (n < SOCK_MAX_L3) { + tmp_ids[n] = v4->l3_id; + tmp_ver[n] = IP_VER4; + ++n; + } } + + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; ++s) { + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!is_valid_v6_l3_for_bind(v6)) continue; + if (n < SOCK_MAX_L3) { + tmp_ids[n] = v6->l3_id; + tmp_ver[n] = IP_VER6; + ++n; + } + } + return n > 0; } - bool add_all_l3_any(uint8_t* tmp_ids, int& n){ + bool add_all_l3_any(uint8_t* tmp_ids, ip_version_t* tmp_ver, int& n) { uint8_t cnt = l2_interface_count(); - for (uint8_t i=0;iis_up) continue; - for (int s=0; sl3_v4[s]; - if (is_valid_v4_l3_for_bind(v4)){ if (n < SOCK_MAX_L3) tmp_ids[n++] = v4->l3_id; } + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (n < SOCK_MAX_L3) { + tmp_ids[n] = v4->l3_id; + tmp_ver[n] = IP_VER4; + ++n; + } + } + + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; ++s) { + l3_ipv6_interface_t* v6 = l2->l3_v6[s]; + if (!is_valid_v6_l3_for_bind(v6)) continue; + if (n < SOCK_MAX_L3) { + tmp_ids[n] = v6->l3_id; + tmp_ver[n] = IP_VER6; + ++n; + } } } - return n > 0; - } - static bool is_zero_ip16(const uint8_t ip[16]){ - for (int i=0;i<16;++i) if (ip[i]) return false; - return true; + return n > 0; } public: - explicit TCPSocket(uint8_t r = SOCK_ROLE_CLIENT, uint32_t pid_ = 0) : Socket(PROTO_TCP, r){ + explicit TCPSocket(uint8_t r = SOCK_ROLE_CLIENT, uint32_t pid_ = 0, const SocketExtraOptions* extra = nullptr) : Socket(PROTO_TCP, r, extra) { pid = pid_; + if (!(extraOpts.flags & SOCK_OPT_BUF_SIZE)) { + extraOpts.flags |= SOCK_OPT_BUF_SIZE; + extraOpts.buf_size = TCP_RING_CAP; + } + + if (!extraOpts.buf_size) extraOpts.buf_size = TCP_RING_CAP; + if (extraOpts.buf_size > TCP_RING_CAP) extraOpts.buf_size = TCP_RING_CAP; insert_in_list(); } @@ -235,54 +351,108 @@ class TCPSocket : public Socket { } int32_t bind(const SockBindSpec& spec_in, uint16_t port) override { + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_TCP; + ev.action = NETLOG_ACT_BIND; + ev.pid = pid; + ev.u0 = port; + ev.bind_spec = 
spec_in; + netlog_socket_event(&extraOpts, &ev); if (role != SOCK_ROLE_SERVER) return SOCK_ERR_PERM; if (bound) return SOCK_ERR_BOUND; SockBindSpec spec = spec_in; - bool empty = spec.kind == BIND_L3 && spec.l3_id==0 && spec.ifindex==0 && spec.ver==0 && is_zero_ip16(spec.ip); + bool empty = spec.kind == BIND_L3 && spec.l3_id == 0 && spec.ifindex == 0 && spec.ver == 0 && ipv6_is_unspecified(spec.ip); if (empty) spec.kind = BIND_ANY; uint8_t ids[SOCK_MAX_L3]; + ip_version_t vers[SOCK_MAX_L3]; int n = 0; if (spec.kind == BIND_L3){ - if (spec.l3_id) ids[n++] = spec.l3_id; - if (n==0) return SOCK_ERR_INVAL; + if (!spec.l3_id) return SOCK_ERR_INVAL; + + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(spec.l3_id); + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(spec.l3_id); + + bool ok4 = is_valid_v4_l3_for_bind(v4); + bool ok6 = is_valid_v6_l3_for_bind(v6); + + if (!ok4 && !ok6) return SOCK_ERR_INVAL; + + if (ok4 && n < SOCK_MAX_L3) { + ids[n] = spec.l3_id; + vers[n] = IP_VER4; + ++n; + } + if (ok6 && n < SOCK_MAX_L3) { + ids[n] = spec.l3_id; + vers[n] = IP_VER6; + ++n; + } } else if (spec.kind == BIND_L2){ - if (!add_all_l3_on_l2(spec.ifindex, ids, n)) return SOCK_ERR_INVAL; + if (!add_all_l3_on_l2(spec.ifindex, ids, vers, n)) return SOCK_ERR_INVAL; } else if (spec.kind == BIND_IP){ if (spec.ver == IP_VER4){ - uint32_t v4; memcpy(&v4, spec.ip, 4); - l3_ipv4_interface_t* ipif = l3_ipv4_find_by_ip(v4); + uint32_t v4ip = 0; + memcpy(&v4ip, spec.ip, 4); + l3_ipv4_interface_t* ipif = l3_ipv4_find_by_ip(v4ip); if (!is_valid_v4_l3_for_bind(ipif)) return SOCK_ERR_INVAL; - ids[n++] = ipif->l3_id; - } else if (spec.ver == IP_VER6){ - return SOCK_ERR_PROTO; + ids[n] = ipif->l3_id; + vers[n] = IP_VER4; + ++n; + } else if (spec.ver == IP_VER6) { + l3_ipv6_interface_t* ipif = l3_ipv6_find_by_ip(spec.ip); + if (!is_valid_v6_l3_for_bind(ipif)) return SOCK_ERR_INVAL; + ids[n] = ipif->l3_id; + vers[n] = IP_VER6; + ++n; } else return SOCK_ERR_INVAL; } else if (spec.kind == BIND_ANY){ - if 
(!add_all_l3_any(ids, n)) return SOCK_ERR_INVAL; + if (!add_all_l3_any(ids, vers, n)) return SOCK_ERR_INVAL; } else return SOCK_ERR_INVAL; if (n==0) return SOCK_ERR_INVAL; - uint8_t dedup[SOCK_MAX_L3]; int m=0; - for (int i=0;i TCP_MAX_BACKLOG ? TCP_MAX_BACKLOG : max_backlog; backlogLen = 0; return SOCK_OK; @@ -299,132 +470,255 @@ class TCPSocket : public Socket { TCPSocket* accept(){ const int max_iters = 100; int iter = 0; + while (backlogLen == 0){ if (++iter > max_iters) return nullptr; msleep(10); } + TCPSocket* client = pending[0]; - for (int i=1;iis_up) continue; - for (int s = 0; s < MAX_IPV4_PER_INTERFACE && n < SOCK_MAX_L3; ++s){ - l3_ipv4_interface_t* v4 = l2->l3_v4[s]; - if (!is_valid_v4_l3_for_bind(v4)) continue; - ids[n++] = v4->l3_id; + if (dr6 != DNS_OK && dr4 != DNS_OK) return SOCK_ERR_DNS; + + if (dr6 == DNS_OK) { + net_l4_endpoint d6{}; + d6.ver = IP_VER6; + memcpy(d6.ip, v6addr, 16); + d6.port = port; + + ipv6_tx_plan_t p6; + if (ipv6_build_tx_plan(d6.ip, nullptr, n6 ? allow_v6 : nullptr, n6, &p6)) { + d = d6; + chosen_l3 = p6.l3_id; + } + } + + if (!chosen_l3 && dr4 == DNS_OK) { + net_l4_endpoint d4{}; + make_ep(v4addr, port, IP_VER4, &d4); + + uint32_t dip = 0; + memcpy(&dip, d4.ip, 4); + ipv4_tx_plan_t p4; + if (ipv4_build_tx_plan(dip, nullptr, n4 ? 
allow_v4 : nullptr, n4, &p4)) { + d = d4; + chosen_l3 = p4.l3_id; } } - if (n == 0) return SOCK_ERR_SYS; - uint32_t dip; memcpy(&dip, d.ip, 4); - if (!ipv4_rt_pick_best_l3_in(ids, n, dip, &chosen_l3)) return SOCK_ERR_SYS; + + if (!chosen_l3) return SOCK_ERR_SYS; + } else return SOCK_ERR_INVAL; + + if (!chosen_l3) return SOCK_ERR_SYS; + + if (d.ver == IP_VER4) { l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(chosen_l3); if (!is_valid_v4_l3_for_bind(v4)) return SOCK_ERR_SYS; + } else if (d.ver == IP_VER6) { + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(chosen_l3); + if (!is_valid_v6_l3_for_bind(v6)) return SOCK_ERR_SYS; + } else return SOCK_ERR_INVAL; + + if (localPort == 0) { int p = tcp_alloc_ephemeral_l3(chosen_l3, pid, dispatch); if (p < 0) return SOCK_ERR_NO_PORT; localPort = (uint16_t)p; - clear_bound_l3(); - add_bound_l3(chosen_l3); - } else if (bound_l3_count == 1){ - chosen_l3 = bound_l3[0]; + } + + clear_bound_l3(); + add_bound_l3(chosen_l3); + bound = true; + + tcp_data ctx_copy{}; + if (!tcp_handshake_l3(chosen_l3, localPort, &d, &ctx_copy, pid, &extraOpts)) { + Socket::close(); + return SOCK_ERR_SYS; + } + + uint8_t local_ip[16]; + memset(local_ip, 0, sizeof(local_ip)); + if (d.ver == IP_VER4) { l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(chosen_l3); - if (!is_valid_v4_l3_for_bind(v4)) return SOCK_ERR_SYS; - if (localPort == 0){ - int p = tcp_alloc_ephemeral_l3(chosen_l3, pid, dispatch); - if (p < 0) return SOCK_ERR_NO_PORT; - localPort = (uint16_t)p; + if (!is_valid_v4_l3_for_bind(v4)) { + Socket::close(); + return SOCK_ERR_SYS; } + memcpy(local_ip, &v4->ip, 4); } else { - uint32_t dip; memcpy(&dip, d.ip, 4); - if (!ipv4_rt_pick_best_l3_in(bound_l3, bound_l3_count, dip, &chosen_l3)) return SOCK_ERR_SYS; - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(chosen_l3); - if (!is_valid_v4_l3_for_bind(v4)) return SOCK_ERR_SYS; - if (localPort == 0){ - int p = tcp_alloc_ephemeral_l3(chosen_l3, pid, dispatch); - if (p < 0) return SOCK_ERR_NO_PORT; - localPort = 
(uint16_t)p; - } + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(chosen_l3); + if (!is_valid_v6_l3_for_bind(v6)) { + Socket::close(); + return SOCK_ERR_SYS; + } + memcpy(local_ip, v6->ip, 16); } - tcp_data ctx_copy{}; - if (!tcp_handshake_l3(chosen_l3, localPort, &d, &ctx_copy, pid)) return SOCK_ERR_SYS; - - flow = tcp_get_ctx(localPort, d.ver, (const void*)d.ip, d.port); - if (!flow) return SOCK_ERR_SYS; + flow = tcp_get_ctx(localPort, d.ver, local_ip, (const void*)d.ip, d.port); + if (!flow) { + Socket::close(); + return SOCK_ERR_SYS; + } remoteEP = d; connected = true; + netlog_socket_event_t ev1{}; + ev1.comp = NETLOG_COMP_TCP; + ev1.action = NETLOG_ACT_CONNECTED; + ev1.pid = pid; + ev1.u0 = localPort; + ev1.u1 = remoteEP.port; + ev1.local_port = localPort; + ev1.remote_ep = remoteEP; + netlog_socket_event(&extraOpts, &ev1); return SOCK_OK; } - int64_t send(const void* buf, uint64_t len){ + int64_t send(const void* buf, uint64_t len) { + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_TCP; + ev.action = NETLOG_ACT_SEND; + ev.pid = pid; + ev.u0 = (uint32_t)len; + ev.local_port = localPort; + ev.remote_ep = remoteEP; + netlog_socket_event(&extraOpts, &ev); if (!connected || !flow) return SOCK_ERR_STATE; - flow->payload = { (uintptr_t)buf, (uint32_t)len }; - flow->flags = (1u< UINT32_MAX ? UINT32_MAX : (uint32_t)remain; + flow->payload.ptr = (uintptr_t)(p + sent_total); + flow->payload.size = chunk; + flow->flags = (1u<payload.size; + if (!pushed) break; + sent_total += pushed; + } + + if (sent_total) return (int64_t)sent_total; + return TCP_WOULDBLOCK; } int64_t recv(void* buf, uint64_t len){ - sizedptr p; - if (!ring.pop(p)) return 0; - uint32_t tocpy = p.size < (uint32_t)len ? 
p.size : (uint32_t)len; - memcpy(buf, (void*)p.ptr, tocpy); - free_sized((void*)p.ptr, p.size); - return tocpy; + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_TCP; + ev.action = NETLOG_ACT_RECV; + ev.pid = pid; + ev.u0 = (uint32_t)len; + ev.local_port = localPort; + ev.remote_ep = remoteEP; + netlog_socket_event(&extraOpts, &ev); + if (!buf || !len) return 0; + + uint8_t* out = (uint8_t*)buf; + uint64_t n = 0; + + n = ring.pop_buf(out, len); + + if (n) { + if (flow) tcp_flow_on_app_read(flow, (uint32_t)n); + return (int64_t)n; + } + if (connected) return TCP_WOULDBLOCK; + return 0; } int32_t close() override { + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_TCP; + ev.action = NETLOG_ACT_CLOSE; + ev.pid = pid; + ev.local_port = localPort; + ev.remote_ep = remoteEP; + netlog_socket_event(&extraOpts, &ev); if (connected && flow){ tcp_flow_close(flow); connected = false; flow = nullptr; } - sizedptr pkt; - while (ring.pop(pkt)){ free_sized((void*)pkt.ptr, pkt.size); } - for (int i=0;il2) return false; + if (!v4->l2->is_up) return false; + if (v4->mode == IPV4_CFG_DISABLED) return false; + if (!v4->port_manager) return false; + return true; + } + + static bool is_valid_v6_l3_for_bind(l3_ipv6_interface_t* v6) { + if (!v6) return false; + if (!v6->l2) return false; + if (!v6->l2->is_up) return false; + if (v6->cfg == IPV6_CFG_DISABLE) return false; + if (v6->dad_state != IPV6_DAD_OK) return false; + if (!v6->port_manager) return false; + return true; } - static bool is_dbcast_for(const l3_ipv4_interface_t* v4, uint32_t dst) { - if (!v4 || !v4->mask) return false; - uint32_t b = ipv4_broadcast_calc(v4->ip, v4->mask); - return b == dst; + static bool is_dbcast(uint32_t ip, uint8_t* out_l3) { + uint8_t cnt = l2_interface_count(); + for (uint8_t i = 0; i < cnt; ++i) { + l2_interface_t* l2 = l2_interface_at(i); + if (!l2) continue; + for (int s = 0; s < MAX_IPV4_PER_INTERFACE; ++s) { + l3_ipv4_interface_t* v4 = l2->l3_v4[s]; + if (!v4) continue; + if (v4->mode == 
IPV4_CFG_DISABLED) continue; + if (!v4->ip) continue; + if (!v4->mask) continue; + uint32_t b = ipv4_broadcast_calc(v4->ip, v4->mask); + if (b != ip) continue; + if (out_l3) *out_l3 = v4->l3_id; + return true; + } + } + return false; } static bool socket_matches_dst(UDPSocket* s, uint8_t ifx, ip_version_t ver, const void* dst_ip_addr, uint16_t dst_port) { + if (!s) return false; if (!s->bound) return false; if (s->localPort != dst_port) return false; + if (!dst_ip_addr) return false; + if (ver == IP_VER4) { uint32_t dip = *(const uint32_t*)dst_ip_addr; - bool lb = is_lbcast_ip(dip); + bool lb = dip == 0xFFFFFFFFu; + bool mc = ipv4_is_multicast(dip); + for (int i = 0; i < s->bound_l3_count; ++i) { uint8_t id = s->bound_l3[i]; l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(id); - if (!v4 || !v4->l2) continue; + if (!is_valid_v4_l3_for_bind(v4)) continue; if (v4->l2->ifindex != ifx) continue; if (lb) return true; - if (is_dbcast_for(v4, dip)) return true; - if (v4->ip && v4->ip == dip) return true; + if (mc) return true; + + if (v4->mask) { + uint32_t b = ipv4_broadcast_calc(v4->ip, v4->mask); + if (b == dip)return true; + } + + if (v4->ip == dip) return true; } return false; - } else if (ver == IP_VER6) { + } + + if (ver == IP_VER6) { + bool mcast = ipv6_is_multicast((const uint8_t*)dst_ip_addr); + for (int i = 0; i < s->bound_l3_count; ++i) { uint8_t id = s->bound_l3[i]; l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(id); - if (!v6 || !v6->l2) continue; + if (!is_valid_v6_l3_for_bind(v6)) continue; if (v6->l2->ifindex != ifx) continue; + + if (mcast) return true; if (memcmp(v6->ip, dst_ip_addr, 16) == 0) return true; } return false; - } else { - return false; } + + return false; } - static void dispatch(uint8_t ifindex, ip_version_t ipver, const void* src_ip_addr, const void* dst_ip_addr, uintptr_t frame_ptr, uint32_t frame_len, uint16_t src_port, uint16_t dst_port) { + static uint32_t dispatch(uint8_t ifindex, ip_version_t ipver, const void* src_ip_addr, const void* 
dst_ip_addr, uintptr_t frame_ptr, uint32_t frame_len, uint16_t src_port, uint16_t dst_port) { + for (UDPSocket* s = s_list_head; s; s = s->next) { - if (socket_matches_dst(s, ifindex, ipver, dst_ip_addr, dst_port)) { - s->on_receive(ipver, src_ip_addr, src_port, frame_ptr, frame_len); - return; - } + if (!socket_matches_dst(s, ifindex, ipver, dst_ip_addr, dst_port)) + continue; + + uintptr_t copy = (uintptr_t)malloc(frame_len); + if (!copy) continue; + + memcpy((void*)copy, (const void*)frame_ptr, frame_len); + s->on_receive(ipver, src_ip_addr, src_port, copy, frame_len); } if (frame_ptr && frame_len) free_sized((void*)frame_ptr, frame_len); + return frame_len; } void on_receive(ip_version_t ver, const void* src_ip_addr, uint16_t src_port, uintptr_t ptr, uint32_t len) { - uintptr_t copy = (uintptr_t)malloc(len); + uint32_t limit = 0xFFFFFFFFu; + if ((extraOpts.flags & SOCK_OPT_BUF_SIZE) && extraOpts.buf_size) limit = extraOpts.buf_size; + if (len > limit) { + if (ptr && len) free_sized((void*)ptr, len); + return; + } + while (rx_bytes + len > limit && r_head != r_tail) { + rx_bytes -= ring[r_head].size; + free_sized((void*)ring[r_head].ptr, ring[r_head].size); + r_head = (r_head + 1) % UDP_RING_CAP; + } + uintptr_t copy = (uintptr_t)malloc(len); if (!copy) { if (ptr && len) free_sized((void*)ptr, len); return; } + memcpy((void*)copy, (void*)ptr, len); if (ptr && len) free_sized((void*)ptr, len); int nexti = (r_tail + 1) % UDP_RING_CAP; if (nexti == r_head) { + rx_bytes -= ring[r_head].size; free_sized((void*)ring[r_head].ptr, ring[r_head].size); r_head = (r_head + 1) % UDP_RING_CAP; } - ring[r_tail] = { (uintptr_t)copy, len }; + ring[r_tail].ptr = copy; + ring[r_tail].size = len; + rx_bytes += len; + src_eps[r_tail].ver = ver; memset(src_eps[r_tail].ip, 0, 16); + if (ver == IP_VER4) { uint32_t v4 = *(const uint32_t*)src_ip_addr; memcpy(src_eps[r_tail].ip, &v4, 4); } else if (ver == IP_VER6) { memcpy(src_eps[r_tail].ip, src_ip_addr, 16); } + src_eps[r_tail].port = 
src_port; + r_tail = nexti; remoteEP = src_eps[(r_tail + UDP_RING_CAP - 1) % UDP_RING_CAP]; } @@ -125,18 +200,20 @@ class UDPSocket : public Socket { bool add_all_l3_on_l2(uint8_t ifindex, uint8_t* tmp_ids, int& n) { l2_interface_t* l2 = l2_interface_find_by_index(ifindex); if (!l2) return false; + if (!l2->is_up) return false; + for (int s = 0; s < MAX_IPV4_PER_INTERFACE; ++s) { l3_ipv4_interface_t* v4 = l2->l3_v4[s]; - if (v4 && v4->mode != IPV4_CFG_DISABLED) { - if (n < SOCK_MAX_L3) tmp_ids[n++] = v4->l3_id; - } + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (n < SOCK_MAX_L3) tmp_ids[n++] = v4->l3_id; } + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; ++s) { l3_ipv6_interface_t* v6 = l2->l3_v6[s]; - if (v6 && v6->cfg != IPV6_CFG_DISABLE) { - if (n < SOCK_MAX_L3) tmp_ids[n++] = v6->l3_id; - } + if (!is_valid_v6_l3_for_bind(v6)) continue; + if (n < SOCK_MAX_L3) tmp_ids[n++] = v6->l3_id; } + return n > 0; } @@ -145,17 +222,18 @@ class UDPSocket : public Socket { for (uint8_t i = 0; i < cnt; ++i) { l2_interface_t* l2 = l2_interface_at(i); if (!l2) continue; + if (!l2->is_up) continue; + for (int s = 0; s < MAX_IPV4_PER_INTERFACE; ++s) { l3_ipv4_interface_t* v4 = l2->l3_v4[s]; - if (v4 && v4->mode != IPV4_CFG_DISABLED) { - if (n < SOCK_MAX_L3) tmp_ids[n++] = v4->l3_id; - } + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (n < SOCK_MAX_L3) tmp_ids[n++] = v4->l3_id; } + for (int s = 0; s < MAX_IPV6_PER_INTERFACE; ++s) { l3_ipv6_interface_t* v6 = l2->l3_v6[s]; - if (v6 && v6->cfg != IPV6_CFG_DISABLE) { - if (n < SOCK_MAX_L3) tmp_ids[n++] = v6->l3_id; - } + if (!is_valid_v6_l3_for_bind(v6)) continue; + if (n < SOCK_MAX_L3) tmp_ids[n++] = v6->l3_id; } } return n > 0; @@ -166,64 +244,100 @@ class UDPSocket : public Socket { udp_unbind_l3(l3_id, port, pid); } - static bool contains_id(const uint8_t* arr, int n, uint8_t id) { - for (int i = 0; i < n; ++i) if (arr[i] == id) return true; - return false; + static bool pick_v4_l3_for_unicast(uint32_t dip, const uint8_t* 
candidates, int n, uint8_t* out_l3) { + if (!out_l3) return false; + ipv4_tx_plan_t plan; + if (!ipv4_build_tx_plan(dip, nullptr, candidates, n, &plan)) return false; + *out_l3 = plan.l3_id; + return true; } - static bool is_lbcast(uint32_t ip) { - return ip == 0xFFFFFFFFu; - } - - static bool is_dbcast(uint32_t ip, uint8_t* out_l3) { - uint8_t cnt = l2_interface_count(); - for (uint8_t i = 0; i < cnt; ++i) { - l2_interface_t* l2 = l2_interface_at(i); - if (!l2) continue; - for (int s = 0; s < MAX_IPV4_PER_INTERFACE; ++s) { - l3_ipv4_interface_t* v4 = l2->l3_v4[s]; - if (!v4) continue; - if (v4->mode == IPV4_CFG_DISABLED) continue; - if (!v4->ip || !v4->mask) continue; - uint32_t b = ipv4_broadcast_calc(v4->ip, v4->mask); - if (b == ip) { if (out_l3) *out_l3 = v4->l3_id; return true; } - } - } - return false; + static bool pick_v6_l3_for_unicast(const uint8_t dst_ip[16], const uint8_t* candidates, int n, uint8_t* out_l3) { + if (!out_l3) return false; + ipv6_tx_plan_t plan; + if (!ipv6_build_tx_plan(dst_ip, nullptr, candidates, n, &plan)) return false; + *out_l3 = plan.l3_id; + return true; } public: - UDPSocket(uint8_t r, uint32_t pid_) : Socket(PROTO_UDP, r) { + UDPSocket(uint8_t r, uint32_t pid_, const SocketExtraOptions* extra = nullptr) : Socket(PROTO_UDP, r, extra) { pid = pid_; insert_in_list(); } ~UDPSocket() override { + if ((extraOpts.flags & SOCK_OPT_MCAST_JOIN) && extraOpts.mcast_ver) { + if (extraOpts.mcast_ver == IP_VER4) { + uint32_t g = 0; + memcpy(&g, extraOpts.mcast_group, 4); + if (ipv4_is_multicast(g)) { + for (int i = 0; i < bound_l3_count; ++i) { + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(bound_l3[i]); + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (!v4->l2) continue; + (void)l2_ipv4_mcast_leave(v4->l2->ifindex, g); + } + } + } else if (extraOpts.mcast_ver == IP_VER6) { + if (ipv6_is_multicast(extraOpts.mcast_group)) { + for (int i = 0; i < bound_l3_count; ++i) { + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(bound_l3[i]); + if 
(!is_valid_v6_l3_for_bind(v6)) continue; + if (!v6->l2) continue; + (void)l2_ipv6_mcast_leave(v6->l2->ifindex, extraOpts.mcast_group); + } + } + } + } close(); remove_from_list(); } - int32_t bind(const SockBindSpec& spec, uint16_t port) override { + int32_t bind(const SockBindSpec& spec_in, uint16_t port) override { + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_UDP; + ev.action = NETLOG_ACT_BIND; + ev.pid = pid; + ev.u0 = port; + ev.bind_spec = spec_in; + netlog_socket_event(&extraOpts, &ev); if (role != SOCK_ROLE_SERVER) return SOCK_ERR_PERM; if (bound) return SOCK_ERR_BOUND; + + SockBindSpec spec = spec_in; + bool empty = spec.kind == BIND_L3 && spec.l3_id == 0 && spec.ifindex == 0 && spec.ver == 0 && ipv6_is_unspecified(spec.ip); + if (empty) spec.kind = BIND_ANY; + uint8_t ids[SOCK_MAX_L3]; int n = 0; + if (spec.kind == BIND_L3) { - if (spec.l3_id) { ids[n++] = spec.l3_id; } + if (!spec.l3_id) return SOCK_ERR_INVAL; + + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(spec.l3_id); + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(spec.l3_id); + + bool ok4 = is_valid_v4_l3_for_bind(v4); + bool ok6 = is_valid_v6_l3_for_bind(v6); + + if (!ok4 && !ok6) return SOCK_ERR_INVAL; + + if (ok4 && n < SOCK_MAX_L3) ids[n++] = spec.l3_id; + if (ok6 && n < SOCK_MAX_L3) ids[n++] = spec.l3_id; } else if (spec.kind == BIND_L2) { if (!add_all_l3_on_l2(spec.ifindex, ids, n)) return SOCK_ERR_INVAL; } else if (spec.kind == BIND_IP) { if (spec.ver == IP_VER4) { - uint32_t v4; - memcpy(&v4, spec.ip, 4); - v4 = __builtin_bswap32(v4); - ip_resolution_result_t r = resolve_ipv4_to_interface(v4); - if (!r.found || !r.ipv4) return SOCK_ERR_INVAL; - ids[n++] = r.ipv4->l3_id; + uint32_t v4ip = 0; + memcpy(&v4ip, spec.ip, 4); + l3_ipv4_interface_t* ipif = l3_ipv4_find_by_ip(v4ip); + if (!is_valid_v4_l3_for_bind(ipif)) return SOCK_ERR_INVAL; + if (n < SOCK_MAX_L3) ids[n++] = ipif->l3_id; } else if (spec.ver == IP_VER6) { - ip_resolution_result_t r6 = resolve_ipv6_to_interface(spec.ip); - if 
(!r6.found || !r6.ipv6) return SOCK_ERR_INVAL; - ids[n++] = r6.ipv6->l3_id; + l3_ipv6_interface_t* ipif6 = l3_ipv6_find_by_ip(spec.ip); + if (!is_valid_v6_l3_for_bind(ipif6)) return SOCK_ERR_INVAL; + if (n < SOCK_MAX_L3) ids[n++] = ipif6->l3_id; } else { return SOCK_ERR_INVAL; } @@ -232,32 +346,82 @@ class UDPSocket : public Socket { } else { return SOCK_ERR_INVAL; } + + if (n == 0) return SOCK_ERR_INVAL; + uint8_t dedup[SOCK_MAX_L3]; int m = 0; - for (int i = 0; i < n; ++i) { if (!contains_id(dedup, m, ids[i])) dedup[m++] = ids[i]; } + for (int i = 0; i < n; ++i) { + bool seen = false; + for (int j = 0; j < m; ++j) { + if (dedup[j] == ids[i]) { + seen = true; + break; + } + } + if (!seen && m < SOCK_MAX_L3) dedup[m++] = ids[i]; + } + if (m == 0) return SOCK_ERR_INVAL; + int bdone = 0; for (int i = 0; i < m; ++i) { uint8_t id = dedup[i]; - bool ok = udp_bind_l3(id, port, pid, dispatch); - if (!ok) { - for (int j = 0; j < bdone; ++j) { - uint8_t rid = dedup[j]; - udp_unbind_l3(rid, port, pid); - } - return SOCK_ERR_SYS; + if (udp_bind_l3(id, port, pid, dispatch)) { + bdone++; + continue; } - bdone++; + for (int j = 0; j < bdone; ++j) udp_unbind_l3(dedup[j], port, pid); + return SOCK_ERR_SYS; } + clear_bound_l3(); for (int i = 0; i < m; ++i) add_bound_l3(dedup[i]); + + if ((extraOpts.flags & SOCK_OPT_MCAST_JOIN) && extraOpts.mcast_ver) { + if (extraOpts.mcast_ver == IP_VER4) { + uint32_t g = 0; + memcpy(&g, extraOpts.mcast_group, 4); + if (ipv4_is_multicast(g)) { + for (int i = 0; i < bound_l3_count; ++i) { + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(bound_l3[i]); + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (!v4->l2) continue; + (void)l2_ipv4_mcast_join(v4->l2->ifindex, g); + } + } + } else if (extraOpts.mcast_ver == IP_VER6) { + if (ipv6_is_multicast(extraOpts.mcast_group)) { + for (int i = 0; i < bound_l3_count; ++i) { + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(bound_l3[i]); + if (!is_valid_v6_l3_for_bind(v6)) continue; + if (!v6->l2) continue; + 
(void)l2_ipv6_mcast_join(v6->l2->ifindex, extraOpts.mcast_group); + } + } + } + } + localPort = port; bound = true; return SOCK_OK; } int64_t sendto(SockDstKind kind, const void* dst, uint16_t port, const void* buf, uint64_t len) { - if (!dst || !buf || len == 0) return SOCK_ERR_INVAL; - net_l4_endpoint d{}; + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_UDP; + ev.action = NETLOG_ACT_SENDTO; + ev.pid = pid; + ev.dst_kind = kind; + ev.u0 = port; + ev.u1 = (uint32_t)len; + if (kind == DST_ENDPOINT && dst) ev.dst_ep = *(const net_l4_endpoint*)dst; + if (kind == DST_DOMAIN) ev.s0 = (const char*)dst; + netlog_socket_event(&extraOpts, &ev); + if (!dst) return SOCK_ERR_INVAL; + if (!buf) return SOCK_ERR_INVAL; + if (len == 0) return SOCK_ERR_INVAL; + + net_l4_endpoint d; if (kind == DST_ENDPOINT) { const net_l4_endpoint* ed = (const net_l4_endpoint*)dst; d = *ed; @@ -266,181 +430,330 @@ class UDPSocket : public Socket { } else if (kind == DST_DOMAIN) { const char* host = (const char*)dst; if (!port) return SOCK_ERR_INVAL; + + uint8_t a6[16]; + memset(a6, 0, 16); uint32_t a4 = 0; - dns_result_t dr = dns_resolve_a(host, &a4, UDP_DNS_SEL, UDP_DNS_TIMEOUT_MS); - if (dr != DNS_OK) return SOCK_ERR_DNS; - d.ver = IP_VER4; - memset(d.ip, 0, 16); - memcpy(d.ip, &a4, 4); - d.port = port; + + dns_result_t dr6 = dns_resolve_aaaa(host, a6, UDP_DNS_SEL, UDP_DNS_TIMEOUT_MS); + dns_result_t dr4 = dns_resolve_a(host, &a4, UDP_DNS_SEL, UDP_DNS_TIMEOUT_MS); + if (dr6 != DNS_OK && dr4 != DNS_OK) return SOCK_ERR_DNS; + + uint8_t allow_v4[SOCK_MAX_L3]; + uint8_t allow_v6[SOCK_MAX_L3]; + int n4 = 0; + int n6 = 0; + for (int i = 0; i < bound_l3_count; ++i) { + uint8_t id = bound_l3[i]; + if (n4 < SOCK_MAX_L3 && l3_ipv4_find_by_id(id)) allow_v4[n4++] = id; + if (n6 < SOCK_MAX_L3 && l3_ipv6_find_by_id(id)) allow_v6[n6++] = id; + } + + if (dr6 == DNS_OK) { + ipv6_tx_plan_t p6; + if (ipv6_build_tx_plan(a6, nullptr, n6 ? 
allow_v6 : nullptr, n6, &p6)) { + d.ver = IP_VER6; + memcpy(d.ip, a6, 16); + d.port = port; + } + } + + if (d.ver == 0 && dr4 == DNS_OK) { + ipv4_tx_plan_t p4; + if (ipv4_build_tx_plan(a4, nullptr, n4 ? allow_v4 : nullptr, n4, &p4)) { + make_ep(a4, port, IP_VER4, &d); + } + } + + if (d.ver == 0) return SOCK_ERR_SYS; } else { return SOCK_ERR_INVAL; } + sizedptr pay; + pay.ptr = (uintptr_t)buf; + pay.size = (uint32_t)len; + if (d.ver == IP_VER4) { - uint32_t dip; memcpy(&dip, d.ip, 4); - uint8_t chosen_l3 = 0; + uint32_t dip = 0; + memcpy(&dip, d.ip, 4); + bool is_bcast = false; - uint8_t db_l3 = 0; - if (is_lbcast(dip)) { - is_bcast = true; - } else if (is_dbcast(dip, &db_l3)) { - is_bcast = true; - chosen_l3 = db_l3; + if (dip == 0xFFFFFFFFu) is_bcast = true; + else { + uint8_t dummy = 0; + if (is_dbcast(dip, &dummy)) is_bcast = true; } if (is_bcast) { if (dip == 0xFFFFFFFFu) { if (bound_l3_count == 0) return SOCK_ERR_SYS; + for (int i = 0; i < bound_l3_count; ++i) { uint8_t bl3 = bound_l3[i]; l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(bl3); - if (!v4 || !v4->l2) continue; + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (!v4->l2) continue; + if (!bound) { int p = udp_alloc_ephemeral_l3(bl3, pid, dispatch); if (p < 0) continue; localPort = (uint16_t)p; add_bound_l3(bl3); bound = true; - } else { - bool present = false; - for (int k = 0; k < bound_l3_count; ++k) if (bound_l3[k] == bl3) { present = true; break; } - if (!present) { - if (!udp_bind_l3(bl3, localPort, pid, dispatch)) continue; - add_bound_l3(bl3); - } + } else if (localPort == 0) { + int p = udp_alloc_ephemeral_l3(bl3, pid, dispatch); + if (p < 0) continue; + localPort = (uint16_t)p; } + net_l4_endpoint src; src.ver = IP_VER4; memset(src.ip, 0, 16); memcpy(src.ip, &v4->ip, 4); src.port = localPort; - sizedptr pay{ (uintptr_t)buf, (uint32_t)len }; - ipv4_tx_opts_t tx; tx.scope = IPV4_TX_BOUND_L3; tx.index = bl3; - udp_send_segment(&src, &d, pay, &tx); + + ipv4_tx_opts_t tx; + tx.scope = 
IP_TX_BOUND_L3; + tx.index = bl3; + + udp_send_segment(&src, &d, pay, &tx, (extraOpts.flags & SOCK_OPT_TTL) ? extraOpts.ttl : 0, (extraOpts.flags & SOCK_OPT_DONTFRAG) ? 1 : 0); } + remoteEP = d; return (int64_t)len; - } else { - if (!chosen_l3) return SOCK_ERR_SYS; - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(chosen_l3); - if (!v4 || !v4->l2) return SOCK_ERR_SYS; + } + + uint8_t db_l3 = 0; + if (!is_dbcast(dip, &db_l3)) return SOCK_ERR_SYS; + + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(db_l3); + if (!is_valid_v4_l3_for_bind(v4)) return SOCK_ERR_SYS; + if (!v4->l2) return SOCK_ERR_SYS; + + if (!bound) { + int p = udp_alloc_ephemeral_l3(db_l3, pid, dispatch); + if (p < 0) return SOCK_ERR_NO_PORT; + localPort = (uint16_t)p; + add_bound_l3(db_l3); + bound = true; + } else if (localPort == 0) { + int p = udp_alloc_ephemeral_l3(db_l3, pid, dispatch); + if (p < 0) return SOCK_ERR_NO_PORT; + localPort = (uint16_t)p; + } + + net_l4_endpoint src; + src.ver = IP_VER4; + memset(src.ip, 0, 16); + memcpy(src.ip, &v4->ip, 4); + src.port = localPort; + + ipv4_tx_opts_t tx; + tx.scope = IP_TX_BOUND_L3; + tx.index = db_l3; + + udp_send_segment(&src, &d, pay, &tx, (extraOpts.flags & SOCK_OPT_TTL) ? extraOpts.ttl : 0, (extraOpts.flags & SOCK_OPT_DONTFRAG) ? 
1 : 0); + remoteEP = d; + return (int64_t)len; + } + + if (ipv4_is_multicast(dip)) { + if (bound_l3_count == 0) return SOCK_ERR_SYS; + + for (int i = 0; i < bound_l3_count; ++i) { + uint8_t bl3 = bound_l3[i]; + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(bl3); + if (!is_valid_v4_l3_for_bind(v4)) continue; + if (!v4->l2) continue; + if (!bound) { - int p = udp_alloc_ephemeral_l3(chosen_l3, pid, dispatch); - if (p < 0) return SOCK_ERR_NO_PORT; + int p = udp_alloc_ephemeral_l3(bl3, pid, dispatch); + if (p < 0) continue; localPort = (uint16_t)p; - add_bound_l3(chosen_l3); + add_bound_l3(bl3); bound = true; - } else { - bool present = false; - for (int i = 0; i < bound_l3_count; ++i) if (bound_l3[i] == chosen_l3) { present = true; break; } - if (!present) { - if (!udp_bind_l3(chosen_l3, localPort, pid, dispatch)) return SOCK_ERR_SYS; - add_bound_l3(chosen_l3); - } + } else if (localPort == 0) { + int p = udp_alloc_ephemeral_l3(bl3, pid, dispatch); + if (p < 0) continue; + localPort = (uint16_t)p; } + + (void)l2_ipv4_mcast_join(v4->l2->ifindex, dip); + (void)igmp_send_join(v4->l2->ifindex, dip); + net_l4_endpoint src; src.ver = IP_VER4; memset(src.ip, 0, 16); memcpy(src.ip, &v4->ip, 4); src.port = localPort; - sizedptr pay{ (uintptr_t)buf, (uint32_t)len }; - ipv4_tx_opts_t tx; tx.scope = IPV4_TX_BOUND_L3; tx.index = chosen_l3; - udp_send_segment(&src, &d, pay, &tx); - remoteEP = d; - return (int64_t)len; + + ipv4_tx_opts_t tx; + tx.scope = IP_TX_BOUND_L3; + tx.index = bl3; + + udp_send_segment(&src, &d, pay, &tx, (extraOpts.flags & SOCK_OPT_TTL) ? extraOpts.ttl : 0, (extraOpts.flags & SOCK_OPT_DONTFRAG) ? 
1 : 0); } - } else { - if (bound_l3_count == 0) { - uint8_t ids[SOCK_MAX_L3]; int n = 0; - uint8_t cnt = l2_interface_count(); - for (uint8_t i = 0; i < cnt; ++i) { - l2_interface_t* l2 = l2_interface_at(i); - if (!l2 || !l2->is_up) continue; - for (int s = 0; s < MAX_IPV4_PER_INTERFACE && n < SOCK_MAX_L3; ++s) { - l3_ipv4_interface_t* v4 = l2->l3_v4[s]; - if (!v4) continue; - if (v4->mode == IPV4_CFG_DISABLED) continue; - ids[n++] = v4->l3_id; - } - } - if (n == 0) return SOCK_ERR_SYS; - if (!ipv4_rt_pick_best_l3_in(ids, n, dip, &chosen_l3)) return SOCK_ERR_SYS; - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(chosen_l3); - if (!v4 || !v4->l2) return SOCK_ERR_SYS; - int p = udp_alloc_ephemeral_l3(chosen_l3, pid, dispatch); - if (p < 0) return SOCK_ERR_NO_PORT; - localPort = (uint16_t)p; - add_bound_l3(chosen_l3); - bound = true; - net_l4_endpoint src; - src.ver = IP_VER4; - memset(src.ip, 0, 16); - memcpy(src.ip, &v4->ip, 4); - src.port = localPort; - sizedptr pay{ (uintptr_t)buf, (uint32_t)len }; - const ipv4_tx_opts_t* txp = nullptr; - udp_send_segment(&src, &d, pay, txp); - remoteEP = d; - return (int64_t)len; - } else if (bound_l3_count == 1) { - chosen_l3 = bound_l3[0]; - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(chosen_l3); - if (!v4 || !v4->l2) return SOCK_ERR_SYS; - net_l4_endpoint src; - src.ver = IP_VER4; - memset(src.ip, 0, 16); - memcpy(src.ip, &v4->ip, 4); - src.port = localPort; - sizedptr pay{ (uintptr_t)buf, (uint32_t)len }; - const ipv4_tx_opts_t* txp = nullptr; - udp_send_segment(&src, &d, pay, txp); - remoteEP = d; - return (int64_t)len; - } else { - if (!ipv4_rt_pick_best_l3_in(bound_l3, bound_l3_count, dip, &chosen_l3)) return SOCK_ERR_SYS; - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(chosen_l3); - if (!v4 || !v4->l2) return SOCK_ERR_SYS; - bool present = false; - for (int i = 0; i < bound_l3_count; ++i) if (bound_l3[i] == chosen_l3) { present = true; break; } - if (!present) { - if (!udp_bind_l3(chosen_l3, localPort, pid, dispatch)) return 
SOCK_ERR_SYS; - add_bound_l3(chosen_l3); - } + + remoteEP = d; + return (int64_t)len; + } + + uint8_t allowed_v4[SOCK_MAX_L3]; + int n_allowed = 0; + for (int i = 0; i < bound_l3_count && n_allowed < SOCK_MAX_L3; ++i) { + uint8_t id = bound_l3[i]; + if (l3_ipv4_find_by_id(id)) allowed_v4[n_allowed++] = id; + } + if (bound_l3_count > 0 && n_allowed == 0) return SOCK_ERR_SYS; + + ipv4_tx_plan_t plan; + if (!ipv4_build_tx_plan(dip, nullptr, n_allowed ? allowed_v4 : nullptr, n_allowed, &plan)) return SOCK_ERR_SYS; + + uint8_t chosen_l3 = plan.l3_id; + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(chosen_l3); + if (!is_valid_v4_l3_for_bind(v4)) return SOCK_ERR_SYS; + + if (!bound) { + int p = udp_alloc_ephemeral_l3(chosen_l3, pid, dispatch); + if (p < 0) return SOCK_ERR_NO_PORT; + localPort = (uint16_t)p; + add_bound_l3(chosen_l3); + bound = true; + } else if (localPort == 0) { + int p = udp_alloc_ephemeral_l3(chosen_l3, pid, dispatch); + if (p < 0) return SOCK_ERR_NO_PORT; + localPort = (uint16_t)p; + } + + net_l4_endpoint src; + src.ver = IP_VER4; + memset(src.ip, 0, 16); + memcpy(src.ip, &v4->ip, 4); + src.port = localPort; + + ipv4_tx_opts_t tx; + tx.scope = (ip_tx_scope_t)plan.fixed_opts.scope; + tx.index = plan.fixed_opts.index; + + udp_send_segment(&src, &d, pay, &tx, (extraOpts.flags & SOCK_OPT_TTL) ? extraOpts.ttl : 0, (extraOpts.flags & SOCK_OPT_DONTFRAG) ? 
1 : 0); + remoteEP = d; + return (int64_t)len; + } + + if (d.ver == IP_VER6) { + bool is_mcast = ipv6_is_multicast(d.ip); + + if (is_mcast) { + if (!bound) return SOCK_ERR_BOUND; + if (!localPort) return SOCK_ERR_BOUND; + if (bound_l3_count == 0) return SOCK_ERR_SYS; + + for (int i = 0; i < bound_l3_count; ++i) { + uint8_t bl3 = bound_l3[i]; + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(bl3); + if (!is_valid_v6_l3_for_bind(v6)) continue; + net_l4_endpoint src; - src.ver = IP_VER4; + src.ver = IP_VER6; memset(src.ip, 0, 16); - memcpy(src.ip, &v4->ip, 4); + memcpy(src.ip, v6->ip, 16); src.port = localPort; - sizedptr pay{ (uintptr_t)buf, (uint32_t)len }; - ipv4_tx_opts_t tx; tx.scope = IPV4_TX_BOUND_L3; tx.index = chosen_l3; - udp_send_segment(&src, &d, pay, &tx); - remoteEP = d; - return (int64_t)len; + + ipv6_tx_opts_t tx; + tx.scope = IP_TX_BOUND_L3; + tx.index = bl3; + + udp_send_segment(&src, &d, pay, &tx, (extraOpts.flags & SOCK_OPT_TTL) ? extraOpts.ttl : 0, (extraOpts.flags & SOCK_OPT_DONTFRAG) ? 1 : 0); } + + remoteEP = d; + return (int64_t)len; } - } else if (d.ver == IP_VER6) { - return SOCK_ERR_PROTO; - } else { - return SOCK_ERR_INVAL; + + uint8_t allowed_v6[SOCK_MAX_L3]; + int n_allowed = 0; + for (int i = 0; i < bound_l3_count && n_allowed < SOCK_MAX_L3; ++i) { + uint8_t id = bound_l3[i]; + if (l3_ipv6_find_by_id(id)) allowed_v6[n_allowed++] = id; + } + if (bound_l3_count > 0 && n_allowed == 0) return SOCK_ERR_SYS; + + ipv6_tx_plan_t plan; + if (!ipv6_build_tx_plan(d.ip, nullptr, n_allowed ? 
allowed_v6 : nullptr, n_allowed, &plan)) return SOCK_ERR_SYS; + + uint8_t chosen_l3 = plan.l3_id; + + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(chosen_l3); + if (!is_valid_v6_l3_for_bind(v6)) return SOCK_ERR_SYS; + + if (!bound) { + int p = udp_alloc_ephemeral_l3(chosen_l3, pid, dispatch); + if (p < 0) return SOCK_ERR_NO_PORT; + localPort = (uint16_t)p; + add_bound_l3(chosen_l3); + bound = true; + } else if (localPort == 0) { + int p = udp_alloc_ephemeral_l3(chosen_l3, pid, dispatch); + if (p < 0) return SOCK_ERR_NO_PORT; + localPort = (uint16_t)p; + } + + net_l4_endpoint src; + src.ver = IP_VER6; + memset(src.ip, 0, 16); + memcpy(src.ip, v6->ip, 16); + src.port = localPort; + + ipv6_tx_opts_t tx; + tx.scope = (ip_tx_scope_t)plan.fixed_opts.scope; + tx.index = plan.fixed_opts.index; + + udp_send_segment(&src, &d, pay, &tx, (extraOpts.flags & SOCK_OPT_TTL) ? extraOpts.ttl : 0, (extraOpts.flags & SOCK_OPT_DONTFRAG) ? 1 : 0); + remoteEP = d; + return (int64_t)len; } + + return SOCK_ERR_INVAL; } int64_t recvfrom(void* buf, uint64_t len, net_l4_endpoint* src) { + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_UDP; + ev.action = NETLOG_ACT_RECVFROM; + ev.pid = pid; + ev.u0 = (uint32_t)len; + ev.local_port = localPort; + ev.remote_ep = remoteEP; + netlog_socket_event(&extraOpts, &ev); if (r_head == r_tail) return 0; + sizedptr p = ring[r_head]; net_l4_endpoint se = src_eps[r_head]; r_head = (r_head + 1) % UDP_RING_CAP; - uint32_t tocpy = p.size < len ? 
p.size : (uint32_t)len; + rx_bytes -= p.size; + + uint32_t tocpy = p.size; + if (tocpy > len) tocpy = (uint32_t)len; + memcpy(buf, (void*)p.ptr, tocpy); if (src) *src = se; + free_sized((void*)p.ptr, p.size); remoteEP = se; return tocpy; } int32_t close() override { + netlog_socket_event_t ev{}; + ev.comp = NETLOG_COMP_UDP; + ev.action = NETLOG_ACT_CLOSE; + ev.pid = pid; + ev.local_port = localPort; + ev.remote_ep = remoteEP; + netlog_socket_event(&extraOpts, &ev); while (r_head != r_tail) { + rx_bytes -= ring[r_head].size; free_sized((void*)ring[r_head].ptr, ring[r_head].size); r_head = (r_head + 1) % UDP_RING_CAP; } @@ -450,4 +763,4 @@ class UDPSocket : public Socket { net_l4_endpoint get_remote_ep() const { return remoteEP; } -}; +}; \ No newline at end of file diff --git a/kernel/networking/transport_layer/tcp.c b/kernel/networking/transport_layer/tcp.c deleted file mode 100644 index 286fca6a..00000000 --- a/kernel/networking/transport_layer/tcp.c +++ /dev/null @@ -1,657 +0,0 @@ -#include "tcp.h" -#include "types.h" -#include "networking/port_manager.h" -#include "networking/internet_layer/ipv4.h" -#include "std/memory.h" -#include "random/random.h" -#include "syscalls/syscalls.h" -//TODO: add mtu check and fragmentation. also fragment rebuild - - -static tcp_flow_t tcp_flows[MAX_TCP_FLOWS]; - -static inline int ip_len(ip_version_t ver){ return ver==IP_VER6 ? 16 : 4; } -static inline uint32_t v4_u32_from_ptr(const void *p){ return *(const uint32_t*)p; } - -tcp_data* tcp_get_ctx(uint16_t local_port, ip_version_t ver, const void *remote_ip, uint16_t remote_port){ - int idx = find_flow(local_port, ver, remote_ip, remote_port); - return (idx < 0) ? 
NULL : &tcp_flows[idx].ctx; -} - -static uint32_t checksum_add(uint32_t sum, uint16_t val){ - sum += val; - if (sum > 0xFFFF) sum = (sum & 0xFFFF) + 1; - return sum; -} - -static uint16_t tcp_compute_checksum_v4(const void *segment, uint16_t seg_len, uint32_t src_ip, uint32_t dst_ip){ - const uint8_t *seg = (const uint8_t*)segment; - const uint64_t total_len = 12 + seg_len; - - uintptr_t raw = (uintptr_t)malloc(total_len); - if (!raw) return 0; - - uint8_t *buf = (uint8_t *)raw; - - buf[0] = (src_ip >> 24) & 0xFF; - buf[1] = (src_ip >> 16) & 0xFF; - buf[2] = (src_ip >> 8) & 0xFF; - buf[3] = (src_ip >> 0) & 0xFF; - buf[4] = (dst_ip >> 24) & 0xFF; - buf[5] = (dst_ip >> 16) & 0xFF; - buf[6] = (dst_ip >> 8) & 0xFF; - buf[7] = (dst_ip >> 0) & 0xFF; - buf[8] = 0; - buf[9] = 6; - buf[10] = (seg_len >> 8) & 0xFF; - buf[11] = (seg_len >> 0) & 0xFF; - memcpy(buf + 12, seg, seg_len); - buf[12 + 16] = 0; - buf[12 + 17] = 0; - - uint32_t sum = 0; - for (uint64_t i = 0; i + 1 < total_len; i += 2) { - uint16_t word = (uint16_t)buf[i] << 8 | buf[i + 1]; - sum = checksum_add(sum, word); - } - if (total_len & 1) { - uint16_t word = (uint16_t)buf[total_len - 1] << 8; - sum = checksum_add(sum, word); - } - - uint16_t res = (uint16_t)(~sum & 0xFFFF); - free_sized((void *)raw, total_len); - return bswap16(res); -} - -static uint16_t tcp_compute_checksum_v6(const void*, uint16_t, const uint8_t[16], const uint8_t[16]) { - //TODO IPV6 - return 0; -} - -int find_flow(uint16_t local_port, ip_version_t ver, const void *remote_ip, uint16_t remote_port) { - for (int i = 0; i < MAX_TCP_FLOWS; ++i) { - tcp_flow_t *f = &tcp_flows[i]; - if (f->state == TCP_STATE_CLOSED) continue; - if (f->local_port != local_port) continue; - - if (f->state == TCP_LISTEN){ - if (!remote_ip && remote_port == 0) return i; - continue; - } - if (f->remote.ver == ver && f->remote.port == remote_port && remote_ip) { - if (memcmp(f->remote.ip, remote_ip, (uint64_t)ip_len(ver)) == 0) return i; - } - } - return -1; -} - 
-static int allocate_flow_entry() { - for (int i = 0; i < MAX_TCP_FLOWS; ++i) { - if (tcp_flows[i].state == TCP_STATE_CLOSED) { - tcp_flows[i].retries = 0; - return i; - } - } - return -1; -} - -static void free_flow_entry(int idx) { - if (idx < 0 || idx >= MAX_TCP_FLOWS) return; - tcp_flow_t *f = &tcp_flows[idx]; - - f->state = TCP_STATE_CLOSED; - f->local_port = 0; - - f->local.ver = 0; - memset(f->local.ip, 0, 16); - f->local.port = 0; - f->remote.ver = 0; - memset(f->remote.ip, 0, 16); - f->remote.port = 0; - - f->ctx.sequence = 0; - f->ctx.ack = 0; - f->ctx.flags = 0; - f->ctx.window = 0; - f->ctx.options.ptr = 0; - f->ctx.options.size = 0; - f->ctx.payload.ptr = 0; - f->ctx.payload.size = 0; - f->ctx.expected_ack = 0; - f->ctx.ack_received = 0; - f->retries = 0; -} - -static inline port_manager_t* pm_for_l3(uint8_t l3_id){ - if (l3_ipv4_find_by_id(l3_id)) return ifmgr_pm_v4(l3_id); - if (l3_ipv6_find_by_id(l3_id)) return ifmgr_pm_v6(l3_id); - return NULL; -} - -static bool build_tx_opts_from_local_v4(const void* src_ip_addr, ipv4_tx_opts_t* out){ - if (!out) return false; - uint32_t lip = v4_u32_from_ptr(src_ip_addr); - l3_ipv4_interface_t* v4 = l3_ipv4_find_by_ip(lip); - if (v4){ - out->scope = IPV4_TX_BOUND_L3; - out->index = v4->l3_id; - } else { - out->scope = IPV4_TX_AUTO; - out->index = 0; - } - return true; -} - -static bool build_tx_opts_from_l3(uint8_t l3_id, ipv4_tx_opts_t* out){ - if (!out) return false; - out->scope = IPV4_TX_BOUND_L3; - out->index = l3_id; - return true; -} - -static bool send_tcp_segment(ip_version_t ver, const void *src_ip_addr, const void *dst_ip_addr, tcp_hdr_t *hdr, const uint8_t *payload, uint16_t payload_len, const ipv4_tx_opts_t* txp){ - uint8_t header_words = sizeof(tcp_hdr_t) / 4; - hdr->data_offset_reserved = (uint8_t)((header_words << 4) | 0x0); - hdr->window = bswap16(hdr->window); - - uint16_t tcp_len = (uint16_t)(sizeof(tcp_hdr_t) + payload_len); - uint8_t *segment = (uint8_t*) malloc(tcp_len); - if (!segment) 
return false; - - memcpy(segment, hdr, sizeof(tcp_hdr_t)); - if (payload_len) memcpy(segment + sizeof(tcp_hdr_t), payload, payload_len); - tcp_hdr_t *hdr_on_buf = (tcp_hdr_t*)segment; - hdr_on_buf->checksum = 0; - - if (ver == IP_VER4){ - uint32_t s = v4_u32_from_ptr(src_ip_addr); - uint32_t d = v4_u32_from_ptr(dst_ip_addr); - hdr_on_buf->checksum = tcp_compute_checksum_v4(segment, tcp_len, s, d); - ipv4_send_packet(d, 6, (sizedptr){ (uintptr_t)segment, tcp_len }, txp, 0); - } else { - free_sized(segment, tcp_len); - return false; - } - - free_sized(segment, tcp_len); - return true; -} - -static void send_reset(ip_version_t ver, const void *src_ip_addr, const void *dst_ip_addr, - uint16_t src_port, uint16_t dst_port, uint32_t seq, uint32_t ack, bool ack_valid) { - tcp_hdr_t rst_hdr; - rst_hdr.src_port = bswap16(src_port); - rst_hdr.dst_port = bswap16(dst_port); - if (ack_valid){ - rst_hdr.sequence = bswap32(0); - rst_hdr.ack = bswap32(seq + 1); - rst_hdr.flags = (1 << RST_F) | (1 << ACK_F); - } else { - rst_hdr.sequence = bswap32(ack); - rst_hdr.ack = bswap32(0); - rst_hdr.flags = (1 << RST_F); - } - rst_hdr.window = 0; - rst_hdr.urgent_ptr = 0; - ipv4_tx_opts_t tx; build_tx_opts_from_local_v4(src_ip_addr, &tx); - send_tcp_segment(ver, src_ip_addr, dst_ip_addr, &rst_hdr, NULL, 0, (ver==IP_VER4? 
&tx : NULL)); -} - -bool tcp_bind_l3(uint8_t l3_id, uint16_t port, uint16_t pid, port_recv_handler_t handler) { - port_manager_t* pm = pm_for_l3(l3_id); - if (!pm) return false; - if (!port_bind_manual(pm, PROTO_TCP, port, pid, handler)) return false; - - int idx = allocate_flow_entry(); - if (idx >= 0){ - tcp_flow_t *f = &tcp_flows[idx]; - f->local_port = port; - - f->local.ver = 0; - memset(f->local.ip, 0, 16); - f->local.port = 0; - f->remote.ver = 0; - memset(f->remote.ip, 0, 16); - f->remote.port = 0; - f->state = TCP_LISTEN; - f->ctx.sequence = 0; - f->ctx.ack = 0; - f->ctx.flags = 0; - f->ctx.window = 0xFFFF; - f->ctx.options.ptr = 0; - f->ctx.options.size = 0; - f->ctx.payload.ptr = 0; - f->ctx.payload.size = 0; - f->ctx.expected_ack = 0; - f->ctx.ack_received = 0; - f->retries = 0; - } - return true; -} - -int tcp_alloc_ephemeral_l3(uint8_t l3_id, uint16_t pid, port_recv_handler_t handler) { - port_manager_t* pm = pm_for_l3(l3_id); - if (!pm) return -1; - return port_alloc_ephemeral(pm, PROTO_TCP, pid, handler); -} - -bool tcp_unbind_l3(uint8_t l3_id, uint16_t port, uint16_t pid) { - port_manager_t* pm = pm_for_l3(l3_id); - if (!pm) return false; - - bool res = port_unbind(pm, PROTO_TCP, port, pid); - if (res) { - for (int i = 0; i < MAX_TCP_FLOWS; ++i) { - if (tcp_flows[i].local_port == port) { - if (tcp_flows[i].state == TCP_LISTEN) free_flow_entry(i); - } - } - } - return res; -} - -bool tcp_handshake_l3(uint8_t l3_id, uint16_t local_port, net_l4_endpoint *dst, tcp_data *flow_ctx, uint16_t pid) { - int idx = allocate_flow_entry(); - if (idx < 0) return false; - - tcp_flow_t *flow = &tcp_flows[idx]; - flow->local_port = local_port; - - flow->remote.ver = dst->ver; - memcpy(flow->remote.ip, dst->ip, (uint64_t)ip_len(dst->ver)); - flow->remote.port = dst->port; - - if (dst->ver == IP_VER4){ - l3_ipv4_interface_t *v4 = l3_ipv4_find_by_id(l3_id); - if (!v4 || !v4->ip) { free_flow_entry(idx); return false; } - flow->local.ver = IP_VER4; - 
memset(flow->local.ip, 0, 16); - memcpy(flow->local.ip, &v4->ip, 4); - flow->local.port = local_port; - } else { - memset(flow->local.ip, 0, 16); - flow->local.ver = IP_VER6; - flow->local.port = local_port; - } - - flow->state = TCP_SYN_SENT; - flow->retries = TCP_SYN_RETRIES; - - const uint32_t iss = 1; - flow->ctx.sequence = iss; - flow->ctx.ack = 0; - flow->ctx.window = 0xFFFF; - flow->ctx.options.ptr = 0; - flow->ctx.options.size = 0; - flow->ctx.payload.ptr = 0; - flow->ctx.payload.size = 0; - flow->ctx.flags = (uint8_t)(1 << SYN_F); - flow->ctx.expected_ack = iss + 1; - flow->ctx.ack_received = 0; - - tcp_hdr_t syn_hdr; - syn_hdr.src_port = bswap16(local_port); - syn_hdr.dst_port = bswap16(dst->port); - syn_hdr.sequence = bswap32(flow->ctx.sequence); - syn_hdr.ack = bswap32(0); - syn_hdr.flags = (uint8_t)(1<ctx.window; - syn_hdr.urgent_ptr = 0; - - bool sent = false; - if (dst->ver == IP_VER4) { - ipv4_tx_opts_t tx; build_tx_opts_from_l3(l3_id, &tx); - while (flow->retries-- > 0) { - sent = send_tcp_segment(IP_VER4, flow->local.ip, flow->remote.ip, &syn_hdr, NULL, 0, &tx); - if (!sent) { - break; - } - uint64_t wait_ms = TCP_RETRY_TIMEOUT_MS, elapsed = 0; - const uint64_t interval = 50; - while (elapsed < wait_ms) { - if (flow->state == TCP_ESTABLISHED) { - *flow_ctx = flow->ctx; - return true; - } - if (flow->state == TCP_STATE_CLOSED) { - free_flow_entry(idx); - return false; - } - msleep(interval); - elapsed += interval; - } - } - } else { - //TODO IPV6 - } - - free_flow_entry(idx); - return false; -} - -tcp_result_t tcp_flow_send(tcp_data *flow_ctx) { - if (!flow_ctx) return TCP_INVALID; - - tcp_flow_t *flow = NULL; - for (int i = 0; i < MAX_TCP_FLOWS; ++i) { - if (&tcp_flows[i].ctx == flow_ctx) { flow = &tcp_flows[i]; break; } - } - if (!flow) return TCP_INVALID; - - const uint8_t flags = flow_ctx->flags; - uint8_t *payload_ptr = (uint8_t*) flow_ctx->payload.ptr; - uint16_t payload_len = flow_ctx->payload.size; - if (flow->state != TCP_ESTABLISHED && 
!(flags & (1<state == TCP_CLOSE_WAIT && (flags & (1<local_port); - hdr.dst_port = bswap16(flow->remote.port); - hdr.sequence = bswap32(flow_ctx->sequence); - hdr.ack = bswap32(flow_ctx->ack); - hdr.flags = flags; - hdr.window = flow_ctx->window ? flow_ctx->window : 0xFFFF; - hdr.urgent_ptr = 0; - - bool sent = false; - if (flow->remote.ver == IP_VER4) { - ipv4_tx_opts_t tx; build_tx_opts_from_local_v4(flow->local.ip, &tx); - sent = send_tcp_segment(IP_VER4, flow->local.ip, flow->remote.ip, &hdr, payload_ptr, payload_len, &tx); - } else { - sent = false; //TODO IPV6 - } - if (!sent) return TCP_RESET; - - uint32_t seq_incr = payload_len; - if (flags & (1<sequence += seq_incr; - - if ((flags & (1< 0) { - flow_ctx->expected_ack = flow_ctx->sequence; - - int retries = TCP_DATA_RETRIES; - while (retries-- > 0) { - uint64_t wait_ms = TCP_RETRY_TIMEOUT_MS; - uint64_t elapsed = 0; - const uint64_t interval = 50; - while (elapsed < wait_ms) { - if (flow_ctx->ack_received >= flow_ctx->expected_ack) break; - if (flow->state == TCP_STATE_CLOSED) return TCP_RESET; - msleep(interval); - elapsed += interval; - } - if (flow_ctx->ack_received >= flow_ctx->expected_ack) break; - if (flow->state >= TCP_CLOSING || flow->state == TCP_STATE_CLOSED) break; - - flow_ctx->sequence -= seq_incr; - if (flow->remote.ver == IP_VER4) { - ipv4_tx_opts_t tx; build_tx_opts_from_local_v4(flow->local.ip, &tx); - send_tcp_segment(IP_VER4, flow->local.ip, flow->remote.ip, &hdr, payload_ptr, payload_len, &tx); - } - flow_ctx->sequence += seq_incr; - } - if (flow_ctx->ack_received < flow_ctx->expected_ack) { - return TCP_TIMEOUT; - } - } - return TCP_OK; -} - -tcp_result_t tcp_flow_close(tcp_data *flow_ctx) { - if (!flow_ctx) return TCP_INVALID; - - tcp_flow_t *flow = NULL; - for (int i = 0; i < MAX_TCP_FLOWS; ++i) { - if (&tcp_flows[i].ctx == flow_ctx) { flow = &tcp_flows[i]; break; } - } - if (!flow) return TCP_INVALID; - - if (flow->state == TCP_ESTABLISHED || flow->state == TCP_CLOSE_WAIT) { - 
flow_ctx->sequence = flow->ctx.sequence; - flow_ctx->ack = flow->ctx.ack; - flow_ctx->window = flow->ctx.window ? flow->ctx.window : 0xFFFF; - - flow_ctx->payload.ptr = 0; - flow_ctx->payload.size = 0; - flow_ctx->flags = (uint8_t)((1u << FIN_F) | (1u << ACK_F)); - - tcp_result_t res = tcp_flow_send(flow_ctx); - if (res != TCP_OK) return res; - - if (flow->state == TCP_ESTABLISHED) flow->state = TCP_FIN_WAIT_1; - else flow->state = TCP_LAST_ACK; - - const uint64_t max_wait = 2000; - const uint64_t interval = 100; - uint64_t elapsed = 0; - while (elapsed < max_wait) { - if (flow->state == TCP_STATE_CLOSED) break; - msleep(interval); - elapsed += interval; - } - - int idx = (int)(flow - tcp_flows); - free_flow_entry(idx); - return TCP_OK; - } - return TCP_INVALID; -} - -void tcp_input(ip_version_t ipver, const void *src_ip_addr, const void *dst_ip_addr, uint8_t l3_id, uintptr_t ptr, uint32_t len) { - if (len < sizeof(tcp_hdr_t)) return; - - tcp_hdr_t *hdr = (tcp_hdr_t*) ptr; - - if (ipver == IP_VER4){ - uint16_t recv_checksum = hdr->checksum; - hdr->checksum = 0; - uint16_t calc = tcp_compute_checksum_v4((uint8_t*)hdr, (uint16_t)len, v4_u32_from_ptr(src_ip_addr), v4_u32_from_ptr(dst_ip_addr)); - hdr->checksum = recv_checksum; - if (recv_checksum != calc) return; - } else { - //TODO IPV6 - } - - uint16_t src_port = bswap16(hdr->src_port); - uint16_t dst_port = bswap16(hdr->dst_port); - uint32_t seq = bswap32(hdr->sequence); - uint32_t ack = bswap32(hdr->ack); - uint8_t flags = hdr->flags; - uint16_t window = bswap16(hdr->window); - - int idx = find_flow(dst_port, ipver, src_ip_addr, src_port); - tcp_flow_t *flow = (idx >= 0 ? &tcp_flows[idx] : NULL); - if (flow) flow->ctx.window = window; - - l3_ipv4_interface_t *v4 = l3_ipv4_find_by_id(l3_id); - l3_ipv6_interface_t *v6 = v4 ? NULL : l3_ipv6_find_by_id(l3_id); - port_manager_t *pm = v4 ? ifmgr_pm_v4(l3_id) : (v6 ? ifmgr_pm_v6(l3_id) : NULL); - if (!pm) return; - - uint8_t ifx = v4 ? (v4->l2 ? 
v4->l2->ifindex : 0) : (v6 && v6->l2 ? v6->l2->ifindex : 0); - - if (!flow) { - int listen_idx = find_flow(dst_port, IP_VER4, NULL, 0); - if ((flags & (1u<= 0) { - rng_t rng; - rng_init_random(&rng); - - tcp_flow_t *lf = &tcp_flows[listen_idx]; - int new_idx = allocate_flow_entry(); - if (new_idx < 0) return; - - flow = &tcp_flows[new_idx]; - flow->local_port = dst_port; - - flow->remote.ver = ipver; - memset(flow->remote.ip, 0, 16); - memcpy(flow->remote.ip, src_ip_addr, (uint64_t)ip_len(ipver)); - flow->remote.port = src_port; - - flow->local.ver = ipver; - memset(flow->local.ip, 0, 16); - if (ipver == IP_VER4 && v4) memcpy(flow->local.ip, &v4->ip, 4); - flow->local.port = dst_port; - - flow->state = TCP_SYN_RECEIVED; - flow->retries = TCP_SYN_RETRIES; - - flow->ctx.window = lf->ctx.window ? lf->ctx.window : 0xFFFF; - flow->ctx.flags = 0; - flow->ctx.options = lf->ctx.options; - flow->ctx.payload.ptr = 0; - flow->ctx.payload.size = 0; - - uint32_t iss = rng_next32(&rng); - flow->ctx.sequence = iss; - flow->ctx.ack = seq + 1; - flow->ctx.expected_ack = iss + 1; - flow->ctx.ack_received = 0; - - tcp_hdr_t synack_hdr; - synack_hdr.src_port = bswap16(dst_port); - synack_hdr.dst_port = bswap16(src_port); - synack_hdr.sequence = bswap32(iss); - synack_hdr.ack = bswap32(seq + 1); - synack_hdr.flags = (uint8_t)((1u<ctx.window; - synack_hdr.urgent_ptr = 0; - - if (ipver == IP_VER4 && v4) { - ipv4_tx_opts_t tx; build_tx_opts_from_l3(l3_id, &tx); - send_tcp_segment(IP_VER4, flow->local.ip, src_ip_addr, &synack_hdr, NULL, 0, &tx); - } - return; - } else { - if (!(flags & (1u<state) { - case TCP_SYN_SENT: - if ((flags & (1<ctx.expected_ack) { - flow->ctx.ack = seq + 1; - flow->ctx.ack_received = ack; - flow->ctx.sequence += 1; - - tcp_hdr_t final_ack; - final_ack.src_port = bswap16(flow->local_port); - final_ack.dst_port = bswap16(flow->remote.port); - final_ack.sequence = bswap32(flow->ctx.sequence); - final_ack.ack = bswap32(flow->ctx.ack); - final_ack.flags = 
(1<ctx.window; - final_ack.urgent_ptr = 0; - - if (flow->remote.ver == IP_VER4){ - ipv4_tx_opts_t tx; build_tx_opts_from_local_v4(flow->local.ip, &tx); - (void)send_tcp_segment(IP_VER4, flow->local.ip, flow->remote.ip, &final_ack, NULL, 0, &tx); - } - flow->state = TCP_ESTABLISHED; - } - } else if (flags & (1<state = TCP_STATE_CLOSED; - } - return; - case TCP_SYN_RECEIVED: - if ((flags & (1<ctx.expected_ack) { - flow->ctx.sequence += 1; - flow->state = TCP_ESTABLISHED; - flow->ctx.ack_received = ack; - - if (pm){ - port_recv_handler_t h = port_get_handler(pm, PROTO_TCP, dst_port); - if (h) h(ifx, ipver, src_ip_addr, dst_ip_addr, 0, 0, src_port, dst_port); - } - } - } else if (flags & (1<data_offset_reserved >> 4) * 4); - if (len < hdr_len) return; - uint32_t data_len = len - hdr_len; - - if ((flags & (1u << ACK_F)) && ack > flow->ctx.ack_received) { - flow->ctx.ack_received = ack; - if (flow->state == TCP_FIN_WAIT_1 && ack == flow->ctx.expected_ack){ - flow->state = TCP_FIN_WAIT_2; - } else if ((flow->state == TCP_LAST_ACK || flow->state == TCP_CLOSING) && ack == flow->ctx.expected_ack) { - free_flow_entry(idx); - return; - } - } - - uint32_t rcv_next_old = flow->ctx.ack; - uint32_t rcv_next_new = rcv_next_old; - - bool data_inseq = (data_len > 0) && (seq == rcv_next_old); - if (data_inseq) { - rcv_next_new += data_len; - if (pm){ - port_recv_handler_t h = port_get_handler(pm, PROTO_TCP, dst_port); - if (h) h(ifx, ipver, src_ip_addr, dst_ip_addr, ptr + hdr_len, data_len, src_port, dst_port); - } - } - - bool fin_set = (flags & (1<ctx.ack = rcv_next_new; - - tcp_hdr_t ackhdr = { - .src_port = bswap16(flow->local_port), - .dst_port = bswap16(flow->remote.port), - .sequence = bswap32(flow->ctx.sequence), - .ack = bswap32(flow->ctx.ack), - .flags = (uint8_t)(1<ctx.window ? 
flow->ctx.window : 0xFFFF, - .urgent_ptr = 0 - }; - if (flow->remote.ver == IP_VER4){ - ipv4_tx_opts_t tx; build_tx_opts_from_local_v4(flow->local.ip, &tx); - (void)send_tcp_segment(IP_VER4, flow->local.ip, flow->remote.ip, &ackhdr, NULL, 0, &tx); - } - } - - if (fin_inseq){ - tcp_state_t old = flow->state; - if (old == TCP_ESTABLISHED) flow->state = TCP_CLOSE_WAIT; - else if (old == TCP_FIN_WAIT_1) flow->state = TCP_CLOSING; - else if (old == TCP_FIN_WAIT_2) flow->state = TCP_TIME_WAIT; - else if (old == TCP_CLOSING) flow->state = TCP_TIME_WAIT; - else if (old == TCP_LAST_ACK) flow->state = TCP_TIME_WAIT; - } - return; - } - - default: - break; - } -} diff --git a/kernel/networking/transport_layer/tcp.h b/kernel/networking/transport_layer/tcp.h index 3ead4932..3498771f 100644 --- a/kernel/networking/transport_layer/tcp.h +++ b/kernel/networking/transport_layer/tcp.h @@ -5,6 +5,8 @@ #include "networking/link_layer/eth.h" #include "std/memory.h" #include "net/network_types.h" +#include "net/socket_types.h" + #ifdef __cplusplus extern "C" { #endif @@ -68,33 +70,39 @@ typedef enum { TCP_TIME_WAIT } tcp_state_t; -typedef struct { - uint16_t local_port; - net_l4_endpoint local; - net_l4_endpoint remote; - tcp_state_t state; - tcp_data ctx; - uint8_t retries; -} tcp_flow_t; - #define MAX_TCP_FLOWS 512 #define TCP_SYN_RETRIES 5 #define TCP_DATA_RETRIES 5 -#define TCP_RETRY_TIMEOUT_MS 1000 +#define TCP_RETRY_TIMEOUT_MS 200 +#define TCP_RECV_WINDOW 65535 +#define TCP_MAX_TX_SEGS 16 +#define TCP_INIT_RTO 200 +#define TCP_MIN_RTO 200 +#define TCP_MAX_RTO 60000 +#define TCP_MSL_MS 30000 +#define TCP_2MSL_MS (2 * TCP_MSL_MS) +#define TCP_MAX_RETRANS 8 +#define TCP_MAX_PERSIST_PROBES 8 -int find_flow(uint16_t local_port, ip_version_t ver, const void *remote_ip, uint16_t remote_port); -tcp_data* tcp_get_ctx(uint16_t local_port, ip_version_t ver, const void *remote_ip, uint16_t remote_port); +int find_flow(uint16_t local_port, ip_version_t ver, const void *local_ip, const void 
*remote_ip, uint16_t remote_port); +tcp_data* tcp_get_ctx(uint16_t local_port, ip_version_t ver, const void *local_ip, const void *remote_ip, uint16_t remote_port); -bool tcp_bind_l3(uint8_t l3_id, uint16_t port, uint16_t pid, port_recv_handler_t handler); +bool tcp_bind_l3(uint8_t l3_id, uint16_t port, uint16_t pid, port_recv_handler_t handler, const SocketExtraOptions* extra); int tcp_alloc_ephemeral_l3(uint8_t l3_id, uint16_t pid, port_recv_handler_t handler); bool tcp_unbind_l3(uint8_t l3_id, uint16_t port, uint16_t pid); -bool tcp_handshake_l3(uint8_t l3_id, uint16_t local_port, net_l4_endpoint *dst, tcp_data *flow_ctx, uint16_t pid); +bool tcp_handshake_l3(uint8_t l3_id, uint16_t local_port, net_l4_endpoint *dst, tcp_data *flow_ctx, uint16_t pid, const SocketExtraOptions* extra); tcp_result_t tcp_flow_send(tcp_data *flow_ctx); tcp_result_t tcp_flow_close(tcp_data *flow_ctx); +void tcp_flow_window_update(tcp_data *flow_ctx); +void tcp_flow_on_app_read(tcp_data *flow_ctx, uint32_t bytes_read); + void tcp_input(ip_version_t ipver, const void *src_ip_addr, const void *dst_ip_addr, uint8_t l3_id, uintptr_t ptr, uint32_t len); +void tcp_tick_all(uint32_t elapsed_ms); +int tcp_daemon_entry(int argc, char *argv[]); + #ifdef __cplusplus } #endif diff --git a/kernel/networking/transport_layer/tcp/tcp_core.c b/kernel/networking/transport_layer/tcp/tcp_core.c new file mode 100644 index 00000000..3d6f9a3b --- /dev/null +++ b/kernel/networking/transport_layer/tcp/tcp_core.c @@ -0,0 +1,546 @@ +#include "tcp_internal.h" +#include "networking/port_manager.h" +#include "networking/interface_manager.h" +#include "networking/internet_layer/ipv4.h" +#include "networking/internet_layer/ipv6.h" +#include "networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "std/memory.h" +#include "math/rng.h" +#include "syscalls/syscalls.h" +#include "networking/transport_layer/trans_utils.h" + +tcp_flow_t *tcp_flows[MAX_TCP_FLOWS]; + + +int 
find_flow(uint16_t local_port, ip_version_t ver, const void *local_ip, const void *remote_ip, uint16_t remote_port){ + for (int i = 0; i < MAX_TCP_FLOWS; i++){ + tcp_flow_t *f = tcp_flows[i]; + if (!f) continue; + + if (f->state == TCP_STATE_CLOSED) continue; + if (f->local_port != local_port) continue; + + if (f->state == TCP_LISTEN){ + if (remote_ip || remote_port) continue; + if (f->local.ver && f->local.ver != ver) continue; + if (!local_ip) return i; + + size_t l = (size_t)(ver == IP_VER6 ? 16 : 4); + int unspec = 1; + for (size_t k = 0; k < l; ++k){ + if (f->local.ip[k]){ + unspec = 0; + break; + } + } + if (unspec) return i; + if (memcmp(f->local.ip, local_ip, l) == 0) return i; + continue; + } + + if (!remote_ip) continue; + if (!local_ip) continue; + if (f->remote.ver != ver) continue; + if (f->remote.port != remote_port) continue; + + size_t l = (size_t)(ver == IP_VER6 ? 16 : 4); + if (memcmp(f->local.ip, local_ip, l) != 0) continue; + if (memcmp(f->remote.ip, remote_ip, l) != 0) continue; + + return i; + } + + return -1; +} + +tcp_data *tcp_get_ctx(uint16_t local_port, ip_version_t ver, const void *local_ip, const void *remote_ip, uint16_t remote_port){ + int idx = find_flow(local_port, ver, local_ip, remote_ip, remote_port); + + if (idx < 0) return NULL; + return &tcp_flows[idx]->ctx; +} + +static void clear_txq(tcp_flow_t *f){ + for (int i = 0; i < TCP_MAX_TX_SEGS; i++){ + tcp_tx_seg_t *s = &f->txq[i]; + + if (s->used && s->buf && s->len) free_sized((void *)s->buf, s->len); + + s->used = 0; + s->syn = 0; + s->fin = 0; + s->rtt_sample = 0; + s->retransmit_cnt = 0; + s->seq = 0; + s->len = 0; + s->buf = 0; + s->timer_ms = 0; + s->timeout_ms = 0; +} +} + +static void clear_reass(tcp_flow_t *f){ + for (int i = 0; i < TCP_REASS_MAX_SEGS; i++){ + if (f->reass[i].buf && f->reass[i].end > f->reass[i].seq){ + uint32_t l = f->reass[i].end - f->reass[i].seq; + free_sized((void *)f->reass[i].buf, l); + } + + f->reass[i].seq = 0; + f->reass[i].end = 0; + 
f->reass[i].buf = 0; + } + + f->reass_count = 0; + f->rcv_buf_used = 0; +} + +tcp_flow_t *tcp_alloc_flow(void){ + for (int i = 0; i < MAX_TCP_FLOWS; i++){ + if (tcp_flows[i]) continue; + + tcp_flow_t *f = (tcp_flow_t *)malloc(sizeof(tcp_flow_t)); + if (!f) return NULL; + memset(f, 0, sizeof(tcp_flow_t)); + tcp_flows[i] = f; + + f->rto = TCP_INIT_RTO; + f->rcv_wnd_max = TCP_DEFAULT_RCV_BUF; + f->rcv_wnd = f->rcv_wnd_max; + + f->mss = TCP_DEFAULT_MSS; + f->cwnd = f->mss; + f->ssthresh = TCP_RECV_WINDOW; + + clear_reass(f); + clear_txq(f); + + return f; + } + + return NULL; +} + +void tcp_free_flow(int idx) { + if (idx < 0 || idx >= MAX_TCP_FLOWS) return; + + tcp_flow_t *f = tcp_flows[idx]; + if (!f) return; + + clear_txq(f); + clear_reass(f); + + memset(f, 0, sizeof(*f)); + + f->state = TCP_STATE_CLOSED; + + f->rto = TCP_INIT_RTO; + + f->rcv_wnd_max = TCP_DEFAULT_RCV_BUF; + f->rcv_wnd = f->rcv_wnd_max; + + f->mss = TCP_DEFAULT_MSS; + f->cwnd = f->mss; + f->ssthresh = TCP_RECV_WINDOW; + + free_sized(f, sizeof(*f)); + tcp_flows[idx] = NULL; +} + +bool tcp_send_segment(ip_version_t ver, const void *src_ip_addr, const void *dst_ip_addr, tcp_hdr_t *hdr, const uint8_t *opts, uint8_t opts_len, const uint8_t *payload, uint16_t payload_len, const ip_tx_opts_t *txp, uint8_t ttl, uint8_t dontfrag){ + if (!hdr) return false; + + if (opts_len & 3u) return false; + if (opts_len > 40u) return false; + + uint16_t tcp_len = (uint16_t)(sizeof(tcp_hdr_t) + opts_len + payload_len); + uint32_t headroom = (uint32_t)sizeof(eth_hdr_t) + (uint32_t)(ver == IP_VER4 ? 
sizeof(ipv4_hdr_t) : sizeof(ipv6_hdr_t)); + netpkt_t *pkt = netpkt_alloc(tcp_len, headroom, 0); + if (!pkt) return false; + uint8_t *segment = (uint8_t*)netpkt_put(pkt, tcp_len); + if (!segment) { + netpkt_unref(pkt); + return false; + } + + tcp_hdr_t h = *hdr; + + uint8_t header_words = (uint8_t)((sizeof(tcp_hdr_t) + opts_len) / 4); + h.data_offset_reserved = (uint8_t)(header_words << 4); + h.window = bswap16(h.window); + h.checksum = 0; + + memcpy(segment, &h, sizeof(tcp_hdr_t)); + if (opts_len && opts) memcpy(segment + sizeof(tcp_hdr_t), opts, opts_len); + if (payload_len && payload) memcpy(segment + sizeof(tcp_hdr_t) + opts_len, payload, payload_len); + + if (ver == IP_VER4){ + uint32_t s = *(const uint32_t *)src_ip_addr; + uint32_t d = *(const uint32_t *)dst_ip_addr; + + ((tcp_hdr_t *)segment)->checksum = tcp_checksum_ipv4(segment, tcp_len, s, d); + ipv4_send_packet(d, 6, pkt, (const ipv4_tx_opts_t *)txp, ttl, dontfrag); + return true; + } else if (ver == IP_VER6){ + ((tcp_hdr_t *)segment)->checksum = tcp_checksum_ipv6(segment, tcp_len, (const uint8_t *)src_ip_addr, (const uint8_t *)dst_ip_addr); + ipv6_send_packet((const uint8_t *)dst_ip_addr, 6, pkt, (const ipv6_tx_opts_t *)txp, ttl, dontfrag); + return true; + } + + netpkt_unref(pkt); + return false; +} + +void tcp_send_reset(ip_version_t ver, const void *src_ip_addr, const void *dst_ip_addr, uint16_t src_port, uint16_t dst_port, uint32_t seq, uint32_t ack, bool ack_valid){ + tcp_hdr_t rst_hdr; + + rst_hdr.src_port = bswap16(src_port); + rst_hdr.dst_port = bswap16(dst_port); + + if (ack_valid){ + rst_hdr.sequence = bswap32(0); + rst_hdr.ack = bswap32(ack); + rst_hdr.flags = (uint8_t)((1u << RST_F) | (1u << ACK_F)); + } else{ + rst_hdr.sequence = bswap32(seq); + rst_hdr.ack = bswap32(0); + rst_hdr.flags = (uint8_t)(1u << RST_F); + } + + rst_hdr.window = 0; + rst_hdr.urgent_ptr = 0; + + if (ver == IP_VER4){ + ipv4_tx_opts_t tx; + + tcp_build_tx_opts_from_local_v4(src_ip_addr, &tx); + tcp_send_segment(IP_VER4, 
src_ip_addr, dst_ip_addr, &rst_hdr, NULL, 0, NULL, 0, (const ip_tx_opts_t *)&tx, 0, 0); + } else if (ver == IP_VER6){ + ipv6_tx_opts_t tx; + + tcp_build_tx_opts_from_local_v6(src_ip_addr, &tx); + tcp_send_segment(IP_VER6, src_ip_addr, dst_ip_addr, &rst_hdr, NULL, 0, NULL, 0, (const ip_tx_opts_t *)&tx, 0, 0); + } +} + +void tcp_rtt_update(tcp_flow_t *flow, uint32_t sample_ms){ + if (sample_ms == 0) sample_ms = 1; + + if (!flow->rtt_valid){ + flow->srtt = sample_ms; + flow->rttvar = sample_ms / 2; + + uint32_t rto = flow->srtt + (flow->rttvar << 2); + if (rto < TCP_MIN_RTO) rto = TCP_MIN_RTO; + if (rto > TCP_MAX_RTO) rto = TCP_MAX_RTO; + + flow->rto = rto; + flow->rtt_valid = 1; + + return; + } + + uint32_t srtt = flow->srtt; + uint32_t rttvar = flow->rttvar; + + uint32_t diff = srtt > sample_ms ? srtt - sample_ms : sample_ms - srtt; + uint32_t new_rttvar = (uint32_t)((3 * (uint64_t)rttvar + (uint64_t)diff) >> 2); + uint32_t new_srtt = (uint32_t)(((uint64_t)7 * srtt + sample_ms) >> 3); + + flow->srtt = new_srtt; + flow->rttvar = new_rttvar; + + uint32_t rto = new_srtt + (new_rttvar << 2); + if (rto < TCP_MIN_RTO) rto = TCP_MIN_RTO; + if (rto > TCP_MAX_RTO) rto = TCP_MAX_RTO; + + flow->rto = rto; +} + +bool tcp_bind_l3(uint8_t l3_id, uint16_t port, uint16_t pid, port_recv_handler_t handler, const SocketExtraOptions* extra){ + ip_version_t ver = l3_is_v6_from_id(l3_id) ? IP_VER6 : IP_VER4; + + port_manager_t *pm = (ver == IP_VER6) ? ifmgr_pm_v6(l3_id) : ifmgr_pm_v4(l3_id); + + if (!pm) return false; + if (!port_bind_manual(pm, PROTO_TCP, port, pid, handler)) return false; + + int listen_idx = find_flow(port, ver, NULL, NULL, 0); + if (listen_idx >= 0) return true; + + tcp_flow_t *f = tcp_alloc_flow(); + if (!f) { + (void)port_unbind(pm, PROTO_TCP, port, pid); + return false; + } + if (f){ + f->local_port = port; + f->l3_id = l3_id; + + f->local.ver = l3_is_v6_from_id(l3_id) ? 
IP_VER6 : IP_VER4; + memset(f->local.ip, 0, sizeof(f->local.ip)); + f->local.port = port; + + f->remote.ver = 0; + memset(f->remote.ip, 0, sizeof(f->remote.ip)); + f->remote.port = 0; + + f->state = TCP_LISTEN; + + f->ctx.sequence = 0; + f->ctx.ack = 0; + f->ctx.flags = 0; + + f->rcv_wnd_max = TCP_DEFAULT_RCV_BUF; + if (extra && (extra->flags & SOCK_OPT_BUF_SIZE) && extra->buf_size) f->rcv_wnd_max = extra->buf_size; + f->rcv_buf_used = 0; + f->rcv_adv_edge = 0; + + f->ip_ttl = extra && (extra->flags & SOCK_OPT_TTL) ? extra->ttl : 0; + f->ip_dontfrag = extra && (extra->flags & SOCK_OPT_DONTFRAG) ? 1 : 0; + f->keepalive_on = extra && (extra->flags & SOCK_OPT_KEEPALIVE) ? 1 : 0; + f->keepalive_ms = extra && (extra->flags & SOCK_OPT_KEEPALIVE) ? extra->keepalive_ms : 0; + f->keepalive_idle_ms = 0; + + f->mss = TCP_DEFAULT_MSS; + if (f->rcv_wnd_max > 65535u) { + f->ws_send = 8; + f->ws_recv = 0; + f->ws_ok = 1; + } else { + f->ws_send = 0; + f->ws_recv = 0; + f->ws_ok = 0; + } + f->sack_ok = 1; + + (void)tcp_calc_adv_wnd_field(f, 1); + + f->ctx.options.ptr = 0; + f->ctx.options.size = 0; + f->ctx.payload.ptr = 0; + f->ctx.payload.size = 0; + + f->ctx.expected_ack = 0; + f->ctx.ack_received = 0; + + f->time_wait_ms = 0; + f->fin_wait2_ms = 0; + } + + return true; +} + +int tcp_alloc_ephemeral_l3(uint8_t l3_id, uint16_t pid, port_recv_handler_t handler){ + + port_manager_t *pm = l3_is_v6_from_id(l3_id) ? ifmgr_pm_v6(l3_id) : ifmgr_pm_v4(l3_id); + if (!pm) return -1; + + if (!pm) return -1; + return port_alloc_ephemeral(pm, PROTO_TCP, pid, handler); +} + +bool tcp_unbind_l3(uint8_t l3_id, uint16_t port, uint16_t pid){ + ip_version_t ver = l3_is_v6_from_id(l3_id) ? IP_VER6 : IP_VER4; + + port_manager_t *pm = (ver == IP_VER6) ? 
ifmgr_pm_v6(l3_id) : ifmgr_pm_v4(l3_id); + if (!pm) return false; + + bool res = port_unbind(pm, PROTO_TCP, port, pid); + + if (res){ + for (int i = 0; i < MAX_TCP_FLOWS; i++){ + tcp_flow_t *f = tcp_flows[i]; + if (!f) continue; + if (f->state==TCP_LISTEN && f->local_port==port && f->local.ver==ver) tcp_free_flow(i); + } + } + + return res; +} + +bool tcp_handshake_l3(uint8_t l3_id, uint16_t local_port, net_l4_endpoint *dst, tcp_data *flow_ctx, uint16_t pid, const SocketExtraOptions* extra){ + (void)pid; + + tcp_flow_t *flow = tcp_alloc_flow(); + if (!flow) return false; + + int idx = -1; + for (int i = 0; i < MAX_TCP_FLOWS; i++) { + if (tcp_flows[i] == flow) { + idx = i; + break; + } + } + if (idx < 0) return false; + + flow->local_port = local_port; + flow->l3_id = l3_id; + + flow->remote.ver = dst->ver; + memcpy(flow->remote.ip, dst->ip, (size_t)(dst->ver == IP_VER6 ? 16 : 4)); + flow->remote.port = dst->port; + + if (dst->ver == IP_VER4){ + l3_ipv4_interface_t *v4 = l3_ipv4_find_by_id(l3_id); + + if (!v4 || !v4->ip){ + tcp_free_flow(idx); + return false; + } + + make_ep(v4->ip, local_port, IP_VER4, &flow->local); + } else{ + l3_ipv6_interface_t *v6 = l3_ipv6_find_by_id(l3_id); + + if (!v6 || ipv6_is_unspecified(v6->ip)){ + tcp_free_flow(idx); + return false; + } + + flow->local.ver = IP_VER6; + memset(flow->local.ip, 0, sizeof(flow->local.ip)); + memcpy(flow->local.ip, v6->ip, sizeof(flow->local.ip)); + flow->local.port = local_port; + } + + flow->state = TCP_SYN_SENT; + flow->retries = TCP_SYN_RETRIES; + + rng_t rng; + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&rng, virt_timer); + uint32_t iss = rng_next32(&rng); + + flow->ctx.sequence = iss; + flow->ctx.ack = 0; + + flow->rcv_nxt = 0; + flow->rcv_buf_used = 0; + flow->rcv_wnd_max = TCP_DEFAULT_RCV_BUF; + if (extra && (extra->flags & SOCK_OPT_BUF_SIZE) && extra->buf_size) flow->rcv_wnd_max = extra->buf_size; + flow->rcv_adv_edge = 0; + + flow->mss = 
tcp_calc_mss_for_l3(l3_id, dst->ver, dst->ip); + + if (flow->rcv_wnd_max > 65535u) { + flow->ws_send = 8; + flow->ws_recv = 0; + flow->ws_ok = 1; + } else { + flow->ws_send = 0; + flow->ws_recv = 0; + flow->ws_ok = 0; + } + flow->sack_ok = 1; + + (void)tcp_calc_adv_wnd_field(flow, 1); + + flow->ip_ttl = extra && (extra->flags & SOCK_OPT_TTL) ? extra->ttl : 0; + flow->ip_dontfrag = extra && (extra->flags & SOCK_OPT_DONTFRAG) ? 1 : 0; + flow->keepalive_on = extra && (extra->flags & SOCK_OPT_KEEPALIVE) ? 1 : 0; + flow->keepalive_ms = extra && (extra->flags & SOCK_OPT_KEEPALIVE) ? extra->keepalive_ms : 0; + flow->keepalive_idle_ms = 0; + + flow->ctx.options.ptr = 0; + flow->ctx.options.size = 0; + flow->ctx.payload.ptr = 0; + flow->ctx.payload.size = 0; + + flow->ctx.flags = (uint8_t)(1u << SYN_F); + flow->ctx.expected_ack = iss + 1; + flow->ctx.ack_received = 0; + + flow->snd_una = iss; + flow->snd_nxt = iss; + flow->snd_wnd = 0; + + flow->cwnd = flow->mss; + flow->ssthresh = TCP_RECV_WINDOW; + flow->dup_acks = 0; + flow->in_fast_recovery = 0; + flow->recover = 0; + flow->cwnd_acc = 0; + + flow->time_wait_ms = 0; + flow->fin_wait2_ms = 0; + + clear_reass(flow); + clear_txq(flow); + + tcp_tx_seg_t *seg = tcp_alloc_tx_seg(flow); + + if (!seg){ + tcp_free_flow(idx); + return false; + } + + seg->syn = 1; + seg->fin = 0; + seg->rtt_sample = 1; + seg->retransmit_cnt = 0; + seg->seq = flow->snd_nxt; + seg->len = 0; + seg->buf = 0; + seg->timer_ms = 0; + seg->timeout_ms = flow->rto ? flow->rto : TCP_INIT_RTO; + + tcp_hdr_t syn_hdr; + + syn_hdr.src_port = bswap16(local_port); + syn_hdr.dst_port = bswap16(dst->port); + syn_hdr.sequence = bswap32(flow->snd_nxt); + syn_hdr.ack = bswap32(0); + syn_hdr.flags = (uint8_t)(1u << SYN_F); + syn_hdr.window = flow->ctx.window; + syn_hdr.urgent_ptr = 0; + + uint8_t syn_opts[40]; + uint8_t syn_opts_len = tcp_build_syn_options(syn_opts, (uint16_t)flow->mss, flow->rcv_wnd_max > 65535u ? 
flow->ws_send : 0xffu, flow->sack_ok); + + if (dst->ver == IP_VER4){ + ipv4_tx_opts_t tx; + + tcp_build_tx_opts_from_l3(l3_id, &tx); + tcp_send_segment(IP_VER4, flow->local.ip, flow->remote.ip, &syn_hdr, syn_opts, syn_opts_len, NULL, 0, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } else{ + ipv6_tx_opts_t tx; + + tx.scope = IP_TX_BOUND_L3; + tx.index = l3_id; + tcp_send_segment(IP_VER6, flow->local.ip, flow->remote.ip, &syn_hdr, syn_opts, syn_opts_len, NULL, 0, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } + + flow->snd_nxt += 1; + flow->ctx.sequence = flow->snd_nxt; + flow->ctx.expected_ack = flow->snd_nxt; + + tcp_daemon_kick(); + + uint64_t waited = 0; + const uint64_t interval = 50; + const uint64_t max_wait = (uint64_t)TCP_MAX_RTO * (uint64_t)(TCP_SYN_RETRIES + 1); + + while (waited < max_wait){ + if (flow->state == TCP_ESTABLISHED){ + tcp_data *ctx = tcp_get_ctx(local_port, dst->ver, flow->local.ip, dst->ip, dst->port); + if (!ctx) return false; + + *flow_ctx = *ctx; + return true; + } + + if (flow->state == TCP_STATE_CLOSED){ + tcp_free_flow(idx); + return false; + } + + msleep(interval); + waited += interval; + } + + tcp_free_flow(idx); + return false; +} \ No newline at end of file diff --git a/kernel/networking/transport_layer/tcp/tcp_internal.h b/kernel/networking/transport_layer/tcp/tcp_internal.h new file mode 100644 index 00000000..b11bb212 --- /dev/null +++ b/kernel/networking/transport_layer/tcp/tcp_internal.h @@ -0,0 +1,138 @@ +#pragma once + +#include "../tcp.h" +#include "types.h" +#include "networking/port_manager.h" +#include "networking/internet_layer/ipv4.h" +#include "networking/internet_layer/ipv6.h" +#include "networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "net/checksums.h" +#include "std/memory.h" +#include "math/rng.h" +#include "syscalls/syscalls.h" +#include "tcp_utils.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define TCP_REASS_MAX_SEGS 
32 +#define TCP_DEFAULT_MSS 1460 +#define TCP_DEFAULT_RCV_BUF (256u * 1024u) +#define TCP_PERSIST_PROBE_BUFSZ 1 + +#define TCP_DELAYED_ACK_MS 200 +#define TCP_PERSIST_MIN_MS 500 +#define TCP_PERSIST_MAX_MS 60000 + +typedef struct { + uint8_t used; + uint8_t syn; + uint8_t fin; + uint8_t rtt_sample; + uint8_t retransmit_cnt; + uint32_t seq; + uint64_t len; + uintptr_t buf; + uint32_t timer_ms; + uint32_t timeout_ms; +} tcp_tx_seg_t; + +typedef struct { + uint32_t seq; + uint32_t end; + uintptr_t buf; +} tcp_reass_seg_t; + +typedef struct { + uint16_t local_port; + net_l4_endpoint local; + net_l4_endpoint remote; + uint8_t l3_id; + tcp_state_t state; + tcp_data ctx; + uint8_t retries; + uint32_t snd_wnd; + uint32_t snd_una; + uint32_t snd_nxt; + uint32_t srtt; + uint32_t rttvar; + uint32_t rto; + uint8_t rtt_valid; + uint32_t time_wait_ms; + uint32_t fin_wait2_ms; + + uint32_t rcv_nxt; + uint32_t rcv_buf_used; + uint32_t rcv_wnd; + uint32_t rcv_wnd_max; + uint32_t rcv_adv_edge; + + uint32_t cwnd; + uint32_t ssthresh; + uint32_t mss; + + uint8_t ws_send; + uint8_t ws_recv; + uint8_t ws_ok; + uint8_t sack_ok; + uint8_t dup_acks; + uint8_t in_fast_recovery; + uint32_t recover; + uint32_t cwnd_acc; + + uint8_t persist_active; + uint8_t persist_probe_cnt; + uint32_t persist_timer_ms; + uint32_t persist_timeout_ms; + + uint8_t delayed_ack_pending; + uint32_t delayed_ack_timer_ms; + + tcp_reass_seg_t reass[TCP_REASS_MAX_SEGS]; + uint8_t reass_count; + tcp_tx_seg_t txq[TCP_MAX_TX_SEGS]; + uint8_t fin_pending; + uint32_t fin_seq; + + uint8_t ip_ttl; + uint8_t ip_dontfrag; + uint8_t keepalive_on; + uint32_t keepalive_ms; + uint32_t keepalive_idle_ms; +} tcp_flow_t; + +extern tcp_flow_t *tcp_flows[MAX_TCP_FLOWS]; + +tcp_flow_t *tcp_alloc_flow(void); +void tcp_free_flow(int idx); + +void tcp_rtt_update(tcp_flow_t *flow, uint32_t sample_ms); + +tcp_tx_seg_t *tcp_alloc_tx_seg(tcp_flow_t *flow); +void tcp_send_from_seg(tcp_flow_t *flow, tcp_tx_seg_t *seg); +void 
tcp_send_ack_now(tcp_flow_t *flow); + +static inline uint16_t tcp_checksum_ipv4(const void *segment, uint16_t seg_len, uint32_t src_ip, uint32_t dst_ip) { + uint16_t csum = checksum16_pipv4(src_ip, dst_ip, 6, (const uint8_t *)segment, seg_len); + return bswap16(csum); +} +static inline uint16_t tcp_checksum_ipv6(const void *segment, uint16_t seg_len, const uint8_t src_ip[16], const uint8_t dst_ip[16]) { + uint16_t csum = checksum16_pipv6(src_ip, dst_ip, 6, (const uint8_t *)segment, seg_len); + return bswap16(csum); +} + +bool tcp_send_segment(ip_version_t ver, const void *src_ip_addr, const void *dst_ip_addr, tcp_hdr_t *hdr, const uint8_t *opts, uint8_t opts_len, const uint8_t *payload, uint16_t payload_len, const ip_tx_opts_t *txp, uint8_t ttl, uint8_t dontfrag); +void tcp_send_reset(ip_version_t ver, const void *src_ip_addr, const void *dst_ip_addr, uint16_t src_port, uint16_t dst_port, uint32_t seq, uint32_t ack, bool ack_valid); +tcp_tx_seg_t *tcp_find_first_unacked(tcp_flow_t *flow); +void tcp_cc_on_timeout(tcp_flow_t *f); + +int tcp_has_pending_timers(void); + +void tcp_daemon_kick(void); +uint16_t tcp_calc_adv_wnd_field(tcp_flow_t *flow, uint8_t apply_scale); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/kernel/networking/transport_layer/tcp/tcp_rx.c b/kernel/networking/transport_layer/tcp/tcp_rx.c new file mode 100644 index 00000000..1d5deb66 --- /dev/null +++ b/kernel/networking/transport_layer/tcp/tcp_rx.c @@ -0,0 +1,922 @@ +#include "tcp_internal.h" +#include "networking/port_manager.h" +#include "networking/internet_layer/ipv4.h" +#include "networking/internet_layer/ipv6.h" +#include "networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "std/memory.h" +#include "math/rng.h" +#include "syscalls/syscalls.h" +#include "../tcp.h" + +static void tcp_reass_evict_tail(tcp_flow_t *flow, uint32_t need) { + while (flow->reass_count && flow->rcv_buf_used + need > flow->rcv_wnd_max) { + int 
idx = 0; + uint32_t best = flow->reass[0].seq; + for (int i = 1; i < flow->reass_count; i++) { + if (flow->reass[i].seq > best) { + best = flow->reass[i].seq; + idx = i; + } + } + + tcp_reass_seg_t *r = &flow->reass[idx]; + uint32_t olen = r->end - r->seq; + if (r->buf && olen) free_sized((void *)r->buf, olen); + if (flow->rcv_buf_used >= olen) flow->rcv_buf_used -= olen; + else flow->rcv_buf_used = 0; + + flow->reass[idx] = flow->reass[flow->reass_count - 1]; + flow->reass[flow->reass_count - 1].seq = 0; + flow->reass[flow->reass_count - 1].end = 0; + flow->reass[flow->reass_count - 1].buf = 0; + flow->reass_count--; + } +} + +static void tcp_reass_insert(tcp_flow_t *flow, uint32_t seq, const uint8_t *data, uint32_t len) { + if (!len) return; + if (flow->reass_count >= TCP_REASS_MAX_SEGS) return; + if (seq < flow->rcv_nxt) { + uint32_t d = flow->rcv_nxt - seq; + if (d >= len) return; + seq += d; + data += d; + len -= d; + } + + uint32_t wnd_end = flow->rcv_nxt + flow->rcv_wnd; + if (seq >= wnd_end) return; + if (seq + len > wnd_end) len = wnd_end - seq; + if (!len) return; + + if (flow->rcv_buf_used + len > flow->rcv_wnd_max) tcp_reass_evict_tail(flow, len); + if (flow->rcv_buf_used + len > flow->rcv_wnd_max) return; + + uint32_t orig_seq = seq; + uint32_t start = seq; + uint32_t end = seq + len; + + for(;;){ + int changed = 0; + + for (int i = 0; i < flow->reass_count; i++){ + tcp_reass_seg_t *r = &flow->reass[i]; + uint32_t rs = r->seq; + uint32_t re = r->end; + + if (end <= rs || start >= re) continue; + if (start >= rs && end <= re) return; + + if (start <= rs && end >= re) { + uint32_t olen = re - rs; + + if (r->buf && olen) free_sized((void *)r->buf, olen); + + flow->reass[i] = flow->reass[flow->reass_count - 1]; + flow->reass[flow->reass_count - 1].seq = 0; + flow->reass[flow->reass_count - 1].end = 0; + flow->reass[flow->reass_count - 1].buf = 0; + flow->reass_count--; + + flow->rcv_buf_used -= olen; + changed = 1; + break; + } + + if (start < rs && end > 
rs && end <= re) { + end = rs; + len = end - start; + changed = 1; + break; + } + + if (start >= rs && start < re && end > re){ + start = re; + len = end - start; + changed = 1; + break; + } + } + + if (!changed) break; + if (!len) return; + } + + if (!len) return; + if (flow->reass_count >= TCP_REASS_MAX_SEGS) return; + if (flow->rcv_buf_used + len > flow->rcv_wnd_max) tcp_reass_evict_tail(flow, len); + if (flow->rcv_buf_used + len > flow->rcv_wnd_max) return; + + uintptr_t buf = (uintptr_t)malloc(len); + if (!buf) return; + + uint32_t offset = start - orig_seq; + memcpy((void *)buf, data + offset, len); + + int pos = flow->reass_count; + while (pos > 0 && flow->reass[pos - 1].seq > start){ + flow->reass[pos] = flow->reass[pos - 1]; + pos--; + } + + flow->reass[pos].seq = start; + flow->reass[pos].end = start + len; + flow->reass[pos].buf = buf; + flow->reass_count++; + + flow->rcv_buf_used += len; + + (void)tcp_calc_adv_wnd_field(flow, 1); +} + +static void tcp_reass_drain_inseq(tcp_flow_t *flow, port_manager_t *pm, uint8_t ifx, ip_version_t ipver, const void *src_ip_addr, const void *dst_ip_addr, uint16_t src_port, uint16_t dst_port) { + uint32_t rcv_nxt = flow->rcv_nxt; + + for(;;){ + int idx = -1; + + for (int i = 0; i < flow->reass_count; i++){ + if (flow->reass[i].seq != rcv_nxt) continue; + idx = i; + break; + } + + if (idx < 0) break; + + tcp_reass_seg_t *seg = &flow->reass[idx]; + uint32_t seg_len = seg->end - seg->seq; + + if (!seg_len) { + flow->reass[idx] = flow->reass[flow->reass_count - 1]; + flow->reass[flow->reass_count - 1].seq = 0; + flow->reass[flow->reass_count - 1].end = 0; + flow->reass[flow->reass_count - 1].buf = 0; + flow->reass_count--; + continue; + } + + if (pm) { + port_recv_handler_t h = port_get_handler(pm, PROTO_TCP, dst_port); + uint32_t accepted = seg_len; + + if (h) accepted = h(ifx, ipver, src_ip_addr, dst_ip_addr, seg->buf, seg_len, src_port, dst_port); + if (accepted > seg_len) accepted = seg_len; + + if (accepted == 0) { + if 
(flow->state == TCP_FIN_WAIT_1 || flow->state == TCP_FIN_WAIT_2 || flow->state == TCP_CLOSING || flow->state == TCP_LAST_ACK || flow->state == TCP_TIME_WAIT) { + if (seg->buf) free_sized((void *)seg->buf, seg_len); + + if (flow->rcv_buf_used >= seg_len) flow->rcv_buf_used -= seg_len; + else flow->rcv_buf_used = 0; + + rcv_nxt += seg_len; + + flow->reass[idx] = flow->reass[flow->reass_count - 1]; + flow->reass[flow->reass_count - 1].seq = 0; + flow->reass[flow->reass_count - 1].end = 0; + flow->reass[flow->reass_count - 1].buf = 0; + flow->reass_count--; + continue; + } + + break; + } + + if (accepted < seg_len) { + uint32_t rem = seg_len - accepted; + uintptr_t newbuf = (uintptr_t)malloc(rem); + if (!newbuf) break; + + memcpy((void *)newbuf, ((const uint8_t *)seg->buf) + accepted, rem); + if (seg->buf) free_sized((void *)seg->buf, seg_len); + + seg->buf = newbuf; + seg->seq += accepted; + + if (flow->rcv_buf_used >= accepted) flow->rcv_buf_used -= accepted; + else flow->rcv_buf_used = 0; + + rcv_nxt += accepted; + + flow->rcv_nxt = rcv_nxt; + flow->ctx.ack = rcv_nxt; + + (void)tcp_calc_adv_wnd_field(flow, 1); + continue; + } + } + + rcv_nxt += seg_len; + + if (seg->buf) free_sized((void *)seg->buf, seg_len); + if (flow->rcv_buf_used >= seg_len) flow->rcv_buf_used -= seg_len; + else flow->rcv_buf_used = 0; + + flow->reass[idx] = flow->reass[flow->reass_count - 1]; + flow->reass[flow->reass_count - 1].seq = 0; + flow->reass[flow->reass_count - 1].end = 0; + flow->reass[flow->reass_count - 1].buf = 0; + flow->reass_count--; + } + + flow->rcv_nxt = rcv_nxt; + flow->ctx.ack = rcv_nxt; + + (void)tcp_calc_adv_wnd_field(flow, 1); +} + +tcp_tx_seg_t *tcp_find_first_unacked(tcp_flow_t *flow) { + tcp_tx_seg_t *best = NULL; + uint32_t best_seq = 0; + + for (int i = 0; i < TCP_MAX_TX_SEGS; i++){ + tcp_tx_seg_t *s = &flow->txq[i]; + + if (!s->used) continue; + + uint32_t end = s->seq + s->len + (s->syn ? 1u : 0u) + (s->fin ? 
1u : 0u); + if (end <= flow->snd_una) continue; + + if (!best || s->seq < best_seq){ + best = s; + best_seq = s->seq; + } + } + + return best; +} + +void tcp_cc_on_timeout(tcp_flow_t *f){ + uint32_t mss = f->mss ? f->mss : TCP_DEFAULT_MSS; + uint32_t flight = f->snd_nxt > f->snd_una ? f->snd_nxt - f->snd_una : 0; + uint32_t half = flight / 2; + uint32_t minth = 2u * mss; + + if (half < minth) half = minth; + + f->ssthresh = half; + f->cwnd = mss; + f->cwnd_acc = 0; + f->dup_acks = 0; + f->in_fast_recovery = 0; + f->recover = 0; +} + +static void tcp_cc_on_new_ack(tcp_flow_t *f, uint32_t ack) { + uint32_t mss = f->mss ? f->mss : TCP_DEFAULT_MSS; + + if (f->in_fast_recovery){ + if (ack >= f->recover){ + f->cwnd = f->ssthresh; + if (f->cwnd < mss) f->cwnd = mss; + + f->in_fast_recovery = 0; + f->dup_acks = 0; + f->cwnd_acc = 0; + return; + } + + f->cwnd = f->ssthresh; + if (f->cwnd < mss) f->cwnd = mss; + return; + } + + if (f->cwnd < f->ssthresh){ + f->cwnd += mss; + if (f->cwnd < mss) f->cwnd = mss; + return; + } + + uint32_t denom = f->cwnd ? f->cwnd : 1u; + uint32_t inc = (mss * mss) / denom; + + if (inc == 0) inc = 1; + + f->cwnd += inc; +} + +static void tcp_cc_on_dupack(tcp_flow_t *f) { + uint32_t mss = f->mss ? 
f->mss : TCP_DEFAULT_MSS; + + if (f->in_fast_recovery){ + f->cwnd += mss; + return; + } + + if (f->dup_acks != 3) return; + + uint32_t flight = f->snd_nxt - f->snd_una; + uint32_t half = flight / 2; + uint32_t minth = 2u * mss; + + if (half < minth) half = minth; + + f->ssthresh = half; + f->recover = f->snd_nxt; + f->cwnd = f->ssthresh + 3u * mss; + f->in_fast_recovery = 1; + + tcp_tx_seg_t *s = tcp_find_first_unacked(f); + if (s) { + tcp_send_from_seg(f, s); + s->retransmit_cnt++; + s->timer_ms = 0; + } +} + +void tcp_input(ip_version_t ipver, const void *src_ip_addr, const void *dst_ip_addr, uint8_t l3_id, uintptr_t ptr, uint32_t len) { + if (len < sizeof(tcp_hdr_t)) return; + + tcp_hdr_t *hdr = (tcp_hdr_t *)ptr; + + uint16_t recv_checksum = hdr->checksum; + hdr->checksum = 0; + + uint16_t calc; + + if (ipver == IP_VER4) calc = tcp_checksum_ipv4(hdr, (uint16_t)len, *(const uint32_t *)src_ip_addr, *(const uint32_t *)dst_ip_addr); + else calc = tcp_checksum_ipv6(hdr, (uint16_t)len, (const uint8_t *)src_ip_addr, (const uint8_t *)dst_ip_addr); + + hdr->checksum = recv_checksum; + if (recv_checksum != calc) return; + + uint16_t src_port = bswap16(hdr->src_port); + uint16_t dst_port = bswap16(hdr->dst_port); + uint32_t seq = bswap32(hdr->sequence); + uint32_t ack = bswap32(hdr->ack); + uint8_t flags = hdr->flags; + uint16_t window = bswap16(hdr->window); + + uint8_t hdr_len = (uint8_t)((hdr->data_offset_reserved >> 4) * 4); + if (len < hdr_len) return; + + uint32_t data_len = len - hdr_len; + + int idx = find_flow(dst_port, ipver, dst_ip_addr, src_ip_addr, src_port); + tcp_flow_t *flow = idx >= 0 ? 
tcp_flows[idx] : NULL; + if (flow) flow->keepalive_idle_ms = 0; + if (flow) flow->l3_id = l3_id; + + port_manager_t *pm = NULL; + uint8_t ifx = 0; + + if (ipver == IP_VER4) { + l3_ipv4_interface_t *v4 = l3_ipv4_find_by_id(l3_id); + if (!v4 || !v4->l2) return; + pm = ifmgr_pm_v4(l3_id); + ifx = v4->l2->ifindex; + } else { + l3_ipv6_interface_t *v6 = l3_ipv6_find_by_id(l3_id); + if (!v6 || !v6->l2) return; + pm = ifmgr_pm_v6(l3_id); + ifx = v6->l2->ifindex; + } + + if (!pm) return; + + if (!flow){ + int listen_idx = find_flow(dst_port, ipver, dst_ip_addr, NULL, 0); + if (listen_idx < 0) + listen_idx = find_flow(dst_port, ipver, NULL, NULL, 0); + + if ((flags & (1u << SYN_F)) && !(flags & (1u << ACK_F)) && listen_idx >= 0){ + rng_t rng; + uint64_t virt_timer; + asm volatile ("mrs %0, cntvct_el0" : "=r"(virt_timer)); + rng_seed(&rng, virt_timer); + + int syn_total = 0; + int syn_port = 0; + for (int k = 0; k < MAX_TCP_FLOWS; k++){ + tcp_flow_t *f = tcp_flows[k]; + if (!f) continue; + if (f->state != TCP_SYN_RECEIVED) continue; + syn_total++; + if (f->local_port == dst_port && f->l3_id == l3_id) syn_port++; + } + if (syn_total >= (MAX_TCP_FLOWS / 4) || syn_port >= 32) return; + + tcp_flow_t *lf = tcp_flows[listen_idx]; + tcp_flow_t *nf = tcp_alloc_flow(); + if (!nf) return; + + flow = nf; + for (int k = 0; k < MAX_TCP_FLOWS; k++) { + if (tcp_flows[k] == nf) { + idx = k; + break; + } + } + + flow->local_port = dst_port; + flow->l3_id = l3_id; + + flow->remote.ver = ipver; + memset(flow->remote.ip, 0, 16); + memcpy(flow->remote.ip, src_ip_addr, (uint64_t)(ipver == IP_VER6 ? 16 : 4)); + flow->remote.port = src_port; + + flow->local.ver = ipver; + memset(flow->local.ip, 0, 16); + memcpy(flow->local.ip, dst_ip_addr, (uint64_t)(ipver == IP_VER6 ? 
16 : 4)); + flow->local.port = dst_port; + + flow->state = TCP_SYN_RECEIVED; + flow->retries = TCP_SYN_RETRIES; + + tcp_parsed_opts_t pop; + tcp_parse_options((const uint8_t *)(ptr + sizeof(tcp_hdr_t)), (uint32_t)(hdr_len > sizeof(tcp_hdr_t) ? hdr_len - sizeof(tcp_hdr_t) : 0), &pop); + + flow->ws_send = lf->ws_send; + flow->ws_recv = 0; + flow->ws_ok = (lf->ws_ok && pop.has_wscale) ? 1 : 0; + if (flow->ws_ok) { + flow->ws_recv = pop.wscale; + if (flow->ws_recv > 14) flow->ws_recv = 14; + } + else { + flow->ws_send = 0; + flow->ws_recv = 0; + } + + flow->sack_ok = (lf->sack_ok && pop.sack_permitted) ? 1 : 0; + + if (pop.has_mss && pop.mss){ + uint32_t m = pop.mss; + uint32_t minm = ipver == IP_VER6 ? 1220u : 536u; + uint32_t maxm = tcp_calc_mss_for_l3(l3_id, ipver, src_ip_addr); + if (m < minm) m = minm; + if (m > maxm) m = maxm; + flow->mss = m; + } else flow->mss = tcp_calc_mss_for_l3(l3_id, ipver, src_ip_addr); + flow->ctx.flags = 0; + flow->ctx.options = lf->ctx.options; + flow->ctx.payload.ptr = 0; + flow->ctx.payload.size = 0; + + uint32_t iss = rng_next32(&rng); + + flow->ctx.sequence = iss; + flow->snd_una = iss; + flow->snd_nxt = iss; + + flow->ctx.ack = seq + 1; + flow->rcv_nxt = seq + 1; + + flow->ctx.expected_ack = iss + 1; + flow->ctx.ack_received = 0; + uint32_t new_wnd = window; + if (flow->ws_ok && flow->ws_recv) new_wnd <<= flow->ws_recv; + flow->snd_wnd = new_wnd; + + flow->persist_active = 0; + flow->persist_timer_ms = 0; + flow->persist_timeout_ms = 0; + + flow->delayed_ack_pending = 0; + flow->delayed_ack_timer_ms = 0; + + flow->rcv_wnd_max = lf->rcv_wnd_max; + flow->rcv_buf_used = 0; + uint16_t synack_wnd = tcp_calc_adv_wnd_field(flow, flow->ws_ok ? 
1 : 0); + + flow->ip_ttl = lf->ip_ttl; + flow->ip_dontfrag = lf->ip_dontfrag; + flow->keepalive_on = lf->keepalive_on; + flow->keepalive_ms = lf->keepalive_ms; + flow->keepalive_idle_ms = 0; + + flow->cwnd = flow->mss; + flow->ssthresh = TCP_RECV_WINDOW; + flow->dup_acks = 0; + flow->in_fast_recovery = 0; + flow->recover = 0; + flow->cwnd_acc = 0; + + flow->time_wait_ms = 0; + flow->fin_wait2_ms = 0; + + tcp_hdr_t synack_hdr; + synack_hdr.src_port = bswap16(dst_port); + synack_hdr.dst_port = bswap16(src_port); + synack_hdr.sequence = bswap32(iss); + synack_hdr.ack = bswap32(seq + 1); + synack_hdr.flags = (uint8_t)((1u << SYN_F) | (1u << ACK_F)); + synack_hdr.window = synack_wnd; + synack_hdr.urgent_ptr = 0; + + uint8_t syn_opts[40]; + uint8_t syn_opts_len = tcp_build_syn_options(syn_opts, (uint16_t)flow->mss, flow->ws_ok ? flow->ws_send : 0xffu, flow->sack_ok); + + if (ipver == IP_VER4) { + ipv4_tx_opts_t tx; + tx.scope = IP_TX_BOUND_L3; + tx.index = l3_id; + tcp_send_segment(IP_VER4, flow->local.ip, src_ip_addr, &synack_hdr, syn_opts, syn_opts_len, NULL, 0, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } else { + ipv6_tx_opts_t tx; + tx.scope = IP_TX_BOUND_L3; + tx.index = l3_id; + tcp_send_segment(IP_VER6, flow->local.ip, src_ip_addr, &synack_hdr, syn_opts, syn_opts_len, NULL, 0, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } + + tcp_daemon_kick(); + return; + } + + if (!(flags & (1u << RST_F))){ + if (flags & (1u << ACK_F)){ + tcp_send_reset(ipver, dst_ip_addr, src_ip_addr, dst_port, src_port, ack, 0, false); + } else { + uint32_t seg_len = data_len; + + if (flags & (1u << SYN_F)) seg_len++; + if (flags & (1u << FIN_F)) seg_len++; + + tcp_send_reset(ipver, dst_ip_addr, src_ip_addr, dst_port, src_port, seq, seq + seg_len, true); + } + } + + return; + } + + if (flow->state == TCP_TIME_WAIT){ + if (flags & (1u << RST_F)) return; + + uint32_t seg_len = data_len; + + if (flags & (1u << SYN_F)) seg_len++; + if (flags & (1u << FIN_F)) 
seg_len++; + + uint32_t seg_end = seq + seg_len; + + if (seq <= flow->rcv_nxt && seg_end >= flow->rcv_nxt){ + flow->time_wait_ms = 0; + tcp_send_ack_now(flow); + } + + return; + } + uint32_t new_wnd = window; + if (flow->ws_ok && flow->ws_recv) new_wnd <<= flow->ws_recv; + flow->snd_wnd = new_wnd; + + if (flow->snd_wnd > 0){ + flow->persist_active = 0; + flow->persist_timer_ms = 0; + flow->persist_timeout_ms = 0; + flow->persist_probe_cnt = 0; + } else { + tcp_daemon_kick(); + } + + uint8_t fin = (flags & (1u << FIN_F)) ? 1u : 0u; + + if (flags & (1u << ACK_F)){ + if (ack > flow->snd_una && ack <= flow->snd_nxt){ + uint32_t prev_una = flow->snd_una; + + flow->snd_una = ack; + flow->ctx.ack_received = ack; + flow->dup_acks = 0; + + for (int i = 0; i < TCP_MAX_TX_SEGS; i++){ + tcp_tx_seg_t *s = &flow->txq[i]; + if (!s->used) continue; + + uint32_t s_end = s->seq + s->len + (s->syn ? 1u : 0u) + (s->fin ? 1u : 0u); + + if (s_end <= ack){ + if (s->rtt_sample && s->retransmit_cnt == 0) tcp_rtt_update(flow, s->timer_ms); + + if (s->buf && s->len) free_sized((void *)s->buf, s->len); + + s->used = 0; + s->buf = 0; + s->len = 0; + } + } + + if (ack > prev_una) tcp_cc_on_new_ack(flow, ack); + + if (flow->state == TCP_FIN_WAIT_1 && ack >= flow->ctx.expected_ack){ + flow->state = TCP_FIN_WAIT_2; + flow->fin_wait2_ms = 0; + tcp_daemon_kick(); + } else if ((flow->state == TCP_LAST_ACK || flow->state == TCP_CLOSING) && ack >= flow->ctx.expected_ack){ + tcp_free_flow(idx); + return; + } + } else if (ack == flow->snd_una && data_len == 0 && !fin){ + if (flow->dup_acks < UINT8_MAX) flow->dup_acks++; + tcp_cc_on_dupack(flow); + } else { + flow->dup_acks = 0; + } + } + + uint32_t seg_seq = seq; + + switch (flow->state){ + case TCP_SYN_SENT: + if ((flags & (1u << SYN_F)) && (flags & (1u << ACK_F)) && ack == flow->ctx.expected_ack){ + flow->ctx.ack = seq + 1; + flow->rcv_nxt = seq + 1; + flow->ctx.ack_received = ack; + flow->snd_una = ack; + flow->snd_nxt = flow->ctx.sequence; + 
flow->ctx.sequence = flow->snd_nxt; + flow->ctx.flags = 0; + + tcp_parsed_opts_t pop; + tcp_parse_options((const uint8_t *)(ptr + sizeof(tcp_hdr_t)), (uint32_t)(hdr_len > sizeof(tcp_hdr_t) ? hdr_len - sizeof(tcp_hdr_t) : 0), &pop); + + flow->ws_recv = pop.has_wscale ? pop.wscale : 0; + if (flow->ws_recv > 14) flow->ws_recv = 14; + flow->ws_ok = (flow->ws_send != 0) && pop.has_wscale ? 1 : 0; + if (!flow->ws_ok) { + flow->ws_send = 0; + flow->ws_recv = 0; + } + + flow->sack_ok = pop.sack_permitted ? 1 : 0; + + if (pop.has_mss && pop.mss){ + uint32_t m = pop.mss; + uint32_t minm = ipver == IP_VER6 ? 1220u : 536u; + uint32_t maxm = tcp_calc_mss_for_l3(l3_id, ipver, src_ip_addr); + if (m < minm) m = minm; + if (m > maxm) m = maxm; + flow->mss = m; + } else { + flow->mss = tcp_calc_mss_for_l3(l3_id, ipver, src_ip_addr); + } + + uint32_t new_wnd = window; + if (flow->ws_ok && flow->ws_recv) new_wnd <<= flow->ws_recv; + flow->snd_wnd = new_wnd; + + (void)tcp_calc_adv_wnd_field(flow, 1); + + tcp_hdr_t final_ack; + final_ack.src_port = bswap16(flow->local_port); + final_ack.dst_port = bswap16(flow->remote.port); + final_ack.sequence = bswap32(flow->ctx.sequence); + final_ack.ack = bswap32(flow->ctx.ack); + final_ack.flags = (uint8_t)(1u << ACK_F); + final_ack.window = flow->ctx.window; + final_ack.urgent_ptr = 0; + + if (flow->local.ver == IP_VER4) { + ipv4_tx_opts_t tx; + tcp_build_tx_opts_from_local_v4(flow->local.ip, &tx); + tcp_send_segment(IP_VER4, flow->local.ip, flow->remote.ip, &final_ack, NULL, 0, NULL, 0, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } else { + ipv6_tx_opts_t tx; + tcp_build_tx_opts_from_local_v6(flow->local.ip, &tx); + tcp_send_segment(IP_VER6, flow->local.ip, flow->remote.ip, &final_ack, NULL, 0, NULL, 0, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } + + flow->state = TCP_ESTABLISHED; + flow->delayed_ack_pending = 0; + flow->delayed_ack_timer_ms = 0; + tcp_daemon_kick(); + } else if (flags & (1u << RST_F)){ + 
flow->state = TCP_STATE_CLOSED; + } + + return; + + case TCP_SYN_RECEIVED: + if ((flags & (1u << ACK_F)) && !(flags & (1u << SYN_F)) && !(flags & (1u << RST_F)) && ack == flow->ctx.expected_ack){ + flow->ctx.sequence += 1; + flow->snd_una = ack; + flow->snd_nxt = flow->ctx.sequence; + flow->state = TCP_ESTABLISHED; + flow->delayed_ack_pending = 0; + flow->delayed_ack_timer_ms = 0; + flow->ctx.ack_received = ack; + + port_recv_handler_t h = port_get_handler(pm, PROTO_TCP, dst_port); + if (h) (void)h(ifx, ipver, src_ip_addr, dst_ip_addr, 0, 0, src_port, dst_port); + + tcp_daemon_kick(); + } else if (flags & (1u << RST_F)){ + tcp_free_flow(idx); + } + + return; + + default: + break; + } + + if (flags & (1u << RST_F)) { + tcp_free_flow(idx); + return; + } + + int need_ack = 0; + int ack_immediate = 0; + int ack_defer = 0; + + if (data_len || fin) { + uint32_t rcv_nxt = flow->rcv_nxt; + uint32_t wnd_end = rcv_nxt + flow->rcv_wnd; + + uint32_t orig_data_len = data_len; + uint8_t fin_in = fin; + uint32_t fin_seq = seg_seq + orig_data_len; + uint32_t orig_end = seg_seq + orig_data_len + (fin ? 1u : 0u); + + if (orig_end <= rcv_nxt || seg_seq >= wnd_end) { + need_ack = 1; + ack_immediate = 1; + } else { + if (fin_in) { + if (fin_seq < rcv_nxt || fin_seq >= wnd_end) fin_in = 0; + } + + const uint8_t *payload = (const uint8_t *)(ptr + hdr_len); + + if (seg_seq < rcv_nxt) { + uint32_t d = rcv_nxt - seg_seq; + if (d >= data_len) { + payload += data_len; + data_len = 0; + seg_seq = rcv_nxt; + } else { + payload += d; + data_len -= d; + seg_seq = rcv_nxt; + } + } + + if (data_len) { + if (seg_seq >= wnd_end) data_len = 0; + else if (seg_seq + data_len > wnd_end) data_len = wnd_end - seg_seq; + } + + if (!data_len && !fin_in){ + need_ack = 1; + ack_immediate = 1; + } else if (seg_seq == flow->rcv_nxt) { + if (data_len){ + uint32_t free_space = (flow->rcv_buf_used < flow->rcv_wnd_max) ? 
(flow->rcv_wnd_max - flow->rcv_buf_used) : 0; + + port_recv_handler_t h = port_get_handler(pm, PROTO_TCP, dst_port); + + uint32_t offer = data_len; + if (offer > free_space) offer = free_space; + + uint32_t accepted = 0; + if (offer && h) accepted = h(ifx, ipver, src_ip_addr, dst_ip_addr, (uintptr_t)payload, offer, src_port, dst_port); + if (accepted > offer) accepted = offer; + + if (h && accepted == 0 && data_len) { + (void)tcp_calc_adv_wnd_field(flow, 1); + need_ack = 1; + ack_immediate = 1; + } + + if (!accepted && offer && (flow->state == TCP_FIN_WAIT_1 || flow->state == TCP_FIN_WAIT_2 || flow->state == TCP_CLOSING || flow->state == TCP_LAST_ACK || flow->state == TCP_TIME_WAIT)) { + flow->rcv_nxt += offer; + flow->ctx.ack = flow->rcv_nxt; + accepted = offer; + } else if (accepted) { + flow->rcv_nxt += accepted; + flow->ctx.ack = flow->rcv_nxt; + flow->rcv_buf_used += accepted; + } + if (accepted < data_len) { + ack_immediate = 1; + } + } + + if (fin_in) { + if (flow->rcv_nxt == fin_seq) { + flow->rcv_nxt += 1; + flow->ctx.ack = flow->rcv_nxt; + + tcp_state_t old = flow->state; + + if (old == TCP_ESTABLISHED) flow->state = TCP_CLOSE_WAIT; + else if (old == TCP_FIN_WAIT_1) flow->state = TCP_CLOSING; + else if (old == TCP_FIN_WAIT_2 || old == TCP_CLOSING || old == TCP_LAST_ACK) { + flow->state = TCP_TIME_WAIT; + flow->time_wait_ms = 0; + tcp_daemon_kick(); + } + + ack_immediate = 1; + } else { + flow->fin_pending = 1; + flow->fin_seq = fin_seq; + } + } + + tcp_reass_drain_inseq(flow, pm, ifx, ipver, src_ip_addr, dst_ip_addr, src_port, dst_port); + + if (flow->fin_pending && flow->fin_seq == flow->rcv_nxt){ + flow->fin_pending = 0; + flow->rcv_nxt += 1; + flow->ctx.ack = flow->rcv_nxt; + + tcp_state_t old = flow->state; + + if (old == TCP_ESTABLISHED) flow->state = TCP_CLOSE_WAIT; + else if (old == TCP_FIN_WAIT_1) flow->state = TCP_CLOSING; + else if (old == TCP_FIN_WAIT_2 || old == TCP_CLOSING || old == TCP_LAST_ACK) { + flow->state = TCP_TIME_WAIT; + 
flow->time_wait_ms = 0; + tcp_daemon_kick(); + } + + ack_immediate = 1; + } + + (void)tcp_calc_adv_wnd_field(flow, 1); + + if (!ack_immediate && data_len) ack_defer = 1; + need_ack = 1; + } else { + if (!(flow->state == TCP_FIN_WAIT_1 || flow->state == TCP_FIN_WAIT_2 || flow->state == TCP_CLOSING || flow->state == TCP_LAST_ACK || flow->state == TCP_TIME_WAIT) && data_len) tcp_reass_insert(flow, seg_seq, payload, data_len); + + if (fin_in){ + flow->fin_pending = 1; + flow->fin_seq = fin_seq; + } + + need_ack = 1; + ack_immediate = 1; + } + } + } + + if (need_ack){ + if (ack_immediate){ + tcp_send_ack_now(flow); + } else if (ack_defer){ + if (!flow->delayed_ack_pending){ + flow->delayed_ack_pending = 1; + flow->delayed_ack_timer_ms = 0; + tcp_daemon_kick(); + } else { + tcp_send_ack_now(flow); + } + } else { + if (!flow->delayed_ack_pending){ + flow->delayed_ack_pending = 1; + flow->delayed_ack_timer_ms = 0; + tcp_daemon_kick(); + } else { + tcp_send_ack_now(flow); + } + } + } +} + +void tcp_flow_on_app_read(tcp_data *flow_ctx, uint32_t bytes_read){ + if (!flow_ctx || bytes_read == 0) return; + + tcp_flow_t *flow = NULL; + for (int i = 0; i < MAX_TCP_FLOWS; ++i) { + tcp_flow_t *f = tcp_flows[i]; + if (!f) continue; + if (&f->ctx == flow_ctx) { + flow = f; + break; + } + } + if (!flow) return; + + if (bytes_read > flow->rcv_buf_used) bytes_read = flow->rcv_buf_used; + flow->rcv_buf_used -= bytes_read; + + port_manager_t *pm = NULL; + uint8_t ifx = 0; + + if (flow->local.ver == IP_VER4) { + l3_ipv4_interface_t *v4 = l3_ipv4_find_by_id(flow->l3_id); + if (v4 && v4->l2) { + pm = ifmgr_pm_v4(flow->l3_id); + ifx = v4->l2->ifindex; + } + } else if (flow->local.ver == IP_VER6) { + l3_ipv6_interface_t *v6 = l3_ipv6_find_by_id(flow->l3_id); + if (v6 && v6->l2) { + pm = ifmgr_pm_v6(flow->l3_id); + ifx = v6->l2->ifindex; + } + } + + if (pm) { + tcp_reass_drain_inseq(flow, pm, ifx, flow->local.ver, flow->remote.ip, flow->local.ip, flow->remote.port, flow->local_port); + } + + if 
(flow->state != TCP_STATE_CLOSED && flow->state != TCP_TIME_WAIT) { + (void)tcp_calc_adv_wnd_field(flow, 1); + tcp_send_ack_now(flow); + } +} diff --git a/kernel/networking/transport_layer/tcp/tcp_timer.c b/kernel/networking/transport_layer/tcp/tcp_timer.c new file mode 100644 index 00000000..b3a92597 --- /dev/null +++ b/kernel/networking/transport_layer/tcp/tcp_timer.c @@ -0,0 +1,236 @@ +#include "tcp_internal.h" +#include "kernel_processes/kprocess_loader.h" +#include "exceptions/irq.h" + +static volatile int tcp_daemon_running = 0; +//TODO make tcp_daemon_running atomic or use a lock, this may end in a double deamon process +void tcp_daemon_kick(void) { + if(!tcp_has_pending_timers()) return; + + disable_interrupt(); + if(tcp_daemon_running){ + enable_interrupt(); + return; + } + tcp_daemon_running = 1; + enable_interrupt(); + + process_t *p = create_kernel_process("tcp_timer", tcp_daemon_entry, 0, 0); + if(!p){ + disable_interrupt(); + tcp_daemon_running = 0; + enable_interrupt(); + } +} + +int tcp_has_pending_timers(void) { //TODO mhh this should be event driven to avoid MAX_TCP_FLOWS*TCP_MAX_TX_SEGS scans. + + + for (int i = 0; i < MAX_TCP_FLOWS; i++) { + tcp_flow_t *f = tcp_flows[i]; + if (!f) continue; + if (f->state == TCP_STATE_CLOSED) continue; + + if (f->state == TCP_TIME_WAIT) return 1; + if (f->state == TCP_FIN_WAIT_2) return 1; + if (f->delayed_ack_pending) return 1; + if (f->persist_active) return 1; + if (f->keepalive_on && f->state == TCP_ESTABLISHED && f->keepalive_ms) return 1; + + for (int j = 0; j < TCP_MAX_TX_SEGS; j++) { + tcp_tx_seg_t *s = &f->txq[j]; + if (!s->used) continue; + uint32_t end = s->seq + s->len + (s->syn ? 1u : 0u) + (s->fin ? 
1u : 0u); + if (end > f->snd_una) return 1; + } + } + + return 0; +} + +void tcp_tick_all(uint32_t elapsed_ms) { + for (int i = 0; i < MAX_TCP_FLOWS; i++) { + tcp_flow_t *f = tcp_flows[i]; + if (!f) continue; + if (f->state == TCP_STATE_CLOSED) continue; + + if (f->state == TCP_TIME_WAIT) { + f->time_wait_ms += elapsed_ms; + if (f->time_wait_ms >= TCP_2MSL_MS) { + tcp_free_flow(i); + continue; + } + } + + if (f->state == TCP_FIN_WAIT_2) { + f->fin_wait2_ms += elapsed_ms; + if (f->fin_wait2_ms >= TCP_2MSL_MS) { + tcp_free_flow(i); + continue; + } + } + + if (f->delayed_ack_pending) { + f->delayed_ack_timer_ms += elapsed_ms; + if (f->delayed_ack_timer_ms >= TCP_DELAYED_ACK_MS) tcp_send_ack_now(f); + } + + if (f->keepalive_on && f->state == TCP_ESTABLISHED && f->keepalive_ms) { + f->keepalive_idle_ms += elapsed_ms; + if (f->keepalive_idle_ms >= f->keepalive_ms) { + tcp_hdr_t hdr; + hdr.src_port = bswap16(f->local_port); + hdr.dst_port = bswap16(f->remote.port); + uint32_t seq = f->snd_nxt; + if (seq) seq -= 1; + hdr.sequence = bswap32(seq); + hdr.ack = bswap32(f->ctx.ack); + hdr.flags = (uint8_t)(1u << ACK_F); + hdr.window = tcp_calc_adv_wnd_field(f, 1); + hdr.urgent_ptr = 0; + + if (f->local.ver == IP_VER4) { + ipv4_tx_opts_t tx; + tcp_build_tx_opts_from_local_v4(f->local.ip, &tx); + (void)tcp_send_segment(IP_VER4, f->local.ip, f->remote.ip, &hdr, NULL, 0, NULL, 0, (const ip_tx_opts_t *)&tx, f->ip_ttl, f->ip_dontfrag); + } else if (f->local.ver == IP_VER6) { + ipv6_tx_opts_t tx; + tcp_build_tx_opts_from_local_v6(f->local.ip, &tx); + (void)tcp_send_segment(IP_VER6, f->local.ip, f->remote.ip, &hdr, NULL, 0, NULL, 0, (const ip_tx_opts_t *)&tx, f->ip_ttl, f->ip_dontfrag); + } + f->keepalive_idle_ms = 0; + } + } + + if (f->snd_wnd == 0 && f->snd_nxt > f->snd_una) { + if (!f->persist_active) { + f->persist_active = 1; + f->persist_timer_ms = 0; + f->persist_probe_cnt = 0; + f->persist_timeout_ms = TCP_PERSIST_MIN_MS; + } else { + f->persist_timer_ms += elapsed_ms; + if 
(f->persist_timer_ms >= f->persist_timeout_ms) { + if (f->persist_probe_cnt >= TCP_MAX_PERSIST_PROBES) { + if (f->state == TCP_ESTABLISHED) { + f->ctx.flags = (uint8_t)((1u << FIN_F) | (1u << ACK_F)); + f->ctx.payload.ptr = 0; + f->ctx.payload.size = 0; + + tcp_flow_send(&f->ctx); + f->state = TCP_FIN_WAIT_1; + f->ctx.expected_ack = f->snd_nxt; + tcp_daemon_kick(); + } else { + tcp_free_flow(i); + } + continue; + } + tcp_tx_seg_t *best = tcp_find_first_unacked(f); + + tcp_hdr_t hdr; + hdr.src_port = bswap16(f->local_port); + hdr.dst_port = bswap16(f->remote.port); + + uint8_t payload[1]; + const uint8_t *pp = NULL; + uint16_t pl = 0; + + uint32_t probe_seq = f->snd_una; + + if (best && best->buf && best->len && probe_seq >= best->seq && probe_seq < best->seq + best->len) { + payload[0] = *((uint8_t *)best->buf + (probe_seq - best->seq)); + pp = payload; + pl = 1; + } + + hdr.sequence = bswap32(probe_seq); + hdr.ack = bswap32(f->ctx.ack); + hdr.flags = (uint8_t)(1u << ACK_F); + hdr.window = tcp_calc_adv_wnd_field(f, 1); + hdr.urgent_ptr = 0; + + if (f->local.ver == IP_VER4) { + ipv4_tx_opts_t tx; + tcp_build_tx_opts_from_local_v4(f->local.ip, &tx); + (void)tcp_send_segment(IP_VER4, f->local.ip, f->remote.ip, &hdr, NULL, 0, pp, pl, (const ip_tx_opts_t *)&tx, f->ip_ttl, f->ip_dontfrag); + } else if (f->local.ver == IP_VER6) { + ipv6_tx_opts_t tx; + tcp_build_tx_opts_from_local_v6(f->local.ip, &tx); + (void)tcp_send_segment(IP_VER6, f->local.ip, f->remote.ip, &hdr, NULL, 0, pp, pl, (const ip_tx_opts_t *)&tx, f->ip_ttl, f->ip_dontfrag); + } + + if (f->persist_probe_cnt < UINT8_MAX) f->persist_probe_cnt++; + f->persist_timer_ms = 0; + + if (f->persist_timeout_ms < TCP_PERSIST_MAX_MS) { + uint32_t next = f->persist_timeout_ms << 1; + if (next > TCP_PERSIST_MAX_MS) next = TCP_PERSIST_MAX_MS; + f->persist_timeout_ms = next; + } + } + } + } else { + f->persist_active = 0; + f->persist_timer_ms = 0; + f->persist_timeout_ms = 0; + f->persist_probe_cnt = 0; + } + + for (int j = 
0; j < TCP_MAX_TX_SEGS; j++) { + tcp_tx_seg_t *s = &f->txq[j]; + if (!s->used) continue; + + s->timer_ms += elapsed_ms; + if (s->timer_ms < s->timeout_ms) continue; + + if (s->retransmit_cnt >= TCP_MAX_RETRANS) { + tcp_free_flow(i); + break; + } + + tcp_cc_on_timeout(f); + + tcp_send_from_seg(f, s); + + s->retransmit_cnt++; + s->timer_ms = 0; + + if (s->timeout_ms == 0) { + uint32_t rto = f->rto ? f->rto : TCP_INIT_RTO; + if (rto < TCP_MIN_RTO) rto = TCP_MIN_RTO; + s->timeout_ms = rto; + } else if (s->timeout_ms < TCP_MAX_RTO) { + uint32_t next = s->timeout_ms << 1; + if (next > TCP_MAX_RTO) next = TCP_MAX_RTO; + s->timeout_ms = next; + } + } + } +} + + +int tcp_daemon_entry(int argc, char *argv[]) { + (void)argc; + (void)argv; + + const uint32_t tick_ms = 25; + const uint32_t grace_ms = 10000; + uint32_t idle_ms = 0; + + while (1) { + if (tcp_has_pending_timers()) { + tcp_tick_all(tick_ms); + idle_ms = 0; + } else { + idle_ms += tick_ms; + if(idle_ms >= grace_ms) break; + } + msleep(tick_ms); + } + + disable_interrupt(); + tcp_daemon_running = 0; + enable_interrupt(); + return 0; +} diff --git a/kernel/networking/transport_layer/tcp/tcp_tx.c b/kernel/networking/transport_layer/tcp/tcp_tx.c new file mode 100644 index 00000000..7f453581 --- /dev/null +++ b/kernel/networking/transport_layer/tcp/tcp_tx.c @@ -0,0 +1,315 @@ +#include "tcp_internal.h" + +uint16_t tcp_calc_adv_wnd_field(tcp_flow_t *flow, uint8_t apply_scale) { + if (!flow) return 0; + + uint32_t quantum = 1; + if (apply_scale && flow->ws_ok && flow->ws_send) quantum = 1u << flow->ws_send; + + uint32_t maxw = flow->rcv_wnd_max; + uint32_t used = flow->rcv_buf_used; + uint32_t freew = maxw > used ? maxw - used: 0; + + uint32_t free_q = quantum == 1 ? 
freew : (freew & ~(quantum - 1)); + + if (flow->rcv_adv_edge < flow->rcv_nxt) flow->rcv_adv_edge = flow->rcv_nxt; + uint32_t candidate_edge = flow->rcv_nxt + free_q; + if (candidate_edge > flow->rcv_adv_edge) flow->rcv_adv_edge = candidate_edge; + + uint32_t adv = flow->rcv_adv_edge - flow->rcv_nxt; + + uint32_t field = adv; + if (!apply_scale || !flow->ws_ok || flow->ws_send == 0) { + if (field > 65535u) field = 65535u; + adv = field; + } else { + field = adv >> flow->ws_send; + if (field > 65535u) field = 65535u; + adv = field << flow->ws_send; + } + + flow->rcv_wnd = adv; + flow->rcv_adv_edge = flow->rcv_nxt + adv; + flow->ctx.window = (uint16_t)field; + return (uint16_t)field; +} + + +static void tcp_persist_arm(tcp_flow_t *flow) { + if (!flow) return; + flow->persist_active = 1; + flow->persist_timer_ms = 0; + if (flow->persist_timeout_ms == 0) flow->persist_timeout_ms = TCP_PERSIST_MIN_MS; + if (flow->persist_timeout_ms < TCP_PERSIST_MIN_MS) flow->persist_timeout_ms = TCP_PERSIST_MIN_MS; + if (flow->persist_timeout_ms > TCP_PERSIST_MAX_MS) flow->persist_timeout_ms = TCP_PERSIST_MAX_MS; + tcp_daemon_kick(); +} + +tcp_tx_seg_t *tcp_alloc_tx_seg(tcp_flow_t *flow){ + for (int i = 0; i < TCP_MAX_TX_SEGS; i++) { + if (!flow->txq[i].used) { + tcp_tx_seg_t *s = &flow->txq[i]; + s->used = 1; + s->syn = 0; + s->fin = 0; + s->rtt_sample = 0; + s->retransmit_cnt = 0; + s->seq = 0; + s->len = 0; + s->buf = 0; + s->timer_ms = 0; + s->timeout_ms = flow->rto ? 
flow->rto : TCP_INIT_RTO; + tcp_daemon_kick(); + return s; + } + } + return NULL; +} + +void tcp_send_from_seg(tcp_flow_t *flow, tcp_tx_seg_t *seg){ + if (flow) flow->keepalive_idle_ms = 0; + tcp_hdr_t hdr; + + hdr.src_port = bswap16(flow->local_port); + hdr.dst_port = bswap16(flow->remote.port); + hdr.sequence = bswap32(seg->seq); + hdr.ack = bswap32(flow->ctx.ack); + + uint8_t flags = 0; + if (!(flow->state == TCP_SYN_SENT && seg->syn && flow->ctx.ack == 0)) flags |= (uint8_t)(1u << ACK_F); + if (seg->syn) flags |= (uint8_t)(1u << SYN_F); + if (seg->fin) flags |= (uint8_t)(1u << FIN_F); + hdr.flags = flags; + + hdr.window = tcp_calc_adv_wnd_field(flow, seg->syn ? 0 : 1); + hdr.urgent_ptr = 0; + + if (flow->local.ver == IP_VER4) { + ipv4_tx_opts_t tx; + tcp_build_tx_opts_from_local_v4(flow->local.ip, &tx); + (void)tcp_send_segment(IP_VER4, flow->local.ip, flow->remote.ip, &hdr, NULL, 0, seg->buf ? (const uint8_t *)seg->buf : NULL, seg->len, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } else if (flow->local.ver == IP_VER6) { + ipv6_tx_opts_t tx; + tcp_build_tx_opts_from_local_v6(flow->local.ip, &tx); + (void)tcp_send_segment(IP_VER6, flow->local.ip, flow->remote.ip, &hdr, NULL, 0, seg->buf ? 
(const uint8_t *)seg->buf : NULL, seg->len, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } + + tcp_daemon_kick(); +} + +void tcp_send_ack_now(tcp_flow_t *flow){ + if (!flow) return; + + tcp_hdr_t ackhdr; + ackhdr.src_port = bswap16(flow->local_port); + ackhdr.dst_port = bswap16(flow->remote.port); + ackhdr.sequence = bswap32(flow->ctx.sequence); + ackhdr.ack = bswap32(flow->ctx.ack); + ackhdr.flags = (uint8_t)(1u << ACK_F); + ackhdr.window = tcp_calc_adv_wnd_field(flow, 1); + ackhdr.urgent_ptr = 0; + + uint8_t opts[64]; + uint8_t opts_len = 0; + + opts_len = 0; + + if (flow->sack_ok && flow->reass_count > 0) { + uint32_t n = flow->reass_count; + if (n > 4) n = 4; + + uint32_t need = 2 + 8 * n; + uint32_t pad = (4 - (need & 3)) & 3; + + if (need + pad <= sizeof(opts)) { + opts[0] = 5; + opts[1] = (uint8_t)need; + uint32_t o = 2; + + uint32_t idx[4]; + for (uint32_t i = 0; i < n; i++) idx[i] = i; + + for (uint32_t i = 0; i + 1 < n; i++) { + for (uint32_t j = i + 1; j < n; j++) { + if ((int32_t)(flow->reass[idx[j]].seq > flow->reass[idx[i]].seq)) { + uint32_t t = idx[i]; + idx[i] = idx[j]; + idx[j] = t; + } + } + } + + for (uint32_t i = 0; i < n; i++) { + const tcp_reass_seg_t *s = &flow->reass[idx[i]]; + uint32_t left = s->seq; + uint32_t right = s->end; + + opts[o + 0] = (uint8_t)(left >> 24); + opts[o + 1] = (uint8_t)(left >> 16); + opts[o + 2] = (uint8_t)(left >> 8); + opts[o + 3] = (uint8_t)(left); + opts[o + 4] = (uint8_t)(right >> 24); + opts[o + 5] = (uint8_t)(right >> 16); + opts[o + 6] = (uint8_t)(right >> 8); + opts[o + 7] = (uint8_t)(right); + o += 8; + } + + for (uint32_t i = 0; i < pad; i++) opts[o + i] = 1; + + opts_len = (uint8_t)(need + pad); + } + } + + if (flow->local.ver == IP_VER4) { + ipv4_tx_opts_t tx; + tcp_build_tx_opts_from_local_v4(flow->local.ip, &tx); + (void)tcp_send_segment(IP_VER4, flow->local.ip, flow->remote.ip, &ackhdr, opts_len ? 
opts : NULL, opts_len, NULL, 0, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } else if (flow->local.ver == IP_VER6) { + ipv6_tx_opts_t tx; + tcp_build_tx_opts_from_local_v6(flow->local.ip, &tx); + (void)tcp_send_segment(IP_VER6, flow->local.ip, flow->remote.ip, &ackhdr, opts_len ? opts : NULL, opts_len, NULL, 0, (const ip_tx_opts_t *)&tx, flow->ip_ttl, flow->ip_dontfrag); + } + + flow->delayed_ack_pending = 0; + flow->delayed_ack_timer_ms = 0; + tcp_daemon_kick(); +} + +tcp_result_t tcp_flow_send(tcp_data *flow_ctx){ + if (!flow_ctx) return TCP_INVALID; + + tcp_flow_t *flow = NULL; + for (int i = 0; i < MAX_TCP_FLOWS; i++) { + if (!tcp_flows[i]) continue; + if (&tcp_flows[i]->ctx == flow_ctx) { + flow = tcp_flows[i]; + break; + } + } + if (!flow) return TCP_INVALID; + + uint8_t flags = flow_ctx->flags; + uint8_t *payload_ptr = (uint8_t *)flow_ctx->payload.ptr; + uint64_t payload_len = flow_ctx->payload.size; + flow_ctx->payload.size = 0; + + if (flow->state != TCP_ESTABLISHED && !(flags & (1u << FIN_F))) { + if (!(flow->state == TCP_CLOSE_WAIT && (flags & (1u << FIN_F)))) return TCP_INVALID; + } + + if (flow->snd_wnd == 0 && !(flags & (1u << FIN_F))) { + tcp_persist_arm(flow); + return TCP_WOULDBLOCK; + } + + uint64_t in_flight = flow->snd_nxt - flow->snd_una; + uint32_t wnd = flow->snd_wnd; + uint32_t cwnd = flow->cwnd ? flow->cwnd : (flow->mss ? flow->mss : TCP_DEFAULT_MSS); + uint32_t eff_wnd = wnd < cwnd ? wnd : cwnd; + + if (eff_wnd == 0) eff_wnd = 1; + if (in_flight >= eff_wnd && !(flags & (1u << FIN_F))) return TCP_WOULDBLOCK; + + uint64_t can_send = eff_wnd - in_flight; + if (can_send == 0 && !(flags & (1u << FIN_F))) return TCP_WOULDBLOCK; + + uint64_t remaining = payload_len; + uint64_t sent_bytes = 0; + int first_segment = 1; + + while (remaining > 0 && can_send > 0) { + uint64_t seg_len = (uint64_t)(remaining > can_send ? 
can_send : remaining); + if (flow->mss && seg_len > flow->mss) seg_len = (uint64_t)flow->mss; + + tcp_tx_seg_t *seg = tcp_alloc_tx_seg(flow); + if (!seg) break; + + uintptr_t buf = 0; + if (seg_len) { + buf = (uintptr_t)malloc(seg_len); + if (!buf) { seg->used = 0; break; } + memcpy((void *)buf, payload_ptr + sent_bytes, seg_len); + } + + seg->seq = flow->snd_nxt; + seg->len = seg_len; + seg->buf = buf; + seg->syn = 0; + seg->fin = 0; + seg->timer_ms = 0; + seg->timeout_ms = flow->rto ? flow->rto : TCP_INIT_RTO; + seg->retransmit_cnt = 0; + seg->rtt_sample = 0; + if (!flow->rtt_valid && first_segment) seg->rtt_sample = 1; + + tcp_send_from_seg(flow, seg); + + flow->snd_nxt += seg_len; + sent_bytes += seg_len; + remaining -= seg_len; + can_send -= seg_len; + first_segment = 0; + } + + if ((flags & (1u << FIN_F)) && remaining == 0) { + tcp_tx_seg_t *seg = tcp_alloc_tx_seg(flow); + if (!seg) return sent_bytes ? TCP_OK : TCP_WOULDBLOCK; + + seg->seq = flow->snd_nxt; + seg->len = 0; + seg->buf = 0; + seg->syn = 0; + seg->fin = 1; + seg->timer_ms = 0; + seg->timeout_ms = flow->rto ? flow->rto : TCP_INIT_RTO; + seg->retransmit_cnt = 0; + seg->rtt_sample = 0; + + tcp_send_from_seg(flow, seg); + + flow->snd_nxt += 1; + flow->ctx.expected_ack = flow->snd_nxt; + } + + flow_ctx->sequence = flow->snd_nxt; + flow->ctx.sequence = flow->snd_nxt; + + tcp_daemon_kick(); + + flow_ctx->payload.size = sent_bytes; + return sent_bytes || (flags & (1u << FIN_F)) ? 
TCP_OK : TCP_WOULDBLOCK; +} + +tcp_result_t tcp_flow_close(tcp_data *flow_ctx){ + if (!flow_ctx) return TCP_INVALID; + + tcp_flow_t *flow = NULL; + for (int i = 0; i < MAX_TCP_FLOWS; i++) { + if (!tcp_flows[i]) continue; + if (&tcp_flows[i]->ctx == flow_ctx) { + flow = tcp_flows[i]; + break; + } + } + if (!flow) return TCP_INVALID; + + if (flow->state == TCP_ESTABLISHED || flow->state == TCP_CLOSE_WAIT) { + flow_ctx->sequence = flow->snd_nxt; + flow_ctx->ack = flow->ctx.ack; + flow_ctx->window = tcp_calc_adv_wnd_field(flow, 1); + flow_ctx->payload.ptr = 0; + flow_ctx->payload.size = 0; + flow_ctx->flags = (uint8_t)((1u << FIN_F) | (1u << ACK_F)); + + tcp_result_t res = tcp_flow_send(flow_ctx); + if (res == TCP_OK || res == TCP_WOULDBLOCK) { + if (flow->state == TCP_ESTABLISHED) flow->state = TCP_FIN_WAIT_1; + else flow->state = TCP_LAST_ACK; + } + tcp_daemon_kick(); + return res; + } + + return TCP_INVALID; +} \ No newline at end of file diff --git a/kernel/networking/transport_layer/tcp/tcp_utils.c b/kernel/networking/transport_layer/tcp/tcp_utils.c new file mode 100644 index 00000000..8ce48329 --- /dev/null +++ b/kernel/networking/transport_layer/tcp/tcp_utils.c @@ -0,0 +1,124 @@ +#include "tcp_utils.h" + +uint32_t tcp_calc_mss_for_l3(uint8_t l3_id, ip_version_t ver, const void *remote_ip){ + uint32_t mtu = 1500; + l3_ipv6_interface_t* v6 = l3_ipv6_find_by_id(l3_id); + if (v6) mtu =v6->mtu ? v6->mtu : 1500; + + l3_ipv4_interface_t* v4 = l3_ipv4_find_by_id(l3_id); + if (v4) mtu = v4->runtime_opts_v4.mtu ? v4->runtime_opts_v4.mtu : 1500; + + if (ver == IP_VER6 && remote_ip){ + uint16_t pmtu =ipv6_pmtu_get((const uint8_t*)remote_ip); + if (pmtu && pmtu < mtu) mtu = pmtu; + } + + uint32_t ih = (ver == IP_VER6) ? 
40u : 20u; + uint32_t th = 20u; + if (mtu <= ih + th) return 256; + uint32_t mss = mtu - ih - th; + if (mss < 256u) mss = 256u; + return mss; +} + +bool tcp_build_tx_opts_from_local_v4(const void *src_ip_addr, ipv4_tx_opts_t *out){ + if (!out) return false; + l3_ipv4_interface_t *v4 = l3_ipv4_find_by_ip(*(const uint32_t *)src_ip_addr); + if (v4) { + out->scope = IP_TX_BOUND_L3; + out->index = v4->l3_id; + } else { + out->scope = IP_TX_AUTO; + out->index = 0; + } + return true; +} + +bool tcp_build_tx_opts_from_l3(uint8_t l3_id, ipv4_tx_opts_t *out){ + if (!out) return false; + out->scope = IP_TX_BOUND_L3; + out->index = l3_id; + return true; +} + +bool tcp_build_tx_opts_from_local_v6(const void *src_ip_addr, ipv6_tx_opts_t *out){ + if (!out) return false; + const uint8_t *sip = (const uint8_t *)src_ip_addr; + l3_ipv6_interface_t *v6 = l3_ipv6_find_by_ip(sip); + if (v6 && v6->l2) { + out->scope = IP_TX_BOUND_L3; + out->index = v6->l3_id; + } else { + out->scope = IP_TX_AUTO; + out->index = 0; + } + return true; +} + +void tcp_parse_options(const uint8_t *opts, uint32_t len, tcp_parsed_opts_t *out) { + if (!out) return; + + out->mss = 0; + out->wscale = 0; + out->sack_permitted = 0; + out->has_mss = 0; + out->has_wscale = 0; + + if (!opts || len == 0) return; + + uint32_t i = 0; + while (i < len){ + uint8_t kind = opts[i]; + if (kind == 0) break; + if (kind == 1) { + i++; + continue; + } + + if (i + 1 >= len) break; + uint8_t olen = opts[i + 1]; + if (olen < 2) break; + if (i + olen > len) break; + + if (kind == 2 && olen == 4) { + out->mss = (uint16_t)((opts[i + 2] << 8) | opts[i + 3]); + out->has_mss = 1; + } else if (kind == 3 && olen == 3) { + out->wscale =opts[i + 2]; + out->has_wscale = 1; + } else if (kind == 4 && olen == 2) { + out->sack_permitted = 1; + } + + i += olen; + } +} + +uint8_t tcp_build_syn_options(uint8_t *out, uint16_t mss, uint8_t wscale, uint8_t sack_permitted) { + if (!out) return 0; + + uint8_t i = 0; + + out[i++] = 2; + out[i++] = 4; + 
out[i++] = (uint8_t)(mss >> 8); + out[i++] = (uint8_t)(mss & 0xff); + + if (wscale != 0xffu){ + out[i++] = 1; + out[i++] = 3; + out[i++] = 3; + out[i++] = wscale; + } + + if (sack_permitted){ + out[i++] = 1; + out[i++] = 1; + out[i++] = 4; + out[i++] = 2; + } + + while (i & 3) out[i++] = 1; + + return i; +} \ No newline at end of file diff --git a/kernel/networking/transport_layer/tcp/tcp_utils.h b/kernel/networking/transport_layer/tcp/tcp_utils.h new file mode 100644 index 00000000..30e9c9b9 --- /dev/null +++ b/kernel/networking/transport_layer/tcp/tcp_utils.h @@ -0,0 +1,34 @@ +#pragma once + +#include "net/network_types.h" +#include "networking/internet_layer/ipv4.h" +#include "networking/internet_layer/ipv6.h" +#include "networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "networking/port_manager.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + uint16_t mss; + uint8_t wscale; + uint8_t sack_permitted; + uint8_t has_mss; + uint8_t has_wscale; +} tcp_parsed_opts_t; + +void tcp_parse_options(const uint8_t *opts, uint32_t len, tcp_parsed_opts_t *out); +uint8_t tcp_build_syn_options(uint8_t *out, uint16_t mss, uint8_t wscale, uint8_t sack_permitted); + +port_manager_t *tcp_pm_for_l3(uint8_t l3_id); +uint32_t tcp_calc_mss_for_l3(uint8_t l3_id, ip_version_t ver, const void *remote_ip); + +bool tcp_build_tx_opts_from_local_v4(const void *src_ip_addr, ipv4_tx_opts_t *out); +bool tcp_build_tx_opts_from_l3(uint8_t l3_id, ipv4_tx_opts_t *out); +bool tcp_build_tx_opts_from_local_v6(const void *src_ip_addr, ipv6_tx_opts_t *out); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/kernel/networking/transport_layer/trans_utils.h b/kernel/networking/transport_layer/trans_utils.h new file mode 100644 index 00000000..19418cfb --- /dev/null +++ b/kernel/networking/transport_layer/trans_utils.h @@ -0,0 +1,46 @@ +#pragma once +#include "types.h" +#include "net/network_types.h" +#include 
"networking/internet_layer/ipv4_utils.h" +#include "networking/internet_layer/ipv6_utils.h" +#include "std/std.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void net_ep_split(const net_l4_endpoint* ep, char* ip, int iplen, bool* is_v6, uint16_t* port) { + if (ip && iplen > 0) { + ip[0] = '-'; + if (iplen > 1) ip[1] = 0; + } + if (is_v6) *is_v6 = false; + if (port) *port = 0; + if (!ep || !ip || iplen<= 0) return; + + if (port)*port = ep->port; + if (ep->ver ==IP_VER4) { + uint32_t v4 = 0; + memcpy(&v4, ep->ip, 4); + ipv4_to_string(v4, ip); + if (is_v6) *is_v6 = false; + return; + } + + if (ep->ver ==IP_VER6) { + ipv6_to_string(ep->ip, ip, iplen); + if (is_v6) *is_v6 = true; + return; + } +} + +static void make_ep(uint32_t ip_host, uint16_t port, ip_version_t ver, net_l4_endpoint* ep) { + memset(ep, 0, sizeof(*ep)); + ep->ver = ver; + memcpy(ep->ip, &ip_host, 4); + ep->port = port; +} + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/kernel/networking/transport_layer/udp.c b/kernel/networking/transport_layer/udp.c index acb885da..36e025e4 100644 --- a/kernel/networking/transport_layer/udp.c +++ b/kernel/networking/transport_layer/udp.c @@ -1,10 +1,12 @@ #include "udp.h" #include "net/checksums.h" #include "networking/internet_layer/ipv4.h" +#include "networking/internet_layer/ipv6.h" #include "networking/port_manager.h" #include "std/memory.h" #include "types.h" #include "syscalls/syscalls.h" +#include "networking/internet_layer/ipv4_utils.h" static inline uint32_t v4_u32_from_arr(const uint8_t ip16[16]) { uint32_t v = 0; @@ -28,26 +30,35 @@ size_t create_udp_segment(uintptr_t buf, const net_l4_endpoint *src, const net_l uint16_t csum = checksum16_pipv4(s, d, 0x11, (const uint8_t *)udp, full_len); udp->checksum = bswap16(csum); } else if (src->ver == IP_VER6) { - //TODO IPV6 - udp->checksum = 0; + uint16_t csum = checksum16_pipv6(src->ip, dst->ip, 17, (const uint8_t *)udp, full_len); + udp->checksum = bswap16(csum); } return 
full_len; } -void udp_send_segment(const net_l4_endpoint *src, const net_l4_endpoint *dst, sizedptr payload, const ipv4_tx_opts_t* tx_opts) { - if (src->ver == IP_VER4) { - uint32_t udp_len = (uint32_t)(sizeof(udp_hdr_t) + payload.size); - uintptr_t buf = (uintptr_t)malloc(udp_len); - if (!buf) return; +void udp_send_segment(const net_l4_endpoint *src, const net_l4_endpoint *dst, sizedptr payload, const ip_tx_opts_t* tx_opts, uint8_t ttl, uint8_t dontfrag) { + uint32_t udp_len = (uint32_t)(sizeof(udp_hdr_t) + payload.size); + uint32_t headroom = (uint32_t)sizeof(eth_hdr_t) + (uint32_t)(src->ver == IP_VER4 ? sizeof(ipv4_hdr_t) : sizeof(ipv6_hdr_t)); + netpkt_t* pkt = netpkt_alloc(udp_len, headroom, 0); + if (!pkt) return; + void* buf = netpkt_put(pkt, udp_len); + if (!buf) { + netpkt_unref(pkt); + return; + } - size_t written = create_udp_segment(buf, src, dst, payload); - uint32_t dst_ip = v4_u32_from_arr(dst->ip); + size_t written = create_udp_segment((uintptr_t)buf, src, dst, payload); - ipv4_send_packet(dst_ip, 0x11, (sizedptr){ buf, (uint32_t)written }, tx_opts, 0); - free_sized((void *)buf, udp_len); + if (src->ver == IP_VER4) { + uint32_t dst_ip = v4_u32_from_arr(dst->ip); + (void)netpkt_trim(pkt, (uint32_t)written); + ipv4_send_packet(dst_ip, 0x11, pkt, (const ipv4_tx_opts_t*)tx_opts, ttl, dontfrag); } else if (src->ver == IP_VER6) { - //TODO IPV6 + (void)netpkt_trim(pkt, (uint32_t)written); + ipv6_send_packet(dst->ip, 0x11, pkt, (const ipv6_tx_opts_t*)tx_opts, ttl, dontfrag); + } else { + netpkt_unref(pkt); } } @@ -83,16 +94,29 @@ void udp_input(ip_version_t ipver, const void *src_ip_addr, const void *dst_ip_a hdr->checksum = recv; if (calc != bswap16(recv)) return; } else if (ipver == IP_VER6) { - //TODO IPV& + uint16_t recv = hdr->checksum; + hdr->checksum = 0; + uint16_t calc = checksum16_pipv6( (const uint8_t*)src_ip_addr, (const uint8_t*)dst_ip_addr, 0x11, (const uint8_t*)hdr, (uint32_t)(pl.size + sizeof(*hdr))); + hdr->checksum = recv; + if (calc != 
bswap16(recv)) return; } } uint16_t dst_port = bswap16(hdr->dst_port); uint16_t src_port = bswap16(hdr->src_port); - l3_ipv4_interface_t *v4 = l3_ipv4_find_by_id(l3_id); - l3_ipv6_interface_t *v6 = v4 ? NULL : l3_ipv6_find_by_id(l3_id); - port_manager_t *pm = v4 ? ifmgr_pm_v4(l3_id) : (v6 ? ifmgr_pm_v6(l3_id) : NULL); + l3_ipv4_interface_t *v4 = NULL; + l3_ipv6_interface_t *v6 = NULL; + port_manager_t *pm = NULL; + + if (ipver == IP_VER4) { + v4 = l3_ipv4_find_by_id(l3_id); + if (v4) pm = ifmgr_pm_v4(l3_id); + } else if (ipver == IP_VER6) { + v6 = l3_ipv6_find_by_id(l3_id); + if (v6) pm = ifmgr_pm_v6(l3_id); + } + if (!pm) return; port_recv_handler_t handler = port_get_handler(pm, PROTO_UDP, dst_port); @@ -101,7 +125,10 @@ void udp_input(ip_version_t ipver, const void *src_ip_addr, const void *dst_ip_a if (!copy) return; memcpy((void*)copy, (const void*)pl.ptr, pl.size); - uint8_t ifx = v4 ? v4->l2->ifindex : (v6 && v6->l2 ? v6->l2->ifindex : 0); + uint8_t ifx = 0; + if (v4 && v4->l2) ifx = v4->l2->ifindex; + else if (v6 && v6->l2) ifx = v6->l2->ifindex; + handler(ifx, ipver, src_ip_addr, dst_ip_addr, copy, pl.size, src_port, dst_port); } } diff --git a/kernel/networking/transport_layer/udp.h b/kernel/networking/transport_layer/udp.h index dcdbc0b4..074012bf 100644 --- a/kernel/networking/transport_layer/udp.h +++ b/kernel/networking/transport_layer/udp.h @@ -16,11 +16,11 @@ typedef struct __attribute__((packed)) { } udp_hdr_t; size_t create_udp_segment(uintptr_t buf, - const net_l4_endpoint *src, - const net_l4_endpoint *dst, - sizedptr payload); + const net_l4_endpoint *src, + const net_l4_endpoint *dst, + sizedptr payload); -void udp_send_segment(const net_l4_endpoint *src, const net_l4_endpoint *dst, sizedptr payload, const ipv4_tx_opts_t* tx_opts); +void udp_send_segment(const net_l4_endpoint *src, const net_l4_endpoint *dst, sizedptr payload, const ip_tx_opts_t* tx_opts, uint8_t ttl, uint8_t dontfrag); void udp_input(ip_version_t ipver, const void 
*src_ip_addr, diff --git a/kernel/process/kernel_syscall_impl.c b/kernel/process/kernel_syscall_impl.c index a2d39c90..2f42472f 100644 --- a/kernel/process/kernel_syscall_impl.c +++ b/kernel/process/kernel_syscall_impl.c @@ -12,11 +12,13 @@ #include "net/network_types.h" #include "filesystem/filesystem.h" #include "sysregs.h" +#include "memory/mmu.h" void* malloc(size_t size){ - uintptr_t heap = get_proc_by_pid(1)->heap; - heap = VIRT_TO_PHYS(heap); - return kalloc((void*)heap, size, ALIGN_16B, MEM_PRIV_KERNEL); + process_t* k = get_proc_by_pid(1); + uintptr_t heap_pa = mmu_translate(k->heap); + if (!heap_pa) return 0; + return kalloc((void*)heap_pa, size, ALIGN_16B, MEM_PRIV_KERNEL); } void free_sized(void*ptr, size_t size){ @@ -68,8 +70,8 @@ extern uint64_t get_time(){ return timer_now_msec(); } -extern bool socket_create(Socket_Role role, protocol_t protocol, SocketHandle *out_handle){ - return create_socket(role, protocol, get_current_proc_pid(), out_handle); +extern bool socket_create(Socket_Role role, protocol_t protocol, const SocketExtraOptions* extra, SocketHandle *out_handle){ + return create_socket(role, protocol, extra, get_current_proc_pid(), out_handle); } extern int32_t socket_bind(SocketHandle *handle, ip_version_t ip_version, uint16_t port){ diff --git a/kernel/process/syscall.c b/kernel/process/syscall.c index ba9097a1..8a5d320e 100644 --- a/kernel/process/syscall.c +++ b/kernel/process/syscall.c @@ -133,9 +133,9 @@ uint64_t syscall_get_time(process_t *ctx){ uint64_t syscall_socket_create(process_t *ctx){ Socket_Role role = (Socket_Role)ctx->PROC_X0; protocol_t protocol = (protocol_t)ctx->PROC_X1; - SocketHandle *out_handle = (SocketHandle*)ctx->PROC_X2; - - return create_socket(role, protocol, ctx->id, out_handle); + const SocketExtraOptions* extra = (const SocketExtraOptions*)ctx->PROC_X2; + SocketHandle *out_handle = (SocketHandle*)ctx->PROC_X3; + return create_socket(role, protocol, extra, ctx->id, out_handle); } uint64_t 
syscall_socket_bind(process_t *ctx){ diff --git a/kernel/sysregs.h b/kernel/sysregs.h index 6855cf20..b93950e2 100644 --- a/kernel/sysregs.h +++ b/kernel/sysregs.h @@ -63,8 +63,8 @@ #define MAIR_VALUE ((MAIR_DEVICE_nGnRnE << (MAIR_IDX_DEVICE * 8)) | (MAIR_NORMAL_NOCACHE << (MAIR_IDX_NORMAL * 8))) #define HIGH_VA 0xFFFF000000000000ULL -#define VIRT_TO_PHYS(x) (x & ~HIGH_VA) -#define PHYS_TO_VIRT(x) (x | HIGH_VA) +#define PHYS_TO_VIRT(x) (((uintptr_t)(x) != 0) ? ((uintptr_t)(x) | HIGH_VA) : 0) +#define VIRT_TO_PHYS(x) (((uintptr_t)(x) != 0) ? ((uintptr_t)(x) & ~HIGH_VA) : 0) -#define VIRT_TO_PHYS_P(x) ((void*)(((uintptr_t)x) & ~HIGH_VA)) -#define PHYS_TO_VIRT_P(x) ((void*)(((uintptr_t)x) | HIGH_VA)) \ No newline at end of file +#define PHYS_TO_VIRT_P(x) (((uintptr_t)(x) != 0) ? (void*)(((uintptr_t)(x)) | HIGH_VA) : 0) +#define VIRT_TO_PHYS_P(x) (((uintptr_t)(x) != 0) ? (void*)(((uintptr_t)(x)) & ~HIGH_VA) : 0) \ No newline at end of file diff --git a/kernel/usb/USBKeyboard.cpp b/kernel/usb/USBKeyboard.cpp index c3c29b38..e2ba55ff 100644 --- a/kernel/usb/USBKeyboard.cpp +++ b/kernel/usb/USBKeyboard.cpp @@ -5,6 +5,9 @@ #include "async.h" #include "exceptions/timer.h" +static uint8_t held[256]; +static uint64_t next_repeat[256]; + void USBKeyboard::request_data(USBDriver *driver){ requesting = true; @@ -33,16 +36,11 @@ void USBKeyboard::process_data(USBDriver *driver){ } } -uint64_t last_registered[256]; - void USBKeyboard::process_keypress(keypress *rkp){ bool handled_key = false; - keypress kp = {}; - if (is_new_keypress(rkp, &last_keypress) || repeated_keypresses > 2){ - //TODO: review this code. 
It's here to prevent qemu's duplicate keyboard input - if (is_new_keypress(rkp, &last_keypress)){ - repeated_keypresses = 0; - } + + if (is_new_keypress(rkp, &last_keypress)) { + keypress kp = {}; kp.modifier = rkp->modifier; // kprintf("Mod: %i", kp.modifier); for (int i = 0; i < 6; i++){ @@ -50,42 +48,77 @@ void USBKeyboard::process_keypress(keypress *rkp){ // if (i == 0) kprintf("Key [%i]: %x", i, kp.keys[i]); } handled_key = register_keypress(kp); - } else - repeated_keypresses++; - + } if (!handled_key){ + uint64_t now = get_time(); + for (int i = 0; i < 8; i++){ - char oldkey = (last_keypress.modifier & (1 << i)); - char newkey = (rkp->modifier & (1 << i)); - if (oldkey != newkey){ - kbd_event event = {}; - event.type = oldkey ? MOD_RELEASE : MOD_PRESS; - event.modifier = oldkey ? oldkey : newkey; - register_event(event); - } + char oldkey = (char)(last_keypress.modifier & (1 << i)); + char newkey = (char)(rkp->modifier & (1 << i)); + if (oldkey == newkey) continue; + + kbd_event event = {}; + event.type = oldkey ? MOD_RELEASE : MOD_PRESS; + event.modifier = oldkey ? 
oldkey : newkey; + register_event(event); } for (int i = 0; i < 6; i++) { - if (rkp->keys[i] != last_keypress.keys[i]){ - if (last_keypress.keys[i]){ - kbd_event event = {}; - event.type = KEY_RELEASE; - event.key = last_keypress.keys[i]; - last_registered[(uint8_t)event.key] = timer_now_msec(); - register_event(event); + char key = (char)last_keypress.keys[i]; + if (!key) continue; + bool present = false; + for (int j = 0; j < 6; j++) + if ((char)rkp->keys[j] == key) { + present = true; + break; } - if (rkp->keys[i]) { - kbd_event event = {}; - event.type = KEY_PRESS; - event.key = rkp->keys[i]; - if (timer_now_msec()-last_registered[(uint8_t)event.key] > 75){ - last_registered[(uint8_t)event.key] = timer_now_msec(); - register_event(event); - } + if (present) continue; + + kbd_event event = {}; + event.type = KEY_RELEASE; + event.key = key; + register_event(event); + + held[(uint8_t)key] = 0; + next_repeat[(uint8_t)key] = 0; + } + + for (int i = 0; i < 6; i++) { + uint8_t key = (uint8_t)rkp->keys[i]; + if (!key) continue; + + bool present = false; + for (int j = 0; j < 6; j++) + if ((uint8_t)last_keypress.keys[j] == key) { + present = true; + break; } + + if (!present) { + kbd_event event = {}; + event.type = KEY_PRESS; + event.key = (char)key; + register_event(event); + + held[key] = 1; + next_repeat[key] = now + 500; + continue; } + + if (!held[key]) { + held[key] = 1; + next_repeat[key] = now + 500; + } + + if (now < next_repeat[key]) continue; + + kbd_event event = {}; + event.type = KEY_PRESS; + event.key = (char)key; + register_event(event); + + next_repeat[key] = now + 33; } } - memcpy(&last_keypress, rkp, sizeof(keypress)); requesting = false; diff --git a/kernel/virtio/virtio_pci.c b/kernel/virtio/virtio_pci.c index 9d9272f1..ca98150a 100644 --- a/kernel/virtio/virtio_pci.c +++ b/kernel/virtio/virtio_pci.c @@ -6,6 +6,8 @@ #include "async.h" #include "sysregs.h" +//TODO implement proper virtqueue handling w/ descriptor allocation, reuse and support for 
multiple in-flight requests using used.ring completions + #define VIRTIO_STATUS_RESET 0x0 #define VIRTIO_STATUS_ACKNOWLEDGE 0x1 #define VIRTIO_STATUS_DRIVER 0x2 @@ -52,7 +54,7 @@ void virtio_enable_verbose(){ }\ }) -void virtio_set_feature_mask(uint32_t mask){ +void virtio_set_feature_mask(uint64_t mask){ feature_mask = mask; } @@ -125,13 +127,15 @@ bool virtio_init_device(virtio_device *dev) { cfg->driver_feature_select = 1; cfg->driver_feature = (uint32_t)(negotiated >> 32); + dev->negotiated_features = negotiated; + cfg->device_status |= VIRTIO_STATUS_FEATURES_OK; if (!(cfg->device_status & VIRTIO_STATUS_FEATURES_OK)){ kprintf("Failed to negotiate features. Supported features %llx",(unsigned long long)features); return false; } - dev->memory_page = palloc(0x1000, MEM_PRIV_KERNEL, MEM_DEV | MEM_RW, false); + dev->memory_page = palloc(0x10000, MEM_PRIV_KERNEL, MEM_DEV | MEM_RW, false); if (!dev->memory_page) return false; dev->status_dma = (uint8_t*)kalloc(dev->memory_page, 64, ALIGN_4KB, MEM_PRIV_KERNEL); @@ -142,15 +146,21 @@ bool virtio_init_device(virtio_device *dev) { uint32_t size; while ((size = select_queue(dev,queue_index))){ uint64_t desc_sz = 16ULL * size; - uint64_t avail_sz = 4ULL + 2ULL * size; - uint64_t used_sz = 4ULL + 8ULL * size; - uint64_t base = (uintptr_t)kalloc(dev->memory_page, desc_sz, ALIGN_4KB, MEM_PRIV_KERNEL); - uint64_t avail = (uintptr_t)kalloc(dev->memory_page, avail_sz, ALIGN_4KB, MEM_PRIV_KERNEL); - uint64_t used = (uintptr_t)kalloc(dev->memory_page, used_sz, ALIGN_4KB, MEM_PRIV_KERNEL); + uint64_t avail_sz = 6ULL + 2ULL * size; + uint64_t used_sz = 6ULL + 8ULL * size; + + uint64_t desc_alloc = (desc_sz + (uint64_t)(PAGE_SIZE - 1)) & ~(uint64_t)(PAGE_SIZE - 1); + uint64_t avail_alloc = (avail_sz + (uint64_t)(PAGE_SIZE - 1)) & ~(uint64_t)(PAGE_SIZE - 1); + uint64_t used_alloc = (used_sz + (uint64_t)(PAGE_SIZE - 1)) & ~(uint64_t)(PAGE_SIZE - 1); - dev->common_cfg->queue_desc = VIRT_TO_PHYS(base); - dev->common_cfg->queue_driver = 
VIRT_TO_PHYS(avail); - dev->common_cfg->queue_device = VIRT_TO_PHYS(used); + void* base = palloc(desc_alloc, MEM_PRIV_KERNEL, MEM_DEV | MEM_RW, true); + void* avail = palloc(avail_alloc, MEM_PRIV_KERNEL, MEM_DEV | MEM_RW, true); + void* used = palloc(used_alloc, MEM_PRIV_KERNEL, MEM_DEV | MEM_RW, true); + if (!base || !avail || !used) return false; + + dev->common_cfg->queue_desc = VIRT_TO_PHYS((uint64_t)base); + dev->common_cfg->queue_driver = VIRT_TO_PHYS((uint64_t)avail); + dev->common_cfg->queue_device = VIRT_TO_PHYS((uint64_t)used); volatile virtq_avail* A = (volatile virtq_avail*)(uintptr_t)avail; A->flags = 0; @@ -178,86 +188,38 @@ uint32_t select_queue(virtio_device *dev, uint32_t index){ return dev->common_cfg->queue_size; } -bool virtio_send_3d(virtio_device *dev, uint64_t cmd, uint32_t cmd_len, uint64_t resp, uint32_t resp_len, uint8_t flags) { - volatile virtq_desc* d = PHYS_TO_VIRT_P((virtq_desc*)dev->common_cfg->queue_desc); - volatile virtq_avail* a = PHYS_TO_VIRT_P((virtq_avail*)dev->common_cfg->queue_driver); - volatile virtq_used* u = PHYS_TO_VIRT_P((virtq_used*)dev->common_cfg->queue_device); - - d[0].addr = VIRT_TO_PHYS(cmd); - d[0].len = cmd_len; - d[0].flags = VIRTQ_DESC_F_NEXT; - d[0].next = 1; - - d[1].addr = VIRT_TO_PHYS(resp); - d[1].len = resp_len; - d[1].flags = VIRTQ_DESC_F_NEXT | flags; - d[1].next = 2; - - *dev->status_dma = 0; - d[2].addr = VIRT_TO_PHYS((uint64_t)dev->status_dma); - d[2].len = 1; - d[2].flags = VIRTQ_DESC_F_WRITE; - d[2].next = 0; - - uint16_t last_used_idx = u->idx; - a->ring[a->idx % dev->common_cfg->queue_size] = 0; - a->idx++; - - *(volatile uint16_t*)(dev->notify_cfg + dev->notify_off_multiplier * dev->common_cfg->queue_select) = 0; - - while (last_used_idx == u->idx);//TODO: OPT +bool virtio_send_nd(virtio_device *dev, const virtio_buf *bufs, uint16_t n) { - uint8_t status = *dev->status_dma; - if (status != 0) - kprintf("[VIRTIO OPERATION ERROR]: Wrong status %x",status); - - return status == 0; -} + if (!dev 
|| !bufs || !n) return false; -bool virtio_send_2d(virtio_device *dev, uint64_t cmd, uint32_t cmd_len, uint64_t resp, uint32_t resp_len, uint8_t flags) { + uint16_t qsz = dev->common_cfg->queue_size; + if (!qsz || n > qsz) return false; volatile virtq_desc* d = PHYS_TO_VIRT_P((virtq_desc*)dev->common_cfg->queue_desc); volatile virtq_avail* a = PHYS_TO_VIRT_P((virtq_avail*)dev->common_cfg->queue_driver); volatile virtq_used* u = PHYS_TO_VIRT_P((virtq_used*)dev->common_cfg->queue_device); uint16_t last_used_idx = u->idx; - d[0].addr = VIRT_TO_PHYS(cmd); - d[0].len = cmd_len; - d[0].flags = flags; - d[0].next = 1; - - d[1].addr = VIRT_TO_PHYS(resp); - d[1].len = resp_len; - d[1].flags = VIRTQ_DESC_F_WRITE; - d[1].next = 0; - - a->ring[a->idx % dev->common_cfg->queue_size] = 0; - a->idx++; - - *(volatile uint16_t*)(dev->notify_cfg + dev->notify_off_multiplier * dev->common_cfg->queue_select) = 0; - - while (last_used_idx == u->idx);//TODO: OPT - - return true; -} - -bool virtio_send_1d(virtio_device *dev, uint64_t cmd, uint32_t cmd_len) { + for (uint16_t i = 0; i < n; ++i) { + if (!bufs[i].addr || !bufs[i].len) return false; + d[i].addr = VIRT_TO_PHYS(bufs[i].addr); + d[i].len = bufs[i].len; + d[i].flags = bufs[i].flags; + if (i + 1 < n) { + d[i].flags |= VIRTQ_DESC_F_NEXT; + d[i].next = (uint16_t)(i + 1); + } else { + d[i].next = 0; + } + } - volatile virtq_desc* d = PHYS_TO_VIRT_P((virtq_desc*)dev->common_cfg->queue_desc); - volatile virtq_avail* a = PHYS_TO_VIRT_P((virtq_avail*)dev->common_cfg->queue_driver); - volatile virtq_used* u = PHYS_TO_VIRT_P((virtq_used*)dev->common_cfg->queue_device); - uint16_t last_used_idx = u->idx; - - d[0].addr = VIRT_TO_PHYS(cmd); - d[0].len = cmd_len; - d[0].flags = 0; - d[0].next = 0; - - a->ring[a->idx % dev->common_cfg->queue_size] = 0; + asm volatile ("dmb ishst" ::: "memory"); + a->ring[a->idx % qsz] = 0; + asm volatile ("dmb ishst" ::: "memory"); a->idx++; - - *(volatile uint16_t*)(dev->notify_cfg + dev->notify_off_multiplier 
* dev->common_cfg->queue_select) = 0; + asm volatile ("dmb ishst" ::: "memory"); + virtio_notify(dev); while (last_used_idx == u->idx);//TODO: OPT @@ -273,9 +235,11 @@ void virtio_add_buffer(virtio_device *dev, uint16_t index, uint64_t buf, uint32_ d[index].len = buf_len; d[index].flags = host_to_dev ? 0 : VIRTQ_DESC_F_WRITE; d[index].next = 0; - + + asm volatile ("dmb ishst" ::: "memory"); a->ring[a->idx % dev->common_cfg->queue_size] = index; + asm volatile ("dmb ishst" ::: "memory"); a->idx++; - - *(volatile uint16_t*)(dev->notify_cfg + dev->notify_off_multiplier * dev->common_cfg->queue_select) = 0; + asm volatile ("dmb ishst" ::: "memory"); + virtio_notify(dev); } \ No newline at end of file diff --git a/kernel/virtio/virtio_pci.h b/kernel/virtio/virtio_pci.h index 9196fc69..2f5266b4 100644 --- a/kernel/virtio/virtio_pci.h +++ b/kernel/virtio/virtio_pci.h @@ -11,6 +11,9 @@ extern "C" { #define VIRTIO_VENDOR 0x1AF4 +#define VIRTIO_F_VERSION_1 32 +#define VIRTIO_F_NOTIFICATION_DATA 38 + typedef struct virtio_pci_common_cfg { uint32_t device_feature_select; uint32_t device_feature; @@ -53,7 +56,7 @@ typedef struct { typedef struct { uint16_t flags; uint16_t idx; - virtq_used_elem ring[128]; + virtq_used_elem ring[]; }__attribute__((packed)) virtq_used; typedef struct virtio_device { @@ -64,15 +67,35 @@ typedef struct virtio_device { uint32_t notify_off_multiplier; void *memory_page; uint8_t* status_dma; + uint64_t negotiated_features; } virtio_device; -void virtio_set_feature_mask(uint32_t mask); +typedef struct { + uint64_t addr; + uint32_t len; + uint16_t flags; +} virtio_buf; + +#define VBUF(a,l,f) ((virtio_buf){.addr = (uint64_t)(a), .len = (uint32_t)(l), .flags = (uint16_t)(f)}) + +static inline void virtio_notify(virtio_device *dev){ + if(!dev) return; + if(!dev->common_cfg) return; + if(!dev->notify_cfg) return; + + uint16_t off=dev->common_cfg->queue_notify_off; + uint32_t mul=dev->notify_off_multiplier; + if(!mul) mul=1; + + uint16_t 
v=(dev->negotiated_features&(1ULL<common_cfg->queue_notify_data:dev->common_cfg->queue_select; + *(volatile uint16_t*)(dev->notify_cfg+(uint64_t)off*(uint64_t)mul)=v; +} + +void virtio_set_feature_mask(uint64_t mask); void virtio_enable_verbose(); void virtio_get_capabilities(virtio_device *dev, uint64_t pci_addr, uint64_t *mmio_start, uint64_t *mmio_size); bool virtio_init_device(virtio_device *dev); -bool virtio_send_3d(virtio_device *dev, uint64_t cmd, uint32_t cmd_len, uint64_t resp, uint32_t resp_len, uint8_t flags); -bool virtio_send_2d(virtio_device *dev, uint64_t cmd, uint32_t cmd_len, uint64_t resp, uint32_t resp_len, uint8_t flags); -bool virtio_send_1d(virtio_device *dev, uint64_t cmd, uint32_t cmd_len); +bool virtio_send_nd(virtio_device *dev, const virtio_buf *bufs, uint16_t n); void virtio_add_buffer(virtio_device *dev, uint16_t index, uint64_t buf, uint32_t buf_len, bool host_to_dev); uint32_t select_queue(virtio_device *dev, uint32_t index); diff --git a/modules/audio/virt/virtio_audio_pci.cpp b/modules/audio/virt/virtio_audio_pci.cpp index 23d96084..e7864667 100644 --- a/modules/audio/virt/virtio_audio_pci.cpp +++ b/modules/audio/virt/virtio_audio_pci.cpp @@ -157,7 +157,8 @@ bool VirtioAudioDriver::config_streams(uint32_t streams){ uintptr_t resp = (uintptr_t)kalloc(audio_dev.memory_page, resp_size, ALIGN_64B, MEM_PRIV_KERNEL); - if (!virtio_send_3d(&audio_dev, (uintptr_t)cmd, sizeof(virtio_snd_query_info), resp, resp_size, VIRTQ_DESC_F_WRITE)){ + virtio_buf b[2] = {VBUF(cmd, sizeof(virtio_snd_query_info), 0), VBUF((void*)resp, resp_size, VIRTQ_DESC_F_WRITE)}; + if(!virtio_send_nd(&audio_dev, b, 2)){ kfree(cmd, sizeof(virtio_snd_query_info)); kfree((void*)resp, resp_size); return false; @@ -239,8 +240,8 @@ bool VirtioAudioDriver::stream_set_params(uint32_t stream_id, uint32_t features, virtio_snd_info_hdr *resp = (virtio_snd_info_hdr*)kalloc(audio_dev.memory_page, sizeof(virtio_snd_info_hdr), ALIGN_64B, MEM_PRIV_KERNEL); - bool result = 
virtio_send_3d(&audio_dev, (uintptr_t)cmd, sizeof(virtio_snd_pcm_set_params), (uintptr_t)resp, sizeof(virtio_snd_info_hdr), VIRTQ_DESC_F_WRITE); - + virtio_buf b[2] = {VBUF(cmd, sizeof(virtio_snd_pcm_set_params), 0), VBUF(resp, sizeof(virtio_snd_info_hdr), VIRTQ_DESC_F_WRITE)}; + bool result=virtio_send_nd(&audio_dev, b, 2); kfree(cmd, sizeof(virtio_snd_query_info)); kfree((void*)resp, sizeof(virtio_snd_info_hdr)); @@ -261,7 +262,8 @@ bool VirtioAudioDriver::send_simple_stream_cmd(uint32_t stream_id, uint32_t comm virtio_snd_info_hdr *resp = (virtio_snd_info_hdr*)kalloc(audio_dev.memory_page, sizeof(virtio_snd_info_hdr), ALIGN_64B, MEM_PRIV_KERNEL); - bool result = virtio_send_3d(&audio_dev, (uintptr_t)cmd, sizeof(virtio_snd_pcm_hdr), (uintptr_t)resp, sizeof(virtio_snd_info_hdr), VIRTQ_DESC_F_WRITE); + virtio_buf b[2] = {VBUF(cmd, sizeof(virtio_snd_pcm_hdr), 0), VBUF(resp, sizeof(virtio_snd_info_hdr), VIRTQ_DESC_F_WRITE)}; + bool result=virtio_send_nd(&audio_dev, b, 2); kfree(cmd, sizeof(virtio_snd_query_info)); kfree((void*)resp, sizeof(virtio_snd_info_hdr)); diff --git a/modules/disk/virt/disk.c b/modules/disk/virt/disk.c index 95dff4cc..cfbeaad2 100644 --- a/modules/disk/virt/disk.c +++ b/modules/disk/virt/disk.c @@ -71,8 +71,9 @@ void disk_write(const void *buffer, uint32_t sector, uint32_t count){ req->reserved = 0; req->sector = sector; - virtio_send_3d(&blk_dev, (uintptr_t)disk_cmd, sizeof(virtio_blk_req), (uintptr_t)data, count * 512, 0); - + uint8_t status = 0; + virtio_buf b[3] = {VBUF(disk_cmd, sizeof(virtio_blk_req), 0), VBUF(data, count * 512, 0), VBUF(&status, 1, VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&blk_dev, b, 3); kfree((void *)data,count * 512); } @@ -84,7 +85,9 @@ void disk_read(void *buffer, uint32_t sector, uint32_t count){ req->reserved = 0; req->sector = sector; - virtio_send_3d(&blk_dev, VIRT_TO_PHYS((uintptr_t)disk_cmd), sizeof(virtio_blk_req), VIRT_TO_PHYS((uintptr_t)buffer), count * 512, VIRTQ_DESC_F_WRITE); + uint8_t status = 0; + 
virtio_buf b[3] = {VBUF(disk_cmd, sizeof(virtio_blk_req), 0), VBUF(buffer, count * 512, VIRTQ_DESC_F_WRITE), VBUF(&status, 1, VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&blk_dev, b, 3); } system_module disk_module = (system_module){ diff --git a/modules/graph/virt/virtio_gpu_pci.cpp b/modules/graph/virt/virtio_gpu_pci.cpp index efefafc6..d58c815f 100644 --- a/modules/graph/virt/virtio_gpu_pci.cpp +++ b/modules/graph/virt/virtio_gpu_pci.cpp @@ -147,7 +147,8 @@ gpu_size VirtioGPUDriver::get_display_info(){ scanout_found = false; - if (!virtio_send_3d(&gpu_dev, (uintptr_t)cmd, sizeof(virtio_gpu_ctrl_hdr), (uintptr_t)resp, sizeof(virtio_gpu_resp_display_info), VIRTQ_DESC_F_WRITE)){ + virtio_buf b[2] = {VBUF(cmd, sizeof(virtio_gpu_ctrl_hdr), 0), VBUF(resp, sizeof(virtio_gpu_resp_display_info), VIRTQ_DESC_F_WRITE)}; + if(!virtio_send_nd(&gpu_dev, b, 2)){ kfree(cmd, sizeof(virtio_gpu_ctrl_hdr)); kfree(resp, sizeof(virtio_gpu_resp_display_info)); return (gpu_size){0, 0}; @@ -193,7 +194,8 @@ bool VirtioGPUDriver::create_2d_resource(uint32_t resource_id, gpu_size size) { virtio_gpu_ctrl_hdr* resp = (virtio_gpu_ctrl_hdr*)kalloc(gpu_dev.memory_page, sizeof(virtio_gpu_ctrl_hdr), ALIGN_4KB, MEM_PRIV_KERNEL); - if (!virtio_send_3d(&gpu_dev, (uintptr_t)cmd, sizeof(virtio_2d_resource), (uintptr_t)resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE)){ + virtio_buf b[2] = {VBUF(cmd, sizeof(virtio_2d_resource), 0), VBUF(resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE)}; + if(!virtio_send_nd(&gpu_dev, b, 2)){ kfree((void*)cmd, sizeof(virtio_2d_resource)); kfree((void*)resp, sizeof(virtio_gpu_ctrl_hdr)); return false; @@ -242,7 +244,8 @@ bool VirtioGPUDriver::attach_backing(uint32_t resource_id, sizedptr ptr) { virtio_gpu_ctrl_hdr* resp = (virtio_gpu_ctrl_hdr*)kalloc(gpu_dev.memory_page, sizeof(virtio_gpu_ctrl_hdr), ALIGN_4KB, MEM_PRIV_KERNEL); - if (!virtio_send_2d(&gpu_dev, (uintptr_t)cmd, sizeof(*cmd), (uintptr_t)resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_NEXT)){ + 
virtio_buf b[2] = {VBUF(cmd, sizeof(*cmd), 0), VBUF(resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE)}; + if (!virtio_send_nd(&gpu_dev, b, 2)){ kfree((void*)cmd, sizeof(*cmd)); kfree((void*)resp, sizeof(virtio_gpu_ctrl_hdr)); return false; @@ -288,7 +291,8 @@ bool VirtioGPUDriver::set_scanout() { virtio_gpu_ctrl_hdr* resp = (virtio_gpu_ctrl_hdr*)kalloc(gpu_dev.memory_page, sizeof(virtio_gpu_ctrl_hdr), ALIGN_4KB, MEM_PRIV_KERNEL); - if (!virtio_send_3d(&gpu_dev, (uintptr_t)cmd, sizeof(*cmd), (uintptr_t)resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE)){ + virtio_buf b[2] = {VBUF(cmd, sizeof(*cmd), 0), VBUF(resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE)}; + if (!virtio_send_nd(&gpu_dev, b, 2)){ kfree((void*)cmd, sizeof(virtio_scanout_cmd)); kfree((void*)resp, sizeof(virtio_gpu_ctrl_hdr)); return false; @@ -326,7 +330,8 @@ bool VirtioGPUDriver::transfer_to_host(uint32_t resource_id, gpu_rect rect) { if (!trans_resp) trans_resp = (virtio_gpu_ctrl_hdr*)kalloc(gpu_dev.memory_page, sizeof(virtio_gpu_ctrl_hdr), ALIGN_4KB, MEM_PRIV_KERNEL); - return virtio_send_3d(&gpu_dev,(uintptr_t)trans_cmd, sizeof(virtio_transfer_cmd), (uintptr_t)trans_resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE); + virtio_buf b[2] = {VBUF(trans_cmd, sizeof(virtio_transfer_cmd), 0), VBUF(trans_resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE)}; + return virtio_send_nd(&gpu_dev, b, 2); } void VirtioGPUDriver::flush() { @@ -363,7 +368,8 @@ void VirtioGPUDriver::flush() { if (!flush_resp) flush_resp = (virtio_gpu_ctrl_hdr*)kalloc(gpu_dev.memory_page, sizeof(virtio_gpu_ctrl_hdr), ALIGN_4KB, MEM_PRIV_KERNEL); - virtio_send_3d(&gpu_dev, (uintptr_t)flush_cmd, sizeof(virtio_flush_cmd), (uintptr_t)flush_resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE); + virtio_buf b[2] = {VBUF(flush_cmd, sizeof(virtio_flush_cmd), 0), VBUF(flush_resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&gpu_dev, b, 2); } struct virtio_gpu_get_capset_info { @@ -391,7 +397,8 
@@ void VirtioGPUDriver::get_capset(uint32_t capset){ virtio_gpu_resp_capset_info* resp = (virtio_gpu_resp_capset_info*)kalloc(gpu_dev.memory_page, sizeof(virtio_gpu_resp_capset_info), ALIGN_4KB, MEM_PRIV_KERNEL); - if (!virtio_send_3d(&gpu_dev, (uintptr_t)cmd, sizeof(virtio_gpu_get_capset_info), (uintptr_t)resp, sizeof(virtio_gpu_resp_capset_info), VIRTQ_DESC_F_WRITE)){ + virtio_buf b[2] = {VBUF(cmd, sizeof(virtio_gpu_get_capset_info), 0), VBUF(resp, sizeof(virtio_gpu_resp_capset_info), VIRTQ_DESC_F_WRITE)}; + if (!virtio_send_nd(&gpu_dev, b, 2)){ kprintf("Could not send command"); kfree((void*)cmd, sizeof(virtio_gpu_get_capset_info)); kfree((void*)resp, sizeof(virtio_gpu_resp_capset_info)); @@ -489,7 +496,8 @@ void VirtioGPUDriver::update_cursor(uint32_t x, uint32_t y, bool full) if (!cursor_resp) cursor_resp = (virtio_gpu_ctrl_hdr*)kalloc(gpu_dev.memory_page, sizeof(virtio_gpu_ctrl_hdr), ALIGN_4KB, MEM_PRIV_KERNEL); - virtio_send_3d(&gpu_dev, (uintptr_t)cursor_cmd, sizeof(virtio_gpu_update_cursor), (uintptr_t)cursor_resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE); + virtio_buf b[2] = {VBUF(cursor_cmd, sizeof(virtio_gpu_update_cursor), 0), VBUF(cursor_resp, sizeof(virtio_gpu_ctrl_hdr), VIRTQ_DESC_F_WRITE)}; + virtio_send_nd(&gpu_dev, b, 2); select_queue(&gpu_dev, CONTROL_QUEUE); } diff --git a/modules/serial/raspi/uart.c b/modules/serial/raspi/uart.c index 37f0c4f3..d4d0e045 100644 --- a/modules/serial/raspi/uart.c +++ b/modules/serial/raspi/uart.c @@ -28,7 +28,6 @@ volatile uint32_t uart_mbox[9] __attribute__((aligned(16))) = { void prepare_uart_hw() { if (RPI_BOARD != 5){ - reset_gpio(); enable_gpio_pin(14); enable_gpio_pin(15); } diff --git a/run_virt b/run_virt index 6e9492f1..42771fc2 100755 --- a/run_virt +++ b/run_virt @@ -39,6 +39,7 @@ if [ "$OS_TYPE" = "Darwin" ]; then AUDIO_BACKEND="coreaudio" GL="off" elif [ "$OS_TYPE" = "Linux" ]; then + fuser -k -KILL ./disk.img NETDEV="-netdev user,id=net0" PRIVILEGE="" DISPLAY_MODE="sdl" @@ -55,6 +56,7 @@ if 
[ -d /sys/class/net/tap0 ] && [ -d /sys/class/net/br0 ]; then #tap bridge NETDEV="-netdev tap,id=net0,ifname=tap0,script=no,downscript=no,vnet_hdr=off" PRIVILEGE="" + sudo lsof /dev/net/tun >/dev/null 2>&1 && sudo fuser -k /dev/net/tun fi NETDRIV="-device virtio-net-pci,netdev=net0,mac=52:54:00:12:34:56,speed=10000,duplex=full" diff --git a/shared/data/scanner/scanner.h b/shared/data/scanner/scanner.h index 2335df9e..b9824a2a 100644 --- a/shared/data/scanner/scanner.h +++ b/shared/data/scanner/scanner.h @@ -23,4 +23,4 @@ char scan_next(Scanner *s); bool scan_match(Scanner *s, char c); bool scan_match_string(Scanner *s, const char *str); -void scan_skip_ws(Scanner *s, bool skip_nl); \ No newline at end of file +void scan_skip_ws(Scanner *s, bool skip_nl); diff --git a/shared/data/tokenizer/tokenizer.h b/shared/data/tokenizer/tokenizer.h index 54e29f03..faed6310 100644 --- a/shared/data/tokenizer/tokenizer.h +++ b/shared/data/tokenizer/tokenizer.h @@ -80,4 +80,4 @@ static inline string_slice token_to_slice(Token t){ return (string_slice){ .data = (char*)t.start, .length = t.length }; } -char* token_name(TokenKind kind); \ No newline at end of file +char* token_name(TokenKind kind); diff --git a/shared/data_struct/indexmap.hpp b/shared/data_struct/indexmap.hpp index e4db63ea..822e876a 100644 --- a/shared/data_struct/indexmap.hpp +++ b/shared/data_struct/indexmap.hpp @@ -67,4 +67,4 @@ class IndexMap { uint32_t count; uint32_t capacity; }; -//TEST: when assigning an indexmap (like in xhci's endpoint_map), it gets copied. Make sure the old one gets freed \ No newline at end of file +//TEST: when assigning an indexmap (like in xhci's endpoint_map), it gets copied. 
Make sure the old one gets freed diff --git a/shared/data_struct/ring_buffer.hpp b/shared/data_struct/ring_buffer.hpp index f5af1488..d194a608 100644 --- a/shared/data_struct/ring_buffer.hpp +++ b/shared/data_struct/ring_buffer.hpp @@ -1,5 +1,6 @@ #pragma once #include "types.h" +#include "std/memory.h" //TODO: review allocs & C template @@ -13,6 +14,55 @@ class RingBuffer { public: RingBuffer() : head(0), tail(0), full(0) {} + uint64_t push_buf(const T* src, uint64_t n) { + if (!src || !n) return 0; + uint64_t used = size(); + if (used >= Capacity) return 0; + uint64_t avail = Capacity - used; + uint64_t to = (n < avail) ? n : avail; + uint64_t cont = 0; + + if (!full && head < tail) cont = tail - head; + else cont = Capacity - head; + + uint64_t first = (to < cont) ? to : cont; + memcpy(data + head, src, first * sizeof(T)); + head = (head + first) % Capacity; + + uint64_t rem = to - first; + if (rem) { + memcpy(data + head, src + first, rem * sizeof(T)); + head = (head + rem) % Capacity; + } + + full = (head == tail); + return to; + } + + uint64_t pop_buf(T* dst, uint64_t n) { + if (!dst || !n) return 0; + uint64_t used = size(); + if (!used) return 0; + uint64_t to = (n < used) ? n : used; + uint64_t cont = 0; + + if (full || tail >= head) cont = Capacity - tail; + else cont = head - tail; + + uint64_t first = (to < cont) ? to : cont; + memcpy(dst, data + tail, first * sizeof(T)); + tail = (tail + first) % Capacity; + full = 0; + + uint64_t rem = to - first; + if (rem) { + memcpy(dst + first, data + tail, rem * sizeof(T)); + tail = (tail + rem) % Capacity; + } + + return to; + } + int32_t push(const T& item) { if (full) return 0; data[head] = item; diff --git a/shared/math/math.h b/shared/math/math.h index cc41f15e..78a089ae 100644 --- a/shared/math/math.h +++ b/shared/math/math.h @@ -78,6 +78,33 @@ static inline double floor(double val){ return (uint64_t)val; } +static inline int64_t abs_i64(int64_t v){ + return v < 0 ? 
-v : v; +} + +static inline int64_t clamp_i64(int64_t v, int64_t lo, int64_t hi){ + if (v < lo) return lo; + if (v > hi) return hi; + return v; +} + +static uint64_t sqrt_u64(uint64_t x){ + uint64_t op = x; + uint64_t res = 0; + uint64_t one = 1ULL << 62; + while (one > op) one >>= 2; + while (one != 0) { + if (op >= res + one) { + op -= res + one; + res = (res >> 1) + one; + } else { + res >>= 1; + } + one >>= 2; + } + return res; +} + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/shared/net/checksums.c b/shared/net/checksums.c index 6eb0dfa0..e9bcdbcc 100644 --- a/shared/net/checksums.c +++ b/shared/net/checksums.c @@ -33,3 +33,21 @@ uint16_t checksum16_pipv4(uint32_t src_ip, return (uint16_t)~sum; } + +uint16_t checksum16_pipv6(const uint8_t src_ip[16], + const uint8_t dst_ip[16], + uint8_t protocol, + const uint8_t *payload, + uint32_t length) +{ + uint32_t sum = 0; + for (int i=0;i<16;i+=2) sum += (uint32_t)((src_ip[i] << 8) | src_ip[i+1]); + for (int i=0;i<16;i+=2) sum += (uint32_t)((dst_ip[i] << 8) | dst_ip[i+1]); + sum += (length >> 16) & 0xFFFF; + sum += length & 0xFFFF; + sum += protocol; + for (uint32_t i = 0; i + 1 < length; i += 2) sum += (uint32_t)((payload[i] << 8) | payload[i + 1]); + if (length & 1) sum += (uint32_t)(payload[length - 1] << 8); + while (sum >> 16) sum = (sum & 0xFFFF) + (sum >> 16); + return (uint16_t)~sum; +} \ No newline at end of file diff --git a/shared/net/checksums.h b/shared/net/checksums.h index 0e0b777e..47f3a834 100644 --- a/shared/net/checksums.h +++ b/shared/net/checksums.h @@ -12,6 +12,12 @@ uint16_t checksum16_pipv4(uint32_t src_ip, const uint8_t *payload, uint16_t length); +uint16_t checksum16_pipv6(const uint8_t src_ip[16], + const uint8_t dst_ip[16], + uint8_t protocol, + const uint8_t *payload, + uint32_t length); + #ifdef __cplusplus } #endif diff --git a/shared/net/net.h b/shared/net/net.h deleted file mode 100644 index 63bb0a4c..00000000 --- a/shared/net/net.h +++ /dev/null @@ -1,4 +0,0 @@ 
-#pragma once - -#define FORMAT_IP(ipv4) ((ipv4 >> 24) & 0xFF), ((ipv4 >> 16) & 0xFF), ((ipv4 >> 8) & 0xFF), ((ipv4) & 0xFF) -#define IP_ENCODE(ip1,ip2,ip3,ip4) (((ipv4 << 24) & 0xFF) | ((ipv4 << 16) & 0xFF) | ((ipv4 << 8) & 0xFF) | ((ipv4) & 0xFF)) \ No newline at end of file diff --git a/shared/net/network_types.h b/shared/net/network_types.h index a930b660..52b8eaf0 100644 --- a/shared/net/network_types.h +++ b/shared/net/network_types.h @@ -33,6 +33,17 @@ typedef struct SocketHandle { protocol_t protocol; } SocketHandle; +typedef enum { + IP_TX_AUTO = 0, + IP_TX_BOUND_L2 = 1, + IP_TX_BOUND_L3 = 2 +} ip_tx_scope_t; + +typedef struct { + uint8_t index; + ip_tx_scope_t scope; +} ip_tx_opts_t; + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/shared/net/socket_types.h b/shared/net/socket_types.h index f16c46c1..06a249ff 100644 --- a/shared/net/socket_types.h +++ b/shared/net/socket_types.h @@ -18,6 +18,31 @@ typedef enum { DST_DOMAIN = 1 } SockDstKind; +typedef enum { + SOCK_OPT_DEBUG = 1u << 0, + SOCK_OPT_KEEPALIVE = 1u << 1, + SOCK_OPT_BUF_SIZE = 1u << 2, + SOCK_OPT_DONTFRAG = 1u << 3, + SOCK_OPT_TTL = 1u << 4, + SOCK_OPT_MCAST_JOIN = 1u << 5 +} SockOptFlags; + +typedef enum { + SOCK_DBG_LOW = 0, + SOCK_DBG_MEDIUM = 1, + SOCK_DBG_ALL = 2 +} SockDebugLevel; + +typedef struct SocketExtraOptions { + uint32_t flags; + SockDebugLevel debug_level; + uint32_t buf_size; + uint32_t keepalive_ms; + uint8_t ttl; + ip_version_t mcast_ver; + uint8_t mcast_group[16]; +} SocketExtraOptions; + typedef struct SockBindSpec{ SockBindKind kind; ip_version_t ver; diff --git a/shared/std/memory.c b/shared/std/memory.c index c12b59bd..7eef7dae 100644 --- a/shared/std/memory.c +++ b/shared/std/memory.c @@ -263,3 +263,20 @@ void* memmove(void *dest, const void *src, size_t count) { return dest; } } + +void* memmem(const void* haystack, size_t haystack_len, const void* needle, size_t needle_len) { + if(!haystack || !needle) return 0; + if(!needle_len) return 
(void*)haystack; + if(haystack_len < needle_len) return 0; + + const unsigned char* h = (const unsigned char*)haystack; + const unsigned char* n = (const unsigned char*)needle; + + size_t last = haystack_len - needle_len; + for(size_t i = 0; i <= last; i++) { + if(h[i] != n[0]) continue; + if(!memcmp(h + i, n, needle_len)) return (void*)(h+i); + } + + return 0; +} diff --git a/shared/std/memory.h b/shared/std/memory.h index 9b154e4d..1fa658a3 100644 --- a/shared/std/memory.h +++ b/shared/std/memory.h @@ -10,6 +10,7 @@ void* memset32(void* dest, uint32_t val, size_t count); void* memcpy(void *dest, const void *src, size_t count); void memreverse(void *ptr, size_t n); void* memmove(void *dest, const void *src, size_t count); +void* memmem(const void* haystack, size_t haystack_len, const void* needle, size_t needle_len); #ifdef __cplusplus } diff --git a/shared/std/string.c b/shared/std/string.c index 82cc36b8..d3c9a3f5 100644 --- a/shared/std/string.c +++ b/shared/std/string.c @@ -1164,7 +1164,7 @@ string string_from_const(const char *lit) { uint32_t len = strlen(lit); char* nlit = malloc(len+1); - strncpy(nlit, lit, len); + strncpy(nlit, lit, len+1); return (string){ nlit, len, len + 1}; } @@ -1233,13 +1233,38 @@ bool parse_uint32_dec(const char *s, uint32_t *out) { return true; } + +char* strcasestr(const char* haystack, const char* needle) { + if (!haystack) return 0; + if (!needle) return (char*)haystack; + if (!*needle) return (char*)haystack; + + for (const char* h = haystack; *h; h++) { + const char* hp = h; + const char* np = needle; + + while (*hp && *np) { + char a = tolower(*hp); + char b = tolower(*np); + if (a != b) break; + hp++; + np++; + } + + if (!*np) return (char*)h; + } + + return 0; +} + void strcat_buf(const char *a, const char *b, char *dest){ while (*a) *dest++ = *a++; while (*b) *dest++ = *b++; + *dest = 0; } char* strcat_new(const char *a, const char *b){ - char* dest = (char*)malloc(strlen(a) + strlen(b)); + char* dest = 
(char*)malloc(strlen(a) + strlen(b) + 1); strcat_buf(a,b,dest); return dest; } diff --git a/shared/std/string.h b/shared/std/string.h index fdf5a52b..949fbc0c 100644 --- a/shared/std/string.h +++ b/shared/std/string.h @@ -19,9 +19,6 @@ typedef struct string_list { uint32_t count; char array[]; } string_list; - -extern void free_sized(void*,size_t); - static inline bool is_alpha(char c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); } @@ -34,6 +31,13 @@ static inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); } +static inline int32_t str_has_char(const char* s, uint32_t n, char c){ + for (uint32_t i = 0; i < n && s[i] != '\0'; i++) if (s[i] == c) return i; + return -1; +} + +extern void free_sized(void*,size_t); + static inline int hex_val(char c) { if (is_digit(c)) return c - '0'; if (c >= 'a' && c <= 'f') return 10 + (c - 'a'); @@ -91,6 +95,7 @@ void string_append_bytes(string *dest, const void *buf, uint32_t len); const char* seek_to(const char *string, char character); char* strncpy(char* dst, const char* src, size_t cap); bool parse_uint32_dec(const char *s, uint32_t *out); +char* strcasestr(const char* haystack, const char* needle); string string_replace_character(char* original, char symbol, char *value); diff --git a/shared/syscalls/syscalls.h b/shared/syscalls/syscalls.h index 1c2a1cd5..7584d088 100644 --- a/shared/syscalls/syscalls.h +++ b/shared/syscalls/syscalls.h @@ -40,7 +40,7 @@ extern uint32_t gpu_char_size(uint32_t scale); extern uint64_t get_time(); -extern bool socket_create(Socket_Role role, protocol_t protocol, SocketHandle *out_handle); +extern bool socket_create(Socket_Role role, protocol_t protocol, const SocketExtraOptions* extra, SocketHandle *out_handle); extern int32_t socket_bind(SocketHandle *handle, ip_version_t ip_version, uint16_t port); extern int32_t socket_connect(SocketHandle *handle, SockDstKind dst_kind, void* dst, uint16_t port); extern int32_t socket_listen(SocketHandle *handle); diff --git 
a/user/default_process.c b/user/default_process.c index fbe712c4..2f8e9495 100644 --- a/user/default_process.c +++ b/user/default_process.c @@ -48,7 +48,7 @@ int img_example() { int net_example() { SocketHandle spec = {}; - socket_create(SOCKET_SERVER, PROTO_UDP, &spec); + socket_create(SOCKET_SERVER, PROTO_UDP, NULL, &spec); printf("Created socket for type %i",spec.protocol); //Fill in manually with your local IP. A syscall will be added soon to get it for you spec.connection.ip[0] = 0; diff --git a/utils/terminal/terminal.cpp b/utils/terminal/terminal.cpp index d5dbbf9e..df4abf14 100644 --- a/utils/terminal/terminal.cpp +++ b/utils/terminal/terminal.cpp @@ -9,18 +9,143 @@ Terminal::Terminal() : Console() { bg_color = color_buf[0]; default_text_color = color_buf[1]; text_color = color_buf[1]; + char_scale = 2; - put_string("> "); prompt_length = 2; - draw_cursor(); - flush(dctx); + command_running = false; + + input_len = 0; + input_cursor = 0; + input_buf[0] = 0; + + history_len = 0; + history_index = 0; + for (uint32_t i = 0; i < history_max; i++) history[i] = nullptr; + + last_blink_ms = get_time(); + cursor_visible = true; + + dirty = false; + + put_string("> "); + redraw_input_line(); + if (dirty) { + flush(dctx); + dirty = false; + } } void Terminal::update(){ - if (!command_running) handle_input(); - else { + if (!command_running) { + bool did = handle_input(); + if (!did) cursor_tick(); + } else { end_command(); } + + if (dirty) { + flush(dctx); + dirty = false; + } +} + +void Terminal::cursor_set_visible(bool visible){ + if (visible == cursor_visible) { + if (!visible) return; + if (last_drawn_cursor_x == (int32_t)cursor_x && last_drawn_cursor_y == (int32_t)cursor_y) return; + } + + uint32_t cw = (uint32_t)char_scale * CHAR_SIZE; + uint32_t lh = (uint32_t)char_scale * CHAR_SIZE * 2; + cursor_visible = visible; + + if (last_drawn_cursor_x >= 0 && last_drawn_cursor_y >= 0) { + if ((uint32_t)last_drawn_cursor_x < columns && (uint32_t)last_drawn_cursor_y < 
rows) { + fb_fill_rect(dctx, + (uint32_t)last_drawn_cursor_x * cw, + (uint32_t)last_drawn_cursor_y * lh, + cw, lh, bg_color + ); + + char *prev_line = row_data + (((scroll_row_offset + (uint32_t)last_drawn_cursor_y) % rows) * columns); + char ch = prev_line[last_drawn_cursor_x]; + if (ch) { + uint32_t py = ((uint32_t)last_drawn_cursor_y * lh) + (lh / 2); + fb_draw_char(dctx, (uint32_t)last_drawn_cursor_x * cw, py, ch, char_scale, text_color); + } + } + last_drawn_cursor_x = -1; + last_drawn_cursor_y = -1; + } + + if (cursor_visible) { + fb_fill_rect(dctx, cursor_x * cw, cursor_y * lh, cw, lh, 0xFFFFFFFF); + last_drawn_cursor_x = (int32_t)cursor_x; + last_drawn_cursor_y = (int32_t)cursor_y; + } + + dirty = true; +} + +void Terminal::cursor_tick(){ + uint64_t now = get_time(); + if ((now - last_blink_ms) < 500) return; + last_blink_ms = now; + cursor_set_visible(!cursor_visible); +} + +void Terminal::redraw_input_line(){ + if (!check_ready()) return; + + uint32_t cw = (uint32_t)char_scale * CHAR_SIZE; + uint32_t lh = (uint32_t)char_scale * CHAR_SIZE * 2; + + fb_fill_rect(dctx, 0, cursor_y * lh, columns * cw, lh, bg_color); + + char* line = row_data + (((scroll_row_offset + cursor_y) % rows) * columns); + memset(line, 0, columns); + + if (columns == 0) return; + if (prompt_length >= (int)columns) return; + + line[0] = '>'; + line[1] = ' '; + + uint32_t max_input = columns - (uint32_t)prompt_length - 1; + uint32_t draw_len = input_len; + if (draw_len > max_input) draw_len = max_input; + + for (uint32_t i = 0; i < draw_len; i++) line[prompt_length + i] = input_buf[i]; + line[prompt_length + draw_len] = 0; + + uint32_t ypix = (cursor_y * lh) + (lh / 2); + fb_draw_char(dctx, 0, ypix, '>', char_scale, text_color); + fb_draw_char(dctx, cw, ypix, ' ', char_scale, text_color); + for (uint32_t i = 0; i < draw_len; i++) fb_draw_char(dctx, (prompt_length + i) * cw, ypix, input_buf[i], char_scale, text_color); + + if (input_cursor > draw_len) input_cursor = draw_len; + cursor_x = 
(uint32_t)prompt_length + input_cursor; + + last_blink_ms = get_time(); + cursor_set_visible(true); +} + +void Terminal::set_input_line(const char *s){ + input_len = 0; + input_cursor = 0; + + if (s) { + uint32_t i = 0; + while (s[i] && (i + 1) < input_max) { + input_buf[i] = s[i]; + i++; + } + input_len = i; + } + + input_buf[input_len] = 0; + input_cursor = input_len; + redraw_input_line(); } void Terminal::end_command(){ @@ -29,37 +154,53 @@ void Terminal::end_command(){ put_char('\n'); put_string("> "); prompt_length = 2; - draw_cursor(); - flush(dctx); + + set_input_line(""); set_text_color(default_text_color); } bool Terminal::exec_cmd(const char *cmd, int argc, const char *argv[]){ uint16_t proc = exec(cmd, argc, argv); if (!proc) return false; - string s1 = string_format("/proc/%i/out",proc); - string s2 = string_format("/proc/%i/state",proc); + + string s1 = string_format("/proc/%i/out", proc); + string s2 = string_format("/proc/%i/state", proc); + file out_fd, state_fd; openf(s1.data, &out_fd); free_sized(s1.data, s1.mem_length); openf(s2.data, &state_fd); free_sized(s2.data, s2.mem_length); + int state = 1; size_t amount = 0x100; - char *buf = (char*)malloc(amount); + char *buf = (char*)malloc(amount + 1); + if (!buf) { + closef(&out_fd); + closef(&state_fd); + return true; + } + do { size_t n = readf(&out_fd, buf, amount); - put_string(buf); - memset(buf,0,n); + buf[n] = 0; + if (n) put_string(buf); + readf(&state_fd, (char*)&state, sizeof(int)); } while (state); - readf(&out_fd, buf, amount); - put_string(buf); - free_sized(buf, amount); + + for (;;) { + size_t n = readf(&out_fd, buf, amount); + if (!n) break; + buf[n] = 0; + put_string(buf); + } + + free_sized(buf, amount + 1); closef(&out_fd); closef(&state_fd); - string exit_msg = string_format("\nProcess %i ended.",proc); - //TODO: format message + + string exit_msg = string_format("\nProcess %i ended.", proc); put_string(exit_msg.data); free_sized(exit_msg.data, exit_msg.mem_length); return true; 
@@ -69,12 +210,15 @@ const char** Terminal::parse_arguments(char *args, int *count){ *count = 0; const char **argv = (const char**)malloc(16 * sizeof(uintptr_t)); char* p = args; + while (*p && *count < 16){ while (*p == ' ' || *p == '\t') p++; if (!*p) break; + char* start = p; while (*p && *p != ' ' && *p != '\t') p++; if (*p) { *p = '\0'; p++; } + argv[*count] = start; (*count)++; } @@ -82,18 +226,30 @@ const char** Terminal::parse_arguments(char *args, int *count){ } void Terminal::run_command(){ - const char* fullcmd = get_current_line(); - if (fullcmd[0] == '>' && fullcmd[1] == ' ') { - fullcmd += 2; + if (input_len) { + if (history_len == history_max) { + if (history[0]) free_sized(history[0], strlen(history[0]) + 1); + for (uint32_t i = 1; i < history_max; i++) history[i - 1] = history[i]; + history_len = history_max - 1; + } + + uint32_t n = input_len; + char *copy = (char*)malloc(n + 1); + if (copy) { + memcpy(copy, input_buf, n); + copy[n] = 0; + history[history_len++] = copy; + } } + history_index = history_len; + + const char* fullcmd = input_buf; while (*fullcmd == ' ' || *fullcmd == '\t') fullcmd++; - if (*fullcmd == '\0') { - put_char('\r'); - put_char('\n'); - put_string("> "); - prompt_length = 2; - draw_cursor(); - flush(dctx); + + put_char('\r'); + put_char('\n'); + + if (*fullcmd == 0) { command_running = true; return; } @@ -111,52 +267,114 @@ void Terminal::run_command(){ } else { size_t cmd_len = (size_t)(args - fullcmd); cmd = string_from_literal_length(fullcmd, cmd_len); + const char* argstart = args; while (*argstart == ' ' || *argstart == '\t') argstart++; + args_copy = string_from_literal(argstart); argv = parse_arguments(args_copy.data, &argc); } - put_char('\r'); - put_char('\n'); - if (!exec_cmd(cmd.data, argc, argv)){ - if (strcmp_case(cmd.data, "exit",true) == 0){ + if (strcmp_case(cmd.data, "exit", true) == 0){ halt(0); } else { - string s = string_format("Unknown command %s with args %s", cmd.data, args); + string s = 
string_format("Unknown command %s", cmd.data); put_string(s.data); free_sized(s.data, s.mem_length); } } + if (argv) free_sized((void*)argv, 16 * sizeof(uintptr_t)); free_sized(cmd.data, cmd.mem_length); if (args_copy.mem_length) free_sized(args_copy.data, args_copy.mem_length); - draw_cursor(); - flush(dctx); command_running = true; } -void Terminal::handle_input(){ +bool Terminal::handle_input(){ kbd_event event; - if (read_event(&event)){ - if (event.type == KEY_PRESS){ - char key = event.key; - char readable = hid_to_char((uint8_t)key); - if (key == KEY_ENTER || key == KEY_KPENTER){ - run_command(); - } else if (readable){ - put_char(readable); - draw_cursor(); - flush(dctx); - } else if (key == KEY_BACKSPACE){ - if (strlen_max(get_current_line(), 1024) > (uint32_t)prompt_length) { - delete_last_char(); - } + if (!read_event(&event)) return false; + if (event.type == KEY_RELEASE) return true; + if (event.type != KEY_PRESS) return false; + + char key = event.key; + char readable = hid_to_char((uint8_t)key); + + if (key == KEY_ENTER || key == KEY_KPENTER){ + run_command(); + return true; + } + + if (key == KEY_LEFT) { + if (input_cursor) input_cursor--; + cursor_x = (uint32_t)prompt_length + input_cursor; + last_blink_ms = get_time(); + cursor_set_visible(true); + return true; + } + + if (key == KEY_RIGHT) { + if (input_cursor < input_len) input_cursor++; + cursor_x = (uint32_t)prompt_length + input_cursor; + last_blink_ms = get_time(); + cursor_set_visible(true); + return true; + } + + if (key == KEY_UP) { + if (history_len && history_index) { + history_index--; + set_input_line(history[history_index]); + } + return true; + } + + if (key == KEY_DOWN) { + if (history_len) { + if (history_index + 1 < history_len) { + history_index++; + set_input_line(history[history_index]); + } else { + history_index = history_len; + set_input_line(""); } } + return true; + } + + if (key == KEY_BACKSPACE){ + if (!input_cursor) return true; + for (uint32_t i = input_cursor; i < 
input_len; i++) input_buf[i - 1] = input_buf[i]; + input_len--; + input_cursor--; + input_buf[input_len] = 0; + redraw_input_line(); + return true; } + + if (key == KEY_DELETE) { + if (input_cursor >= input_len) return true; + for (uint32_t i = input_cursor + 1; i <= input_len; i++) input_buf[i - 1] = input_buf[i]; + input_len--; + redraw_input_line(); + return true; + } + + if (!readable) return true; + + uint32_t max_visible = 0; + if (columns > (uint32_t)prompt_length + 1) max_visible = columns - (uint32_t)prompt_length - 1; + if (input_len >= input_max - 1) return true; + if (max_visible && input_len >= max_visible) return true; + + for (uint32_t i = input_len; i > input_cursor; i--) input_buf[i] = input_buf[i - 1]; + input_buf[input_cursor] = readable; + input_len++; + input_cursor++; + input_buf[input_len] = 0; + redraw_input_line(); + return true; } draw_ctx* Terminal::get_ctx(){ diff --git a/utils/terminal/terminal.hpp b/utils/terminal/terminal.hpp index 4ae29dd6..265dc6f6 100644 --- a/utils/terminal/terminal.hpp +++ b/utils/terminal/terminal.hpp @@ -7,12 +7,18 @@ class Terminal: public Console { Terminal(); void update(); protected: - void handle_input(); + bool handle_input(); + void repeat_tick(); void end_command(); int prompt_length; void run_command(); const char** parse_arguments(char *args, int *count); + void redraw_input_line(); + void set_input_line(const char *s); + void cursor_tick(); + void cursor_set_visible(bool visible); + bool exec_cmd(const char *cmd, int argc, const char *args[]); draw_ctx* get_ctx() override; @@ -20,4 +26,19 @@ class Terminal: public Console { bool screen_ready() override; bool command_running; -}; + + static constexpr uint32_t input_max = 1024; + char input_buf[input_max]; + uint32_t input_len; + uint32_t input_cursor; + + static constexpr uint32_t history_max = 32; + char *history[history_max]; + uint32_t history_len; + uint32_t history_index; + + uint64_t last_blink_ms; + bool cursor_visible; + + bool dirty; +}; \ No 
newline at end of file