From b6166c8d5470d594c1385ef54b02db459ad56bcc Mon Sep 17 00:00:00 2001
From: Daniil Kovalev
Date: Wed, 23 Jul 2025 17:40:05 +0300
Subject: [PATCH] aarch64: initial pauth support

1. Support PAuth dynamic relocs:
   - R_AARCH64_AUTH_ABS64;
   - R_AARCH64_AUTH_RELATIVE (including relr);
   - R_AARCH64_JUMP_SLOT (sign slot contents if DT_AARCH64_PAC_PLT dynamic
     tag is present);
   - R_AARCH64_AUTH_GLOB_DAT;
   - R_AARCH64_AUTH_TLSDESC.
2. Support signed function pointers in init/fini arrays (with optional
   address discrimination enabled).
3. Check PAuth core info compatibility for DSOs in the process.

TODO:
1. Support function pointer type discrimination. This should not be enabled
   under normal conditions though, and pauthtest ABI in LLVM intentionally
   does not include that.
2. Do not store unsigned LR in memory in raw assembly code: see
   https://github.com/access-softek/musl/issues/3.
3. Support non-null `__ehdr_start`. See the in-code comment in the `__dls2`
   function in ldso/dynlink.c for details.
4. Enhance test coverage: the current proof-of-concept is good enough to run
   llvm-test-suite, but we lack test coverage of some parts of musl.

Co-authored-by: Evgeny Leviant Co-authored-by: Anatoly Trosinenko --- arch/aarch64/reloc.h | 32 ++++ crt/aarch64/crti.s | 2 + crt/aarch64/crtn.s | 2 + ldso/dynlink.c | 183 +++++++++++++++++++++- src/internal/dynlink.h | 2 +- src/internal/vdso.c | 11 +- src/ldso/aarch64/reloc.c | 165 +++++++++++++++++++ src/ldso/aarch64/{tlsdesc.s => tlsdesc.S} | 39 +++++ src/signal/sigaction.c | 5 + src/thread/aarch64/{clone.s => clone.S} | 8 + 10 files changed, 443 insertions(+), 6 deletions(-) create mode 100644 src/ldso/aarch64/reloc.c rename src/ldso/aarch64/{tlsdesc.s => tlsdesc.S} (54%) rename src/thread/aarch64/{clone.s => clone.S} (81%) diff --git a/arch/aarch64/reloc.h b/arch/aarch64/reloc.h index b1b68c725..8ab8decfa 100644 --- a/arch/aarch64/reloc.h +++ b/arch/aarch64/reloc.h @@ -18,7 +18,39 @@ #define REL_DTPMOD R_AARCH64_TLS_DTPMOD64 #define REL_DTPOFF R_AARCH64_TLS_DTPREL64 #define REL_TPOFF R_AARCH64_TLS_TPREL64 + +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +#if __has_feature(ptrauth_elf_got) +#define R_AARCH64_AUTH_TLSDESC 0x413 +#define REL_TLSDESC R_AARCH64_AUTH_TLSDESC +#else #define REL_TLSDESC R_AARCH64_TLSDESC +#endif #define CRTJMP(pc,sp) __asm__ __volatile__( \ "mov sp,%1 ; br %0" : : "r"(pc), "r"(sp) : "memory" ) + +#if __has_feature(ptrauth_intrinsics) + +#include + +#define TARGET_RELOCATE(dso, type, reladdr, sym, addend, is_phase_2, dyn, error_sym) \ + do_target_reloc(dso, type, reladdr, sym, addend, is_phase_2, dyn, error_sym) +#define DO_TARGET_RELR(dso, dyn) do_pauth_relr(dso, dyn) + +int do_target_reloc(int type, uint64_t* reladdr, uint64_t base, uint64_t symval, + uint64_t addend, int is_phase_2, uint64_t* dyn, uint64_t error_sym); + +void do_pauth_relr(uint64_t base, uint64_t* dyn); + +#define GETFUNCSYM(fp, sym, got) do { \ + hidden void sym(); \ + *(fp) = sym; } while(0) + +#define FPTR_CAST(fty, p) \ + ((fty)__builtin_ptrauth_sign_unauthenticated((void*)(p), 0, 0)) + +#endif diff --git a/crt/aarch64/crti.s b/crt/aarch64/crti.s 
index 775df0ac0..5c16d50ba 100644 --- a/crt/aarch64/crti.s +++ b/crt/aarch64/crti.s @@ -2,6 +2,7 @@ .global _init .type _init,%function _init: + paciasp stp x29,x30,[sp,-16]! mov x29,sp @@ -9,5 +10,6 @@ _init: .global _fini .type _fini,%function _fini: + paciasp stp x29,x30,[sp,-16]! mov x29,sp diff --git a/crt/aarch64/crtn.s b/crt/aarch64/crtn.s index 73cab6926..4da1882ac 100644 --- a/crt/aarch64/crtn.s +++ b/crt/aarch64/crtn.s @@ -1,7 +1,9 @@ .section .init ldp x29,x30,[sp],#16 + autiasp ret .section .fini ldp x29,x30,[sp],#16 + autiasp ret diff --git a/ldso/dynlink.c b/ldso/dynlink.c index 324aa8591..aef856e07 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -1,5 +1,6 @@ #define _GNU_SOURCE #define SYSCALL_NO_TLS 1 +#include #include #include #include @@ -19,6 +20,9 @@ #include #include #include +#if __has_feature(ptrauth_intrinsics) +#include +#endif #include "pthread_impl.h" #include "fork_impl.h" #include "dynlink.h" @@ -45,6 +49,18 @@ static void (*error)(const char *, ...) = error_noop; #define container_of(p,t,m) ((t*)((char *)(p)-offsetof(t,m))) #define countof(a) ((sizeof (a))/(sizeof (a)[0])) +#ifndef TARGET_RELOCATE +#define TARGET_RELOCATE(...) 0 +#endif + +#ifndef DO_TARGET_RELR +#define DO_TARGET_RELR(...) +#endif + +#ifndef FPTR_CAST +#define FPTR_CAST(fty, p) ((fty)(p)) +#endif + struct debug { int ver; void *head; @@ -111,6 +127,11 @@ struct dso { size_t *got; } *funcdescs; size_t *got; +#ifdef __aarch64__ + /* PAuth core info as defined in PAUTHABIELF64: + * https://github.com/ARM-software/abi-aa/blob/2025Q1/pauthabielf64/pauthabielf64.rst#core-information */ + size_t* pauth; +#endif char buf[]; }; @@ -471,6 +492,9 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri case REL_GOT: case REL_PLT: *reloc_addr = sym_val + addend; + /* If AArch64 PAC is enabled and DT_AARCH64_PAC_PLT is present, sign the contents of R_AARCH64_JUMP_SLOT. + * Otherwise, do nothing. 
*/ + TARGET_RELOCATE(type, reloc_addr, (size_t)base, sym_val, addend, head == &ldso, dso->dynv, (uint64_t)error); break; case REL_USYMBOLIC: memcpy(reloc_addr, &(size_t){sym_val + addend}, sizeof(size_t)); @@ -518,6 +542,15 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri #endif case REL_TLSDESC: if (stride<3) addend = reloc_addr[!TLSDESC_BACKWARDS]; +#ifdef __aarch64__ + /* TODO: Submit implementation of undefined weak TLS symbols support to + * mainline musl when it's implemented for other architectures. + * The patch is work-in-progress. */ + if (sym && sym->st_info>>4 == STB_WEAK && sym->st_shndx == SHN_UNDEF) { + reloc_addr[0] = (size_t)__tlsdesc_undef_weak; + reloc_addr[1] = 0; + } else +#endif if (def.dso->tls_id > static_tls_cnt) { struct td_index *new = malloc(sizeof *new); if (!new) { @@ -533,7 +566,11 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri reloc_addr[0] = (size_t)__tlsdesc_dynamic; reloc_addr[1] = (size_t)new; } else { +#if __has_feature(ptrauth_intrinsics) && !__has_feature(ptrauth_elf_got) + reloc_addr[0] = (size_t)ptrauth_strip(&__tlsdesc_static, 0); +#else reloc_addr[0] = (size_t)__tlsdesc_static; +#endif #ifdef TLS_ABOVE_TP reloc_addr[1] = tls_val + def.dso->tls.offset + TPOFF_K + addend; @@ -549,8 +586,18 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri reloc_addr[0] = reloc_addr[1]; reloc_addr[1] = tmp; } +#if __has_feature(ptrauth_elf_got) + /* FIXME: actually, signing scheme is written in-place in relocation slot, and we should read and use that. + * However, the scheme is known (IA key + addr div for function and DA key + addr div for data). + * So, we just hard-code that. 
See also: + * https://github.com/ARM-software/abi-aa/blob/2025Q1/pauthabielf64/pauthabielf64.rst#default-signing-schema */ + reloc_addr[0] = (size_t)(ptrauth_auth_and_resign((void*)(reloc_addr[0]), 0, 0, 0, (size_t)(reloc_addr))); + reloc_addr[1] = (size_t)(ptrauth_sign_unauthenticated((void*)(reloc_addr[1]), 2, (size_t)(reloc_addr) + 8)); +#endif break; default: + if (TARGET_RELOCATE(type, reloc_addr, (size_t)base, sym_val, addend, head == &ldso, dso->dynv, (uint64_t)error)) + break; error("Error relocating %s: unsupported relocation type %d", dso->name, type); if (runtime) longjmp(*rtld_fail, 1); @@ -684,6 +731,84 @@ static void unmap_library(struct dso *dso) } } +#ifdef __aarch64__ + +/* See https://github.com/ARM-software/abi-aa/blob/2025Q1/pauthabielf64/pauthabielf64.rst#elf-marking */ +#define GNU_PROPERTY_AARCH64_FEATURE_PAUTH 0xc0000001 + +static uint32_t align8(uint32_t val) { + if (val % 8 == 0) + return val; + return val + 8 - (val % 8); +} + +static void get_pauth_core_info(struct dso *dso) { + for (int i = 0; i < dso->phnum; ++i) { + Phdr *ph = &dso->phdr[i]; + + /* Minimal GNU property section containing PAuth core info has 40 bytes size. */ + if (!(ph->p_type == PT_NOTE && ph->p_memsz >= 40)) + continue; + + uint32_t *note = laddr(dso, ph->p_vaddr); + uint32_t *note_arr_end = (uint32_t*)((uintptr_t)note + ph->p_memsz); + + for (; note != note_arr_end; + /* We can hardcode 8-byte alignment since this code runs only on AArch64. */ + note = (uint32_t*)((uintptr_t)note + 4 + 4 + align8(4 + note[0]) + align8(note[1]))) { + /* Note segment is ill-formed: last note information entry exceeds the right segment boundary. 
*/ + if (note > note_arr_end) return; + + if (!(note[0] == 4 && note[2] == NT_GNU_PROPERTY_TYPE_0 && strncmp((char*)&note[3], "GNU", 4) == 0)) + continue; + + uint32_t *prop = &note[4]; + uint32_t *prop_arr_end = (uint32_t*)((uintptr_t)prop + note[1]); + for (; prop != prop_arr_end; + /* We can hardcode 8-byte alignment since this code runs only on AArch64. */ + prop = (uint32_t*)((uintptr_t)prop + 4 + 4 + align8(prop[1]))) { + /* GNU property array is ill-formed: its last element end exceeds the right array boundary. */ + if (prop > prop_arr_end) return; + + if (prop[0] != GNU_PROPERTY_AARCH64_FEATURE_PAUTH) continue; + + /* PAuth GNU property must have exactly 16 bytes length: + * 8 bytes for platform and 8 bytes for version value. */ + if (prop[1] != 16) return; + + /* We do not expect multiple PAuth GNU properties. */ + if (dso->pauth != 0) return; + + dso->pauth = (size_t*)&prop[2]; + } + } + } +} + +static void print_pauth_core_info(size_t *pauth, const char *name) { + if (pauth == 0) { + dprintf(2, "%s: no PAuth core info\n", name); + return; + } + dprintf(2, "%s: (platform: 0x%" PRIx64 "; version: 0x%" PRIx64 ")\n", name, pauth[0], pauth[1]); +} + +static int check_pauth_core_info_compatibility(size_t *pauth1, const char *name1, size_t *pauth2, const char *name2) { + if (pauth1 == pauth2) + return 1; + + if (pauth1 == 0 || pauth2 == 0 || pauth1[0] != pauth2[0] || pauth1[1] != pauth2[1]) { + dprintf(2, "incompatible PAuth core info between %s and %s\n", name1, name2); + print_pauth_core_info(pauth1, name1); + print_pauth_core_info(pauth2, name2); + return 0; + } + + return 1; +} + +#endif + static void *map_library(int fd, struct dso *dso) { Ehdr buf[(896+sizeof(Ehdr))/sizeof(Ehdr)]; @@ -860,6 +985,9 @@ static void *map_library(int fd, struct dso *dso) dso->base = base; dso->dynv = laddr(dso, dyn); if (dso->tls.size) dso->tls.image = laddr(dso, tls_image); +#ifdef __aarch64__ + get_pauth_core_info(dso); +#endif free(allocated_buf); return map; noexec: @@ -1184,6
+1312,11 @@ static struct dso *load_library(const char *name, struct dso *needed_by) close(fd); if (!map) return 0; +#ifdef __aarch64__ + if (!check_pauth_core_info_compatibility(head->pauth, head->name, temp_dso.pauth, name)) + return 0; +#endif + /* Avoid the danger of getting two versions of libc mapped into the * same process when an absolute pathname was used. The symbols * checked are chosen to catch both musl and glibc, and to avoid @@ -1421,6 +1554,9 @@ static void reloc_all(struct dso *p) do_relocs(p, laddr(p, dyn[DT_RELA]), dyn[DT_RELASZ], 3); if (!DL_FDPIC) do_relr_relocs(p, laddr(p, dyn[DT_RELR]), dyn[DT_RELRSZ]); + if (p != &ldso) { + DO_TARGET_RELR((uint64_t)p->base, p->dynv); + } if (head != &ldso && p->relro_start != p->relro_end) { long ret = __syscall(SYS_mprotect, laddr(p, p->relro_start), @@ -1464,6 +1600,9 @@ static void kernel_mapped_dso(struct dso *p) p->map = p->base + min_addr; p->map_len = max_addr - min_addr; p->kernel_mapped = 1; +#ifdef __aarch64__ + get_pauth_core_info(p); +#endif } void __libc_exit_fini() @@ -1487,7 +1626,20 @@ void __libc_exit_fini() if (dyn[0] & (1<