From 5801720868e1330fce4a544f2c28824fa39ada00 Mon Sep 17 00:00:00 2001 From: henderkes Date: Fri, 5 Jun 2026 18:42:09 +0700 Subject: [PATCH 1/4] move EG() and CG() to __thread storage --- TSRM/TSRM.c | 60 ++++++++++++++++++++++++++++++++++---- TSRM/TSRM.h | 4 ++- Zend/Zend.m4 | 17 +++++++++++ Zend/zend.c | 13 +++++++++ Zend/zend_globals_macros.h | 14 +++++++-- 5 files changed, 100 insertions(+), 8 deletions(-) diff --git a/TSRM/TSRM.c b/TSRM/TSRM.c index e99993204b6f..d55a70c02ed2 100644 --- a/TSRM/TSRM.c +++ b/TSRM/TSRM.c @@ -42,6 +42,8 @@ typedef struct { ts_allocate_ctor ctor; ts_allocate_dtor dtor; size_t fast_offset; + /* When set, storage comes from __thread memory instead of being allocated by TSRM. */ + void *(*tls_addr)(void); int done; } tsrm_resource_type; @@ -163,14 +165,19 @@ TSRM_API bool tsrm_startup(int expected_threads, int expected_resources, int deb static void ts_free_resources(tsrm_tls_entry *thread_resources) { + bool own_thread = thread_resources->thread_id == tsrm_thread_id(); + /* Need to destroy in reverse order to respect dependencies. 
*/ for (int i = thread_resources->count - 1; i >= 0; i--) { if (!resource_types_table[i].done) { + if (resource_types_table[i].tls_addr && !own_thread) { + continue; + } if (resource_types_table[i].dtor) { resource_types_table[i].dtor(thread_resources->storage[i]); } - if (!resource_types_table[i].fast_offset) { + if (!resource_types_table[i].fast_offset && !resource_types_table[i].tls_addr) { free(thread_resources->storage[i]); } } @@ -256,7 +263,10 @@ static void tsrm_update_active_threads(void) p->storage = (void *) realloc(p->storage, sizeof(void *)*id_count); for (j=p->count; j<id_count; j++) { - if (resource_types_table[j].fast_offset) { + if (resource_types_table[j].tls_addr) { + TSRM_ASSERT(p->thread_id == tsrm_thread_id()); + p->storage[j] = resource_types_table[j].tls_addr(); + } else if (resource_types_table[j].fast_offset) { p->storage[j] = (void *) (((char*)p) + resource_types_table[j].fast_offset); } else { p->storage[j] = (void *) malloc(resource_types_table[j].size); @@ -301,6 +311,7 @@ TSRM_API ts_rsrc_id ts_allocate_id(ts_rsrc_id *rsrc_id, size_t size, ts_allocate resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].ctor = ctor; resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].dtor = dtor; resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].fast_offset = 0; + resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].tls_addr = NULL; resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].done = 0; tsrm_update_active_threads(); @@ -359,6 +370,7 @@ TSRM_API ts_rsrc_id ts_allocate_fast_id(ts_rsrc_id *rsrc_id, size_t *offset, siz resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].ctor = ctor; resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].dtor = dtor; resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].fast_offset = *offset; + resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].tls_addr = NULL; resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].done = 0; tsrm_update_active_threads(); @@ -368,6 +380,41 @@ TSRM_API ts_rsrc_id ts_allocate_fast_id(ts_rsrc_id *rsrc_id, size_t *offset, siz return *rsrc_id; }/*}}}*/ +/* allocates a resource id whose 
per-thread storage is a native __thread block */ +TSRM_API ts_rsrc_id ts_allocate_tls_id(ts_rsrc_id *rsrc_id, void *(*tls_addr)(void), size_t size, ts_allocate_ctor ctor, ts_allocate_dtor dtor) +{/*{{{*/ + TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Obtaining a new TLS resource id, %d bytes", size)); + + tsrm_mutex_lock(tsmm_mutex); + + *rsrc_id = TSRM_SHUFFLE_RSRC_ID(id_count++); + + if (resource_types_table_size < id_count) { + tsrm_resource_type *_tmp; + _tmp = (tsrm_resource_type *) realloc(resource_types_table, sizeof(tsrm_resource_type)*id_count); + if (!_tmp) { + TSRM_ERROR((TSRM_ERROR_LEVEL_ERROR, "Unable to allocate storage for resource")); + *rsrc_id = 0; + tsrm_mutex_unlock(tsmm_mutex); + return 0; + } + resource_types_table = _tmp; + resource_types_table_size = id_count; + } + resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].size = size; + resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].ctor = ctor; + resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].dtor = dtor; + resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].fast_offset = 0; + resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].tls_addr = tls_addr; + resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].done = 0; + + tsrm_update_active_threads(); + tsrm_mutex_unlock(tsmm_mutex); + + TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Successfully allocated new TLS resource id %d", *rsrc_id)); + return *rsrc_id; +}/*}}}*/ + static void set_thread_local_storage_resource_to(tsrm_tls_entry *thread_resource) { tsrm_tls_set(thread_resource); @@ -397,7 +444,9 @@ static void allocate_new_resource(tsrm_tls_entry **thread_resources_ptr, THREAD_ if (resource_types_table[i].done) { (*thread_resources_ptr)->storage[i] = NULL; } else { - if (resource_types_table[i].fast_offset) { + if (resource_types_table[i].tls_addr) { + (*thread_resources_ptr)->storage[i] = resource_types_table[i].tls_addr(); + } else if (resource_types_table[i].fast_offset) { (*thread_resources_ptr)->storage[i] = (void *) 
(((char*)(*thread_resources_ptr)) + resource_types_table[i].fast_offset); } else { (*thread_resources_ptr)->storage[i] = (void *) malloc(resource_types_table[i].size); @@ -485,7 +534,8 @@ TSRM_API void *ts_resource_ex(ts_rsrc_id id, THREAD_T *th_id) /* In case that extensions don't use the pointer passed from the dtor, but incorrectly * use the global pointer, we need to setup the global pointer temporarily here. */ set_thread_local_storage_resource_to(thread_resources); - /* Free up the old resource from the old thread instance */ + /* This entry belongs to a dead thread whose id was recycled; its __thread storage is already gone, so clear thread_id to make ts_free_resources() skip the TLS-backed resources. */ + thread_resources->thread_id = 0; ts_free_resources(thread_resources); free(thread_resources); /* Allocate a new resource at the same point in the linked list, and relink the next pointer */ @@ -559,7 +609,7 @@ void ts_free_id(ts_rsrc_id id) if (resource_types_table[rsrc_id].dtor) { resource_types_table[rsrc_id].dtor(p->storage[rsrc_id]); } - if (!resource_types_table[rsrc_id].fast_offset) { + if (!resource_types_table[rsrc_id].fast_offset && !resource_types_table[rsrc_id].tls_addr) { free(p->storage[rsrc_id]); } } diff --git a/TSRM/TSRM.h b/TSRM/TSRM.h index ea13552c8374..237cb8fc1c4c 100644 --- a/TSRM/TSRM.h +++ b/TSRM/TSRM.h @@ -93,6 +93,8 @@ TSRM_API ts_rsrc_id ts_allocate_id(ts_rsrc_id *rsrc_id, size_t size, ts_allocate /* Fast resource in reserved (pre-allocated) space */ TSRM_API void tsrm_reserve(size_t size); TSRM_API ts_rsrc_id ts_allocate_fast_id(ts_rsrc_id *rsrc_id, size_t *offset, size_t size, ts_allocate_ctor ctor, ts_allocate_dtor dtor); +/* Must be called at startup before any other thread exists. 
*/ +TSRM_API ts_rsrc_id ts_allocate_tls_id(ts_rsrc_id *rsrc_id, void *(*tls_addr)(void), size_t size, ts_allocate_ctor ctor, ts_allocate_dtor dtor); /* fetches the requested resource for the current thread */ TSRM_API void *ts_resource_ex(ts_rsrc_id id, THREAD_T *th_id); @@ -155,7 +157,7 @@ TSRM_API bool tsrm_is_managed_thread(void); #if !__has_attribute(tls_model) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__MUSL__) || defined(__HAIKU__) # define TSRM_TLS_MODEL_ATTR # define TSRM_TLS_MODEL_DEFAULT -#elif __PIC__ +#elif __PIC__ && !defined(__PIE__) # define TSRM_TLS_MODEL_ATTR __attribute__((tls_model("initial-exec"))) # define TSRM_TLS_MODEL_INITIAL_EXEC #else diff --git a/Zend/Zend.m4 b/Zend/Zend.m4 index d0c682e8e659..ed5a59d06672 100644 --- a/Zend/Zend.m4 +++ b/Zend/Zend.m4 @@ -178,6 +178,23 @@ AC_MSG_RESULT([$ZEND_ZTS]) AS_VAR_IF([ZEND_ZTS], [yes], [ AC_DEFINE([ZTS], [1], [Define to 1 if thread safety (ZTS) is enabled.]) AS_VAR_APPEND([CFLAGS], [" -DZTS"]) + + AC_CACHE_CHECK([for __thread support], [php_cv_have_thread_local], [ + AC_LINK_IFELSE([AC_LANG_PROGRAM( + [[static __thread int tls_var;]], + [[tls_var = 1; return tls_var;]])], + [php_cv_have_thread_local=yes], [php_cv_have_thread_local=no]) + ]) + AS_VAR_IF([php_cv_have_thread_local], [yes], [ + AC_DEFINE([ZEND_EG_TLS], [1], + [Define to hold EG()/CG() in a __thread variable under ZTS.]) + AS_VAR_APPEND([CFLAGS], [" -DZEND_EG_TLS"]) + + dnl -mtls-size=12 drops the dead high-bits offset add from TLS access, + dnl valid while the thread-local block stays under 4 KiB. 
+ AX_CHECK_COMPILE_FLAG([-mtls-size=12], + [AS_VAR_APPEND([CFLAGS], [" -mtls-size=12"])]) + ]) ]) AC_MSG_CHECKING([whether to enable Zend debugging]) diff --git a/Zend/zend.c b/Zend/zend.c index f16b1a30dbbc..f36c49247cc2 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -54,6 +54,14 @@ ZEND_API int compiler_globals_id; ZEND_API int executor_globals_id; ZEND_API size_t compiler_globals_offset; ZEND_API size_t executor_globals_offset; +# ifdef ZEND_EG_TLS +ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_executor_globals executor_globals_tls; +ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_compiler_globals compiler_globals_tls; +/* ts_allocate_tls_id takes a callback so each thread resolves its own block. + * A plain &..._tls would capture only the registering thread's address. */ +static void *executor_globals_tls_addr(void) { return &executor_globals_tls; } +static void *compiler_globals_tls_addr(void) { return &compiler_globals_tls; } +# endif static HashTable *global_function_table = NULL; static HashTable *global_class_table = NULL; static HashTable *global_constants_table = NULL; @@ -1019,8 +1027,13 @@ void zend_startup(zend_utility_functions *utility_functions) /* {{{ */ zend_init_rsrc_list_dtors(); #ifdef ZTS +#ifdef ZEND_EG_TLS + ts_allocate_tls_id(&compiler_globals_id, compiler_globals_tls_addr, sizeof(zend_compiler_globals), (ts_allocate_ctor) compiler_globals_ctor, (ts_allocate_dtor) compiler_globals_dtor); + ts_allocate_tls_id(&executor_globals_id, executor_globals_tls_addr, sizeof(zend_executor_globals), (ts_allocate_ctor) executor_globals_ctor, (ts_allocate_dtor) executor_globals_dtor); +#else ts_allocate_fast_id(&compiler_globals_id, &compiler_globals_offset, sizeof(zend_compiler_globals), (ts_allocate_ctor) compiler_globals_ctor, (ts_allocate_dtor) compiler_globals_dtor); ts_allocate_fast_id(&executor_globals_id, &executor_globals_offset, sizeof(zend_executor_globals), (ts_allocate_ctor) executor_globals_ctor, (ts_allocate_dtor) executor_globals_dtor); +#endif 
ts_allocate_fast_id(&language_scanner_globals_id, &language_scanner_globals_offset, sizeof(zend_php_scanner_globals), (ts_allocate_ctor) php_scanner_globals_ctor, NULL); ts_allocate_fast_id(&ini_scanner_globals_id, &ini_scanner_globals_offset, sizeof(zend_ini_scanner_globals), (ts_allocate_ctor) ini_scanner_globals_ctor, NULL); compiler_globals = ts_resource(compiler_globals_id); diff --git a/Zend/zend_globals_macros.h b/Zend/zend_globals_macros.h index bde10a0989d1..3f8c17cbaebd 100644 --- a/Zend/zend_globals_macros.h +++ b/Zend/zend_globals_macros.h @@ -30,7 +30,12 @@ BEGIN_EXTERN_C() /* Compiler */ #ifdef ZTS -# define CG(v) ZEND_TSRMG_FAST(compiler_globals_offset, zend_compiler_globals *, v) +# ifdef ZEND_EG_TLS +extern ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_compiler_globals compiler_globals_tls; +# define CG(v) (compiler_globals_tls.v) +# else +# define CG(v) ZEND_TSRMG_FAST(compiler_globals_offset, zend_compiler_globals *, v) +# endif #else # define CG(v) (compiler_globals.v) extern ZEND_API struct _zend_compiler_globals compiler_globals; @@ -40,7 +45,12 @@ ZEND_API int zendparse(void); /* Executor */ #ifdef ZTS -# define EG(v) ZEND_TSRMG_FAST(executor_globals_offset, zend_executor_globals *, v) +# ifdef ZEND_EG_TLS +extern ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_executor_globals executor_globals_tls; +# define EG(v) (executor_globals_tls.v) +# else +# define EG(v) ZEND_TSRMG_FAST(executor_globals_offset, zend_executor_globals *, v) +# endif #else # define EG(v) (executor_globals.v) extern ZEND_API zend_executor_globals executor_globals; From 08c631902d0a66bfffa663f321ea7552b97ef8e0 Mon Sep 17 00:00:00 2001 From: henderkes Date: Fri, 5 Jun 2026 18:42:09 +0700 Subject: [PATCH 2/4] fix jit --- ext/opcache/jit/ir/ir_aarch64.dasc | 6 +- ext/opcache/jit/zend_jit_ir.c | 118 ++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 5 deletions(-) diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 
fc4bb84f1e05..04d46cf0dfa6 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -5868,8 +5868,12 @@ static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) | ldr Rx(reg), [Rx(reg), #insn->op3] || } ||# else +|| /* op2 == 0 with no index requests the bare thread pointer (used to form +|| * &EG/&CG with an add); a real TLS var never sits at tprel offset 0. */ +|| if (insn->op2 != 0 || insn->op3 != IR_NULL) { ||//??? IR_ASSERT(insn->op2 <= LDR_STR_PIMM64); -| ldr Rx(reg), [Rx(reg), #insn->op2] +| ldr Rx(reg), [Rx(reg), #insn->op2] +|| } ||# endif ||#endif if (IR_REG_SPILLED(ctx->regs[def][0])) { diff --git a/ext/opcache/jit/zend_jit_ir.c b/ext/opcache/jit/zend_jit_ir.c index cf43d3ad840f..d9789682aab8 100644 --- a/ext/opcache/jit/zend_jit_ir.c +++ b/ext/opcache/jit/zend_jit_ir.c @@ -205,17 +205,52 @@ static size_t tsrm_ls_cache_tcb_offset = 0; static size_t tsrm_tls_index = -1; static size_t tsrm_tls_offset = -1; +# ifdef ZEND_EG_TLS +/* When nonzero, &executor_globals_tls/&compiler_globals_tls equal the thread + * pointer plus this offset, so the JIT forms them without a runtime call. */ +static size_t eg_tls_tcb_offset = 0; +static size_t cg_tls_tcb_offset = 0; +/* gottpoff yields the offset from the %fs-based thread pointer that ir_TLS(0) + * loads. */ +# if defined(__ELF__) && defined(__x86_64__) && defined(__GNUC__) && !defined(TSRM_TLS_MODEL_DEFAULT) +# define ZEND_JIT_TLS_TCB_OFFSET(sym) __extension__({ \ + size_t _off; \ + __asm__ ("movq " #sym "@gottpoff(%%rip),%0" : "=r" (_off)); \ + _off; \ + }) +# elif defined(__ELF__) && defined(__aarch64__) && !defined(__APPLE__) && \ + (defined(__GNUC__) || defined(__clang__)) +/* The TLS variable sits at a fixed offset from tpidr_el0 (the thread pointer + * the JIT reads with mrs); compute it once on the main thread. 
Subtracting the + * thread pointer is model-independent (works for both local- and initial-exec) + * and matches tsrm_get_ls_cache_tcb_offset()'s tprel reasoning. */ +# define ZEND_JIT_TLS_TCB_OFFSET(sym) __extension__({ \ + char *_tp; \ + __asm__ ("mrs %0, tpidr_el0" : "=r" (_tp)); \ + (size_t)((char*)&(sym) - _tp); \ + }) +# else +# define ZEND_JIT_TLS_TCB_OFFSET(sym) ((size_t)0) +# endif +# endif + # define EG_TLS_OFFSET(field) \ (executor_globals_offset + offsetof(zend_executor_globals, field)) # define CG_TLS_OFFSET(field) \ (compiler_globals_offset + offsetof(zend_compiler_globals, field)) -# define jit_EG(_field) \ +# ifdef ZEND_EG_TLS +# define jit_EG(_field) \ + ir_ADD_OFFSET(jit_EG_base(jit), offsetof(zend_executor_globals, _field)) +# define jit_CG(_field) \ + ir_ADD_OFFSET(jit_CG_base(jit), offsetof(zend_compiler_globals, _field)) +# else +# define jit_EG(_field) \ ir_ADD_OFFSET(jit_TLS(jit), EG_TLS_OFFSET(_field)) - -# define jit_CG(_field) \ +# define jit_CG(_field) \ ir_ADD_OFFSET(jit_TLS(jit), CG_TLS_OFFSET(_field)) +# endif #else @@ -299,6 +334,11 @@ typedef struct _zend_jit_ctx { int b; /* current basic block number or -1 */ #ifdef ZTS ir_ref tls; +# ifdef ZEND_EG_TLS + ir_ref tp; /* cached thread pointer for &EG/&CG */ + ir_ref eg_tls; /* cached base of __thread executor_globals_tls */ + ir_ref cg_tls; /* cached base of __thread compiler_globals_tls */ +# endif #endif ir_ref fp; ir_ref poly_func_ref; /* restored from parent trace snapshot */ @@ -494,7 +534,64 @@ static void * ZEND_FASTCALL zend_jit_get_tsrm_ls_cache(void) return _tsrm_ls_cache; } -static ir_ref jit_TLS(zend_jit_ctx *jit) +# ifdef ZEND_EG_TLS +static void * ZEND_FASTCALL zend_jit_get_eg_tls(void) +{ + return &executor_globals_tls; +} +static void * ZEND_FASTCALL zend_jit_get_cg_tls(void) +{ + return &compiler_globals_tls; +} + +/* Walk the control chain back from the current point: reuse the cached ref if we + * reach it (it still dominates here), but bail at a block start or a 
call, since + * the cached value lives in a caller-saved register that a call would clobber. */ +static ir_ref jit_tls_reuse(zend_jit_ctx *jit, ir_ref cached) +{ + ir_ref ref = jit->ctx.control; + + while (cached) { + if (ref == cached) { + return cached; + } + ir_insn *insn = &jit->ctx.ir_base[ref]; + if (insn->op >= IR_START || insn->op == IR_CALL) { + break; + } + ref = insn->op1; + } + return IR_UNUSED; +} + +/* Thread pointer, cached per basic block, used to form &EG/&CG with an add. */ +static ir_ref jit_TP(zend_jit_ctx *jit) +{ + ZEND_ASSERT(jit->ctx.control); + if (!jit_tls_reuse(jit, jit->tp)) { + jit->tp = ir_TLS(0, IR_NULL); + } + return jit->tp; +} + +/* Used where the TCB offset is unknown: resolve the base via a cached call. */ +static ir_ref jit_GLOBALS_TLS_call(zend_jit_ctx *jit, ir_ref *cache, const void *fn) +{ + ZEND_ASSERT(jit->ctx.control); + if (!jit_tls_reuse(jit, *cache)) { + *cache = ir_CALL(IR_ADDR, ir_CONST_FC_FUNC(fn)); + } + return *cache; +} +# define jit_EG_base(jit) (eg_tls_tcb_offset \ + ? ir_ADD_OFFSET(jit_TP(jit), eg_tls_tcb_offset) \ + : jit_GLOBALS_TLS_call((jit), &(jit)->eg_tls, zend_jit_get_eg_tls)) +# define jit_CG_base(jit) (cg_tls_tcb_offset \ + ? 
ir_ADD_OFFSET(jit_TP(jit), cg_tls_tcb_offset) \ + : jit_GLOBALS_TLS_call((jit), &(jit)->cg_tls, zend_jit_get_cg_tls)) +# endif + +static ZEND_ATTRIBUTE_UNUSED ir_ref jit_TLS(zend_jit_ctx *jit) { ZEND_ASSERT(jit->ctx.control); if (jit->tls) { @@ -2821,6 +2918,11 @@ static void zend_jit_init_ctx(zend_jit_ctx *jit, uint32_t flags) jit->b = -1; #ifdef ZTS jit->tls = IR_UNUSED; +# ifdef ZEND_EG_TLS + jit->tp = IR_UNUSED; + jit->eg_tls = IR_UNUSED; + jit->cg_tls = IR_UNUSED; +# endif #endif jit->fp = IR_UNUSED; jit->poly_func_ref = IR_UNUSED; @@ -3215,6 +3317,10 @@ static void zend_jit_setup_disasm(void) REGISTER_DATA(CG(map_ptr_base)); #else /* ZTS */ REGISTER_HELPER(zend_jit_get_tsrm_ls_cache); +# ifdef ZEND_EG_TLS + REGISTER_HELPER(zend_jit_get_eg_tls); + REGISTER_HELPER(zend_jit_get_cg_tls); +# endif #endif #endif } @@ -3434,6 +3540,10 @@ static void zend_jit_setup(bool reattached) zend_accel_error(ACCEL_LOG_INFO, "Could not get _tsrm_ls_cache offsets, will fallback to runtime resolution"); } +# ifdef ZEND_EG_TLS + eg_tls_tcb_offset = ZEND_JIT_TLS_TCB_OFFSET(executor_globals_tls); + cg_tls_tcb_offset = ZEND_JIT_TLS_TCB_OFFSET(compiler_globals_tls); +# endif #endif #if !defined(ZEND_WIN32) && !defined(IR_TARGET_AARCH64) From 7d846b42515a90f74cec98c36a80cb5144bc4879 Mon Sep 17 00:00:00 2001 From: henderkes Date: Sat, 6 Jun 2026 12:46:09 +0700 Subject: [PATCH 3/4] remove unused old paths --- Zend/Zend.m4 | 20 ++----- Zend/zend.c | 9 ---- Zend/zend_globals.h | 2 - Zend/zend_globals_macros.h | 12 +---- ext/opcache/jit/zend_jit_ir.c | 99 ++++------------------------------- 5 files changed, 16 insertions(+), 126 deletions(-) diff --git a/Zend/Zend.m4 b/Zend/Zend.m4 index ed5a59d06672..319ddca1e025 100644 --- a/Zend/Zend.m4 +++ b/Zend/Zend.m4 @@ -179,22 +179,10 @@ AS_VAR_IF([ZEND_ZTS], [yes], [ AC_DEFINE([ZTS], [1], [Define to 1 if thread safety (ZTS) is enabled.]) AS_VAR_APPEND([CFLAGS], [" -DZTS"]) - AC_CACHE_CHECK([for __thread support], [php_cv_have_thread_local], [ - 
AC_LINK_IFELSE([AC_LANG_PROGRAM( - [[static __thread int tls_var;]], - [[tls_var = 1; return tls_var;]])], - [php_cv_have_thread_local=yes], [php_cv_have_thread_local=no]) - ]) - AS_VAR_IF([php_cv_have_thread_local], [yes], [ - AC_DEFINE([ZEND_EG_TLS], [1], - [Define to hold EG()/CG() in a __thread variable under ZTS.]) - AS_VAR_APPEND([CFLAGS], [" -DZEND_EG_TLS"]) - - dnl -mtls-size=12 drops the dead high-bits offset add from TLS access, - dnl valid while the thread-local block stays under 4 KiB. - AX_CHECK_COMPILE_FLAG([-mtls-size=12], - [AS_VAR_APPEND([CFLAGS], [" -mtls-size=12"])]) - ]) + dnl -mtls-size=12 drops the dead high-bits offset add from TLS access, + dnl valid while the thread-local block stays under 4 KiB. + AX_CHECK_COMPILE_FLAG([-mtls-size=12], + [AS_VAR_APPEND([CFLAGS], [" -mtls-size=12"])]) ]) AC_MSG_CHECKING([whether to enable Zend debugging]) diff --git a/Zend/zend.c b/Zend/zend.c index f36c49247cc2..544d74c6d65a 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -52,16 +52,12 @@ static bool startup_done = false; #ifdef ZTS ZEND_API int compiler_globals_id; ZEND_API int executor_globals_id; -ZEND_API size_t compiler_globals_offset; -ZEND_API size_t executor_globals_offset; -# ifdef ZEND_EG_TLS ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_executor_globals executor_globals_tls; ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_compiler_globals compiler_globals_tls; /* ts_allocate_tls_id takes a callback so each thread resolves its own block. * A plain &..._tls would capture only the registering thread's address. 
*/ static void *executor_globals_tls_addr(void) { return &executor_globals_tls; } static void *compiler_globals_tls_addr(void) { return &compiler_globals_tls; } -# endif static HashTable *global_function_table = NULL; static HashTable *global_class_table = NULL; static HashTable *global_constants_table = NULL; @@ -1027,13 +1023,8 @@ void zend_startup(zend_utility_functions *utility_functions) /* {{{ */ zend_init_rsrc_list_dtors(); #ifdef ZTS -#ifdef ZEND_EG_TLS ts_allocate_tls_id(&compiler_globals_id, compiler_globals_tls_addr, sizeof(zend_compiler_globals), (ts_allocate_ctor) compiler_globals_ctor, (ts_allocate_dtor) compiler_globals_dtor); ts_allocate_tls_id(&executor_globals_id, executor_globals_tls_addr, sizeof(zend_executor_globals), (ts_allocate_ctor) executor_globals_ctor, (ts_allocate_dtor) executor_globals_dtor); -#else - ts_allocate_fast_id(&compiler_globals_id, &compiler_globals_offset, sizeof(zend_compiler_globals), (ts_allocate_ctor) compiler_globals_ctor, (ts_allocate_dtor) compiler_globals_dtor); - ts_allocate_fast_id(&executor_globals_id, &executor_globals_offset, sizeof(zend_executor_globals), (ts_allocate_ctor) executor_globals_ctor, (ts_allocate_dtor) executor_globals_dtor); -#endif ts_allocate_fast_id(&language_scanner_globals_id, &language_scanner_globals_offset, sizeof(zend_php_scanner_globals), (ts_allocate_ctor) php_scanner_globals_ctor, NULL); ts_allocate_fast_id(&ini_scanner_globals_id, &ini_scanner_globals_offset, sizeof(zend_ini_scanner_globals), (ts_allocate_ctor) ini_scanner_globals_ctor, NULL); compiler_globals = ts_resource(compiler_globals_id); diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 8257df32e831..8402faaf8116 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -51,8 +51,6 @@ BEGIN_EXTERN_C() ZEND_API extern int compiler_globals_id; ZEND_API extern int executor_globals_id; -ZEND_API extern size_t compiler_globals_offset; -ZEND_API extern size_t executor_globals_offset; END_EXTERN_C() #endif diff --git 
a/Zend/zend_globals_macros.h b/Zend/zend_globals_macros.h index 3f8c17cbaebd..7c30c49c57d9 100644 --- a/Zend/zend_globals_macros.h +++ b/Zend/zend_globals_macros.h @@ -30,12 +30,8 @@ BEGIN_EXTERN_C() /* Compiler */ #ifdef ZTS -# ifdef ZEND_EG_TLS extern ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_compiler_globals compiler_globals_tls; -# define CG(v) (compiler_globals_tls.v) -# else -# define CG(v) ZEND_TSRMG_FAST(compiler_globals_offset, zend_compiler_globals *, v) -# endif +# define CG(v) (compiler_globals_tls.v) #else # define CG(v) (compiler_globals.v) extern ZEND_API struct _zend_compiler_globals compiler_globals; @@ -45,12 +41,8 @@ ZEND_API int zendparse(void); /* Executor */ #ifdef ZTS -# ifdef ZEND_EG_TLS extern ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_executor_globals executor_globals_tls; -# define EG(v) (executor_globals_tls.v) -# else -# define EG(v) ZEND_TSRMG_FAST(executor_globals_offset, zend_executor_globals *, v) -# endif +# define EG(v) (executor_globals_tls.v) #else # define EG(v) (executor_globals.v) extern ZEND_API zend_executor_globals executor_globals; diff --git a/ext/opcache/jit/zend_jit_ir.c b/ext/opcache/jit/zend_jit_ir.c index d9789682aab8..91548eca45c3 100644 --- a/ext/opcache/jit/zend_jit_ir.c +++ b/ext/opcache/jit/zend_jit_ir.c @@ -201,56 +201,35 @@ static uint32_t default_mflags = 0; static bool delayed_call_chain = false; // TODO: remove this var (use jit->delayed_call_level) ??? #ifdef ZTS -static size_t tsrm_ls_cache_tcb_offset = 0; -static size_t tsrm_tls_index = -1; -static size_t tsrm_tls_offset = -1; - -# ifdef ZEND_EG_TLS -/* When nonzero, &executor_globals_tls/&compiler_globals_tls equal the thread - * pointer plus this offset, so the JIT forms them without a runtime call. */ static size_t eg_tls_tcb_offset = 0; static size_t cg_tls_tcb_offset = 0; /* gottpoff yields the offset from the %fs-based thread pointer that ir_TLS(0) * loads. 
*/ -# if defined(__ELF__) && defined(__x86_64__) && defined(__GNUC__) && !defined(TSRM_TLS_MODEL_DEFAULT) -# define ZEND_JIT_TLS_TCB_OFFSET(sym) __extension__({ \ +# if defined(__ELF__) && defined(__x86_64__) && defined(__GNUC__) && !defined(TSRM_TLS_MODEL_DEFAULT) +# define ZEND_JIT_TLS_TCB_OFFSET(sym) __extension__({ \ size_t _off; \ __asm__ ("movq " #sym "@gottpoff(%%rip),%0" : "=r" (_off)); \ _off; \ }) -# elif defined(__ELF__) && defined(__aarch64__) && !defined(__APPLE__) && \ +# elif defined(__ELF__) && defined(__aarch64__) && !defined(__APPLE__) && \ (defined(__GNUC__) || defined(__clang__)) /* The TLS variable sits at a fixed offset from tpidr_el0 (the thread pointer * the JIT reads with mrs); compute it once on the main thread. Subtracting the * thread pointer is model-independent (works for both local- and initial-exec) * and matches tsrm_get_ls_cache_tcb_offset()'s tprel reasoning. */ -# define ZEND_JIT_TLS_TCB_OFFSET(sym) __extension__({ \ +# define ZEND_JIT_TLS_TCB_OFFSET(sym) __extension__({ \ char *_tp; \ __asm__ ("mrs %0, tpidr_el0" : "=r" (_tp)); \ (size_t)((char*)&(sym) - _tp); \ }) -# else -# define ZEND_JIT_TLS_TCB_OFFSET(sym) ((size_t)0) -# endif +# else +# define ZEND_JIT_TLS_TCB_OFFSET(sym) ((size_t)0) # endif -# define EG_TLS_OFFSET(field) \ - (executor_globals_offset + offsetof(zend_executor_globals, field)) - -# define CG_TLS_OFFSET(field) \ - (compiler_globals_offset + offsetof(zend_compiler_globals, field)) - -# ifdef ZEND_EG_TLS -# define jit_EG(_field) \ +# define jit_EG(_field) \ ir_ADD_OFFSET(jit_EG_base(jit), offsetof(zend_executor_globals, _field)) -# define jit_CG(_field) \ +# define jit_CG(_field) \ ir_ADD_OFFSET(jit_CG_base(jit), offsetof(zend_compiler_globals, _field)) -# else -# define jit_EG(_field) \ - ir_ADD_OFFSET(jit_TLS(jit), EG_TLS_OFFSET(_field)) -# define jit_CG(_field) \ - ir_ADD_OFFSET(jit_TLS(jit), CG_TLS_OFFSET(_field)) -# endif #else @@ -333,12 +312,9 @@ typedef struct _zend_jit_ctx { uint32_t 
delayed_call_level; int b; /* current basic block number or -1 */ #ifdef ZTS - ir_ref tls; -# ifdef ZEND_EG_TLS ir_ref tp; /* cached thread pointer for &EG/&CG */ ir_ref eg_tls; /* cached base of __thread executor_globals_tls */ ir_ref cg_tls; /* cached base of __thread compiler_globals_tls */ -# endif #endif ir_ref fp; ir_ref poly_func_ref; /* restored from parent trace snapshot */ @@ -529,12 +505,6 @@ static const char* zend_reg_name(int8_t reg) /* IR helpers */ #ifdef ZTS -static void * ZEND_FASTCALL zend_jit_get_tsrm_ls_cache(void) -{ - return _tsrm_ls_cache; -} - -# ifdef ZEND_EG_TLS static void * ZEND_FASTCALL zend_jit_get_eg_tls(void) { return &executor_globals_tls; @@ -583,44 +553,12 @@ static ir_ref jit_GLOBALS_TLS_call(zend_jit_ctx *jit, ir_ref *cache, const void } return *cache; } -# define jit_EG_base(jit) (eg_tls_tcb_offset \ +# define jit_EG_base(jit) (eg_tls_tcb_offset \ ? ir_ADD_OFFSET(jit_TP(jit), eg_tls_tcb_offset) \ : jit_GLOBALS_TLS_call((jit), &(jit)->eg_tls, zend_jit_get_eg_tls)) -# define jit_CG_base(jit) (cg_tls_tcb_offset \ +# define jit_CG_base(jit) (cg_tls_tcb_offset \ ? ir_ADD_OFFSET(jit_TP(jit), cg_tls_tcb_offset) \ : jit_GLOBALS_TLS_call((jit), &(jit)->cg_tls, zend_jit_get_cg_tls)) -# endif - -static ZEND_ATTRIBUTE_UNUSED ir_ref jit_TLS(zend_jit_ctx *jit) -{ - ZEND_ASSERT(jit->ctx.control); - if (jit->tls) { - /* Emit "TLS" once for basic block */ - ir_insn *insn; - ir_ref ref = jit->ctx.control; - - while (1) { - if (ref == jit->tls) { - return jit->tls; - } - insn = &jit->ctx.ir_base[ref]; - if (insn->op >= IR_START || insn->op == IR_CALL) { - break; - } - ref = insn->op1; - } - } - - if (tsrm_ls_cache_tcb_offset == 0 && tsrm_tls_index == -1) { - jit->tls = ir_CALL(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_get_tsrm_ls_cache)); - } else { - jit->tls = ir_TLS( - tsrm_ls_cache_tcb_offset ? tsrm_ls_cache_tcb_offset : tsrm_tls_index, - tsrm_ls_cache_tcb_offset ? 
IR_NULL : tsrm_tls_offset); - } - - return jit->tls; -} #endif static ir_ref jit_CONST_ADDR(zend_jit_ctx *jit, uintptr_t addr) @@ -2917,12 +2855,9 @@ static void zend_jit_init_ctx(zend_jit_ctx *jit, uint32_t flags) delayed_call_chain = false; jit->b = -1; #ifdef ZTS - jit->tls = IR_UNUSED; -# ifdef ZEND_EG_TLS jit->tp = IR_UNUSED; jit->eg_tls = IR_UNUSED; jit->cg_tls = IR_UNUSED; -# endif #endif jit->fp = IR_UNUSED; jit->poly_func_ref = IR_UNUSED; @@ -3316,11 +3251,8 @@ static void zend_jit_setup_disasm(void) REGISTER_DATA(CG(map_ptr_base)); #else /* ZTS */ - REGISTER_HELPER(zend_jit_get_tsrm_ls_cache); -# ifdef ZEND_EG_TLS REGISTER_HELPER(zend_jit_get_eg_tls); REGISTER_HELPER(zend_jit_get_cg_tls); -# endif #endif #endif } @@ -3531,19 +3463,8 @@ static void zend_jit_setup(bool reattached) #endif #ifdef ZTS - zend_result result = zend_jit_resolve_tsrm_ls_cache_offsets( - &tsrm_ls_cache_tcb_offset, - &tsrm_tls_index, - &tsrm_tls_offset - ); - if (result == FAILURE) { - zend_accel_error(ACCEL_LOG_INFO, - "Could not get _tsrm_ls_cache offsets, will fallback to runtime resolution"); - } -# ifdef ZEND_EG_TLS eg_tls_tcb_offset = ZEND_JIT_TLS_TCB_OFFSET(executor_globals_tls); cg_tls_tcb_offset = ZEND_JIT_TLS_TCB_OFFSET(compiler_globals_tls); -# endif #endif #if !defined(ZEND_WIN32) && !defined(IR_TARGET_AARCH64) From a4ec0fc2849a46b47112d16a4cc701248dcf4fb9 Mon Sep 17 00:00:00 2001 From: henderkes Date: Sat, 6 Jun 2026 20:18:44 +0700 Subject: [PATCH 4/4] fix windows --- Zend/zend.c | 4 ++-- Zend/zend_globals_macros.h | 26 ++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Zend/zend.c b/Zend/zend.c index 544d74c6d65a..cb403ea0469d 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -52,8 +52,8 @@ static bool startup_done = false; #ifdef ZTS ZEND_API int compiler_globals_id; ZEND_API int executor_globals_id; -ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_executor_globals executor_globals_tls; -ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR 
zend_compiler_globals compiler_globals_tls; +ZEND_TLS_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_executor_globals executor_globals_tls; +ZEND_TLS_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_compiler_globals compiler_globals_tls; /* ts_allocate_tls_id takes a callback so each thread resolves its own block. * A plain &..._tls would capture only the registering thread's address. */ static void *executor_globals_tls_addr(void) { return &executor_globals_tls; } diff --git a/Zend/zend_globals_macros.h b/Zend/zend_globals_macros.h index 7c30c49c57d9..b1dec1ffb6e3 100644 --- a/Zend/zend_globals_macros.h +++ b/Zend/zend_globals_macros.h @@ -26,12 +26,26 @@ typedef struct _zend_executor_globals zend_executor_globals; typedef struct _zend_php_scanner_globals zend_php_scanner_globals; typedef struct _zend_ini_scanner_globals zend_ini_scanner_globals; +#ifdef ZEND_WIN32 +# define ZEND_TLS_API +# ifdef LIBZEND_EXPORTS +# define ZEND_TLS_DIRECT 1 +# endif +#else +# define ZEND_TLS_API ZEND_API +# define ZEND_TLS_DIRECT 1 +#endif + BEGIN_EXTERN_C() /* Compiler */ #ifdef ZTS -extern ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_compiler_globals compiler_globals_tls; -# define CG(v) (compiler_globals_tls.v) +# ifdef ZEND_TLS_DIRECT +extern ZEND_TLS_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_compiler_globals compiler_globals_tls; +# define CG(v) (compiler_globals_tls.v) +# else +# define CG(v) ZEND_TSRMG(compiler_globals_id, zend_compiler_globals *, v) +# endif #else # define CG(v) (compiler_globals.v) extern ZEND_API struct _zend_compiler_globals compiler_globals; @@ -41,8 +55,12 @@ ZEND_API int zendparse(void); /* Executor */ #ifdef ZTS -extern ZEND_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_executor_globals executor_globals_tls; -# define EG(v) (executor_globals_tls.v) +# ifdef ZEND_TLS_DIRECT +extern ZEND_TLS_API TSRM_TLS TSRM_TLS_MODEL_ATTR zend_executor_globals executor_globals_tls; +# define EG(v) (executor_globals_tls.v) +# else +# define EG(v) ZEND_TSRMG(executor_globals_id, zend_executor_globals 
*, v) +# endif #else # define EG(v) (executor_globals.v) extern ZEND_API zend_executor_globals executor_globals;