From 2829ab4cae3eb66ed94099d909891c1e15c8e86c Mon Sep 17 00:00:00 2001 From: Nicolae Cudlenco <146981376+ncudlenco@users.noreply.github.com> Date: Mon, 18 May 2026 23:53:11 +0200 Subject: [PATCH 1/2] feat(embed): support baking PythonCall into a juliacall system image When PythonCall is compiled into a juliacall system image, its __init__ runs during jl_init_with_image, before juliacall's bootstrap defines Main.__PythonCall_libptr. Embedding was therefore mis-detected as non-embedded and failed with "'juliacall' module already exists". Add an opt-in embedded preference / JULIA_PYTHONCALL_EMBEDDED (via the same getpref mechanism as exe/lib) that forces the embedded path and obtains libpython from the lib preference / JULIA_PYTHONCALL_LIB (already loaded in the host process). Unset, behaviour is unchanged. Docs and CHANGELOG updated. --- CHANGELOG.md | 3 +++ docs/src/juliacall.md | 20 ++++++++++++++++++++ src/C/context.jl | 31 ++++++++++++++++++++++++++++--- src/Utils/Utils.jl | 1 + 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1bc832e..f0b97aef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # Changelog ## Unreleased +* Support baking `PythonCall` into a juliacall system image via the new opt-in + `embedded` preference / `JULIA_PYTHONCALL_EMBEDDED` option, removing the + `using PythonCall` cost from cold start. No behaviour change unless opted in. * Added option `lib` to JuliaCall. Setting this will skip the discovery subprocess. * Bug fixes. diff --git a/docs/src/juliacall.md b/docs/src/juliacall.md index 55ad5868..05c8c4ff 100644 --- a/docs/src/juliacall.md +++ b/docs/src/juliacall.md @@ -115,6 +115,26 @@ systems that may be readonly. Note that the project set in `PYTHON_JULIACALL_PROJECT` *must* already have PythonCall.jl installed and it *must* match the JuliaCall version, otherwise loading Julia will fail. 
+### Baking PythonCall into a system image + +For the fastest possible startup you can compile `PythonCall` itself (alongside +your own packages) into a system image with +[PackageCompiler.jl](https://github.com/JuliaLang/PackageCompiler.jl), so that +the `using PythonCall` performed at startup is a memory-map rather than a load. + +When `PythonCall` is baked into the system image its `__init__` runs *during* +`jl_init_with_image`, before juliacall's bootstrap has defined the +`Main.__PythonCall_libptr` global it normally uses to detect that it is +embedded. To support this, set the `embedded` preference (or the +`JULIA_PYTHONCALL_EMBEDDED=yes` environment variable) together with the `lib` +preference / `JULIA_PYTHONCALL_LIB` pointing at the running interpreter's +libpython. With `embedded` set, PythonCall takes the embedded path even without +the global and opens libpython by path (it is already loaded in the process, so +this is just a handle). The default is `no`, leaving normal behaviour +unchanged. Use this together with `PYTHON_JULIACALL_SYSIMAGE` (below), and +`PYTHON_JULIACALL_EXE` / `PYTHON_JULIACALL_PROJECT` so juliacall resolves the +baked environment directly. + ## [Configuration](@id julia-config) Some features of the Julia process, such as the optimization level or number of threads, may diff --git a/src/C/context.jl b/src/C/context.jl index 0a1c9eb6..9a784a12 100644 --- a/src/C/context.jl +++ b/src/C/context.jl @@ -105,11 +105,36 @@ on_main_thread function init_context() - CTX.is_embedded = hasproperty(Base.Main, :__PythonCall_libptr) + # Normally PythonCall is embedded when Python (via juliacall) defines the + # global `Main.__PythonCall_libptr`, set by juliacall's bootstrap *after* + # `jl_init_with_image`. If PythonCall is baked into a juliacall system + # image, its `__init__` runs *during* `jl_init_with_image` — before that + # global exists — yet we are still embedded (Python is the running host). 
+ # The opt-in `embedded` preference / `JULIA_PYTHONCALL_EMBEDDED` forces the + # embedded path in that case; libpython is obtained by path since it is + # already loaded in this process. Unset, behaviour is unchanged. + has_libptr = hasproperty(Base.Main, :__PythonCall_libptr) + CTX.is_embedded = has_libptr || Utils.getpref_embedded() if CTX.is_embedded - # In this case, getting a handle to libpython is easy - CTX.lib_ptr = Base.Main.__PythonCall_libptr::Ptr{Cvoid} + if has_libptr + # In this case, getting a handle to libpython is easy + CTX.lib_ptr = Base.Main.__PythonCall_libptr::Ptr{Cvoid} + else + # Baked into a sysimage: open libpython by path (the `lib` + # preference / JULIA_PYTHONCALL_LIB). dlopen of an + # already-loaded library just returns a handle to it. + lib_path = something(Utils.getpref_lib(), Some(nothing)) + lib_path === nothing && error( + "JULIA_PYTHONCALL_EMBEDDED is set but libpython is unknown; " * + "set the `lib` preference or JULIA_PYTHONCALL_LIB to its path.", + ) + lib_ptr = dlopen_e(lib_path, CTX.dlopen_flags) + lib_ptr == C_NULL && + error("Python library $(repr(lib_path)) could not be opened.") + CTX.lib_path = lib_path + CTX.lib_ptr = lib_ptr + end init_pointers() # Check Python is initialized Py_IsInitialized() == 0 && error("Python is not already initialized.") diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index a26b3198..08b8dd4c 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -17,6 +17,7 @@ checkpref(::Type{String}, x::AbstractString) = convert(String, x) getpref_exe() = getpref(String, "exe", "JULIA_PYTHONCALL_EXE", "") getpref_lib() = getpref(String, "lib", "JULIA_PYTHONCALL_LIB", nothing) getpref_pickle() = getpref(String, "pickle", "JULIA_PYTHONCALL_PICKLE", "pickle") +getpref_embedded() = getpref(String, "embedded", "JULIA_PYTHONCALL_EMBEDDED", "no") == "yes" function explode_union(T) @nospecialize T From 58ffd5a9b459986a16c9026b55582837a9ade7e4 Mon Sep 17 00:00:00 2001 From: Nicolae Cudlenco Date: Fri, 
29 May 2026 13:12:50 +0300 Subject: [PATCH 2/2] feat(embed): bake the embedded flag into the sysimage Address review feedback from @cjdoris on #773: > if you know that you are embedded, then you can find the libptr (by > calling into the C-API functions, which will be globally available in > this case) [...] if we can 'bake in' the fact that PythonCall is > embedded into the sysimg, then we won't need any of these preferences. > I wonder if we could simply do '@eval PythonCall _is_embedded=true' or > something when we make the sysimg, so it's baked into PythonCall, then > test for this variable in 'PythonCall.__init__'? > [...] could you document the basic steps to actually create a sysimg? > I don't think you need to go much into the why [...] nor mention > Main.__PythonCall_libptr (which is internal). Instead, you can pretty > much just say that you need to set the prefs [...]. But what's not > totally obvious is how you set up a project and these prefs and use > PackageCompiler to actually make the sysimg. Design ------ Add a module-level 'const _is_embedded = Ref(false)' on PythonCall, flipped at sysimage build time via PackageCompiler's 'script=' keyword (NOT 'precompile_execution_file=', which runs in a separate child process whose state is not snapshotted). The mutated value is captured in the snapshot; at runtime, 'PythonCall.__init__' reads it and takes the embedded path. A 'const Ref' is preferred over a rebound non-const global so the C submodule can 'import' the name once and read it without 'parentmodule' indirection. Same baked-into-sysimage behaviour as the literal '@eval' form suggested in review. libpython is opened from the existing 'lib' preference / JULIA_PYTHONCALL_LIB (added in 0.9.33). The PR does not introduce new preferences or environment variables. 
The interpreter's executable path is resolved via 'sys.executable' using PyImport_ImportModule + PyObject_GetAttrString + PyUnicode_AsUTF8AndSize - stable across all supported CPython versions and platforms. If '_is_embedded[]' is true but 'Py_IsInitialized()' returns 0 - e.g. the sysimage is loaded by a 'julia.exe' child of 'Base.compilecache' rather than by juliacall - init_context resets CTX and downstream module __init__s short-circuit. PythonCall loads as inactive instead of erroring. Files ----- src/PythonCall.jl: declare 'const _is_embedded = Ref(false)'. src/C/C.jl: import _is_embedded into the C submodule. src/C/context.jl: rewrite init_context() embedded branch; add _embedded_program_path() reading sys.executable. src/Core/Core.jl, src/Convert/Convert.jl, src/Wrap/Wrap.jl, src/JlWrap/JlWrap.jl, src/JlWrap/C.jl, src/Compat/Compat.jl: guard __init__ on CTX.is_initialized for the inactive-load case. docs/src/juliacall.md: rewrite the 'Baking PythonCall into a system image' section with a worked example. CHANGELOG.md: Unreleased entry. --- CHANGELOG.md | 5 +- docs/src/juliacall.md | 67 ++++++++++++++++------- src/C/C.jl | 3 +- src/C/context.jl | 117 +++++++++++++++++++++++++++++------------ src/Compat/Compat.jl | 1 + src/Convert/Convert.jl | 1 + src/Core/Core.jl | 3 ++ src/JlWrap/C.jl | 1 + src/JlWrap/JlWrap.jl | 1 + src/PythonCall.jl | 8 +++ src/Utils/Utils.jl | 1 - src/Wrap/Wrap.jl | 1 + 12 files changed, 152 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0b97aef..14923360 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,8 @@ # Changelog ## Unreleased -* Support baking `PythonCall` into a juliacall system image via the new opt-in - `embedded` preference / `JULIA_PYTHONCALL_EMBEDDED` option, removing the - `using PythonCall` cost from cold start. No behaviour change unless opted in. +* Support baking `PythonCall` into a juliacall system image via + `PythonCall._is_embedded[] = true` in a PackageCompiler `script=`. 
* Added option `lib` to JuliaCall. Setting this will skip the discovery subprocess. * Bug fixes. diff --git a/docs/src/juliacall.md b/docs/src/juliacall.md index 05c8c4ff..69272132 100644 --- a/docs/src/juliacall.md +++ b/docs/src/juliacall.md @@ -115,25 +115,54 @@ systems that may be readonly. Note that the project set in `PYTHON_JULIACALL_PROJECT` *must* already have PythonCall.jl installed and it *must* match the JuliaCall version, otherwise loading Julia will fail. -### Baking PythonCall into a system image - -For the fastest possible startup you can compile `PythonCall` itself (alongside -your own packages) into a system image with -[PackageCompiler.jl](https://github.com/JuliaLang/PackageCompiler.jl), so that -the `using PythonCall` performed at startup is a memory-map rather than a load. - -When `PythonCall` is baked into the system image its `__init__` runs *during* -`jl_init_with_image`, before juliacall's bootstrap has defined the -`Main.__PythonCall_libptr` global it normally uses to detect that it is -embedded. To support this, set the `embedded` preference (or the -`JULIA_PYTHONCALL_EMBEDDED=yes` environment variable) together with the `lib` -preference / `JULIA_PYTHONCALL_LIB` pointing at the running interpreter's -libpython. With `embedded` set, PythonCall takes the embedded path even without -the global and opens libpython by path (it is already loaded in the process, so -this is just a handle). The default is `no`, leaving normal behaviour -unchanged. Use this together with `PYTHON_JULIACALL_SYSIMAGE` (below), and -`PYTHON_JULIACALL_EXE` / `PYTHON_JULIACALL_PROJECT` so juliacall resolves the -baked environment directly. +### [Baking PythonCall into a system image](@id baking-sysimage) + +The first `import juliacall` in a fresh process is slow - typically 10-20 +seconds in a clean container - because Julia starts, deserialises +`PythonCall` from cache, and JIT-compiles the bridge's hot paths. +Long-running processes amortise that cost.
Short-lived ones - serverless +functions, queue workers, CI jobs that start, handle one request, and +exit - pay it on every invocation. + +Compiling `PythonCall` into a system image with +[PackageCompiler.jl](https://github.com/JuliaLang/PackageCompiler.jl) +collapses load+compile into a memory-map at startup, typically cutting +that cost by an order of magnitude. For the baked `PythonCall` to take the +embedded path when loaded by `import juliacall`, set +`PythonCall._is_embedded[] = true` inside the sysimage-build process. + +PackageCompiler's `precompile_execution_file=` is run in a separate child +process whose state is not snapshotted, so the flag must be set via the +`script=` keyword instead. + +```julia +# bake_embedded.jl +PythonCall._is_embedded[] = true +``` + +```julia +using PackageCompiler +create_sysimage(["PythonCall"]; + sysimage_path = "myapp.so", + script = "bake_embedded.jl", + project = ".", +) +``` + +Pass `precompile_execution_file=` alongside `script=` to also bake your own +hot code paths into the image. + +At runtime, point juliacall at the resulting sysimage via +[`PYTHON_JULIACALL_SYSIMAGE`](@ref julia-config), and set the +[`lib`](@ref pythoncall-config) preference / `JULIA_PYTHONCALL_LIB` to the +path of the host's libpython - the embedded path needs an explicit handle +to libpython since the bridge does not load the interpreter itself. + +#### Subprocess behaviour + +If a julia process without a running Python interpreter loads a sysimage +baked with `_is_embedded[] = true` (for example a `Base.compilecache` +child), `PythonCall` loads as inactive - no error, no Python state. The same happens if the `lib` preference is unset or libpython cannot be opened from it.
## [Configuration](@id julia-config) diff --git a/src/C/C.jl b/src/C/C.jl index f7b4e020..90102224 100644 --- a/src/C/C.jl +++ b/src/C/C.jl @@ -19,7 +19,8 @@ if @load_preference("exe", "@CondaPkg") == "@CondaPkg" end import ..PythonCall: - python_executable_path, python_library_path, python_library_handle, python_version + python_executable_path, python_library_path, python_library_handle, python_version, + _is_embedded include("consts.jl") include("pointers.jl") diff --git a/src/C/context.jl b/src/C/context.jl index 9a784a12..54866636 100644 --- a/src/C/context.jl +++ b/src/C/context.jl @@ -105,46 +105,57 @@ on_main_thread function init_context() - # Normally PythonCall is embedded when Python (via juliacall) defines the - # global `Main.__PythonCall_libptr`, set by juliacall's bootstrap *after* - # `jl_init_with_image`. If PythonCall is baked into a juliacall system - # image, its `__init__` runs *during* `jl_init_with_image` — before that - # global exists — yet we are still embedded (Python is the running host). - # The opt-in `embedded` preference / `JULIA_PYTHONCALL_EMBEDDED` forces the - # embedded path in that case; libpython is obtained by path since it is - # already loaded in this process. Unset, behaviour is unchanged. + # Embedded if juliacall set Main.__PythonCall_libptr or the sysimage baked + # `_is_embedded[]` to `true`. has_libptr = hasproperty(Base.Main, :__PythonCall_libptr) - CTX.is_embedded = has_libptr || Utils.getpref_embedded() + CTX.is_embedded = has_libptr || _is_embedded[] if CTX.is_embedded + # Locate libpython. if has_libptr - # In this case, getting a handle to libpython is easy CTX.lib_ptr = Base.Main.__PythonCall_libptr::Ptr{Cvoid} else - # Baked into a sysimage: open libpython by path (the `lib` - # preference / JULIA_PYTHONCALL_LIB). dlopen of an - # already-loaded library just returns a handle to it. 
- lib_path = something(Utils.getpref_lib(), Some(nothing)) - lib_path === nothing && error( - "JULIA_PYTHONCALL_EMBEDDED is set but libpython is unknown; " * - "set the `lib` preference or JULIA_PYTHONCALL_LIB to its path.", - ) - lib_ptr = dlopen_e(lib_path, CTX.dlopen_flags) - lib_ptr == C_NULL && - error("Python library $(repr(lib_path)) could not be opened.") - CTX.lib_path = lib_path - CTX.lib_ptr = lib_ptr + lib_path = Utils.getpref_lib() + if lib_path !== nothing + lib_ptr = dlopen_e(lib_path, CTX.dlopen_flags) + if lib_ptr != C_NULL + CTX.lib_path = lib_path + CTX.lib_ptr = lib_ptr + end + end end - init_pointers() - # Check Python is initialized - Py_IsInitialized() == 0 && error("Python is not already initialized.") - CTX.is_initialized = true - CTX.which = :embedded - exe_path = Utils.getpref_exe() - if exe_path != "" - CTX.exe_path = exe_path - # this ensures PyCall uses the same Python interpreter - get!(ENV, "PYTHON", exe_path) + + embedded_ok = false + if CTX.lib_ptr != C_NULL + init_pointers() + embedded_ok = Py_IsInitialized() != 0 + end + + if embedded_ok + CTX.is_initialized = true + CTX.which = :embedded + exe_pref = Utils.getpref_exe() + if exe_pref != "" + CTX.exe_path = exe_pref + get!(ENV, "PYTHON", exe_pref) + else + exe_path = _embedded_program_path() + if exe_path !== nothing + CTX.exe_path = exe_path + get!(ENV, "PYTHON", exe_path) + end + end + elseif has_libptr + error("PythonCall is in embedded mode but no Python interpreter is running in this process.") + else + # Either the `lib` preference is unset, or Python is not running + # in this process (e.g. a julia.exe child of `Base.compilecache` + # loaded a sysimage baked for the embedded path). Leave PythonCall + # inactive instead of erroring. + CTX.is_embedded = false + CTX.lib_ptr = C_NULL + CTX.lib_path = missing + return end else # Find Python executable @@ -347,6 +358,46 @@ function init_context() return end +# Return `sys.executable` as a String, or nothing. 
Requires `CTX.lib_ptr` to be an open libpython handle and the interpreter to be initialized (symbols are resolved here with `dlsym_e`). +function _embedded_program_path() + import_mod = dlsym_e(CTX.lib_ptr, :PyImport_ImportModule) + getattr = dlsym_e(CTX.lib_ptr, :PyObject_GetAttrString) + asutf8 = dlsym_e(CTX.lib_ptr, :PyUnicode_AsUTF8AndSize) + decref = dlsym_e(CTX.lib_ptr, :Py_DecRef) + errclear = dlsym_e(CTX.lib_ptr, :PyErr_Clear) + (import_mod == C_NULL || getattr == C_NULL || asutf8 == C_NULL || + decref == C_NULL || errclear == C_NULL) && return nothing + + sys_mod = ccall(import_mod, Ptr{Cvoid}, (Ptr{Cchar},), "sys") + if sys_mod == C_NULL + ccall(errclear, Cvoid, ()) + return nothing + end + result = nothing + try + exec_obj = ccall(getattr, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cchar}), sys_mod, "executable") + if exec_obj == C_NULL + ccall(errclear, Cvoid, ()) + return nothing + end + try + size_ref = Ref{Cssize_t}(0) + cstr = ccall(asutf8, Ptr{Cchar}, (Ptr{Cvoid}, Ref{Cssize_t}), exec_obj, size_ref) + if cstr == C_NULL + ccall(errclear, Cvoid, ()) + return nothing + end + size_ref[] == 0 && return nothing + result = unsafe_string(cstr, size_ref[]) + finally + ccall(decref, Cvoid, (Ptr{Cvoid},), exec_obj) + end + finally + ccall(decref, Cvoid, (Ptr{Cvoid},), sys_mod) + end + return result +end + function Base.show(io::IO, ::MIME"text/plain", ctx::Context) show(io, typeof(io)) print(io, ":") diff --git a/src/Compat/Compat.jl b/src/Compat/Compat.jl index d9b8a148..2be524ee 100644 --- a/src/Compat/Compat.jl +++ b/src/Compat/Compat.jl @@ -23,6 +23,7 @@ include("serialization.jl") include("tables.jl") function __init__() + C.CTX.is_initialized || return init_gui() init_pyshow() end diff --git a/src/Convert/Convert.jl b/src/Convert/Convert.jl index fc506fc6..444fbbc0 100644 --- a/src/Convert/Convert.jl +++ b/src/Convert/Convert.jl @@ -36,6 +36,7 @@ include("numpy.jl") include("pandas.jl") function __init__() + C.CTX.is_initialized || return init_pyconvert() init_ctypes() init_numpy() diff --git a/src/Core/Core.jl b/src/Core/Core.jl index 9d14f93a..d8e6f2f7 100644 ---
a/src/Core/Core.jl +++ b/src/Core/Core.jl @@ -209,6 +209,9 @@ include("juliacall.jl") include("pyconst_macro.jl") function __init__() + # Skip if C bailed out (e.g. a julia.exe child of Base.compilecache + # loaded a sysimage baked for the embedded path). + C.CTX.is_initialized || return init_consts() init_datetime() init_stdlib() diff --git a/src/JlWrap/C.jl b/src/JlWrap/C.jl index 19ff2904..818e2cb6 100644 --- a/src/JlWrap/C.jl +++ b/src/JlWrap/C.jl @@ -364,6 +364,7 @@ function init_c() end function __init__() + C.CTX.is_initialized || return init_c() end diff --git a/src/JlWrap/JlWrap.jl b/src/JlWrap/JlWrap.jl index 802ce465..c53fb3e2 100644 --- a/src/JlWrap/JlWrap.jl +++ b/src/JlWrap/JlWrap.jl @@ -51,6 +51,7 @@ include("set.jl") include("callback.jl") function __init__() + C.CTX.is_initialized || return init_base() init_raw() init_any() diff --git a/src/PythonCall.jl b/src/PythonCall.jl index 46c89c89..10b420e8 100644 --- a/src/PythonCall.jl +++ b/src/PythonCall.jl @@ -2,6 +2,14 @@ module PythonCall const ROOT_DIR = dirname(@__DIR__) +""" + PythonCall._is_embedded + +Marks the running sysimage as embedded in a Python host. Set to `true` in a +PackageCompiler `script=` to bake the embedded path into the sysimage. 
+""" +const _is_embedded = Ref(false) + include("API/API.jl") include("Utils/Utils.jl") include("NumpyDates/NumpyDates.jl") diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 08b8dd4c..a26b3198 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -17,7 +17,6 @@ checkpref(::Type{String}, x::AbstractString) = convert(String, x) getpref_exe() = getpref(String, "exe", "JULIA_PYTHONCALL_EXE", "") getpref_lib() = getpref(String, "lib", "JULIA_PYTHONCALL_LIB", nothing) getpref_pickle() = getpref(String, "pickle", "JULIA_PYTHONCALL_PICKLE", "pickle") -getpref_embedded() = getpref(String, "embedded", "JULIA_PYTHONCALL_EMBEDDED", "no") == "yes" function explode_union(T) @nospecialize T diff --git a/src/Wrap/Wrap.jl b/src/Wrap/Wrap.jl index d3b30ff2..6ba273af 100644 --- a/src/Wrap/Wrap.jl +++ b/src/Wrap/Wrap.jl @@ -32,6 +32,7 @@ include("PyTable.jl") include("PyPandasDataFrame.jl") function __init__() + C.CTX.is_initialized || return priority = PYCONVERT_PRIORITY_ARRAY pyconvert_add_rule("", PyArray, pyconvert_rule_array_nocopy, priority) pyconvert_add_rule("", PyArray, pyconvert_rule_array_nocopy, priority)