Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/code_quality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ jobs:
uses: astral-sh/setup-uv@v7
with:
version: "0.9.0"
python-version: "3.12"
# 3.13: the cmake-format pre-commit hook is pinned to python3.13
# (cmakelang crashes under 3.14). Keeping this in sync means the hook
# resolves to the running interpreter instead of hunting PATH.
python-version: "3.13"

- name: pre-commit (cache)
uses: actions/cache@v4
Expand Down
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ repos:
rev: v0.6.13
hooks:
- id: cmake-format
# cmakelang is unmaintained and crashes under Python 3.14
# ("Cannot use capturing groups in re.Scanner"). Pin this hook's
# environment to 3.13 so it never picks up a 3.14 interpreter. The
# code_quality CI job provisions Python 3.13 to match, so the hook
# resolves to the running interpreter there.
language_version: python3.13

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.18.2
Expand Down
1 change: 1 addition & 0 deletions src/duckdb_py/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ add_library(
importer.cpp
map.cpp
path_like.cpp
python_log_storage.cpp
pyconnection.cpp
pyexpression.cpp
pyfilesystem.cpp
Expand Down
4 changes: 4 additions & 0 deletions src/duckdb_py/duckdb_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "duckdb/parser/parser.hpp"

#include "duckdb_python/python_objects.hpp"
#include "duckdb_python/python_log_storage.hpp"
#include "duckdb_python/pyconnection/pyconnection.hpp"
#include "duckdb_python/pystatement.hpp"
#include "duckdb_python/pyrelation.hpp"
Expand Down Expand Up @@ -1135,6 +1136,9 @@ PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
"Tokenizes a SQL string, returning a list of (position, type) tuples that can be "
"used for e.g., syntax highlighting",
py::arg("query"));
m.def("_drain_log_forwarding", &PythonLogStorage::DrainForwarder,
"Block until all engine log entries queued for Python's logging module have been "
"forwarded. Forwarding is asynchronous; this is a test/synchronization aid.");
py::enum_<PySQLTokenType>(m, "token_type", py::module_local())
.value("identifier", PySQLTokenType::PY_SQL_TOKEN_IDENTIFIER)
.value("numeric_const", PySQLTokenType::PY_SQL_TOKEN_NUMERIC_CONSTANT)
Expand Down
86 changes: 86 additions & 0 deletions src/duckdb_py/include/duckdb_python/python_log_storage.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/python_log_storage.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/logging/log_storage.hpp"
#include "duckdb/common/map.hpp"
#include "duckdb/common/unique_ptr.hpp"

namespace duckdb {

class ColumnDataCollection;
class DatabaseInstance;

//! Per-scan state used when reading PythonLogStorage's in-memory buffers; this is what lets
//! `duckdb_logs` scan them.
//! A dedicated type is declared here, instead of reusing the engine's
//! InMemoryLogStorageScanState, so that nothing depends on that type's symbols being exported
//! on every platform.
class PythonLogStorageScanState : public LogStorageScanState {
public:
	//! Column-data scan position. Presumably initialised by PythonLogStorage::InitializeScan and
	//! advanced by PythonLogStorage::Scan (their definitions are not in this header).
	ColumnDataScanState scan_state;

	//! Creates the state for a scan over `table`; the table is handed to the base class.
	explicit PythonLogStorageScanState(LoggingTargetTable table) : LogStorageScanState(table) {
	}

	~PythonLogStorageScanState() override = default;
};

//! Composite log storage. Every engine log entry is
//!   (a) forwarded to Python's standard `logging` module (logging.getLogger("duckdb")), and
//!   (b) kept in memory, so that `SELECT * FROM duckdb_logs` continues to work.
//!
//! Buffering: this derives from BufferingLogStorage using a buffer size of 1, which flushes
//! each entry right away instead of waiting for a 2048-entry buffer to fill. Engine WARNINGs
//! are sparse, so they are only useful if they surface promptly.
//!
//! Threading: forwarding to Python is ASYNCHRONOUS. The engine invokes FlushChunk while it
//! holds LogManager::lock, a non-recursive mutex that CreateLogger/WriteLogEntry also take.
//! Taking the GIL at that point would deadlock against any thread that already holds the GIL
//! and then enters one of those LogManager methods (i.e. ordinary concurrent queries).
//! FlushChunk therefore only copies (level, message) into a process-global queue; one
//! background thread, which holds no engine lock, drains that queue and forwards to `logging`.
//! See python_log_storage.cpp.
class PythonLogStorage : public BufferingLogStorage {
public:
	explicit PythonLogStorage(DatabaseInstance &db);
	~PythonLogStorage() override;

	//! Name under which this storage is identified.
	const string GetStorageName() override {
		return "python_log_storage";
	}

	// --- Scan interface (single-threaded) -------------------------------------------------
	// Mirrors InMemoryLogStorage so that duckdb_logs is able to read from this storage.
	bool CanScan(LoggingTargetTable table) override;
	unique_ptr<LogStorageScanState> CreateScanState(LoggingTargetTable table) const override;
	void InitializeScan(LogStorageScanState &state) const override;
	bool Scan(LogStorageScanState &state, DataChunk &result) const override;

	// --- Process-global forwarder control -------------------------------------------------
	//! Launches the process-global forwarder thread; calling it again is a no-op (idempotent).
	//! Preconditions: the GIL is held and no engine lock is held. In practice that means calling
	//! it from Connect() and never from the engine's log-write path.
	static void EnsureForwarderStarted();

	//! Waits, with the GIL released, until all queued entries have been forwarded to `logging`.
	//! Because forwarding is asynchronous, a caller that wants to see a just-emitted warning on
	//! the Python side has to drain first. Exposed to Python as
	//! `_duckdb._drain_log_forwarding` for deterministic tests. Harmless when the forwarder was
	//! never started.
	static void DrainForwarder();

protected:
	//! Keeps the chunk for duckdb_logs and, for LOG_ENTRIES, queues it for async forwarding.
	void FlushChunk(LoggingTargetTable table, DataChunk &chunk) override;

	//! Empties the in-memory buffers.
	void ResetAllBuffers() override;

private:
	//! Returns the in-memory buffer associated with `table`.
	ColumnDataCollection &GetBuffer(LoggingTargetTable table) const;

	//! Copies every row of a LOG_ENTRIES chunk into the global forward queue. Runs under
	//! LogManager::lock, so it never touches the GIL and never calls into Python. Never throws.
	void EnqueueEntriesForPython(DataChunk &chunk);

	//! One in-memory buffer per logging target table; this is what duckdb_logs scans.
	map<LoggingTargetTable, unique_ptr<ColumnDataCollection>> log_storage_buffers;
};

} // namespace duckdb
22 changes: 22 additions & 0 deletions src/duckdb_py/pyconnection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
#include "duckdb_python/numpy/numpy_type.hpp"
#include "duckdb/main/prepared_statement.hpp"
#include "duckdb_python/jupyter_progress_bar_display.hpp"
#include "duckdb_python/python_log_storage.hpp"
#include "duckdb/logging/log_manager.hpp"
#include "duckdb_python/pyfilesystem.hpp"
#include "duckdb/main/client_config.hpp"
#include "duckdb/function/table/read_csv.hpp"
Expand Down Expand Up @@ -2283,6 +2285,26 @@ shared_ptr<DuckDBPyConnection> DuckDBPyConnection::Connect(const py::object &dat
auto res = FetchOrCreateInstance(database, config);
auto &client_context = *res->con.GetConnection().context;
SetDefaultConfigArguments(client_context);
{
auto &db_instance = *res->con.GetDatabase().instance;
auto &log_manager = db_instance.GetLogManager();
auto storage = make_shared_ptr<PythonLogStorage>(db_instance);
shared_ptr<LogStorage> storage_base = storage;
// RegisterLogStorage returns false if the name is already registered on this
// DatabaseInstance. Instances are cached and shared across connections/cursors, so
// only configure logging on the first registration. SetLogStorage/SetEnableLogging/
// SetLogLevel are NOT idempotent — re-running them on every Connect() would silently
// stomp a user's explicit `SET enable_logging` / `SET logging_level` / storage choice.
if (log_manager.RegisterLogStorage("python_log_storage", storage_base)) {
log_manager.SetLogStorage(db_instance, "python_log_storage");
log_manager.SetEnableLogging(true);
log_manager.SetLogLevel(LogLevel::LOG_WARNING);
// Start the background thread that forwards queued entries to Python's logging
// module. We're here with the GIL held and no engine lock taken — the only safe
// place to do it (the engine log-write path holds LogManager::lock).
PythonLogStorage::EnsureForwarderStarted();
}
}
return res;
}

Expand Down
Loading
Loading