From 0159aa3c577dbbea629e8d2f3ae72a515c57f836 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 19 Feb 2026 08:41:09 -0700 Subject: [PATCH 01/52] removes the enabled flag on the protocols table, adds a new migration_status to differentiate between not started and in progress migrations --- docs/feature-design/data-migrations.md | 1084 +++++++----------------- 1 file changed, 312 insertions(+), 772 deletions(-) diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index 66f47359e..e54fd8d79 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -17,18 +17,18 @@ and live ingestion processes. ```sql CREATE TABLE protocols ( id TEXT PRIMARY KEY, -- "BLEND", "SEP50", etc. - classification_status TEXT DEFAULT 'not_started', - history_migration_status TEXT DEFAULT 'not_started', - current_state_migration_status TEXT DEFAULT 'not_started', + migration_status TEXT DEFAULT 'not_started', created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); --- Status values for each column: --- 'not_started' - Initial state --- 'in_progress' - Process running --- 'success' - Process complete --- 'failed' - Process failed +-- migration_status values: +-- 'not_started' - Initial state after registration +-- 'classification_in_progress' - Checkpoint classification running +-- 'classification_success' - Checkpoint classification complete +-- 'backfilling_in_progress' - Historical state migration running +-- 'backfilling_success' - Migration complete, data is complete +-- 'failed' - Migration failed ``` **Migration Cursor Tracking** (via `ingest_store` table): @@ -42,51 +42,13 @@ CREATE TABLE ingest_store ( key TEXT PRIMARY KEY, value TEXT NOT NULL ); -``` - -Each protocol has two CAS cursors, one per migration subcommand. 
Each cursor is shared between its respective migration subcommand and live ingestion, and serves as both the convergence mechanism and crash recovery cursor — eliminating the need for a separate migration cursor. - -**History Cursor** (via `ingest_store` table): - -Tracks the last ledger for which protocol state changes were written: -```sql --- History cursor example: -INSERT INTO ingest_store (key, value) VALUES ('protocol_SEP41_history_cursor', '50000'); +-- Protocol migration cursor example: +INSERT INTO ingest_store (key, value) VALUES ('protocol_SEP41_migration_cursor', '50000'); ``` -The history cursor (e.g., `protocol_{PROTOCOL_ID}_history_cursor`) is **shared between history migration and live ingestion**. It is advanced atomically via compare-and-swap (CAS) within the same DB transaction that writes state change data. It also serves as the crash recovery cursor for history migration. - -**Current State Cursor** (via `ingest_store` table): - -Tracks the last ledger for which current state was produced: - -```sql --- Current state cursor example: -INSERT INTO ingest_store (key, value) VALUES ('protocol_SEP41_current_state_cursor', '50000'); -``` - -The current state cursor (e.g., `protocol_{PROTOCOL_ID}_current_state_cursor`) is **shared between current-state migration and live ingestion**. It is advanced atomically via compare-and-swap (CAS) within the same DB transaction that writes current state data. It also serves as the crash recovery cursor for current-state migration. - -**CAS Mechanism** (shared by both cursors): - -```sql --- CAS: only advance if the cursor is at the expected value -UPDATE ingest_store SET value = $new WHERE key = $cursor_name AND value = $expected; --- Returns rows_affected = 1 on success, 0 if another process already advanced it -``` - -This requires a new `CompareAndSwap` method on `IngestStoreModel`. The existing `Update()` (`ingest_store.go:48`) is an unconditional upsert and cannot be used for this purpose. 
- -The CAS mechanism ensures that exactly one process (migration or live ingestion) writes data for any given ledger on each cursor, enabling a seamless handoff without coordination between the two processes (see [Convergence Model](#backfill-migration)). - -**Cursor Initialization** (during `protocol-setup`): - -Both cursors are initialized when `classification_status` moves to `success`: -- `protocol_{ID}_history_cursor` = `oldest_ledger_cursor - 1` -- `protocol_{ID}_current_state_cursor` = 0 (or left uninitialized until current-state migration starts) - -This ensures live ingestion has cursors to gate against from the start, even if migrations haven't run yet. +Each protocol migration has its own cursor key (e.g., `protocol_{PROTOCOL_ID}_migration_cursor`). +This cursor is updated atomically with each batch commit for crash recovery and can be deleted after the migration completes. ### protocol_contracts @@ -95,21 +57,20 @@ Maps protocols to the contracts that make up their systems. ```sql CREATE TABLE protocol_contracts ( contract_id TEXT NOT NULL, -- C... address - protocol_id TEXT REFERENCES protocols(id), - wasm_hash TEXT NOT NULL REFERENCES protocol_wasms(wasm_hash), + protocol_id TEXT NOT NULL REFERENCES protocols(id), name TEXT, -- "pool", "factory", "token", etc. created_at TIMESTAMPTZ DEFAULT NOW(), PRIMARY KEY (contract_id, protocol_id) ); ``` -### protocol_wasms +### known_wasms A cache for all known WASM blobs. This acts as a filter for the classification process to reduce the overhead of classifying new contract instances that use the same WASM code. 
```sql -CREATE TABLE protocol_wasms ( +CREATE TABLE known_wasms ( wasm_hash TEXT PRIMARY KEY, protocol_id TEXT REFERENCES protocols(id), -- NULL if unknown/unclassified created_at TIMESTAMPTZ DEFAULT NOW() @@ -118,71 +79,42 @@ CREATE TABLE protocol_wasms ( ## Overview -Adding a new protocol requires four coordinated processes: +Adding a new protocol requires three coordinated processes: ``` ┌─────────────────────────────────────────────────────────────────────────────────┐ │ PROTOCOL ONBOARDING WORKFLOW │ └─────────────────────────────────────────────────────────────────────────────────┘ - STEP 1: SETUP STEP 2: LIVE INGESTION -┌──────────────────────┐ ┌──────────────────────┐ -│ ./wallet-backend │ │ Restart ingestion │ -│ protocol-setup │───▶│ with new processor │ -│ │ │ │ -│ Classifies existing │ │ Produces state from │ -│ contracts │ │ restart ledger onward│ -└──────────────────────┘ └──────────┬───────────┘ - │ - Steps 2, 3a, and 3b run concurrently - │ - ┌──────────────────┼──────────────────┐ - │ │ │ - ▼ ▼ ▼ - ┌──────────────────┐ ┌──────────────┐ ┌──────────────────┐ - │ Live ingestion: │ │ STEP 3a: │ │ STEP 3b: │ - │ state changes │ │ HISTORY │ │ CURRENT-STATE │ - │ after history │ │ MIGRATION │ │ MIGRATION │ - │ convergence, │ │ │ │ │ - │ current state │ │ protocol- │ │ protocol- │ - │ after current- │ │ migrate │ │ migrate │ - │ state │ │ history │ │ current-state │ - │ convergence │ │ │ │ │ - │ │ │ Retention │ │ From start- │ - │ │ │ window only │ │ ledger to tip │ - └────────┬─────────┘ └──────┬───────┘ └────────┬─────────┘ - │ │ │ - │◄── CAS handoff ──▶│ │ - │ (history cursor)│ │ - │ │ │ - │◄────── CAS handoff ──────────────────▶│ - │ (current-state cursor) │ - │ │ - └──────────────────┬────────────────────┘ - │ - Each migration CAS fails = handoff - Live ingestion takes over that responsibility - │ - ▼ - Complete coverage via two independent cursors: - - History cursor: state changes [retention_start → current] - - Current-state cursor: current state 
[start_ledger → current] + STEP 1: SETUP STEP 2: LIVE INGESTION STEP 3: BACKFILL +┌──────────────────────┐ ┌──────────────────────┐ ┌──────────────────────┐ +│ ./wallet-backend │ │ Restart ingestion │ │ ./wallet-backend │ +│ protocol-setup │───▶│ with new processor │───▶│ protocol-migrate │ +│ │ │ │ │ │ +│ Classifies existing │ │ Note the restart │ │ Backfills historical │ +│ contracts │ │ ledger number │ │ state │ +└──────────────────────┘ └──────────────────────┘ └──────────────────────┘ + ▲ │ │ + │ │ │ + │ ▼ │ + │ ┌──────────────────┐ │ + │ │ Live ingestion │ │ + │ │ produces state │ │ + │ │ from restart │ │ + │ │ ledger onward │ │ + │ └──────────────────┘ │ + │ │ + └────────────────────────────────────────────────────────┘ + Complete coverage: [first_block → current] ``` ## Process Dependencies | Step | Requires | Produces | |------|----------|----------| -| **1. protocol-setup** | Protocol migration SQL file, protocol implementation in code | Protocol in DB, `protocol_wasms`, `protocol_contracts`, `classification_status = success`, both cursors initialized | -| **2. ingest (live)** | `classification_status = success`, processor registered | State changes after history convergence (history cursor). Current state after current-state convergence (current-state cursor). | -| **3a. protocol-migrate history** | `classification_status = success` | Protocol state changes within retention window, through convergence with live ingestion | -| **3b. protocol-migrate current-state** | `classification_status = success` | Current state from `start_ledger` through convergence with live ingestion | - -Steps 2, 3a, and 3b run **concurrently**. 
Each migration subcommand converges independently with live ingestion via its own CAS cursor: -- History migration converges via `protocol_{ID}_history_cursor` — when its CAS fails, live ingestion owns state change production -- Current-state migration converges via `protocol_{ID}_current_state_cursor` — when its CAS fails, live ingestion owns current state production - -The two subcommands are fully independent. They write to different tables, use different CAS cursors, and track different status columns. They can run in any order, concurrently, or only one can be run. +| **1. protocol-setup** | Protocol migration SQL file, protocol implementation in code | Protocol in DB, `known_wasms`, `protocol_contracts`, status = `classification_success` | +| **2. ingest (live)** | Status = `classification_success`, processor registered | State from `restart_ledger` onward | +| **3. protocol-migrate** | `protocol_contracts` populated, status = `classification_success` | Historical state from `first_block` to `restart_ledger - 1` | Both live ingestion and backfill migration need the `protocol_contracts` table populated to know which contracts to process. The `protocol-setup` command ensures this data exists before either process runs. @@ -190,7 +122,7 @@ Both live ingestion and backfill migration need the `protocol_contracts` table p Classification is the act of identifying new and existing contracts on the network and assigning a relationship to a known protocol. This has to happen in 2 stages during the migration process: - checkpoint population: We will use a history archive from the latest checkpoint in order to classify all contracts on the network. We will rely on the latest checkpoint available at the time of the migration. -- live ingestion: during live ingestion, we classify new WASM uploads by validating the bytecode against protocol specs, and map contract deployments/upgrades to protocols by looking up their WASM hash in `protocol_wasms`. 
+- live ingestion: during live ingestion, we classify new contracts by watching for contract deployments/upgrades and comparing the wasm blob to the known protocols. ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -214,55 +146,58 @@ This has to happen in 2 stages during the migration process: │ (iterates all ledger entries) │ └───────────────┬───────────────┘ │ - ┌────────────┴────────────┐ - │ │ - ▼ ▼ - ┌──────────────────┐ ┌──────────────────┐ - │ LedgerEntryType │ │ LedgerEntryType │ - │ ContractCode │ │ ContractData │ - │ │ │ (Instance) │ - └────────┬─────────┘ └────────┬─────────┘ - │ │ - ▼ ▼ - ┌──────────────────┐ ┌──────────────────┐ - │ Extract WASM │ │ Check SAC? │ - │ bytecode + hash │ │ (AssetFromData) │ - └────────┬─────────┘ └────────┬─────────┘ - │ │ - ▼ ┌───────┴───────┐ - ┌──────────────────┐ │ │ - │ Validate WASM │ ▼ ▼ - │ against protocol │ YES NO - │ validators │ │ │ - └────────┬─────────┘ ▼ ▼ - │ ┌────────┐ ┌──────────────────┐ - ┌────┴────┐ │SAC │ │ Extract wasm_ref │ - │ │ │contract│ │ (hash) from │ - MATCH NO MATCH └────────┘ │ instance data │ - │ │ └────────┬─────────┘ - ▼ ▼ │ - ┌────────┐ ┌──────────┐ ▼ - │Store │ │Store │ ┌──────────────────┐ - │hash in │ │hash in │ │ Map contract ID │ - │protocol│ │protocol │ │ to WASM hash │ - │_wasms │ │_wasms │ │ (for later lookup│ - │with │ │with NULL │ │ in protocol_wasms) │ - │protocol│ │protocol │ └──────────────────┘ - └────────┘ └──────────┘ + ┌────────────────────────┼────────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌──────────────────┐ ┌──────────────────┐ ┌──────────────────────────┐ +│ LedgerEntryType │ │ LedgerEntryType │ │ LedgerEntryType │ +│ ContractCode │ │ ContractData │ │ ContractData │ +│ │ │ (Instance) │ │ (Balance) │ +└────────┬─────────┘ └────────┬─────────┘ └────────────┬─────────────┘ + │ │ │ + ▼ ▼ ▼ +┌──────────────────┐ ┌──────────────────┐ ┌──────────────────────────┐ +│ Extract WASM │ │ Check SAC? 
│ │ Extract holder address │ +│ bytecode + hash │ │ (AssetFromData) │ │ from Balance key │ +└────────┬─────────┘ └────────┬─────────┘ └────────────┬─────────────┘ + │ │ │ + ▼ │ ▼ +┌──────────────────┐ │ ┌──────────────────────────┐ +│ Group contracts │ ┌───────┴───────┐ │ Track in │ +│ by WASM hash │ │ │ │ contractTokensByHolder │ +│ │ ▼ ▼ │ Address map │ +└────────┬─────────┘ YES NO └──────────────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌──────────────────┐ ┌────────┐ ┌──────────────┐ +│ Validate WASM │ │SAC │ │Compare WASM │ +│ against protocol │ │contract│ │to known │ +│ spec │ │ │ │protocol spec │ +└────────┬─────────┘ └────────┘ └──────────────┘ + │ + ┌────┴────┐ + │ │ + MATCH NO MATCH + │ │ + ▼ ▼ +┌────────┐ ┌──────────┐ +│Insert │ │Skip │ +│to │ │(unknown) │ +│protocol│ │ │ +│_contracts└──────────┘ +└────────┘ ┌───────────────────────────────┐ │ Post-Processing: │ │ 1. Store in protocol_contracts│ - │ (contract → protocol via │ - │ wasm hash → protocol_wasms) │ - │ 2. Cache in protocol_wasms │ + │ 2. Cache in known_wasms │ └───────────────────────────────┘ ``` Contracts are grouped by WASM hash before validation. This means we validate each unique WASM blob once, then apply the result to all contracts using that same code. -Once a WASM hash is classified, it is stored in the `protocol_wasms` table to avoid re-classification of future contracts using the same code. +Once a WASM hash is classified, it is stored in the `known_wasms` table to avoid re-classification of future contracts using the same code. -During live ingestion, classification happens in two parts: (1) new WASM uploads are validated against protocol specs and stored in `protocol_wasms`, and (2) contract deployments/upgrades are mapped to protocols via their WASM hash lookup in `protocol_wasms`. +During live ingestion, new contracts are classified when they appear in ledger changes. 
The key difference from checkpoint population is that live ingestion watches for contract deployments/upgrades and compares the WASM blob to known protocols. ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -277,52 +212,62 @@ During live ingestion, classification happens in two parts: (1) new WASM uploads ▼ ┌──────────────────────────────┐ │ ProcessLedger() │ - │ iterate ledger entry changes │ + │ (iterate transaction changes)│ └──────────────┬───────────────┘ │ - ┌───────────────────┴───────────────────┐ - │ │ - ▼ ▼ - ┌─────────────────────┐ ┌──────────────────────────┐ - │ ContractCode │ │ ContractData Instance │ - │ (new WASM upload) │ │ (deployment or upgrade) │ - └──────────┬──────────┘ └────────────┬─────────────┘ - │ │ - ▼ ▼ - ┌─────────────────────┐ ┌──────────────────────────┐ - │ Extract WASM │ │ Extract WASM hash │ - │ bytecode + hash │ │ from instance wasm_ref │ - └──────────┬──────────┘ └────────────┬─────────────┘ - │ │ - ▼ ▼ - ┌─────────────────────┐ ┌──────────────────────────┐ - │ Validate against │ │ Lookup hash in │ - │ protocol validators │ │ protocol_wasms │ - └──────────┬──────────┘ └────────────┬─────────────┘ - │ │ - ┌────┴────┐ ┌───────────┴───────────┐ - │ │ │ │ - MATCH NO MATCH FOUND NOT FOUND - │ │ │ │ - ▼ ▼ ▼ ▼ - ┌──────────┐ ┌──────────┐ ┌──────────────┐ ┌──────────────────┐ - │Store in │ │Store in │ │ Map contract │ │ Fetch WASM via │ - │protocol │ │protocol │ │ to protocol │ │ RPC, validate, │ - │_wasms w/ │ │_wasms w/ │ │ from cached │ │ then map contract│ - │protocol │ │NULL │ │ classification │ (rare edge case) │ - └──────────┘ └──────────┘ └──────────────┘ └──────────────────┘ - │ │ - └───────────┬───────────┘ - │ - ▼ - ┌──────────────────────────┐ - │ Insert contract mapping │ - │ to protocol_contracts │ - └──────────────────────────┘ + ▼ + ┌──────────────────────────────┐ + │ Watch for: │ + │ - Contract deployments │ + │ - Contract upgrades │ + └──────────────┬───────────────┘ + │ + ▼ + 
┌──────────────────────────────┐ + │ Extract WASM bytecode │ + │ from deployment/upgrade │ + └──────────────┬───────────────┘ + │ + ▼ + ┌──────────────────────────────┐ + │ Check known_wasms table: │ + │ Is this WASM hash known? │ + └──────────────┬───────────────┘ + │ + ┌───────────────┴───────────────┐ + │ │ + ▼ ▼ + ┌──────────────┐ ┌──────────────────┐ + │ WASM hash │ │ WASM hash │ + │ already known│ │ NOT known │ + │ │ │ │ + │ Use cached │ │ Compare WASM to │ + │classification│ │ protocol specs │ + └──────┬───────┘ └────────┬─────────┘ + │ │ + │ ┌───────┴───────┐ + │ │ │ + │ MATCH NO MATCH + │ │ │ + │ ▼ ▼ + │ ┌──────────────┐ ┌──────────────┐ + │ │ Insert to │ │ Mark as │ + │ │ known_wasms │ │ unknown in │ + │ │ + protocol_ │ │ known_wasms │ + │ │ contracts │ │ │ + │ └──────┬───────┘ └──────────────┘ + │ │ + └────────────────────────┘ + │ + ▼ + ┌──────────────────────────────┐ + │ Insert contract mapping to │ + │ protocol_contracts table │ + └──────────────────────────────┘ ``` -The classifier validates WASM bytecode from ContractCode entries against protocol specifications. -This validation uses the same logic as checkpoint population: +The classifier compares the WASM blob from new deployments/upgrades against known protocol specifications. +This comparison uses the same validation logic as checkpoint population: 1. Compile WASM with wazero runtime 2. Extract `contractspecv0` custom section @@ -370,7 +315,7 @@ and live ingestion. │ │ Custom Sections │ │ │ │ └── "contractspecv0" ◄─────────────┼── XDR-encoded spec │ │ └──────────────────────────────────────┘ │ -│ │ +│ │ │ for _, section := range compiledModule.CustomSections() { │ │ if section.Name() == "contractspecv0" { │ │ specBytes = section.Data() │ @@ -381,14 +326,14 @@ and live ingestion. 
▼ ┌──────────────────────────────────────────────────────────────────┐ │ Step 3: XDR Unmarshal → []ScSpecEntry │ -│ │ +│ │ │ reader := bytes.NewReader(specBytes) │ │ for reader.Len() > 0 { │ │ var spec xdr.ScSpecEntry │ │ xdr.Unmarshal(reader, &spec) │ │ specs = append(specs, spec) │ │ } │ -│ │ +│ │ │ Each ScSpecEntry represents: │ │ - Function definitions (name, inputs, outputs) │ │ - Type definitions (structs, enums) │ @@ -398,18 +343,18 @@ and live ingestion. ▼ ┌──────────────────────────────────────────────────────────────────┐ │ Step 4: Protocol Signature Validation │ -│ │ +│ │ │ For each function in contractSpec: │ │ - Extract function name │ │ - Extract input parameters (name → type mapping) │ │ - Extract output types │ │ - Compare against protocol's required functions │ -│ │ +│ │ │ Example (SEP-41 Token Standard): │ │ - Required: balance, allowance, decimals, name, symbol, │ │ approve, transfer, transfer_from, burn, burn_from │ │ - All parameter names and types must match exactly │ -│ │ +│ │ │ foundFunctions.Add(funcName) if signature matches │ │ MATCH = foundFunctions.Cardinality() == len(requiredSpecs) │ └──────────────────────────────────────────────────────────────────┘ @@ -435,15 +380,15 @@ and live ingestion. - Parameter names must match exactly (`from`, `to`, `amount`, etc.) - Parameter types must match (Address, i128, u32, etc.) -**protocol_wasms Table Usage**: +**known_wasms Table Usage**: -The `protocol_wasms` table stores classification results by WASM hash. The table stores +The `known_wasms` table stores classification results by WASM hash. The table stores a `protocol_id` for each WASM hash - this is `NULL` for WASM blobs that don't match any known protocol. ``` ┌────────────────────────────────────────────────────────────────────────────┐ -│ protocol_wasms CACHE FLOW │ +│ known_wasms CACHE FLOW │ └────────────────────────────────────────────────────────────────────────────┘ New Contract Deployment @@ -456,7 +401,7 @@ any known protocol. 
▼ ┌─────────────────────┐ │ SELECT protocol_id │ - │ FROM protocol_wasms │ + │ FROM known_wasms │ │ WHERE wasm_hash = ? │ └──────────┬──────────┘ │ @@ -470,7 +415,7 @@ any known protocol. │ protocol_id │ │ validation │ │ │ │ │ │ Skip WASM │ │ Then INSERT INTO │ - │ validation │ │ protocol_wasms │ + │ validation │ │ known_wasms │ └──────────────┘ └──────────────────┘ ``` @@ -479,7 +424,7 @@ This optimization is critical for performance because: - WASM compilation with wazero is CPU-intensive - A single validation per unique WASM hash serves all contracts using that code -When a new protocol is registered, running `protocol-setup` re-validates previously unknown WASM hashes (those with `protocol_id = NULL`) against the new protocol's spec. This ensures contracts deployed before the protocol was added can still be classified correctly. +When a new protocol is registered, previously unknown WASM hashes (those with `protocol_id = NULL`) must be re-validated against the new protocol's spec. This ensures contracts deployed before the protocol was added can still be classified correctly. 
``` ┌─────────────────────────────────────────────────────────────────────────────┐ │ RE-CLASSIFICATION ON NEW PROTOCOL REGISTRATION │ @@ -493,18 +438,18 @@ When a new protocol is registered, running `protocol-setup` re-validates previou │ ▼ ┌─────────────────────────────┐ - │ protocol-setup │ - │ --protocol-id BLEND │ - │ │ + │ Restart Live Ingestion │ + │ (triggers checkpoint │ + │ population for new protocol)│ └─────────────┬───────────────┘ │ ▼ ┌─────────────────────────────┐ - │ Query protocol_wasms for │ + │ Query known_wasms for │ │ unclassified entries: │ │ │ │ SELECT wasm_hash │ - │ FROM protocol_wasms │ + │ FROM known_wasms │ │ WHERE protocol_id IS NULL │ └─────────────┬───────────────┘ │ @@ -522,7 +467,7 @@ When a new protocol is registered, running `protocol-setup` re-validates previou ▼ ▼ ┌─────────────────┐ ┌─────────────────┐ │ UPDATE │ │ Leave as │ - │ protocol_wasms │ │ protocol_id │ + │ known_wasms │ │ protocol_id │ │ SET protocol_id │ │ = NULL │ │ = 'BLEND' │ │ │ │ WHERE wasm_hash │ │ (still unknown) │ @@ -567,11 +512,15 @@ current SEP-41 validator - compile WASM, extract `contractspecv0` section, parse XDR spec entries, check for required functions. When checkpoint population runs for a newly registered protocol, it validates contracts whose WASM hash is either: -1. **Not in protocol_wasms** (never seen before) -2. **In protocol_wasms with `protocol_id IS NULL`** (previously unknown) +1. **Not in known_wasms** (never seen before) +2. **In known_wasms with `protocol_id IS NULL`** (previously unknown) #### When Checkpoint Classification Runs -Backfill migrations rely on checkpoint population being complete before they can produce state changes for a new protocol. If checkpoint population does not run before a backfill migration is started for a new protocol, backfill migration will fail and exit since it does not classify protocols and cannot produce state without any classification being available. 
+Both backfill migrations and live ingestion will rely on checkpoint population being complete before they can produce state changes for a new protocol. + +If checkpoint population does not run before live ingestion is processing a new protocol, live ingestion will potentially experience more pressure from the additional classification it has to do due to the missing seed of `protocol_contracts`. + +If checkpoint population does not run before a backfill migration is started for a new protocol, backfill migration will fail and exit since it does not classify protocols and cannot produce state without any classification being available. ### Command @@ -583,13 +532,14 @@ Backfill migrations rely on checkpoint population being complete before they can 1. **Runs protocol migrations** - Executes SQL migrations from `internal/data/migrations/protocols/` to register new protocols in the `protocols` table with status `not_started` 2. **Sets status** to `classification_in_progress` for specified protocols -3. **Queries existing unclassified entries** from `protocol_wasms WHERE protocol_id IS NULL` -4. **Gets bytecode** from all unknown contracts using RPC -5. **Validates each WASM** against all specified protocols' validators -6. **Populates tables**: - - `protocol_wasms`: Maps WASM hashes to protocol IDs +3. **Reads the latest checkpoint** from the history archive +4. **Extracts all WASM code** from contract entries in the checkpoint +5. **Queries existing unclassified entries** from `known_wasms WHERE protocol_id IS NULL` +6. **Validates each WASM** against all specified protocols' validators +7. **Populates tables**: + - `known_wasms`: Maps WASM hashes to protocol IDs - `protocol_contracts`: Maps contract IDs to protocols -7. **Updates status** to `classification_success` for all processed protocols +8. 
**Updates status** to `classification_success` for all processed protocols ### Protocol Migration Files @@ -611,271 +561,82 @@ The command requires an explicit list of protocols to set up via the `--protocol **Benefits:** - Opt-in protocol support - operators control which protocols are enabled - Clear operator intent - no accidental protocol enablement -- Consistent with `protocol-migrate` subcommand interfaces +- Consistent with `protocol-migrate` command interface ## State Production -State produced by new protocols is split into two independent responsibilities, each handled by a dedicated migration subcommand: -- **History (state changes)**: `protocol-migrate history` writes protocol state changes (operation enrichment) for ledgers within the retention window. It starts at `oldest_ledger_cursor` and converges with live ingestion via the history cursor. Since it only processes the retention window, ALL processed ledgers produce persisted state changes — no "process but discard" logic needed. -- **Current state**: `protocol-migrate current-state` builds current state from a protocol's deployment ledger forward. It starts at `--start-ledger` and converges with live ingestion via the current-state cursor. It processes ALL ledgers from start to tip to build accurate additive state, but writes only current state — no state changes. -- **Live ingest state**: Live ingestion produces both state changes and current state, but only after converging with the respective migration subcommand for each. It gates state change writes on the history cursor and current state writes on the current-state cursor. - -### Additive vs Non-Additive Current State - -Protocol current state falls into two categories that affect how migration and live ingestion interact: - -**Non-additive state** (e.g., collectible ownership): The current state at ledger N can be determined from the ledger data alone, without knowing the state at ledger N-1. 
Live ingestion can write current state immediately for any ledger, independent of migration progress.
-
-**Additive state** (e.g., token balances): The current state at ledger N depends on the state at ledger N-1. A "transfer of 5 tokens" event at ledger N requires knowing the balance before ledger N to compute the new balance. During migration, that previous balance doesn't exist until all prior ledgers are processed.
-
-```
-Non-additive example (collectible ownership):
-  Ledger N says "User A owns collectible X" → write directly, no prior state needed.
-
-Additive example (token balance):
-  Ledger N says "Transfer 5 tokens from A to B"
-  → Need balance of A at ledger N-1 to compute new balance
-  → That balance doesn't exist until migration processes ledgers 1 through N-1
-```
-
-This distinction drives the convergence model: migration must run to the tip (not stop at a fixed end-ledger) so that additive current state is continuously built without gaps. The shared current-state cursor with CAS ensures exactly one process produces current state for each ledger, with a seamless handoff when migration catches up to live ingestion.
+State produced by new protocols is done through dual processes in order to cover historical state and new state production during live ingestion.
+- Historical state: A backfill-style migration will run for all ledgers that are needed to produce historical state enrichment, as well as current state tracking.
+- Live ingest state: live ingestion will produce state defined by a protocol, this state can be an enrichment for an operation (richer data for history) and/or can be an update to the tracking of the current-state of a protocol as it relates to a user (which collectibles does a user own?). 
### Backfill Migration - -The backfill migration is split into two independent subcommands that handle different responsibilities: - -#### History Migration (`protocol-migrate history`) - -The history migration writes protocol state changes (operation enrichment) for ledgers within the retention window. - +The migration runner processes historical ledgers to enrich operations with protocol state and produce state changes/current state. ``` ┌─────────────────────────────────────────────────────────────────────────────┐ -│ HISTORY MIGRATION FLOW │ +│ BACKFILL MIGRATION FLOW │ └─────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────┐ │ ./wallet-backend │ - │ protocol-migrate history │ - │ --protocol-id SEP50 SEP41 │ + │ protocol-migrate │ + │ --protocol-id SEP50 ... │ + │ --start-ledger 1 │ + │ --end-ledger 5 │ └─────────────┬──────────────┘ │ ▼ ┌────────────────────────────┐ - │ Start() │ - │ - Validate classification │ - │ _status = 'success' │ - │ - Set history_migration │ - │ _status = 'in_progress' │ - │ - Read oldest_ledger_cursor│ - │ from ingest_store │ - │ - Initialize history_cursor│ - │ = oldest_ledger_cursor-1 │ + │ MigrationRunner.Run() │ └─────────────┬──────────────┘ │ ▼ ┌────────────────────────────┐ - │ Read latest_ledger_cursor │ - │ Split [start, target] into │ - │ batches. Process in │ - │ parallel with ordered │◀──────────────┐ - │ commit. │ │ - └─────────────┬──────────────┘ │ - │ │ - ▼ │ - ┌────────────────────────────┐ │ - │ Per batch commit: │ │ - │ - CAS-advance history │ │ - │ _cursor │ │ - │ - Write state changes │ │ - │ (if CAS succeeded) │ │ - └─────────────┬──────────────┘ │ - │ │ - ┌────────┴────────┐ │ - │ │ │ - CAS success CAS failure │ - │ │ │ - ▼ ▼ │ - ┌──────────┐ ┌──────────────────┐ │ - │ Continue │ │ Handoff detected │ │ - │ to next │ │ Live ingestion │ │ - │ batch │ │ took over. │ │ - └────┬─────┘ │ Exit loop. 
│ │ - │ └────────┬─────────┘ │ - │ │ │ - ▼ │ │ - ┌──────────────┐ │ │ - │ More batches │ │ │ - │ remaining? │ │ │ - │ │ │ │ - │ YES: continue│ │ │ - │ NO: re-read │────────┼────────────────────┘ - │ latest_ledger│ │ (fetch new target, - │ _cursor, loop│ │ process remaining) - └──────────────┘ │ - │ - ▼ - ┌────────────────────────────┐ - │ Complete() │ - │ - Set history_migration │ - │ _status = 'success' │ - │ - Clean up resources │ - └────────────────────────────┘ -``` - -**Key simplification**: Since history migration starts at retention start, ALL processed ledgers are within retention. No need for the "process but discard" logic — every batch produces persisted state changes. - -**Parallelization advantage**: State changes for ledger N do not depend on state changes for ledger N-1, so batches are truly independent. History migration can be more aggressively parallelized than current-state migration. - -#### Current-State Migration (`protocol-migrate current-state`) - -The current-state migration builds current state from a protocol's deployment ledger forward. 
- -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ CURRENT-STATE MIGRATION FLOW │ -└─────────────────────────────────────────────────────────────────────────────┘ - - ┌────────────────────────────┐ - │ ./wallet-backend │ - │ protocol-migrate │ - │ current-state │ - │ --protocol-id SEP50 │ - │ --start-ledger 1000 │ + │ Start() │ + │ - Validate protocol exists │ + │ - Set status = backfilling │ + │ _in_progress │ └─────────────┬──────────────┘ │ ▼ ┌────────────────────────────┐ - │ Start() │ - │ - Validate classification │ - │ _status = 'success' │ - │ - Set current_state │ - │ _migration_status = │ - │ 'in_progress' │ - │ - Initialize current_state │ - │ _cursor = start-ledger-1 │ + │ For each ledger in range: │ + │(start-ledger to end-ledger)│ └─────────────┬──────────────┘ │ ▼ ┌────────────────────────────┐ - │ Read latest_ledger_cursor │ - │ Split [start, target] into │ - │ batches. Process in │ - │ parallel with ordered │◀──────────────┐ - │ commit. │ │ - └─────────────┬──────────────┘ │ - │ │ - ▼ │ - ┌────────────────────────────┐ │ - │ Per batch commit: │ │ - │ - CAS-advance current │ │ - │ _state_cursor │ │ - │ - Write current state │ │ - │ (if CAS succeeded) │ │ - │ - No state changes written │ │ - └─────────────┬──────────────┘ │ - │ │ - ┌────────┴────────┐ │ - │ │ │ - CAS success CAS failure │ - │ │ │ - ▼ ▼ │ - ┌──────────┐ ┌──────────────────┐ │ - │ Continue │ │ Handoff detected │ │ - │ to next │ │ Live ingestion │ │ - │ batch │ │ took over. │ │ - └────┬─────┘ │ Exit loop. │ │ - │ └────────┬─────────┘ │ - │ │ │ - ▼ │ │ - ┌──────────────┐ │ │ - │ More batches │ │ │ - │ remaining? 
│ │ │ - │ │ │ │ - │ YES: continue│ │ │ - │ NO: re-read │────────┼────────────────────┘ - │ latest_ledger│ │ (fetch new target, - │ _cursor, loop│ │ process remaining) - └──────────────┘ │ - │ - ▼ + │ Use processor to: │ + │ - Find operations involving│ + │ protocol contracts │ + │ - Produce state │ + │ - Enrich historical data │ + └─────────────┬──────────────┘ + │ + ▼ ┌────────────────────────────┐ │ Complete() │ - │ - Set current_state │ - │ _migration_status = │ - │ 'success' │ - │ - Clean up resources │ + │ - Set status = │ + │ backfilling_success │ └────────────────────────────┘ -``` - -**Processing range**: Current-state migration processes ALL ledgers from `--start-ledger` to tip. This is necessary for accurate additive state (e.g., token balances) where ledger N depends on ledger N-1. - -#### Independence - -The two subcommands are fully independent: -- They write to different tables (state changes vs current state) -- They use different CAS cursors (`history_cursor` vs `current_state_cursor`) -- They track different status columns (`history_migration_status` vs `current_state_migration_status`) -- They can run in any order, concurrently, or only one can be run -#### Convergence Model -Two independent convergence paths: - -``` -HISTORY CONVERGENCE: -┌────────────────────────────────────────────────────────────────────────────┐ -│ History migration CAS-advances protocol_{ID}_history_cursor from │ -│ retention_start. Live ingestion also CAS-advances the same cursor. │ -│ When history migration CAS fails → live ingestion owns state change │ -│ production. │ -│ │ -│ Timeline example: │ -│ T=0s: History cursor=10004. Migration CAS 10004→10005. Success. │ -│ T=0.5s: Migration CAS 10005→10006. Success. │ -│ T=5s: Live ingestion processes 10008. Cursor=10007 >= 10007. YES. │ -│ Live CAS 10007→10008. Success. │ -│ T=5.5s: Migration tries CAS 10007→10008. FAILS. Handoff detected. │ -│ │ -│ No gap: every ledger gets state changes from exactly one process. 
│
-└────────────────────────────────────────────────────────────────────────────┘
-
-CURRENT STATE CONVERGENCE:
-┌────────────────────────────────────────────────────────────────────────────┐
-│ Current-state migration CAS-advances protocol_{ID}_current_state_cursor │
-│ from start_ledger. Live ingestion also CAS-advances the same cursor. │
-│ When current-state migration CAS fails → live ingestion owns current │
-│ state production. │
-│ │
-│ Same CAS mechanism as history convergence, but using a separate cursor. │
-│ No gap: every ledger gets current state from exactly one process. │
-└────────────────────────────────────────────────────────────────────────────┘
-```
-
-#### Migration Dependencies
-
-```
 MIGRATION DEPENDENCIES:
 ┌────────────────────────────────────────────────────────────────────────────┐
-│ Both migration subcommands depend on protocol-setup, │
-│ and run concurrently with live ingestion: │
-│ 1. Checkpoint population must have completed (classification_status = │
-│ 'success') │
-│ 2. Live ingestion should be running with the same processor │
-│ 3. History migration: retention_start → tip (until CAS fails) |
-│ 4. Current-state migration: start-ledger → tip (until CAS fails) │
-│ 5. Live ingestion gates both state changes and current state on their │
-│ respective cursors │
-│ 6. Handoff: each migration's CAS fails → live ingestion owns that │
-│ responsibility │
+│ The migration has an explicit dependency on protocol-setup, │
+│ and an implicit dependency on live ingestion │
+│ 1. Live ingestion must be running with the same processor │
+│ 2. Checkpoint population must have completed for the protocol │
+│ 3. Migration processes: start-ledger → (live ingestion start - 1) │
+│ 4. Live ingestion continues from its start point onward │
 │ │
-│ This ensures zero-gap coverage via CAS serialization on each cursor. │
+│ This ensures no ledger gap between backfill and live ingestion. 
│ └────────────────────────────────────────────────────────────────────────────┘ ``` ### Live State Production -During live ingestion, two related but distinct processes run sequentially: +During live ingestion, two related but distinct processes run: 1. **Classification** - Identifies and classifies new contracts as they are deployed -2. **State Production** - Produces protocol-specific state using registered processors (depends on classification) +2. **State Production** - Produces protocol-specific state using registered processors ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -887,32 +648,31 @@ During live ingestion, two related but distinct processes run sequentially: │ (from RPC) │ └────────┬─────────┘ │ + ┌────────────────────┴────────────────────┐ + │ │ + ▼ ▼ +┌─────────────────────────────────────┐ ┌─────────────────────────────────────┐ +│ 1. CLASSIFICATION │ │ 2. STATE PRODUCTION │ +│ │ │ │ +│ Watch for contract deployments/ │ │ Run protocol processors on │ +│ upgrades in ledger changes │ │ transactions in ledger │ +└─────────────────┬───────────────────┘ └─────────────────┬───────────────────┘ + │ │ + ▼ ▼ +┌─────────────────────────────────────┐ ┌─────────────────────────────────────┐ +│ For each new contract: │ │ For each protocol processor: │ +│ ┌───────────────────────────────┐ │ │ ┌───────────────────────────────┐ │ +│ │ 1. Check known_wasms cache │ │ │ │ Processor.Process(ledger) │ │ +│ │ 2. If not known → validate │ │ │ │ │ │ +│ │ WASM against protocol specs│ │ │ │ - Examines transactions │ │ +│ │ 3. Update known_wasms + │ │ │ │ - Produces protocol-specific │ │ +│ │ protocol_contracts │ │ │ │ state changes │ │ +│ └───────────────────────────────┘ │ │ └───────────────────────────────┘ │ +└─────────────────┬───────────────────┘ └─────────────────┬───────────────────┘ + │ │ + └────────────────────┬────────────────────┘ + │ ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ 1. 
CLASSIFICATION │ -│ │ -│ Process ledger entry changes to classify contracts: │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ ContractCode entries: validate WASM, store in protocol_wasms │ │ -│ │ ContractData Instance entries: lookup hash in protocol_wasms, │ │ -│ │ map contract to protocol_contracts │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────┬───────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ 2. STATE PRODUCTION │ -│ │ -│ Run protocol processors on transactions (using updated classifications): │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ For each protocol processor: │ │ -│ │ Processor.Process(ledger) │ │ -│ │ - Examines transactions involving protocol contracts │ │ -│ │ - Produces protocol-specific state changes │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────┬───────────────────────────────────────┘ - │ - ▼ ┌──────────────────────────┐ │ PersistLedgerData() │ │ (single DB transaction) │ @@ -925,55 +685,13 @@ During live ingestion, two related but distinct processes run sequentially: │ New contract │ │ Protocol-specific │ │ Operations, │ │ classifications │ │ state changes │ │ transactions, │ │ (protocol_contracts, │ │ (from processors) │ │ accounts, etc. │ -│ protocol_wasms) │ │ │ │ │ +│ known_wasms) │ │ │ │ │ └──────────────────────┘ └──────────────────────┘ └──────────────────────┘ - -┌─────────────────────────────────────────────────────────────────────────────┐ -│ PER-PROTOCOL DUAL GATING │ -│ │ -│ Within PersistLedgerData, for each registered protocol at ledger N: │ -│ │ -│ === PROTOCOL STATE CHANGES === │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ 1. Read protocol_{ID}_history_cursor │ │ -│ │ │ │ -│ │ 2. 
If cursor >= N-1: │ │ -│ │ - CAS history cursor from N-1 to N │ │ -│ │ - If CAS succeeds: WRITE state changes for N │ │ -│ │ - If CAS fails: skip (history migration wrote them) │ │ -│ │ │ │ -│ │ 3. If cursor < N-1: │ │ -│ │ - SKIP state changes (history migration hasn't caught up) │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ === CURRENT STATE === │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ 4. Read protocol_{ID}_current_state_cursor │ │ -│ │ │ │ -│ │ 5. If cursor >= N-1: │ │ -│ │ - CAS current_state cursor from N-1 to N │ │ -│ │ - If CAS succeeds: WRITE current state for N │ │ -│ │ - If CAS fails: skip (current-state migration wrote it) │ │ -│ │ │ │ -│ │ 6. If cursor < N-1: │ │ -│ │ - SKIP current state (current-state migration hasn't caught up) │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ Why gate state changes: The existing BatchCopy write path (pgx COPY │ -│ protocol) fails on duplicate records — it does not support ON CONFLICT. │ -│ Gating prevents duplicates and follows the same proven CAS pattern │ -│ already designed for current state. │ -│ │ -│ This logic is per-protocol. Different protocols can be at different │ -│ stages — one may have history migration complete while another is still │ -│ running, and current-state migration may be at a different stage than │ -│ history migration for the same protocol. │ -└─────────────────────────────────────────────────────────────────────────────┘ ``` -## protocol_wasms Lookup Optimization +## known_wasms Lookup Optimization -The `protocol_wasms` table grows unboundedly as new contracts are deployed on the network. Since +The `known_wasms` table grows unboundedly as new contracts are deployed on the network. Since every live ingestion lookup queries this table, optimizing lookup performance is critical. 
#### Default Implementation: LRU Cache + PostgreSQL @@ -982,7 +700,7 @@ The recommended approach is an in-memory LRU cache layered over the PostgreSQL t ``` ┌─────────────────────────────────────────────────────────────────────────────┐ -│ protocol_wasms LOOKUP OPTIMIZATION │ +│ known_wasms LOOKUP OPTIMIZATION │ └─────────────────────────────────────────────────────────────────────────────┘ New Contract Deployment @@ -1005,7 +723,7 @@ The recommended approach is an in-memory LRU cache layered over the PostgreSQL t ▼ ▼ ┌──────────────┐ ┌──────────────────────┐ │ Return │ │ Query PostgreSQL │ - │ cached │ │ protocol_wasms table │ + │ cached │ │ known_wasms table │ │ protocol_id │ │ (1-5ms) │ └──────────────┘ └──────────┬───────────┘ │ @@ -1036,7 +754,7 @@ func (c *KnownWasmsCache) Lookup(ctx context.Context, hash []byte) (*string, boo // Cache miss: query DB (~1-5ms) var protocolID *string err := c.db.QueryRowContext(ctx, - "SELECT protocol_id FROM protocol_wasms WHERE wasm_hash = $1", key).Scan(&protocolID) + "SELECT protocol_id FROM known_wasms WHERE wasm_hash = $1", key).Scan(&protocolID) if err == sql.ErrNoRows { return nil, false, nil // Not in DB at all @@ -1051,227 +769,62 @@ func (c *KnownWasmsCache) Lookup(ctx context.Context, hash []byte) (*string, boo } ``` -## Write-Through Current State Cache - -When live ingestion first takes over current-state production for a protocol (its first successful CAS), it needs the current state to compute the next state. This is handled by a write-through in-memory cache, similar in pattern to the protocol_wasms LRU cache above. 
- -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ WRITE-THROUGH CURRENT STATE CACHE │ -└─────────────────────────────────────────────────────────────────────────────┘ - - Live Ingestion at Ledger N - │ - ▼ - ┌──────────────────────┐ - │ Check in-memory │ - │ state cache for │ - │ protocol │ - └──────────┬───────────┘ - │ - ┌───────────┴───────────┐ - │ │ - POPULATED EMPTY - │ │ - ▼ ▼ - ┌──────────────┐ ┌──────────────────────┐ - │ Use cached │ │ Read current state │ - │ state to │ │ from protocol state │ - │ compute N │ │ tables (one-time DB │ - │ │ │ read at handoff) │ - └──────────────┘ └──────────┬───────────┘ - │ │ - │ ▼ - │ ┌──────────────────────┐ - │ │ Populate in-memory │ - │ │ cache │ - │ └──────────┬───────────┘ - │ │ - └───────────┬───────────┘ - │ - ▼ - ┌──────────────────────┐ - │ Compute new state │ - │ for ledger N │ - └──────────┬───────────┘ - │ - ▼ - ┌──────────────────────┐ - │ Update in-memory │ - │ cache + write to │ - │ protocol state │ - │ tables in DB │ - │ (write-through) │ - └──────────────────────┘ -``` - -**Cache structure**: -```go -// Per-protocol current state cache -map[protocolID] -> { - currentStateCursor uint32 // last ledger for which state was produced - stateData protocolState // protocol-specific current state -} -``` - -**Lifecycle**: -- **Empty at start**: Cache is unpopulated when live ingestion starts -- **Populated from DB**: When live ingestion first successfully CAS-advances the cursor (handoff from migration), it reads current state from the protocol's state tables (one-time read) -- **Updated per ledger**: On each subsequent ledger, cache is updated in-memory and written through to DB -- **Lost on restart**: If live ingestion restarts, the cache is repopulated from DB on the next current-state production - ## Backfill Migrations -Backfill migrations are split into two independent subcommands: -- `protocol-migrate history` — writes protocol state changes within the retention 
window
-- `protocol-migrate current-state` — builds current state from a protocol's deployment ledger
+Backfill migrations build current state and/or write state changes according to the logic defined in the processor for the protocol being migrated.
 
-Each subcommand converges independently with live ingestion via its own CAS cursor. They can run in any order, concurrently, or only one can be run.
+The `protocol-migrate` command accepts a set of protocol IDs as an explicit signal to migrate those protocols. Each protocol migration requires a specific ledger range, which may not match the range other migrations need, even if they are implemented at the same time. Migrations that do share a ledger range can run in one process.
 
-### History Migration Command
+### Migration Command
 
 ```bash
-./wallet-backend protocol-migrate history --protocol-id SEP50 SEP41
+./wallet-backend protocol-migrate --protocol-id SEP50 SEP41 --start-ledger 1 --end-ledger 5
 ```
 
 **Parameters**:
-- `--protocol-id`: Protocol(s) to migrate (required)
-- No `--start-ledger` — always reads `oldest_ledger_cursor` from `ingest_store`
-
-The history migration runs until it converges with live ingestion. It processes batches from `oldest_ledger_cursor` toward the tip, CAS-advancing the history cursor with each batch commit. When a CAS fails (because live ingestion advanced the cursor first), the migration detects the handoff, sets `history_migration_status = 'success'`, and exits. 
+- `--protocol-id`: The protocol(s) to migrate (must exist in `protocols` table) +- `--start-ledger`: First ledger to process +- `--end-ledger`: Last ledger to process (should be the ledger before live ingestion started) -### Current-State Migration Command - -```bash -./wallet-backend protocol-migrate current-state --protocol-id SEP50 --start-ledger 1000 -``` - -**Parameters**: -- `--protocol-id`: Protocol(s) to migrate (required) -- `--start-ledger`: First ledger to process (required, based on protocol deployment) - -The current-state migration runs until it converges with live ingestion. It processes ALL ledgers from `--start-ledger` toward the tip, CAS-advancing the current-state cursor with each batch commit. It writes only current state — no state changes. When a CAS fails, the migration detects the handoff, sets `current_state_migration_status = 'success'`, and exits. - -### History Migration Workflow - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ HISTORY MIGRATION RUNNER WORKFLOW │ -└─────────────────────────────────────────────────────────────────────────────┘ - -┌────────────────────────────────────────────────────────────────────────────┐ -│ 1. VALIDATE │ -├────────────────────────────────────────────────────────────────────────────┤ -│ - Verify protocol(s) exists in registry │ -│ - Verify classification_status = 'success' │ -│ - Set history_migration_status = 'in_progress' │ -│ - Read oldest_ledger_cursor from ingest_store (retention window start) │ -│ - Initialize protocol_{ID}_history_cursor = oldest_ledger_cursor - 1 │ -└────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌────────────────────────────────────────────────────────────────────────────┐ -│ 2. PROCESS BATCHES TO TIP │ -├────────────────────────────────────────────────────────────────────────────┤ -│ Loop: │ -│ a. Read latest_ledger_cursor to get target │ -│ b. Split [cursor+1, target] into batches │ -│ c. 
Process batches in parallel with ordered commit │ -│ d. Each batch commit: │ -│ - CAS-advance protocol_{ID}_history_cursor │ -│ - If CAS succeeds: write state changes │ -│ - If CAS fails: handoff detected → go to step 3 │ -│ e. After all batches: re-read latest_ledger_cursor │ -│ f. If more ledgers remain: repeat from (b) │ -│ g. If no more ledgers: block on RPC for next ledger (~5s), repeat │ -└────────────────────────────────────────────────────────────────────────────┘ - │ - CAS failure - (handoff) - │ - ▼ -┌────────────────────────────────────────────────────────────────────────────┐ -│ 3. COMPLETE │ -├────────────────────────────────────────────────────────────────────────────┤ -│ - Verify cursor is at or past the ledger migration tried to write │ -│ - Set history_migration_status = 'success' │ -│ - Clean up migration resources │ -│ - Live ingestion now owns state change production for this protocol │ -└────────────────────────────────────────────────────────────────────────────┘ -``` - -### Current-State Migration Workflow +### Migration Workflow ``` ┌─────────────────────────────────────────────────────────────────────────────┐ -│ CURRENT-STATE MIGRATION RUNNER WORKFLOW │ +│ MIGRATION RUNNER WORKFLOW │ └─────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────────────────────────────────────────────────────┐ │ 1. 
VALIDATE │ ├────────────────────────────────────────────────────────────────────────────┤ -│ - Verify protocol(s) exists in registry │ -│ - Verify classification_status = 'success' │ -│ - Set current_state_migration_status = 'in_progress' │ -│ - Initialize protocol_{ID}_current_state_cursor = start_ledger - 1 │ +│ - Verify protocol(s) exists in registry │ +│ - Verify migration_status = 'classification_success' │ +│ - Set migration_status = 'backfilling_in_progress' │ └────────────────────────────────────────────────────────────────────────────┘ │ ▼ ┌────────────────────────────────────────────────────────────────────────────┐ -│ 2. PROCESS BATCHES TO TIP │ +│ 2. PROCESS LEDGER RANGE │ ├────────────────────────────────────────────────────────────────────────────┤ -│ Loop: │ -│ a. Read latest_ledger_cursor to get target │ -│ b. Split [cursor+1, target] into batches │ -│ c. Process batches in parallel with ordered commit │ -│ d. Each batch commit: │ -│ - CAS-advance protocol_{ID}_current_state_cursor │ -│ - If CAS succeeds: write current state │ -│ - If CAS fails: handoff detected → go to step 3 │ -│ e. After all batches: re-read latest_ledger_cursor │ -│ f. If more ledgers remain: repeat from (b) │ -│ g. If no more ledgers: block on RPC for next ledger (~5s), repeat │ +│ For ledger = start-ledger to end-ledger: │ +│ - Fetch ledger data (from archive or RPC) │ +│ - Run processor to find protocol operations │ +│ - Produce state changes / current state │ └────────────────────────────────────────────────────────────────────────────┘ │ - CAS failure - (handoff) - │ ▼ ┌────────────────────────────────────────────────────────────────────────────┐ │ 3. 
COMPLETE │ ├────────────────────────────────────────────────────────────────────────────┤ -│ - Verify cursor is at or past the ledger migration tried to write │ -│ - Set current_state_migration_status = 'success' │ -│ - Clean up migration resources │ -│ - Live ingestion now owns current-state production for this protocol │ +│ - Set migration_status = 'backfilling_success' │ +│ - Current state APIs now serve this protocol's data │ └────────────────────────────────────────────────────────────────────────────┘ -ERROR HANDLING (applies to both subcommands): +ERROR HANDLING: ┌────────────────────────────────────────────────────────────────────────────┐ │ If migration fails at any point: │ -│ - Set respective status column = 'failed' │ +│ - Set migration_status = 'failed' │ │ - Log error details │ │ - Migration can be retried after fixing the issue │ -│ - On restart: resume from the respective CAS cursor + 1 │ -│ (history_cursor for history, current_state_cursor for current-state) │ -└────────────────────────────────────────────────────────────────────────────┘ - -STATUS TRANSITIONS (per column): -┌────────────────────────────────────────────────────────────────────────────┐ -│ classification_status: │ -│ not_started → in_progress (protocol-setup starts) │ -│ → success (protocol-setup completes) │ -│ → failed (error) │ -│ │ -│ history_migration_status: │ -│ not_started → in_progress (protocol-migrate history starts) │ -│ → success (CAS fails = live ingestion took over) │ -│ → failed (error) │ -│ │ -│ current_state_migration_status: │ -│ not_started → in_progress (protocol-migrate current-state starts) │ -│ → success (CAS fails = live ingestion took over) │ -│ → failed (error) │ └────────────────────────────────────────────────────────────────────────────┘ ``` @@ -1279,11 +832,7 @@ STATUS TRANSITIONS (per column): Protocol backfill migrations can process millions of ledgers. 
Sequential processing (ledger-by-ledger) is slow because each ledger must wait for the previous to complete. This section describes how to -parallelize backfill migrations while preserving correctness. - -The two migration subcommands have different parallelization characteristics: -- **History migration** (`protocol-migrate history`): State changes for ledger N do not depend on state changes for ledger N-1, so batches are truly independent. History migration can be more aggressively parallelized — batches can be processed and committed in any order without affecting correctness. -- **Current-state migration** (`protocol-migrate current-state`): Current state is order-dependent (see below), so batches must be committed in order even though they can be processed in parallel. +parallelize backfill migrations while preserving the correctness of order-dependent current state tracking. ### Order-Dependent Current State Tracking @@ -1307,11 +856,11 @@ The final current state must reflect ledger 300's removal, not ledger 100's addi ### Parallel Processing with Ordered Commit -The solution uses a **streaming ordered commit** pattern (required for current-state migration; history migration can use simpler unordered commit since state changes are independent): +The solution uses a **streaming ordered commit** pattern: 1. **PARALLEL PHASE**: Process ledger batches concurrently (each batch gets isolated state) 2. **ORDERED COMMIT**: A committer goroutine writes completed batches to the database **in order** -3. **CURSOR TRACKING**: Each batch commit CAS-advances the respective cursor (history or current-state). If a CAS fails during any batch commit, migration detects that live ingestion has taken over and exits. +3. 
**CURSOR TRACKING**: Each batch commit updates the migration cursor for crash recovery ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -1343,10 +892,7 @@ The solution uses a **streaming ordered commit** pattern (required for current-s │ 1. Creates isolated LedgerBackend │ │ 2. Creates isolated BatchBuffer │ │ 3. Processes ledgers sequentially within batch │ -│ 4. Generates output per subcommand: │ -│ - History: state changes for each ledger │ -│ - Current-state: current state running totals │ -│ 5. Sends BatchResult to results channel │ +│ 4. Sends BatchResult to results channel │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ │ ▼ ▼ ▼ ▼ ▼ @@ -1371,27 +917,25 @@ The solution uses a **streaming ordered commit** pattern (required for current-s │ When batch 1 arrives: ┌────────────────┼────────────────┐ - │ Commit 1, then 2, then 3 │ + │ Commit 1, then 2, then 3 │ │ (sequential, in order) │ └────────────────┼────────────────┘ │ ┌───────────────────────────┼───────────────────────────┐ ▼ ▼ ▼ -┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐ -│ COMMIT Batch 1 │ │ COMMIT Batch 2 │ │ COMMIT Batch 3 │ -│ CAS cursor→1000 │ ──▶ │ CAS cursor→2000 │ ──▶ │ CAS cursor→3000 │ -│ + batch data │ │ + batch data │ │ + batch data │ -│ (atomic tx) │ │ (atomic tx) │ │ (atomic tx) │ -└──────────────────┘ └──────────────────┘ └──────────────────┘ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ COMMIT Batch 1 │ │ COMMIT Batch 2 │ │ COMMIT Batch 3 │ +│ cursor = 1000 │ ──▶ │ cursor = 2000 │ ──▶ │ cursor = 3000 │ +│ (atomic tx) │ │ (atomic tx) │ │ (atomic tx) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ │ │ │ ▼ ▼ ▼ - CAS fail? CAS fail? CAS fail? - No → continue No → continue No → continue - Yes → handoff Yes → handoff Yes → handoff + Crash? Crash? Crash? 
+ Resume @ 1 Resume @ 1001 Resume @ 2001
 ```
 
-**Crash Recovery**: If the process crashes after committing batch 2, the respective CAS cursor is at ledger 2000.
-On restart, processing resumes from ledger 2001 — no work is lost. Each subcommand uses its own CAS cursor for crash recovery, eliminating the need for a separate migration cursor.
+**Crash Recovery**: If the process crashes after committing batch 2, the cursor is at ledger 2000.
+On restart, processing resumes from ledger 2001 — no work is lost.
 
 **Example**:
 
@@ -1437,9 +981,7 @@ type OperationProtocol {
 
 type Protocol {
   id: String!
-  classificationStatus: String!
-  historyMigrationStatus: String!
-  currentStateMigrationStatus: String!
+  displayName: String!
 }
 ```
 
@@ -1447,12 +989,25 @@ type Protocol {
 
 Some migrations will write to new tables that will represent the current state produced by a protocol in relation to accounts. An example of this is SEP-50 Collectibles, where we will track collectible mints/transfers in order to maintain a table of collectibles owned by accounts.
 
-The API exposes per-process status fields so clients can independently check whether each migration responsibility is complete. This pushes the responsibility to clients, keeping queries cleaner and faster.
+Current state APIs should use `protocols.migration_status` in order to reject queries before the migration for that data type has completed. Some protocols require a complete view of the protocol's history in order to correctly represent current state. 
-**Client responsibility**: -- Check `historyMigrationStatus = 'success'` before relying on enriched operation history -- Check `currentStateMigrationStatus = 'success'` before relying on current state completeness -- Clients that query data during an in-progress migration may receive incomplete results +Example error for in-progress migration: + +```json +{ + "errors": [ + { + "message": "BLEND protocol data is being migrated; please try again later", + "extensions": { + "code": "PROTOCOL_NOT_READY", + "protocol": "BLEND", + "migration_status": "backfilling_in_progress" + } + } + ], + "data": null +} +``` The `Operation.protocols` field exposes which protocols were involved in an operation. The query path uses existing tables without requiring a dedicated mapping table: @@ -1464,13 +1019,11 @@ The query path uses existing tables without requiring a dedicated mapping table: GraphQL Query: ┌──────────────────────────────────────────────────────────────────────────┐ -│ query { │ +│ query { │ │ operation(id: "12345") { │ │ id │ │ protocols { │ -│ protocol { id, classificationStatus, │ -│ historyMigrationStatus, │ -│ currentStateMigrationStatus } │ +│ protocol { id, displayName } │ │ contractId │ │ } │ │ } │ @@ -1479,7 +1032,7 @@ GraphQL Query: │ ▼ ┌──────────────────────────────────────────────────────────────────────────┐ -│ DATABASE QUERY │ +│ DATABASE QUERY │ └──────────────────────────────────────────────────────────────────────────┘ SELECT DISTINCT p.id, pc.contract_id @@ -1492,7 +1045,7 @@ WHERE o.id = $1; │ ▼ ┌──────────────────────────────────────────────────────────────────────────┐ -│ JOIN VISUALIZATION │ +│ JOIN VISUALIZATION │ └──────────────────────────────────────────────────────────────────────────┘ ┌────────────┐ ┌──────────────────┐ ┌───────────────────┐ ┌───────────┐ @@ -1500,16 +1053,8 @@ WHERE o.id = $1; │ │ │ accounts │ │ contracts │ │ │ ├────────────┤ ├──────────────────┤ ├───────────────────┤ ├───────────┤ │ id │ │ operation_id (FK)│ │ 
contract_id (PK) │ │ id (PK) │ -│ ... │ │ account_id │ │ protocol_id (FK) │ │ classifi- │ -│ │ │ │ │ name │ │ cation_ │ -│ │ │ │ │ │ │ status, │ -│ │ │ │ │ │ │ history_ │ -│ │ │ │ │ │ │ migration │ -│ │ │ │ │ │ │ _status, │ -│ │ │ │ │ │ │ current_ │ -│ │ │ │ │ │ │ state_ │ -│ │ │ │ │ │ │ migration │ -│ │ │ │ │ │ │ _status │ +│ ... │ │ account_id │ │ protocol_id (FK) │ │ migration │ +│ │ │ │ │ name │ │ _status │ └────────────┘ └──────────────────┘ └───────────────────┘ └───────────┘ │ │ │ │ │ │ │ │ @@ -1546,6 +1091,7 @@ INDEXES REQUIRED: │ │ │ protocols: │ │ PRIMARY KEY (id) -- fast lookup by id │ +│ INDEX on (migration_status) -- filter by status │ │ │ └────────────────────────────────────────────────────────────────────────────┘ @@ -1571,31 +1117,25 @@ QUERY COST BREAKDOWN (per operation): ### Client Handling of Migration Status -The API exposes per-process status fields on `Protocol` to allow clients to handle in-progress migrations appropriately. - -**For historical data** (enriched operations with state changes): +During migration, historical data may be partially enriched. Clients can: 1. **Accept partial data**: Display enriched data where available -2. **Wait for completion**: Check `historyMigrationStatus = 'success'` and defer display until complete - -**For current state data**: - -Current state is **progressively available** during migration — the current-state cursor advances incrementally as migration processes each ledger. However, until `currentStateMigrationStatus = 'success'`, the current state only reflects ledgers up to the cursor position and may not include recent activity. - -- `in_progress`: Current state exists but may lag behind live activity. The cursor indicates how far the migration has progressed. -- `success`: Live ingestion has fully taken over current-state production. Current state is up-to-date and will stay current going forward. 
- -Clients should check `currentStateMigrationStatus = 'success'` before relying on current state queries for completeness. Clients that can tolerate partial data may use current state during `in_progress` with the understanding that it reflects state up to the migration cursor, not necessarily the latest ledger. - -Example query to check migration status: - -```graphql -query { - protocols { - id - classificationStatus - historyMigrationStatus - currentStateMigrationStatus - } +2. **Wait for completion**: Check `protocols.migration_status` and defer display until `'backfilling_success'` + +For current state APIs, queries should return an error if the protocol migration is not complete: + +```json +{ + "errors": [ + { + "message": "BLEND protocol data is being migrated; please try again later", + "extensions": { + "code": "PROTOCOL_NOT_READY", + "protocol": "BLEND", + "migration_status": "backfilling_in_progress" + } + } + ], + "data": null } ``` \ No newline at end of file From 8e56f1a292c61dcde2a798e7f7204c840aed44e7 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 19 Feb 2026 08:51:12 -0700 Subject: [PATCH 02/52] updates diagram for checkpoint population flow to better reflect the steps in the ContractData branch, removes Balance branch --- docs/feature-design/data-migrations.md | 77 +++++++++++++------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index e54fd8d79..ee73ad009 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -146,50 +146,47 @@ This has to happen in 2 stages during the migration process: │ (iterates all ledger entries) │ └───────────────┬───────────────┘ │ - ┌────────────────────────┼────────────────────────┐ - │ │ │ - ▼ ▼ ▼ -┌──────────────────┐ ┌──────────────────┐ ┌──────────────────────────┐ -│ LedgerEntryType │ │ LedgerEntryType │ │ LedgerEntryType │ -│ ContractCode │ │ 
ContractData │ │ ContractData │ -│ │ │ (Instance) │ │ (Balance) │ -└────────┬─────────┘ └────────┬─────────┘ └────────────┬─────────────┘ - │ │ │ - ▼ ▼ ▼ -┌──────────────────┐ ┌──────────────────┐ ┌──────────────────────────┐ -│ Extract WASM │ │ Check SAC? │ │ Extract holder address │ -│ bytecode + hash │ │ (AssetFromData) │ │ from Balance key │ -└────────┬─────────┘ └────────┬─────────┘ └────────────┬─────────────┘ - │ │ │ - ▼ │ ▼ -┌──────────────────┐ │ ┌──────────────────────────┐ -│ Group contracts │ ┌───────┴───────┐ │ Track in │ -│ by WASM hash │ │ │ │ contractTokensByHolder │ -│ │ ▼ ▼ │ Address map │ -└────────┬─────────┘ YES NO └──────────────────────────┘ - │ │ │ - ▼ ▼ ▼ -┌──────────────────┐ ┌────────┐ ┌──────────────┐ -│ Validate WASM │ │SAC │ │Compare WASM │ -│ against protocol │ │contract│ │to known │ -│ spec │ │ │ │protocol spec │ -└────────┬─────────┘ └────────┘ └──────────────┘ - │ - ┌────┴────┐ - │ │ - MATCH NO MATCH - │ │ - ▼ ▼ -┌────────┐ ┌──────────┐ -│Insert │ │Skip │ -│to │ │(unknown) │ -│protocol│ │ │ -│_contracts└──────────┘ -└────────┘ + ┌────────────┴────────────┐ + │ │ + ▼ ▼ + ┌──────────────────┐ ┌──────────────────┐ + │ LedgerEntryType │ │ LedgerEntryType │ + │ ContractCode │ │ ContractData │ + │ │ │ (Instance) │ + └────────┬─────────┘ └────────┬─────────┘ + │ │ + ▼ ▼ + ┌──────────────────┐ ┌──────────────────┐ + │ Extract WASM │ │ Check SAC? 
│ + │ bytecode + hash │ │ (AssetFromData) │ + └────────┬─────────┘ └────────┬─────────┘ + │ │ + ▼ ┌───────┴───────┐ + ┌──────────────────┐ │ │ + │ Validate WASM │ ▼ ▼ + │ against protocol │ YES NO + │ validators │ │ │ + └────────┬─────────┘ ▼ ▼ + │ ┌────────┐ ┌──────────────────┐ + ┌────┴────┐ │SAC │ │ Extract wasm_ref │ + │ │ │contract│ │ (hash) from │ + MATCH NO MATCH └────────┘ │ instance data │ + │ │ └────────┬─────────┘ + ▼ ▼ │ + ┌────────┐ ┌──────────┐ ▼ + │Store │ │Store │ ┌──────────────────┐ + │hash in │ │hash in │ │ Map contract ID │ + │known_ │ │known_ │ │ to WASM hash │ + │wasms │ │wasms │ │ (for later lookup│ + │with │ │with NULL │ │ in known_wasms) │ + │protocol│ │protocol │ └──────────────────┘ + └────────┘ └──────────┘ ┌───────────────────────────────┐ │ Post-Processing: │ │ 1. Store in protocol_contracts│ + │ (contract → protocol via │ + │ wasm hash → known_wasms) │ │ 2. Cache in known_wasms │ └───────────────────────────────┘ ``` From 049947e66fce37785b4eda79d8bca9f2bc3d6fa4 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 19 Feb 2026 08:59:06 -0700 Subject: [PATCH 03/52] updates live ingestion classification diagram to better reflect the distinction between uploads and upgrades/deployments --- docs/feature-design/data-migrations.md | 96 ++++++++++++-------------- 1 file changed, 43 insertions(+), 53 deletions(-) diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index ee73ad009..e79cdf51a 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -194,7 +194,7 @@ This has to happen in 2 stages during the migration process: Contracts are grouped by WASM hash before validation. This means we validate each unique WASM blob once, then apply the result to all contracts using that same code. Once a WASM hash is classified, it is stored in the `known_wasms` table to avoid re-classification of future contracts using the same code. 
-During live ingestion, new contracts are classified when they appear in ledger changes. The key difference from checkpoint population is that live ingestion watches for contract deployments/upgrades and compares the WASM blob to known protocols. +During live ingestion, classification happens in two parts: (1) new WASM uploads are validated against protocol specs and stored in `known_wasms`, and (2) contract deployments/upgrades are mapped to protocols via their WASM hash lookup in `known_wasms`. ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -209,62 +209,52 @@ During live ingestion, new contracts are classified when they appear in ledger c ▼ ┌──────────────────────────────┐ │ ProcessLedger() │ - │ (iterate transaction changes)│ + │ (iterate ledger entry changes│ └──────────────┬───────────────┘ │ - ▼ - ┌──────────────────────────────┐ - │ Watch for: │ - │ - Contract deployments │ - │ - Contract upgrades │ - └──────────────┬───────────────┘ - │ - ▼ - ┌──────────────────────────────┐ - │ Extract WASM bytecode │ - │ from deployment/upgrade │ - └──────────────┬───────────────┘ - │ - ▼ - ┌──────────────────────────────┐ - │ Check known_wasms table: │ - │ Is this WASM hash known? 
│ - └──────────────┬───────────────┘ - │ - ┌───────────────┴───────────────┐ - │ │ - ▼ ▼ - ┌──────────────┐ ┌──────────────────┐ - │ WASM hash │ │ WASM hash │ - │ already known│ │ NOT known │ - │ │ │ │ - │ Use cached │ │ Compare WASM to │ - │classification│ │ protocol specs │ - └──────┬───────┘ └────────┬─────────┘ - │ │ - │ ┌───────┴───────┐ - │ │ │ - │ MATCH NO MATCH - │ │ │ - │ ▼ ▼ - │ ┌──────────────┐ ┌──────────────┐ - │ │ Insert to │ │ Mark as │ - │ │ known_wasms │ │ unknown in │ - │ │ + protocol_ │ │ known_wasms │ - │ │ contracts │ │ │ - │ └──────┬───────┘ └──────────────┘ - │ │ - └────────────────────────┘ - │ - ▼ - ┌──────────────────────────────┐ - │ Insert contract mapping to │ - │ protocol_contracts table │ - └──────────────────────────────┘ + ┌───────────────────┴───────────────────┐ + │ │ + ▼ ▼ + ┌─────────────────────┐ ┌──────────────────────────┐ + │ ContractCode │ │ ContractData Instance │ + │ (new WASM upload) │ │ (deployment or upgrade) │ + └──────────┬──────────┘ └────────────┬─────────────┘ + │ │ + ▼ ▼ + ┌─────────────────────┐ ┌──────────────────────────┐ + │ Extract WASM │ │ Extract WASM hash │ + │ bytecode + hash │ │ from instance wasm_ref │ + └──────────┬──────────┘ └────────────┬─────────────┘ + │ │ + ▼ ▼ + ┌─────────────────────┐ ┌──────────────────────────┐ + │ Validate against │ │ Lookup hash in │ + │ protocol validators │ │ known_wasms │ + └──────────┬──────────┘ └────────────┬─────────────┘ + │ │ + ┌────┴────┐ ┌───────────┴───────────┐ + │ │ │ │ + MATCH NO MATCH FOUND NOT FOUND + │ │ │ │ + ▼ ▼ ▼ ▼ + ┌──────────┐ ┌──────────┐ ┌──────────────┐ ┌──────────────────┐ + │Store in │ │Store in │ │ Map contract │ │ Fetch WASM via │ + │known_ │ │known_ │ │ to protocol │ │ RPC, validate, │ + │wasms with│ │wasms with│ │ from cached │ │ then map contract│ + │protocol │ │NULL │ │ classification │ (rare edge case) │ + └──────────┘ └──────────┘ └──────────────┘ └──────────────────┘ + │ │ + └───────────┬───────────┘ + │ + ▼ + 
┌──────────────────────────┐ + │ Insert contract mapping │ + │ to protocol_contracts │ + └──────────────────────────┘ ``` -The classifier compares the WASM blob from new deployments/upgrades against known protocol specifications. -This comparison uses the same validation logic as checkpoint population: +The classifier validates WASM bytecode from ContractCode entries against protocol specifications. +This validation uses the same logic as checkpoint population: 1. Compile WASM with wazero runtime 2. Extract `contractspecv0` custom section From e82240104e27e1b6a1eef1064aceac45da741b97 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 19 Feb 2026 09:03:31 -0700 Subject: [PATCH 04/52] removes incorrect details about live ingestions relationship to protocol-setup in the "When Checkpoint Classification Runs" section --- docs/feature-design/data-migrations.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index e79cdf51a..f3b520c0d 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -503,11 +503,7 @@ When checkpoint population runs for a newly registered protocol, it validates co 2. **In known_wasms with `protocol_id IS NULL`** (previously unknown) #### When Checkpoint Classification Runs -Both backfill migrations and live ingestion will rely on checkpoint population being complete before they can produce state changes for a new protocol. - -If checkpoint population does not run before live ingestion is processing a new protocol, live ingestion will potentially experience more pressure from the additional classification it has to do due to the missing seed of `protocol_contracts`. 
- -If checkpoint population does not run before a backfill migration is started for a new protocol, backfill migration will fail and exit since it does not classify protocols and cannot produce state without any classification being available. +Backfill migrations rely on checkpoint population being complete before they can produce state changes for a new protocol. If checkpoint population does not run before a backfill migration is started for a new protocol, backfill migration will fail and exit since it does not classify protocols and cannot produce state without any classification being available. ### Command From e7de88e48d112bd16b1a5e448b8fa8fe657146eb Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 19 Feb 2026 09:24:29 -0700 Subject: [PATCH 05/52] Updates the migration design to be aware of the history retention window, in order to discard state changes outside of retention. 1. Schema changes: enabled field removed, display_name removed, status default is not_started 2. Status values: All updated to new naming scheme (not_started, classification_in_progress, classification_success, backfilling_in_progress, backfilling_success, failed) 3. protocol-setup: Now uses --protocol-id flag (opt-in), updated command examples and workflow 4. Classification section (line 125): Updated to describe ContractCode validation and ContractData lookup 5. Checkpoint population diagram: Removed Balance branch, updated to show WASM hash storage in known_wasms 6. Live ingestion classification diagram: Separated into ContractCode and ContractData paths with RPC fallback 7. Live State Production diagram: Updated classification box to mention ContractCode uploads and ContractData Instance changes 8. Backfill migration: Added retention-aware processing throughout (flow diagram, workflow diagram, parallel processing) 9. 
Parallel backfill worker pool: Added steps for retention window filtering --- docs/feature-design/data-migrations.md | 81 +++++++++++++++++++++----- 1 file changed, 65 insertions(+), 16 deletions(-) diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index f3b520c0d..f3ca6a720 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -122,7 +122,7 @@ Both live ingestion and backfill migration need the `protocol_contracts` table p Classification is the act of identifying new and existing contracts on the network and assigning a relationship to a known protocol. This has to happen in 2 stages during the migration process: - checkpoint population: We will use a history archive from the latest checkpoint in order to classify all contracts on the network. We will rely on the latest checkpoint available at the time of the migration. -- live ingestion: during live ingestion, we classify new contracts by watching for contract deployments/upgrades and comparing the wasm blob to the known protocols. +- live ingestion: during live ingestion, we classify new WASM uploads by validating the bytecode against protocol specs, and map contract deployments/upgrades to protocols by looking up their WASM hash in `known_wasms`. ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -553,6 +553,9 @@ State produced by new protocols is done through dual processes in order to cover ### Backfill Migration The migration runner processes historical ledgers to enrich operations with protocol state and produce state changes/current state. + +**Retention-Aware Processing**: The migration reads the retention window start from `ingest_store` (`oldest_ledger_cursor`). State changes are only persisted for ledgers within the retention window, but all ledgers in the range are processed to build accurate current state. 
+ ``` ┌─────────────────────────────────────────────────────────────────────────────┐ │ BACKFILL MIGRATION FLOW │ @@ -562,8 +565,8 @@ The migration runner processes historical ledgers to enrich operations with prot │ ./wallet-backend │ │ protocol-migrate │ │ --protocol-id SEP50 ... │ - │ --start-ledger 1 │ - │ --end-ledger 5 │ + │ --start-ledger 1000 │ + │ --end-ledger 5000 │ └─────────────┬──────────────┘ │ ▼ @@ -577,6 +580,8 @@ The migration runner processes historical ledgers to enrich operations with prot │ - Validate protocol exists │ │ - Set status = backfilling │ │ _in_progress │ + │ - Read oldest_ledger_cursor│ + │ from ingest_store │ └─────────────┬──────────────┘ │ ▼ @@ -590,13 +595,28 @@ The migration runner processes historical ledgers to enrich operations with prot │ Use processor to: │ │ - Find operations involving│ │ protocol contracts │ - │ - Produce state │ + │ - Generate state changes │ + │ - Update current state │ + │ running totals │ + └─────────────┬──────────────┘ + │ + ▼ + ┌────────────────────────────┐ + │ If ledger >= retention │ + │ window start: │ + │ - Persist state changes │ │ - Enrich historical data │ + │ │ + │ Otherwise: │ + │ - Discard state changes │ + │ (already applied to │ + │ current state totals) │ └─────────────┬──────────────┘ │ ▼ ┌────────────────────────────┐ │ Complete() │ + │ - Write final current state│ │ - Set status = │ │ backfilling_success │ └────────────────────────────┘ @@ -613,6 +633,29 @@ MIGRATION DEPENDENCIES: │ │ │ This ensures no ledger gap between backfill and live ingestion. 
│ └────────────────────────────────────────────────────────────────────────────┘ + +RETENTION WINDOW HANDLING: +┌────────────────────────────────────────────────────────────────────────────┐ +│ The migration decouples the processing range from the retention window: │ +│ │ +│ Example: Protocol deployed at ledger 1000, retention starts at 4000 │ +│ │ +│ Ledger 1000 ──────────────────────────────────────── Ledger 5000 │ +│ [start-ledger] [retention start] [end-ledger] │ +│ │ │ │ │ +│ ├─────────────────────────┤ │ │ +│ │ Process but DISCARD │ │ │ +│ │ state changes │ │ │ +│ │ (update current state │ │ │ +│ │ running totals only) │ │ │ +│ │ ├──────────────────────────┤ │ +│ │ │ Process AND PERSIST │ │ +│ │ │ state changes │ │ +│ │ │ (within retention) │ │ +│ │ +│ This allows accurate current state even when protocol history extends │ +│ beyond the retention window. │ +└────────────────────────────────────────────────────────────────────────────┘ ``` ### Live State Production @@ -637,19 +680,19 @@ During live ingestion, two related but distinct processes run: ┌─────────────────────────────────────┐ ┌─────────────────────────────────────┐ │ 1. CLASSIFICATION │ │ 2. STATE PRODUCTION │ │ │ │ │ -│ Watch for contract deployments/ │ │ Run protocol processors on │ -│ upgrades in ledger changes │ │ transactions in ledger │ +│ Watch for ContractCode uploads │ │ Run protocol processors on │ +│ and ContractData Instance changes │ │ transactions in ledger │ └─────────────────┬───────────────────┘ └─────────────────┬───────────────────┘ │ │ ▼ ▼ ┌─────────────────────────────────────┐ ┌─────────────────────────────────────┐ -│ For each new contract: │ │ For each protocol processor: │ +│ For each ledger entry change: │ │ For each protocol processor: │ │ ┌───────────────────────────────┐ │ │ ┌───────────────────────────────┐ │ -│ │ 1. Check known_wasms cache │ │ │ │ Processor.Process(ledger) │ │ -│ │ 2. 
If not known → validate │ │ │ │ │ │ -│ │ WASM against protocol specs│ │ │ │ - Examines transactions │ │ -│ │ 3. Update known_wasms + │ │ │ │ - Produces protocol-specific │ │ -│ │ protocol_contracts │ │ │ │ state changes │ │ +│ │ ContractCode: validate WASM, │ │ │ │ Processor.Process(ledger) │ │ +│ │ store in known_wasms │ │ │ │ │ │ +│ │ ContractData Instance: lookup │ │ │ │ - Examines transactions │ │ +│ │ hash in known_wasms, map │ │ │ │ - Produces protocol-specific │ │ +│ │ to protocol_contracts │ │ │ │ state changes │ │ │ └───────────────────────────────┘ │ │ └───────────────────────────────┘ │ └─────────────────┬───────────────────┘ └─────────────────┬───────────────────┘ │ │ @@ -754,7 +797,7 @@ func (c *KnownWasmsCache) Lookup(ctx context.Context, hash []byte) (*string, boo ## Backfill Migrations -Backfill migrations build current state and/or write state changes according to the logic defined in the processor for the protocol being migrated. +Backfill migrations process historical ledgers to build current state and generate state changes. State changes are only persisted for ledgers within the retention window, but all ledgers in the specified range are processed to produce accurate current state. The `protocol-migrate` command accepts a set of protocol IDs for an explicit signal to migrate those protocols. Each protocol migration requires a specific range, which may not be exactly what other migrations need even if they are implemented at the same time. Migrations that do share a ledger range can run in one process. 
@@ -766,7 +809,7 @@ The `protocol-migrate` command accepts a set of protocol IDs for an explicit sig **Parameters**: - `--protocol-id`: The protocol(s) to migrate (must exist in `protocols` table) -- `--start-ledger`: First ledger to process +- `--start-ledger`: First ledger to process (set based on protocol deployment/data needs) - `--end-ledger`: Last ledger to process (should be the ledger before live ingestion started) ### Migration Workflow @@ -782,6 +825,7 @@ The `protocol-migrate` command accepts a set of protocol IDs for an explicit sig │ - Verify protocol(s) exists in registry │ │ - Verify migration_status = 'classification_success' │ │ - Set migration_status = 'backfilling_in_progress' │ +│ - Read oldest_ledger_cursor from ingest_store (retention window start) │ └────────────────────────────────────────────────────────────────────────────┘ │ ▼ @@ -791,13 +835,16 @@ The `protocol-migrate` command accepts a set of protocol IDs for an explicit sig │ For ledger = start-ledger to end-ledger: │ │ - Fetch ledger data (from archive or RPC) │ │ - Run processor to find protocol operations │ -│ - Produce state changes / current state │ +│ - Generate state changes, update current state running totals │ +│ - If ledger >= retention window start: persist state changes │ +│ - Otherwise: discard state changes (totals already updated) │ └────────────────────────────────────────────────────────────────────────────┘ │ ▼ ┌────────────────────────────────────────────────────────────────────────────┐ │ 3. COMPLETE │ ├────────────────────────────────────────────────────────────────────────────┤ +│ - Write final current state │ │ - Set migration_status = 'backfilling_success' │ │ - Current state APIs now serve this protocol's data │ └────────────────────────────────────────────────────────────────────────────┘ @@ -875,7 +922,9 @@ The solution uses a **streaming ordered commit** pattern: │ 1. Creates isolated LedgerBackend │ │ 2. Creates isolated BatchBuffer │ │ 3. 
Processes ledgers sequentially within batch │ -│ 4. Sends BatchResult to results channel │ +│ 4. Generates state changes, updates current state running totals │ +│ 5. Filters state changes based on retention window │ +│ 6. Sends BatchResult to results channel │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ │ ▼ ▼ ▼ ▼ ▼ From 882ddc4802c39cfa0f8631c1b0dec5d94a26fa09 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 19 Feb 2026 09:28:48 -0700 Subject: [PATCH 06/52] updates live ingestion state production diagram to better reflect the relationship between classification and state production --- docs/feature-design/data-migrations.md | 53 +++++++++++++------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index f3ca6a720..bb1824728 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -660,9 +660,9 @@ RETENTION WINDOW HANDLING: ### Live State Production -During live ingestion, two related but distinct processes run: +During live ingestion, two related but distinct processes run sequentially: 1. **Classification** - Identifies and classifies new contracts as they are deployed -2. **State Production** - Produces protocol-specific state using registered processors +2. **State Production** - Produces protocol-specific state using registered processors (depends on classification) ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -674,31 +674,32 @@ During live ingestion, two related but distinct processes run: │ (from RPC) │ └────────┬─────────┘ │ - ┌────────────────────┴────────────────────┐ - │ │ - ▼ ▼ -┌─────────────────────────────────────┐ ┌─────────────────────────────────────┐ -│ 1. CLASSIFICATION │ │ 2. 
STATE PRODUCTION │ -│ │ │ │ -│ Watch for ContractCode uploads │ │ Run protocol processors on │ -│ and ContractData Instance changes │ │ transactions in ledger │ -└─────────────────┬───────────────────┘ └─────────────────┬───────────────────┘ - │ │ - ▼ ▼ -┌─────────────────────────────────────┐ ┌─────────────────────────────────────┐ -│ For each ledger entry change: │ │ For each protocol processor: │ -│ ┌───────────────────────────────┐ │ │ ┌───────────────────────────────┐ │ -│ │ ContractCode: validate WASM, │ │ │ │ Processor.Process(ledger) │ │ -│ │ store in known_wasms │ │ │ │ │ │ -│ │ ContractData Instance: lookup │ │ │ │ - Examines transactions │ │ -│ │ hash in known_wasms, map │ │ │ │ - Produces protocol-specific │ │ -│ │ to protocol_contracts │ │ │ │ state changes │ │ -│ └───────────────────────────────┘ │ │ └───────────────────────────────┘ │ -└─────────────────┬───────────────────┘ └─────────────────┬───────────────────┘ - │ │ - └────────────────────┬────────────────────┘ - │ ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 1. CLASSIFICATION │ +│ │ +│ Process ledger entry changes to classify contracts: │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ ContractCode entries: validate WASM, store in known_wasms │ │ +│ │ ContractData Instance entries: lookup hash in known_wasms, │ │ +│ │ map contract to protocol_contracts │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────┬───────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 2. 
STATE PRODUCTION │ +│ │ +│ Run protocol processors on transactions (using updated classifications): │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ For each protocol processor: │ │ +│ │ Processor.Process(ledger) │ │ +│ │ - Examines transactions involving protocol contracts │ │ +│ │ - Produces protocol-specific state changes │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────┬───────────────────────────────────────┘ + │ + ▼ ┌──────────────────────────┐ │ PersistLedgerData() │ │ (single DB transaction) │ From f9365f1f23fba3925a9e0238dd94173fb34c26e1 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 19 Feb 2026 09:34:34 -0700 Subject: [PATCH 07/52] removes migration status checks from current state queries, exposes migration status in the API for protocols --- docs/feature-design/data-migrations.md | 59 +++++++++----------------- 1 file changed, 21 insertions(+), 38 deletions(-) diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index bb1824728..f36790c92 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -1014,7 +1014,7 @@ type OperationProtocol { type Protocol { id: String! - displayName: String! + migrationStatus: String! } ``` @@ -1022,25 +1022,9 @@ type Protocol { Some migrations will write to new tables that will represent the current state produced by a protocol in relation to accounts. An example of this is SEP-50 Collectibles, where we will track collectible mints/transfers in order to maintain a table of collectibles owned by accounts. -Current state APIs should use `protocols.migration_status` in order to reject queries before the migration for that data type has completed. Some protocols require a complete view of the protocols history in order to correctly represent current state. 
+The API exposes `Protocol.migrationStatus` so clients can check whether a protocol's migration is complete before querying current state data. This pushes the responsibility to clients, keeping queries cleaner and faster. -Example error for in-progress migration: - -```json -{ - "errors": [ - { - "message": "BLEND protocol data is being migrated; please try again later", - "extensions": { - "code": "PROTOCOL_NOT_READY", - "protocol": "BLEND", - "migration_status": "backfilling_in_progress" - } - } - ], - "data": null -} -``` +**Client responsibility**: Clients should check `migrationStatus = 'backfilling_success'` before relying on current state data. Clients that query current state during an in-progress migration may receive incomplete data. The `Operation.protocols` field exposes which protocols were involved in an operation. The query path uses existing tables without requiring a dedicated mapping table: @@ -1056,7 +1040,7 @@ GraphQL Query: │ operation(id: "12345") { │ │ id │ │ protocols { │ -│ protocol { id, displayName } │ +│ protocol { id, migrationStatus } │ │ contractId │ │ } │ │ } │ @@ -1150,25 +1134,24 @@ QUERY COST BREAKDOWN (per operation): ### Client Handling of Migration Status -During migration, historical data may be partially enriched. Clients can: +The API exposes `Protocol.migrationStatus` to allow clients to handle in-progress migrations appropriately. + +**For historical data** (partially enriched during migration): 1. **Accept partial data**: Display enriched data where available -2. 
**Wait for completion**: Check `protocols.migration_status` and defer display until `'backfilling_success'` - -For current state APIs, queries should return an error if the protocol migration is not complete: - -```json -{ - "errors": [ - { - "message": "BLEND protocol data is being migrated; please try again later", - "extensions": { - "code": "PROTOCOL_NOT_READY", - "protocol": "BLEND", - "migration_status": "backfilling_in_progress" - } - } - ], - "data": null +2. **Wait for completion**: Check `Protocol.migrationStatus` and defer display until `'backfilling_success'` + +**For current state data**: + +Clients should check `Protocol.migrationStatus = 'backfilling_success'` before relying on current state queries. Current state may be incomplete or inaccurate while migration is in progress. + +Example query to check migration status: + +```graphql +query { + protocols { + id + migrationStatus + } } ``` \ No newline at end of file From 2c6669f1b32f6e8181dbd214f57cf65ce53fb82f Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 20 Feb 2026 16:54:12 -0700 Subject: [PATCH 08/52] Extract checkpoint population into dedicated services, add known_wasms tracking - Add known_wasms table (migration, model, mock, and data layer tests) for tracking WASM hashes during checkpoint population - Add KnownWasm field to Models struct - Create WasmIngestionService (wasm_ingestion.go) that runs protocol validators against WASM bytecode and batch-persists hashes to known_wasms - Create CheckpointService (checkpoint.go) that orchestrates single-pass checkpoint population, delegating ContractCode entries to both WasmIngestionService and TokenProcessor, and all other entries to TokenProcessor - Extract readerFactory on checkpointService for injectable checkpoint reader creation - Extract TokenProcessor interface and NewTokenProcessor from TokenIngestionService, moving checkpoint iteration logic out of token_ingestion.go into checkpoint.go - Remove db, archive, and 
PopulateAccountTokens from TokenIngestionService interface and struct - Remove dbPool parameter from NewTokenIngestionServiceForLoadtest - Wire CheckpointService into IngestServiceConfig and ingestService - Update ingest_live.go to call checkpointService.PopulateFromCheckpoint instead of tokenIngestionService.PopulateAccountTokens - Update ingest.go setupDeps to construct WasmIngestionService and CheckpointService - Add ContractValidatorMock, ProtocolValidatorMock, ChangeReaderMock, CheckpointServiceMock, WasmIngestionServiceMock, TokenProcessorMock, and TokenIngestionServiceMock updates to mocks.go - Add unit tests for WasmIngestionService (10 cases covering ProcessContractCode and PersistKnownWasms) - Add unit tests for CheckpointService (16 cases covering entry routing, error propagation, and context cancellation) --- internal/data/known_wasms.go | 67 ++++++ internal/data/known_wasms_test.go | 130 ++++++++++++ internal/data/mocks.go | 34 ++-- .../migrations/2026-02-20.0-known_wasms.sql | 9 + internal/services/wasm_ingestion.go | 85 ++++++++ internal/services/wasm_ingestion_test.go | 192 ++++++++++++++++++ 6 files changed, 505 insertions(+), 12 deletions(-) create mode 100644 internal/data/known_wasms.go create mode 100644 internal/data/known_wasms_test.go create mode 100644 internal/db/migrations/2026-02-20.0-known_wasms.sql create mode 100644 internal/services/wasm_ingestion.go create mode 100644 internal/services/wasm_ingestion_test.go diff --git a/internal/data/known_wasms.go b/internal/data/known_wasms.go new file mode 100644 index 000000000..c17366826 --- /dev/null +++ b/internal/data/known_wasms.go @@ -0,0 +1,67 @@ +package data + +import ( + "context" + "fmt" + "time" + + "github.com/jackc/pgx/v5" + + "github.com/stellar/wallet-backend/internal/db" + "github.com/stellar/wallet-backend/internal/metrics" + "github.com/stellar/wallet-backend/internal/utils" +) + +// KnownWasm represents a WASM hash tracked during checkpoint population. 
+type KnownWasm struct { + WasmHash string `db:"wasm_hash"` + ProtocolID *string `db:"protocol_id"` + CreatedAt time.Time `db:"created_at"` +} + +// KnownWasmModelInterface defines the interface for known_wasms operations. +type KnownWasmModelInterface interface { + BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error +} + +// KnownWasmModel implements KnownWasmModelInterface. +type KnownWasmModel struct { + DB db.ConnectionPool + MetricsService metrics.MetricsService +} + +var _ KnownWasmModelInterface = (*KnownWasmModel)(nil) + +// BatchInsert inserts multiple known WASMs using UNNEST for efficient batch insertion. +// Uses ON CONFLICT (wasm_hash) DO NOTHING for idempotent operations. +func (m *KnownWasmModel) BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error { + if len(wasms) == 0 { + return nil + } + + wasmHashes := make([]string, len(wasms)) + protocolIDs := make([]*string, len(wasms)) + + for i, w := range wasms { + wasmHashes[i] = w.WasmHash + protocolIDs[i] = w.ProtocolID + } + + const query = ` + INSERT INTO known_wasms (wasm_hash, protocol_id) + SELECT * FROM UNNEST($1::text[], $2::text[]) + ON CONFLICT (wasm_hash) DO NOTHING + ` + + start := time.Now() + _, err := dbTx.Exec(ctx, query, wasmHashes, protocolIDs) + if err != nil { + m.MetricsService.IncDBQueryError("BatchInsert", "known_wasms", utils.GetDBErrorType(err)) + return fmt.Errorf("batch inserting known wasms: %w", err) + } + + m.MetricsService.ObserveDBQueryDuration("BatchInsert", "known_wasms", time.Since(start).Seconds()) + m.MetricsService.ObserveDBBatchSize("BatchInsert", "known_wasms", len(wasms)) + m.MetricsService.IncDBQuery("BatchInsert", "known_wasms") + return nil +} diff --git a/internal/data/known_wasms_test.go b/internal/data/known_wasms_test.go new file mode 100644 index 000000000..618390f0a --- /dev/null +++ b/internal/data/known_wasms_test.go @@ -0,0 +1,130 @@ +package data + +import ( + "context" + "testing" + + "github.com/jackc/pgx/v5" + 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/stellar/wallet-backend/internal/db" + "github.com/stellar/wallet-backend/internal/db/dbtest" + "github.com/stellar/wallet-backend/internal/metrics" +) + +func TestKnownWasmBatchInsert(t *testing.T) { + ctx := context.Background() + + dbt := dbtest.Open(t) + defer dbt.Close() + dbConnectionPool, err := db.OpenDBConnectionPool(dbt.DSN) + require.NoError(t, err) + defer dbConnectionPool.Close() + + cleanUpDB := func() { + _, err = dbConnectionPool.ExecContext(ctx, `DELETE FROM known_wasms`) + require.NoError(t, err) + } + + t.Run("empty input returns no error", func(t *testing.T) { + cleanUpDB() + mockMetricsService := metrics.NewMockMetricsService() + defer mockMetricsService.AssertExpectations(t) + + model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} + err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.BatchInsert(ctx, dbTx, []KnownWasm{}) + }) + assert.NoError(t, err) + }) + + t.Run("single insert", func(t *testing.T) { + cleanUpDB() + mockMetricsService := metrics.NewMockMetricsService() + mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() + defer mockMetricsService.AssertExpectations(t) + + model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} + err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.BatchInsert(ctx, dbTx, []KnownWasm{ + {WasmHash: "abc123def456", ProtocolID: nil}, + }) + }) + assert.NoError(t, err) + + // Verify the insert + var count int + err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms WHERE wasm_hash = 'abc123def456'`) 
+ require.NoError(t, err) + assert.Equal(t, 1, count) + }) + + t.Run("multiple inserts", func(t *testing.T) { + cleanUpDB() + mockMetricsService := metrics.NewMockMetricsService() + mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() + defer mockMetricsService.AssertExpectations(t) + + model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} + protocolID := "test-protocol" + err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.BatchInsert(ctx, dbTx, []KnownWasm{ + {WasmHash: "hash1", ProtocolID: nil}, + {WasmHash: "hash2", ProtocolID: &protocolID}, + {WasmHash: "hash3", ProtocolID: nil}, + }) + }) + assert.NoError(t, err) + + var count int + err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms`) + require.NoError(t, err) + assert.Equal(t, 3, count) + + // Verify protocol_id was stored correctly + var storedProtocolID *string + err = dbConnectionPool.GetContext(ctx, &storedProtocolID, `SELECT protocol_id FROM known_wasms WHERE wasm_hash = 'hash2'`) + require.NoError(t, err) + require.NotNil(t, storedProtocolID) + assert.Equal(t, "test-protocol", *storedProtocolID) + }) + + t.Run("duplicate inserts are idempotent", func(t *testing.T) { + cleanUpDB() + mockMetricsService := metrics.NewMockMetricsService() + mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() + defer mockMetricsService.AssertExpectations(t) + + model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} + + // First insert + err := 
db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.BatchInsert(ctx, dbTx, []KnownWasm{ + {WasmHash: "duplicate_hash", ProtocolID: nil}, + }) + }) + assert.NoError(t, err) + + // Second insert with same hash - should not error + err = db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.BatchInsert(ctx, dbTx, []KnownWasm{ + {WasmHash: "duplicate_hash", ProtocolID: nil}, + }) + }) + assert.NoError(t, err) + + // Verify only one row + var count int + err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms WHERE wasm_hash = 'duplicate_hash'`) + require.NoError(t, err) + assert.Equal(t, 1, count) + }) +} diff --git a/internal/data/mocks.go b/internal/data/mocks.go index c5b14eadd..335c4b10a 100644 --- a/internal/data/mocks.go +++ b/internal/data/mocks.go @@ -300,18 +300,28 @@ func (m *ProtocolContractsModelMock) BatchInsert(ctx context.Context, dbTx pgx.T return args.Error(0) } -func (m *ProtocolContractsModelMock) GetByProtocolID(ctx context.Context, protocolID string) ([]ProtocolContracts, error) { - args := m.Called(ctx, protocolID) - if args.Get(0) == nil { - return nil, args.Error(1) - } - return args.Get(0).([]ProtocolContracts), args.Error(1) +// KnownWasmModelMock is a mock implementation of KnownWasmModelInterface. +type KnownWasmModelMock struct { + mock.Mock } -func (m *ProtocolContractsModelMock) BatchGetByProtocolIDs(ctx context.Context, protocolIDs []string) (map[string][]ProtocolContracts, error) { - args := m.Called(ctx, protocolIDs) - if args.Get(0) == nil { - return nil, args.Error(1) - } - return args.Get(0).(map[string][]ProtocolContracts), args.Error(1) +var _ KnownWasmModelInterface = (*KnownWasmModelMock)(nil) + +// NewKnownWasmModelMock creates a new instance of KnownWasmModelMock. 
+func NewKnownWasmModelMock(t interface { + mock.TestingT + Cleanup(func()) +}, +) *KnownWasmModelMock { + mockModel := &KnownWasmModelMock{} + mockModel.Mock.Test(t) + + t.Cleanup(func() { mockModel.AssertExpectations(t) }) + + return mockModel +} + +func (m *KnownWasmModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error { + args := m.Called(ctx, dbTx, wasms) + return args.Error(0) } diff --git a/internal/db/migrations/2026-02-20.0-known_wasms.sql b/internal/db/migrations/2026-02-20.0-known_wasms.sql new file mode 100644 index 000000000..2e1415ef8 --- /dev/null +++ b/internal/db/migrations/2026-02-20.0-known_wasms.sql @@ -0,0 +1,9 @@ +-- +migrate Up +CREATE TABLE known_wasms ( + wasm_hash TEXT PRIMARY KEY, + protocol_id TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- +migrate Down +DROP TABLE IF EXISTS known_wasms; diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go new file mode 100644 index 000000000..e1e7cd41c --- /dev/null +++ b/internal/services/wasm_ingestion.go @@ -0,0 +1,85 @@ +package services + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v5" + "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" + + "github.com/stellar/wallet-backend/internal/data" +) + +// ProtocolValidator validates WASM bytecode against a specific protocol. +type ProtocolValidator interface { + ProtocolID() string + Validate(ctx context.Context, wasmCode []byte) (bool, error) +} + +// WasmIngestionService tracks and persists WASM hashes during checkpoint population. 
+type WasmIngestionService interface { + ProcessContractCode(ctx context.Context, wasmHash xdr.Hash, wasmCode []byte) error + PersistKnownWasms(ctx context.Context, dbTx pgx.Tx) error +} + +var _ WasmIngestionService = (*wasmIngestionService)(nil) + +type wasmIngestionService struct { + validators []ProtocolValidator + knownWasmModel data.KnownWasmModelInterface + wasmHashes map[xdr.Hash]struct{} +} + +// NewWasmIngestionService creates a WasmIngestionService. +func NewWasmIngestionService( + knownWasmModel data.KnownWasmModelInterface, + validators ...ProtocolValidator, +) WasmIngestionService { + return &wasmIngestionService{ + validators: validators, + knownWasmModel: knownWasmModel, + wasmHashes: make(map[xdr.Hash]struct{}), + } +} + +// ProcessContractCode runs protocol validators against the WASM and tracks the hash. +func (s *wasmIngestionService) ProcessContractCode(ctx context.Context, wasmHash xdr.Hash, wasmCode []byte) error { + // Run all registered validators + for _, v := range s.validators { + matched, err := v.Validate(ctx, wasmCode) + if err != nil { + log.Ctx(ctx).Warnf("protocol validator %s error for hash %s: %v", v.ProtocolID(), wasmHash.HexString(), err) + continue + } + if matched { + log.Ctx(ctx).Infof("WASM %s matched protocol %s", wasmHash.HexString(), v.ProtocolID()) + } + } + + // Track hash for later persistence + s.wasmHashes[wasmHash] = struct{}{} + return nil +} + +// PersistKnownWasms writes all accumulated WASM hashes to the known_wasms table. 
+func (s *wasmIngestionService) PersistKnownWasms(ctx context.Context, dbTx pgx.Tx) error { + if len(s.wasmHashes) == 0 { + return nil + } + + wasms := make([]data.KnownWasm, 0, len(s.wasmHashes)) + for hash := range s.wasmHashes { + wasms = append(wasms, data.KnownWasm{ + WasmHash: hash.HexString(), + ProtocolID: nil, // No validators matched for now + }) + } + + if err := s.knownWasmModel.BatchInsert(ctx, dbTx, wasms); err != nil { + return fmt.Errorf("persisting known wasms: %w", err) + } + + log.Ctx(ctx).Infof("Persisted %d known WASM hashes", len(wasms)) + return nil +} diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go new file mode 100644 index 000000000..1970a014b --- /dev/null +++ b/internal/services/wasm_ingestion_test.go @@ -0,0 +1,192 @@ +package services + +import ( + "context" + "errors" + "testing" + + "github.com/stellar/go-stellar-sdk/xdr" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/stellar/wallet-backend/internal/data" +) + +func TestWasmIngestionService_ProcessContractCode(t *testing.T) { + ctx := context.Background() + hash := xdr.Hash{1, 2, 3} + code := []byte{0xDE, 0xAD} + + t.Run("no_validators_tracks_hash", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + + err := svc.ProcessContractCode(ctx, hash, code) + require.NoError(t, err) + + _, tracked := svc.wasmHashes[hash] + assert.True(t, tracked, "hash should be tracked") + }) + + t.Run("validator_match", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + validatorMock := NewProtocolValidatorMock(t) + validatorMock.On("Validate", mock.Anything, code).Return(true, nil).Once() + validatorMock.On("ProtocolID").Return("test-protocol").Once() + + svc := NewWasmIngestionService(knownWasmModelMock, validatorMock).(*wasmIngestionService) + + 
err := svc.ProcessContractCode(ctx, hash, code) + require.NoError(t, err) + + _, tracked := svc.wasmHashes[hash] + assert.True(t, tracked) + }) + + t.Run("validator_no_match", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + validatorMock := NewProtocolValidatorMock(t) + validatorMock.On("Validate", mock.Anything, code).Return(false, nil).Once() + + svc := NewWasmIngestionService(knownWasmModelMock, validatorMock).(*wasmIngestionService) + + err := svc.ProcessContractCode(ctx, hash, code) + require.NoError(t, err) + + _, tracked := svc.wasmHashes[hash] + assert.True(t, tracked, "hash should still be tracked even without match") + }) + + t.Run("validator_error_continues", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + validatorMock := NewProtocolValidatorMock(t) + validatorMock.On("Validate", mock.Anything, code).Return(false, errors.New("validation failed")).Once() + validatorMock.On("ProtocolID").Return("test-protocol").Once() + + svc := NewWasmIngestionService(knownWasmModelMock, validatorMock).(*wasmIngestionService) + + err := svc.ProcessContractCode(ctx, hash, code) + require.NoError(t, err, "validator error should not propagate") + + _, tracked := svc.wasmHashes[hash] + assert.True(t, tracked, "hash should still be tracked despite validator error") + }) + + t.Run("multiple_validators_all_run", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + v1 := NewProtocolValidatorMock(t) + v1.On("Validate", mock.Anything, code).Return(true, nil).Once() + v1.On("ProtocolID").Return("protocol-1").Once() + + v2 := NewProtocolValidatorMock(t) + v2.On("Validate", mock.Anything, code).Return(true, nil).Once() + v2.On("ProtocolID").Return("protocol-2").Once() + + svc := NewWasmIngestionService(knownWasmModelMock, v1, v2).(*wasmIngestionService) + + err := svc.ProcessContractCode(ctx, hash, code) + require.NoError(t, err) + // Both validator expectations are asserted via t.Cleanup + }) + + 
t.Run("duplicate_hash_deduplicated", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + + err := svc.ProcessContractCode(ctx, hash, code) + require.NoError(t, err) + + err = svc.ProcessContractCode(ctx, hash, code) + require.NoError(t, err) + + assert.Len(t, svc.wasmHashes, 1, "duplicate hash should be deduplicated") + + // Verify PersistKnownWasms produces 1 entry + knownWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(wasms []data.KnownWasm) bool { + return len(wasms) == 1 + }), + ).Return(nil).Once() + + err = svc.PersistKnownWasms(ctx, nil) + require.NoError(t, err) + }) +} + +func TestWasmIngestionService_PersistKnownWasms(t *testing.T) { + ctx := context.Background() + + t.Run("no_hashes_skips_insert", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + svc := NewWasmIngestionService(knownWasmModelMock) + + err := svc.PersistKnownWasms(ctx, nil) + require.NoError(t, err) + knownWasmModelMock.AssertNotCalled(t, "BatchInsert", mock.Anything, mock.Anything, mock.Anything) + }) + + t.Run("single_hash_persisted", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + hash := xdr.Hash{10, 20, 30} + + knownWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(wasms []data.KnownWasm) bool { + if len(wasms) != 1 { + return false + } + return wasms[0].WasmHash == hash.HexString() && wasms[0].ProtocolID == nil + }), + ).Return(nil).Once() + + svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + err := svc.ProcessContractCode(ctx, hash, []byte{0x01}) + require.NoError(t, err) + + err = svc.PersistKnownWasms(ctx, nil) + require.NoError(t, err) + }) + + t.Run("multiple_hashes_persisted", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + hash1 := xdr.Hash{1} + hash2 := xdr.Hash{2} + + 
knownWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(wasms []data.KnownWasm) bool { + if len(wasms) != 2 { + return false + } + hashes := make(map[string]bool) + for _, w := range wasms { + hashes[w.WasmHash] = true + } + return hashes[hash1.HexString()] && hashes[hash2.HexString()] + }), + ).Return(nil).Once() + + svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + require.NoError(t, svc.ProcessContractCode(ctx, hash1, []byte{0x01})) + require.NoError(t, svc.ProcessContractCode(ctx, hash2, []byte{0x02})) + + err := svc.PersistKnownWasms(ctx, nil) + require.NoError(t, err) + }) + + t.Run("batch_insert_error_propagated", func(t *testing.T) { + knownWasmModelMock := data.NewKnownWasmModelMock(t) + hash := xdr.Hash{99} + insertErr := errors.New("db connection lost") + + knownWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). + Return(insertErr).Once() + + svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + require.NoError(t, svc.ProcessContractCode(ctx, hash, []byte{0x01})) + + err := svc.PersistKnownWasms(ctx, nil) + require.Error(t, err) + assert.ErrorContains(t, err, "persisting known wasms") + assert.ErrorIs(t, err, insertErr) + }) +} From 89db54926bee747231eac15eabd928b545c44c91 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 26 Feb 2026 15:19:00 -0700 Subject: [PATCH 09/52] renames known_wasms to protocol_wasms --- internal/data/known_wasms.go | 67 --------- internal/data/known_wasms_test.go | 130 ------------------ internal/data/mocks.go | 16 +-- ...ms.sql => 2026-02-20.0-protocol_wasms.sql} | 4 +- internal/services/wasm_ingestion.go | 30 ++-- internal/services/wasm_ingestion_test.go | 72 +++++----- 6 files changed, 61 insertions(+), 258 deletions(-) delete mode 100644 internal/data/known_wasms.go delete mode 100644 internal/data/known_wasms_test.go rename internal/db/migrations/{2026-02-20.0-known_wasms.sql => 
2026-02-20.0-protocol_wasms.sql} (67%) diff --git a/internal/data/known_wasms.go b/internal/data/known_wasms.go deleted file mode 100644 index c17366826..000000000 --- a/internal/data/known_wasms.go +++ /dev/null @@ -1,67 +0,0 @@ -package data - -import ( - "context" - "fmt" - "time" - - "github.com/jackc/pgx/v5" - - "github.com/stellar/wallet-backend/internal/db" - "github.com/stellar/wallet-backend/internal/metrics" - "github.com/stellar/wallet-backend/internal/utils" -) - -// KnownWasm represents a WASM hash tracked during checkpoint population. -type KnownWasm struct { - WasmHash string `db:"wasm_hash"` - ProtocolID *string `db:"protocol_id"` - CreatedAt time.Time `db:"created_at"` -} - -// KnownWasmModelInterface defines the interface for known_wasms operations. -type KnownWasmModelInterface interface { - BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error -} - -// KnownWasmModel implements KnownWasmModelInterface. -type KnownWasmModel struct { - DB db.ConnectionPool - MetricsService metrics.MetricsService -} - -var _ KnownWasmModelInterface = (*KnownWasmModel)(nil) - -// BatchInsert inserts multiple known WASMs using UNNEST for efficient batch insertion. -// Uses ON CONFLICT (wasm_hash) DO NOTHING for idempotent operations. 
-func (m *KnownWasmModel) BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error { - if len(wasms) == 0 { - return nil - } - - wasmHashes := make([]string, len(wasms)) - protocolIDs := make([]*string, len(wasms)) - - for i, w := range wasms { - wasmHashes[i] = w.WasmHash - protocolIDs[i] = w.ProtocolID - } - - const query = ` - INSERT INTO known_wasms (wasm_hash, protocol_id) - SELECT * FROM UNNEST($1::text[], $2::text[]) - ON CONFLICT (wasm_hash) DO NOTHING - ` - - start := time.Now() - _, err := dbTx.Exec(ctx, query, wasmHashes, protocolIDs) - if err != nil { - m.MetricsService.IncDBQueryError("BatchInsert", "known_wasms", utils.GetDBErrorType(err)) - return fmt.Errorf("batch inserting known wasms: %w", err) - } - - m.MetricsService.ObserveDBQueryDuration("BatchInsert", "known_wasms", time.Since(start).Seconds()) - m.MetricsService.ObserveDBBatchSize("BatchInsert", "known_wasms", len(wasms)) - m.MetricsService.IncDBQuery("BatchInsert", "known_wasms") - return nil -} diff --git a/internal/data/known_wasms_test.go b/internal/data/known_wasms_test.go deleted file mode 100644 index 618390f0a..000000000 --- a/internal/data/known_wasms_test.go +++ /dev/null @@ -1,130 +0,0 @@ -package data - -import ( - "context" - "testing" - - "github.com/jackc/pgx/v5" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - - "github.com/stellar/wallet-backend/internal/db" - "github.com/stellar/wallet-backend/internal/db/dbtest" - "github.com/stellar/wallet-backend/internal/metrics" -) - -func TestKnownWasmBatchInsert(t *testing.T) { - ctx := context.Background() - - dbt := dbtest.Open(t) - defer dbt.Close() - dbConnectionPool, err := db.OpenDBConnectionPool(dbt.DSN) - require.NoError(t, err) - defer dbConnectionPool.Close() - - cleanUpDB := func() { - _, err = dbConnectionPool.ExecContext(ctx, `DELETE FROM known_wasms`) - require.NoError(t, err) - } - - t.Run("empty input returns no error", func(t 
*testing.T) { - cleanUpDB() - mockMetricsService := metrics.NewMockMetricsService() - defer mockMetricsService.AssertExpectations(t) - - model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} - err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{}) - }) - assert.NoError(t, err) - }) - - t.Run("single insert", func(t *testing.T) { - cleanUpDB() - mockMetricsService := metrics.NewMockMetricsService() - mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() - defer mockMetricsService.AssertExpectations(t) - - model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} - err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{ - {WasmHash: "abc123def456", ProtocolID: nil}, - }) - }) - assert.NoError(t, err) - - // Verify the insert - var count int - err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms WHERE wasm_hash = 'abc123def456'`) - require.NoError(t, err) - assert.Equal(t, 1, count) - }) - - t.Run("multiple inserts", func(t *testing.T) { - cleanUpDB() - mockMetricsService := metrics.NewMockMetricsService() - mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() - defer mockMetricsService.AssertExpectations(t) - - model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} - protocolID := "test-protocol" - err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) 
error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{ - {WasmHash: "hash1", ProtocolID: nil}, - {WasmHash: "hash2", ProtocolID: &protocolID}, - {WasmHash: "hash3", ProtocolID: nil}, - }) - }) - assert.NoError(t, err) - - var count int - err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms`) - require.NoError(t, err) - assert.Equal(t, 3, count) - - // Verify protocol_id was stored correctly - var storedProtocolID *string - err = dbConnectionPool.GetContext(ctx, &storedProtocolID, `SELECT protocol_id FROM known_wasms WHERE wasm_hash = 'hash2'`) - require.NoError(t, err) - require.NotNil(t, storedProtocolID) - assert.Equal(t, "test-protocol", *storedProtocolID) - }) - - t.Run("duplicate inserts are idempotent", func(t *testing.T) { - cleanUpDB() - mockMetricsService := metrics.NewMockMetricsService() - mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() - defer mockMetricsService.AssertExpectations(t) - - model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} - - // First insert - err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{ - {WasmHash: "duplicate_hash", ProtocolID: nil}, - }) - }) - assert.NoError(t, err) - - // Second insert with same hash - should not error - err = db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{ - {WasmHash: "duplicate_hash", ProtocolID: nil}, - }) - }) - assert.NoError(t, err) - - // Verify only one row - var count int - err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms WHERE wasm_hash = 'duplicate_hash'`) - require.NoError(t, err) - assert.Equal(t, 1, count) - }) -} diff 
--git a/internal/data/mocks.go b/internal/data/mocks.go index 335c4b10a..e26cfbcfc 100644 --- a/internal/data/mocks.go +++ b/internal/data/mocks.go @@ -300,20 +300,20 @@ func (m *ProtocolContractsModelMock) BatchInsert(ctx context.Context, dbTx pgx.T return args.Error(0) } -// KnownWasmModelMock is a mock implementation of KnownWasmModelInterface. -type KnownWasmModelMock struct { +// ProtocolWasmModelMock is a mock implementation of ProtocolWasmModelInterface. +type ProtocolWasmModelMock struct { mock.Mock } -var _ KnownWasmModelInterface = (*KnownWasmModelMock)(nil) +var _ ProtocolWasmModelInterface = (*ProtocolWasmModelMock)(nil) -// NewKnownWasmModelMock creates a new instance of KnownWasmModelMock. -func NewKnownWasmModelMock(t interface { +// NewProtocolWasmModelMock creates a new instance of ProtocolWasmModelMock. +func NewProtocolWasmModelMock(t interface { mock.TestingT Cleanup(func()) }, -) *KnownWasmModelMock { - mockModel := &KnownWasmModelMock{} +) *ProtocolWasmModelMock { + mockModel := &ProtocolWasmModelMock{} mockModel.Mock.Test(t) t.Cleanup(func() { mockModel.AssertExpectations(t) }) @@ -321,7 +321,7 @@ func NewKnownWasmModelMock(t interface { return mockModel } -func (m *KnownWasmModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error { +func (m *ProtocolWasmModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []ProtocolWasm) error { args := m.Called(ctx, dbTx, wasms) return args.Error(0) } diff --git a/internal/db/migrations/2026-02-20.0-known_wasms.sql b/internal/db/migrations/2026-02-20.0-protocol_wasms.sql similarity index 67% rename from internal/db/migrations/2026-02-20.0-known_wasms.sql rename to internal/db/migrations/2026-02-20.0-protocol_wasms.sql index 2e1415ef8..d63e2f0c2 100644 --- a/internal/db/migrations/2026-02-20.0-known_wasms.sql +++ b/internal/db/migrations/2026-02-20.0-protocol_wasms.sql @@ -1,9 +1,9 @@ -- +migrate Up -CREATE TABLE known_wasms ( +CREATE TABLE protocol_wasms ( wasm_hash TEXT 
PRIMARY KEY, protocol_id TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- +migrate Down -DROP TABLE IF EXISTS known_wasms; +DROP TABLE IF EXISTS protocol_wasms; diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go index e1e7cd41c..fed658a72 100644 --- a/internal/services/wasm_ingestion.go +++ b/internal/services/wasm_ingestion.go @@ -20,26 +20,26 @@ type ProtocolValidator interface { // WasmIngestionService tracks and persists WASM hashes during checkpoint population. type WasmIngestionService interface { ProcessContractCode(ctx context.Context, wasmHash xdr.Hash, wasmCode []byte) error - PersistKnownWasms(ctx context.Context, dbTx pgx.Tx) error + PersistProtocolWasms(ctx context.Context, dbTx pgx.Tx) error } var _ WasmIngestionService = (*wasmIngestionService)(nil) type wasmIngestionService struct { - validators []ProtocolValidator - knownWasmModel data.KnownWasmModelInterface - wasmHashes map[xdr.Hash]struct{} + validators []ProtocolValidator + protocolWasmModel data.ProtocolWasmModelInterface + wasmHashes map[xdr.Hash]struct{} } // NewWasmIngestionService creates a WasmIngestionService. func NewWasmIngestionService( - knownWasmModel data.KnownWasmModelInterface, + protocolWasmModel data.ProtocolWasmModelInterface, validators ...ProtocolValidator, ) WasmIngestionService { return &wasmIngestionService{ - validators: validators, - knownWasmModel: knownWasmModel, - wasmHashes: make(map[xdr.Hash]struct{}), + validators: validators, + protocolWasmModel: protocolWasmModel, + wasmHashes: make(map[xdr.Hash]struct{}), } } @@ -62,24 +62,24 @@ func (s *wasmIngestionService) ProcessContractCode(ctx context.Context, wasmHash return nil } -// PersistKnownWasms writes all accumulated WASM hashes to the known_wasms table. -func (s *wasmIngestionService) PersistKnownWasms(ctx context.Context, dbTx pgx.Tx) error { +// PersistProtocolWasms writes all accumulated WASM hashes to the protocol_wasms table. 
+func (s *wasmIngestionService) PersistProtocolWasms(ctx context.Context, dbTx pgx.Tx) error { if len(s.wasmHashes) == 0 { return nil } - wasms := make([]data.KnownWasm, 0, len(s.wasmHashes)) + wasms := make([]data.ProtocolWasm, 0, len(s.wasmHashes)) for hash := range s.wasmHashes { - wasms = append(wasms, data.KnownWasm{ + wasms = append(wasms, data.ProtocolWasm{ WasmHash: hash.HexString(), ProtocolID: nil, // No validators matched for now }) } - if err := s.knownWasmModel.BatchInsert(ctx, dbTx, wasms); err != nil { - return fmt.Errorf("persisting known wasms: %w", err) + if err := s.protocolWasmModel.BatchInsert(ctx, dbTx, wasms); err != nil { + return fmt.Errorf("persisting protocol wasms: %w", err) } - log.Ctx(ctx).Infof("Persisted %d known WASM hashes", len(wasms)) + log.Ctx(ctx).Infof("Persisted %d protocol WASM hashes", len(wasms)) return nil } diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go index 1970a014b..e5b8433a4 100644 --- a/internal/services/wasm_ingestion_test.go +++ b/internal/services/wasm_ingestion_test.go @@ -19,8 +19,8 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { code := []byte{0xDE, 0xAD} t.Run("no_validators_tracks_hash", func(t *testing.T) { - knownWasmModelMock := data.NewKnownWasmModelMock(t) - svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) err := svc.ProcessContractCode(ctx, hash, code) require.NoError(t, err) @@ -30,12 +30,12 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { }) t.Run("validator_match", func(t *testing.T) { - knownWasmModelMock := data.NewKnownWasmModelMock(t) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) validatorMock := NewProtocolValidatorMock(t) validatorMock.On("Validate", mock.Anything, code).Return(true, nil).Once() 
validatorMock.On("ProtocolID").Return("test-protocol").Once() - svc := NewWasmIngestionService(knownWasmModelMock, validatorMock).(*wasmIngestionService) + svc := NewWasmIngestionService(protocolWasmModelMock, validatorMock).(*wasmIngestionService) err := svc.ProcessContractCode(ctx, hash, code) require.NoError(t, err) @@ -45,11 +45,11 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { }) t.Run("validator_no_match", func(t *testing.T) { - knownWasmModelMock := data.NewKnownWasmModelMock(t) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) validatorMock := NewProtocolValidatorMock(t) validatorMock.On("Validate", mock.Anything, code).Return(false, nil).Once() - svc := NewWasmIngestionService(knownWasmModelMock, validatorMock).(*wasmIngestionService) + svc := NewWasmIngestionService(protocolWasmModelMock, validatorMock).(*wasmIngestionService) err := svc.ProcessContractCode(ctx, hash, code) require.NoError(t, err) @@ -59,12 +59,12 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { }) t.Run("validator_error_continues", func(t *testing.T) { - knownWasmModelMock := data.NewKnownWasmModelMock(t) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) validatorMock := NewProtocolValidatorMock(t) validatorMock.On("Validate", mock.Anything, code).Return(false, errors.New("validation failed")).Once() validatorMock.On("ProtocolID").Return("test-protocol").Once() - svc := NewWasmIngestionService(knownWasmModelMock, validatorMock).(*wasmIngestionService) + svc := NewWasmIngestionService(protocolWasmModelMock, validatorMock).(*wasmIngestionService) err := svc.ProcessContractCode(ctx, hash, code) require.NoError(t, err, "validator error should not propagate") @@ -74,7 +74,7 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { }) t.Run("multiple_validators_all_run", func(t *testing.T) { - knownWasmModelMock := data.NewKnownWasmModelMock(t) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) v1 := 
NewProtocolValidatorMock(t) v1.On("Validate", mock.Anything, code).Return(true, nil).Once() v1.On("ProtocolID").Return("protocol-1").Once() @@ -83,7 +83,7 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { v2.On("Validate", mock.Anything, code).Return(true, nil).Once() v2.On("ProtocolID").Return("protocol-2").Once() - svc := NewWasmIngestionService(knownWasmModelMock, v1, v2).(*wasmIngestionService) + svc := NewWasmIngestionService(protocolWasmModelMock, v1, v2).(*wasmIngestionService) err := svc.ProcessContractCode(ctx, hash, code) require.NoError(t, err) @@ -91,8 +91,8 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { }) t.Run("duplicate_hash_deduplicated", func(t *testing.T) { - knownWasmModelMock := data.NewKnownWasmModelMock(t) - svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) err := svc.ProcessContractCode(ctx, hash, code) require.NoError(t, err) @@ -102,36 +102,36 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { assert.Len(t, svc.wasmHashes, 1, "duplicate hash should be deduplicated") - // Verify PersistKnownWasms produces 1 entry - knownWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(wasms []data.KnownWasm) bool { + // Verify PersistProtocolWasms produces 1 entry + protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(wasms []data.ProtocolWasm) bool { return len(wasms) == 1 }), ).Return(nil).Once() - err = svc.PersistKnownWasms(ctx, nil) + err = svc.PersistProtocolWasms(ctx, nil) require.NoError(t, err) }) } -func TestWasmIngestionService_PersistKnownWasms(t *testing.T) { +func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { ctx := context.Background() t.Run("no_hashes_skips_insert", func(t *testing.T) { - knownWasmModelMock := 
data.NewKnownWasmModelMock(t) - svc := NewWasmIngestionService(knownWasmModelMock) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock) - err := svc.PersistKnownWasms(ctx, nil) + err := svc.PersistProtocolWasms(ctx, nil) require.NoError(t, err) - knownWasmModelMock.AssertNotCalled(t, "BatchInsert", mock.Anything, mock.Anything, mock.Anything) + protocolWasmModelMock.AssertNotCalled(t, "BatchInsert", mock.Anything, mock.Anything, mock.Anything) }) t.Run("single_hash_persisted", func(t *testing.T) { - knownWasmModelMock := data.NewKnownWasmModelMock(t) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) hash := xdr.Hash{10, 20, 30} - knownWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(wasms []data.KnownWasm) bool { + protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(wasms []data.ProtocolWasm) bool { if len(wasms) != 1 { return false } @@ -139,21 +139,21 @@ func TestWasmIngestionService_PersistKnownWasms(t *testing.T) { }), ).Return(nil).Once() - svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) err := svc.ProcessContractCode(ctx, hash, []byte{0x01}) require.NoError(t, err) - err = svc.PersistKnownWasms(ctx, nil) + err = svc.PersistProtocolWasms(ctx, nil) require.NoError(t, err) }) t.Run("multiple_hashes_persisted", func(t *testing.T) { - knownWasmModelMock := data.NewKnownWasmModelMock(t) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) hash1 := xdr.Hash{1} hash2 := xdr.Hash{2} - knownWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(wasms []data.KnownWasm) bool { + protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(wasms []data.ProtocolWasm) bool { if len(wasms) != 2 { return false } @@ -165,28 +165,28 @@ func 
TestWasmIngestionService_PersistKnownWasms(t *testing.T) { }), ).Return(nil).Once() - svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) require.NoError(t, svc.ProcessContractCode(ctx, hash1, []byte{0x01})) require.NoError(t, svc.ProcessContractCode(ctx, hash2, []byte{0x02})) - err := svc.PersistKnownWasms(ctx, nil) + err := svc.PersistProtocolWasms(ctx, nil) require.NoError(t, err) }) t.Run("batch_insert_error_propagated", func(t *testing.T) { - knownWasmModelMock := data.NewKnownWasmModelMock(t) + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) hash := xdr.Hash{99} insertErr := errors.New("db connection lost") - knownWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). + protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). Return(insertErr).Once() - svc := NewWasmIngestionService(knownWasmModelMock).(*wasmIngestionService) + svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) require.NoError(t, svc.ProcessContractCode(ctx, hash, []byte{0x01})) - err := svc.PersistKnownWasms(ctx, nil) + err := svc.PersistProtocolWasms(ctx, nil) require.Error(t, err) - assert.ErrorContains(t, err, "persisting known wasms") + assert.ErrorContains(t, err, "persisting protocol wasms") assert.ErrorIs(t, err, insertErr) }) } From 73c4d3042c4ae9e5ef833cb4eeadb236957046fc Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 27 Feb 2026 09:13:30 -0700 Subject: [PATCH 10/52] Add unit tests for tokenProcessor.ProcessContractCode --- internal/services/token_ingestion_test.go | 73 +++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/internal/services/token_ingestion_test.go b/internal/services/token_ingestion_test.go index 5b42e2184..a150012f9 100644 --- a/internal/services/token_ingestion_test.go +++ b/internal/services/token_ingestion_test.go @@ -3,6 +3,7 @@ package 
services import ( "context" + "errors" "testing" "github.com/jackc/pgx/v5" @@ -379,3 +380,75 @@ func TestProcessTokenChanges(t *testing.T) { assert.NoError(t, err) }) } + +func TestTokenProcessor_ProcessContractCode(t *testing.T) { + ctx := context.Background() + + t.Run("valid_sep41_contract", func(t *testing.T) { + contractValidatorMock := NewContractValidatorMock(t) + tp := &tokenProcessor{ + contractValidator: contractValidatorMock, + data: checkpointData{ + contractTypesByWasmHash: make(map[xdr.Hash]types.ContractType), + }, + } + + hash := xdr.Hash{1, 2, 3} + code := []byte{0xDE, 0xAD} + contractValidatorMock.On("ValidateFromContractCode", mock.Anything, code). + Return(types.ContractTypeSEP41, nil).Once() + + err := tp.ProcessContractCode(ctx, hash, code) + require.NoError(t, err) + assert.Equal(t, types.ContractTypeSEP41, tp.data.contractTypesByWasmHash[hash]) + assert.Equal(t, 1, tp.entries) + }) + + t.Run("validator_error_skips_entry", func(t *testing.T) { + contractValidatorMock := NewContractValidatorMock(t) + tp := &tokenProcessor{ + contractValidator: contractValidatorMock, + data: checkpointData{ + contractTypesByWasmHash: make(map[xdr.Hash]types.ContractType), + }, + } + + hash := xdr.Hash{4, 5, 6} + code := []byte{0xBA, 0xD0} + contractValidatorMock.On("ValidateFromContractCode", mock.Anything, code). 
+ Return(types.ContractTypeUnknown, errors.New("invalid WASM")).Once() + + err := tp.ProcessContractCode(ctx, hash, code) + require.NoError(t, err, "validator error should not propagate") + assert.Empty(t, tp.data.contractTypesByWasmHash, "no entry should be stored on error") + assert.Equal(t, 0, tp.entries, "entries counter should not be incremented") + }) + + t.Run("multiple_contract_codes", func(t *testing.T) { + contractValidatorMock := NewContractValidatorMock(t) + tp := &tokenProcessor{ + contractValidator: contractValidatorMock, + data: checkpointData{ + contractTypesByWasmHash: make(map[xdr.Hash]types.ContractType), + }, + } + + hash1 := xdr.Hash{10} + code1 := []byte{0x01} + hash2 := xdr.Hash{20} + code2 := []byte{0x02} + + contractValidatorMock.On("ValidateFromContractCode", mock.Anything, code1). + Return(types.ContractTypeSEP41, nil).Once() + contractValidatorMock.On("ValidateFromContractCode", mock.Anything, code2). + Return(types.ContractTypeSEP41, nil).Once() + + require.NoError(t, tp.ProcessContractCode(ctx, hash1, code1)) + require.NoError(t, tp.ProcessContractCode(ctx, hash2, code2)) + + assert.Len(t, tp.data.contractTypesByWasmHash, 2) + assert.Equal(t, types.ContractTypeSEP41, tp.data.contractTypesByWasmHash[hash1]) + assert.Equal(t, types.ContractTypeSEP41, tp.data.contractTypesByWasmHash[hash2]) + assert.Equal(t, 2, tp.entries) + }) +} From 25838f4aac26e0163c71b778a10a44e5d3e4eb6e Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 11:42:15 -0700 Subject: [PATCH 11/52] services/wasm_ingestion: remove ProtocolValidator execution from WasmIngestionService (#524) * Initial plan * Remove validator execution from WasmIngestionService Co-authored-by: aristidesstaffieri <6886006+aristidesstaffieri@users.noreply.github.com> * services/wasm_ingestion: remove ProtocolValidator execution from WasmIngestionService Co-authored-by: aristidesstaffieri <6886006+aristidesstaffieri@users.noreply.github.com> 
--------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: aristidesstaffieri <6886006+aristidesstaffieri@users.noreply.github.com> --- internal/services/wasm_ingestion.go | 26 +--------- internal/services/wasm_ingestion_test.go | 63 +----------------------- 2 files changed, 3 insertions(+), 86 deletions(-) diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go index fed658a72..4a8eaac18 100644 --- a/internal/services/wasm_ingestion.go +++ b/internal/services/wasm_ingestion.go @@ -11,12 +11,6 @@ import ( "github.com/stellar/wallet-backend/internal/data" ) -// ProtocolValidator validates WASM bytecode against a specific protocol. -type ProtocolValidator interface { - ProtocolID() string - Validate(ctx context.Context, wasmCode []byte) (bool, error) -} - // WasmIngestionService tracks and persists WASM hashes during checkpoint population. type WasmIngestionService interface { ProcessContractCode(ctx context.Context, wasmHash xdr.Hash, wasmCode []byte) error @@ -26,7 +20,6 @@ type WasmIngestionService interface { var _ WasmIngestionService = (*wasmIngestionService)(nil) type wasmIngestionService struct { - validators []ProtocolValidator protocolWasmModel data.ProtocolWasmModelInterface wasmHashes map[xdr.Hash]struct{} } @@ -34,30 +27,15 @@ type wasmIngestionService struct { // NewWasmIngestionService creates a WasmIngestionService. func NewWasmIngestionService( protocolWasmModel data.ProtocolWasmModelInterface, - validators ...ProtocolValidator, ) WasmIngestionService { return &wasmIngestionService{ - validators: validators, protocolWasmModel: protocolWasmModel, wasmHashes: make(map[xdr.Hash]struct{}), } } -// ProcessContractCode runs protocol validators against the WASM and tracks the hash. +// ProcessContractCode tracks the WASM hash for later persistence. 
func (s *wasmIngestionService) ProcessContractCode(ctx context.Context, wasmHash xdr.Hash, wasmCode []byte) error { - // Run all registered validators - for _, v := range s.validators { - matched, err := v.Validate(ctx, wasmCode) - if err != nil { - log.Ctx(ctx).Warnf("protocol validator %s error for hash %s: %v", v.ProtocolID(), wasmHash.HexString(), err) - continue - } - if matched { - log.Ctx(ctx).Infof("WASM %s matched protocol %s", wasmHash.HexString(), v.ProtocolID()) - } - } - - // Track hash for later persistence s.wasmHashes[wasmHash] = struct{}{} return nil } @@ -72,7 +50,7 @@ func (s *wasmIngestionService) PersistProtocolWasms(ctx context.Context, dbTx pg for hash := range s.wasmHashes { wasms = append(wasms, data.ProtocolWasm{ WasmHash: hash.HexString(), - ProtocolID: nil, // No validators matched for now + ProtocolID: nil, }) } diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go index e5b8433a4..b4bc565a2 100644 --- a/internal/services/wasm_ingestion_test.go +++ b/internal/services/wasm_ingestion_test.go @@ -18,7 +18,7 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { hash := xdr.Hash{1, 2, 3} code := []byte{0xDE, 0xAD} - t.Run("no_validators_tracks_hash", func(t *testing.T) { + t.Run("tracks_hash", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) @@ -29,67 +29,6 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { assert.True(t, tracked, "hash should be tracked") }) - t.Run("validator_match", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - validatorMock := NewProtocolValidatorMock(t) - validatorMock.On("Validate", mock.Anything, code).Return(true, nil).Once() - validatorMock.On("ProtocolID").Return("test-protocol").Once() - - svc := NewWasmIngestionService(protocolWasmModelMock, validatorMock).(*wasmIngestionService) - - err := 
svc.ProcessContractCode(ctx, hash, code) - require.NoError(t, err) - - _, tracked := svc.wasmHashes[hash] - assert.True(t, tracked) - }) - - t.Run("validator_no_match", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - validatorMock := NewProtocolValidatorMock(t) - validatorMock.On("Validate", mock.Anything, code).Return(false, nil).Once() - - svc := NewWasmIngestionService(protocolWasmModelMock, validatorMock).(*wasmIngestionService) - - err := svc.ProcessContractCode(ctx, hash, code) - require.NoError(t, err) - - _, tracked := svc.wasmHashes[hash] - assert.True(t, tracked, "hash should still be tracked even without match") - }) - - t.Run("validator_error_continues", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - validatorMock := NewProtocolValidatorMock(t) - validatorMock.On("Validate", mock.Anything, code).Return(false, errors.New("validation failed")).Once() - validatorMock.On("ProtocolID").Return("test-protocol").Once() - - svc := NewWasmIngestionService(protocolWasmModelMock, validatorMock).(*wasmIngestionService) - - err := svc.ProcessContractCode(ctx, hash, code) - require.NoError(t, err, "validator error should not propagate") - - _, tracked := svc.wasmHashes[hash] - assert.True(t, tracked, "hash should still be tracked despite validator error") - }) - - t.Run("multiple_validators_all_run", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - v1 := NewProtocolValidatorMock(t) - v1.On("Validate", mock.Anything, code).Return(true, nil).Once() - v1.On("ProtocolID").Return("protocol-1").Once() - - v2 := NewProtocolValidatorMock(t) - v2.On("Validate", mock.Anything, code).Return(true, nil).Once() - v2.On("ProtocolID").Return("protocol-2").Once() - - svc := NewWasmIngestionService(protocolWasmModelMock, v1, v2).(*wasmIngestionService) - - err := svc.ProcessContractCode(ctx, hash, code) - require.NoError(t, err) - // Both validator expectations are asserted via 
t.Cleanup - }) - t.Run("duplicate_hash_deduplicated", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) From 4e4854b097579d2947d89bb09f788938334caf70 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 9 Mar 2026 10:15:25 -0600 Subject: [PATCH 12/52] Simplify ProcessContractCode to pass only WASM hashes, refactor TokenIngestionService to use config struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WasmIngestionService.ProcessContractCode no longer receives the full bytecode—it only needs the hash to track protocol WASMs. This reduces memory pressure during checkpoint population. TokenIngestionService construction is consolidated into a single NewTokenIngestionService(config) constructor, eliminating the separate NewTokenIngestionServiceForLoadtest variant. The loadtest runner now uses the same constructor with only the fields it needs. Also refactors processContractInstanceChange to return a contractInstanceResult struct instead of multiple return values, extracts newCheckpointData() helper, uses idiomatic nil slices instead of make([]T, 0), and introduces a checkpointTestFixture struct to reduce boilerplate in checkpoint tests. Constructors return concrete types instead of interfaces to allow direct field access in tests. --- internal/services/wasm_ingestion.go | 6 +++--- internal/services/wasm_ingestion_test.go | 25 ++++++++++++------------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go index 4a8eaac18..065686996 100644 --- a/internal/services/wasm_ingestion.go +++ b/internal/services/wasm_ingestion.go @@ -13,7 +13,7 @@ import ( // WasmIngestionService tracks and persists WASM hashes during checkpoint population. 
type WasmIngestionService interface { - ProcessContractCode(ctx context.Context, wasmHash xdr.Hash, wasmCode []byte) error + ProcessContractCode(ctx context.Context, wasmHash xdr.Hash) error PersistProtocolWasms(ctx context.Context, dbTx pgx.Tx) error } @@ -27,7 +27,7 @@ type wasmIngestionService struct { // NewWasmIngestionService creates a WasmIngestionService. func NewWasmIngestionService( protocolWasmModel data.ProtocolWasmModelInterface, -) WasmIngestionService { +) *wasmIngestionService { return &wasmIngestionService{ protocolWasmModel: protocolWasmModel, wasmHashes: make(map[xdr.Hash]struct{}), @@ -35,7 +35,7 @@ func NewWasmIngestionService( } // ProcessContractCode tracks the WASM hash for later persistence. -func (s *wasmIngestionService) ProcessContractCode(ctx context.Context, wasmHash xdr.Hash, wasmCode []byte) error { +func (s *wasmIngestionService) ProcessContractCode(ctx context.Context, wasmHash xdr.Hash) error { s.wasmHashes[wasmHash] = struct{}{} return nil } diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go index b4bc565a2..7d6a45b7f 100644 --- a/internal/services/wasm_ingestion_test.go +++ b/internal/services/wasm_ingestion_test.go @@ -16,13 +16,12 @@ import ( func TestWasmIngestionService_ProcessContractCode(t *testing.T) { ctx := context.Background() hash := xdr.Hash{1, 2, 3} - code := []byte{0xDE, 0xAD} t.Run("tracks_hash", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) + svc := NewWasmIngestionService(protocolWasmModelMock) - err := svc.ProcessContractCode(ctx, hash, code) + err := svc.ProcessContractCode(ctx, hash) require.NoError(t, err) _, tracked := svc.wasmHashes[hash] @@ -31,12 +30,12 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { t.Run("duplicate_hash_deduplicated", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - svc := 
NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) + svc := NewWasmIngestionService(protocolWasmModelMock) - err := svc.ProcessContractCode(ctx, hash, code) + err := svc.ProcessContractCode(ctx, hash) require.NoError(t, err) - err = svc.ProcessContractCode(ctx, hash, code) + err = svc.ProcessContractCode(ctx, hash) require.NoError(t, err) assert.Len(t, svc.wasmHashes, 1, "duplicate hash should be deduplicated") @@ -78,8 +77,8 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { }), ).Return(nil).Once() - svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) - err := svc.ProcessContractCode(ctx, hash, []byte{0x01}) + svc := NewWasmIngestionService(protocolWasmModelMock) + err := svc.ProcessContractCode(ctx, hash) require.NoError(t, err) err = svc.PersistProtocolWasms(ctx, nil) @@ -104,9 +103,9 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { }), ).Return(nil).Once() - svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) - require.NoError(t, svc.ProcessContractCode(ctx, hash1, []byte{0x01})) - require.NoError(t, svc.ProcessContractCode(ctx, hash2, []byte{0x02})) + svc := NewWasmIngestionService(protocolWasmModelMock) + require.NoError(t, svc.ProcessContractCode(ctx, hash1)) + require.NoError(t, svc.ProcessContractCode(ctx, hash2)) err := svc.PersistProtocolWasms(ctx, nil) require.NoError(t, err) @@ -120,8 +119,8 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). 
Return(insertErr).Once() - svc := NewWasmIngestionService(protocolWasmModelMock).(*wasmIngestionService) - require.NoError(t, svc.ProcessContractCode(ctx, hash, []byte{0x01})) + svc := NewWasmIngestionService(protocolWasmModelMock) + require.NoError(t, svc.ProcessContractCode(ctx, hash)) err := svc.PersistProtocolWasms(ctx, nil) require.Error(t, err) From bd8f7a0827d1cb37edbbe60a0f6d960b6e3b3bc1 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 9 Mar 2026 15:44:24 -0600 Subject: [PATCH 13/52] Add protocol_contracts table and populate during checkpoint Persist contract-to-WASM-hash mappings by extending WasmIngestionService with ProcessContractData and PersistProtocolContracts methods. During checkpoint population, ContractData Instance entries are parsed to extract the wasm_hash and contract_id relationship, which is stored in a new protocol_contracts table (FK to protocol_wasms). This mapping will be used by protocol-setup and live ingestion to classify contracts by protocol. --- docs/feature-design/data-migrations.md | 872 +++++++++++++----- internal/data/mocks.go | 26 + .../2026-03-09.0-protocol_contracts.sql | 13 + internal/services/wasm_ingestion.go | 74 +- internal/services/wasm_ingestion_test.go | 245 ++++- 5 files changed, 1005 insertions(+), 225 deletions(-) create mode 100644 internal/db/migrations/2026-03-09.0-protocol_contracts.sql diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index f36790c92..7713cf4bc 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -17,18 +17,18 @@ and live ingestion processes. ```sql CREATE TABLE protocols ( id TEXT PRIMARY KEY, -- "BLEND", "SEP50", etc. 
- migration_status TEXT DEFAULT 'not_started', + classification_status TEXT DEFAULT 'not_started', + history_migration_status TEXT DEFAULT 'not_started', + current_state_migration_status TEXT DEFAULT 'not_started', created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); --- migration_status values: --- 'not_started' - Initial state after registration --- 'classification_in_progress' - Checkpoint classification running --- 'classification_success' - Checkpoint classification complete --- 'backfilling_in_progress' - Historical state migration running --- 'backfilling_success' - Migration complete, data is complete --- 'failed' - Migration failed +-- Status values for each column: +-- 'not_started' - Initial state +-- 'in_progress' - Process running +-- 'success' - Process complete +-- 'failed' - Process failed ``` **Migration Cursor Tracking** (via `ingest_store` table): @@ -42,13 +42,51 @@ CREATE TABLE ingest_store ( key TEXT PRIMARY KEY, value TEXT NOT NULL ); +``` + +Each protocol has two CAS cursors, one per migration subcommand. Each cursor is shared between its respective migration subcommand and live ingestion, and serves as both the convergence mechanism and crash recovery cursor — eliminating the need for a separate migration cursor. + +**History Cursor** (via `ingest_store` table): + +Tracks the last ledger for which protocol state changes were written: + +```sql +-- History cursor example: +INSERT INTO ingest_store (key, value) VALUES ('protocol_SEP41_history_cursor', '50000'); +``` + +The history cursor (e.g., `protocol_{PROTOCOL_ID}_history_cursor`) is **shared between history migration and live ingestion**. It is advanced atomically via compare-and-swap (CAS) within the same DB transaction that writes state change data. It also serves as the crash recovery cursor for history migration. 
+ +**Current State Cursor** (via `ingest_store` table): + +Tracks the last ledger for which current state was produced: + +```sql +-- Current state cursor example: +INSERT INTO ingest_store (key, value) VALUES ('protocol_SEP41_current_state_cursor', '50000'); +``` + +The current state cursor (e.g., `protocol_{PROTOCOL_ID}_current_state_cursor`) is **shared between current-state migration and live ingestion**. It is advanced atomically via compare-and-swap (CAS) within the same DB transaction that writes current state data. It also serves as the crash recovery cursor for current-state migration. --- Protocol migration cursor example: -INSERT INTO ingest_store (key, value) VALUES ('protocol_SEP41_migration_cursor', '50000'); +**CAS Mechanism** (shared by both cursors): + +```sql +-- CAS: only advance if the cursor is at the expected value +UPDATE ingest_store SET value = $new WHERE key = $cursor_name AND value = $expected; +-- Returns rows_affected = 1 on success, 0 if another process already advanced it ``` -Each protocol migration has its own cursor key (e.g., `protocol_{PROTOCOL_ID}_migration_cursor`). -This cursor is updated atomically with each batch commit for crash recovery and can be deleted after the migration completes. +This requires a new `CompareAndSwap` method on `IngestStoreModel`. The existing `Update()` (`ingest_store.go:48`) is an unconditional upsert and cannot be used for this purpose. + +The CAS mechanism ensures that exactly one process (migration or live ingestion) writes data for any given ledger on each cursor, enabling a seamless handoff without coordination between the two processes (see [Convergence Model](#backfill-migration)). 
+ +**Cursor Initialization** (during `protocol-setup`): + +Both cursors are initialized when `classification_status` moves to `success`: +- `protocol_{ID}_history_cursor` = `oldest_ledger_cursor - 1` +- `protocol_{ID}_current_state_cursor` = 0 (or left uninitialized until current-state migration starts) + +This ensures live ingestion has cursors to gate against from the start, even if migrations haven't run yet. ### protocol_contracts @@ -57,20 +95,21 @@ Maps protocols to the contracts that make up their systems. ```sql CREATE TABLE protocol_contracts ( contract_id TEXT NOT NULL, -- C... address - protocol_id TEXT NOT NULL REFERENCES protocols(id), + protocol_id TEXT REFERENCES protocols(id), + wasm_hash TEXT NOT NULL REFERENCES protocol_wasms(wasm_hash), name TEXT, -- "pool", "factory", "token", etc. created_at TIMESTAMPTZ DEFAULT NOW(), PRIMARY KEY (contract_id, protocol_id) ); ``` -### known_wasms +### protocol_wasms A cache for all known WASM blobs. This acts as a filter for the classification process to reduce the overhead of classifying new contract instances that use the same WASM code. 
```sql -CREATE TABLE known_wasms ( +CREATE TABLE protocol_wasms ( wasm_hash TEXT PRIMARY KEY, protocol_id TEXT REFERENCES protocols(id), -- NULL if unknown/unclassified created_at TIMESTAMPTZ DEFAULT NOW() @@ -79,42 +118,71 @@ CREATE TABLE known_wasms ( ## Overview -Adding a new protocol requires three coordinated processes: +Adding a new protocol requires four coordinated processes: ``` ┌─────────────────────────────────────────────────────────────────────────────────┐ │ PROTOCOL ONBOARDING WORKFLOW │ └─────────────────────────────────────────────────────────────────────────────────┘ - STEP 1: SETUP STEP 2: LIVE INGESTION STEP 3: BACKFILL -┌──────────────────────┐ ┌──────────────────────┐ ┌──────────────────────┐ -│ ./wallet-backend │ │ Restart ingestion │ │ ./wallet-backend │ -│ protocol-setup │───▶│ with new processor │───▶│ protocol-migrate │ -│ │ │ │ │ │ -│ Classifies existing │ │ Note the restart │ │ Backfills historical │ -│ contracts │ │ ledger number │ │ state │ -└──────────────────────┘ └──────────────────────┘ └──────────────────────┘ - ▲ │ │ - │ │ │ - │ ▼ │ - │ ┌──────────────────┐ │ - │ │ Live ingestion │ │ - │ │ produces state │ │ - │ │ from restart │ │ - │ │ ledger onward │ │ - │ └──────────────────┘ │ - │ │ - └────────────────────────────────────────────────────────┘ - Complete coverage: [first_block → current] + STEP 1: SETUP STEP 2: LIVE INGESTION +┌──────────────────────┐ ┌──────────────────────┐ +│ ./wallet-backend │ │ Restart ingestion │ +│ protocol-setup │───▶│ with new processor │ +│ │ │ │ +│ Classifies existing │ │ Produces state from │ +│ contracts │ │ restart ledger onward│ +└──────────────────────┘ └──────────┬───────────┘ + │ + Steps 2, 3a, and 3b run concurrently + │ + ┌──────────────────┼──────────────────┐ + │ │ │ + ▼ ▼ ▼ + ┌──────────────────┐ ┌──────────────┐ ┌──────────────────┐ + │ Live ingestion: │ │ STEP 3a: │ │ STEP 3b: │ + │ state changes │ │ HISTORY │ │ CURRENT-STATE │ + │ after history │ │ MIGRATION │ │ MIGRATION │ + │ 
convergence, │ │ │ │ │ + │ current state │ │ protocol- │ │ protocol- │ + │ after current- │ │ migrate │ │ migrate │ + │ state │ │ history │ │ current-state │ + │ convergence │ │ │ │ │ + │ │ │ Retention │ │ From start- │ + │ │ │ window only │ │ ledger to tip │ + └────────┬─────────┘ └──────┬───────┘ └────────┬─────────┘ + │ │ │ + │◄── CAS handoff ──▶│ │ + │ (history cursor)│ │ + │ │ │ + │◄────── CAS handoff ──────────────────▶│ + │ (current-state cursor) │ + │ │ + └──────────────────┬────────────────────┘ + │ + Each migration CAS fails = handoff + Live ingestion takes over that responsibility + │ + ▼ + Complete coverage via two independent cursors: + - History cursor: state changes [retention_start → current] + - Current-state cursor: current state [start_ledger → current] ``` ## Process Dependencies | Step | Requires | Produces | |------|----------|----------| -| **1. protocol-setup** | Protocol migration SQL file, protocol implementation in code | Protocol in DB, `known_wasms`, `protocol_contracts`, status = `classification_success` | -| **2. ingest (live)** | Status = `classification_success`, processor registered | State from `restart_ledger` onward | -| **3. protocol-migrate** | `protocol_contracts` populated, status = `classification_success` | Historical state from `first_block` to `restart_ledger - 1` | +| **1. protocol-setup** | Protocol migration SQL file, protocol implementation in code | Protocol in DB, `protocol_wasms`, `protocol_contracts`, `classification_status = success`, both cursors initialized | +| **2. ingest (live)** | `classification_status = success`, processor registered | State changes after history convergence (history cursor). Current state after current-state convergence (current-state cursor). | +| **3a. protocol-migrate history** | `classification_status = success` | Protocol state changes within retention window, through convergence with live ingestion | +| **3b. 
protocol-migrate current-state** | `classification_status = success` | Current state from `start_ledger` through convergence with live ingestion | + +Steps 2, 3a, and 3b run **concurrently**. Each migration subcommand converges independently with live ingestion via its own CAS cursor: +- History migration converges via `protocol_{ID}_history_cursor` — when its CAS fails, live ingestion owns state change production +- Current-state migration converges via `protocol_{ID}_current_state_cursor` — when its CAS fails, live ingestion owns current state production + +The two subcommands are fully independent. They write to different tables, use different CAS cursors, and track different status columns. They can run in any order, concurrently, or only one can be run. Both live ingestion and backfill migration need the `protocol_contracts` table populated to know which contracts to process. The `protocol-setup` command ensures this data exists before either process runs. @@ -122,7 +190,7 @@ Both live ingestion and backfill migration need the `protocol_contracts` table p Classification is the act of identifying new and existing contracts on the network and assigning a relationship to a known protocol. This has to happen in 2 stages during the migration process: - checkpoint population: We will use a history archive from the latest checkpoint in order to classify all contracts on the network. We will rely on the latest checkpoint available at the time of the migration. -- live ingestion: during live ingestion, we classify new WASM uploads by validating the bytecode against protocol specs, and map contract deployments/upgrades to protocols by looking up their WASM hash in `known_wasms`. +- live ingestion: during live ingestion, we classify new WASM uploads by validating the bytecode against protocol specs, and map contract deployments/upgrades to protocols by looking up their WASM hash in `protocol_wasms`. 
``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -178,7 +246,7 @@ This has to happen in 2 stages during the migration process: │hash in │ │hash in │ │ Map contract ID │ │known_ │ │known_ │ │ to WASM hash │ │wasms │ │wasms │ │ (for later lookup│ - │with │ │with NULL │ │ in known_wasms) │ + │with │ │with NULL │ │ in protocol_wasms) │ │protocol│ │protocol │ └──────────────────┘ └────────┘ └──────────┘ @@ -186,15 +254,15 @@ This has to happen in 2 stages during the migration process: │ Post-Processing: │ │ 1. Store in protocol_contracts│ │ (contract → protocol via │ - │ wasm hash → known_wasms) │ - │ 2. Cache in known_wasms │ + │ wasm hash → protocol_wasms) │ + │ 2. Cache in protocol_wasms │ └───────────────────────────────┘ ``` Contracts are grouped by WASM hash before validation. This means we validate each unique WASM blob once, then apply the result to all contracts using that same code. -Once a WASM hash is classified, it is stored in the `known_wasms` table to avoid re-classification of future contracts using the same code. +Once a WASM hash is classified, it is stored in the `protocol_wasms` table to avoid re-classification of future contracts using the same code. -During live ingestion, classification happens in two parts: (1) new WASM uploads are validated against protocol specs and stored in `known_wasms`, and (2) contract deployments/upgrades are mapped to protocols via their WASM hash lookup in `known_wasms`. +During live ingestion, classification happens in two parts: (1) new WASM uploads are validated against protocol specs and stored in `protocol_wasms`, and (2) contract deployments/upgrades are mapped to protocols via their WASM hash lookup in `protocol_wasms`. 
``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -209,7 +277,7 @@ During live ingestion, classification happens in two parts: (1) new WASM uploads ▼ ┌──────────────────────────────┐ │ ProcessLedger() │ - │ (iterate ledger entry changes│ + │ iterate ledger entry changes │ └──────────────┬───────────────┘ │ ┌───────────────────┴───────────────────┐ @@ -229,7 +297,7 @@ During live ingestion, classification happens in two parts: (1) new WASM uploads ▼ ▼ ┌─────────────────────┐ ┌──────────────────────────┐ │ Validate against │ │ Lookup hash in │ - │ protocol validators │ │ known_wasms │ + │ protocol validators │ │ protocol_wasms │ └──────────┬──────────┘ └────────────┬─────────────┘ │ │ ┌────┴────┐ ┌───────────┴───────────┐ @@ -241,7 +309,7 @@ During live ingestion, classification happens in two parts: (1) new WASM uploads │Store in │ │Store in │ │ Map contract │ │ Fetch WASM via │ │known_ │ │known_ │ │ to protocol │ │ RPC, validate, │ │wasms with│ │wasms with│ │ from cached │ │ then map contract│ - │protocol │ │NULL │ │ classification │ (rare edge case) │ + │protocol │ │NULL │ │ classification │ (rare edge case) │ └──────────┘ └──────────┘ └──────────────┘ └──────────────────┘ │ │ └───────────┬───────────┘ @@ -302,7 +370,7 @@ and live ingestion. │ │ Custom Sections │ │ │ │ └── "contractspecv0" ◄─────────────┼── XDR-encoded spec │ │ └──────────────────────────────────────┘ │ -│ │ +│ │ │ for _, section := range compiledModule.CustomSections() { │ │ if section.Name() == "contractspecv0" { │ │ specBytes = section.Data() │ @@ -313,14 +381,14 @@ and live ingestion. 
▼ ┌──────────────────────────────────────────────────────────────────┐ │ Step 3: XDR Unmarshal → []ScSpecEntry │ -│ │ +│ │ │ reader := bytes.NewReader(specBytes) │ │ for reader.Len() > 0 { │ │ var spec xdr.ScSpecEntry │ │ xdr.Unmarshal(reader, &spec) │ │ specs = append(specs, spec) │ │ } │ -│ │ +│ │ │ Each ScSpecEntry represents: │ │ - Function definitions (name, inputs, outputs) │ │ - Type definitions (structs, enums) │ @@ -330,18 +398,18 @@ and live ingestion. ▼ ┌──────────────────────────────────────────────────────────────────┐ │ Step 4: Protocol Signature Validation │ -│ │ +│ │ │ For each function in contractSpec: │ │ - Extract function name │ │ - Extract input parameters (name → type mapping) │ │ - Extract output types │ │ - Compare against protocol's required functions │ -│ │ +│ │ │ Example (SEP-41 Token Standard): │ │ - Required: balance, allowance, decimals, name, symbol, │ │ approve, transfer, transfer_from, burn, burn_from │ │ - All parameter names and types must match exactly │ -│ │ +│ │ │ foundFunctions.Add(funcName) if signature matches │ │ MATCH = foundFunctions.Cardinality() == len(requiredSpecs) │ └──────────────────────────────────────────────────────────────────┘ @@ -367,15 +435,15 @@ and live ingestion. - Parameter names must match exactly (`from`, `to`, `amount`, etc.) - Parameter types must match (Address, i128, u32, etc.) -**known_wasms Table Usage**: +**protocol_wasms Table Usage**: -The `known_wasms` table stores classification results by WASM hash. The table stores +The `protocol_wasms` table stores classification results by WASM hash. The table stores a `protocol_id` for each WASM hash - this is `NULL` for WASM blobs that don't match any known protocol. ``` ┌────────────────────────────────────────────────────────────────────────────┐ -│ known_wasms CACHE FLOW │ +│ protocol_wasms CACHE FLOW │ └────────────────────────────────────────────────────────────────────────────┘ New Contract Deployment @@ -388,7 +456,7 @@ any known protocol. 
▼ ┌─────────────────────┐ │ SELECT protocol_id │ - │ FROM known_wasms │ + │ FROM protocol_wasms │ │ WHERE wasm_hash = ? │ └──────────┬──────────┘ │ @@ -402,7 +470,7 @@ any known protocol. │ protocol_id │ │ validation │ │ │ │ │ │ Skip WASM │ │ Then INSERT INTO │ - │ validation │ │ known_wasms │ + │ validation │ │ protocol_wasms │ └──────────────┘ └──────────────────┘ ``` @@ -411,7 +479,7 @@ This optimization is critical for performance because: - WASM compilation with wazero is CPU-intensive - A single validation per unique WASM hash serves all contracts using that code -When a new protocol is registered, previously unknown WASM hashes (those with `protocol_id = NULL`) must be re-validated against the new protocol's spec. This ensures contracts deployed before the protocol was added can still be classified correctly. +When a new protocol is registered, running `protocol-setup` re-validates previously unknown WASM hashes (those with `protocol_id = NULL`) against the new protocol's spec. This ensures contracts deployed before the protocol was added can still be classified correctly. 
``` ┌─────────────────────────────────────────────────────────────────────────────┐ │ RE-CLASSIFICATION ON NEW PROTOCOL REGISTRATION │ @@ -425,18 +493,18 @@ When a new protocol is registered, previously unknown WASM hashes (those with `p │ ▼ ┌─────────────────────────────┐ - │ Restart Live Ingestion │ - │ (triggers checkpoint │ - │ population for new protocol)│ + │ protocol-setup │ + │ --protocol-id BLEND │ + │ │ └─────────────┬───────────────┘ │ ▼ ┌─────────────────────────────┐ - │ Query known_wasms for │ + │ Query protocol_wasms for │ │ unclassified entries: │ │ │ │ SELECT wasm_hash │ - │ FROM known_wasms │ + │ FROM protocol_wasms │ │ WHERE protocol_id IS NULL │ └─────────────┬───────────────┘ │ @@ -454,7 +522,7 @@ When a new protocol is registered, previously unknown WASM hashes (those with `p ▼ ▼ ┌─────────────────┐ ┌─────────────────┐ │ UPDATE │ │ Leave as │ - │ known_wasms │ │ protocol_id │ + │ protocol_wasms │ │ protocol_id │ │ SET protocol_id │ │ = NULL │ │ = 'BLEND' │ │ │ │ WHERE wasm_hash │ │ (still unknown) │ @@ -499,8 +567,8 @@ current SEP-41 validator - compile WASM, extract `contractspecv0` section, parse XDR spec entries, check for required functions. When checkpoint population runs for a newly registered protocol, it validates contracts whose WASM hash is either: -1. **Not in known_wasms** (never seen before) -2. **In known_wasms with `protocol_id IS NULL`** (previously unknown) +1. **Not in protocol_wasms** (never seen before) +2. **In protocol_wasms with `protocol_id IS NULL`** (previously unknown) #### When Checkpoint Classification Runs Backfill migrations rely on checkpoint population being complete before they can produce state changes for a new protocol. If checkpoint population does not run before a backfill migration is started for a new protocol, backfill migration will fail and exit since it does not classify protocols and cannot produce state without any classification being available. 
@@ -515,14 +583,13 @@ Backfill migrations rely on checkpoint population being complete before they can
 
 1. **Runs protocol migrations** - Executes SQL migrations from `internal/data/migrations/protocols/` to register new protocols in the `protocols` table with status `not_started`
 2. **Sets status** to `classification_in_progress` for specified protocols
-3. **Reads the latest checkpoint** from the history archive
-4. **Extracts all WASM code** from contract entries in the checkpoint
-5. **Queries existing unclassified entries** from `known_wasms WHERE protocol_id IS NULL`
-6. **Validates each WASM** against all specified protocols' validators
-7. **Populates tables**:
+3. **Queries existing unclassified entries** from `protocol_wasms WHERE protocol_id IS NULL`
+4. **Gets bytecode** from all unknown contracts using RPC
+5. **Validates each WASM** against all specified protocols' validators
+6. **Populates tables**:
    - `known_wasms`: Maps WASM hashes to protocol IDs
    - `protocol_contracts`: Maps contract IDs to protocols
-8. **Updates status** to `classification_success` for all processed protocols
+7. **Updates status** to `classification_success` for all processed protocols
 
 ### Protocol Migration Files
 
@@ -544,117 +611,263 @@ The command requires an explicit list of protocols to set up via the `--protocol
 
 **Benefits:**
 - Opt-in protocol support - operators control which protocols are enabled
 - Clear operator intent - no accidental protocol enablement
-- Consistent with `protocol-migrate` command interface
+- Consistent with `protocol-migrate` subcommand interfaces
 
 ## State Production
 
-State produced by new protocols is done through dual processes in order to cover historical state and new state production during live ingestion.
-- Historical state: A backfill style migration will run for all ledgers that are needed to produce historical state enrichment, as well as current state tracking. 
-- Live ingest state: live ingestion will produce state defined by a protocol, this state can be an enrichment for an operation(richer data for history) and/or can be an update to the tracking of the current-state of a protocol as it relates to a user(which collectibles does a user own?). +State produced by new protocols is split into two independent responsibilities, each handled by a dedicated migration subcommand: +- **History (state changes)**: `protocol-migrate history` writes protocol state changes (operation enrichment) for ledgers within the retention window. It starts at `oldest_ledger_cursor` and converges with live ingestion via the history cursor. Since it only processes the retention window, ALL processed ledgers produce persisted state changes — no "process but discard" logic needed. +- **Current state**: `protocol-migrate current-state` builds current state from a protocol's deployment ledger forward. It starts at `--start-ledger` and converges with live ingestion via the current-state cursor. It processes ALL ledgers from start to tip to build accurate additive state, but writes only current state — no state changes. +- **Live ingest state**: Live ingestion produces both state changes and current state, but only after converging with the respective migration subcommand for each. It gates state change writes on the history cursor and current state writes on the current-state cursor. + +### Additive vs Non-Additive Current State + +Protocol current state falls into two categories that affect how migration and live ingestion interact: + +**Non-additive state** (e.g., collectible ownership): The current state at ledger N can be determined from the ledger data alone, without knowing the state at ledger N-1. Live ingestion can write current state immediately for any ledger, independent of migration progress. + +**Additive state** (e.g., token balances): The current state at ledger N depends on the state at ledger N-1. 
A "transfer of 5 tokens" event at ledger N requires knowing the balance before ledger N to compute the new balance. During migration, that previous balance doesn't exist until all prior ledgers are processed. + +``` +Non-additive example (collectible ownership): + Ledger N says "User A owns collectible X" → write directly, no prior state needed. + +Additive example (token balance): + Ledger N says "Transfer 5 tokens from A to B" + → Need balance of A at ledger N-1 to compute new balance + → That balance doesn't exist until migration processes ledgers 1 through N-1 +``` + +This distinction drives the convergence model: migration must run to the tip (not stop at a fixed end-ledger) so that additive current state is continuously built without gaps. The shared current-state cursor with CAS ensures exactly one process produces current state for each ledger, with a seamless handoff when migration catches up to live ingestion. ### Backfill Migration -The migration runner processes historical ledgers to enrich operations with protocol state and produce state changes/current state. -**Retention-Aware Processing**: The migration reads the retention window start from `ingest_store` (`oldest_ledger_cursor`). State changes are only persisted for ledgers within the retention window, but all ledgers in the range are processed to build accurate current state. +The backfill migration is split into two independent subcommands that handle different responsibilities: + +#### History Migration (`protocol-migrate history`) + +The history migration writes protocol state changes (operation enrichment) for ledgers within the retention window. ``` ┌─────────────────────────────────────────────────────────────────────────────┐ -│ BACKFILL MIGRATION FLOW │ +│ HISTORY MIGRATION FLOW │ └─────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────┐ │ ./wallet-backend │ - │ protocol-migrate │ - │ --protocol-id SEP50 ... 
│ - │ --start-ledger 1000 │ - │ --end-ledger 5000 │ - └─────────────┬──────────────┘ - │ - ▼ - ┌────────────────────────────┐ - │ MigrationRunner.Run() │ + │ protocol-migrate history │ + │ --protocol-id SEP50 SEP41 │ └─────────────┬──────────────┘ │ ▼ ┌────────────────────────────┐ │ Start() │ - │ - Validate protocol exists │ - │ - Set status = backfilling │ - │ _in_progress │ + │ - Validate classification │ + │ _status = 'success' │ + │ - Set history_migration │ + │ _status = 'in_progress' │ │ - Read oldest_ledger_cursor│ │ from ingest_store │ + │ - Initialize history_cursor│ + │ = oldest_ledger_cursor-1 │ └─────────────┬──────────────┘ │ ▼ ┌────────────────────────────┐ - │ For each ledger in range: │ - │(start-ledger to end-ledger)│ - └─────────────┬──────────────┘ - │ - ▼ + │ Read latest_ledger_cursor │ + │ Split [start, target] into │ + │ batches. Process in │ + │ parallel with ordered │◀──────────────┐ + │ commit. │ │ + └─────────────┬──────────────┘ │ + │ │ + ▼ │ + ┌────────────────────────────┐ │ + │ Per batch commit: │ │ + │ - CAS-advance history │ │ + │ _cursor │ │ + │ - Write state changes │ │ + │ (if CAS succeeded) │ │ + └─────────────┬──────────────┘ │ + │ │ + ┌────────┴────────┐ │ + │ │ │ + CAS success CAS failure │ + │ │ │ + ▼ ▼ │ + ┌──────────┐ ┌──────────────────┐ │ + │ Continue │ │ Handoff detected │ │ + │ to next │ │ Live ingestion │ │ + │ batch │ │ took over. │ │ + └────┬─────┘ │ Exit loop. │ │ + │ └────────┬─────────┘ │ + │ │ │ + ▼ │ │ + ┌──────────────┐ │ │ + │ More batches │ │ │ + │ remaining? 
│ │ │ + │ │ │ │ + │ YES: continue│ │ │ + │ NO: re-read │────────┼────────────────────┘ + │ latest_ledger│ │ (fetch new target, + │ _cursor, loop│ │ process remaining) + └──────────────┘ │ + │ + ▼ + ┌────────────────────────────┐ + │ Complete() │ + │ - Set history_migration │ + │ _status = 'success' │ + │ - Clean up resources │ + └────────────────────────────┘ +``` + +**Key simplification**: Since history migration starts at retention start, ALL processed ledgers are within retention. No need for the "process but discard" logic — every batch produces persisted state changes. + +**Parallelization advantage**: State changes for ledger N do not depend on state changes for ledger N-1, so batches are truly independent. History migration can be more aggressively parallelized than current-state migration. + +#### Current-State Migration (`protocol-migrate current-state`) + +The current-state migration builds current state from a protocol's deployment ledger forward. + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ CURRENT-STATE MIGRATION FLOW │ +└─────────────────────────────────────────────────────────────────────────────┘ + ┌────────────────────────────┐ - │ Use processor to: │ - │ - Find operations involving│ - │ protocol contracts │ - │ - Generate state changes │ - │ - Update current state │ - │ running totals │ + │ ./wallet-backend │ + │ protocol-migrate │ + │ current-state │ + │ --protocol-id SEP50 │ + │ --start-ledger 1000 │ └─────────────┬──────────────┘ │ ▼ ┌────────────────────────────┐ - │ If ledger >= retention │ - │ window start: │ - │ - Persist state changes │ - │ - Enrich historical data │ - │ │ - │ Otherwise: │ - │ - Discard state changes │ - │ (already applied to │ - │ current state totals) │ + │ Start() │ + │ - Validate classification │ + │ _status = 'success' │ + │ - Set current_state │ + │ _migration_status = │ + │ 'in_progress' │ + │ - Initialize current_state │ + │ _cursor = start-ledger-1 │ 
└─────────────┬──────────────┘ │ ▼ ┌────────────────────────────┐ + │ Read latest_ledger_cursor │ + │ Split [start, target] into │ + │ batches. Process in │ + │ parallel with ordered │◀──────────────┐ + │ commit. │ │ + └─────────────┬──────────────┘ │ + │ │ + ▼ │ + ┌────────────────────────────┐ │ + │ Per batch commit: │ │ + │ - CAS-advance current │ │ + │ _state_cursor │ │ + │ - Write current state │ │ + │ (if CAS succeeded) │ │ + │ - No state changes written │ │ + └─────────────┬──────────────┘ │ + │ │ + ┌────────┴────────┐ │ + │ │ │ + CAS success CAS failure │ + │ │ │ + ▼ ▼ │ + ┌──────────┐ ┌──────────────────┐ │ + │ Continue │ │ Handoff detected │ │ + │ to next │ │ Live ingestion │ │ + │ batch │ │ took over. │ │ + └────┬─────┘ │ Exit loop. │ │ + │ └────────┬─────────┘ │ + │ │ │ + ▼ │ │ + ┌──────────────┐ │ │ + │ More batches │ │ │ + │ remaining? │ │ │ + │ │ │ │ + │ YES: continue│ │ │ + │ NO: re-read │────────┼────────────────────┘ + │ latest_ledger│ │ (fetch new target, + │ _cursor, loop│ │ process remaining) + └──────────────┘ │ + │ + ▼ + ┌────────────────────────────┐ │ Complete() │ - │ - Write final current state│ - │ - Set status = │ - │ backfilling_success │ + │ - Set current_state │ + │ _migration_status = │ + │ 'success' │ + │ - Clean up resources │ └────────────────────────────┘ +``` +**Processing range**: Current-state migration processes ALL ledgers from `--start-ledger` to tip. This is necessary for accurate additive state (e.g., token balances) where ledger N depends on ledger N-1. 
-MIGRATION DEPENDENCIES: +#### Independence + +The two subcommands are fully independent: +- They write to different tables (state changes vs current state) +- They use different CAS cursors (`history_cursor` vs `current_state_cursor`) +- They track different status columns (`history_migration_status` vs `current_state_migration_status`) +- They can run in any order, concurrently, or only one can be run + +#### Convergence Model + +Two independent convergence paths: + +``` +HISTORY CONVERGENCE: ┌────────────────────────────────────────────────────────────────────────────┐ -│ The migration has an explicit dependency on protocol-setup, │ -│ and an implicit dependency on live-ingestion | -│ 1. Live ingestion must be running with the same processor │ -│ 2. Checkpoint population must have completed for the protocol │ -│ 3. Migration processes: start-ledger → (live ingestion start - 1) │ -│ 4. Live ingestion continues from its start point onward │ +│ History migration CAS-advances protocol_{ID}_history_cursor from │ +│ retention_start. Live ingestion also CAS-advances the same cursor. │ +│ When history migration CAS fails → live ingestion owns state change │ +│ production. │ +│ │ +│ Timeline example: │ +│ T=0s: History cursor=10004. Migration CAS 10004→10005. Success. │ +│ T=0.5s: Migration CAS 10005→10006. Success. │ +│ T=5s: Live ingestion processes 10008. Cursor=10007 >= 10007. YES. │ +│ Live CAS 10007→10008. Success. │ +│ T=5.5s: Migration tries CAS 10007→10008. FAILS. Handoff detected. │ │ │ -│ This ensures no ledger gap between backfill and live ingestion. │ +│ No gap: every ledger gets state changes from exactly one process. 
│ └────────────────────────────────────────────────────────────────────────────┘ -RETENTION WINDOW HANDLING: +CURRENT STATE CONVERGENCE: ┌────────────────────────────────────────────────────────────────────────────┐ -│ The migration decouples the processing range from the retention window: │ +│ Current-state migration CAS-advances protocol_{ID}_current_state_cursor │ +│ from start_ledger. Live ingestion also CAS-advances the same cursor. │ +│ When current-state migration CAS fails → live ingestion owns current │ +│ state production. │ │ │ -│ Example: Protocol deployed at ledger 1000, retention starts at 4000 │ -│ │ -│ Ledger 1000 ──────────────────────────────────────── Ledger 5000 │ -│ [start-ledger] [retention start] [end-ledger] │ -│ │ │ │ │ -│ ├─────────────────────────┤ │ │ -│ │ Process but DISCARD │ │ │ -│ │ state changes │ │ │ -│ │ (update current state │ │ │ -│ │ running totals only) │ │ │ -│ │ ├──────────────────────────┤ │ -│ │ │ Process AND PERSIST │ │ -│ │ │ state changes │ │ -│ │ │ (within retention) │ │ +│ Same CAS mechanism as history convergence, but using a separate cursor. │ +│ No gap: every ledger gets current state from exactly one process. │ +└────────────────────────────────────────────────────────────────────────────┘ +``` + +#### Migration Dependencies + +``` +MIGRATION DEPENDENCIES: +┌────────────────────────────────────────────────────────────────────────────┐ +│ Both migration subcommands depend on protocol-setup, │ +│ and run concurrently with live ingestion: │ +│ 1. Checkpoint population must have completed (classification_status = │ +│ 'success') │ +│ 2. Live ingestion should be running with the same processor │ +│ 3. History migration: retention_start → tip (until CAS fails) | +│ 4. Current-state migration: start-ledger → tip (until CAS fails) │ +│ 5. Live ingestion gates both state changes and current state on their │ +│ respective cursors │ +│ 6. 
Handoff: each migration's CAS fails → live ingestion owns that │ +│ responsibility │ │ │ -│ This allows accurate current state even when protocol history extends │ -│ beyond the retention window. │ +│ This ensures zero-gap coverage via CAS serialization on each cursor. │ └────────────────────────────────────────────────────────────────────────────┘ ``` @@ -679,11 +892,11 @@ During live ingestion, two related but distinct processes run sequentially: │ 1. CLASSIFICATION │ │ │ │ Process ledger entry changes to classify contracts: │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ ContractCode entries: validate WASM, store in known_wasms │ │ -│ │ ContractData Instance entries: lookup hash in known_wasms, │ │ -│ │ map contract to protocol_contracts │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ ContractCode entries: validate WASM, store in protocol_wasms │ │ +│ │ ContractData Instance entries: lookup hash in protocol_wasms, │ │ +│ │ map contract to protocol_contracts │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ └─────────────────────────────────────┬───────────────────────────────────────┘ │ ▼ @@ -691,12 +904,12 @@ During live ingestion, two related but distinct processes run sequentially: │ 2. 
STATE PRODUCTION │ │ │ │ Run protocol processors on transactions (using updated classifications): │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ For each protocol processor: │ │ -│ │ Processor.Process(ledger) │ │ -│ │ - Examines transactions involving protocol contracts │ │ -│ │ - Produces protocol-specific state changes │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ For each protocol processor: │ │ +│ │ Processor.Process(ledger) │ │ +│ │ - Examines transactions involving protocol contracts │ │ +│ │ - Produces protocol-specific state changes │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ └─────────────────────────────────────┬───────────────────────────────────────┘ │ ▼ @@ -712,13 +925,55 @@ During live ingestion, two related but distinct processes run sequentially: │ New contract │ │ Protocol-specific │ │ Operations, │ │ classifications │ │ state changes │ │ transactions, │ │ (protocol_contracts, │ │ (from processors) │ │ accounts, etc. │ -│ known_wasms) │ │ │ │ │ +│ protocol_wasms) │ │ │ │ │ └──────────────────────┘ └──────────────────────┘ └──────────────────────┘ + +┌─────────────────────────────────────────────────────────────────────────────┐ +│ PER-PROTOCOL DUAL GATING │ +│ │ +│ Within PersistLedgerData, for each registered protocol at ledger N: │ +│ │ +│ === PROTOCOL STATE CHANGES === │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ 1. Read protocol_{ID}_history_cursor │ │ +│ │ │ │ +│ │ 2. If cursor >= N-1: │ │ +│ │ - CAS history cursor from N-1 to N │ │ +│ │ - If CAS succeeds: WRITE state changes for N │ │ +│ │ - If CAS fails: skip (history migration wrote them) │ │ +│ │ │ │ +│ │ 3. 
If cursor < N-1: │ │ +│ │ - SKIP state changes (history migration hasn't caught up) │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +│ │ +│ === CURRENT STATE === │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ 4. Read protocol_{ID}_current_state_cursor │ │ +│ │ │ │ +│ │ 5. If cursor >= N-1: │ │ +│ │ - CAS current_state cursor from N-1 to N │ │ +│ │ - If CAS succeeds: WRITE current state for N │ │ +│ │ - If CAS fails: skip (current-state migration wrote it) │ │ +│ │ │ │ +│ │ 6. If cursor < N-1: │ │ +│ │ - SKIP current state (current-state migration hasn't caught up) │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +│ │ +│ Why gate state changes: The existing BatchCopy write path (pgx COPY │ +│ protocol) fails on duplicate records — it does not support ON CONFLICT. │ +│ Gating prevents duplicates and follows the same proven CAS pattern │ +│ already designed for current state. │ +│ │ +│ This logic is per-protocol. Different protocols can be at different │ +│ stages — one may have history migration complete while another is still │ +│ running, and current-state migration may be at a different stage than │ +│ history migration for the same protocol. │ +└─────────────────────────────────────────────────────────────────────────────┘ ``` -## known_wasms Lookup Optimization +## protocol_wasms Lookup Optimization -The `known_wasms` table grows unboundedly as new contracts are deployed on the network. Since +The `protocol_wasms` table grows unboundedly as new contracts are deployed on the network. Since every live ingestion lookup queries this table, optimizing lookup performance is critical. 
#### Default Implementation: LRU Cache + PostgreSQL @@ -727,7 +982,7 @@ The recommended approach is an in-memory LRU cache layered over the PostgreSQL t ``` ┌─────────────────────────────────────────────────────────────────────────────┐ -│ known_wasms LOOKUP OPTIMIZATION │ +│ protocol_wasms LOOKUP OPTIMIZATION │ └─────────────────────────────────────────────────────────────────────────────┘ New Contract Deployment @@ -750,7 +1005,7 @@ The recommended approach is an in-memory LRU cache layered over the PostgreSQL t ▼ ▼ ┌──────────────┐ ┌──────────────────────┐ │ Return │ │ Query PostgreSQL │ - │ cached │ │ known_wasms table │ + │ cached │ │ protocol_wasms table │ │ protocol_id │ │ (1-5ms) │ └──────────────┘ └──────────┬───────────┘ │ @@ -781,7 +1036,7 @@ func (c *KnownWasmsCache) Lookup(ctx context.Context, hash []byte) (*string, boo // Cache miss: query DB (~1-5ms) var protocolID *string err := c.db.QueryRowContext(ctx, - "SELECT protocol_id FROM known_wasms WHERE wasm_hash = $1", key).Scan(&protocolID) + "SELECT protocol_id FROM protocol_wasms WHERE wasm_hash = $1", key).Scan(&protocolID) if err == sql.ErrNoRows { return nil, false, nil // Not in DB at all @@ -796,66 +1051,227 @@ func (c *KnownWasmsCache) Lookup(ctx context.Context, hash []byte) (*string, boo } ``` +## Write-Through Current State Cache + +When live ingestion first takes over current-state production for a protocol (its first successful CAS), it needs the current state to compute the next state. This is handled by a write-through in-memory cache, similar in pattern to the protocol_wasms LRU cache above. 
+ +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ WRITE-THROUGH CURRENT STATE CACHE │ +└─────────────────────────────────────────────────────────────────────────────┘ + + Live Ingestion at Ledger N + │ + ▼ + ┌──────────────────────┐ + │ Check in-memory │ + │ state cache for │ + │ protocol │ + └──────────┬───────────┘ + │ + ┌───────────┴───────────┐ + │ │ + POPULATED EMPTY + │ │ + ▼ ▼ + ┌──────────────┐ ┌──────────────────────┐ + │ Use cached │ │ Read current state │ + │ state to │ │ from protocol state │ + │ compute N │ │ tables (one-time DB │ + │ │ │ read at handoff) │ + └──────────────┘ └──────────┬───────────┘ + │ │ + │ ▼ + │ ┌──────────────────────┐ + │ │ Populate in-memory │ + │ │ cache │ + │ └──────────┬───────────┘ + │ │ + └───────────┬───────────┘ + │ + ▼ + ┌──────────────────────┐ + │ Compute new state │ + │ for ledger N │ + └──────────┬───────────┘ + │ + ▼ + ┌──────────────────────┐ + │ Update in-memory │ + │ cache + write to │ + │ protocol state │ + │ tables in DB │ + │ (write-through) │ + └──────────────────────┘ +``` + +**Cache structure**: +```go +// Per-protocol current state cache +map[protocolID] -> { + currentStateCursor uint32 // last ledger for which state was produced + stateData protocolState // protocol-specific current state +} +``` + +**Lifecycle**: +- **Empty at start**: Cache is unpopulated when live ingestion starts +- **Populated from DB**: When live ingestion first successfully CAS-advances the cursor (handoff from migration), it reads current state from the protocol's state tables (one-time read) +- **Updated per ledger**: On each subsequent ledger, cache is updated in-memory and written through to DB +- **Lost on restart**: If live ingestion restarts, the cache is repopulated from DB on the next current-state production + ## Backfill Migrations -Backfill migrations process historical ledgers to build current state and generate state changes. 
State changes are only persisted for ledgers within the retention window, but all ledgers in the specified range are processed to produce accurate current state. +Backfill migrations are split into two independent subcommands: +- `protocol-migrate history` — writes protocol state changes within the retention window +- `protocol-migrate current-state` — builds current state from a protocol's deployment ledger -The `protocol-migrate` command accepts a set of protocol IDs for an explicit signal to migrate those protocols. Each protocol migration requires a specific range, which may not be exactly what other migrations need even if they are implemented at the same time. Migrations that do share a ledger range can run in one process. +Each subcommand converges independently with live ingestion via its own CAS cursor. They can run in any order, concurrently, or only one can be run. -### Migration Command +### History Migration Command ```bash -./wallet-backend protocol-migrate --protocol-id SEP50 SEP41 --start-ledger 1 --end-ledger 5 +./wallet-backend protocol-migrate history --protocol-id SEP50 SEP41 ``` **Parameters**: -- `--protocol-id`: The protocol(s) to migrate (must exist in `protocols` table) -- `--start-ledger`: First ledger to process (set based on protocol deployment/data needs) -- `--end-ledger`: Last ledger to process (should be the ledger before live ingestion started) +- `--protocol-id`: Protocol(s) to migrate (required) +- No `--start-ledger` — always reads `oldest_ledger_cursor` from `ingest_store` + +The history migration runs until it converges with live ingestion. It processes batches from `oldest_ledger_cursor` toward the tip, CAS-advancing the history cursor with each batch commit. When a CAS fails (because live ingestion advanced the cursor first), the migration detects the handoff, sets `history_migration_status = 'success'`, and exits. 
-### Migration Workflow +### Current-State Migration Command + +```bash +./wallet-backend protocol-migrate current-state --protocol-id SEP50 --start-ledger 1000 +``` + +**Parameters**: +- `--protocol-id`: Protocol(s) to migrate (required) +- `--start-ledger`: First ledger to process (required, based on protocol deployment) + +The current-state migration runs until it converges with live ingestion. It processes ALL ledgers from `--start-ledger` toward the tip, CAS-advancing the current-state cursor with each batch commit. It writes only current state — no state changes. When a CAS fails, the migration detects the handoff, sets `current_state_migration_status = 'success'`, and exits. + +### History Migration Workflow ``` ┌─────────────────────────────────────────────────────────────────────────────┐ -│ MIGRATION RUNNER WORKFLOW │ +│ HISTORY MIGRATION RUNNER WORKFLOW │ └─────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────────────────────────────────────────────────────┐ │ 1. VALIDATE │ ├────────────────────────────────────────────────────────────────────────────┤ -│ - Verify protocol(s) exists in registry │ -│ - Verify migration_status = 'classification_success' │ -│ - Set migration_status = 'backfilling_in_progress' │ +│ - Verify protocol(s) exists in registry │ +│ - Verify classification_status = 'success' │ +│ - Set history_migration_status = 'in_progress' │ │ - Read oldest_ledger_cursor from ingest_store (retention window start) │ +│ - Initialize protocol_{ID}_history_cursor = oldest_ledger_cursor - 1 │ +└────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌────────────────────────────────────────────────────────────────────────────┐ +│ 2. PROCESS BATCHES TO TIP │ +├────────────────────────────────────────────────────────────────────────────┤ +│ Loop: │ +│ a. Read latest_ledger_cursor to get target │ +│ b. Split [cursor+1, target] into batches │ +│ c. 
Process batches in parallel with ordered commit │ +│ d. Each batch commit: │ +│ - CAS-advance protocol_{ID}_history_cursor │ +│ - If CAS succeeds: write state changes │ +│ - If CAS fails: handoff detected → go to step 3 │ +│ e. After all batches: re-read latest_ledger_cursor │ +│ f. If more ledgers remain: repeat from (b) │ +│ g. If no more ledgers: block on RPC for next ledger (~5s), repeat │ +└────────────────────────────────────────────────────────────────────────────┘ + │ + CAS failure + (handoff) + │ + ▼ +┌────────────────────────────────────────────────────────────────────────────┐ +│ 3. COMPLETE │ +├────────────────────────────────────────────────────────────────────────────┤ +│ - Verify cursor is at or past the ledger migration tried to write │ +│ - Set history_migration_status = 'success' │ +│ - Clean up migration resources │ +│ - Live ingestion now owns state change production for this protocol │ +└────────────────────────────────────────────────────────────────────────────┘ +``` + +### Current-State Migration Workflow + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ CURRENT-STATE MIGRATION RUNNER WORKFLOW │ +└─────────────────────────────────────────────────────────────────────────────┘ + +┌────────────────────────────────────────────────────────────────────────────┐ +│ 1. VALIDATE │ +├────────────────────────────────────────────────────────────────────────────┤ +│ - Verify protocol(s) exists in registry │ +│ - Verify classification_status = 'success' │ +│ - Set current_state_migration_status = 'in_progress' │ +│ - Initialize protocol_{ID}_current_state_cursor = start_ledger - 1 │ └────────────────────────────────────────────────────────────────────────────┘ │ ▼ ┌────────────────────────────────────────────────────────────────────────────┐ -│ 2. PROCESS LEDGER RANGE │ +│ 2. 
PROCESS BATCHES TO TIP │ ├────────────────────────────────────────────────────────────────────────────┤ -│ For ledger = start-ledger to end-ledger: │ -│ - Fetch ledger data (from archive or RPC) │ -│ - Run processor to find protocol operations │ -│ - Generate state changes, update current state running totals │ -│ - If ledger >= retention window start: persist state changes │ -│ - Otherwise: discard state changes (totals already updated) │ +│ Loop: │ +│ a. Read latest_ledger_cursor to get target │ +│ b. Split [cursor+1, target] into batches │ +│ c. Process batches in parallel with ordered commit │ +│ d. Each batch commit: │ +│ - CAS-advance protocol_{ID}_current_state_cursor │ +│ - If CAS succeeds: write current state │ +│ - If CAS fails: handoff detected → go to step 3 │ +│ e. After all batches: re-read latest_ledger_cursor │ +│ f. If more ledgers remain: repeat from (b) │ +│ g. If no more ledgers: block on RPC for next ledger (~5s), repeat │ └────────────────────────────────────────────────────────────────────────────┘ │ + CAS failure + (handoff) + │ ▼ ┌────────────────────────────────────────────────────────────────────────────┐ │ 3. 
COMPLETE │ ├────────────────────────────────────────────────────────────────────────────┤ -│ - Write final current state │ -│ - Set migration_status = 'backfilling_success' │ -│ - Current state APIs now serve this protocol's data │ +│ - Verify cursor is at or past the ledger migration tried to write │ +│ - Set current_state_migration_status = 'success' │ +│ - Clean up migration resources │ +│ - Live ingestion now owns current-state production for this protocol │ └────────────────────────────────────────────────────────────────────────────┘ -ERROR HANDLING: +ERROR HANDLING (applies to both subcommands): ┌────────────────────────────────────────────────────────────────────────────┐ │ If migration fails at any point: │ -│ - Set migration_status = 'failed' │ +│ - Set respective status column = 'failed' │ │ - Log error details │ │ - Migration can be retried after fixing the issue │ +│ - On restart: resume from the respective CAS cursor + 1 │ +│ (history_cursor for history, current_state_cursor for current-state) │ +└────────────────────────────────────────────────────────────────────────────┘ + +STATUS TRANSITIONS (per column): +┌────────────────────────────────────────────────────────────────────────────┐ +│ classification_status: │ +│ not_started → in_progress (protocol-setup starts) │ +│ → success (protocol-setup completes) │ +│ → failed (error) │ +│ │ +│ history_migration_status: │ +│ not_started → in_progress (protocol-migrate history starts) │ +│ → success (CAS fails = live ingestion took over) │ +│ → failed (error) │ +│ │ +│ current_state_migration_status: │ +│ not_started → in_progress (protocol-migrate current-state starts) │ +│ → success (CAS fails = live ingestion took over) │ +│ → failed (error) │ └────────────────────────────────────────────────────────────────────────────┘ ``` @@ -863,7 +1279,11 @@ ERROR HANDLING: Protocol backfill migrations can process millions of ledgers. 
Sequential processing (ledger-by-ledger) is slow because each ledger must wait for the previous to complete. This section describes how to -parallelize backfill migrations while preserving the correctness of order-dependent current state tracking. +parallelize backfill migrations while preserving correctness. + +The two migration subcommands have different parallelization characteristics: +- **History migration** (`protocol-migrate history`): State changes for ledger N do not depend on state changes for ledger N-1, so batches are truly independent. History migration can be more aggressively parallelized — batches can be processed and committed in any order without affecting correctness. +- **Current-state migration** (`protocol-migrate current-state`): Current state is order-dependent (see below), so batches must be committed in order even though they can be processed in parallel. ### Order-Dependent Current State Tracking @@ -887,11 +1307,11 @@ The final current state must reflect ledger 300's removal, not ledger 100's addi ### Parallel Processing with Ordered Commit -The solution uses a **streaming ordered commit** pattern: +The solution uses a **streaming ordered commit** pattern (required for current-state migration; history migration can use simpler unordered commit since state changes are independent): 1. **PARALLEL PHASE**: Process ledger batches concurrently (each batch gets isolated state) 2. **ORDERED COMMIT**: A committer goroutine writes completed batches to the database **in order** -3. **CURSOR TRACKING**: Each batch commit updates the migration cursor for crash recovery +3. **CURSOR TRACKING**: Each batch commit CAS-advances the respective cursor (history or current-state). If a CAS fails during any batch commit, migration detects that live ingestion has taken over and exits. ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -923,9 +1343,10 @@ The solution uses a **streaming ordered commit** pattern: │ 1. 
Creates isolated LedgerBackend │ │ 2. Creates isolated BatchBuffer │ │ 3. Processes ledgers sequentially within batch │ -│ 4. Generates state changes, updates current state running totals │ -│ 5. Filters state changes based on retention window │ -│ 6. Sends BatchResult to results channel │ +│ 4. Generates output per subcommand: │ +│ - History: state changes for each ledger │ +│ - Current-state: current state running totals │ +│ 5. Sends BatchResult to results channel │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ │ ▼ ▼ ▼ ▼ ▼ @@ -950,25 +1371,27 @@ The solution uses a **streaming ordered commit** pattern: │ When batch 1 arrives: ┌────────────────┼────────────────┐ - │ Commit 1, then 2, then 3 │ + │ Commit 1, then 2, then 3 │ │ (sequential, in order) │ └────────────────┼────────────────┘ │ ┌───────────────────────────┼───────────────────────────┐ ▼ ▼ ▼ -┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ -│ COMMIT Batch 1 │ │ COMMIT Batch 2 │ │ COMMIT Batch 3 │ -│ cursor = 1000 │ ──▶ │ cursor = 2000 │ ──▶ │ cursor = 3000 │ -│ (atomic tx) │ │ (atomic tx) │ │ (atomic tx) │ -└─────────────────┘ └─────────────────┘ └─────────────────┘ +┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐ +│ COMMIT Batch 1 │ │ COMMIT Batch 2 │ │ COMMIT Batch 3 │ +│ CAS cursor→1000 │ ──▶ │ CAS cursor→2000 │ ──▶ │ CAS cursor→3000 │ +│ + batch data │ │ + batch data │ │ + batch data │ +│ (atomic tx) │ │ (atomic tx) │ │ (atomic tx) │ +└──────────────────┘ └──────────────────┘ └──────────────────┘ │ │ │ ▼ ▼ ▼ - Crash? Crash? Crash? - Resume @ 1 Resume @ 1001 Resume @ 2001 + CAS fail? CAS fail? CAS fail? + No → continue No → continue No → continue + Yes → handoff Yes → handoff Yes → handoff ``` -**Crash Recovery**: If the process crashes after committing batch 2, the cursor is at ledger 2000. -On restart, processing resumes from ledger 2001 - no work is lost. 
+**Crash Recovery**: If the process crashes after committing batch 2, the respective CAS cursor is at ledger 2000. +On restart, processing resumes from ledger 2001 — no work is lost. Each subcommand uses its own CAS cursor for crash recovery, eliminating the need for a separate migration cursor. **Example**: @@ -1014,7 +1437,9 @@ type OperationProtocol { type Protocol { id: String! - migrationStatus: String! + classificationStatus: String! + historyMigrationStatus: String! + currentStateMigrationStatus: String! } ``` @@ -1022,9 +1447,12 @@ type Protocol { Some migrations will write to new tables that will represent the current state produced by a protocol in relation to accounts. An example of this is SEP-50 Collectibles, where we will track collectible mints/transfers in order to maintain a table of collectibles owned by accounts. -The API exposes `Protocol.migrationStatus` so clients can check whether a protocol's migration is complete before querying current state data. This pushes the responsibility to clients, keeping queries cleaner and faster. +The API exposes per-process status fields so clients can independently check whether each migration responsibility is complete. This pushes the responsibility to clients, keeping queries cleaner and faster. -**Client responsibility**: Clients should check `migrationStatus = 'backfilling_success'` before relying on current state data. Clients that query current state during an in-progress migration may receive incomplete data. +**Client responsibility**: +- Check `historyMigrationStatus = 'success'` before relying on enriched operation history +- Check `currentStateMigrationStatus = 'success'` before relying on current state completeness +- Clients that query data during an in-progress migration may receive incomplete results The `Operation.protocols` field exposes which protocols were involved in an operation. 
The query path uses existing tables without requiring a dedicated mapping table: @@ -1036,11 +1464,13 @@ The query path uses existing tables without requiring a dedicated mapping table: GraphQL Query: ┌──────────────────────────────────────────────────────────────────────────┐ -│ query { │ +│ query { │ │ operation(id: "12345") { │ │ id │ │ protocols { │ -│ protocol { id, migrationStatus } │ +│ protocol { id, classificationStatus, │ +│ historyMigrationStatus, │ +│ currentStateMigrationStatus } │ │ contractId │ │ } │ │ } │ @@ -1049,7 +1479,7 @@ GraphQL Query: │ ▼ ┌──────────────────────────────────────────────────────────────────────────┐ -│ DATABASE QUERY │ +│ DATABASE QUERY │ └──────────────────────────────────────────────────────────────────────────┘ SELECT DISTINCT p.id, pc.contract_id @@ -1062,7 +1492,7 @@ WHERE o.id = $1; │ ▼ ┌──────────────────────────────────────────────────────────────────────────┐ -│ JOIN VISUALIZATION │ +│ JOIN VISUALIZATION │ └──────────────────────────────────────────────────────────────────────────┘ ┌────────────┐ ┌──────────────────┐ ┌───────────────────┐ ┌───────────┐ @@ -1070,8 +1500,16 @@ WHERE o.id = $1; │ │ │ accounts │ │ contracts │ │ │ ├────────────┤ ├──────────────────┤ ├───────────────────┤ ├───────────┤ │ id │ │ operation_id (FK)│ │ contract_id (PK) │ │ id (PK) │ -│ ... │ │ account_id │ │ protocol_id (FK) │ │ migration │ -│ │ │ │ │ name │ │ _status │ +│ ... 
│ │ account_id │ │ protocol_id (FK) │ │ classifi- │ +│ │ │ │ │ name │ │ cation_ │ +│ │ │ │ │ │ │ status, │ +│ │ │ │ │ │ │ history_ │ +│ │ │ │ │ │ │ migration │ +│ │ │ │ │ │ │ _status, │ +│ │ │ │ │ │ │ current_ │ +│ │ │ │ │ │ │ state_ │ +│ │ │ │ │ │ │ migration │ +│ │ │ │ │ │ │ _status │ └────────────┘ └──────────────────┘ └───────────────────┘ └───────────┘ │ │ │ │ │ │ │ │ @@ -1108,7 +1546,6 @@ INDEXES REQUIRED: │ │ │ protocols: │ │ PRIMARY KEY (id) -- fast lookup by id │ -│ INDEX on (migration_status) -- filter by status │ │ │ └────────────────────────────────────────────────────────────────────────────┘ @@ -1134,16 +1571,21 @@ QUERY COST BREAKDOWN (per operation): ### Client Handling of Migration Status -The API exposes `Protocol.migrationStatus` to allow clients to handle in-progress migrations appropriately. +The API exposes per-process status fields on `Protocol` to allow clients to handle in-progress migrations appropriately. -**For historical data** (partially enriched during migration): +**For historical data** (enriched operations with state changes): 1. **Accept partial data**: Display enriched data where available -2. **Wait for completion**: Check `Protocol.migrationStatus` and defer display until `'backfilling_success'` +2. **Wait for completion**: Check `historyMigrationStatus = 'success'` and defer display until complete **For current state data**: -Clients should check `Protocol.migrationStatus = 'backfilling_success'` before relying on current state queries. Current state may be incomplete or inaccurate while migration is in progress. +Current state is **progressively available** during migration — the current-state cursor advances incrementally as migration processes each ledger. However, until `currentStateMigrationStatus = 'success'`, the current state only reflects ledgers up to the cursor position and may not include recent activity. + +- `in_progress`: Current state exists but may lag behind live activity. 
The cursor indicates how far the migration has progressed. +- `success`: Live ingestion has fully taken over current-state production. Current state is up-to-date and will stay current going forward. + +Clients should check `currentStateMigrationStatus = 'success'` before relying on current state queries for completeness. Clients that can tolerate partial data may use current state during `in_progress` with the understanding that it reflects state up to the migration cursor, not necessarily the latest ledger. Example query to check migration status: @@ -1151,7 +1593,9 @@ Example query to check migration status: query { protocols { id - migrationStatus + classificationStatus + historyMigrationStatus + currentStateMigrationStatus } } ``` \ No newline at end of file diff --git a/internal/data/mocks.go b/internal/data/mocks.go index e26cfbcfc..e25f4b0be 100644 --- a/internal/data/mocks.go +++ b/internal/data/mocks.go @@ -325,3 +325,29 @@ func (m *ProtocolWasmModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, wa args := m.Called(ctx, dbTx, wasms) return args.Error(0) } + +// ProtocolContractModelMock is a mock implementation of ProtocolContractModelInterface. +type ProtocolContractModelMock struct { + mock.Mock +} + +var _ ProtocolContractModelInterface = (*ProtocolContractModelMock)(nil) + +// NewProtocolContractModelMock creates a new instance of ProtocolContractModelMock. 
+func NewProtocolContractModelMock(t interface { + mock.TestingT + Cleanup(func()) +}, +) *ProtocolContractModelMock { + mockModel := &ProtocolContractModelMock{} + mockModel.Mock.Test(t) + + t.Cleanup(func() { mockModel.AssertExpectations(t) }) + + return mockModel +} + +func (m *ProtocolContractModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, contracts []ProtocolContract) error { + args := m.Called(ctx, dbTx, contracts) + return args.Error(0) +} diff --git a/internal/db/migrations/2026-03-09.0-protocol_contracts.sql b/internal/db/migrations/2026-03-09.0-protocol_contracts.sql new file mode 100644 index 000000000..2ee01e961 --- /dev/null +++ b/internal/db/migrations/2026-03-09.0-protocol_contracts.sql @@ -0,0 +1,13 @@ +-- +migrate Up +CREATE TABLE protocol_contracts ( + contract_id TEXT PRIMARY KEY, + wasm_hash TEXT NOT NULL REFERENCES protocol_wasms(wasm_hash), + protocol_id TEXT, + name TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX idx_protocol_contracts_wasm_hash ON protocol_contracts(wasm_hash); + +-- +migrate Down +DROP TABLE IF EXISTS protocol_contracts; diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go index 065686996..d23048c7e 100644 --- a/internal/services/wasm_ingestion.go +++ b/internal/services/wasm_ingestion.go @@ -5,32 +5,41 @@ import ( "fmt" "github.com/jackc/pgx/v5" + "github.com/stellar/go-stellar-sdk/ingest" + "github.com/stellar/go-stellar-sdk/strkey" "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/wallet-backend/internal/data" ) -// WasmIngestionService tracks and persists WASM hashes during checkpoint population. +// WasmIngestionService tracks and persists WASM hashes and contract-to-WASM mappings during checkpoint population. 
type WasmIngestionService interface { ProcessContractCode(ctx context.Context, wasmHash xdr.Hash) error + ProcessContractData(ctx context.Context, change ingest.Change) error PersistProtocolWasms(ctx context.Context, dbTx pgx.Tx) error + PersistProtocolContracts(ctx context.Context, dbTx pgx.Tx) error } var _ WasmIngestionService = (*wasmIngestionService)(nil) type wasmIngestionService struct { - protocolWasmModel data.ProtocolWasmModelInterface - wasmHashes map[xdr.Hash]struct{} + protocolWasmModel data.ProtocolWasmModelInterface + protocolContractModel data.ProtocolContractModelInterface + wasmHashes map[xdr.Hash]struct{} + contractIDsByWasmHash map[xdr.Hash][]string } // NewWasmIngestionService creates a WasmIngestionService. func NewWasmIngestionService( protocolWasmModel data.ProtocolWasmModelInterface, + protocolContractModel data.ProtocolContractModelInterface, ) *wasmIngestionService { return &wasmIngestionService{ - protocolWasmModel: protocolWasmModel, - wasmHashes: make(map[xdr.Hash]struct{}), + protocolWasmModel: protocolWasmModel, + protocolContractModel: protocolContractModel, + wasmHashes: make(map[xdr.Hash]struct{}), + contractIDsByWasmHash: make(map[xdr.Hash][]string), } } @@ -40,6 +49,37 @@ func (s *wasmIngestionService) ProcessContractCode(ctx context.Context, wasmHash return nil } +// ProcessContractData extracts contract-to-WASM-hash mappings from ContractData Instance entries. 
+func (s *wasmIngestionService) ProcessContractData(ctx context.Context, change ingest.Change) error { + contractDataEntry := change.Post.Data.MustContractData() + + // Only process Instance entries + if contractDataEntry.Key.Type != xdr.ScValTypeScvLedgerKeyContractInstance { + return nil + } + + // Extract contract address + contractAddress, ok := contractDataEntry.Contract.GetContractId() + if !ok { + return nil + } + + // Extract WASM hash from contract instance executable + contractInstance := contractDataEntry.Val.MustInstance() + if contractInstance.Executable.Type != xdr.ContractExecutableTypeContractExecutableWasm { + return nil + } + if contractInstance.Executable.WasmHash == nil { + return nil + } + + hash := *contractInstance.Executable.WasmHash + contractAddressStr := strkey.MustEncode(strkey.VersionByteContract, contractAddress[:]) + s.contractIDsByWasmHash[hash] = append(s.contractIDsByWasmHash[hash], contractAddressStr) + + return nil +} + // PersistProtocolWasms writes all accumulated WASM hashes to the protocol_wasms table. func (s *wasmIngestionService) PersistProtocolWasms(ctx context.Context, dbTx pgx.Tx) error { if len(s.wasmHashes) == 0 { @@ -61,3 +101,27 @@ func (s *wasmIngestionService) PersistProtocolWasms(ctx context.Context, dbTx pg log.Ctx(ctx).Infof("Persisted %d protocol WASM hashes", len(wasms)) return nil } + +// PersistProtocolContracts writes all accumulated contract-to-WASM mappings to the protocol_contracts table. 
+func (s *wasmIngestionService) PersistProtocolContracts(ctx context.Context, dbTx pgx.Tx) error { + if len(s.contractIDsByWasmHash) == 0 { + return nil + } + + var contracts []data.ProtocolContract + for hash, contractIDs := range s.contractIDsByWasmHash { + for _, contractID := range contractIDs { + contracts = append(contracts, data.ProtocolContract{ + ContractID: contractID, + WasmHash: hash.HexString(), + }) + } + } + + if err := s.protocolContractModel.BatchInsert(ctx, dbTx, contracts); err != nil { + return fmt.Errorf("persisting protocol contracts: %w", err) + } + + log.Ctx(ctx).Infof("Persisted %d protocol contracts", len(contracts)) + return nil +} diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go index 7d6a45b7f..10a9629fe 100644 --- a/internal/services/wasm_ingestion_test.go +++ b/internal/services/wasm_ingestion_test.go @@ -5,6 +5,8 @@ import ( "errors" "testing" + "github.com/stellar/go-stellar-sdk/ingest" + "github.com/stellar/go-stellar-sdk/strkey" "github.com/stellar/go-stellar-sdk/xdr" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -19,7 +21,8 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { t.Run("tracks_hash", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) err := svc.ProcessContractCode(ctx, hash) require.NoError(t, err) @@ -30,7 +33,8 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { t.Run("duplicate_hash_deduplicated", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, 
protocolContractModelMock) err := svc.ProcessContractCode(ctx, hash) require.NoError(t, err) @@ -57,7 +61,8 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { t.Run("no_hashes_skips_insert", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) err := svc.PersistProtocolWasms(ctx, nil) require.NoError(t, err) @@ -66,6 +71,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { t.Run("single_hash_persisted", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) hash := xdr.Hash{10, 20, 30} protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, @@ -77,7 +83,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { }), ).Return(nil).Once() - svc := NewWasmIngestionService(protocolWasmModelMock) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) err := svc.ProcessContractCode(ctx, hash) require.NoError(t, err) @@ -87,6 +93,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { t.Run("multiple_hashes_persisted", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) hash1 := xdr.Hash{1} hash2 := xdr.Hash{2} @@ -103,7 +110,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { }), ).Return(nil).Once() - svc := NewWasmIngestionService(protocolWasmModelMock) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) require.NoError(t, svc.ProcessContractCode(ctx, hash1)) require.NoError(t, svc.ProcessContractCode(ctx, hash2)) @@ -113,13 +120,14 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { 
t.Run("batch_insert_error_propagated", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) hash := xdr.Hash{99} insertErr := errors.New("db connection lost") protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). Return(insertErr).Once() - svc := NewWasmIngestionService(protocolWasmModelMock) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) require.NoError(t, svc.ProcessContractCode(ctx, hash)) err := svc.PersistProtocolWasms(ctx, nil) @@ -128,3 +136,228 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { assert.ErrorIs(t, err, insertErr) }) } + +func TestWasmIngestionService_ProcessContractData(t *testing.T) { + ctx := context.Background() + + t.Run("non_instance_entry_skipped", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + // ContractData entry with a non-Instance key type (e.g., balance entry) + contractHash := [32]byte{1, 2, 3} + change := ingest.Change{ + Type: xdr.LedgerEntryTypeContractData, + Post: &xdr.LedgerEntry{ + Data: xdr.LedgerEntryData{ + Type: xdr.LedgerEntryTypeContractData, + ContractData: &xdr.ContractDataEntry{ + Contract: xdr.ScAddress{ + Type: xdr.ScAddressTypeScAddressTypeContract, + ContractId: (*xdr.ContractId)(&contractHash), + }, + Key: xdr.ScVal{Type: xdr.ScValTypeScvSymbol}, + Durability: xdr.ContractDataDurabilityPersistent, + }, + }, + }, + } + + err := svc.ProcessContractData(ctx, change) + require.NoError(t, err) + assert.Empty(t, svc.contractIDsByWasmHash, "non-instance entry should be skipped") + }) + + t.Run("instance_without_contract_id_skipped", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := 
data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + // Instance entry with account address (not contract) — GetContractId returns false + change := ingest.Change{ + Type: xdr.LedgerEntryTypeContractData, + Post: &xdr.LedgerEntry{ + Data: xdr.LedgerEntryData{ + Type: xdr.LedgerEntryTypeContractData, + ContractData: &xdr.ContractDataEntry{ + Contract: xdr.ScAddress{ + Type: xdr.ScAddressTypeScAddressTypeAccount, + }, + Key: xdr.ScVal{Type: xdr.ScValTypeScvLedgerKeyContractInstance}, + Durability: xdr.ContractDataDurabilityPersistent, + }, + }, + }, + } + + err := svc.ProcessContractData(ctx, change) + require.NoError(t, err) + assert.Empty(t, svc.contractIDsByWasmHash, "entry without contract ID should be skipped") + }) + + t.Run("sac_contract_skipped", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + // SAC contract — executable type is StellarAsset, not WASM + contractHash := [32]byte{5, 6, 7} + change := ingest.Change{ + Type: xdr.LedgerEntryTypeContractData, + Post: &xdr.LedgerEntry{ + Data: xdr.LedgerEntryData{ + Type: xdr.LedgerEntryTypeContractData, + ContractData: &xdr.ContractDataEntry{ + Contract: xdr.ScAddress{ + Type: xdr.ScAddressTypeScAddressTypeContract, + ContractId: (*xdr.ContractId)(&contractHash), + }, + Key: xdr.ScVal{Type: xdr.ScValTypeScvLedgerKeyContractInstance}, + Durability: xdr.ContractDataDurabilityPersistent, + Val: xdr.ScVal{ + Type: xdr.ScValTypeScvContractInstance, + Instance: &xdr.ScContractInstance{ + Executable: xdr.ContractExecutable{ + Type: xdr.ContractExecutableTypeContractExecutableStellarAsset, + }, + }, + }, + }, + }, + }, + } + + err := svc.ProcessContractData(ctx, change) + require.NoError(t, err) + assert.Empty(t, svc.contractIDsByWasmHash, "SAC contract should be 
skipped") + }) + + t.Run("wasm_contract_tracked", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + contractHash := [32]byte{10, 20, 30} + wasmHash := xdr.Hash{40, 50, 60} + change := makeContractInstanceChange(contractHash, wasmHash) + + err := svc.ProcessContractData(ctx, change) + require.NoError(t, err) + + require.Contains(t, svc.contractIDsByWasmHash, wasmHash) + expectedAddr := strkey.MustEncode(strkey.VersionByteContract, contractHash[:]) + assert.Equal(t, []string{expectedAddr}, svc.contractIDsByWasmHash[wasmHash]) + }) + + t.Run("multiple_contracts_same_wasm_hash", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + wasmHash := xdr.Hash{1, 2, 3} + contractHash1 := [32]byte{10} + contractHash2 := [32]byte{20} + + change1 := makeContractInstanceChange(contractHash1, wasmHash) + change2 := makeContractInstanceChange(contractHash2, wasmHash) + + require.NoError(t, svc.ProcessContractData(ctx, change1)) + require.NoError(t, svc.ProcessContractData(ctx, change2)) + + require.Contains(t, svc.contractIDsByWasmHash, wasmHash) + assert.Len(t, svc.contractIDsByWasmHash[wasmHash], 2) + + addr1 := strkey.MustEncode(strkey.VersionByteContract, contractHash1[:]) + addr2 := strkey.MustEncode(strkey.VersionByteContract, contractHash2[:]) + assert.Contains(t, svc.contractIDsByWasmHash[wasmHash], addr1) + assert.Contains(t, svc.contractIDsByWasmHash[wasmHash], addr2) + }) +} + +func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { + ctx := context.Background() + + t.Run("empty_no_op", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := 
data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + err := svc.PersistProtocolContracts(ctx, nil) + require.NoError(t, err) + protocolContractModelMock.AssertNotCalled(t, "BatchInsert", mock.Anything, mock.Anything, mock.Anything) + }) + + t.Run("single_contract", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + contractHash := [32]byte{10, 20, 30} + wasmHash := xdr.Hash{40, 50, 60} + change := makeContractInstanceChange(contractHash, wasmHash) + + require.NoError(t, svc.ProcessContractData(ctx, change)) + + expectedAddr := strkey.MustEncode(strkey.VersionByteContract, contractHash[:]) + protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(contracts []data.ProtocolContract) bool { + if len(contracts) != 1 { + return false + } + return contracts[0].ContractID == expectedAddr && + contracts[0].WasmHash == wasmHash.HexString() && + contracts[0].ProtocolID == nil && + contracts[0].Name == nil + }), + ).Return(nil).Once() + + err := svc.PersistProtocolContracts(ctx, nil) + require.NoError(t, err) + }) + + t.Run("multiple_contracts_across_hashes", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + wasmHash1 := xdr.Hash{1} + wasmHash2 := xdr.Hash{2} + contractHash1 := [32]byte{10} + contractHash2 := [32]byte{20} + contractHash3 := [32]byte{30} + + // Two contracts with wasmHash1, one with wasmHash2 + require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash1, wasmHash1))) + require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash2, 
wasmHash1))) + require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash3, wasmHash2))) + + protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(contracts []data.ProtocolContract) bool { + return len(contracts) == 3 + }), + ).Return(nil).Once() + + err := svc.PersistProtocolContracts(ctx, nil) + require.NoError(t, err) + }) + + t.Run("batch_insert_error_propagated", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + contractHash := [32]byte{10} + wasmHash := xdr.Hash{1} + require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash, wasmHash))) + + insertErr := errors.New("db connection lost") + protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). + Return(insertErr).Once() + + err := svc.PersistProtocolContracts(ctx, nil) + require.Error(t, err) + assert.ErrorContains(t, err, "persisting protocol contracts") + assert.ErrorIs(t, err, insertErr) + }) +} From 8ae0c618ad54df9fca8773c321e6696793d9d6f0 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 9 Mar 2026 16:08:45 -0600 Subject: [PATCH 14/52] Populate protocol_wasms and protocol_contracts during live ingestion and backfill Add two new LedgerChangeProcessors (ProtocolWasmProcessor, ProtocolContractProcessor) that extract WASM hashes and contract-to-WASM mappings from ledger changes during live ingestion, catchup, and historical backfill. Previously this data was only populated during checkpoint. 
- ProtocolWasmProcessor extracts hashes from ContractCode entries - ProtocolContractProcessor extracts contract-to-WASM mappings from ContractData Instance entries - Extended IndexerBuffer with protocolWasmsByHash/protocolContractsByID maps (Push/Get/Merge/Clear) - PersistLedgerData inserts wasms before contracts (FK ordering) with ON CONFLICT DO NOTHING - BatchChanges and processBatchChanges extended for backfill paths --- internal/services/ingest_live.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 984d43357..a6c871c4b 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -122,6 +122,28 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, } } + // 2.5: Persist protocol wasms and contracts + protocolWasms := buffer.GetProtocolWasms() + if len(protocolWasms) > 0 { + wasmSlice := make([]data.ProtocolWasm, 0, len(protocolWasms)) + for _, wasm := range protocolWasms { + wasmSlice = append(wasmSlice, wasm) + } + if txErr = m.models.ProtocolWasm.BatchInsert(ctx, dbTx, wasmSlice); txErr != nil { + return fmt.Errorf("inserting protocol wasms for ledger %d: %w", ledgerSeq, txErr) + } + } + protocolContracts := buffer.GetProtocolContracts() + if len(protocolContracts) > 0 { + contractSlice := make([]data.ProtocolContract, 0, len(protocolContracts)) + for _, contract := range protocolContracts { + contractSlice = append(contractSlice, contract) + } + if txErr = m.models.ProtocolContract.BatchInsert(ctx, dbTx, contractSlice); txErr != nil { + return fmt.Errorf("inserting protocol contracts for ledger %d: %w", ledgerSeq, txErr) + } + } + // 3. 
Insert transactions/operations/state_changes numTxs, numOps, txErr = m.insertIntoDB(ctx, dbTx, buffer) if txErr != nil { From a0df89e10cd06b4558a7ba0fa655189ac245ebca Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 9 Mar 2026 17:13:14 -0600 Subject: [PATCH 15/52] Fix FK violation when persisting protocol contracts with evicted WASMs ContractData Instance entries can outlive their referenced ContractCode entries due to independent TTLs, causing FK violations when inserting protocol_contracts during checkpoint population. - Skip contracts referencing unknown WASM hashes in PersistProtocolContracts - Add WHERE EXISTS guard in BatchInsert SQL for live/backfill path - Add test for contracts_with_missing_wasm_skipped scenario --- internal/services/wasm_ingestion.go | 9 +++++++ internal/services/wasm_ingestion_test.go | 33 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go index d23048c7e..17de55107 100644 --- a/internal/services/wasm_ingestion.go +++ b/internal/services/wasm_ingestion.go @@ -103,13 +103,19 @@ func (s *wasmIngestionService) PersistProtocolWasms(ctx context.Context, dbTx pg } // PersistProtocolContracts writes all accumulated contract-to-WASM mappings to the protocol_contracts table. +// Contracts referencing WASM hashes not present in wasmHashes are skipped (e.g., expired/evicted WASMs). 
func (s *wasmIngestionService) PersistProtocolContracts(ctx context.Context, dbTx pgx.Tx) error { if len(s.contractIDsByWasmHash) == 0 { return nil } var contracts []data.ProtocolContract + var skipped int for hash, contractIDs := range s.contractIDsByWasmHash { + if _, exists := s.wasmHashes[hash]; !exists { + skipped += len(contractIDs) + continue + } for _, contractID := range contractIDs { contracts = append(contracts, data.ProtocolContract{ ContractID: contractID, @@ -117,6 +123,9 @@ func (s *wasmIngestionService) PersistProtocolContracts(ctx context.Context, dbT }) } } + if skipped > 0 { + log.Ctx(ctx).Infof("Skipped %d protocol contracts referencing missing WASM hashes (expired/evicted)", skipped) + } if err := s.protocolContractModel.BatchInsert(ctx, dbTx, contracts); err != nil { return fmt.Errorf("persisting protocol contracts: %w", err) diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go index 10a9629fe..3f6bcab0c 100644 --- a/internal/services/wasm_ingestion_test.go +++ b/internal/services/wasm_ingestion_test.go @@ -297,6 +297,7 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { wasmHash := xdr.Hash{40, 50, 60} change := makeContractInstanceChange(contractHash, wasmHash) + require.NoError(t, svc.ProcessContractCode(ctx, wasmHash)) require.NoError(t, svc.ProcessContractData(ctx, change)) expectedAddr := strkey.MustEncode(strkey.VersionByteContract, contractHash[:]) @@ -327,6 +328,10 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { contractHash2 := [32]byte{20} contractHash3 := [32]byte{30} + // Register WASM hashes first + require.NoError(t, svc.ProcessContractCode(ctx, wasmHash1)) + require.NoError(t, svc.ProcessContractCode(ctx, wasmHash2)) + // Two contracts with wasmHash1, one with wasmHash2 require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash1, wasmHash1))) require.NoError(t, svc.ProcessContractData(ctx, 
makeContractInstanceChange(contractHash2, wasmHash1))) @@ -342,6 +347,33 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { require.NoError(t, err) }) + t.Run("contracts_with_missing_wasm_skipped", func(t *testing.T) { + protocolWasmModelMock := data.NewProtocolWasmModelMock(t) + protocolContractModelMock := data.NewProtocolContractModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + + knownWasm := xdr.Hash{1} + unknownWasm := xdr.Hash{2} + contractHash1 := [32]byte{10} + contractHash2 := [32]byte{20} + + // Only register one WASM hash + require.NoError(t, svc.ProcessContractCode(ctx, knownWasm)) + + // Add contracts — one with known WASM, one with unknown + require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash1, knownWasm))) + require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash2, unknownWasm))) + + protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(contracts []data.ProtocolContract) bool { + return len(contracts) == 1 && contracts[0].WasmHash == knownWasm.HexString() + }), + ).Return(nil).Once() + + err := svc.PersistProtocolContracts(ctx, nil) + require.NoError(t, err) + }) + t.Run("batch_insert_error_propagated", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) protocolContractModelMock := data.NewProtocolContractModelMock(t) @@ -349,6 +381,7 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { contractHash := [32]byte{10} wasmHash := xdr.Hash{1} + require.NoError(t, svc.ProcessContractCode(ctx, wasmHash)) require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash, wasmHash))) insertErr := errors.New("db connection lost") From 1e24f98843e9942e9895f4d034c1b5737da635a1 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Tue, 10 Mar 2026 15:29:48 -0600 Subject: [PATCH 16/52] renames known_wasms to 
protocol_wasms for missing references in design doc --- docs/feature-design/data-migrations.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index 7713cf4bc..a4745b640 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -583,11 +583,11 @@ Backfill migrations rely on checkpoint population being complete before they can 1. **Runs protocol migrations** - Executes SQL migrations from `internal/data/migrations/protocols/` to register new protocols in the `protocols` table with status `not_started` 2. **Sets status** to `classification_in_progress` for specified protocols -3. **Queries existing unclassified entries** from `known_wasms WHERE protocol_id IS NULL` +3. **Queries existing unclassified entries** from `protocol_wasms WHERE protocol_id IS NULL` 4. **Gets bytecode** from all unknown contracts using RPC 5. **Validates each WASM** against all specified protocols' validators 6. **Populates tables**: - - `known_wasms`: Maps WASM hashes to protocol IDs + - `protocol_wasms`: Maps WASM hashes to protocol IDs - `protocol_contracts`: Maps contract IDs to protocols 7. **Updates status** to `classification_success` for all processed protocols From cfb4f1937abd9cc455b39c3a2ca446293118f4d2 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Tue, 10 Mar 2026 15:54:49 -0600 Subject: [PATCH 17/52] Change protocol_wasms and protocol_contracts columns from TEXT to BYTEA Store wasm_hash and contract_id as raw bytes instead of hex/strkey-encoded strings. Both values originate as [32]byte arrays in XDR, so BYTEA reduces storage by ~50%, improves index performance on fixed-size keys, and removes unnecessary encoding/decoding at the persistence boundary. 
--- .../2026-02-20.0-protocol_wasms.sql | 2 +- .../2026-03-09.0-protocol_contracts.sql | 4 +-- internal/services/wasm_ingestion.go | 12 +++---- internal/services/wasm_ingestion_test.go | 36 +++++++++++-------- 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/internal/db/migrations/2026-02-20.0-protocol_wasms.sql b/internal/db/migrations/2026-02-20.0-protocol_wasms.sql index d63e2f0c2..a9b1af269 100644 --- a/internal/db/migrations/2026-02-20.0-protocol_wasms.sql +++ b/internal/db/migrations/2026-02-20.0-protocol_wasms.sql @@ -1,6 +1,6 @@ -- +migrate Up CREATE TABLE protocol_wasms ( - wasm_hash TEXT PRIMARY KEY, + wasm_hash BYTEA PRIMARY KEY, protocol_id TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); diff --git a/internal/db/migrations/2026-03-09.0-protocol_contracts.sql b/internal/db/migrations/2026-03-09.0-protocol_contracts.sql index 2ee01e961..b08672492 100644 --- a/internal/db/migrations/2026-03-09.0-protocol_contracts.sql +++ b/internal/db/migrations/2026-03-09.0-protocol_contracts.sql @@ -1,7 +1,7 @@ -- +migrate Up CREATE TABLE protocol_contracts ( - contract_id TEXT PRIMARY KEY, - wasm_hash TEXT NOT NULL REFERENCES protocol_wasms(wasm_hash), + contract_id BYTEA PRIMARY KEY, + wasm_hash BYTEA NOT NULL REFERENCES protocol_wasms(wasm_hash), protocol_id TEXT, name TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go index 17de55107..e6fc10ab5 100644 --- a/internal/services/wasm_ingestion.go +++ b/internal/services/wasm_ingestion.go @@ -6,7 +6,6 @@ import ( "github.com/jackc/pgx/v5" "github.com/stellar/go-stellar-sdk/ingest" - "github.com/stellar/go-stellar-sdk/strkey" "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/go-stellar-sdk/xdr" @@ -27,7 +26,7 @@ type wasmIngestionService struct { protocolWasmModel data.ProtocolWasmModelInterface protocolContractModel data.ProtocolContractModelInterface wasmHashes map[xdr.Hash]struct{} - 
contractIDsByWasmHash map[xdr.Hash][]string + contractIDsByWasmHash map[xdr.Hash][][]byte } // NewWasmIngestionService creates a WasmIngestionService. @@ -39,7 +38,7 @@ func NewWasmIngestionService( protocolWasmModel: protocolWasmModel, protocolContractModel: protocolContractModel, wasmHashes: make(map[xdr.Hash]struct{}), - contractIDsByWasmHash: make(map[xdr.Hash][]string), + contractIDsByWasmHash: make(map[xdr.Hash][][]byte), } } @@ -74,8 +73,7 @@ func (s *wasmIngestionService) ProcessContractData(ctx context.Context, change i } hash := *contractInstance.Executable.WasmHash - contractAddressStr := strkey.MustEncode(strkey.VersionByteContract, contractAddress[:]) - s.contractIDsByWasmHash[hash] = append(s.contractIDsByWasmHash[hash], contractAddressStr) + s.contractIDsByWasmHash[hash] = append(s.contractIDsByWasmHash[hash], contractAddress[:]) return nil } @@ -89,7 +87,7 @@ func (s *wasmIngestionService) PersistProtocolWasms(ctx context.Context, dbTx pg wasms := make([]data.ProtocolWasm, 0, len(s.wasmHashes)) for hash := range s.wasmHashes { wasms = append(wasms, data.ProtocolWasm{ - WasmHash: hash.HexString(), + WasmHash: hash[:], ProtocolID: nil, }) } @@ -119,7 +117,7 @@ func (s *wasmIngestionService) PersistProtocolContracts(ctx context.Context, dbT for _, contractID := range contractIDs { contracts = append(contracts, data.ProtocolContract{ ContractID: contractID, - WasmHash: hash.HexString(), + WasmHash: hash[:], }) } } diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go index 3f6bcab0c..a177719b8 100644 --- a/internal/services/wasm_ingestion_test.go +++ b/internal/services/wasm_ingestion_test.go @@ -1,12 +1,12 @@ package services import ( + "bytes" "context" "errors" "testing" "github.com/stellar/go-stellar-sdk/ingest" - "github.com/stellar/go-stellar-sdk/strkey" "github.com/stellar/go-stellar-sdk/xdr" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -79,7 +79,7 @@ func 
TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { if len(wasms) != 1 { return false } - return wasms[0].WasmHash == hash.HexString() && wasms[0].ProtocolID == nil + return bytes.Equal(wasms[0].WasmHash, hash[:]) && wasms[0].ProtocolID == nil }), ).Return(nil).Once() @@ -102,11 +102,11 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { if len(wasms) != 2 { return false } - hashes := make(map[string]bool) + found := make(map[string]bool) for _, w := range wasms { - hashes[w.WasmHash] = true + found[string(w.WasmHash)] = true } - return hashes[hash1.HexString()] && hashes[hash2.HexString()] + return found[string(hash1[:])] && found[string(hash2[:])] }), ).Return(nil).Once() @@ -246,8 +246,7 @@ func TestWasmIngestionService_ProcessContractData(t *testing.T) { require.NoError(t, err) require.Contains(t, svc.contractIDsByWasmHash, wasmHash) - expectedAddr := strkey.MustEncode(strkey.VersionByteContract, contractHash[:]) - assert.Equal(t, []string{expectedAddr}, svc.contractIDsByWasmHash[wasmHash]) + assert.Equal(t, [][]byte{contractHash[:]}, svc.contractIDsByWasmHash[wasmHash]) }) t.Run("multiple_contracts_same_wasm_hash", func(t *testing.T) { @@ -268,10 +267,18 @@ func TestWasmIngestionService_ProcessContractData(t *testing.T) { require.Contains(t, svc.contractIDsByWasmHash, wasmHash) assert.Len(t, svc.contractIDsByWasmHash[wasmHash], 2) - addr1 := strkey.MustEncode(strkey.VersionByteContract, contractHash1[:]) - addr2 := strkey.MustEncode(strkey.VersionByteContract, contractHash2[:]) - assert.Contains(t, svc.contractIDsByWasmHash[wasmHash], addr1) - assert.Contains(t, svc.contractIDsByWasmHash[wasmHash], addr2) + // Check that both contract hashes are present as raw bytes + var foundAddr1, foundAddr2 bool + for _, id := range svc.contractIDsByWasmHash[wasmHash] { + if bytes.Equal(id, contractHash1[:]) { + foundAddr1 = true + } + if bytes.Equal(id, contractHash2[:]) { + foundAddr2 = true + } + } + assert.True(t, foundAddr1, "contractHash1 
should be tracked") + assert.True(t, foundAddr2, "contractHash2 should be tracked") }) } @@ -300,14 +307,13 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { require.NoError(t, svc.ProcessContractCode(ctx, wasmHash)) require.NoError(t, svc.ProcessContractData(ctx, change)) - expectedAddr := strkey.MustEncode(strkey.VersionByteContract, contractHash[:]) protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.MatchedBy(func(contracts []data.ProtocolContract) bool { if len(contracts) != 1 { return false } - return contracts[0].ContractID == expectedAddr && - contracts[0].WasmHash == wasmHash.HexString() && + return bytes.Equal(contracts[0].ContractID, contractHash[:]) && + bytes.Equal(contracts[0].WasmHash, wasmHash[:]) && contracts[0].ProtocolID == nil && contracts[0].Name == nil }), @@ -366,7 +372,7 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.MatchedBy(func(contracts []data.ProtocolContract) bool { - return len(contracts) == 1 && contracts[0].WasmHash == knownWasm.HexString() + return len(contracts) == 1 && bytes.Equal(contracts[0].WasmHash, knownWasm[:]) }), ).Return(nil).Once() From 55d3349fea2a9cec75468e4716dd4350faeaeec8 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Tue, 10 Mar 2026 16:14:02 -0600 Subject: [PATCH 18/52] Remove redundant protocol_id column from protocol_contracts table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The protocol_id on protocol_contracts was always NULL and never queried. It's derivable via the existing FK join: protocol_contracts.wasm_hash → protocol_wasms.wasm_hash → protocol_wasms.protocol_id. 
--- internal/db/migrations/2026-03-09.0-protocol_contracts.sql | 1 - internal/services/wasm_ingestion_test.go | 1 - 2 files changed, 2 deletions(-) diff --git a/internal/db/migrations/2026-03-09.0-protocol_contracts.sql b/internal/db/migrations/2026-03-09.0-protocol_contracts.sql index b08672492..a3d055e38 100644 --- a/internal/db/migrations/2026-03-09.0-protocol_contracts.sql +++ b/internal/db/migrations/2026-03-09.0-protocol_contracts.sql @@ -2,7 +2,6 @@ CREATE TABLE protocol_contracts ( contract_id BYTEA PRIMARY KEY, wasm_hash BYTEA NOT NULL REFERENCES protocol_wasms(wasm_hash), - protocol_id TEXT, name TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go index a177719b8..5bd9ebb29 100644 --- a/internal/services/wasm_ingestion_test.go +++ b/internal/services/wasm_ingestion_test.go @@ -314,7 +314,6 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { } return bytes.Equal(contracts[0].ContractID, contractHash[:]) && bytes.Equal(contracts[0].WasmHash, wasmHash[:]) && - contracts[0].ProtocolID == nil && contracts[0].Name == nil }), ).Return(nil).Once() From e1ead2a932e4b75207d50b556a4ba5224e290c98 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Wed, 11 Mar 2026 09:57:26 -0600 Subject: [PATCH 19/52] Use HashBytea type for protocol wasm/contract bytea fields Replace raw []byte with types.HashBytea for WasmHash and ContractID fields in ProtocolWasm and ProtocolContract models. HashBytea implements sql.Scanner and driver.Valuer to auto-convert between raw bytes (DB) and hex strings (Go), consistent with how Transaction.Hash is handled. 
Updated files: - internal/data/protocol_wasms.go, protocol_contracts.go (models + BatchInsert) - internal/indexer/processors/protocol_wasms.go, protocol_contracts.go - internal/services/wasm_ingestion.go - All corresponding test files --- internal/services/wasm_ingestion.go | 12 +++++++----- internal/services/wasm_ingestion_test.go | 24 ++++++++++++++---------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go index e6fc10ab5..fbbc450a4 100644 --- a/internal/services/wasm_ingestion.go +++ b/internal/services/wasm_ingestion.go @@ -2,6 +2,7 @@ package services import ( "context" + "encoding/hex" "fmt" "github.com/jackc/pgx/v5" @@ -10,6 +11,7 @@ import ( "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/wallet-backend/internal/data" + "github.com/stellar/wallet-backend/internal/indexer/types" ) // WasmIngestionService tracks and persists WASM hashes and contract-to-WASM mappings during checkpoint population. @@ -26,7 +28,7 @@ type wasmIngestionService struct { protocolWasmModel data.ProtocolWasmModelInterface protocolContractModel data.ProtocolContractModelInterface wasmHashes map[xdr.Hash]struct{} - contractIDsByWasmHash map[xdr.Hash][][]byte + contractIDsByWasmHash map[xdr.Hash][]types.HashBytea } // NewWasmIngestionService creates a WasmIngestionService. 
@@ -38,7 +40,7 @@ func NewWasmIngestionService( protocolWasmModel: protocolWasmModel, protocolContractModel: protocolContractModel, wasmHashes: make(map[xdr.Hash]struct{}), - contractIDsByWasmHash: make(map[xdr.Hash][][]byte), + contractIDsByWasmHash: make(map[xdr.Hash][]types.HashBytea), } } @@ -73,7 +75,7 @@ func (s *wasmIngestionService) ProcessContractData(ctx context.Context, change i } hash := *contractInstance.Executable.WasmHash - s.contractIDsByWasmHash[hash] = append(s.contractIDsByWasmHash[hash], contractAddress[:]) + s.contractIDsByWasmHash[hash] = append(s.contractIDsByWasmHash[hash], types.HashBytea(hex.EncodeToString(contractAddress[:]))) return nil } @@ -87,7 +89,7 @@ func (s *wasmIngestionService) PersistProtocolWasms(ctx context.Context, dbTx pg wasms := make([]data.ProtocolWasm, 0, len(s.wasmHashes)) for hash := range s.wasmHashes { wasms = append(wasms, data.ProtocolWasm{ - WasmHash: hash[:], + WasmHash: types.HashBytea(hex.EncodeToString(hash[:])), ProtocolID: nil, }) } @@ -117,7 +119,7 @@ func (s *wasmIngestionService) PersistProtocolContracts(ctx context.Context, dbT for _, contractID := range contractIDs { contracts = append(contracts, data.ProtocolContract{ ContractID: contractID, - WasmHash: hash[:], + WasmHash: types.HashBytea(hex.EncodeToString(hash[:])), }) } } diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go index 5bd9ebb29..df1a56a84 100644 --- a/internal/services/wasm_ingestion_test.go +++ b/internal/services/wasm_ingestion_test.go @@ -1,8 +1,8 @@ package services import ( - "bytes" "context" + "encoding/hex" "errors" "testing" @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stellar/wallet-backend/internal/data" + "github.com/stellar/wallet-backend/internal/indexer/types" ) func TestWasmIngestionService_ProcessContractCode(t *testing.T) { @@ -79,7 +80,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { if len(wasms) != 1 { return false } 
- return bytes.Equal(wasms[0].WasmHash, hash[:]) && wasms[0].ProtocolID == nil + return wasms[0].WasmHash == types.HashBytea(hex.EncodeToString(hash[:])) && wasms[0].ProtocolID == nil }), ).Return(nil).Once() @@ -106,7 +107,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { for _, w := range wasms { found[string(w.WasmHash)] = true } - return found[string(hash1[:])] && found[string(hash2[:])] + return found[hex.EncodeToString(hash1[:])] && found[hex.EncodeToString(hash2[:])] }), ).Return(nil).Once() @@ -246,7 +247,8 @@ func TestWasmIngestionService_ProcessContractData(t *testing.T) { require.NoError(t, err) require.Contains(t, svc.contractIDsByWasmHash, wasmHash) - assert.Equal(t, [][]byte{contractHash[:]}, svc.contractIDsByWasmHash[wasmHash]) + expectedContractID := types.HashBytea(hex.EncodeToString(contractHash[:])) + assert.Equal(t, []types.HashBytea{expectedContractID}, svc.contractIDsByWasmHash[wasmHash]) }) t.Run("multiple_contracts_same_wasm_hash", func(t *testing.T) { @@ -267,13 +269,15 @@ func TestWasmIngestionService_ProcessContractData(t *testing.T) { require.Contains(t, svc.contractIDsByWasmHash, wasmHash) assert.Len(t, svc.contractIDsByWasmHash[wasmHash], 2) - // Check that both contract hashes are present as raw bytes + // Check that both contract hashes are present as hex strings + expectedID1 := types.HashBytea(hex.EncodeToString(contractHash1[:])) + expectedID2 := types.HashBytea(hex.EncodeToString(contractHash2[:])) var foundAddr1, foundAddr2 bool for _, id := range svc.contractIDsByWasmHash[wasmHash] { - if bytes.Equal(id, contractHash1[:]) { + if id == expectedID1 { foundAddr1 = true } - if bytes.Equal(id, contractHash2[:]) { + if id == expectedID2 { foundAddr2 = true } } @@ -312,8 +316,8 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { if len(contracts) != 1 { return false } - return bytes.Equal(contracts[0].ContractID, contractHash[:]) && - bytes.Equal(contracts[0].WasmHash, wasmHash[:]) && + 
return contracts[0].ContractID == types.HashBytea(hex.EncodeToString(contractHash[:])) && + contracts[0].WasmHash == types.HashBytea(hex.EncodeToString(wasmHash[:])) && contracts[0].Name == nil }), ).Return(nil).Once() @@ -371,7 +375,7 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.MatchedBy(func(contracts []data.ProtocolContract) bool { - return len(contracts) == 1 && bytes.Equal(contracts[0].WasmHash, knownWasm[:]) + return len(contracts) == 1 && contracts[0].WasmHash == types.HashBytea(hex.EncodeToString(knownWasm[:])) }), ).Return(nil).Once() From 11ad8e55054abd16c05cf322600b37e7e28fa7c2 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Wed, 11 Mar 2026 13:13:40 -0600 Subject: [PATCH 20/52] replaces remaining known_wasms references in diagrams with protocol_wasms --- docs/feature-design/data-migrations.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/feature-design/data-migrations.md b/docs/feature-design/data-migrations.md index a4745b640..66f47359e 100644 --- a/docs/feature-design/data-migrations.md +++ b/docs/feature-design/data-migrations.md @@ -244,8 +244,8 @@ This has to happen in 2 stages during the migration process: ┌────────┐ ┌──────────┐ ▼ │Store │ │Store │ ┌──────────────────┐ │hash in │ │hash in │ │ Map contract ID │ - │known_ │ │known_ │ │ to WASM hash │ - │wasms │ │wasms │ │ (for later lookup│ + │protocol│ │protocol │ │ to WASM hash │ + │_wasms │ │_wasms │ │ (for later lookup│ │with │ │with NULL │ │ in protocol_wasms) │ │protocol│ │protocol │ └──────────────────┘ └────────┘ └──────────┘ @@ -307,8 +307,8 @@ During live ingestion, classification happens in two parts: (1) new WASM uploads ▼ ▼ ▼ ▼ ┌──────────┐ ┌──────────┐ ┌──────────────┐ ┌──────────────────┐ │Store in │ │Store in │ │ Map contract │ │ Fetch WASM via │ - │known_ │ │known_ │ │ to protocol │ │ RPC, validate, │ - │wasms with│ │wasms with│ │ 
from cached │ │ then map contract│ + │protocol │ │protocol │ │ to protocol │ │ RPC, validate, │ + │_wasms w/ │ │_wasms w/ │ │ from cached │ │ then map contract│ │protocol │ │NULL │ │ classification │ (rare edge case) │ └──────────┘ └──────────┘ └──────────────┘ └──────────────────┘ │ │ From 2ad8fcfc4e85301f73e255d9e06b73ec1ec89eb6 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 12 Mar 2026 08:31:12 -0600 Subject: [PATCH 21/52] Rename ProtocolContract to ProtocolContracts to match table name convention --- internal/data/mocks.go | 16 ++--- internal/services/ingest_live.go | 4 +- internal/services/wasm_ingestion.go | 12 ++-- internal/services/wasm_ingestion_test.go | 80 ++++++++++++------------ 4 files changed, 56 insertions(+), 56 deletions(-) diff --git a/internal/data/mocks.go b/internal/data/mocks.go index e25f4b0be..62592e7bc 100644 --- a/internal/data/mocks.go +++ b/internal/data/mocks.go @@ -326,20 +326,20 @@ func (m *ProtocolWasmModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, wa return args.Error(0) } -// ProtocolContractModelMock is a mock implementation of ProtocolContractModelInterface. -type ProtocolContractModelMock struct { +// ProtocolContractsModelMock is a mock implementation of ProtocolContractsModelInterface. +type ProtocolContractsModelMock struct { mock.Mock } -var _ ProtocolContractModelInterface = (*ProtocolContractModelMock)(nil) +var _ ProtocolContractsModelInterface = (*ProtocolContractsModelMock)(nil) -// NewProtocolContractModelMock creates a new instance of ProtocolContractModelMock. -func NewProtocolContractModelMock(t interface { +// NewProtocolContractsModelMock creates a new instance of ProtocolContractsModelMock. 
+func NewProtocolContractsModelMock(t interface { mock.TestingT Cleanup(func()) }, -) *ProtocolContractModelMock { - mockModel := &ProtocolContractModelMock{} +) *ProtocolContractsModelMock { + mockModel := &ProtocolContractsModelMock{} mockModel.Mock.Test(t) t.Cleanup(func() { mockModel.AssertExpectations(t) }) @@ -347,7 +347,7 @@ func NewProtocolContractModelMock(t interface { return mockModel } -func (m *ProtocolContractModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, contracts []ProtocolContract) error { +func (m *ProtocolContractsModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, contracts []ProtocolContracts) error { args := m.Called(ctx, dbTx, contracts) return args.Error(0) } diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index a6c871c4b..3a60ebb7b 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -135,11 +135,11 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, } protocolContracts := buffer.GetProtocolContracts() if len(protocolContracts) > 0 { - contractSlice := make([]data.ProtocolContract, 0, len(protocolContracts)) + contractSlice := make([]data.ProtocolContracts, 0, len(protocolContracts)) for _, contract := range protocolContracts { contractSlice = append(contractSlice, contract) } - if txErr = m.models.ProtocolContract.BatchInsert(ctx, dbTx, contractSlice); txErr != nil { + if txErr = m.models.ProtocolContracts.BatchInsert(ctx, dbTx, contractSlice); txErr != nil { return fmt.Errorf("inserting protocol contracts for ledger %d: %w", ledgerSeq, txErr) } } diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go index fbbc450a4..ae8c7b68a 100644 --- a/internal/services/wasm_ingestion.go +++ b/internal/services/wasm_ingestion.go @@ -26,7 +26,7 @@ var _ WasmIngestionService = (*wasmIngestionService)(nil) type wasmIngestionService struct { protocolWasmModel data.ProtocolWasmModelInterface - 
protocolContractModel data.ProtocolContractModelInterface + protocolContractsModel data.ProtocolContractsModelInterface wasmHashes map[xdr.Hash]struct{} contractIDsByWasmHash map[xdr.Hash][]types.HashBytea } @@ -34,11 +34,11 @@ type wasmIngestionService struct { // NewWasmIngestionService creates a WasmIngestionService. func NewWasmIngestionService( protocolWasmModel data.ProtocolWasmModelInterface, - protocolContractModel data.ProtocolContractModelInterface, + protocolContractsModel data.ProtocolContractsModelInterface, ) *wasmIngestionService { return &wasmIngestionService{ protocolWasmModel: protocolWasmModel, - protocolContractModel: protocolContractModel, + protocolContractsModel: protocolContractsModel, wasmHashes: make(map[xdr.Hash]struct{}), contractIDsByWasmHash: make(map[xdr.Hash][]types.HashBytea), } @@ -109,7 +109,7 @@ func (s *wasmIngestionService) PersistProtocolContracts(ctx context.Context, dbT return nil } - var contracts []data.ProtocolContract + var contracts []data.ProtocolContracts var skipped int for hash, contractIDs := range s.contractIDsByWasmHash { if _, exists := s.wasmHashes[hash]; !exists { @@ -117,7 +117,7 @@ func (s *wasmIngestionService) PersistProtocolContracts(ctx context.Context, dbT continue } for _, contractID := range contractIDs { - contracts = append(contracts, data.ProtocolContract{ + contracts = append(contracts, data.ProtocolContracts{ ContractID: contractID, WasmHash: types.HashBytea(hex.EncodeToString(hash[:])), }) @@ -127,7 +127,7 @@ func (s *wasmIngestionService) PersistProtocolContracts(ctx context.Context, dbT log.Ctx(ctx).Infof("Skipped %d protocol contracts referencing missing WASM hashes (expired/evicted)", skipped) } - if err := s.protocolContractModel.BatchInsert(ctx, dbTx, contracts); err != nil { + if err := s.protocolContractsModel.BatchInsert(ctx, dbTx, contracts); err != nil { return fmt.Errorf("persisting protocol contracts: %w", err) } diff --git a/internal/services/wasm_ingestion_test.go 
b/internal/services/wasm_ingestion_test.go index df1a56a84..a4657e3b7 100644 --- a/internal/services/wasm_ingestion_test.go +++ b/internal/services/wasm_ingestion_test.go @@ -22,8 +22,8 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { t.Run("tracks_hash", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) err := svc.ProcessContractCode(ctx, hash) require.NoError(t, err) @@ -34,8 +34,8 @@ func TestWasmIngestionService_ProcessContractCode(t *testing.T) { t.Run("duplicate_hash_deduplicated", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) err := svc.ProcessContractCode(ctx, hash) require.NoError(t, err) @@ -62,8 +62,8 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { t.Run("no_hashes_skips_insert", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) err := svc.PersistProtocolWasms(ctx, nil) require.NoError(t, err) @@ -72,7 +72,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { t.Run("single_hash_persisted", func(t *testing.T) 
{ protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) hash := xdr.Hash{10, 20, 30} protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, @@ -84,7 +84,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { }), ).Return(nil).Once() - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) err := svc.ProcessContractCode(ctx, hash) require.NoError(t, err) @@ -94,7 +94,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { t.Run("multiple_hashes_persisted", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) hash1 := xdr.Hash{1} hash2 := xdr.Hash{2} @@ -111,7 +111,7 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { }), ).Return(nil).Once() - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) require.NoError(t, svc.ProcessContractCode(ctx, hash1)) require.NoError(t, svc.ProcessContractCode(ctx, hash2)) @@ -121,14 +121,14 @@ func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { t.Run("batch_insert_error_propagated", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) hash := xdr.Hash{99} insertErr := errors.New("db connection lost") protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). 
Return(insertErr).Once() - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) require.NoError(t, svc.ProcessContractCode(ctx, hash)) err := svc.PersistProtocolWasms(ctx, nil) @@ -143,8 +143,8 @@ func TestWasmIngestionService_ProcessContractData(t *testing.T) { t.Run("non_instance_entry_skipped", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) // ContractData entry with a non-Instance key type (e.g., balance entry) contractHash := [32]byte{1, 2, 3} @@ -172,8 +172,8 @@ func TestWasmIngestionService_ProcessContractData(t *testing.T) { t.Run("instance_without_contract_id_skipped", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) // Instance entry with account address (not contract) — GetContractId returns false change := ingest.Change{ @@ -199,8 +199,8 @@ func TestWasmIngestionService_ProcessContractData(t *testing.T) { t.Run("sac_contract_skipped", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, 
protocolContractsModelMock) // SAC contract — executable type is StellarAsset, not WASM contractHash := [32]byte{5, 6, 7} @@ -236,8 +236,8 @@ func TestWasmIngestionService_ProcessContractData(t *testing.T) { t.Run("wasm_contract_tracked", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) contractHash := [32]byte{10, 20, 30} wasmHash := xdr.Hash{40, 50, 60} @@ -253,8 +253,8 @@ func TestWasmIngestionService_ProcessContractData(t *testing.T) { t.Run("multiple_contracts_same_wasm_hash", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) wasmHash := xdr.Hash{1, 2, 3} contractHash1 := [32]byte{10} @@ -291,18 +291,18 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { t.Run("empty_no_op", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) err := svc.PersistProtocolContracts(ctx, nil) require.NoError(t, err) - protocolContractModelMock.AssertNotCalled(t, "BatchInsert", mock.Anything, mock.Anything, mock.Anything) + protocolContractsModelMock.AssertNotCalled(t, 
"BatchInsert", mock.Anything, mock.Anything, mock.Anything) }) t.Run("single_contract", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) contractHash := [32]byte{10, 20, 30} wasmHash := xdr.Hash{40, 50, 60} @@ -311,8 +311,8 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { require.NoError(t, svc.ProcessContractCode(ctx, wasmHash)) require.NoError(t, svc.ProcessContractData(ctx, change)) - protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(contracts []data.ProtocolContract) bool { + protocolContractsModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(contracts []data.ProtocolContracts) bool { if len(contracts) != 1 { return false } @@ -328,8 +328,8 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { t.Run("multiple_contracts_across_hashes", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) wasmHash1 := xdr.Hash{1} wasmHash2 := xdr.Hash{2} @@ -346,8 +346,8 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash2, wasmHash1))) require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash3, wasmHash2))) - protocolContractModelMock.On("BatchInsert", mock.Anything, 
mock.Anything, - mock.MatchedBy(func(contracts []data.ProtocolContract) bool { + protocolContractsModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(contracts []data.ProtocolContracts) bool { return len(contracts) == 3 }), ).Return(nil).Once() @@ -358,8 +358,8 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { t.Run("contracts_with_missing_wasm_skipped", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) knownWasm := xdr.Hash{1} unknownWasm := xdr.Hash{2} @@ -373,8 +373,8 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash1, knownWasm))) require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash2, unknownWasm))) - protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(contracts []data.ProtocolContract) bool { + protocolContractsModelMock.On("BatchInsert", mock.Anything, mock.Anything, + mock.MatchedBy(func(contracts []data.ProtocolContracts) bool { return len(contracts) == 1 && contracts[0].WasmHash == types.HashBytea(hex.EncodeToString(knownWasm[:])) }), ).Return(nil).Once() @@ -385,8 +385,8 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { t.Run("batch_insert_error_propagated", func(t *testing.T) { protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractModelMock := data.NewProtocolContractModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractModelMock) + protocolContractsModelMock := data.NewProtocolContractsModelMock(t) + svc := 
NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) contractHash := [32]byte{10} wasmHash := xdr.Hash{1} @@ -394,7 +394,7 @@ func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash, wasmHash))) insertErr := errors.New("db connection lost") - protocolContractModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). + protocolContractsModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). Return(insertErr).Once() err := svc.PersistProtocolContracts(ctx, nil) From df342c81a6952a6152157a3a0b76508dcde06109 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 12 Mar 2026 09:12:12 -0600 Subject: [PATCH 22/52] runs fmt and tidy to abide by lint rules --- internal/services/wasm_ingestion.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go index ae8c7b68a..5e9fef9bd 100644 --- a/internal/services/wasm_ingestion.go +++ b/internal/services/wasm_ingestion.go @@ -25,10 +25,10 @@ type WasmIngestionService interface { var _ WasmIngestionService = (*wasmIngestionService)(nil) type wasmIngestionService struct { - protocolWasmModel data.ProtocolWasmModelInterface + protocolWasmModel data.ProtocolWasmModelInterface protocolContractsModel data.ProtocolContractsModelInterface - wasmHashes map[xdr.Hash]struct{} - contractIDsByWasmHash map[xdr.Hash][]types.HashBytea + wasmHashes map[xdr.Hash]struct{} + contractIDsByWasmHash map[xdr.Hash][]types.HashBytea } // NewWasmIngestionService creates a WasmIngestionService. 
@@ -37,10 +37,10 @@ func NewWasmIngestionService( protocolContractsModel data.ProtocolContractsModelInterface, ) *wasmIngestionService { return &wasmIngestionService{ - protocolWasmModel: protocolWasmModel, + protocolWasmModel: protocolWasmModel, protocolContractsModel: protocolContractsModel, - wasmHashes: make(map[xdr.Hash]struct{}), - contractIDsByWasmHash: make(map[xdr.Hash][]types.HashBytea), + wasmHashes: make(map[xdr.Hash]struct{}), + contractIDsByWasmHash: make(map[xdr.Hash][]types.HashBytea), } } From d56dc399dc1434a6a80a358e6858974b2dd6461c Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 13 Mar 2026 10:38:56 -0600 Subject: [PATCH 23/52] Consolidate WasmIngestionService and checkpoint token logic into CheckpointService WasmIngestionService was only used by CheckpointService, and TokenIngestionService's NewTokenProcessor/TokenProcessor interface was only used by CheckpointService. This inlines all checkpoint-specific logic directly into CheckpointService, eliminating unnecessary intermediate service abstractions. 
- Rewrite checkpoint.go to absorb all checkpoint logic: checkpointData, batch, trustline/contract/WASM processing, and protocol persistence - Replace positional NewCheckpointService args with CheckpointServiceConfig - Strip token_ingestion.go to live-only (ProcessTokenChanges); remove TokenProcessor interface, NewTokenProcessor, and checkpoint-only fields from TokenIngestionServiceConfig - Delete wasm_ingestion.go (absorbed into checkpoint.go) - Remove WasmIngestionServiceMock, TokenProcessorMock from mocks.go - Update ingest.go wiring and simplify TokenIngestionServiceConfig - Rewrite checkpoint_test.go with data model mocks; port WASM and checkpoint processor tests from deleted test files - Add TrustlineAssetModelMock to data/mocks.go --- internal/services/wasm_ingestion.go | 136 -------- internal/services/wasm_ingestion_test.go | 405 ----------------------- 2 files changed, 541 deletions(-) delete mode 100644 internal/services/wasm_ingestion.go delete mode 100644 internal/services/wasm_ingestion_test.go diff --git a/internal/services/wasm_ingestion.go b/internal/services/wasm_ingestion.go deleted file mode 100644 index 5e9fef9bd..000000000 --- a/internal/services/wasm_ingestion.go +++ /dev/null @@ -1,136 +0,0 @@ -package services - -import ( - "context" - "encoding/hex" - "fmt" - - "github.com/jackc/pgx/v5" - "github.com/stellar/go-stellar-sdk/ingest" - "github.com/stellar/go-stellar-sdk/support/log" - "github.com/stellar/go-stellar-sdk/xdr" - - "github.com/stellar/wallet-backend/internal/data" - "github.com/stellar/wallet-backend/internal/indexer/types" -) - -// WasmIngestionService tracks and persists WASM hashes and contract-to-WASM mappings during checkpoint population. 
-type WasmIngestionService interface { - ProcessContractCode(ctx context.Context, wasmHash xdr.Hash) error - ProcessContractData(ctx context.Context, change ingest.Change) error - PersistProtocolWasms(ctx context.Context, dbTx pgx.Tx) error - PersistProtocolContracts(ctx context.Context, dbTx pgx.Tx) error -} - -var _ WasmIngestionService = (*wasmIngestionService)(nil) - -type wasmIngestionService struct { - protocolWasmModel data.ProtocolWasmModelInterface - protocolContractsModel data.ProtocolContractsModelInterface - wasmHashes map[xdr.Hash]struct{} - contractIDsByWasmHash map[xdr.Hash][]types.HashBytea -} - -// NewWasmIngestionService creates a WasmIngestionService. -func NewWasmIngestionService( - protocolWasmModel data.ProtocolWasmModelInterface, - protocolContractsModel data.ProtocolContractsModelInterface, -) *wasmIngestionService { - return &wasmIngestionService{ - protocolWasmModel: protocolWasmModel, - protocolContractsModel: protocolContractsModel, - wasmHashes: make(map[xdr.Hash]struct{}), - contractIDsByWasmHash: make(map[xdr.Hash][]types.HashBytea), - } -} - -// ProcessContractCode tracks the WASM hash for later persistence. -func (s *wasmIngestionService) ProcessContractCode(ctx context.Context, wasmHash xdr.Hash) error { - s.wasmHashes[wasmHash] = struct{}{} - return nil -} - -// ProcessContractData extracts contract-to-WASM-hash mappings from ContractData Instance entries. 
-func (s *wasmIngestionService) ProcessContractData(ctx context.Context, change ingest.Change) error { - contractDataEntry := change.Post.Data.MustContractData() - - // Only process Instance entries - if contractDataEntry.Key.Type != xdr.ScValTypeScvLedgerKeyContractInstance { - return nil - } - - // Extract contract address - contractAddress, ok := contractDataEntry.Contract.GetContractId() - if !ok { - return nil - } - - // Extract WASM hash from contract instance executable - contractInstance := contractDataEntry.Val.MustInstance() - if contractInstance.Executable.Type != xdr.ContractExecutableTypeContractExecutableWasm { - return nil - } - if contractInstance.Executable.WasmHash == nil { - return nil - } - - hash := *contractInstance.Executable.WasmHash - s.contractIDsByWasmHash[hash] = append(s.contractIDsByWasmHash[hash], types.HashBytea(hex.EncodeToString(contractAddress[:]))) - - return nil -} - -// PersistProtocolWasms writes all accumulated WASM hashes to the protocol_wasms table. -func (s *wasmIngestionService) PersistProtocolWasms(ctx context.Context, dbTx pgx.Tx) error { - if len(s.wasmHashes) == 0 { - return nil - } - - wasms := make([]data.ProtocolWasm, 0, len(s.wasmHashes)) - for hash := range s.wasmHashes { - wasms = append(wasms, data.ProtocolWasm{ - WasmHash: types.HashBytea(hex.EncodeToString(hash[:])), - ProtocolID: nil, - }) - } - - if err := s.protocolWasmModel.BatchInsert(ctx, dbTx, wasms); err != nil { - return fmt.Errorf("persisting protocol wasms: %w", err) - } - - log.Ctx(ctx).Infof("Persisted %d protocol WASM hashes", len(wasms)) - return nil -} - -// PersistProtocolContracts writes all accumulated contract-to-WASM mappings to the protocol_contracts table. -// Contracts referencing WASM hashes not present in wasmHashes are skipped (e.g., expired/evicted WASMs). 
-func (s *wasmIngestionService) PersistProtocolContracts(ctx context.Context, dbTx pgx.Tx) error { - if len(s.contractIDsByWasmHash) == 0 { - return nil - } - - var contracts []data.ProtocolContracts - var skipped int - for hash, contractIDs := range s.contractIDsByWasmHash { - if _, exists := s.wasmHashes[hash]; !exists { - skipped += len(contractIDs) - continue - } - for _, contractID := range contractIDs { - contracts = append(contracts, data.ProtocolContracts{ - ContractID: contractID, - WasmHash: types.HashBytea(hex.EncodeToString(hash[:])), - }) - } - } - if skipped > 0 { - log.Ctx(ctx).Infof("Skipped %d protocol contracts referencing missing WASM hashes (expired/evicted)", skipped) - } - - if err := s.protocolContractsModel.BatchInsert(ctx, dbTx, contracts); err != nil { - return fmt.Errorf("persisting protocol contracts: %w", err) - } - - log.Ctx(ctx).Infof("Persisted %d protocol contracts", len(contracts)) - return nil -} diff --git a/internal/services/wasm_ingestion_test.go b/internal/services/wasm_ingestion_test.go deleted file mode 100644 index a4657e3b7..000000000 --- a/internal/services/wasm_ingestion_test.go +++ /dev/null @@ -1,405 +0,0 @@ -package services - -import ( - "context" - "encoding/hex" - "errors" - "testing" - - "github.com/stellar/go-stellar-sdk/ingest" - "github.com/stellar/go-stellar-sdk/xdr" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - - "github.com/stellar/wallet-backend/internal/data" - "github.com/stellar/wallet-backend/internal/indexer/types" -) - -func TestWasmIngestionService_ProcessContractCode(t *testing.T) { - ctx := context.Background() - hash := xdr.Hash{1, 2, 3} - - t.Run("tracks_hash", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - err := 
svc.ProcessContractCode(ctx, hash) - require.NoError(t, err) - - _, tracked := svc.wasmHashes[hash] - assert.True(t, tracked, "hash should be tracked") - }) - - t.Run("duplicate_hash_deduplicated", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - err := svc.ProcessContractCode(ctx, hash) - require.NoError(t, err) - - err = svc.ProcessContractCode(ctx, hash) - require.NoError(t, err) - - assert.Len(t, svc.wasmHashes, 1, "duplicate hash should be deduplicated") - - // Verify PersistProtocolWasms produces 1 entry - protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(wasms []data.ProtocolWasm) bool { - return len(wasms) == 1 - }), - ).Return(nil).Once() - - err = svc.PersistProtocolWasms(ctx, nil) - require.NoError(t, err) - }) -} - -func TestWasmIngestionService_PersistProtocolWasms(t *testing.T) { - ctx := context.Background() - - t.Run("no_hashes_skips_insert", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - err := svc.PersistProtocolWasms(ctx, nil) - require.NoError(t, err) - protocolWasmModelMock.AssertNotCalled(t, "BatchInsert", mock.Anything, mock.Anything, mock.Anything) - }) - - t.Run("single_hash_persisted", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - hash := xdr.Hash{10, 20, 30} - - protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(wasms []data.ProtocolWasm) bool { - if len(wasms) != 1 { - return false - } - return wasms[0].WasmHash == types.HashBytea(hex.EncodeToString(hash[:])) && 
wasms[0].ProtocolID == nil - }), - ).Return(nil).Once() - - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - err := svc.ProcessContractCode(ctx, hash) - require.NoError(t, err) - - err = svc.PersistProtocolWasms(ctx, nil) - require.NoError(t, err) - }) - - t.Run("multiple_hashes_persisted", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - hash1 := xdr.Hash{1} - hash2 := xdr.Hash{2} - - protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(wasms []data.ProtocolWasm) bool { - if len(wasms) != 2 { - return false - } - found := make(map[string]bool) - for _, w := range wasms { - found[string(w.WasmHash)] = true - } - return found[hex.EncodeToString(hash1[:])] && found[hex.EncodeToString(hash2[:])] - }), - ).Return(nil).Once() - - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - require.NoError(t, svc.ProcessContractCode(ctx, hash1)) - require.NoError(t, svc.ProcessContractCode(ctx, hash2)) - - err := svc.PersistProtocolWasms(ctx, nil) - require.NoError(t, err) - }) - - t.Run("batch_insert_error_propagated", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - hash := xdr.Hash{99} - insertErr := errors.New("db connection lost") - - protocolWasmModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). 
- Return(insertErr).Once() - - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - require.NoError(t, svc.ProcessContractCode(ctx, hash)) - - err := svc.PersistProtocolWasms(ctx, nil) - require.Error(t, err) - assert.ErrorContains(t, err, "persisting protocol wasms") - assert.ErrorIs(t, err, insertErr) - }) -} - -func TestWasmIngestionService_ProcessContractData(t *testing.T) { - ctx := context.Background() - - t.Run("non_instance_entry_skipped", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - // ContractData entry with a non-Instance key type (e.g., balance entry) - contractHash := [32]byte{1, 2, 3} - change := ingest.Change{ - Type: xdr.LedgerEntryTypeContractData, - Post: &xdr.LedgerEntry{ - Data: xdr.LedgerEntryData{ - Type: xdr.LedgerEntryTypeContractData, - ContractData: &xdr.ContractDataEntry{ - Contract: xdr.ScAddress{ - Type: xdr.ScAddressTypeScAddressTypeContract, - ContractId: (*xdr.ContractId)(&contractHash), - }, - Key: xdr.ScVal{Type: xdr.ScValTypeScvSymbol}, - Durability: xdr.ContractDataDurabilityPersistent, - }, - }, - }, - } - - err := svc.ProcessContractData(ctx, change) - require.NoError(t, err) - assert.Empty(t, svc.contractIDsByWasmHash, "non-instance entry should be skipped") - }) - - t.Run("instance_without_contract_id_skipped", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - // Instance entry with account address (not contract) — GetContractId returns false - change := ingest.Change{ - Type: xdr.LedgerEntryTypeContractData, - Post: &xdr.LedgerEntry{ - Data: xdr.LedgerEntryData{ - Type: xdr.LedgerEntryTypeContractData, - ContractData: 
&xdr.ContractDataEntry{ - Contract: xdr.ScAddress{ - Type: xdr.ScAddressTypeScAddressTypeAccount, - }, - Key: xdr.ScVal{Type: xdr.ScValTypeScvLedgerKeyContractInstance}, - Durability: xdr.ContractDataDurabilityPersistent, - }, - }, - }, - } - - err := svc.ProcessContractData(ctx, change) - require.NoError(t, err) - assert.Empty(t, svc.contractIDsByWasmHash, "entry without contract ID should be skipped") - }) - - t.Run("sac_contract_skipped", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - // SAC contract — executable type is StellarAsset, not WASM - contractHash := [32]byte{5, 6, 7} - change := ingest.Change{ - Type: xdr.LedgerEntryTypeContractData, - Post: &xdr.LedgerEntry{ - Data: xdr.LedgerEntryData{ - Type: xdr.LedgerEntryTypeContractData, - ContractData: &xdr.ContractDataEntry{ - Contract: xdr.ScAddress{ - Type: xdr.ScAddressTypeScAddressTypeContract, - ContractId: (*xdr.ContractId)(&contractHash), - }, - Key: xdr.ScVal{Type: xdr.ScValTypeScvLedgerKeyContractInstance}, - Durability: xdr.ContractDataDurabilityPersistent, - Val: xdr.ScVal{ - Type: xdr.ScValTypeScvContractInstance, - Instance: &xdr.ScContractInstance{ - Executable: xdr.ContractExecutable{ - Type: xdr.ContractExecutableTypeContractExecutableStellarAsset, - }, - }, - }, - }, - }, - }, - } - - err := svc.ProcessContractData(ctx, change) - require.NoError(t, err) - assert.Empty(t, svc.contractIDsByWasmHash, "SAC contract should be skipped") - }) - - t.Run("wasm_contract_tracked", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - contractHash := [32]byte{10, 20, 30} - wasmHash := xdr.Hash{40, 50, 60} - change := 
makeContractInstanceChange(contractHash, wasmHash) - - err := svc.ProcessContractData(ctx, change) - require.NoError(t, err) - - require.Contains(t, svc.contractIDsByWasmHash, wasmHash) - expectedContractID := types.HashBytea(hex.EncodeToString(contractHash[:])) - assert.Equal(t, []types.HashBytea{expectedContractID}, svc.contractIDsByWasmHash[wasmHash]) - }) - - t.Run("multiple_contracts_same_wasm_hash", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - wasmHash := xdr.Hash{1, 2, 3} - contractHash1 := [32]byte{10} - contractHash2 := [32]byte{20} - - change1 := makeContractInstanceChange(contractHash1, wasmHash) - change2 := makeContractInstanceChange(contractHash2, wasmHash) - - require.NoError(t, svc.ProcessContractData(ctx, change1)) - require.NoError(t, svc.ProcessContractData(ctx, change2)) - - require.Contains(t, svc.contractIDsByWasmHash, wasmHash) - assert.Len(t, svc.contractIDsByWasmHash[wasmHash], 2) - - // Check that both contract hashes are present as hex strings - expectedID1 := types.HashBytea(hex.EncodeToString(contractHash1[:])) - expectedID2 := types.HashBytea(hex.EncodeToString(contractHash2[:])) - var foundAddr1, foundAddr2 bool - for _, id := range svc.contractIDsByWasmHash[wasmHash] { - if id == expectedID1 { - foundAddr1 = true - } - if id == expectedID2 { - foundAddr2 = true - } - } - assert.True(t, foundAddr1, "contractHash1 should be tracked") - assert.True(t, foundAddr2, "contractHash2 should be tracked") - }) -} - -func TestWasmIngestionService_PersistProtocolContracts(t *testing.T) { - ctx := context.Background() - - t.Run("empty_no_op", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, 
protocolContractsModelMock) - - err := svc.PersistProtocolContracts(ctx, nil) - require.NoError(t, err) - protocolContractsModelMock.AssertNotCalled(t, "BatchInsert", mock.Anything, mock.Anything, mock.Anything) - }) - - t.Run("single_contract", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - contractHash := [32]byte{10, 20, 30} - wasmHash := xdr.Hash{40, 50, 60} - change := makeContractInstanceChange(contractHash, wasmHash) - - require.NoError(t, svc.ProcessContractCode(ctx, wasmHash)) - require.NoError(t, svc.ProcessContractData(ctx, change)) - - protocolContractsModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(contracts []data.ProtocolContracts) bool { - if len(contracts) != 1 { - return false - } - return contracts[0].ContractID == types.HashBytea(hex.EncodeToString(contractHash[:])) && - contracts[0].WasmHash == types.HashBytea(hex.EncodeToString(wasmHash[:])) && - contracts[0].Name == nil - }), - ).Return(nil).Once() - - err := svc.PersistProtocolContracts(ctx, nil) - require.NoError(t, err) - }) - - t.Run("multiple_contracts_across_hashes", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - wasmHash1 := xdr.Hash{1} - wasmHash2 := xdr.Hash{2} - contractHash1 := [32]byte{10} - contractHash2 := [32]byte{20} - contractHash3 := [32]byte{30} - - // Register WASM hashes first - require.NoError(t, svc.ProcessContractCode(ctx, wasmHash1)) - require.NoError(t, svc.ProcessContractCode(ctx, wasmHash2)) - - // Two contracts with wasmHash1, one with wasmHash2 - require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash1, wasmHash1))) - 
require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash2, wasmHash1))) - require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash3, wasmHash2))) - - protocolContractsModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(contracts []data.ProtocolContracts) bool { - return len(contracts) == 3 - }), - ).Return(nil).Once() - - err := svc.PersistProtocolContracts(ctx, nil) - require.NoError(t, err) - }) - - t.Run("contracts_with_missing_wasm_skipped", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - knownWasm := xdr.Hash{1} - unknownWasm := xdr.Hash{2} - contractHash1 := [32]byte{10} - contractHash2 := [32]byte{20} - - // Only register one WASM hash - require.NoError(t, svc.ProcessContractCode(ctx, knownWasm)) - - // Add contracts — one with known WASM, one with unknown - require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash1, knownWasm))) - require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash2, unknownWasm))) - - protocolContractsModelMock.On("BatchInsert", mock.Anything, mock.Anything, - mock.MatchedBy(func(contracts []data.ProtocolContracts) bool { - return len(contracts) == 1 && contracts[0].WasmHash == types.HashBytea(hex.EncodeToString(knownWasm[:])) - }), - ).Return(nil).Once() - - err := svc.PersistProtocolContracts(ctx, nil) - require.NoError(t, err) - }) - - t.Run("batch_insert_error_propagated", func(t *testing.T) { - protocolWasmModelMock := data.NewProtocolWasmModelMock(t) - protocolContractsModelMock := data.NewProtocolContractsModelMock(t) - svc := NewWasmIngestionService(protocolWasmModelMock, protocolContractsModelMock) - - contractHash := [32]byte{10} - wasmHash := xdr.Hash{1} - require.NoError(t, 
svc.ProcessContractCode(ctx, wasmHash)) - require.NoError(t, svc.ProcessContractData(ctx, makeContractInstanceChange(contractHash, wasmHash))) - - insertErr := errors.New("db connection lost") - protocolContractsModelMock.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything). - Return(insertErr).Once() - - err := svc.PersistProtocolContracts(ctx, nil) - require.Error(t, err) - assert.ErrorContains(t, err, "persisting protocol contracts") - assert.ErrorIs(t, err, insertErr) - }) -} From f9266d3dc93febbccb9756e3e24ffe32ed77e54f Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 13 Mar 2026 11:01:55 -0600 Subject: [PATCH 24/52] Fix checkpoint test mock expectations for consolidated CheckpointService - Add valid AccountId to makeAccountChange() helper to prevent nil pointer dereference - Add missing protocolWasmModel.BatchInsert mock expectation in ContractCodeEntry test - Fix ContextCancellation test to cancel context during reader.Read() instead of before PopulateFromCheckpoint, matching the expected error path --- internal/services/checkpoint_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/services/checkpoint_test.go b/internal/services/checkpoint_test.go index 58315feb0..3451311f9 100644 --- a/internal/services/checkpoint_test.go +++ b/internal/services/checkpoint_test.go @@ -210,6 +210,9 @@ func TestCheckpointService_PopulateFromCheckpoint_ContractCodeEntry(t *testing.T // finalize -> persistProtocolWasms inserts the tracked WASM hash f.protocolWasmModel.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything).Return(nil).Once() + // finalize -> persistProtocolWasms inserts the tracked WASM hash + f.protocolWasmModel.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything).Return(nil).Once() + err := f.svc.PopulateFromCheckpoint(context.Background(), 100, func(_ pgx.Tx) error { return nil }) require.NoError(t, err) } From 51cb11531d363d4508c8b6d420d84dc2e86c5410 Mon Sep 17 00:00:00 2001 From: Aristides 
Staffieri Date: Fri, 20 Feb 2026 16:54:12 -0700 Subject: [PATCH 25/52] Extract checkpoint population into dedicated services, add known_wasms tracking - Add known_wasms table (migration, model, mock, and data layer tests) for tracking WASM hashes during checkpoint population - Add KnownWasm field to Models struct - Create WasmIngestionService (wasm_ingestion.go) that runs protocol validators against WASM bytecode and batch-persists hashes to known_wasms - Create CheckpointService (checkpoint.go) that orchestrates single-pass checkpoint population, delegating ContractCode entries to both WasmIngestionService and TokenProcessor, and all other entries to TokenProcessor - Extract readerFactory on checkpointService for injectable checkpoint reader creation - Extract TokenProcessor interface and NewTokenProcessor from TokenIngestionService, moving checkpoint iteration logic out of token_ingestion.go into checkpoint.go - Remove db, archive, and PopulateAccountTokens from TokenIngestionService interface and struct - Remove dbPool parameter from NewTokenIngestionServiceForLoadtest - Wire CheckpointService into IngestServiceConfig and ingestService - Update ingest_live.go to call checkpointService.PopulateFromCheckpoint instead of tokenIngestionService.PopulateAccountTokens - Update ingest.go setupDeps to construct WasmIngestionService and CheckpointService - Add ContractValidatorMock, ProtocolValidatorMock, ChangeReaderMock, CheckpointServiceMock, WasmIngestionServiceMock, TokenProcessorMock, and TokenIngestionServiceMock updates to mocks.go - Add unit tests for WasmIngestionService (10 cases covering ProcessContractCode and PersistKnownWasms) - Add unit tests for CheckpointService (16 cases covering entry routing, error propagation, and context cancellation) --- internal/data/known_wasms.go | 67 +++++++++ internal/data/known_wasms_test.go | 130 ++++++++++++++++++ .../migrations/2026-02-20.0-known_wasms.sql | 9 ++ 3 files changed, 206 insertions(+) create mode 100644 
internal/data/known_wasms.go create mode 100644 internal/data/known_wasms_test.go create mode 100644 internal/db/migrations/2026-02-20.0-known_wasms.sql diff --git a/internal/data/known_wasms.go b/internal/data/known_wasms.go new file mode 100644 index 000000000..c17366826 --- /dev/null +++ b/internal/data/known_wasms.go @@ -0,0 +1,67 @@ +package data + +import ( + "context" + "fmt" + "time" + + "github.com/jackc/pgx/v5" + + "github.com/stellar/wallet-backend/internal/db" + "github.com/stellar/wallet-backend/internal/metrics" + "github.com/stellar/wallet-backend/internal/utils" +) + +// KnownWasm represents a WASM hash tracked during checkpoint population. +type KnownWasm struct { + WasmHash string `db:"wasm_hash"` + ProtocolID *string `db:"protocol_id"` + CreatedAt time.Time `db:"created_at"` +} + +// KnownWasmModelInterface defines the interface for known_wasms operations. +type KnownWasmModelInterface interface { + BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error +} + +// KnownWasmModel implements KnownWasmModelInterface. +type KnownWasmModel struct { + DB db.ConnectionPool + MetricsService metrics.MetricsService +} + +var _ KnownWasmModelInterface = (*KnownWasmModel)(nil) + +// BatchInsert inserts multiple known WASMs using UNNEST for efficient batch insertion. +// Uses ON CONFLICT (wasm_hash) DO NOTHING for idempotent operations. 
+func (m *KnownWasmModel) BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error { + if len(wasms) == 0 { + return nil + } + + wasmHashes := make([]string, len(wasms)) + protocolIDs := make([]*string, len(wasms)) + + for i, w := range wasms { + wasmHashes[i] = w.WasmHash + protocolIDs[i] = w.ProtocolID + } + + const query = ` + INSERT INTO known_wasms (wasm_hash, protocol_id) + SELECT * FROM UNNEST($1::text[], $2::text[]) + ON CONFLICT (wasm_hash) DO NOTHING + ` + + start := time.Now() + _, err := dbTx.Exec(ctx, query, wasmHashes, protocolIDs) + if err != nil { + m.MetricsService.IncDBQueryError("BatchInsert", "known_wasms", utils.GetDBErrorType(err)) + return fmt.Errorf("batch inserting known wasms: %w", err) + } + + m.MetricsService.ObserveDBQueryDuration("BatchInsert", "known_wasms", time.Since(start).Seconds()) + m.MetricsService.ObserveDBBatchSize("BatchInsert", "known_wasms", len(wasms)) + m.MetricsService.IncDBQuery("BatchInsert", "known_wasms") + return nil +} diff --git a/internal/data/known_wasms_test.go b/internal/data/known_wasms_test.go new file mode 100644 index 000000000..618390f0a --- /dev/null +++ b/internal/data/known_wasms_test.go @@ -0,0 +1,130 @@ +package data + +import ( + "context" + "testing" + + "github.com/jackc/pgx/v5" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/stellar/wallet-backend/internal/db" + "github.com/stellar/wallet-backend/internal/db/dbtest" + "github.com/stellar/wallet-backend/internal/metrics" +) + +func TestKnownWasmBatchInsert(t *testing.T) { + ctx := context.Background() + + dbt := dbtest.Open(t) + defer dbt.Close() + dbConnectionPool, err := db.OpenDBConnectionPool(dbt.DSN) + require.NoError(t, err) + defer dbConnectionPool.Close() + + cleanUpDB := func() { + _, err = dbConnectionPool.ExecContext(ctx, `DELETE FROM known_wasms`) + require.NoError(t, err) + } + + t.Run("empty input returns no error", func(t *testing.T) { 
+ cleanUpDB() + mockMetricsService := metrics.NewMockMetricsService() + defer mockMetricsService.AssertExpectations(t) + + model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} + err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.BatchInsert(ctx, dbTx, []KnownWasm{}) + }) + assert.NoError(t, err) + }) + + t.Run("single insert", func(t *testing.T) { + cleanUpDB() + mockMetricsService := metrics.NewMockMetricsService() + mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() + defer mockMetricsService.AssertExpectations(t) + + model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} + err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.BatchInsert(ctx, dbTx, []KnownWasm{ + {WasmHash: "abc123def456", ProtocolID: nil}, + }) + }) + assert.NoError(t, err) + + // Verify the insert + var count int + err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms WHERE wasm_hash = 'abc123def456'`) + require.NoError(t, err) + assert.Equal(t, 1, count) + }) + + t.Run("multiple inserts", func(t *testing.T) { + cleanUpDB() + mockMetricsService := metrics.NewMockMetricsService() + mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() + defer mockMetricsService.AssertExpectations(t) + + model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} + protocolID := "test-protocol" + err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + 
return model.BatchInsert(ctx, dbTx, []KnownWasm{ + {WasmHash: "hash1", ProtocolID: nil}, + {WasmHash: "hash2", ProtocolID: &protocolID}, + {WasmHash: "hash3", ProtocolID: nil}, + }) + }) + assert.NoError(t, err) + + var count int + err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms`) + require.NoError(t, err) + assert.Equal(t, 3, count) + + // Verify protocol_id was stored correctly + var storedProtocolID *string + err = dbConnectionPool.GetContext(ctx, &storedProtocolID, `SELECT protocol_id FROM known_wasms WHERE wasm_hash = 'hash2'`) + require.NoError(t, err) + require.NotNil(t, storedProtocolID) + assert.Equal(t, "test-protocol", *storedProtocolID) + }) + + t.Run("duplicate inserts are idempotent", func(t *testing.T) { + cleanUpDB() + mockMetricsService := metrics.NewMockMetricsService() + mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() + defer mockMetricsService.AssertExpectations(t) + + model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} + + // First insert + err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.BatchInsert(ctx, dbTx, []KnownWasm{ + {WasmHash: "duplicate_hash", ProtocolID: nil}, + }) + }) + assert.NoError(t, err) + + // Second insert with same hash - should not error + err = db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.BatchInsert(ctx, dbTx, []KnownWasm{ + {WasmHash: "duplicate_hash", ProtocolID: nil}, + }) + }) + assert.NoError(t, err) + + // Verify only one row + var count int + err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms WHERE wasm_hash = 'duplicate_hash'`) + require.NoError(t, err) + assert.Equal(t, 1, count) + }) +} diff --git 
a/internal/db/migrations/2026-02-20.0-known_wasms.sql b/internal/db/migrations/2026-02-20.0-known_wasms.sql new file mode 100644 index 000000000..2e1415ef8 --- /dev/null +++ b/internal/db/migrations/2026-02-20.0-known_wasms.sql @@ -0,0 +1,9 @@ +-- +migrate Up +CREATE TABLE known_wasms ( + wasm_hash TEXT PRIMARY KEY, + protocol_id TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- +migrate Down +DROP TABLE IF EXISTS known_wasms; From 26908fbe10afaaa2c3a753a868003f7d0a8a0a9b Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Wed, 11 Mar 2026 11:48:16 -0600 Subject: [PATCH 26/52] Remove dead known_wasms model, tests, and migration The known_wasms table was renamed to protocol_wasms, and the new ProtocolWasm model already exists. Remove the obsolete KnownWasm model, its tests, and the old migration file. --- internal/data/known_wasms.go | 67 --------- internal/data/known_wasms_test.go | 130 ------------------ .../migrations/2026-02-20.0-known_wasms.sql | 9 -- 3 files changed, 206 deletions(-) delete mode 100644 internal/data/known_wasms.go delete mode 100644 internal/data/known_wasms_test.go delete mode 100644 internal/db/migrations/2026-02-20.0-known_wasms.sql diff --git a/internal/data/known_wasms.go b/internal/data/known_wasms.go deleted file mode 100644 index c17366826..000000000 --- a/internal/data/known_wasms.go +++ /dev/null @@ -1,67 +0,0 @@ -package data - -import ( - "context" - "fmt" - "time" - - "github.com/jackc/pgx/v5" - - "github.com/stellar/wallet-backend/internal/db" - "github.com/stellar/wallet-backend/internal/metrics" - "github.com/stellar/wallet-backend/internal/utils" -) - -// KnownWasm represents a WASM hash tracked during checkpoint population. -type KnownWasm struct { - WasmHash string `db:"wasm_hash"` - ProtocolID *string `db:"protocol_id"` - CreatedAt time.Time `db:"created_at"` -} - -// KnownWasmModelInterface defines the interface for known_wasms operations. 
-type KnownWasmModelInterface interface { - BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error -} - -// KnownWasmModel implements KnownWasmModelInterface. -type KnownWasmModel struct { - DB db.ConnectionPool - MetricsService metrics.MetricsService -} - -var _ KnownWasmModelInterface = (*KnownWasmModel)(nil) - -// BatchInsert inserts multiple known WASMs using UNNEST for efficient batch insertion. -// Uses ON CONFLICT (wasm_hash) DO NOTHING for idempotent operations. -func (m *KnownWasmModel) BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []KnownWasm) error { - if len(wasms) == 0 { - return nil - } - - wasmHashes := make([]string, len(wasms)) - protocolIDs := make([]*string, len(wasms)) - - for i, w := range wasms { - wasmHashes[i] = w.WasmHash - protocolIDs[i] = w.ProtocolID - } - - const query = ` - INSERT INTO known_wasms (wasm_hash, protocol_id) - SELECT * FROM UNNEST($1::text[], $2::text[]) - ON CONFLICT (wasm_hash) DO NOTHING - ` - - start := time.Now() - _, err := dbTx.Exec(ctx, query, wasmHashes, protocolIDs) - if err != nil { - m.MetricsService.IncDBQueryError("BatchInsert", "known_wasms", utils.GetDBErrorType(err)) - return fmt.Errorf("batch inserting known wasms: %w", err) - } - - m.MetricsService.ObserveDBQueryDuration("BatchInsert", "known_wasms", time.Since(start).Seconds()) - m.MetricsService.ObserveDBBatchSize("BatchInsert", "known_wasms", len(wasms)) - m.MetricsService.IncDBQuery("BatchInsert", "known_wasms") - return nil -} diff --git a/internal/data/known_wasms_test.go b/internal/data/known_wasms_test.go deleted file mode 100644 index 618390f0a..000000000 --- a/internal/data/known_wasms_test.go +++ /dev/null @@ -1,130 +0,0 @@ -package data - -import ( - "context" - "testing" - - "github.com/jackc/pgx/v5" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - - "github.com/stellar/wallet-backend/internal/db" - 
"github.com/stellar/wallet-backend/internal/db/dbtest" - "github.com/stellar/wallet-backend/internal/metrics" -) - -func TestKnownWasmBatchInsert(t *testing.T) { - ctx := context.Background() - - dbt := dbtest.Open(t) - defer dbt.Close() - dbConnectionPool, err := db.OpenDBConnectionPool(dbt.DSN) - require.NoError(t, err) - defer dbConnectionPool.Close() - - cleanUpDB := func() { - _, err = dbConnectionPool.ExecContext(ctx, `DELETE FROM known_wasms`) - require.NoError(t, err) - } - - t.Run("empty input returns no error", func(t *testing.T) { - cleanUpDB() - mockMetricsService := metrics.NewMockMetricsService() - defer mockMetricsService.AssertExpectations(t) - - model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} - err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{}) - }) - assert.NoError(t, err) - }) - - t.Run("single insert", func(t *testing.T) { - cleanUpDB() - mockMetricsService := metrics.NewMockMetricsService() - mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() - defer mockMetricsService.AssertExpectations(t) - - model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} - err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{ - {WasmHash: "abc123def456", ProtocolID: nil}, - }) - }) - assert.NoError(t, err) - - // Verify the insert - var count int - err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms WHERE wasm_hash = 'abc123def456'`) - require.NoError(t, err) - assert.Equal(t, 1, count) - }) - - t.Run("multiple inserts", func(t *testing.T) { - cleanUpDB() - mockMetricsService := 
metrics.NewMockMetricsService() - mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() - defer mockMetricsService.AssertExpectations(t) - - model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} - protocolID := "test-protocol" - err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{ - {WasmHash: "hash1", ProtocolID: nil}, - {WasmHash: "hash2", ProtocolID: &protocolID}, - {WasmHash: "hash3", ProtocolID: nil}, - }) - }) - assert.NoError(t, err) - - var count int - err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms`) - require.NoError(t, err) - assert.Equal(t, 3, count) - - // Verify protocol_id was stored correctly - var storedProtocolID *string - err = dbConnectionPool.GetContext(ctx, &storedProtocolID, `SELECT protocol_id FROM known_wasms WHERE wasm_hash = 'hash2'`) - require.NoError(t, err) - require.NotNil(t, storedProtocolID) - assert.Equal(t, "test-protocol", *storedProtocolID) - }) - - t.Run("duplicate inserts are idempotent", func(t *testing.T) { - cleanUpDB() - mockMetricsService := metrics.NewMockMetricsService() - mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() - defer mockMetricsService.AssertExpectations(t) - - model := &KnownWasmModel{DB: dbConnectionPool, MetricsService: mockMetricsService} - - // First insert - err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{ - {WasmHash: "duplicate_hash", 
ProtocolID: nil}, - }) - }) - assert.NoError(t, err) - - // Second insert with same hash - should not error - err = db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { - return model.BatchInsert(ctx, dbTx, []KnownWasm{ - {WasmHash: "duplicate_hash", ProtocolID: nil}, - }) - }) - assert.NoError(t, err) - - // Verify only one row - var count int - err = dbConnectionPool.GetContext(ctx, &count, `SELECT COUNT(*) FROM known_wasms WHERE wasm_hash = 'duplicate_hash'`) - require.NoError(t, err) - assert.Equal(t, 1, count) - }) -} diff --git a/internal/db/migrations/2026-02-20.0-known_wasms.sql b/internal/db/migrations/2026-02-20.0-known_wasms.sql deleted file mode 100644 index 2e1415ef8..000000000 --- a/internal/db/migrations/2026-02-20.0-known_wasms.sql +++ /dev/null @@ -1,9 +0,0 @@ --- +migrate Up -CREATE TABLE known_wasms ( - wasm_hash TEXT PRIMARY KEY, - protocol_id TEXT, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - --- +migrate Down -DROP TABLE IF EXISTS known_wasms; From e8dfa1ccbd0f18ca0f604e14c951df42706238e2 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 13 Mar 2026 15:02:28 -0600 Subject: [PATCH 27/52] Rename Protocol and ProtocolWasm models to plural form matching table names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This aligns Protocol→Protocols and ProtocolWasm→ProtocolWasms (structs, interfaces, mocks, and Models struct fields) to match the protocols and protocol_wasms table names, consistent with the existing ProtocolContracts convention. 
--- internal/services/ingest_live.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 3a60ebb7b..ab654fc4c 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -125,11 +125,11 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, // 2.5: Persist protocol wasms and contracts protocolWasms := buffer.GetProtocolWasms() if len(protocolWasms) > 0 { - wasmSlice := make([]data.ProtocolWasm, 0, len(protocolWasms)) + wasmSlice := make([]data.ProtocolWasms, 0, len(protocolWasms)) for _, wasm := range protocolWasms { wasmSlice = append(wasmSlice, wasm) } - if txErr = m.models.ProtocolWasm.BatchInsert(ctx, dbTx, wasmSlice); txErr != nil { + if txErr = m.models.ProtocolWasms.BatchInsert(ctx, dbTx, wasmSlice); txErr != nil { return fmt.Errorf("inserting protocol wasms for ledger %d: %w", ledgerSeq, txErr) } } From ef7c588615e5256d44397502692fbfd2267bfa03 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Tue, 17 Mar 2026 10:32:48 -0600 Subject: [PATCH 28/52] Add live protocol state production pipeline with dual CAS gating Introduces the infrastructure for protocol processors to produce and persist protocol-specific state during live ledger ingestion, gated by per-protocol compare-and-swap cursors that coordinate with concurrent migration processes. 
Key changes: - ProtocolProcessor interface and ProtocolProcessorInput for protocol- specific ledger analysis and state persistence - Processor registry (RegisterProcessor/GetAllProcessors) for protocol processor discovery at startup - Dual CAS gating in PersistLedgerData (step 5.5): per-protocol history and current_state cursors ensure exactly-once writes even when live ingestion and migration run concurrently - Protocol contract cache with periodic refresh to avoid per-ledger DB queries for classified contracts - Data layer additions: IngestStoreModel.GetTx, CompareAndSwap, ProtocolContractsModel.GetByProtocolID, ProtocolsModel.GetClassified Tests: - Unit tests for processor registry (concurrent safety, overwrite, etc.) - 5 subtests for PersistLedgerData CAS gating (win, lose, behind, no cursor, no processors) using a real test DB and sentinel-writing testProtocolProcessor - Docker integration test (ProtocolStateProductionTestSuite) exercising CAS gating against a live ingest container's DB in three phases --- internal/data/mocks.go | 91 +++--- internal/data/protocol_contracts.go | 45 --- internal/data/protocols.go | 2 - internal/integrationtests/main_test.go | 5 + .../protocol_state_production_test.go | 261 ++++++++++++++++++ internal/services/ingest.go | 15 +- internal/services/ingest_live.go | 162 ++++------- internal/services/ingest_test.go | 211 -------------- 8 files changed, 370 insertions(+), 422 deletions(-) create mode 100644 internal/integrationtests/protocol_state_production_test.go diff --git a/internal/data/mocks.go b/internal/data/mocks.go index 62592e7bc..d23bcc646 100644 --- a/internal/data/mocks.go +++ b/internal/data/mocks.go @@ -5,6 +5,7 @@ package data import ( "context" + "github.com/google/uuid" "github.com/jackc/pgx/v5" "github.com/stretchr/testify/mock" @@ -188,6 +189,40 @@ func (m *SACBalanceModelMock) BatchCopy(ctx context.Context, dbTx pgx.Tx, balanc return args.Error(0) } +// AccountContractTokensModelMock is a mock implementation of 
AccountContractTokensModelInterface. +type AccountContractTokensModelMock struct { + mock.Mock +} + +var _ AccountContractTokensModelInterface = (*AccountContractTokensModelMock)(nil) + +// NewAccountContractTokensModelMock creates a new instance of AccountContractTokensModelMock. +func NewAccountContractTokensModelMock(t interface { + mock.TestingT + Cleanup(func()) +}, +) *AccountContractTokensModelMock { + mockModel := &AccountContractTokensModelMock{} + mockModel.Mock.Test(t) + + t.Cleanup(func() { mockModel.AssertExpectations(t) }) + + return mockModel +} + +func (m *AccountContractTokensModelMock) GetByAccount(ctx context.Context, accountAddress string) ([]*Contract, error) { + args := m.Called(ctx, accountAddress) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]*Contract), args.Error(1) +} + +func (m *AccountContractTokensModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, contractsByAccount map[string][]uuid.UUID) error { + args := m.Called(ctx, dbTx, contractsByAccount) + return args.Error(0) +} + // ProtocolWasmsModelMock is a mock implementation of ProtocolWasmsModelInterface. type ProtocolWasmsModelMock struct { mock.Mock @@ -300,54 +335,10 @@ func (m *ProtocolContractsModelMock) BatchInsert(ctx context.Context, dbTx pgx.T return args.Error(0) } -// ProtocolWasmModelMock is a mock implementation of ProtocolWasmModelInterface. -type ProtocolWasmModelMock struct { - mock.Mock -} - -var _ ProtocolWasmModelInterface = (*ProtocolWasmModelMock)(nil) - -// NewProtocolWasmModelMock creates a new instance of ProtocolWasmModelMock. 
-func NewProtocolWasmModelMock(t interface { - mock.TestingT - Cleanup(func()) -}, -) *ProtocolWasmModelMock { - mockModel := &ProtocolWasmModelMock{} - mockModel.Mock.Test(t) - - t.Cleanup(func() { mockModel.AssertExpectations(t) }) - - return mockModel -} - -func (m *ProtocolWasmModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, wasms []ProtocolWasm) error { - args := m.Called(ctx, dbTx, wasms) - return args.Error(0) -} - -// ProtocolContractsModelMock is a mock implementation of ProtocolContractsModelInterface. -type ProtocolContractsModelMock struct { - mock.Mock -} - -var _ ProtocolContractsModelInterface = (*ProtocolContractsModelMock)(nil) - -// NewProtocolContractsModelMock creates a new instance of ProtocolContractsModelMock. -func NewProtocolContractsModelMock(t interface { - mock.TestingT - Cleanup(func()) -}, -) *ProtocolContractsModelMock { - mockModel := &ProtocolContractsModelMock{} - mockModel.Mock.Test(t) - - t.Cleanup(func() { mockModel.AssertExpectations(t) }) - - return mockModel -} - -func (m *ProtocolContractsModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, contracts []ProtocolContracts) error { - args := m.Called(ctx, dbTx, contracts) - return args.Error(0) +func (m *ProtocolContractsModelMock) GetByProtocolID(ctx context.Context, protocolID string) ([]ProtocolContracts, error) { + args := m.Called(ctx, protocolID) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]ProtocolContracts), args.Error(1) } diff --git a/internal/data/protocol_contracts.go b/internal/data/protocol_contracts.go index 148d46e7f..db10e307e 100644 --- a/internal/data/protocol_contracts.go +++ b/internal/data/protocol_contracts.go @@ -26,7 +26,6 @@ type ProtocolContracts struct { type ProtocolContractsModelInterface interface { BatchInsert(ctx context.Context, dbTx pgx.Tx, contracts []ProtocolContracts) error GetByProtocolID(ctx context.Context, protocolID string) ([]ProtocolContracts, error) - BatchGetByProtocolIDs(ctx 
context.Context, protocolIDs []string) (map[string][]ProtocolContracts, error) } // ProtocolContractsModel implements ProtocolContractsModelInterface. @@ -108,13 +107,11 @@ func (m *ProtocolContractsModel) GetByProtocolID(ctx context.Context, protocolID for rows.Next() { var c ProtocolContracts if err := rows.Scan(&c.ContractID, &c.WasmHash, &c.Name, &c.CreatedAt); err != nil { - m.MetricsService.IncDBQueryError("GetByProtocolID", "protocol_contracts", utils.GetDBErrorType(err)) return nil, fmt.Errorf("scanning protocol contract row: %w", err) } contracts = append(contracts, c) } if err := rows.Err(); err != nil { - m.MetricsService.IncDBQueryError("GetByProtocolID", "protocol_contracts", utils.GetDBErrorType(err)) return nil, fmt.Errorf("iterating protocol contract rows: %w", err) } @@ -122,45 +119,3 @@ func (m *ProtocolContractsModel) GetByProtocolID(ctx context.Context, protocolID m.MetricsService.IncDBQuery("GetByProtocolID", "protocol_contracts") return contracts, nil } - -// BatchGetByProtocolIDs returns all contracts for the given protocol IDs in a single query, -// grouped by protocol ID. 
-func (m *ProtocolContractsModel) BatchGetByProtocolIDs(ctx context.Context, protocolIDs []string) (map[string][]ProtocolContracts, error) { - if len(protocolIDs) == 0 { - return nil, nil - } - - const query = ` - SELECT pw.protocol_id, pc.contract_id, pc.wasm_hash, pc.name, pc.created_at - FROM protocol_contracts pc - JOIN protocol_wasms pw ON pc.wasm_hash = pw.wasm_hash - WHERE pw.protocol_id = ANY($1) - ` - - start := time.Now() - rows, err := m.DB.PgxPool().Query(ctx, query, protocolIDs) - if err != nil { - m.MetricsService.IncDBQueryError("BatchGetByProtocolIDs", "protocol_contracts", utils.GetDBErrorType(err)) - return nil, fmt.Errorf("batch querying contracts for protocols: %w", err) - } - defer rows.Close() - - result := make(map[string][]ProtocolContracts, len(protocolIDs)) - for rows.Next() { - var protocolID string - var c ProtocolContracts - if err := rows.Scan(&protocolID, &c.ContractID, &c.WasmHash, &c.Name, &c.CreatedAt); err != nil { - m.MetricsService.IncDBQueryError("BatchGetByProtocolIDs", "protocol_contracts", utils.GetDBErrorType(err)) - return nil, fmt.Errorf("scanning batch protocol contract row: %w", err) - } - result[protocolID] = append(result[protocolID], c) - } - if err := rows.Err(); err != nil { - m.MetricsService.IncDBQueryError("BatchGetByProtocolIDs", "protocol_contracts", utils.GetDBErrorType(err)) - return nil, fmt.Errorf("iterating batch protocol contract rows: %w", err) - } - - m.MetricsService.ObserveDBQueryDuration("BatchGetByProtocolIDs", "protocol_contracts", time.Since(start).Seconds()) - m.MetricsService.IncDBQuery("BatchGetByProtocolIDs", "protocol_contracts") - return result, nil -} diff --git a/internal/data/protocols.go b/internal/data/protocols.go index cd77250a8..2731bbd97 100644 --- a/internal/data/protocols.go +++ b/internal/data/protocols.go @@ -129,13 +129,11 @@ func (m *ProtocolsModel) GetClassified(ctx context.Context) ([]Protocols, error) for rows.Next() { var p Protocols if err := rows.Scan(&p.ID, 
&p.ClassificationStatus, &p.HistoryMigrationStatus, &p.CurrentStateMigrationStatus, &p.CreatedAt, &p.UpdatedAt); err != nil { - m.MetricsService.IncDBQueryError("GetClassified", "protocols", utils.GetDBErrorType(err)) return nil, fmt.Errorf("scanning classified protocol row: %w", err) } protocols = append(protocols, p) } if err := rows.Err(); err != nil { - m.MetricsService.IncDBQueryError("GetClassified", "protocols", utils.GetDBErrorType(err)) return nil, fmt.Errorf("iterating classified protocol rows: %w", err) } diff --git a/internal/integrationtests/main_test.go b/internal/integrationtests/main_test.go index c1119e799..3620de85c 100644 --- a/internal/integrationtests/main_test.go +++ b/internal/integrationtests/main_test.go @@ -66,6 +66,11 @@ func TestIntegrationTests(t *testing.T) { suite.Run(t, &DataMigrationTestSuite{testEnv: testEnv}) }) + // Protocol state production tests — dual CAS gating during live ingestion + t.Run("ProtocolStateProductionTestSuite", func(t *testing.T) { + suite.Run(t, &ProtocolStateProductionTestSuite{testEnv: testEnv}) + }) + t.Run("BuildAndSubmitTransactionsTestSuite", func(t *testing.T) { suite.Run(t, &BuildAndSubmitTransactionsTestSuite{ testEnv: testEnv, diff --git a/internal/integrationtests/protocol_state_production_test.go b/internal/integrationtests/protocol_state_production_test.go new file mode 100644 index 000000000..fe9ac1ab1 --- /dev/null +++ b/internal/integrationtests/protocol_state_production_test.go @@ -0,0 +1,261 @@ +package integrationtests + +import ( + "context" + "database/sql" + "fmt" + "testing" + + "github.com/jackc/pgx/v5" + _ "github.com/lib/pq" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + + "github.com/stellar/wallet-backend/internal/apptracker" + "github.com/stellar/wallet-backend/internal/data" + "github.com/stellar/wallet-backend/internal/db" + "github.com/stellar/wallet-backend/internal/indexer" + 
"github.com/stellar/wallet-backend/internal/integrationtests/infrastructure" + "github.com/stellar/wallet-backend/internal/metrics" + "github.com/stellar/wallet-backend/internal/services" + "github.com/stellar/wallet-backend/internal/signing/store" +) + +// --- ProtocolStateProductionTestSuite (requires Docker DB + live ingest) --- + +type ProtocolStateProductionTestSuite struct { + suite.Suite + testEnv *infrastructure.TestEnvironment +} + +func (s *ProtocolStateProductionTestSuite) setupDB() (db.ConnectionPool, func()) { + ctx := context.Background() + dbURL, err := s.testEnv.Containers.GetWalletDBConnectionString(ctx) + s.Require().NoError(err) + pool, err := db.OpenDBConnectionPool(dbURL) + s.Require().NoError(err) + return pool, func() { pool.Close() } +} + +func (s *ProtocolStateProductionTestSuite) setupModels(pool db.ConnectionPool) *data.Models { + mockMetrics := metrics.NewMockMetricsService() + mockMetrics.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetrics.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetrics.On("IncDBQuery", mock.Anything, mock.Anything).Return() + mockMetrics.On("IncDBQueryError", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetrics.On("RegisterPoolMetrics", mock.Anything, mock.Anything).Return() + models, err := data.NewModels(pool, mockMetrics) + s.Require().NoError(err) + return models +} + +func (s *ProtocolStateProductionTestSuite) cleanupTestKeys(ctx context.Context, dbURL string) { + sqlDB, err := sql.Open("postgres", dbURL) + s.Require().NoError(err) + defer sqlDB.Close() + + _, err = sqlDB.ExecContext(ctx, `DELETE FROM ingest_store WHERE key LIKE 'test_%' OR key LIKE 'protocol_testproto%'`) + s.Require().NoError(err) +} + +// integrationTestProcessor implements services.ProtocolProcessor using the real +// IngestStoreModel to write sentinel keys within the DB transaction. 
+type integrationTestProcessor struct { + id string + processedLedger uint32 + ingestStore *data.IngestStoreModel +} + +func (p *integrationTestProcessor) ProtocolID() string { return p.id } + +func (p *integrationTestProcessor) ProcessLedger(_ context.Context, input services.ProtocolProcessorInput) error { + p.processedLedger = input.LedgerSequence + return nil +} + +func (p *integrationTestProcessor) PersistHistory(ctx context.Context, dbTx pgx.Tx) error { + return p.ingestStore.Update(ctx, dbTx, fmt.Sprintf("test_%s_history_written", p.id), p.processedLedger) +} + +func (p *integrationTestProcessor) PersistCurrentState(ctx context.Context, dbTx pgx.Tx) error { + return p.ingestStore.Update(ctx, dbTx, fmt.Sprintf("test_%s_current_state_written", p.id), p.processedLedger) +} + +// TestDualCASGatingDuringLiveIngestion proves CAS gating works against the +// Docker DB that has been populated by the real ingest container. +func (s *ProtocolStateProductionTestSuite) TestDualCASGatingDuringLiveIngestion() { + ctx := context.Background() + pool, cleanup := s.setupDB() + defer cleanup() + models := s.setupModels(pool) + + dbURL, err := s.testEnv.Containers.GetWalletDBConnectionString(ctx) + s.Require().NoError(err) + defer s.cleanupTestKeys(ctx, dbURL) + + // Read current latest_ingest_ledger to know where the live container is + latestLedger, err := models.IngestStore.Get(ctx, "latest_ingest_ledger") + s.Require().NoError(err) + s.Require().Greater(latestLedger, uint32(0), "live ingest should have populated latest_ingest_ledger") + s.T().Logf("Live ingest container is at ledger %d", latestLedger) + + // Pick a test ledger well beyond the live ingest tip to avoid collision + testLedger := latestLedger + 1000 + + // Insert protocol cursors at testLedger-1 (ready for CAS win) + sqlDB, err := sql.Open("postgres", dbURL) + s.Require().NoError(err) + defer sqlDB.Close() + + _, err = sqlDB.ExecContext(ctx, + `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, + 
"protocol_testproto_history_cursor", testLedger-1) + s.Require().NoError(err) + _, err = sqlDB.ExecContext(ctx, + `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, + "protocol_testproto_current_state_cursor", testLedger-1) + s.Require().NoError(err) + + // Insert a test-specific main cursor (avoid interfering with real ingest) + _, err = sqlDB.ExecContext(ctx, + `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, + "test_cursor", testLedger-1) + s.Require().NoError(err) + + processor := &integrationTestProcessor{id: "testproto", ingestStore: models.IngestStore, processedLedger: testLedger} + + mockTokenIngestionService := services.NewTokenIngestionServiceMock(s.T()) + mockTokenIngestionService.On("ProcessTokenChanges", + mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, + ).Return(nil).Maybe() + + mockMetrics := metrics.NewMockMetricsService() + mockMetrics.On("RegisterPoolMetrics", mock.Anything, mock.Anything).Return() + mockMetrics.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() + mockMetrics.On("IncDBQuery", mock.Anything, mock.Anything).Return().Maybe() + mockMetrics.On("IncDBQueryError", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() + + svc, err := services.NewIngestService(services.IngestServiceConfig{ + IngestionMode: services.IngestionModeLive, + Models: models, + LatestLedgerCursorName: "test_cursor", + OldestLedgerCursorName: "test_cursor", + AppTracker: &apptracker.MockAppTracker{}, + RPCService: s.testEnv.RPCService, + LedgerBackend: &services.LedgerBackendMock{}, + ChannelAccountStore: &store.ChannelAccountStoreMock{}, + TokenIngestionService: mockTokenIngestionService, + MetricsService: mockMetrics, + Network: "Test SDF Network ; September 2015", + NetworkPassphrase: "Test SDF Network ; September 2015", + Archive: &services.HistoryArchiveMock{}, + ProtocolProcessors: []services.ProtocolProcessor{processor}, + }) + s.Require().NoError(err) + 
+ buffer := indexer.NewIndexerBuffer() + + // Phase 1: CAS Win — cursors at testLedger-1, persisting testLedger + s.T().Log("Phase 1: CAS win") + _, _, err = svc.PersistLedgerData(ctx, testLedger, buffer, "test_cursor") + s.Require().NoError(err) + + histCursor, err := models.IngestStore.Get(ctx, "protocol_testproto_history_cursor") + s.Require().NoError(err) + s.Assert().Equal(testLedger, histCursor, "history cursor should advance to testLedger") + + csCursor, err := models.IngestStore.Get(ctx, "protocol_testproto_current_state_cursor") + s.Require().NoError(err) + s.Assert().Equal(testLedger, csCursor, "current state cursor should advance to testLedger") + + histSentinel, err := models.IngestStore.Get(ctx, "test_testproto_history_written") + s.Require().NoError(err) + s.Assert().Equal(testLedger, histSentinel, "history sentinel should be testLedger") + + csSentinel, err := models.IngestStore.Get(ctx, "test_testproto_current_state_written") + s.Require().NoError(err) + s.Assert().Equal(testLedger, csSentinel, "current state sentinel should be testLedger") + + // Phase 2: CAS Lose — same ledger again, CAS expects testLedger-1 but finds testLedger + s.T().Log("Phase 2: CAS lose (same ledger again)") + _, _, err = svc.PersistLedgerData(ctx, testLedger, buffer, "test_cursor") + s.Require().NoError(err) + + // Cursors should still be at testLedger + histCursor, err = models.IngestStore.Get(ctx, "protocol_testproto_history_cursor") + s.Require().NoError(err) + s.Assert().Equal(testLedger, histCursor, "history cursor should remain at testLedger after CAS lose") + + csCursor, err = models.IngestStore.Get(ctx, "protocol_testproto_current_state_cursor") + s.Require().NoError(err) + s.Assert().Equal(testLedger, csCursor, "current state cursor should remain at testLedger after CAS lose") + + // Delete sentinels, re-run, and verify they are NOT re-written + _, err = sqlDB.ExecContext(ctx, `DELETE FROM ingest_store WHERE key LIKE 'test_testproto_%_written'`) + 
s.Require().NoError(err) + + _, _, err = svc.PersistLedgerData(ctx, testLedger, buffer, "test_cursor") + s.Require().NoError(err) + + histSentinel, err = models.IngestStore.Get(ctx, "test_testproto_history_written") + s.Require().NoError(err) + s.Assert().Equal(uint32(0), histSentinel, "sentinels should NOT be re-written after CAS lose") + + csSentinel, err = models.IngestStore.Get(ctx, "test_testproto_current_state_written") + s.Require().NoError(err) + s.Assert().Equal(uint32(0), csSentinel, "sentinels should NOT be re-written after CAS lose") + + // Phase 3: Cursor behind — second protocol at testLedger-2 + s.T().Log("Phase 3: Cursor behind (second protocol)") + _, err = sqlDB.ExecContext(ctx, + `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, + "protocol_testproto2_history_cursor", testLedger-2) + s.Require().NoError(err) + _, err = sqlDB.ExecContext(ctx, + `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, + "protocol_testproto2_current_state_cursor", testLedger-2) + s.Require().NoError(err) + + processor2 := &integrationTestProcessor{id: "testproto2", ingestStore: models.IngestStore, processedLedger: testLedger} + + svc2, err := services.NewIngestService(services.IngestServiceConfig{ + IngestionMode: services.IngestionModeLive, + Models: models, + LatestLedgerCursorName: "test_cursor", + OldestLedgerCursorName: "test_cursor", + AppTracker: &apptracker.MockAppTracker{}, + RPCService: s.testEnv.RPCService, + LedgerBackend: &services.LedgerBackendMock{}, + ChannelAccountStore: &store.ChannelAccountStoreMock{}, + TokenIngestionService: mockTokenIngestionService, + MetricsService: mockMetrics, + Network: "Test SDF Network ; September 2015", + NetworkPassphrase: "Test SDF Network ; September 2015", + Archive: &services.HistoryArchiveMock{}, + ProtocolProcessors: []services.ProtocolProcessor{processor2}, + }) + s.Require().NoError(err) + + _, _, err = svc2.PersistLedgerData(ctx, testLedger, buffer, "test_cursor") + s.Require().NoError(err) + + hist2, 
err := models.IngestStore.Get(ctx, "protocol_testproto2_history_cursor") + s.Require().NoError(err) + s.Assert().Equal(testLedger-2, hist2, "testproto2 history cursor should stay behind") + + cs2, err := models.IngestStore.Get(ctx, "protocol_testproto2_current_state_cursor") + s.Require().NoError(err) + s.Assert().Equal(testLedger-2, cs2, "testproto2 current state cursor should stay behind") + + histSentinel2, err := models.IngestStore.Get(ctx, "test_testproto2_history_written") + s.Require().NoError(err) + s.Assert().Equal(uint32(0), histSentinel2, "no sentinels for behind protocol") + + csSentinel2, err := models.IngestStore.Get(ctx, "test_testproto2_current_state_written") + s.Require().NoError(err) + s.Assert().Equal(uint32(0), csSentinel2, "no sentinels for behind protocol") +} + +func TestProtocolStateProductionTestSuiteStandalone(t *testing.T) { + t.Skip("Run via TestIntegrationTests") +} diff --git a/internal/services/ingest.go b/internal/services/ingest.go index e8bb97c92..d86bc39cb 100644 --- a/internal/services/ingest.go +++ b/internal/services/ingest.go @@ -124,10 +124,6 @@ type ingestService struct { knownContractIDs set.Set[string] protocolProcessors map[string]ProtocolProcessor protocolContractCache *protocolContractCache - // eligibleProtocolProcessors is set by ingestLiveLedgers before each call - // to PersistLedgerData, scoping the CAS loop to only processors that had - // ProcessLedger called. Only accessed from the single-threaded live ingestion loop. 
- eligibleProtocolProcessors map[string]ProtocolProcessor } func NewIngestService(cfg IngestServiceConfig) (*ingestService, error) { @@ -146,15 +142,8 @@ func NewIngestService(cfg IngestServiceConfig) (*ingestService, error) { // Build protocol processor map from slice ppMap := make(map[string]ProtocolProcessor, len(cfg.ProtocolProcessors)) - for i, p := range cfg.ProtocolProcessors { - if p == nil { - return nil, fmt.Errorf("protocol processor at index %d is nil", i) - } - id := p.ProtocolID() - if _, exists := ppMap[id]; exists { - return nil, fmt.Errorf("duplicate protocol processor ID %q", id) - } - ppMap[id] = p + for _, p := range cfg.ProtocolProcessors { + ppMap[p.ProtocolID()] = p } var ppCache *protocolContractCache diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index ab654fc4c..15af4e7a0 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "strconv" + "sync" "time" set "github.com/deckarep/golang-set/v2" @@ -27,51 +28,12 @@ const ( ) // protocolContractCache caches classified protocol contracts to avoid per-ledger DB queries. -// Only accessed from the single-threaded live ingestion loop, so no mutex is needed. type protocolContractCache struct { + mu sync.RWMutex contractsByProtocol map[string][]data.ProtocolContracts lastRefreshLedger uint32 } -func protocolStateCursorReady(cursorValue, ledgerSeq uint32) bool { - if ledgerSeq == 0 { - return true - } - - return cursorValue >= ledgerSeq-1 -} - -// protocolProcessorsEligibleForProduction returns the processors that may persist -// history or current state for ledgerSeq. This is only a best-effort optimization: -// PersistLedgerData still performs the authoritative CAS check inside the DB -// transaction, so a later CAS loss can still skip persistence. 
-func (m *ingestService) protocolProcessorsEligibleForProduction(ctx context.Context, ledgerSeq uint32) (map[string]ProtocolProcessor, error) { - if len(m.protocolProcessors) == 0 { - return nil, nil - } - - eligible := make(map[string]ProtocolProcessor, len(m.protocolProcessors)) - for protocolID, processor := range m.protocolProcessors { - historyCursor := fmt.Sprintf("protocol_%s_history_cursor", protocolID) - historyVal, err := m.models.IngestStore.Get(ctx, historyCursor) - if err != nil { - return nil, fmt.Errorf("reading history cursor for %s: %w", protocolID, err) - } - - currentStateCursor := fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) - currentStateVal, err := m.models.IngestStore.Get(ctx, currentStateCursor) - if err != nil { - return nil, fmt.Errorf("reading current state cursor for %s: %w", protocolID, err) - } - - if protocolStateCursorReady(historyVal, ledgerSeq) || protocolStateCursorReady(currentStateVal, ledgerSeq) { - eligible[protocolID] = processor - } - } - - return eligible, nil -} - // PersistLedgerData persists processed ledger data to the database in a single atomic transaction. // This is the shared core used by both live ingestion and loadtest. // It handles: trustline assets, contract tokens, filtered data insertion, channel account unlocking, @@ -165,43 +127,48 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, } // 5.5: Per-protocol dual CAS gating for state production - if len(m.eligibleProtocolProcessors) > 0 { - for protocolID, processor := range m.eligibleProtocolProcessors { - if ledgerSeq == 0 { - // No previous ledger to form an expected cursor value; skip CAS for this ledger. 
- continue - } + if len(m.protocolProcessors) > 0 { + for protocolID, processor := range m.protocolProcessors { historyCursor := fmt.Sprintf("protocol_%s_history_cursor", protocolID) currentStateCursor := fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) - expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) - next := strconv.FormatUint(uint64(ledgerSeq), 10) - // --- History State Changes --- - swapped, casErr := m.models.IngestStore.CompareAndSwap(ctx, dbTx, historyCursor, expected, next) - if casErr != nil { - return fmt.Errorf("CAS history cursor for %s: %w", protocolID, casErr) + historyVal, histErr := m.models.IngestStore.Get(ctx, historyCursor) + if histErr != nil { + return fmt.Errorf("reading history cursor for %s: %w", protocolID, histErr) } - if swapped { - start := time.Now() - persistErr := processor.PersistHistory(ctx, dbTx) - m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "persist_history", time.Since(start).Seconds()) - if persistErr != nil { - return fmt.Errorf("persisting history for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) + if historyVal >= ledgerSeq-1 { + expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) + next := strconv.FormatUint(uint64(ledgerSeq), 10) + swapped, casErr := m.models.IngestStore.CompareAndSwap(ctx, dbTx, historyCursor, expected, next) + if casErr != nil { + return fmt.Errorf("CAS history cursor for %s: %w", protocolID, casErr) + } + if swapped { + if persistErr := processor.PersistHistory(ctx, dbTx); persistErr != nil { + return fmt.Errorf("persisting history for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) + } } + // CAS failed: migration already wrote them — skip } + // historyVal < ledgerSeq-1: migration hasn't caught up — skip // --- Current State --- - swapped, casErr = m.models.IngestStore.CompareAndSwap(ctx, dbTx, currentStateCursor, expected, next) - if casErr != nil { - return fmt.Errorf("CAS current state cursor for %s: %w", protocolID, casErr) + 
csVal, csErr := m.models.IngestStore.Get(ctx, currentStateCursor) + if csErr != nil { + return fmt.Errorf("reading current state cursor for %s: %w", protocolID, csErr) } - if swapped { - start := time.Now() - persistErr := processor.PersistCurrentState(ctx, dbTx) - m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "persist_current_state", time.Since(start).Seconds()) - if persistErr != nil { - return fmt.Errorf("persisting current state for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) + if csVal >= ledgerSeq-1 { + expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) + next := strconv.FormatUint(uint64(ledgerSeq), 10) + swapped, casErr := m.models.IngestStore.CompareAndSwap(ctx, dbTx, currentStateCursor, expected, next) + if casErr != nil { + return fmt.Errorf("CAS current state cursor for %s: %w", protocolID, casErr) + } + if swapped { + if persistErr := processor.PersistCurrentState(ctx, dbTx); persistErr != nil { + return fmt.Errorf("persisting current state for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) + } } } } @@ -322,15 +289,8 @@ func (m *ingestService) ingestLiveLedgers(ctx context.Context, startLedger uint3 } m.metricsService.ObserveIngestionPhaseDuration("process_ledger", time.Since(processStart).Seconds()) - eligibleProcessors, err := m.protocolProcessorsEligibleForProduction(ctx, currentLedger) - if err != nil { - return fmt.Errorf("checking protocol state readiness for ledger %d: %w", currentLedger, err) - } - m.eligibleProtocolProcessors = eligibleProcessors - - // Run protocol state production (in-memory analysis before DB transaction) only - // for processors that may actually persist this ledger. 
- if err := m.produceProtocolStateForProcessors(ctx, ledgerMeta, currentLedger, eligibleProcessors); err != nil { + // Run protocol state production (in-memory analysis before DB transaction) + if err := m.produceProtocolState(ctx, ledgerMeta, currentLedger); err != nil { return fmt.Errorf("producing protocol state for ledger %d: %w", currentLedger, err) } @@ -359,11 +319,12 @@ func (m *ingestService) ingestLiveLedgers(ctx context.Context, startLedger uint3 } } -func (m *ingestService) produceProtocolStateForProcessors(ctx context.Context, ledgerMeta xdr.LedgerCloseMeta, ledgerSeq uint32, processors map[string]ProtocolProcessor) error { - if len(processors) == 0 { +// produceProtocolState runs all registered protocol processors against a ledger. +func (m *ingestService) produceProtocolState(ctx context.Context, ledgerMeta xdr.LedgerCloseMeta, ledgerSeq uint32) error { + if len(m.protocolProcessors) == 0 { return nil } - for protocolID, processor := range processors { + for protocolID, processor := range m.protocolProcessors { contracts := m.getProtocolContracts(ctx, protocolID, ledgerSeq) input := ProtocolProcessorInput{ LedgerSequence: ledgerSeq, @@ -371,12 +332,9 @@ func (m *ingestService) produceProtocolStateForProcessors(ctx context.Context, l ProtocolContracts: contracts, NetworkPassphrase: m.networkPassphrase, } - start := time.Now() if err := processor.ProcessLedger(ctx, input); err != nil { - m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "process_ledger", time.Since(start).Seconds()) return fmt.Errorf("processing ledger %d for protocol %s: %w", ledgerSeq, protocolID, err) } - m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "process_ledger", time.Since(start).Seconds()) } return nil } @@ -386,41 +344,43 @@ func (m *ingestService) getProtocolContracts(ctx context.Context, protocolID str if m.protocolContractCache == nil { return nil } + m.protocolContractCache.mu.RLock() stale := 
m.protocolContractCache.lastRefreshLedger == 0 || (currentLedger-m.protocolContractCache.lastRefreshLedger) >= protocolContractRefreshInterval + m.protocolContractCache.mu.RUnlock() if stale { - m.metricsService.IncProtocolContractCacheAccess(protocolID, "miss") m.refreshProtocolContractCache(ctx, currentLedger) - } else { - m.metricsService.IncProtocolContractCacheAccess(protocolID, "hit") } + m.protocolContractCache.mu.RLock() + defer m.protocolContractCache.mu.RUnlock() return m.protocolContractCache.contractsByProtocol[protocolID] } // refreshProtocolContractCache reloads all protocol contracts from the DB. -// Only called from the single-threaded live ingestion loop. func (m *ingestService) refreshProtocolContractCache(ctx context.Context, currentLedger uint32) { - start := time.Now() - protocolIDs := make([]string, 0, len(m.protocolProcessors)) - for protocolID := range m.protocolProcessors { - protocolIDs = append(protocolIDs, protocolID) - } - newMap, err := m.models.ProtocolContracts.BatchGetByProtocolIDs(ctx, protocolIDs) - if err != nil { - log.Ctx(ctx).Warnf("Error refreshing protocol contract cache: %v; preserving previous entries", err) - newMap = m.protocolContractCache.contractsByProtocol + m.protocolContractCache.mu.Lock() + defer m.protocolContractCache.mu.Unlock() + + // Double-check after acquiring write lock + if m.protocolContractCache.lastRefreshLedger != 0 && + (currentLedger-m.protocolContractCache.lastRefreshLedger) < protocolContractRefreshInterval { + return } + newMap := make(map[string][]data.ProtocolContracts, len(m.protocolProcessors)) + for protocolID := range m.protocolProcessors { + contracts, err := m.models.ProtocolContracts.GetByProtocolID(ctx, protocolID) + if err != nil { + log.Ctx(ctx).Warnf("Error refreshing protocol contract cache for %s: %v", protocolID, err) + continue + } + newMap[protocolID] = contracts + } m.protocolContractCache.contractsByProtocol = newMap m.protocolContractCache.lastRefreshLedger = currentLedger - 
m.metricsService.ObserveProtocolContractCacheRefreshDuration(time.Since(start).Seconds()) - if err != nil { - log.Ctx(ctx).Warnf("Protocol contract cache refresh failed at ledger %d; will retry at next interval", currentLedger) - } else { - log.Ctx(ctx).Infof("Refreshed protocol contract cache at ledger %d", currentLedger) - } + log.Ctx(ctx).Infof("Refreshed protocol contract cache at ledger %d", currentLedger) } // ingestProcessedDataWithRetry wraps PersistLedgerData with retry logic. diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index 720af440e..39bc96fff 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -2865,9 +2865,6 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { mockMetrics.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() mockMetrics.On("IncDBQuery", mock.Anything, mock.Anything).Return().Maybe() mockMetrics.On("IncDBQueryError", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() - mockMetrics.On("ObserveProtocolStateProcessingDuration", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() - mockMetrics.On("IncProtocolContractCacheAccess", mock.Anything, mock.Anything).Return().Maybe() - mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Maybe() models, err := data.NewModels(pool, mockMetrics) require.NoError(t, err) @@ -2906,7 +2903,6 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { // Set ingestStore after models are created, and simulate ProcessLedger processor.ingestStore = models.IngestStore processor.processedLedger = 100 - svc.eligibleProtocolProcessors = map[string]ProtocolProcessor{"testproto": processor} setupDBCursors(t, ctx, pool, 99, 99) setupProtocolCursors(t, ctx, pool, "testproto", 99, 99) @@ -2939,7 +2935,6 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { ctx, svc, models, pool := setupTest(t, []ProtocolProcessor{processor}) 
processor.ingestStore = models.IngestStore processor.processedLedger = 100 - svc.eligibleProtocolProcessors = map[string]ProtocolProcessor{"testproto": processor} setupDBCursors(t, ctx, pool, 99, 99) setupProtocolCursors(t, ctx, pool, "testproto", 100, 100) @@ -2972,7 +2967,6 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { ctx, svc, models, pool := setupTest(t, []ProtocolProcessor{processor}) processor.ingestStore = models.IngestStore processor.processedLedger = 100 - svc.eligibleProtocolProcessors = map[string]ProtocolProcessor{"testproto": processor} setupDBCursors(t, ctx, pool, 99, 99) setupProtocolCursors(t, ctx, pool, "testproto", 98, 98) @@ -3005,7 +2999,6 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { ctx, svc, models, pool := setupTest(t, []ProtocolProcessor{processor}) processor.ingestStore = models.IngestStore processor.processedLedger = 100 - svc.eligibleProtocolProcessors = map[string]ProtocolProcessor{"testproto": processor} setupDBCursors(t, ctx, pool, 99, 99) // No protocol cursors inserted — simulates first run @@ -3048,207 +3041,3 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { assert.Equal(t, uint32(100), mainCursor) }) } - -func Test_protocolStateCursorReady(t *testing.T) { - t.Parallel() - - testCases := []struct { - name string - cursorValue uint32 - ledgerSeq uint32 - want bool - }{ - {name: "ledger zero", cursorValue: 0, ledgerSeq: 0, want: true}, - {name: "cursor at previous ledger", cursorValue: 99, ledgerSeq: 100, want: true}, - {name: "cursor ahead", cursorValue: 100, ledgerSeq: 100, want: true}, - {name: "cursor behind", cursorValue: 98, ledgerSeq: 100, want: false}, - {name: "missing row semantics", cursorValue: 0, ledgerSeq: 100, want: false}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, tc.want, protocolStateCursorReady(tc.cursorValue, tc.ledgerSeq)) - }) - } -} - -func 
Test_ingestService_produceProtocolStateForProcessors_ProcessesOnlyProvidedProcessors(t *testing.T) { - t.Parallel() - - ctx := context.Background() - mockMetrics := metrics.NewMockMetricsService() - mockMetrics.On("ObserveProtocolStateProcessingDuration", "selected", "process_ledger", mock.Anything).Return().Once() - mockMetrics.On("IncProtocolContractCacheAccess", "selected", "hit").Return().Once() - - selectedProcessor := NewProtocolProcessorMock(t) - expectedContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1), WasmHash: types.HashBytea(txHash2)}} - selectedProcessor.On("ProcessLedger", ctx, mock.MatchedBy(func(input ProtocolProcessorInput) bool { - return input.LedgerSequence == 123 && - input.NetworkPassphrase == "test-passphrase" && - reflect.DeepEqual(input.ProtocolContracts, expectedContracts) - })).Return(nil).Once() - - svc := &ingestService{ - metricsService: mockMetrics, - networkPassphrase: "test-passphrase", - protocolContractCache: &protocolContractCache{ - contractsByProtocol: map[string][]data.ProtocolContracts{ - "selected": expectedContracts, - }, - lastRefreshLedger: 123, - }, - } - - err := svc.produceProtocolStateForProcessors(ctx, xdr.LedgerCloseMeta{}, 123, map[string]ProtocolProcessor{ - "selected": selectedProcessor, - }) - require.NoError(t, err) - - mockMetrics.AssertExpectations(t) -} - -// produceProtocolState runs all registered protocol processors against a ledger. 
-func (m *ingestService) produceProtocolState(ctx context.Context, ledgerMeta xdr.LedgerCloseMeta, ledgerSeq uint32) error { - return m.produceProtocolStateForProcessors(ctx, ledgerMeta, ledgerSeq, m.protocolProcessors) -} - -func Test_ingestService_produceProtocolState_RecordsMetrics(t *testing.T) { - t.Parallel() - - ctx := context.Background() - mockMetrics := metrics.NewMockMetricsService() - mockMetrics.On("ObserveProtocolStateProcessingDuration", "testproto", "process_ledger", mock.Anything).Return().Once() - mockMetrics.On("IncProtocolContractCacheAccess", "testproto", "hit").Return().Once() - - processor := NewProtocolProcessorMock(t) - expectedContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1), WasmHash: types.HashBytea(txHash2)}} - processor.On("ProcessLedger", ctx, mock.MatchedBy(func(input ProtocolProcessorInput) bool { - return input.LedgerSequence == 123 && - input.NetworkPassphrase == "test-passphrase" && - reflect.DeepEqual(input.ProtocolContracts, expectedContracts) - })).Return(nil).Once() - - svc := &ingestService{ - metricsService: mockMetrics, - networkPassphrase: "test-passphrase", - protocolProcessors: map[string]ProtocolProcessor{ - "testproto": processor, - }, - protocolContractCache: &protocolContractCache{ - contractsByProtocol: map[string][]data.ProtocolContracts{ - "testproto": expectedContracts, - }, - lastRefreshLedger: 123, - }, - } - - err := svc.produceProtocolState(ctx, xdr.LedgerCloseMeta{}, 123) - require.NoError(t, err) - mockMetrics.AssertExpectations(t) -} - -func Test_ingestService_getProtocolContracts_RefreshesAndRecordsMetrics(t *testing.T) { - t.Parallel() - - ctx := context.Background() - mockMetrics := metrics.NewMockMetricsService() - mockMetrics.On("IncProtocolContractCacheAccess", "testproto", "miss").Return().Once() - mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Once() - - protocolContractsModel := data.NewProtocolContractsModelMock(t) - 
expectedContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1), WasmHash: types.HashBytea(txHash2)}} - protocolContractsModel.On("BatchGetByProtocolIDs", ctx, []string{"testproto"}). - Return(map[string][]data.ProtocolContracts{"testproto": expectedContracts}, nil).Once() - - svc := &ingestService{ - metricsService: mockMetrics, - models: &data.Models{ - ProtocolContracts: protocolContractsModel, - }, - protocolProcessors: map[string]ProtocolProcessor{ - "testproto": NewProtocolProcessorMock(t), - }, - protocolContractCache: &protocolContractCache{ - contractsByProtocol: make(map[string][]data.ProtocolContracts), - }, - } - - contracts := svc.getProtocolContracts(ctx, "testproto", 100) - assert.Equal(t, expectedContracts, contracts) - mockMetrics.AssertExpectations(t) -} - -func Test_ingestService_refreshProtocolContractCache_Failure_StillUpdatesLedger(t *testing.T) { - t.Parallel() - - ctx := context.Background() - mockMetrics := metrics.NewMockMetricsService() - mockMetrics.On("IncProtocolContractCacheAccess", "proto_a", "miss").Return().Once() - mockMetrics.On("IncProtocolContractCacheAccess", "proto_a", "hit").Return().Once() - mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Once() - - protocolContractsModel := data.NewProtocolContractsModelMock(t) - protocolContractsModel.On("BatchGetByProtocolIDs", ctx, mock.AnythingOfType("[]string")). 
- Return(nil, fmt.Errorf("db error")).Once() - - svc := &ingestService{ - metricsService: mockMetrics, - models: &data.Models{ProtocolContracts: protocolContractsModel}, - protocolProcessors: map[string]ProtocolProcessor{ - "proto_a": NewProtocolProcessorMock(t), - "proto_b": NewProtocolProcessorMock(t), - }, - protocolContractCache: &protocolContractCache{ - contractsByProtocol: make(map[string][]data.ProtocolContracts), - }, - } - - // First call triggers refresh (cache is empty, so stale) - svc.getProtocolContracts(ctx, "proto_a", 200) - - // lastRefreshLedger must advance despite failure - assert.Equal(t, uint32(200), svc.protocolContractCache.lastRefreshLedger) - - // Calling again at currentLedger+1 should be a cache hit (not stale yet). - // The .Once() expectations on the mock ensure no extra DB calls happen. - svc.getProtocolContracts(ctx, "proto_a", 201) - - mockMetrics.AssertExpectations(t) -} - -func Test_ingestService_refreshProtocolContractCache_Failure_PreservesPreviousEntries(t *testing.T) { - t.Parallel() - - ctx := context.Background() - mockMetrics := metrics.NewMockMetricsService() - mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Once() - - previousContracts := map[string][]data.ProtocolContracts{ - "proto_a": {{ContractID: types.HashBytea(txHash1)}}, - "proto_b": {{ContractID: types.HashBytea(txHash2)}}, - } - - protocolContractsModel := data.NewProtocolContractsModelMock(t) - protocolContractsModel.On("BatchGetByProtocolIDs", ctx, mock.AnythingOfType("[]string")). 
- Return(nil, fmt.Errorf("db error")).Once() - - svc := &ingestService{ - metricsService: mockMetrics, - models: &data.Models{ProtocolContracts: protocolContractsModel}, - protocolProcessors: map[string]ProtocolProcessor{ - "proto_a": NewProtocolProcessorMock(t), - "proto_b": NewProtocolProcessorMock(t), - }, - protocolContractCache: &protocolContractCache{ - contractsByProtocol: previousContracts, - lastRefreshLedger: 0, // force refresh - }, - } - - svc.refreshProtocolContractCache(ctx, 300) - - // All previous entries preserved on failure - assert.Equal(t, previousContracts, svc.protocolContractCache.contractsByProtocol) - - mockMetrics.AssertExpectations(t) -} From 403b1fc5d9be749003833687e6e08c46070ae40b Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Tue, 17 Mar 2026 16:09:20 -0600 Subject: [PATCH 29/52] test: consolidate data migration integration coverage Combine protocol setup and protocol state tests into a shared DataMigrationTestSuite. Use real SEP41 setup classification plus manual cursor seeding to verify live ingestion produces protocol history/current state only when the protocol cursors are ready, and stays inert when they are absent. 
--- internal/integrationtests/main_test.go | 5 - .../protocol_state_production_test.go | 261 ------------------ internal/services/ingest_live.go | 82 +++++- internal/services/ingest_test.go | 128 +++++++++ 4 files changed, 202 insertions(+), 274 deletions(-) delete mode 100644 internal/integrationtests/protocol_state_production_test.go diff --git a/internal/integrationtests/main_test.go b/internal/integrationtests/main_test.go index 3620de85c..c1119e799 100644 --- a/internal/integrationtests/main_test.go +++ b/internal/integrationtests/main_test.go @@ -66,11 +66,6 @@ func TestIntegrationTests(t *testing.T) { suite.Run(t, &DataMigrationTestSuite{testEnv: testEnv}) }) - // Protocol state production tests — dual CAS gating during live ingestion - t.Run("ProtocolStateProductionTestSuite", func(t *testing.T) { - suite.Run(t, &ProtocolStateProductionTestSuite{testEnv: testEnv}) - }) - t.Run("BuildAndSubmitTransactionsTestSuite", func(t *testing.T) { suite.Run(t, &BuildAndSubmitTransactionsTestSuite{ testEnv: testEnv, diff --git a/internal/integrationtests/protocol_state_production_test.go b/internal/integrationtests/protocol_state_production_test.go deleted file mode 100644 index fe9ac1ab1..000000000 --- a/internal/integrationtests/protocol_state_production_test.go +++ /dev/null @@ -1,261 +0,0 @@ -package integrationtests - -import ( - "context" - "database/sql" - "fmt" - "testing" - - "github.com/jackc/pgx/v5" - _ "github.com/lib/pq" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" - - "github.com/stellar/wallet-backend/internal/apptracker" - "github.com/stellar/wallet-backend/internal/data" - "github.com/stellar/wallet-backend/internal/db" - "github.com/stellar/wallet-backend/internal/indexer" - "github.com/stellar/wallet-backend/internal/integrationtests/infrastructure" - "github.com/stellar/wallet-backend/internal/metrics" - "github.com/stellar/wallet-backend/internal/services" - "github.com/stellar/wallet-backend/internal/signing/store" -) 
- -// --- ProtocolStateProductionTestSuite (requires Docker DB + live ingest) --- - -type ProtocolStateProductionTestSuite struct { - suite.Suite - testEnv *infrastructure.TestEnvironment -} - -func (s *ProtocolStateProductionTestSuite) setupDB() (db.ConnectionPool, func()) { - ctx := context.Background() - dbURL, err := s.testEnv.Containers.GetWalletDBConnectionString(ctx) - s.Require().NoError(err) - pool, err := db.OpenDBConnectionPool(dbURL) - s.Require().NoError(err) - return pool, func() { pool.Close() } -} - -func (s *ProtocolStateProductionTestSuite) setupModels(pool db.ConnectionPool) *data.Models { - mockMetrics := metrics.NewMockMetricsService() - mockMetrics.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetrics.On("ObserveDBBatchSize", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetrics.On("IncDBQuery", mock.Anything, mock.Anything).Return() - mockMetrics.On("IncDBQueryError", mock.Anything, mock.Anything, mock.Anything).Return() - mockMetrics.On("RegisterPoolMetrics", mock.Anything, mock.Anything).Return() - models, err := data.NewModels(pool, mockMetrics) - s.Require().NoError(err) - return models -} - -func (s *ProtocolStateProductionTestSuite) cleanupTestKeys(ctx context.Context, dbURL string) { - sqlDB, err := sql.Open("postgres", dbURL) - s.Require().NoError(err) - defer sqlDB.Close() - - _, err = sqlDB.ExecContext(ctx, `DELETE FROM ingest_store WHERE key LIKE 'test_%' OR key LIKE 'protocol_testproto%'`) - s.Require().NoError(err) -} - -// integrationTestProcessor implements services.ProtocolProcessor using the real -// IngestStoreModel to write sentinel keys within the DB transaction. 
-type integrationTestProcessor struct { - id string - processedLedger uint32 - ingestStore *data.IngestStoreModel -} - -func (p *integrationTestProcessor) ProtocolID() string { return p.id } - -func (p *integrationTestProcessor) ProcessLedger(_ context.Context, input services.ProtocolProcessorInput) error { - p.processedLedger = input.LedgerSequence - return nil -} - -func (p *integrationTestProcessor) PersistHistory(ctx context.Context, dbTx pgx.Tx) error { - return p.ingestStore.Update(ctx, dbTx, fmt.Sprintf("test_%s_history_written", p.id), p.processedLedger) -} - -func (p *integrationTestProcessor) PersistCurrentState(ctx context.Context, dbTx pgx.Tx) error { - return p.ingestStore.Update(ctx, dbTx, fmt.Sprintf("test_%s_current_state_written", p.id), p.processedLedger) -} - -// TestDualCASGatingDuringLiveIngestion proves CAS gating works against the -// Docker DB that has been populated by the real ingest container. -func (s *ProtocolStateProductionTestSuite) TestDualCASGatingDuringLiveIngestion() { - ctx := context.Background() - pool, cleanup := s.setupDB() - defer cleanup() - models := s.setupModels(pool) - - dbURL, err := s.testEnv.Containers.GetWalletDBConnectionString(ctx) - s.Require().NoError(err) - defer s.cleanupTestKeys(ctx, dbURL) - - // Read current latest_ingest_ledger to know where the live container is - latestLedger, err := models.IngestStore.Get(ctx, "latest_ingest_ledger") - s.Require().NoError(err) - s.Require().Greater(latestLedger, uint32(0), "live ingest should have populated latest_ingest_ledger") - s.T().Logf("Live ingest container is at ledger %d", latestLedger) - - // Pick a test ledger well beyond the live ingest tip to avoid collision - testLedger := latestLedger + 1000 - - // Insert protocol cursors at testLedger-1 (ready for CAS win) - sqlDB, err := sql.Open("postgres", dbURL) - s.Require().NoError(err) - defer sqlDB.Close() - - _, err = sqlDB.ExecContext(ctx, - `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, - 
"protocol_testproto_history_cursor", testLedger-1) - s.Require().NoError(err) - _, err = sqlDB.ExecContext(ctx, - `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, - "protocol_testproto_current_state_cursor", testLedger-1) - s.Require().NoError(err) - - // Insert a test-specific main cursor (avoid interfering with real ingest) - _, err = sqlDB.ExecContext(ctx, - `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, - "test_cursor", testLedger-1) - s.Require().NoError(err) - - processor := &integrationTestProcessor{id: "testproto", ingestStore: models.IngestStore, processedLedger: testLedger} - - mockTokenIngestionService := services.NewTokenIngestionServiceMock(s.T()) - mockTokenIngestionService.On("ProcessTokenChanges", - mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, - ).Return(nil).Maybe() - - mockMetrics := metrics.NewMockMetricsService() - mockMetrics.On("RegisterPoolMetrics", mock.Anything, mock.Anything).Return() - mockMetrics.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() - mockMetrics.On("IncDBQuery", mock.Anything, mock.Anything).Return().Maybe() - mockMetrics.On("IncDBQueryError", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() - - svc, err := services.NewIngestService(services.IngestServiceConfig{ - IngestionMode: services.IngestionModeLive, - Models: models, - LatestLedgerCursorName: "test_cursor", - OldestLedgerCursorName: "test_cursor", - AppTracker: &apptracker.MockAppTracker{}, - RPCService: s.testEnv.RPCService, - LedgerBackend: &services.LedgerBackendMock{}, - ChannelAccountStore: &store.ChannelAccountStoreMock{}, - TokenIngestionService: mockTokenIngestionService, - MetricsService: mockMetrics, - Network: "Test SDF Network ; September 2015", - NetworkPassphrase: "Test SDF Network ; September 2015", - Archive: &services.HistoryArchiveMock{}, - ProtocolProcessors: []services.ProtocolProcessor{processor}, - }) - s.Require().NoError(err) - 
- buffer := indexer.NewIndexerBuffer() - - // Phase 1: CAS Win — cursors at testLedger-1, persisting testLedger - s.T().Log("Phase 1: CAS win") - _, _, err = svc.PersistLedgerData(ctx, testLedger, buffer, "test_cursor") - s.Require().NoError(err) - - histCursor, err := models.IngestStore.Get(ctx, "protocol_testproto_history_cursor") - s.Require().NoError(err) - s.Assert().Equal(testLedger, histCursor, "history cursor should advance to testLedger") - - csCursor, err := models.IngestStore.Get(ctx, "protocol_testproto_current_state_cursor") - s.Require().NoError(err) - s.Assert().Equal(testLedger, csCursor, "current state cursor should advance to testLedger") - - histSentinel, err := models.IngestStore.Get(ctx, "test_testproto_history_written") - s.Require().NoError(err) - s.Assert().Equal(testLedger, histSentinel, "history sentinel should be testLedger") - - csSentinel, err := models.IngestStore.Get(ctx, "test_testproto_current_state_written") - s.Require().NoError(err) - s.Assert().Equal(testLedger, csSentinel, "current state sentinel should be testLedger") - - // Phase 2: CAS Lose — same ledger again, CAS expects testLedger-1 but finds testLedger - s.T().Log("Phase 2: CAS lose (same ledger again)") - _, _, err = svc.PersistLedgerData(ctx, testLedger, buffer, "test_cursor") - s.Require().NoError(err) - - // Cursors should still be at testLedger - histCursor, err = models.IngestStore.Get(ctx, "protocol_testproto_history_cursor") - s.Require().NoError(err) - s.Assert().Equal(testLedger, histCursor, "history cursor should remain at testLedger after CAS lose") - - csCursor, err = models.IngestStore.Get(ctx, "protocol_testproto_current_state_cursor") - s.Require().NoError(err) - s.Assert().Equal(testLedger, csCursor, "current state cursor should remain at testLedger after CAS lose") - - // Delete sentinels, re-run, and verify they are NOT re-written - _, err = sqlDB.ExecContext(ctx, `DELETE FROM ingest_store WHERE key LIKE 'test_testproto_%_written'`) - 
s.Require().NoError(err) - - _, _, err = svc.PersistLedgerData(ctx, testLedger, buffer, "test_cursor") - s.Require().NoError(err) - - histSentinel, err = models.IngestStore.Get(ctx, "test_testproto_history_written") - s.Require().NoError(err) - s.Assert().Equal(uint32(0), histSentinel, "sentinels should NOT be re-written after CAS lose") - - csSentinel, err = models.IngestStore.Get(ctx, "test_testproto_current_state_written") - s.Require().NoError(err) - s.Assert().Equal(uint32(0), csSentinel, "sentinels should NOT be re-written after CAS lose") - - // Phase 3: Cursor behind — second protocol at testLedger-2 - s.T().Log("Phase 3: Cursor behind (second protocol)") - _, err = sqlDB.ExecContext(ctx, - `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, - "protocol_testproto2_history_cursor", testLedger-2) - s.Require().NoError(err) - _, err = sqlDB.ExecContext(ctx, - `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, - "protocol_testproto2_current_state_cursor", testLedger-2) - s.Require().NoError(err) - - processor2 := &integrationTestProcessor{id: "testproto2", ingestStore: models.IngestStore, processedLedger: testLedger} - - svc2, err := services.NewIngestService(services.IngestServiceConfig{ - IngestionMode: services.IngestionModeLive, - Models: models, - LatestLedgerCursorName: "test_cursor", - OldestLedgerCursorName: "test_cursor", - AppTracker: &apptracker.MockAppTracker{}, - RPCService: s.testEnv.RPCService, - LedgerBackend: &services.LedgerBackendMock{}, - ChannelAccountStore: &store.ChannelAccountStoreMock{}, - TokenIngestionService: mockTokenIngestionService, - MetricsService: mockMetrics, - Network: "Test SDF Network ; September 2015", - NetworkPassphrase: "Test SDF Network ; September 2015", - Archive: &services.HistoryArchiveMock{}, - ProtocolProcessors: []services.ProtocolProcessor{processor2}, - }) - s.Require().NoError(err) - - _, _, err = svc2.PersistLedgerData(ctx, testLedger, buffer, "test_cursor") - s.Require().NoError(err) - - hist2, 
err := models.IngestStore.Get(ctx, "protocol_testproto2_history_cursor") - s.Require().NoError(err) - s.Assert().Equal(testLedger-2, hist2, "testproto2 history cursor should stay behind") - - cs2, err := models.IngestStore.Get(ctx, "protocol_testproto2_current_state_cursor") - s.Require().NoError(err) - s.Assert().Equal(testLedger-2, cs2, "testproto2 current state cursor should stay behind") - - histSentinel2, err := models.IngestStore.Get(ctx, "test_testproto2_history_written") - s.Require().NoError(err) - s.Assert().Equal(uint32(0), histSentinel2, "no sentinels for behind protocol") - - csSentinel2, err := models.IngestStore.Get(ctx, "test_testproto2_current_state_written") - s.Require().NoError(err) - s.Assert().Equal(uint32(0), csSentinel2, "no sentinels for behind protocol") -} - -func TestProtocolStateProductionTestSuiteStandalone(t *testing.T) { - t.Skip("Run via TestIntegrationTests") -} diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 15af4e7a0..d952eb20e 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -34,6 +34,45 @@ type protocolContractCache struct { lastRefreshLedger uint32 } +func protocolStateCursorReady(cursorValue, ledgerSeq uint32) bool { + if ledgerSeq == 0 { + return true + } + + return cursorValue >= ledgerSeq-1 +} + +// protocolProcessorsEligibleForProduction returns the processors that may persist +// history or current state for ledgerSeq. This is only a best-effort optimization: +// PersistLedgerData still performs the authoritative CAS check inside the DB +// transaction, so a later CAS loss can still skip persistence. 
+func (m *ingestService) protocolProcessorsEligibleForProduction(ctx context.Context, ledgerSeq uint32) (map[string]ProtocolProcessor, error) { + if len(m.protocolProcessors) == 0 { + return nil, nil + } + + eligible := make(map[string]ProtocolProcessor, len(m.protocolProcessors)) + for protocolID, processor := range m.protocolProcessors { + historyCursor := fmt.Sprintf("protocol_%s_history_cursor", protocolID) + historyVal, err := m.models.IngestStore.Get(ctx, historyCursor) + if err != nil { + return nil, fmt.Errorf("reading history cursor for %s: %w", protocolID, err) + } + + currentStateCursor := fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) + currentStateVal, err := m.models.IngestStore.Get(ctx, currentStateCursor) + if err != nil { + return nil, fmt.Errorf("reading current state cursor for %s: %w", protocolID, err) + } + + if protocolStateCursorReady(historyVal, ledgerSeq) || protocolStateCursorReady(currentStateVal, ledgerSeq) { + eligible[protocolID] = processor + } + } + + return eligible, nil +} + // PersistLedgerData persists processed ledger data to the database in a single atomic transaction. // This is the shared core used by both live ingestion and loadtest. 
// It handles: trustline assets, contract tokens, filtered data insertion, channel account unlocking, @@ -137,7 +176,7 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, if histErr != nil { return fmt.Errorf("reading history cursor for %s: %w", protocolID, histErr) } - if historyVal >= ledgerSeq-1 { + if protocolStateCursorReady(historyVal, ledgerSeq) { expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) next := strconv.FormatUint(uint64(ledgerSeq), 10) swapped, casErr := m.models.IngestStore.CompareAndSwap(ctx, dbTx, historyCursor, expected, next) @@ -145,7 +184,10 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, return fmt.Errorf("CAS history cursor for %s: %w", protocolID, casErr) } if swapped { - if persistErr := processor.PersistHistory(ctx, dbTx); persistErr != nil { + start := time.Now() + persistErr := processor.PersistHistory(ctx, dbTx) + m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "persist_history", time.Since(start).Seconds()) + if persistErr != nil { return fmt.Errorf("persisting history for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) } } @@ -158,7 +200,7 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, if csErr != nil { return fmt.Errorf("reading current state cursor for %s: %w", protocolID, csErr) } - if csVal >= ledgerSeq-1 { + if protocolStateCursorReady(csVal, ledgerSeq) { expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) next := strconv.FormatUint(uint64(ledgerSeq), 10) swapped, casErr := m.models.IngestStore.CompareAndSwap(ctx, dbTx, currentStateCursor, expected, next) @@ -166,7 +208,10 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, return fmt.Errorf("CAS current state cursor for %s: %w", protocolID, casErr) } if swapped { - if persistErr := processor.PersistCurrentState(ctx, dbTx); persistErr != nil { + start := time.Now() + persistErr := 
processor.PersistCurrentState(ctx, dbTx) + m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "persist_current_state", time.Since(start).Seconds()) + if persistErr != nil { return fmt.Errorf("persisting current state for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) } } @@ -289,8 +334,14 @@ func (m *ingestService) ingestLiveLedgers(ctx context.Context, startLedger uint3 } m.metricsService.ObserveIngestionPhaseDuration("process_ledger", time.Since(processStart).Seconds()) - // Run protocol state production (in-memory analysis before DB transaction) - if err := m.produceProtocolState(ctx, ledgerMeta, currentLedger); err != nil { + eligibleProcessors, err := m.protocolProcessorsEligibleForProduction(ctx, currentLedger) + if err != nil { + return fmt.Errorf("checking protocol state readiness for ledger %d: %w", currentLedger, err) + } + + // Run protocol state production (in-memory analysis before DB transaction) only + // for processors that may actually persist this ledger. + if err := m.produceProtocolStateForProcessors(ctx, ledgerMeta, currentLedger, eligibleProcessors); err != nil { return fmt.Errorf("producing protocol state for ledger %d: %w", currentLedger, err) } @@ -321,10 +372,14 @@ func (m *ingestService) ingestLiveLedgers(ctx context.Context, startLedger uint3 // produceProtocolState runs all registered protocol processors against a ledger. 
func (m *ingestService) produceProtocolState(ctx context.Context, ledgerMeta xdr.LedgerCloseMeta, ledgerSeq uint32) error { - if len(m.protocolProcessors) == 0 { + return m.produceProtocolStateForProcessors(ctx, ledgerMeta, ledgerSeq, m.protocolProcessors) +} + +func (m *ingestService) produceProtocolStateForProcessors(ctx context.Context, ledgerMeta xdr.LedgerCloseMeta, ledgerSeq uint32, processors map[string]ProtocolProcessor) error { + if len(processors) == 0 { return nil } - for protocolID, processor := range m.protocolProcessors { + for protocolID, processor := range processors { contracts := m.getProtocolContracts(ctx, protocolID, ledgerSeq) input := ProtocolProcessorInput{ LedgerSequence: ledgerSeq, @@ -332,9 +387,12 @@ func (m *ingestService) produceProtocolState(ctx context.Context, ledgerMeta xdr ProtocolContracts: contracts, NetworkPassphrase: m.networkPassphrase, } + start := time.Now() if err := processor.ProcessLedger(ctx, input); err != nil { + m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "process_ledger", time.Since(start).Seconds()) return fmt.Errorf("processing ledger %d for protocol %s: %w", ledgerSeq, protocolID, err) } + m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "process_ledger", time.Since(start).Seconds()) } return nil } @@ -349,6 +407,12 @@ func (m *ingestService) getProtocolContracts(ctx context.Context, protocolID str (currentLedger-m.protocolContractCache.lastRefreshLedger) >= protocolContractRefreshInterval m.protocolContractCache.mu.RUnlock() + if stale { + m.metricsService.IncProtocolContractCacheAccess(protocolID, "miss") + } else { + m.metricsService.IncProtocolContractCacheAccess(protocolID, "hit") + } + if stale { m.refreshProtocolContractCache(ctx, currentLedger) } @@ -369,6 +433,7 @@ func (m *ingestService) refreshProtocolContractCache(ctx context.Context, curren return } + start := time.Now() newMap := make(map[string][]data.ProtocolContracts, len(m.protocolProcessors)) for 
protocolID := range m.protocolProcessors { contracts, err := m.models.ProtocolContracts.GetByProtocolID(ctx, protocolID) @@ -380,6 +445,7 @@ func (m *ingestService) refreshProtocolContractCache(ctx context.Context, curren } m.protocolContractCache.contractsByProtocol = newMap m.protocolContractCache.lastRefreshLedger = currentLedger + m.metricsService.ObserveProtocolContractCacheRefreshDuration(time.Since(start).Seconds()) log.Ctx(ctx).Infof("Refreshed protocol contract cache at ledger %d", currentLedger) } diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index 39bc96fff..2aab15e3d 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -2865,6 +2865,9 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { mockMetrics.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() mockMetrics.On("IncDBQuery", mock.Anything, mock.Anything).Return().Maybe() mockMetrics.On("IncDBQueryError", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() + mockMetrics.On("ObserveProtocolStateProcessingDuration", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() + mockMetrics.On("IncProtocolContractCacheAccess", mock.Anything, mock.Anything).Return().Maybe() + mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Maybe() models, err := data.NewModels(pool, mockMetrics) require.NoError(t, err) @@ -3041,3 +3044,128 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { assert.Equal(t, uint32(100), mainCursor) }) } + +func Test_protocolStateCursorReady(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + cursorValue uint32 + ledgerSeq uint32 + want bool + }{ + {name: "ledger zero", cursorValue: 0, ledgerSeq: 0, want: true}, + {name: "cursor at previous ledger", cursorValue: 99, ledgerSeq: 100, want: true}, + {name: "cursor ahead", cursorValue: 100, ledgerSeq: 100, want: true}, + {name: "cursor 
behind", cursorValue: 98, ledgerSeq: 100, want: false}, + {name: "missing row semantics", cursorValue: 0, ledgerSeq: 100, want: false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, protocolStateCursorReady(tc.cursorValue, tc.ledgerSeq)) + }) + } +} + +func Test_ingestService_produceProtocolStateForProcessors_SkipsFilteredProtocols(t *testing.T) { + t.Parallel() + + ctx := context.Background() + mockMetrics := metrics.NewMockMetricsService() + mockMetrics.On("ObserveProtocolStateProcessingDuration", "selected", "process_ledger", mock.Anything).Return().Once() + mockMetrics.On("IncProtocolContractCacheAccess", "selected", "hit").Return().Once() + + selectedProcessor := NewProtocolProcessorMock(t) + skippedProcessor := NewProtocolProcessorMock(t) + expectedContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1), WasmHash: types.HashBytea(txHash2)}} + selectedProcessor.On("ProcessLedger", ctx, mock.MatchedBy(func(input ProtocolProcessorInput) bool { + return input.LedgerSequence == 123 && + input.NetworkPassphrase == "test-passphrase" && + reflect.DeepEqual(input.ProtocolContracts, expectedContracts) + })).Return(nil).Once() + + svc := &ingestService{ + metricsService: mockMetrics, + networkPassphrase: "test-passphrase", + protocolContractCache: &protocolContractCache{ + contractsByProtocol: map[string][]data.ProtocolContracts{ + "selected": expectedContracts, + }, + lastRefreshLedger: 123, + }, + } + + err := svc.produceProtocolStateForProcessors(ctx, xdr.LedgerCloseMeta{}, 123, map[string]ProtocolProcessor{ + "selected": selectedProcessor, + }) + require.NoError(t, err) + + skippedProcessor.AssertNotCalled(t, "ProcessLedger", mock.Anything, mock.Anything) + mockMetrics.AssertExpectations(t) +} + +func Test_ingestService_produceProtocolState_RecordsMetrics(t *testing.T) { + t.Parallel() + + ctx := context.Background() + mockMetrics := metrics.NewMockMetricsService() + 
mockMetrics.On("ObserveProtocolStateProcessingDuration", "testproto", "process_ledger", mock.Anything).Return().Once() + mockMetrics.On("IncProtocolContractCacheAccess", "testproto", "hit").Return().Once() + + processor := NewProtocolProcessorMock(t) + expectedContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1), WasmHash: types.HashBytea(txHash2)}} + processor.On("ProcessLedger", ctx, mock.MatchedBy(func(input ProtocolProcessorInput) bool { + return input.LedgerSequence == 123 && + input.NetworkPassphrase == "test-passphrase" && + reflect.DeepEqual(input.ProtocolContracts, expectedContracts) + })).Return(nil).Once() + + svc := &ingestService{ + metricsService: mockMetrics, + networkPassphrase: "test-passphrase", + protocolProcessors: map[string]ProtocolProcessor{ + "testproto": processor, + }, + protocolContractCache: &protocolContractCache{ + contractsByProtocol: map[string][]data.ProtocolContracts{ + "testproto": expectedContracts, + }, + lastRefreshLedger: 123, + }, + } + + err := svc.produceProtocolState(ctx, xdr.LedgerCloseMeta{}, 123) + require.NoError(t, err) + mockMetrics.AssertExpectations(t) +} + +func Test_ingestService_getProtocolContracts_RefreshesAndRecordsMetrics(t *testing.T) { + t.Parallel() + + ctx := context.Background() + mockMetrics := metrics.NewMockMetricsService() + mockMetrics.On("IncProtocolContractCacheAccess", "testproto", "miss").Return().Once() + mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Once() + + protocolContractsModel := data.NewProtocolContractsModelMock(t) + expectedContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1), WasmHash: types.HashBytea(txHash2)}} + protocolContractsModel.On("GetByProtocolID", ctx, "testproto").Return(expectedContracts, nil).Once() + + svc := &ingestService{ + metricsService: mockMetrics, + models: &data.Models{ + ProtocolContracts: protocolContractsModel, + }, + protocolProcessors: map[string]ProtocolProcessor{ + 
"testproto": NewProtocolProcessorMock(t), + }, + protocolContractCache: &protocolContractCache{ + contractsByProtocol: make(map[string][]data.ProtocolContracts), + }, + } + + contracts := svc.getProtocolContracts(ctx, "testproto", 100) + assert.Equal(t, expectedContracts, contracts) + mockMetrics.AssertExpectations(t) +} From 55c1c82885491e33b9675939414f60ece37efba8 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Wed, 18 Mar 2026 13:39:17 -0600 Subject: [PATCH 30/52] Validate ProtocolProcessors in NewIngestService for nil and duplicate IDs --- internal/services/ingest.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/internal/services/ingest.go b/internal/services/ingest.go index d86bc39cb..098b3aa9e 100644 --- a/internal/services/ingest.go +++ b/internal/services/ingest.go @@ -142,8 +142,15 @@ func NewIngestService(cfg IngestServiceConfig) (*ingestService, error) { // Build protocol processor map from slice ppMap := make(map[string]ProtocolProcessor, len(cfg.ProtocolProcessors)) - for _, p := range cfg.ProtocolProcessors { - ppMap[p.ProtocolID()] = p + for i, p := range cfg.ProtocolProcessors { + if p == nil { + return nil, fmt.Errorf("protocol processor at index %d is nil", i) + } + id := p.ProtocolID() + if _, exists := ppMap[id]; exists { + return nil, fmt.Errorf("duplicate protocol processor ID %q", id) + } + ppMap[id] = p } var ppCache *protocolContractCache From 5af595e323b7398ab8b5353641f681b996291d95 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Wed, 18 Mar 2026 13:48:12 -0600 Subject: [PATCH 31/52] Remove non-transactional reads from PersistLedgerData transaction The two IngestStore.Get calls inside the RunInPgxTransaction callback read from the connection pool instead of dbTx, breaking transactional consistency and opening extra connections. 
They are also redundant: CompareAndSwap already handles all cursor states (at ledger-1, ahead, behind, or missing) with the correct outcome, making the pre-read guards unnecessary. --- internal/services/ingest_live.go | 57 ++++++++++++-------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index d952eb20e..7044f7858 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -171,49 +171,34 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, historyCursor := fmt.Sprintf("protocol_%s_history_cursor", protocolID) currentStateCursor := fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) + expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) + next := strconv.FormatUint(uint64(ledgerSeq), 10) + // --- History State Changes --- - historyVal, histErr := m.models.IngestStore.Get(ctx, historyCursor) - if histErr != nil { - return fmt.Errorf("reading history cursor for %s: %w", protocolID, histErr) + swapped, casErr := m.models.IngestStore.CompareAndSwap(ctx, dbTx, historyCursor, expected, next) + if casErr != nil { + return fmt.Errorf("CAS history cursor for %s: %w", protocolID, casErr) } - if protocolStateCursorReady(historyVal, ledgerSeq) { - expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) - next := strconv.FormatUint(uint64(ledgerSeq), 10) - swapped, casErr := m.models.IngestStore.CompareAndSwap(ctx, dbTx, historyCursor, expected, next) - if casErr != nil { - return fmt.Errorf("CAS history cursor for %s: %w", protocolID, casErr) - } - if swapped { - start := time.Now() - persistErr := processor.PersistHistory(ctx, dbTx) - m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "persist_history", time.Since(start).Seconds()) - if persistErr != nil { - return fmt.Errorf("persisting history for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) - } + if swapped { + start := 
time.Now() + persistErr := processor.PersistHistory(ctx, dbTx) + m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "persist_history", time.Since(start).Seconds()) + if persistErr != nil { + return fmt.Errorf("persisting history for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) } - // CAS failed: migration already wrote them — skip } - // historyVal < ledgerSeq-1: migration hasn't caught up — skip // --- Current State --- - csVal, csErr := m.models.IngestStore.Get(ctx, currentStateCursor) - if csErr != nil { - return fmt.Errorf("reading current state cursor for %s: %w", protocolID, csErr) + swapped, casErr = m.models.IngestStore.CompareAndSwap(ctx, dbTx, currentStateCursor, expected, next) + if casErr != nil { + return fmt.Errorf("CAS current state cursor for %s: %w", protocolID, casErr) } - if protocolStateCursorReady(csVal, ledgerSeq) { - expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) - next := strconv.FormatUint(uint64(ledgerSeq), 10) - swapped, casErr := m.models.IngestStore.CompareAndSwap(ctx, dbTx, currentStateCursor, expected, next) - if casErr != nil { - return fmt.Errorf("CAS current state cursor for %s: %w", protocolID, casErr) - } - if swapped { - start := time.Now() - persistErr := processor.PersistCurrentState(ctx, dbTx) - m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "persist_current_state", time.Since(start).Seconds()) - if persistErr != nil { - return fmt.Errorf("persisting current state for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) - } + if swapped { + start := time.Now() + persistErr := processor.PersistCurrentState(ctx, dbTx) + m.metricsService.ObserveProtocolStateProcessingDuration(protocolID, "persist_current_state", time.Since(start).Seconds()) + if persistErr != nil { + return fmt.Errorf("persisting current state for %s at ledger %d: %w", protocolID, ledgerSeq, persistErr) } } } From 0113123dd9614d95a073a1b955241b8af687240f Mon Sep 17 00:00:00 2001 From: aristides Date: 
Wed, 18 Mar 2026 13:50:33 -0600 Subject: [PATCH 32/52] guard against ledger sequence 0 edge case, dont treat 0 as a valid CAS Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- internal/services/ingest_live.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 7044f7858..a3b3a547b 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -168,6 +168,10 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, // 5.5: Per-protocol dual CAS gating for state production if len(m.protocolProcessors) > 0 { for protocolID, processor := range m.protocolProcessors { + if ledgerSeq == 0 { + // No previous ledger to form an expected cursor value; skip CAS for this ledger. + continue + } historyCursor := fmt.Sprintf("protocol_%s_history_cursor", protocolID) currentStateCursor := fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) From 9de69772a6b84eb910c86a1cbe37deedce720420 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Wed, 18 Mar 2026 13:55:03 -0600 Subject: [PATCH 33/52] Preserve protocol contract cache entries on partial refresh failure When GetByProtocolID fails for a protocol during cache refresh, preserve the previously cached contracts instead of silently dropping them. Only advance lastRefreshLedger when all protocols refresh successfully so transient DB errors trigger a retry on the next ledger rather than serving empty data for the next 100 ledgers. 
--- internal/services/ingest_live.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index a3b3a547b..1e43c2528 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -424,16 +424,23 @@ func (m *ingestService) refreshProtocolContractCache(ctx context.Context, curren start := time.Now() newMap := make(map[string][]data.ProtocolContracts, len(m.protocolProcessors)) + allSucceeded := true for protocolID := range m.protocolProcessors { contracts, err := m.models.ProtocolContracts.GetByProtocolID(ctx, protocolID) if err != nil { - log.Ctx(ctx).Warnf("Error refreshing protocol contract cache for %s: %v", protocolID, err) + log.Ctx(ctx).Warnf("Error refreshing protocol contract cache for %s: %v; preserving previous entry", protocolID, err) + allSucceeded = false + if prev, ok := m.protocolContractCache.contractsByProtocol[protocolID]; ok { + newMap[protocolID] = prev + } continue } newMap[protocolID] = contracts } m.protocolContractCache.contractsByProtocol = newMap - m.protocolContractCache.lastRefreshLedger = currentLedger + if allSucceeded { + m.protocolContractCache.lastRefreshLedger = currentLedger + } m.metricsService.ObserveProtocolContractCacheRefreshDuration(time.Since(start).Seconds()) log.Ctx(ctx).Infof("Refreshed protocol contract cache at ledger %d", currentLedger) } From f0d2ad4d14cc865db0aad4fe44d61bada5182221 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Wed, 18 Mar 2026 13:58:27 -0600 Subject: [PATCH 34/52] Remove vacuous skippedProcessor assertion from produceProtocolStateForProcessors test The skippedProcessor mock was never included in the processors map passed to produceProtocolStateForProcessors, making AssertNotCalled trivially true. Removed the unused mock and renamed the test to accurately reflect what it verifies: that the function processes only the processors provided in the map. 
--- internal/services/ingest_test.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index 2aab15e3d..42c59e943 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -3068,7 +3068,7 @@ func Test_protocolStateCursorReady(t *testing.T) { } } -func Test_ingestService_produceProtocolStateForProcessors_SkipsFilteredProtocols(t *testing.T) { +func Test_ingestService_produceProtocolStateForProcessors_ProcessesOnlyProvidedProcessors(t *testing.T) { t.Parallel() ctx := context.Background() @@ -3077,7 +3077,6 @@ func Test_ingestService_produceProtocolStateForProcessors_SkipsFilteredProtocols mockMetrics.On("IncProtocolContractCacheAccess", "selected", "hit").Return().Once() selectedProcessor := NewProtocolProcessorMock(t) - skippedProcessor := NewProtocolProcessorMock(t) expectedContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1), WasmHash: types.HashBytea(txHash2)}} selectedProcessor.On("ProcessLedger", ctx, mock.MatchedBy(func(input ProtocolProcessorInput) bool { return input.LedgerSequence == 123 && @@ -3101,7 +3100,6 @@ func Test_ingestService_produceProtocolStateForProcessors_SkipsFilteredProtocols }) require.NoError(t, err) - skippedProcessor.AssertNotCalled(t, "ProcessLedger", mock.Anything, mock.Anything) mockMetrics.AssertExpectations(t) } From f1cf92cd9f7160305c276e54a58d292007664919 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Wed, 18 Mar 2026 14:38:41 -0600 Subject: [PATCH 35/52] Fix TOCTOU gap, missing metrics, and lock-during-IO in live ingestion Scope PersistLedgerData's CAS loop to eligibleProtocolProcessors so processors that were skipped by protocolProcessorsEligibleForProduction cannot win a CAS race if a migration advances the cursor between the pre-check and the in-transaction swap. 
Add IncDBQueryError calls on rows.Scan and rows.Err failure paths in GetByProtocolID, GetByIDs, and GetClassified so scan/iteration errors are surfaced in metrics the same way Query() errors already are. Move DB queries outside the write lock in refreshProtocolContractCache so concurrent readers are not blocked for the full DB round-trip. --- internal/data/protocol_contracts.go | 2 ++ internal/data/protocols.go | 2 ++ internal/services/ingest.go | 4 ++++ internal/services/ingest_live.go | 27 +++++++++++++++++++-------- internal/services/ingest_test.go | 4 ++++ 5 files changed, 31 insertions(+), 8 deletions(-) diff --git a/internal/data/protocol_contracts.go b/internal/data/protocol_contracts.go index db10e307e..33a16754d 100644 --- a/internal/data/protocol_contracts.go +++ b/internal/data/protocol_contracts.go @@ -107,11 +107,13 @@ func (m *ProtocolContractsModel) GetByProtocolID(ctx context.Context, protocolID for rows.Next() { var c ProtocolContracts if err := rows.Scan(&c.ContractID, &c.WasmHash, &c.Name, &c.CreatedAt); err != nil { + m.MetricsService.IncDBQueryError("GetByProtocolID", "protocol_contracts", utils.GetDBErrorType(err)) return nil, fmt.Errorf("scanning protocol contract row: %w", err) } contracts = append(contracts, c) } if err := rows.Err(); err != nil { + m.MetricsService.IncDBQueryError("GetByProtocolID", "protocol_contracts", utils.GetDBErrorType(err)) return nil, fmt.Errorf("iterating protocol contract rows: %w", err) } diff --git a/internal/data/protocols.go b/internal/data/protocols.go index 2731bbd97..cd77250a8 100644 --- a/internal/data/protocols.go +++ b/internal/data/protocols.go @@ -129,11 +129,13 @@ func (m *ProtocolsModel) GetClassified(ctx context.Context) ([]Protocols, error) for rows.Next() { var p Protocols if err := rows.Scan(&p.ID, &p.ClassificationStatus, &p.HistoryMigrationStatus, &p.CurrentStateMigrationStatus, &p.CreatedAt, &p.UpdatedAt); err != nil { + m.MetricsService.IncDBQueryError("GetClassified", "protocols", 
utils.GetDBErrorType(err)) return nil, fmt.Errorf("scanning classified protocol row: %w", err) } protocols = append(protocols, p) } if err := rows.Err(); err != nil { + m.MetricsService.IncDBQueryError("GetClassified", "protocols", utils.GetDBErrorType(err)) return nil, fmt.Errorf("iterating classified protocol rows: %w", err) } diff --git a/internal/services/ingest.go b/internal/services/ingest.go index 098b3aa9e..e8bb97c92 100644 --- a/internal/services/ingest.go +++ b/internal/services/ingest.go @@ -124,6 +124,10 @@ type ingestService struct { knownContractIDs set.Set[string] protocolProcessors map[string]ProtocolProcessor protocolContractCache *protocolContractCache + // eligibleProtocolProcessors is set by ingestLiveLedgers before each call + // to PersistLedgerData, scoping the CAS loop to only processors that had + // ProcessLedger called. Only accessed from the single-threaded live ingestion loop. + eligibleProtocolProcessors map[string]ProtocolProcessor } func NewIngestService(cfg IngestServiceConfig) (*ingestService, error) { diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 1e43c2528..6a3b52fb7 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -166,8 +166,8 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, } // 5.5: Per-protocol dual CAS gating for state production - if len(m.protocolProcessors) > 0 { - for protocolID, processor := range m.protocolProcessors { + if len(m.eligibleProtocolProcessors) > 0 { + for protocolID, processor := range m.eligibleProtocolProcessors { if ledgerSeq == 0 { // No previous ledger to form an expected cursor value; skip CAS for this ledger. 
continue @@ -327,6 +327,7 @@ func (m *ingestService) ingestLiveLedgers(ctx context.Context, startLedger uint3 if err != nil { return fmt.Errorf("checking protocol state readiness for ledger %d: %w", currentLedger, err) } + m.eligibleProtocolProcessors = eligibleProcessors // Run protocol state production (in-memory analysis before DB transaction) only // for processors that may actually persist this ledger. @@ -412,16 +413,22 @@ func (m *ingestService) getProtocolContracts(ctx context.Context, protocolID str } // refreshProtocolContractCache reloads all protocol contracts from the DB. +// The write lock is held only to check staleness and swap the new data in, +// keeping DB queries outside the lock to avoid blocking concurrent readers. func (m *ingestService) refreshProtocolContractCache(ctx context.Context, currentLedger uint32) { + // 1. Check staleness under write lock, copy previous data for fallback m.protocolContractCache.mu.Lock() - defer m.protocolContractCache.mu.Unlock() - - // Double-check after acquiring write lock - if m.protocolContractCache.lastRefreshLedger != 0 && - (currentLedger-m.protocolContractCache.lastRefreshLedger) < protocolContractRefreshInterval { + stale := m.protocolContractCache.lastRefreshLedger == 0 || + (currentLedger-m.protocolContractCache.lastRefreshLedger) >= protocolContractRefreshInterval + if !stale { + m.protocolContractCache.mu.Unlock() return } + // Snapshot previous entries for fallback on partial failure + prevContracts := m.protocolContractCache.contractsByProtocol + m.protocolContractCache.mu.Unlock() + // 2. 
Fetch new data outside the lock start := time.Now() newMap := make(map[string][]data.ProtocolContracts, len(m.protocolProcessors)) allSucceeded := true @@ -430,13 +437,17 @@ func (m *ingestService) refreshProtocolContractCache(ctx context.Context, curren if err != nil { log.Ctx(ctx).Warnf("Error refreshing protocol contract cache for %s: %v; preserving previous entry", protocolID, err) allSucceeded = false - if prev, ok := m.protocolContractCache.contractsByProtocol[protocolID]; ok { + if prev, ok := prevContracts[protocolID]; ok { newMap[protocolID] = prev } continue } newMap[protocolID] = contracts } + + // 3. Swap under write lock + m.protocolContractCache.mu.Lock() + defer m.protocolContractCache.mu.Unlock() m.protocolContractCache.contractsByProtocol = newMap if allSucceeded { m.protocolContractCache.lastRefreshLedger = currentLedger diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index 42c59e943..ffd5879d3 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -2906,6 +2906,7 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { // Set ingestStore after models are created, and simulate ProcessLedger processor.ingestStore = models.IngestStore processor.processedLedger = 100 + svc.eligibleProtocolProcessors = map[string]ProtocolProcessor{"testproto": processor} setupDBCursors(t, ctx, pool, 99, 99) setupProtocolCursors(t, ctx, pool, "testproto", 99, 99) @@ -2938,6 +2939,7 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { ctx, svc, models, pool := setupTest(t, []ProtocolProcessor{processor}) processor.ingestStore = models.IngestStore processor.processedLedger = 100 + svc.eligibleProtocolProcessors = map[string]ProtocolProcessor{"testproto": processor} setupDBCursors(t, ctx, pool, 99, 99) setupProtocolCursors(t, ctx, pool, "testproto", 100, 100) @@ -2970,6 +2972,7 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { ctx, svc, models, pool := setupTest(t, 
[]ProtocolProcessor{processor}) processor.ingestStore = models.IngestStore processor.processedLedger = 100 + svc.eligibleProtocolProcessors = map[string]ProtocolProcessor{"testproto": processor} setupDBCursors(t, ctx, pool, 99, 99) setupProtocolCursors(t, ctx, pool, "testproto", 98, 98) @@ -3002,6 +3005,7 @@ func Test_PersistLedgerData_ProtocolCASGating(t *testing.T) { ctx, svc, models, pool := setupTest(t, []ProtocolProcessor{processor}) processor.ingestStore = models.IngestStore processor.processedLedger = 100 + svc.eligibleProtocolProcessors = map[string]ProtocolProcessor{"testproto": processor} setupDBCursors(t, ctx, pool, 99, 99) // No protocol cursors inserted — simulates first run From ab14a8ac9ea8661d17bd9bd3692e917fc1ec5380 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Wed, 18 Mar 2026 14:48:28 -0600 Subject: [PATCH 36/52] Fix query storm from partial protocol contract cache refresh failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When any GetByProtocolID call fails during cache refresh, lastRefreshLedger was never updated, causing the staleness check to trigger on every ledger instead of every 100th — a 100x query amplification. Make the ledger update unconditional since the cache already preserves previous entries on partial failure, so data integrity is not at risk. Add warn-level logging to distinguish partial from full refreshes. 
--- internal/data/mocks.go | 35 -------- .../2026-02-20.0-protocol_wasms.sql | 9 --- .../2026-03-09.0-protocol_contracts.sql | 12 --- internal/services/checkpoint_test.go | 3 - internal/services/ingest_live.go | 32 ++------ internal/services/ingest_test.go | 79 +++++++++++++++++++ internal/services/token_ingestion_test.go | 73 ----------------- 7 files changed, 85 insertions(+), 158 deletions(-) delete mode 100644 internal/db/migrations/2026-02-20.0-protocol_wasms.sql delete mode 100644 internal/db/migrations/2026-03-09.0-protocol_contracts.sql diff --git a/internal/data/mocks.go b/internal/data/mocks.go index d23bcc646..0e8aa6b08 100644 --- a/internal/data/mocks.go +++ b/internal/data/mocks.go @@ -5,7 +5,6 @@ package data import ( "context" - "github.com/google/uuid" "github.com/jackc/pgx/v5" "github.com/stretchr/testify/mock" @@ -189,40 +188,6 @@ func (m *SACBalanceModelMock) BatchCopy(ctx context.Context, dbTx pgx.Tx, balanc return args.Error(0) } -// AccountContractTokensModelMock is a mock implementation of AccountContractTokensModelInterface. -type AccountContractTokensModelMock struct { - mock.Mock -} - -var _ AccountContractTokensModelInterface = (*AccountContractTokensModelMock)(nil) - -// NewAccountContractTokensModelMock creates a new instance of AccountContractTokensModelMock. 
-func NewAccountContractTokensModelMock(t interface { - mock.TestingT - Cleanup(func()) -}, -) *AccountContractTokensModelMock { - mockModel := &AccountContractTokensModelMock{} - mockModel.Mock.Test(t) - - t.Cleanup(func() { mockModel.AssertExpectations(t) }) - - return mockModel -} - -func (m *AccountContractTokensModelMock) GetByAccount(ctx context.Context, accountAddress string) ([]*Contract, error) { - args := m.Called(ctx, accountAddress) - if args.Get(0) == nil { - return nil, args.Error(1) - } - return args.Get(0).([]*Contract), args.Error(1) -} - -func (m *AccountContractTokensModelMock) BatchInsert(ctx context.Context, dbTx pgx.Tx, contractsByAccount map[string][]uuid.UUID) error { - args := m.Called(ctx, dbTx, contractsByAccount) - return args.Error(0) -} - // ProtocolWasmsModelMock is a mock implementation of ProtocolWasmsModelInterface. type ProtocolWasmsModelMock struct { mock.Mock diff --git a/internal/db/migrations/2026-02-20.0-protocol_wasms.sql b/internal/db/migrations/2026-02-20.0-protocol_wasms.sql deleted file mode 100644 index a9b1af269..000000000 --- a/internal/db/migrations/2026-02-20.0-protocol_wasms.sql +++ /dev/null @@ -1,9 +0,0 @@ --- +migrate Up -CREATE TABLE protocol_wasms ( - wasm_hash BYTEA PRIMARY KEY, - protocol_id TEXT, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - --- +migrate Down -DROP TABLE IF EXISTS protocol_wasms; diff --git a/internal/db/migrations/2026-03-09.0-protocol_contracts.sql b/internal/db/migrations/2026-03-09.0-protocol_contracts.sql deleted file mode 100644 index a3d055e38..000000000 --- a/internal/db/migrations/2026-03-09.0-protocol_contracts.sql +++ /dev/null @@ -1,12 +0,0 @@ --- +migrate Up -CREATE TABLE protocol_contracts ( - contract_id BYTEA PRIMARY KEY, - wasm_hash BYTEA NOT NULL REFERENCES protocol_wasms(wasm_hash), - name TEXT, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - -CREATE INDEX idx_protocol_contracts_wasm_hash ON protocol_contracts(wasm_hash); - --- +migrate Down -DROP TABLE IF 
EXISTS protocol_contracts; diff --git a/internal/services/checkpoint_test.go b/internal/services/checkpoint_test.go index 3451311f9..58315feb0 100644 --- a/internal/services/checkpoint_test.go +++ b/internal/services/checkpoint_test.go @@ -210,9 +210,6 @@ func TestCheckpointService_PopulateFromCheckpoint_ContractCodeEntry(t *testing.T // finalize -> persistProtocolWasms inserts the tracked WASM hash f.protocolWasmModel.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything).Return(nil).Once() - // finalize -> persistProtocolWasms inserts the tracked WASM hash - f.protocolWasmModel.On("BatchInsert", mock.Anything, mock.Anything, mock.Anything).Return(nil).Once() - err := f.svc.PopulateFromCheckpoint(context.Background(), 100, func(_ pgx.Tx) error { return nil }) require.NoError(t, err) } diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 6a3b52fb7..21669903f 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -123,28 +123,6 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, } } - // 2.5: Persist protocol wasms and contracts - protocolWasms := buffer.GetProtocolWasms() - if len(protocolWasms) > 0 { - wasmSlice := make([]data.ProtocolWasms, 0, len(protocolWasms)) - for _, wasm := range protocolWasms { - wasmSlice = append(wasmSlice, wasm) - } - if txErr = m.models.ProtocolWasms.BatchInsert(ctx, dbTx, wasmSlice); txErr != nil { - return fmt.Errorf("inserting protocol wasms for ledger %d: %w", ledgerSeq, txErr) - } - } - protocolContracts := buffer.GetProtocolContracts() - if len(protocolContracts) > 0 { - contractSlice := make([]data.ProtocolContracts, 0, len(protocolContracts)) - for _, contract := range protocolContracts { - contractSlice = append(contractSlice, contract) - } - if txErr = m.models.ProtocolContracts.BatchInsert(ctx, dbTx, contractSlice); txErr != nil { - return fmt.Errorf("inserting protocol contracts for ledger %d: %w", ledgerSeq, txErr) - 
} - } - // 3. Insert transactions/operations/state_changes numTxs, numOps, txErr = m.insertIntoDB(ctx, dbTx, buffer) if txErr != nil { @@ -449,11 +427,13 @@ func (m *ingestService) refreshProtocolContractCache(ctx context.Context, curren m.protocolContractCache.mu.Lock() defer m.protocolContractCache.mu.Unlock() m.protocolContractCache.contractsByProtocol = newMap - if allSucceeded { - m.protocolContractCache.lastRefreshLedger = currentLedger - } + m.protocolContractCache.lastRefreshLedger = currentLedger m.metricsService.ObserveProtocolContractCacheRefreshDuration(time.Since(start).Seconds()) - log.Ctx(ctx).Infof("Refreshed protocol contract cache at ledger %d", currentLedger) + if !allSucceeded { + log.Ctx(ctx).Warnf("Protocol contract cache partially refreshed at ledger %d; will retry at next interval", currentLedger) + } else { + log.Ctx(ctx).Infof("Refreshed protocol contract cache at ledger %d", currentLedger) + } } // ingestProcessedDataWithRetry wraps PersistLedgerData with retry logic. diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index ffd5879d3..5b0ab82f7 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -3171,3 +3171,82 @@ func Test_ingestService_getProtocolContracts_RefreshesAndRecordsMetrics(t *testi assert.Equal(t, expectedContracts, contracts) mockMetrics.AssertExpectations(t) } + +func Test_ingestService_refreshProtocolContractCache_PartialFailure_StillUpdatesLedger(t *testing.T) { + t.Parallel() + + ctx := context.Background() + mockMetrics := metrics.NewMockMetricsService() + mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Once() + + protocolContractsModel := data.NewProtocolContractsModelMock(t) + protocolContractsModel.On("GetByProtocolID", ctx, "proto_ok"). + Return([]data.ProtocolContracts{{ContractID: types.HashBytea(txHash1)}}, nil).Once() + protocolContractsModel.On("GetByProtocolID", ctx, "proto_fail"). 
+ Return(nil, fmt.Errorf("db error")).Once() + + svc := &ingestService{ + metricsService: mockMetrics, + models: &data.Models{ProtocolContracts: protocolContractsModel}, + protocolProcessors: map[string]ProtocolProcessor{ + "proto_ok": NewProtocolProcessorMock(t), + "proto_fail": NewProtocolProcessorMock(t), + }, + protocolContractCache: &protocolContractCache{ + contractsByProtocol: make(map[string][]data.ProtocolContracts), + }, + } + + svc.refreshProtocolContractCache(ctx, 200) + + // lastRefreshLedger must advance despite partial failure + assert.Equal(t, uint32(200), svc.protocolContractCache.lastRefreshLedger) + + // Calling again at currentLedger+1 should be a no-op (not stale yet). + // The .Once() expectations on the mock ensure no extra DB calls happen. + svc.refreshProtocolContractCache(ctx, 201) + + mockMetrics.AssertExpectations(t) +} + +func Test_ingestService_refreshProtocolContractCache_PartialFailure_PreservesPreviousEntries(t *testing.T) { + t.Parallel() + + ctx := context.Background() + mockMetrics := metrics.NewMockMetricsService() + mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Once() + + previousContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1)}} + newContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash2)}} + + protocolContractsModel := data.NewProtocolContractsModelMock(t) + protocolContractsModel.On("GetByProtocolID", ctx, "proto_ok"). + Return(newContracts, nil).Once() + protocolContractsModel.On("GetByProtocolID", ctx, "proto_fail"). 
+ Return(nil, fmt.Errorf("db error")).Once() + + svc := &ingestService{ + metricsService: mockMetrics, + models: &data.Models{ProtocolContracts: protocolContractsModel}, + protocolProcessors: map[string]ProtocolProcessor{ + "proto_ok": NewProtocolProcessorMock(t), + "proto_fail": NewProtocolProcessorMock(t), + }, + protocolContractCache: &protocolContractCache{ + contractsByProtocol: map[string][]data.ProtocolContracts{ + "proto_ok": previousContracts, + "proto_fail": previousContracts, + }, + lastRefreshLedger: 0, // force refresh + }, + } + + svc.refreshProtocolContractCache(ctx, 300) + + // Successful protocol gets new data + assert.Equal(t, newContracts, svc.protocolContractCache.contractsByProtocol["proto_ok"]) + // Failed protocol retains previous entries + assert.Equal(t, previousContracts, svc.protocolContractCache.contractsByProtocol["proto_fail"]) + + mockMetrics.AssertExpectations(t) +} diff --git a/internal/services/token_ingestion_test.go b/internal/services/token_ingestion_test.go index a150012f9..5b42e2184 100644 --- a/internal/services/token_ingestion_test.go +++ b/internal/services/token_ingestion_test.go @@ -3,7 +3,6 @@ package services import ( "context" - "errors" "testing" "github.com/jackc/pgx/v5" @@ -380,75 +379,3 @@ func TestProcessTokenChanges(t *testing.T) { assert.NoError(t, err) }) } - -func TestTokenProcessor_ProcessContractCode(t *testing.T) { - ctx := context.Background() - - t.Run("valid_sep41_contract", func(t *testing.T) { - contractValidatorMock := NewContractValidatorMock(t) - tp := &tokenProcessor{ - contractValidator: contractValidatorMock, - data: checkpointData{ - contractTypesByWasmHash: make(map[xdr.Hash]types.ContractType), - }, - } - - hash := xdr.Hash{1, 2, 3} - code := []byte{0xDE, 0xAD} - contractValidatorMock.On("ValidateFromContractCode", mock.Anything, code). 
- Return(types.ContractTypeSEP41, nil).Once() - - err := tp.ProcessContractCode(ctx, hash, code) - require.NoError(t, err) - assert.Equal(t, types.ContractTypeSEP41, tp.data.contractTypesByWasmHash[hash]) - assert.Equal(t, 1, tp.entries) - }) - - t.Run("validator_error_skips_entry", func(t *testing.T) { - contractValidatorMock := NewContractValidatorMock(t) - tp := &tokenProcessor{ - contractValidator: contractValidatorMock, - data: checkpointData{ - contractTypesByWasmHash: make(map[xdr.Hash]types.ContractType), - }, - } - - hash := xdr.Hash{4, 5, 6} - code := []byte{0xBA, 0xD0} - contractValidatorMock.On("ValidateFromContractCode", mock.Anything, code). - Return(types.ContractTypeUnknown, errors.New("invalid WASM")).Once() - - err := tp.ProcessContractCode(ctx, hash, code) - require.NoError(t, err, "validator error should not propagate") - assert.Empty(t, tp.data.contractTypesByWasmHash, "no entry should be stored on error") - assert.Equal(t, 0, tp.entries, "entries counter should not be incremented") - }) - - t.Run("multiple_contract_codes", func(t *testing.T) { - contractValidatorMock := NewContractValidatorMock(t) - tp := &tokenProcessor{ - contractValidator: contractValidatorMock, - data: checkpointData{ - contractTypesByWasmHash: make(map[xdr.Hash]types.ContractType), - }, - } - - hash1 := xdr.Hash{10} - code1 := []byte{0x01} - hash2 := xdr.Hash{20} - code2 := []byte{0x02} - - contractValidatorMock.On("ValidateFromContractCode", mock.Anything, code1). - Return(types.ContractTypeSEP41, nil).Once() - contractValidatorMock.On("ValidateFromContractCode", mock.Anything, code2). 
- Return(types.ContractTypeSEP41, nil).Once() - - require.NoError(t, tp.ProcessContractCode(ctx, hash1, code1)) - require.NoError(t, tp.ProcessContractCode(ctx, hash2, code2)) - - assert.Len(t, tp.data.contractTypesByWasmHash, 2) - assert.Equal(t, types.ContractTypeSEP41, tp.data.contractTypesByWasmHash[hash1]) - assert.Equal(t, types.ContractTypeSEP41, tp.data.contractTypesByWasmHash[hash2]) - assert.Equal(t, 2, tp.entries) - }) -} From b19dc6609239e6d71ecf09efba3c497173958a0e Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Thu, 19 Mar 2026 14:21:01 -0600 Subject: [PATCH 37/52] Add protocol history migration service and asymmetric CAS integration tests Introduce ProtocolMigrateHistoryService that backfills protocol state changes for historical ledgers, walking forward from the oldest ingest cursor to the latest cursor and persisting PersistHistory at each ledger. The service tracks progress via a per-protocol history_cursor using CAS, refreshes the protocol contract cache periodically, and marks history_migration_status on completion. 
Supporting changes: - Add `protocol-data-migrate` CLI command (cmd/protocol_data_migrate.go) - Add UpdateHistoryMigrationStatus to ProtocolsModel and its mock/tests - Add per-call tracking (persistedHistorySeqs, persistedCurrentStateSeqs) to integrationTestProcessor for verifying persistence call counts Integration test additions: - Enhance TestHistoryMigrationThenLiveIngestionHandoff with per-ledger PersistHistory verification across migration and live handoff phases - Add TestLiveIngestionHistoryCursorReadyCurrentStateLags proving the asymmetric cursor CAS path: when history_cursor is ready but current_state_cursor lags, only PersistHistory executes while PersistCurrentState is correctly skipped --- cmd/protocol_data_migrate.go | 156 ++++ cmd/root.go | 1 + internal/data/ingest_store.go | 5 + internal/data/mocks.go | 5 + internal/data/protocols.go | 25 + internal/data/protocols_test.go | 28 + .../integrationtests/data_migration_test.go | 323 ++++++- internal/services/protocol_migrate_history.go | 411 +++++++++ .../services/protocol_migrate_history_test.go | 808 ++++++++++++++++++ 9 files changed, 1749 insertions(+), 13 deletions(-) create mode 100644 cmd/protocol_data_migrate.go create mode 100644 internal/services/protocol_migrate_history.go create mode 100644 internal/services/protocol_migrate_history_test.go diff --git a/cmd/protocol_data_migrate.go b/cmd/protocol_data_migrate.go new file mode 100644 index 000000000..769b18047 --- /dev/null +++ b/cmd/protocol_data_migrate.go @@ -0,0 +1,156 @@ +package cmd + +import ( + "context" + "fmt" + + _ "github.com/lib/pq" + "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + "github.com/stellar/go-stellar-sdk/support/config" + "github.com/stellar/go-stellar-sdk/support/log" + + "github.com/stellar/wallet-backend/cmd/utils" + "github.com/stellar/wallet-backend/internal/data" + "github.com/stellar/wallet-backend/internal/db" + 
"github.com/stellar/wallet-backend/internal/metrics" + "github.com/stellar/wallet-backend/internal/services" + internalutils "github.com/stellar/wallet-backend/internal/utils" +) + +type protocolMigrateCmd struct{} + +func (c *protocolMigrateCmd) Command() *cobra.Command { + cmd := &cobra.Command{ + Use: "protocol-migrate", + Short: "Data migration commands for protocol state", + Long: "Parent command for protocol data migrations. Use subcommands to run specific migration tasks.", + Run: func(cmd *cobra.Command, args []string) { + _ = cmd.Help() + }, + } + + cmd.AddCommand(c.historyCommand()) + + return cmd +} + +func (c *protocolMigrateCmd) historyCommand() *cobra.Command { + var databaseURL string + var rpcURL string + var networkPassphrase string + var protocolIDs []string + var logLevel string + + cfgOpts := config.ConfigOptions{ + utils.DatabaseURLOption(&databaseURL), + utils.RPCURLOption(&rpcURL), + utils.NetworkPassphraseOption(&networkPassphrase), + } + + cmd := &cobra.Command{ + Use: "history", + Short: "Backfill protocol history state from oldest to latest ingested ledger", + Long: "Processes historical ledgers from oldest_ingest_ledger to the tip, producing protocol state changes and converging with live ingestion via CAS-gated cursors.", + PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + if err := cfgOpts.RequireE(); err != nil { + return fmt.Errorf("requiring values of config options: %w", err) + } + if err := cfgOpts.SetValues(); err != nil { + return fmt.Errorf("setting values of config options: %w", err) + } + + if logLevel != "" { + ll, err := logrus.ParseLevel(logLevel) + if err != nil { + return fmt.Errorf("invalid log level %q: %w", logLevel, err) + } + log.DefaultLogger.SetLevel(ll) + } + + if len(protocolIDs) == 0 { + return fmt.Errorf("at least one --protocol-id is required") + } + return nil + }, + RunE: func(_ *cobra.Command, _ []string) error { + return c.RunHistory(databaseURL, rpcURL, networkPassphrase, protocolIDs) 
+ }, + } + + if err := cfgOpts.Init(cmd); err != nil { + log.Fatalf("Error initializing a config option: %s", err.Error()) + } + + cmd.Flags().StringSliceVar(&protocolIDs, "protocol-id", nil, "Protocol ID(s) to migrate (required, repeatable)") + cmd.Flags().StringVar(&logLevel, "log-level", "", `Log level: "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", "PANIC"`) + + return cmd +} + +func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase string, protocolIDs []string) error { + ctx := context.Background() + + // Build processors from protocol IDs using the dynamic registry + var processors []services.ProtocolProcessor + for _, pid := range protocolIDs { + factory, ok := services.GetProcessor(pid) + if !ok { + return fmt.Errorf("unknown protocol ID %q — no processor registered", pid) + } + p := factory() + if p == nil { + return fmt.Errorf("processor factory for protocol %q returned nil", pid) + } + processors = append(processors, p) + } + + // Open DB connection + dbPool, err := db.OpenDBConnectionPool(databaseURL) + if err != nil { + return fmt.Errorf("opening database connection: %w", err) + } + defer internalutils.DeferredClose(ctx, dbPool, "closing dbPool in protocol migrate history") + + // Create models + sqlxDB, err := dbPool.SqlxDB(ctx) + if err != nil { + return fmt.Errorf("getting sqlx DB: %w", err) + } + metricsService := metrics.NewMetricsService(sqlxDB) + models, err := data.NewModels(dbPool, metricsService) + if err != nil { + return fmt.Errorf("creating models: %w", err) + } + + // Create ledger backend for fetching historical ledgers + ledgerBackend := ledgerbackend.NewRPCLedgerBackend(ledgerbackend.RPCLedgerBackendOptions{ + RPCServerURL: rpcURL, + BufferSize: 10, + }) + defer func() { + if closeErr := ledgerBackend.Close(); closeErr != nil { + log.Ctx(ctx).Errorf("error closing ledger backend: %v", closeErr) + } + }() + + service, err := services.NewProtocolMigrateHistoryService(services.ProtocolMigrateHistoryConfig{ + 
DB: dbPool, + LedgerBackend: ledgerBackend, + ProtocolsModel: models.Protocols, + ProtocolContractsModel: models.ProtocolContracts, + IngestStore: models.IngestStore, + NetworkPassphrase: networkPassphrase, + Processors: processors, + }) + if err != nil { + return fmt.Errorf("creating protocol migrate history service: %w", err) + } + + if err := service.Run(ctx, protocolIDs); err != nil { + return fmt.Errorf("running protocol migrate history: %w", err) + } + + return nil +} diff --git a/cmd/root.go b/cmd/root.go index ff0aa4aca..ba6c10bc0 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -53,4 +53,5 @@ func SetupCLI(cfg RootConfig) { rootCmd.AddCommand((&distributionAccountCmd{}).Command()) rootCmd.AddCommand((&loadtestCmd{}).Command()) rootCmd.AddCommand((&protocolSetupCmd{}).Command()) + rootCmd.AddCommand((&protocolMigrateCmd{}).Command()) } diff --git a/internal/data/ingest_store.go b/internal/data/ingest_store.go index 2d7e1951b..50e5e7bac 100644 --- a/internal/data/ingest_store.go +++ b/internal/data/ingest_store.go @@ -15,6 +15,11 @@ import ( "github.com/stellar/wallet-backend/internal/utils" ) +const ( + LatestLedgerCursorName = "latest_ingest_ledger" + OldestLedgerCursorName = "oldest_ingest_ledger" +) + type LedgerRange struct { GapStart uint32 `db:"gap_start"` GapEnd uint32 `db:"gap_end"` diff --git a/internal/data/mocks.go b/internal/data/mocks.go index 0e8aa6b08..4d2a1d640 100644 --- a/internal/data/mocks.go +++ b/internal/data/mocks.go @@ -253,6 +253,11 @@ func (m *ProtocolsModelMock) UpdateClassificationStatus(ctx context.Context, dbT return args.Error(0) } +func (m *ProtocolsModelMock) UpdateHistoryMigrationStatus(ctx context.Context, dbTx pgx.Tx, protocolIDs []string, status string) error { + args := m.Called(ctx, dbTx, protocolIDs, status) + return args.Error(0) +} + func (m *ProtocolsModelMock) GetByIDs(ctx context.Context, protocolIDs []string) ([]Protocols, error) { args := m.Called(ctx, protocolIDs) if args.Get(0) == nil { diff --git 
a/internal/data/protocols.go b/internal/data/protocols.go index cd77250a8..9841affb1 100644 --- a/internal/data/protocols.go +++ b/internal/data/protocols.go @@ -33,6 +33,7 @@ type Protocols struct { // ProtocolsModelInterface defines the interface for protocols operations. type ProtocolsModelInterface interface { UpdateClassificationStatus(ctx context.Context, dbTx pgx.Tx, protocolIDs []string, status string) error + UpdateHistoryMigrationStatus(ctx context.Context, dbTx pgx.Tx, protocolIDs []string, status string) error GetByIDs(ctx context.Context, protocolIDs []string) ([]Protocols, error) GetClassified(ctx context.Context) ([]Protocols, error) InsertIfNotExists(ctx context.Context, dbTx pgx.Tx, protocolID string) error @@ -70,6 +71,30 @@ func (m *ProtocolsModel) UpdateClassificationStatus(ctx context.Context, dbTx pg return nil } +// UpdateHistoryMigrationStatus updates history_migration_status and updated_at for the given protocol IDs. +func (m *ProtocolsModel) UpdateHistoryMigrationStatus(ctx context.Context, dbTx pgx.Tx, protocolIDs []string, status string) error { + if len(protocolIDs) == 0 { + return nil + } + + const query = ` + UPDATE protocols + SET history_migration_status = $1, updated_at = NOW() + WHERE id = ANY($2) + ` + + start := time.Now() + _, err := dbTx.Exec(ctx, query, status, protocolIDs) + if err != nil { + m.MetricsService.IncDBQueryError("UpdateHistoryMigrationStatus", "protocols", utils.GetDBErrorType(err)) + return fmt.Errorf("updating history migration status for protocols: %w", err) + } + + m.MetricsService.ObserveDBQueryDuration("UpdateHistoryMigrationStatus", "protocols", time.Since(start).Seconds()) + m.MetricsService.IncDBQuery("UpdateHistoryMigrationStatus", "protocols") + return nil +} + // GetByIDs returns protocols matching the given IDs. 
func (m *ProtocolsModel) GetByIDs(ctx context.Context, protocolIDs []string) ([]Protocols, error) { if len(protocolIDs) == 0 { diff --git a/internal/data/protocols_test.go b/internal/data/protocols_test.go index 096893d86..d6ee8f1f5 100644 --- a/internal/data/protocols_test.go +++ b/internal/data/protocols_test.go @@ -98,6 +98,34 @@ func TestProtocolsModel(t *testing.T) { assert.Equal(t, StatusInProgress, status) }) + t.Run("UpdateHistoryMigrationStatus updates status", func(t *testing.T) { + cleanUpDB() + mockMetricsService := metrics.NewMockMetricsService() + mockMetricsService.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return() + mockMetricsService.On("IncDBQuery", mock.Anything, mock.Anything).Return() + defer mockMetricsService.AssertExpectations(t) + + model := &ProtocolsModel{DB: dbConnectionPool, MetricsService: mockMetricsService} + + // Insert protocol first + err := db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.InsertIfNotExists(ctx, dbTx, "SEP41") + }) + require.NoError(t, err) + + // Update status + err = db.RunInPgxTransaction(ctx, dbConnectionPool, func(dbTx pgx.Tx) error { + return model.UpdateHistoryMigrationStatus(ctx, dbTx, []string{"SEP41"}, StatusInProgress) + }) + require.NoError(t, err) + + // Verify + var status string + err = dbConnectionPool.GetContext(ctx, &status, `SELECT history_migration_status FROM protocols WHERE id = 'SEP41'`) + require.NoError(t, err) + assert.Equal(t, StatusInProgress, status) + }) + t.Run("GetByIDs returns matching protocols", func(t *testing.T) { cleanUpDB() mockMetricsService := metrics.NewMockMetricsService() diff --git a/internal/integrationtests/data_migration_test.go b/internal/integrationtests/data_migration_test.go index 15d96f4a6..fe29d5390 100644 --- a/internal/integrationtests/data_migration_test.go +++ b/internal/integrationtests/data_migration_test.go @@ -23,6 +23,8 @@ import ( 
"github.com/stellar/wallet-backend/internal/services" ) +const sep41ProtocolID = "SEP41" + type DataMigrationTestSuite struct { suite.Suite testEnv *infrastructure.TestEnvironment @@ -94,7 +96,7 @@ func (s *DataMigrationTestSuite) ingestStoreKeyExists(ctx context.Context, pool func (s *DataMigrationTestSuite) runSEP41ProtocolSetup(ctx context.Context, pool db.ConnectionPool, models *data.Models) { err := db.RunInPgxTransaction(ctx, pool, func(dbTx pgx.Tx) error { - return models.Protocols.InsertIfNotExists(ctx, dbTx, "SEP41") + return models.Protocols.InsertIfNotExists(ctx, dbTx, sep41ProtocolID) }) s.Require().NoError(err) @@ -110,7 +112,7 @@ func (s *DataMigrationTestSuite) runSEP41ProtocolSetup(ctx context.Context, pool []services.ProtocolValidator{validator}, ) - s.Require().NoError(svc.Run(ctx, []string{"SEP41"})) + s.Require().NoError(svc.Run(ctx, []string{sep41ProtocolID})) } func (s *DataMigrationTestSuite) newServiceMetricsMock() *metrics.MockMetricsService { @@ -181,11 +183,42 @@ func (b *singleLedgerBackend) Close() error { return nil } +type rangeLedgerBackend struct { + startSeq, endSeq uint32 + ledgerMeta xdr.LedgerCloseMeta +} + +func (b *rangeLedgerBackend) GetLatestLedgerSequence(context.Context) (uint32, error) { + return b.endSeq, nil +} + +func (b *rangeLedgerBackend) GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, error) { + if sequence >= b.startSeq && sequence <= b.endSeq { + return b.ledgerMeta, nil + } + <-ctx.Done() + return xdr.LedgerCloseMeta{}, ctx.Err() +} + +func (b *rangeLedgerBackend) PrepareRange(context.Context, ledgerbackend.Range) error { + return nil +} + +func (b *rangeLedgerBackend) IsPrepared(context.Context, ledgerbackend.Range) (bool, error) { + return true, nil +} + +func (b *rangeLedgerBackend) Close() error { + return nil +} + type integrationTestProcessor struct { - id string - processedLedger uint32 - seenContracts []data.ProtocolContracts - ingestStore *data.IngestStoreModel + id string + 
processedLedger uint32 + seenContracts []data.ProtocolContracts + ingestStore *data.IngestStoreModel + persistedHistorySeqs []uint32 + persistedCurrentStateSeqs []uint32 } func (p *integrationTestProcessor) ProtocolID() string { return p.id } @@ -197,10 +230,12 @@ func (p *integrationTestProcessor) ProcessLedger(_ context.Context, input servic } func (p *integrationTestProcessor) PersistHistory(ctx context.Context, dbTx pgx.Tx) error { + p.persistedHistorySeqs = append(p.persistedHistorySeqs, p.processedLedger) return p.ingestStore.Update(ctx, dbTx, fmt.Sprintf("test_%s_history_written", p.id), p.processedLedger) } func (p *integrationTestProcessor) PersistCurrentState(ctx context.Context, dbTx pgx.Tx) error { + p.persistedCurrentStateSeqs = append(p.persistedCurrentStateSeqs, p.processedLedger) return p.ingestStore.Update(ctx, dbTx, fmt.Sprintf("test_%s_current_state_written", p.id), p.processedLedger) } @@ -237,6 +272,25 @@ func (s *DataMigrationTestSuite) newLiveRunService( return svc } +func (s *DataMigrationTestSuite) newHistoryMigrationService( + pool db.ConnectionPool, + models *data.Models, + ledgerBackend ledgerbackend.LedgerBackend, + processor services.ProtocolProcessor, +) services.ProtocolMigrateHistoryService { + svc, err := services.NewProtocolMigrateHistoryService(services.ProtocolMigrateHistoryConfig{ + DB: pool, + LedgerBackend: ledgerBackend, + ProtocolsModel: models.Protocols, + ProtocolContractsModel: models.ProtocolContracts, + IngestStore: models.IngestStore, + NetworkPassphrase: "Test SDF Network ; September 2015", + Processors: []services.ProtocolProcessor{processor}, + }) + s.Require().NoError(err) + return svc +} + func protocolContractKeys(contracts []data.ProtocolContracts) []string { keys := make([]string, len(contracts)) for i, contract := range contracts { @@ -267,7 +321,7 @@ func (s *DataMigrationTestSuite) TestProtocolSetupClassifiesIngestedWasms() { s.Assert().Greater(classifiedCount, 0, "at least one WASM should have been 
classified as SEP41") s.T().Logf("Classified %d/%d WASMs as SEP41", classifiedCount, totalWasms) - protocols, err := models.Protocols.GetByIDs(ctx, []string{"SEP41"}) + protocols, err := models.Protocols.GetByIDs(ctx, []string{sep41ProtocolID}) s.Require().NoError(err) s.Require().Len(protocols, 1) s.Assert().Equal(data.StatusSuccess, protocols[0].ClassificationStatus) @@ -297,7 +351,7 @@ func (s *DataMigrationTestSuite) TestLiveIngestionProcessesSetupClassifiedSEP41W s.runSEP41ProtocolSetup(ctx, pool, models) - classifiedContracts, err := models.ProtocolContracts.GetByProtocolID(ctx, "SEP41") + classifiedContracts, err := models.ProtocolContracts.GetByProtocolID(ctx, sep41ProtocolID) s.Require().NoError(err) s.Require().NotEmpty(classifiedContracts, "setup-classified SEP41 contracts should be queryable for live ingestion") expectedContractKeys := protocolContractKeys(classifiedContracts) @@ -307,7 +361,7 @@ func (s *DataMigrationTestSuite) TestLiveIngestionProcessesSetupClassifiedSEP41W s.upsertIngestStoreValue(ctx, pool, "protocol_SEP41_history_cursor", testLedger-1) s.upsertIngestStoreValue(ctx, pool, "protocol_SEP41_current_state_cursor", testLedger-1) - processor := &integrationTestProcessor{id: "SEP41", ingestStore: models.IngestStore} + processor := &integrationTestProcessor{id: sep41ProtocolID, ingestStore: models.IngestStore} rpcService := services.NewRPCServiceMock(s.T()) rpcService.On("GetHealth").Return(entities.RPCGetHealthResult{ Status: "healthy", @@ -391,14 +445,14 @@ func (s *DataMigrationTestSuite) TestLiveIngestionSkipsSetupClassifiedSEP41Witho s.runSEP41ProtocolSetup(ctx, pool, models) - classifiedContracts, err := models.ProtocolContracts.GetByProtocolID(ctx, "SEP41") + classifiedContracts, err := models.ProtocolContracts.GetByProtocolID(ctx, sep41ProtocolID) s.Require().NoError(err) s.Require().NotEmpty(classifiedContracts, "setup-classified SEP41 contracts should be queryable for live ingestion") const cursorName = "test_live_run_cursor" 
s.upsertIngestStoreValue(ctx, pool, cursorName, testLedger-1) - processor := &integrationTestProcessor{id: "SEP41", ingestStore: models.IngestStore} + processor := &integrationTestProcessor{id: sep41ProtocolID, ingestStore: models.IngestStore} rpcService := services.NewRPCServiceMock(s.T()) rpcService.On("GetHealth").Return(entities.RPCGetHealthResult{ Status: "healthy", @@ -467,7 +521,7 @@ func (s *DataMigrationTestSuite) TestLiveIngestionSkipsSetupClassifiedSEP41WhenP s.runSEP41ProtocolSetup(ctx, pool, models) - classifiedContracts, err := models.ProtocolContracts.GetByProtocolID(ctx, "SEP41") + classifiedContracts, err := models.ProtocolContracts.GetByProtocolID(ctx, sep41ProtocolID) s.Require().NoError(err) s.Require().NotEmpty(classifiedContracts, "setup-classified SEP41 contracts should be queryable for live ingestion") @@ -476,7 +530,7 @@ func (s *DataMigrationTestSuite) TestLiveIngestionSkipsSetupClassifiedSEP41WhenP s.upsertIngestStoreValue(ctx, pool, "protocol_SEP41_history_cursor", testLedger-2) s.upsertIngestStoreValue(ctx, pool, "protocol_SEP41_current_state_cursor", testLedger-2) - processor := &integrationTestProcessor{id: "SEP41", ingestStore: models.IngestStore} + processor := &integrationTestProcessor{id: sep41ProtocolID, ingestStore: models.IngestStore} rpcService := services.NewRPCServiceMock(s.T()) rpcService.On("GetHealth").Return(entities.RPCGetHealthResult{ Status: "healthy", @@ -537,6 +591,249 @@ func (s *DataMigrationTestSuite) TestLiveIngestionSkipsSetupClassifiedSEP41WhenP } } +func (s *DataMigrationTestSuite) TestHistoryMigrationThenLiveIngestionHandoff() { + ctx := context.Background() + pool, cleanup := s.setupDB() + defer cleanup() + + models := s.setupModels(pool) + + latestLedger, err := models.IngestStore.Get(ctx, "latest_ingest_ledger") + s.Require().NoError(err) + s.Require().Greater(latestLedger, uint32(0)) + baseSeq := latestLedger + 2000 + + // Phase 1: Protocol setup — classify contracts, verify no protocol cursors yet. 
+ s.runSEP41ProtocolSetup(ctx, pool, models) + + classifiedContracts, err := models.ProtocolContracts.GetByProtocolID(ctx, sep41ProtocolID) + s.Require().NoError(err) + s.Require().NotEmpty(classifiedContracts, "setup should classify at least one SEP41 contract") + expectedContractKeys := protocolContractKeys(classifiedContracts) + + s.Assert().False(s.ingestStoreKeyExists(ctx, pool, "protocol_SEP41_history_cursor"), "protocol cursors should not exist after setup") + s.Assert().False(s.ingestStoreKeyExists(ctx, pool, "protocol_SEP41_current_state_cursor"), "protocol cursors should not exist after setup") + + // Phase 2: History migration — backfill 3 ledgers [baseSeq, baseSeq+2]. + s.upsertIngestStoreValue(ctx, pool, data.OldestLedgerCursorName, baseSeq) + s.upsertIngestStoreValue(ctx, pool, data.LatestLedgerCursorName, baseSeq+2) + s.upsertIngestStoreValue(ctx, pool, "protocol_SEP41_current_state_cursor", baseSeq+2) + + processor := &integrationTestProcessor{id: sep41ProtocolID, ingestStore: models.IngestStore} + + rangeBackend := &rangeLedgerBackend{ + startSeq: baseSeq, + endSeq: baseSeq + 2, + ledgerMeta: s.mustLedgerCloseMeta(), + } + + migrationSvc := s.newHistoryMigrationService( + pool, models, rangeBackend, processor, + ) + + err = migrationSvc.Run(ctx, []string{sep41ProtocolID}) + s.Require().NoError(err, "history migration should complete successfully") + + historyCursor, err := models.IngestStore.Get(ctx, "protocol_SEP41_history_cursor") + s.Require().NoError(err) + s.Assert().Equal(baseSeq+2, historyCursor, "history cursor should advance to the tip of the migration range") + + historyWritten, err := models.IngestStore.Get(ctx, "test_SEP41_history_written") + s.Require().NoError(err) + s.Assert().Equal(baseSeq+2, historyWritten, "PersistHistory should have committed data through the last migrated ledger") + + protocols, err := models.Protocols.GetByIDs(ctx, []string{sep41ProtocolID}) + s.Require().NoError(err) + s.Require().Len(protocols, 1) + 
s.Assert().Equal(data.StatusSuccess, protocols[0].HistoryMigrationStatus, "history migration status should be success") + + s.Assert().NotEmpty(processor.seenContracts, "processor should have seen classified contracts during history migration") + s.Assert().Equal(expectedContractKeys, protocolContractKeys(processor.seenContracts)) + s.Assert().Equal([]uint32{baseSeq, baseSeq + 1, baseSeq + 2}, processor.persistedHistorySeqs, + "PersistHistory should be called for every ledger in the migration range") + + // Phase 3: Live ingestion handoff — process baseSeq+3, proving CAS picks up where migration left off. + const liveCursorName = "test_handoff_live_cursor" + s.upsertIngestStoreValue(ctx, pool, liveCursorName, baseSeq+2) + s.upsertIngestStoreValue(ctx, pool, data.LatestLedgerCursorName, baseSeq+3) + + processor.processedLedger = 0 + processor.seenContracts = nil + processor.persistedHistorySeqs = nil + processor.persistedCurrentStateSeqs = nil + + liveBackend := &singleLedgerBackend{ + ledgerSeq: baseSeq + 3, + ledgerMeta: s.mustLedgerCloseMeta(), + } + + rpcService := services.NewRPCServiceMock(s.T()) + rpcService.On("GetHealth").Return(entities.RPCGetHealthResult{ + Status: "healthy", + LatestLedger: baseSeq + 3, + OldestLedger: 1, + }, nil).Once() + + metricsService := s.newServiceMetricsMock() + liveSvc := s.newLiveRunService(models, rpcService, liveBackend, metricsService, processor, liveCursorName) + + runCtx, cancel := context.WithCancel(ctx) + defer cancel() + + runErrCh := make(chan error, 1) + go func() { + runErrCh <- liveSvc.Run(runCtx, 0, 0) + }() + + var earlyRunErr error + require.Eventually(s.T(), func() bool { + select { + case earlyRunErr = <-runErrCh: + return true + default: + } + + hc, err := models.IngestStore.Get(ctx, "protocol_SEP41_history_cursor") + if err != nil || hc != baseSeq+3 { + return false + } + + hw, err := models.IngestStore.Get(ctx, "test_SEP41_history_written") + if err != nil || hw != baseSeq+3 { + return false + } + + csc, 
err := models.IngestStore.Get(ctx, "protocol_SEP41_current_state_cursor") + if err != nil || csc != baseSeq+3 { + return false + } + + csw, err := models.IngestStore.Get(ctx, "test_SEP41_current_state_written") + return err == nil && csw == baseSeq+3 + }, 10*time.Second, 100*time.Millisecond) + + s.Require().NoError(earlyRunErr, "live Run exited before the expected DB state was committed") + s.Assert().Equal(baseSeq+3, processor.processedLedger, "live ingestion should have processed the handoff ledger") + s.Assert().Equal(expectedContractKeys, protocolContractKeys(processor.seenContracts), "live ingestion should see the same classified contracts") + s.Assert().Equal([]uint32{baseSeq + 3}, processor.persistedHistorySeqs, + "live ingestion should call PersistHistory for the handoff ledger") + s.Assert().Equal([]uint32{baseSeq + 3}, processor.persistedCurrentStateSeqs, + "live ingestion should call PersistCurrentState for the handoff ledger") + + cancel() + + select { + case err := <-runErrCh: + s.Require().Error(err) + s.Require().ErrorIs(err, context.Canceled) + case <-time.After(5 * time.Second): + s.FailNow("timed out waiting for live Run to stop after context cancellation") + } +} + +func (s *DataMigrationTestSuite) TestLiveIngestionHistoryCursorReadyCurrentStateLags() { + ctx := context.Background() + pool, cleanup := s.setupDB() + defer cleanup() + + models := s.setupModels(pool) + + latestLedger, err := models.IngestStore.Get(ctx, "latest_ingest_ledger") + s.Require().NoError(err) + s.Require().Greater(latestLedger, uint32(0)) + testLedger := latestLedger + 1000 + + // Protocol setup — classify contracts. 
+ s.runSEP41ProtocolSetup(ctx, pool, models) + + classifiedContracts, err := models.ProtocolContracts.GetByProtocolID(ctx, sep41ProtocolID) + s.Require().NoError(err) + s.Require().NotEmpty(classifiedContracts) + + // Set up asymmetric cursors: + // history_cursor = testLedger-1 → ready (CAS expected matches) + // current_state_cursor = testLedger-2 → lags (CAS expected=testLedger-1 ≠ testLedger-2) + const cursorName = "test_asymmetric_cursor" + s.upsertIngestStoreValue(ctx, pool, cursorName, testLedger-1) + s.upsertIngestStoreValue(ctx, pool, "protocol_SEP41_history_cursor", testLedger-1) + s.upsertIngestStoreValue(ctx, pool, "protocol_SEP41_current_state_cursor", testLedger-2) + + processor := &integrationTestProcessor{id: sep41ProtocolID, ingestStore: models.IngestStore} + rpcService := services.NewRPCServiceMock(s.T()) + rpcService.On("GetHealth").Return(entities.RPCGetHealthResult{ + Status: "healthy", + LatestLedger: testLedger, + OldestLedger: 1, + }, nil).Once() + + metricsService := s.newServiceMetricsMock() + ledgerBackend := &singleLedgerBackend{ + ledgerSeq: testLedger, + ledgerMeta: s.mustLedgerCloseMeta(), + } + svc := s.newLiveRunService(models, rpcService, ledgerBackend, metricsService, processor, cursorName) + + runCtx, cancel := context.WithCancel(ctx) + defer cancel() + + runErrCh := make(chan error, 1) + go func() { + runErrCh <- svc.Run(runCtx, 0, 0) + }() + + var earlyRunErr error + require.Eventually(s.T(), func() bool { + select { + case earlyRunErr = <-runErrCh: + return true + default: + } + + cursor, err := models.IngestStore.Get(ctx, cursorName) + return err == nil && cursor == testLedger + }, 10*time.Second, 100*time.Millisecond) + + s.Require().NoError(earlyRunErr, "live Run exited before the main ingest cursor advanced") + + // ProcessLedger WAS called — OR precheck passed because history cursor was ready. 
+ s.Assert().Equal(testLedger, processor.processedLedger, + "ProcessLedger should run when at least one protocol cursor is ready (OR precheck)") + + // History CAS succeeded — cursor advanced and PersistHistory was called. + historyCursor, err := models.IngestStore.Get(ctx, "protocol_SEP41_history_cursor") + s.Require().NoError(err) + s.Assert().Equal(testLedger, historyCursor, + "history cursor should advance when its CAS succeeds independently") + + historyWritten, err := models.IngestStore.Get(ctx, "test_SEP41_history_written") + s.Require().NoError(err) + s.Assert().Equal(testLedger, historyWritten, + "PersistHistory should commit when history CAS succeeds") + s.Assert().Equal([]uint32{testLedger}, processor.persistedHistorySeqs, + "PersistHistory should be called exactly once for the processed ledger") + + // Current-state CAS failed — cursor unchanged, PersistCurrentState never called. + currentStateCursor, err := models.IngestStore.Get(ctx, "protocol_SEP41_current_state_cursor") + s.Require().NoError(err) + s.Assert().Equal(testLedger-2, currentStateCursor, + "current-state cursor should remain unchanged when CAS rejects mismatched expected value") + + s.Assert().False(s.ingestStoreKeyExists(ctx, pool, "test_SEP41_current_state_written"), + "PersistCurrentState should not be called when current-state CAS fails") + s.Assert().Empty(processor.persistedCurrentStateSeqs, + "persistedCurrentStateSeqs should be empty when current-state CAS fails") + + cancel() + + select { + case err := <-runErrCh: + s.Require().Error(err) + s.Require().ErrorIs(err, context.Canceled) + case <-time.After(5 * time.Second): + s.FailNow("timed out waiting for live Run to stop after context cancellation") + } +} + func TestDataMigrationTestSuiteStandalone(t *testing.T) { t.Skip("Run via TestIntegrationTests") } diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go new file mode 100644 index 000000000..77f3feb50 --- /dev/null +++ 
b/internal/services/protocol_migrate_history.go @@ -0,0 +1,411 @@ +package services + +import ( + "context" + "fmt" + "strconv" + "time" + + "github.com/jackc/pgx/v5" + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" + + "github.com/stellar/wallet-backend/internal/data" + "github.com/stellar/wallet-backend/internal/db" +) + +const ( + // convergencePollTimeout is the timeout for polling for new ledgers at the tip. + convergencePollTimeout = 5 * time.Second +) + +// protocolTracker holds per-protocol state for the ledger-first migration loop. +type protocolTracker struct { + protocolID string + cursorName string + cursorValue uint32 + processor ProtocolProcessor + handedOff bool +} + +// ProtocolMigrateHistoryService backfills protocol state changes for historical ledgers. +type ProtocolMigrateHistoryService interface { + Run(ctx context.Context, protocolIDs []string) error +} + +var _ ProtocolMigrateHistoryService = (*protocolMigrateHistoryService)(nil) + +type protocolMigrateHistoryService struct { + db db.ConnectionPool + ledgerBackend ledgerbackend.LedgerBackend + protocolsModel data.ProtocolsModelInterface + protocolContractsModel data.ProtocolContractsModelInterface + ingestStore *data.IngestStoreModel + networkPassphrase string + processors map[string]ProtocolProcessor +} + +// ProtocolMigrateHistoryConfig holds the configuration for creating a protocolMigrateHistoryService. +type ProtocolMigrateHistoryConfig struct { + DB db.ConnectionPool + LedgerBackend ledgerbackend.LedgerBackend + ProtocolsModel data.ProtocolsModelInterface + ProtocolContractsModel data.ProtocolContractsModelInterface + IngestStore *data.IngestStoreModel + NetworkPassphrase string + Processors []ProtocolProcessor +} + +// NewProtocolMigrateHistoryService creates a new protocolMigrateHistoryService from the given config. 
+func NewProtocolMigrateHistoryService(cfg ProtocolMigrateHistoryConfig) (*protocolMigrateHistoryService, error) { + ppMap := make(map[string]ProtocolProcessor, len(cfg.Processors)) + for i, p := range cfg.Processors { + if p == nil { + return nil, fmt.Errorf("protocol processor at index %d is nil", i) + } + id := p.ProtocolID() + if _, exists := ppMap[id]; exists { + return nil, fmt.Errorf("duplicate protocol processor ID %q", id) + } + ppMap[id] = p + } + + return &protocolMigrateHistoryService{ + db: cfg.DB, + ledgerBackend: cfg.LedgerBackend, + protocolsModel: cfg.ProtocolsModel, + protocolContractsModel: cfg.ProtocolContractsModel, + ingestStore: cfg.IngestStore, + networkPassphrase: cfg.NetworkPassphrase, + processors: ppMap, + }, nil +} + +// Run performs history migration for the given protocol IDs. +func (s *protocolMigrateHistoryService) Run(ctx context.Context, protocolIDs []string) error { + // Phase 1: Validate + activeProtocolIDs, err := s.validate(ctx, protocolIDs) + if err != nil { + return fmt.Errorf("validating protocols: %w", err) + } + + if len(activeProtocolIDs) == 0 { + log.Ctx(ctx).Info("All protocols already completed history migration, nothing to do") + return nil + } + + if err := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { + return s.protocolsModel.UpdateHistoryMigrationStatus(ctx, dbTx, activeProtocolIDs, data.StatusInProgress) + }); err != nil { + return fmt.Errorf("setting history migration status to in_progress: %w", err) + } + + // Phase 2: Process each protocol + if err := s.processAllProtocols(ctx, activeProtocolIDs); err != nil { + // Best-effort set status to failed + cleanupCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if txErr := db.RunInPgxTransaction(cleanupCtx, s.db, func(dbTx pgx.Tx) error { + return s.protocolsModel.UpdateHistoryMigrationStatus(cleanupCtx, dbTx, activeProtocolIDs, data.StatusFailed) + }); txErr != nil { + log.Ctx(ctx).Errorf("error setting 
history migration status to failed: %v", txErr) + } + return fmt.Errorf("processing protocols: %w", err) + } + + // Phase 3: Set status to success + if err := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { + return s.protocolsModel.UpdateHistoryMigrationStatus(ctx, dbTx, activeProtocolIDs, data.StatusSuccess) + }); err != nil { + return fmt.Errorf("setting history migration status to success: %w", err) + } + + log.Ctx(ctx).Infof("History migration completed successfully for protocols: %v", activeProtocolIDs) + return nil +} + +// validate checks that all protocol IDs are valid and ready for history migration. +// Returns the list of protocol IDs that need processing (excludes already-success ones). +func (s *protocolMigrateHistoryService) validate(ctx context.Context, protocolIDs []string) ([]string, error) { + // Check each protocol has a registered processor + for _, pid := range protocolIDs { + if _, ok := s.processors[pid]; !ok { + return nil, fmt.Errorf("no processor registered for protocol %q", pid) + } + } + + // Verify all protocols exist in the DB and classification is complete + protocols, err := s.protocolsModel.GetByIDs(ctx, protocolIDs) + if err != nil { + return nil, fmt.Errorf("querying protocols: %w", err) + } + + foundSet := make(map[string]*data.Protocols, len(protocols)) + for i := range protocols { + foundSet[protocols[i].ID] = &protocols[i] + } + + var missing []string + for _, pid := range protocolIDs { + if _, ok := foundSet[pid]; !ok { + missing = append(missing, pid) + } + } + if len(missing) > 0 { + return nil, fmt.Errorf("protocols not found in DB: %v", missing) + } + + // Check classification status and filter out already-completed migrations + var active []string + for _, pid := range protocolIDs { + p := foundSet[pid] + if p.ClassificationStatus != data.StatusSuccess { + return nil, fmt.Errorf("protocol %q classification not complete (status: %s)", pid, p.ClassificationStatus) + } + if p.HistoryMigrationStatus == 
data.StatusSuccess { + log.Ctx(ctx).Infof("Protocol %q history migration already completed, skipping", pid) + continue + } + active = append(active, pid) + } + + return active, nil +} + +// processAllProtocols runs history migration for all protocols using ledger-first iteration. +// Each ledger is fetched once and processed by all eligible protocols, avoiding redundant RPC calls. +func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, protocolIDs []string) error { + // Read oldest_ingest_ledger + oldestLedger, err := s.ingestStore.Get(ctx, data.OldestLedgerCursorName) + if err != nil { + return fmt.Errorf("reading oldest ingest ledger: %w", err) + } + if oldestLedger == 0 { + return fmt.Errorf("ingestion has not started yet (oldest_ingest_ledger is 0)") + } + + // Initialize trackers: read/initialize cursor for each protocol + trackers := make([]*protocolTracker, 0, len(protocolIDs)) + for _, pid := range protocolIDs { + cursorName := fmt.Sprintf("protocol_%s_history_cursor", pid) + cursorValue, readErr := s.ingestStore.Get(ctx, cursorName) + if readErr != nil { + return fmt.Errorf("reading history cursor for %s: %w", pid, readErr) + } + + if cursorValue == 0 { + initValue := oldestLedger - 1 + if initErr := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { + return s.ingestStore.Update(ctx, dbTx, cursorName, initValue) + }); initErr != nil { + return fmt.Errorf("initializing history cursor for %s: %w", pid, initErr) + } + cursorValue = initValue + } + + trackers = append(trackers, &protocolTracker{ + protocolID: pid, + cursorName: cursorName, + cursorValue: cursorValue, + processor: s.processors[pid], + }) + } + + // Load contracts once — all relevant contracts are in the DB before migration starts + // (validate() requires ClassificationStatus == StatusSuccess). 
+ contractsByProtocol := make(map[string][]data.ProtocolContracts, len(trackers)) + for _, t := range trackers { + contracts, err := s.protocolContractsModel.GetByProtocolID(ctx, t.protocolID) + if err != nil { + return fmt.Errorf("loading contracts for %s: %w", t.protocolID, err) + } + contractsByProtocol[t.protocolID] = contracts + } + + for { + if allHandedOff(trackers) { + return nil + } + + latestLedger, err := s.ingestStore.Get(ctx, data.LatestLedgerCursorName) + if err != nil { + return fmt.Errorf("reading latest ingest ledger: %w", err) + } + + // Find minimum cursor among non-handed-off trackers + var minCursor uint32 + first := true + for _, t := range trackers { + if t.handedOff { + continue + } + if first || t.cursorValue < minCursor { + minCursor = t.cursorValue + first = false + } + } + + startLedger := minCursor + 1 + if startLedger > latestLedger { + log.Ctx(ctx).Infof("All protocols at or past tip %d, migration complete", latestLedger) + return nil + } + + log.Ctx(ctx).Infof("Processing ledgers %d to %d for %d protocol(s)", startLedger, latestLedger, len(protocolIDs)) + + if err := s.ledgerBackend.PrepareRange(ctx, ledgerbackend.BoundedRange(startLedger, latestLedger)); err != nil { + return fmt.Errorf("preparing ledger range [%d, %d]: %w", startLedger, latestLedger, err) + } + + for seq := startLedger; seq <= latestLedger; seq++ { + select { + case <-ctx.Done(): + return fmt.Errorf("context cancelled: %w", ctx.Err()) + default: + } + + // Skip if no tracker needs this ledger + needsFetch := false + for _, t := range trackers { + if !t.handedOff && t.cursorValue < seq { + needsFetch = true + break + } + } + if !needsFetch { + continue + } + + // Fetch ledger ONCE for all protocols + ledgerMeta, fetchErr := s.getLedgerWithRetry(ctx, seq) + if fetchErr != nil { + return fmt.Errorf("fetching ledger %d: %w", seq, fetchErr) + } + + // Process each eligible tracker + for _, t := range trackers { + if t.handedOff || t.cursorValue >= seq { + continue + } + 
+ contracts := contractsByProtocol[t.protocolID] + input := ProtocolProcessorInput{ + LedgerSequence: seq, + LedgerCloseMeta: ledgerMeta, + ProtocolContracts: contracts, + NetworkPassphrase: s.networkPassphrase, + } + if err := t.processor.ProcessLedger(ctx, input); err != nil { + return fmt.Errorf("processing ledger %d for protocol %s: %w", seq, t.protocolID, err) + } + + // CAS + persist in a transaction + expected := strconv.FormatUint(uint64(seq-1), 10) + next := strconv.FormatUint(uint64(seq), 10) + + var swapped bool + if err := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { + var casErr error + swapped, casErr = s.ingestStore.CompareAndSwap(ctx, dbTx, t.cursorName, expected, next) + if casErr != nil { + return fmt.Errorf("CAS history cursor for %s: %w", t.protocolID, casErr) + } + if swapped { + return t.processor.PersistHistory(ctx, dbTx) + } + return nil + }); err != nil { + return fmt.Errorf("persisting ledger %d for protocol %s: %w", seq, t.protocolID, err) + } + + if !swapped { + log.Ctx(ctx).Infof("Protocol %s: CAS failed at ledger %d, handoff to live ingestion detected", t.protocolID, seq) + t.handedOff = true + } else { + t.cursorValue = seq + } + } + + if allHandedOff(trackers) { + return nil + } + + if seq%100 == 0 { + log.Ctx(ctx).Infof("Progress: processed ledger %d / %d", seq, latestLedger) + } + } + + if allHandedOff(trackers) { + return nil + } + + // Check if tip has advanced + newLatest, err := s.ingestStore.Get(ctx, data.LatestLedgerCursorName) + if err != nil { + return fmt.Errorf("re-reading latest ingest ledger: %w", err) + } + if newLatest > latestLedger { + continue + } + + // At tip — poll briefly for convergence + pollCtx, cancel := context.WithTimeout(ctx, convergencePollTimeout) + if err := s.ledgerBackend.PrepareRange(pollCtx, ledgerbackend.UnboundedRange(latestLedger+1)); err != nil { + cancel() + log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger) + return nil + } + + _, getLedgerErr := 
s.ledgerBackend.GetLedger(pollCtx, latestLedger+1)
+		cancel()
+		if getLedgerErr != nil {
+			log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger)
+			return nil
+		}
+
+		// New ledger available, loop again
+	}
+}
+
+// allHandedOff returns true if every tracker has been handed off to live ingestion.
+func allHandedOff(trackers []*protocolTracker) bool {
+	for _, t := range trackers {
+		if !t.handedOff {
+			return false
+		}
+	}
+	return true
+}
+
+// getLedgerWithRetry fetches a ledger with exponential backoff retry logic.
+func (s *protocolMigrateHistoryService) getLedgerWithRetry(ctx context.Context, ledgerSeq uint32) (xdr.LedgerCloseMeta, error) {
+	var lastErr error
+	for attempt := 0; attempt < maxLedgerFetchRetries; attempt++ {
+		select {
+		case <-ctx.Done():
+			return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled: %w", ctx.Err())
+		default:
+		}
+
+		ledgerMeta, err := s.ledgerBackend.GetLedger(ctx, ledgerSeq)
+		if err == nil {
+			return ledgerMeta, nil
+		}
+		lastErr = err
+
+		backoff := time.Duration(1<<attempt) * time.Second
+		if backoff > maxRetryBackoff {
+			backoff = maxRetryBackoff
+		}
+		log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...",
+			ledgerSeq, attempt+1, maxLedgerFetchRetries, err, backoff)
+
+		select {
+		case <-ctx.Done():
+			return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled during backoff: %w", ctx.Err())
+		case <-time.After(backoff):
+		}
+	}
+	return xdr.LedgerCloseMeta{}, fmt.Errorf("failed after %d attempts: %w", maxLedgerFetchRetries, lastErr)
+}
diff --git a/internal/services/protocol_migrate_history_test.go b/internal/services/protocol_migrate_history_test.go
new file mode 100644
index 000000000..7440fad3d
--- /dev/null
+++ b/internal/services/protocol_migrate_history_test.go
@@ -0,0 +1,808 @@
+package services
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+	"testing"
+
+	"github.com/jackc/pgx/v5"
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
+	"github.com/stellar/go-stellar-sdk/xdr"
+	
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/stellar/wallet-backend/internal/data" + "github.com/stellar/wallet-backend/internal/db" + "github.com/stellar/wallet-backend/internal/db/dbtest" + "github.com/stellar/wallet-backend/internal/metrics" +) + +// multiLedgerBackend is a test double that serves ledger meta for a range of ledgers. +type multiLedgerBackend struct { + ledgers map[uint32]xdr.LedgerCloseMeta +} + +func (b *multiLedgerBackend) GetLatestLedgerSequence(_ context.Context) (uint32, error) { + var max uint32 + for seq := range b.ledgers { + if seq > max { + max = seq + } + } + return max, nil +} + +func (b *multiLedgerBackend) GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, error) { + if meta, ok := b.ledgers[sequence]; ok { + return meta, nil + } + <-ctx.Done() + return xdr.LedgerCloseMeta{}, ctx.Err() +} + +func (b *multiLedgerBackend) PrepareRange(context.Context, ledgerbackend.Range) error { + return nil +} + +func (b *multiLedgerBackend) IsPrepared(context.Context, ledgerbackend.Range) (bool, error) { + return true, nil +} + +func (b *multiLedgerBackend) Close() error { + return nil +} + +// recordingProcessor is a test double that records all ProcessLedger inputs +// and writes per-ledger sentinel keys to ingest_store during PersistHistory, +// proving that PersistHistory actually committed data inside the transaction. 
+type recordingProcessor struct { + id string + ingestStore *data.IngestStoreModel + processedInputs []ProtocolProcessorInput + persistedSeqs []uint32 + lastProcessed uint32 +} + +func (p *recordingProcessor) ProtocolID() string { return p.id } + +func (p *recordingProcessor) ProcessLedger(_ context.Context, input ProtocolProcessorInput) error { + p.processedInputs = append(p.processedInputs, input) + p.lastProcessed = input.LedgerSequence + return nil +} + +func (p *recordingProcessor) PersistHistory(ctx context.Context, dbTx pgx.Tx) error { + p.persistedSeqs = append(p.persistedSeqs, p.lastProcessed) + return p.ingestStore.Update(ctx, dbTx, fmt.Sprintf("test_%s_history_%d", p.id, p.lastProcessed), p.lastProcessed) +} + +func (p *recordingProcessor) PersistCurrentState(_ context.Context, _ pgx.Tx) error { + return nil +} + +// cursorAdvancingProcessor simulates live ingestion taking over by advancing +// its own cursor in the DB during ProcessLedger, causing the subsequent CAS to fail. 
+type cursorAdvancingProcessor struct { + recordingProcessor + dbPool db.ConnectionPool + advanceAtSeq uint32 +} + +func (p *cursorAdvancingProcessor) ProcessLedger(ctx context.Context, input ProtocolProcessorInput) error { + if input.LedgerSequence == p.advanceAtSeq { + _, _ = p.dbPool.ExecContext(ctx, + `UPDATE ingest_store SET value = $1 WHERE key = $2`, + strconv.FormatUint(uint64(p.advanceAtSeq+100), 10), + fmt.Sprintf("protocol_%s_history_cursor", p.id)) + } + return p.recordingProcessor.ProcessLedger(ctx, input) +} + +func getHistorySentinel(t *testing.T, ctx context.Context, dbPool db.ConnectionPool, protocolID string, seq uint32) (uint32, bool) { + t.Helper() + var val uint32 + err := dbPool.GetContext(ctx, &val, `SELECT value FROM ingest_store WHERE key = $1`, fmt.Sprintf("test_%s_history_%d", protocolID, seq)) + if err != nil { + return 0, false + } + return val, true +} + +func dummyLedgerMeta(seq uint32) xdr.LedgerCloseMeta { + return xdr.LedgerCloseMeta{ + V: 0, + V0: &xdr.LedgerCloseMetaV0{ + LedgerHeader: xdr.LedgerHeaderHistoryEntry{ + Header: xdr.LedgerHeader{ + LedgerSeq: xdr.Uint32(seq), + }, + }, + }, + } +} + +func setupTestDB(t *testing.T) (db.ConnectionPool, *data.IngestStoreModel) { + t.Helper() + dbt := dbtest.Open(t) + t.Cleanup(func() { dbt.Close() }) + + dbPool, err := db.OpenDBConnectionPool(dbt.DSN) + require.NoError(t, err) + t.Cleanup(func() { dbPool.Close() }) + + mockMetrics := metrics.NewMockMetricsService() + mockMetrics.On("ObserveDBQueryDuration", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() + mockMetrics.On("IncDBQuery", mock.Anything, mock.Anything).Return().Maybe() + mockMetrics.On("IncDBQueryError", mock.Anything, mock.Anything, mock.Anything).Return().Maybe() + + ingestStore := &data.IngestStoreModel{DB: dbPool, MetricsService: mockMetrics} + return dbPool, ingestStore +} + +func setIngestStoreValue(t *testing.T, ctx context.Context, dbPool db.ConnectionPool, key string, value uint32) { + t.Helper() + _, 
err := dbPool.ExecContext(ctx, `INSERT INTO ingest_store (key, value) VALUES ($1, $2) + ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value`, key, strconv.FormatUint(uint64(value), 10)) + require.NoError(t, err) +} + +func getIngestStoreValue(t *testing.T, ctx context.Context, dbPool db.ConnectionPool, key string) uint32 { + t.Helper() + var val uint32 + err := dbPool.GetContext(ctx, &val, `SELECT value FROM ingest_store WHERE key = $1`, key) + require.NoError(t, err) + return val +} + +func TestProtocolMigrateHistory(t *testing.T) { + t.Run("happy path — single protocol, 3 ledgers, all CAS succeed", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + // Set up ingest cursors + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 102) + + // Set up protocol in DB + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processor := &recordingProcessor{id: "testproto", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusSuccess).Return(nil) + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: 
dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + 102: dummyLedgerMeta(102), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{processor}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.NoError(t, err) + + // Verify cursor advanced + cursorVal := getIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor") + assert.Equal(t, uint32(102), cursorVal) + + // Verify PersistHistory actually committed sentinel values to the DB + for _, seq := range []uint32{100, 101, 102} { + val, ok := getHistorySentinel(t, ctx, dbPool, "testproto", seq) + require.True(t, ok, "sentinel for ledger %d should exist", seq) + assert.Equal(t, seq, val, "sentinel value for ledger %d", seq) + } + + // Verify processor recorded all inputs + require.Len(t, processor.processedInputs, 3) + for i, seq := range []uint32{100, 101, 102} { + assert.Equal(t, seq, processor.processedInputs[i].LedgerSequence) + assert.Equal(t, "Test SDF Network ; September 2015", processor.processedInputs[i].NetworkPassphrase) + } + assert.Equal(t, []uint32{100, 101, 102}, processor.persistedSeqs) + }) + + t.Run("CAS failure (handoff) — CAS fails at ledger N, status success", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 102) + // Pre-set cursor to 100, so processing starts at 101 + // But we'll simulate CAS failure at 101 by having someone else advance it + setIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor", 100) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) 
VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processorMock := NewProtocolProcessorMock(t) + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusSuccess).Return(nil) + + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil).Maybe() + + processorMock.On("ProtocolID").Return("testproto") + processorMock.On("ProcessLedger", mock.Anything, mock.Anything).Return(nil).Maybe() + processorMock.On("PersistHistory", mock.Anything, mock.Anything).Return(nil).Maybe() + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 101: dummyLedgerMeta(101), + 102: dummyLedgerMeta(102), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{processorMock}, + }) + require.NoError(t, err) + + // Simulate CAS failure: advance cursor externally before service runs + setIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor", 105) + + err = svc.Run(ctx, []string{"testproto"}) + require.NoError(t, err) // Handoff is success + }) + + t.Run("validation: classification not complete", func(t *testing.T) { + ctx := context.Background() + 
dbPool, ingestStore := setupTestDB(t) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processorMock := NewProtocolProcessorMock(t) + + processorMock.On("ProtocolID").Return("testproto") + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusInProgress, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + + backend := &multiLedgerBackend{ledgers: map[uint32]xdr.LedgerCloseMeta{}} + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{processorMock}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "classification not complete") + }) + + t.Run("validation: protocol not found in DB", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processorMock := NewProtocolProcessorMock(t) + + processorMock.On("ProtocolID").Return("testproto") + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{}, nil) + + backend := &multiLedgerBackend{ledgers: map[uint32]xdr.LedgerCloseMeta{}} + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{processorMock}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + 
require.Error(t, err) + assert.Contains(t, err.Error(), "not found in DB") + }) + + t.Run("validation: no processor registered for protocol", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processorMock := NewProtocolProcessorMock(t) + + processorMock.On("ProtocolID").Return("otherproto") + + backend := &multiLedgerBackend{ledgers: map[uint32]xdr.LedgerCloseMeta{}} + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{processorMock}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "no processor registered") + }) + + t.Run("resume from cursor — cursor already at N, process from N+1", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 103) + // Cursor already at 101 (previous partial run) + setIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor", 101) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processor := &recordingProcessor{id: "testproto", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", 
ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusSuccess).Return(nil) + + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 102: dummyLedgerMeta(102), + 103: dummyLedgerMeta(103), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{processor}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.NoError(t, err) + + cursorVal := getIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor") + assert.Equal(t, uint32(103), cursorVal) + + // Verify sentinels exist only for 102, 103 (not 100, 101) + for _, seq := range []uint32{100, 101} { + _, ok := getHistorySentinel(t, ctx, dbPool, "testproto", seq) + assert.False(t, ok, "sentinel for ledger %d should NOT exist (already processed)", seq) + } + for _, seq := range []uint32{102, 103} { + val, ok := getHistorySentinel(t, ctx, dbPool, "testproto", seq) + require.True(t, ok, "sentinel for ledger %d should exist", seq) + assert.Equal(t, seq, val, "sentinel value for ledger %d", seq) + } + assert.Equal(t, []uint32{102, 103}, processor.persistedSeqs) + }) + + t.Run("error during ProcessLedger — status failed", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + 
setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 101) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processorMock := NewProtocolProcessorMock(t) + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusFailed).Return(nil) + + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + + processorMock.On("ProtocolID").Return("testproto") + processorMock.On("ProcessLedger", mock.Anything, mock.Anything).Return(fmt.Errorf("simulated ProcessLedger error")) + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{processorMock}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "simulated ProcessLedger error") + }) + + t.Run("error during PersistHistory — tx rolls back, status failed", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := 
setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 100) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processorMock := NewProtocolProcessorMock(t) + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusFailed).Return(nil) + + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + + processorMock.On("ProtocolID").Return("testproto") + processorMock.On("ProcessLedger", mock.Anything, mock.Anything).Return(nil) + processorMock.On("PersistHistory", mock.Anything, mock.Anything).Return(fmt.Errorf("simulated PersistHistory error")) + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{processorMock}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "simulated PersistHistory 
error") + + // Cursor should NOT have advanced because tx rolled back + cursorVal := getIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor") + assert.Equal(t, uint32(99), cursorVal) // initialized to oldest-1 + }) + + t.Run("already at tip — cursor equals latest, immediate success", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 105) + setIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor", 105) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processor := &recordingProcessor{id: "testproto", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusSuccess).Return(nil) + + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + + backend := &multiLedgerBackend{ledgers: map[uint32]xdr.LedgerCloseMeta{}} + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: 
[]ProtocolProcessor{processor}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.NoError(t, err) + + // No processing happened — no sentinels should exist + _, ok := getHistorySentinel(t, ctx, dbPool, "testproto", 105) + assert.False(t, ok, "no sentinel should exist when already at tip") + assert.Empty(t, processor.processedInputs) + }) + + t.Run("multiple protocols — both process each ledger via shared fetch", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 101) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('proto1', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + _, err = dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('proto2', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + proc1 := &recordingProcessor{id: "proto1", ingestStore: ingestStore} + proc2 := &recordingProcessor{id: "proto2", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"proto1", "proto2"}).Return([]data.Protocols{ + {ID: "proto1", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + {ID: "proto2", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"proto1", "proto2"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"proto1", "proto2"}, data.StatusSuccess).Return(nil) + + 
protocolContractsModel.On("GetByProtocolID", mock.Anything, "proto1").Return([]data.ProtocolContracts{}, nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "proto2").Return([]data.ProtocolContracts{}, nil) + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{proc1, proc2}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"proto1", "proto2"}) + require.NoError(t, err) + + cursor1 := getIngestStoreValue(t, ctx, dbPool, "protocol_proto1_history_cursor") + cursor2 := getIngestStoreValue(t, ctx, dbPool, "protocol_proto2_history_cursor") + assert.Equal(t, uint32(101), cursor1) + assert.Equal(t, uint32(101), cursor2) + + // Verify each protocol has independently keyed sentinels + for _, id := range []string{"proto1", "proto2"} { + for _, seq := range []uint32{100, 101} { + val, ok := getHistorySentinel(t, ctx, dbPool, id, seq) + require.True(t, ok, "sentinel for %s ledger %d should exist", id, seq) + assert.Equal(t, seq, val, "sentinel value for %s ledger %d", id, seq) + } + } + assert.Equal(t, []uint32{100, 101}, proc1.persistedSeqs) + assert.Equal(t, []uint32{100, 101}, proc2.persistedSeqs) + }) + + t.Run("protocols at different cursors — each starts from its own position", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 50) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 102) + // proto1 cursor at 98, proto2 cursor at 100 + setIngestStoreValue(t, ctx, dbPool, "protocol_proto1_history_cursor", 98) + 
setIngestStoreValue(t, ctx, dbPool, "protocol_proto2_history_cursor", 100) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('proto1', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + _, err = dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('proto2', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + proc1 := &recordingProcessor{id: "proto1", ingestStore: ingestStore} + proc2 := &recordingProcessor{id: "proto2", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"proto1", "proto2"}).Return([]data.Protocols{ + {ID: "proto1", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + {ID: "proto2", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"proto1", "proto2"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"proto1", "proto2"}, data.StatusSuccess).Return(nil) + + protocolContractsModel.On("GetByProtocolID", mock.Anything, "proto1").Return([]data.ProtocolContracts{}, nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "proto2").Return([]data.ProtocolContracts{}, nil) + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 99: dummyLedgerMeta(99), + 100: dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + 102: dummyLedgerMeta(102), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + 
IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{proc1, proc2}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"proto1", "proto2"}) + require.NoError(t, err) + + cursor1 := getIngestStoreValue(t, ctx, dbPool, "protocol_proto1_history_cursor") + cursor2 := getIngestStoreValue(t, ctx, dbPool, "protocol_proto2_history_cursor") + assert.Equal(t, uint32(102), cursor1) + assert.Equal(t, uint32(102), cursor2) + + // proto1 should process 99-102, proto2 should process 101-102 + require.Len(t, proc1.processedInputs, 4) + for i, seq := range []uint32{99, 100, 101, 102} { + assert.Equal(t, seq, proc1.processedInputs[i].LedgerSequence) + } + require.Len(t, proc2.processedInputs, 2) + for i, seq := range []uint32{101, 102} { + assert.Equal(t, seq, proc2.processedInputs[i].LedgerSequence) + } + + assert.Equal(t, []uint32{99, 100, 101, 102}, proc1.persistedSeqs) + assert.Equal(t, []uint32{101, 102}, proc2.persistedSeqs) + }) + + t.Run("one protocol hands off, other continues", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 102) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('proto1', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + _, err = dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('proto2', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + // proc1 advances its own cursor during ProcessLedger at seq 100, causing CAS failure + proc1 := &cursorAdvancingProcessor{ + recordingProcessor: recordingProcessor{id: 
"proto1", ingestStore: ingestStore}, + dbPool: dbPool, + advanceAtSeq: 100, + } + proc2 := &recordingProcessor{id: "proto2", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"proto1", "proto2"}).Return([]data.Protocols{ + {ID: "proto1", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + {ID: "proto2", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"proto1", "proto2"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"proto1", "proto2"}, data.StatusSuccess).Return(nil) + + protocolContractsModel.On("GetByProtocolID", mock.Anything, "proto1").Return([]data.ProtocolContracts{}, nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "proto2").Return([]data.ProtocolContracts{}, nil) + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + 102: dummyLedgerMeta(102), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{proc1, proc2}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"proto1", "proto2"}) + require.NoError(t, err) + + // proto2 should have processed all 3 ledgers + cursor2 := getIngestStoreValue(t, ctx, dbPool, "protocol_proto2_history_cursor") + assert.Equal(t, uint32(102), cursor2) + assert.Equal(t, []uint32{100, 101, 102}, proc2.persistedSeqs) + + // proto1 should have processed only ledger 100 (then CAS failed, handed off) + require.Len(t, proc1.processedInputs, 1) + assert.Equal(t, uint32(100), 
proc1.processedInputs[0].LedgerSequence) + // proto1 PersistHistory was NOT called because CAS failed + assert.Empty(t, proc1.persistedSeqs) + + // Verify proto2 sentinels exist for all ledgers + for _, seq := range []uint32{100, 101, 102} { + val, ok := getHistorySentinel(t, ctx, dbPool, "proto2", seq) + require.True(t, ok, "sentinel for proto2 ledger %d should exist", seq) + assert.Equal(t, seq, val) + } + }) + + t.Run("already success — skips without error", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processorMock := NewProtocolProcessorMock(t) + + processorMock.On("ProtocolID").Return("testproto") + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusSuccess}, + }, nil) + + backend := &multiLedgerBackend{ledgers: map[uint32]xdr.LedgerCloseMeta{}} + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{processorMock}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.NoError(t, err) // No-op, nothing to do + }) +} + +func TestNewProtocolMigrateHistoryService(t *testing.T) { + t.Run("nil processor returns error", func(t *testing.T) { + _, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + Processors: []ProtocolProcessor{nil}, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "nil") + }) + + t.Run("duplicate processor ID returns error", func(t *testing.T) { + proc1 := &ProtocolProcessorMock{} + proc1.On("ProtocolID").Return("dup") + proc2 := 
&ProtocolProcessorMock{} + proc2.On("ProtocolID").Return("dup") + + _, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + Processors: []ProtocolProcessor{proc1, proc2}, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "duplicate") + }) +} From 1afbb7d8df190af0df59340802b2568ea70e9773 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 20 Mar 2026 11:26:17 -0600 Subject: [PATCH 38/52] Extract shared helpers between history migration and live ingestion Move duplicated logic into ingest_helpers.go: - getLedgerWithRetry: was identical method on both ingestService and protocolMigrateHistoryService, now a package-level function - buildProtocolProcessorMap: deduplicates processor slice-to-map conversion with nil/duplicate validation - protocolHistoryCursorName/protocolCurrentStateCursorName: replaces scattered Sprintf calls for cursor key formatting Simplifies getLedgerWithRetry test to call the function directly without constructing a full ingestService. 
--- internal/services/ingest.go | 46 +----------- internal/services/ingest_backfill.go | 2 +- internal/services/ingest_helpers.go | 71 +++++++++++++++++++ internal/services/ingest_live.go | 10 +-- internal/services/ingest_test.go | 41 ++--------- internal/services/protocol_migrate_history.go | 50 ++----------- .../services/protocol_migrate_history_test.go | 2 +- 7 files changed, 90 insertions(+), 132 deletions(-) create mode 100644 internal/services/ingest_helpers.go diff --git a/internal/services/ingest.go b/internal/services/ingest.go index e8bb97c92..ff2b868f2 100644 --- a/internal/services/ingest.go +++ b/internal/services/ingest.go @@ -145,16 +145,9 @@ func NewIngestService(cfg IngestServiceConfig) (*ingestService, error) { cfg.MetricsService.RegisterPoolMetrics("backfill", backfillPool) // Build protocol processor map from slice - ppMap := make(map[string]ProtocolProcessor, len(cfg.ProtocolProcessors)) - for i, p := range cfg.ProtocolProcessors { - if p == nil { - return nil, fmt.Errorf("protocol processor at index %d is nil", i) - } - id := p.ProtocolID() - if _, exists := ppMap[id]; exists { - return nil, fmt.Errorf("duplicate protocol processor ID %q", id) - } - ppMap[id] = p + ppMap, err := buildProtocolProcessorMap(cfg.ProtocolProcessors) + if err != nil { + return nil, err } var ppCache *protocolContractCache @@ -206,39 +199,6 @@ func (m *ingestService) Run(ctx context.Context, startLedger uint32, endLedger u } } -// getLedgerWithRetry fetches a ledger with exponential backoff retry logic. -// It respects context cancellation and limits retries to maxLedgerFetchRetries attempts. 
-func (m *ingestService) getLedgerWithRetry(ctx context.Context, backend ledgerbackend.LedgerBackend, ledgerSeq uint32) (xdr.LedgerCloseMeta, error) { - var lastErr error - for attempt := 0; attempt < maxLedgerFetchRetries; attempt++ { - select { - case <-ctx.Done(): - return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled: %w", ctx.Err()) - default: - } - - ledgerMeta, err := backend.GetLedger(ctx, ledgerSeq) - if err == nil { - return ledgerMeta, nil - } - lastErr = err - - backoff := time.Duration(1< maxRetryBackoff { - backoff = maxRetryBackoff - } - log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...", - ledgerSeq, attempt+1, maxLedgerFetchRetries, err, backoff) - - select { - case <-ctx.Done(): - return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled during backoff: %w", ctx.Err()) - case <-time.After(backoff): - } - } - return xdr.LedgerCloseMeta{}, fmt.Errorf("failed after %d attempts: %w", maxLedgerFetchRetries, lastErr) -} - // processLedger processes a single ledger - gets the transactions and processes them using indexer processors. 
func (m *ingestService) processLedger(ctx context.Context, ledgerMeta xdr.LedgerCloseMeta, buffer *indexer.IndexerBuffer) error { participantCount, err := indexer.ProcessLedger(ctx, m.networkPassphrase, ledgerMeta, m.ledgerIndexer, buffer) diff --git a/internal/services/ingest_backfill.go b/internal/services/ingest_backfill.go index f6bfdbd3d..eeee7a7ec 100644 --- a/internal/services/ingest_backfill.go +++ b/internal/services/ingest_backfill.go @@ -521,7 +521,7 @@ func (m *ingestService) processLedgersInBatch( } for ledgerSeq := batch.StartLedger; ledgerSeq <= batch.EndLedger; ledgerSeq++ { - ledgerMeta, err := m.getLedgerWithRetry(ctx, backend, ledgerSeq) + ledgerMeta, err := getLedgerWithRetry(ctx, backend, ledgerSeq) if err != nil { return ledgersProcessed, nil, startTime, endTime, fmt.Errorf("getting ledger %d: %w", ledgerSeq, err) } diff --git a/internal/services/ingest_helpers.go b/internal/services/ingest_helpers.go new file mode 100644 index 000000000..f196f555b --- /dev/null +++ b/internal/services/ingest_helpers.go @@ -0,0 +1,71 @@ +package services + +import ( + "context" + "fmt" + "time" + + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" +) + +// getLedgerWithRetry fetches a ledger with exponential backoff retry logic. +// It respects context cancellation and limits retries to maxLedgerFetchRetries attempts. 
+func getLedgerWithRetry(ctx context.Context, backend ledgerbackend.LedgerBackend, ledgerSeq uint32) (xdr.LedgerCloseMeta, error) { + var lastErr error + for attempt := 0; attempt < maxLedgerFetchRetries; attempt++ { + select { + case <-ctx.Done(): + return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled: %w", ctx.Err()) + default: + } + + ledgerMeta, err := backend.GetLedger(ctx, ledgerSeq) + if err == nil { + return ledgerMeta, nil + } + lastErr = err + + backoff := time.Duration(1< maxRetryBackoff { + backoff = maxRetryBackoff + } + log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...", + ledgerSeq, attempt+1, maxLedgerFetchRetries, err, backoff) + + select { + case <-ctx.Done(): + return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled during backoff: %w", ctx.Err()) + case <-time.After(backoff): + } + } + return xdr.LedgerCloseMeta{}, fmt.Errorf("failed after %d attempts: %w", maxLedgerFetchRetries, lastErr) +} + +// buildProtocolProcessorMap converts a slice of ProtocolProcessors into a map keyed by protocol ID, +// validating that no entries are nil and no IDs are duplicated. +func buildProtocolProcessorMap(processors []ProtocolProcessor) (map[string]ProtocolProcessor, error) { + ppMap := make(map[string]ProtocolProcessor, len(processors)) + for i, p := range processors { + if p == nil { + return nil, fmt.Errorf("protocol processor at index %d is nil", i) + } + id := p.ProtocolID() + if _, exists := ppMap[id]; exists { + return nil, fmt.Errorf("duplicate protocol processor ID %q", id) + } + ppMap[id] = p + } + return ppMap, nil +} + +// protocolHistoryCursorName returns the ingest_store key for a protocol's history migration cursor. +func protocolHistoryCursorName(protocolID string) string { + return fmt.Sprintf("protocol_%s_history_cursor", protocolID) +} + +// protocolCurrentStateCursorName returns the ingest_store key for a protocol's current state cursor. 
+func protocolCurrentStateCursorName(protocolID string) string { + return fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) +} diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 21669903f..445841d7e 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -53,13 +53,13 @@ func (m *ingestService) protocolProcessorsEligibleForProduction(ctx context.Cont eligible := make(map[string]ProtocolProcessor, len(m.protocolProcessors)) for protocolID, processor := range m.protocolProcessors { - historyCursor := fmt.Sprintf("protocol_%s_history_cursor", protocolID) + historyCursor := protocolHistoryCursorName(protocolID) historyVal, err := m.models.IngestStore.Get(ctx, historyCursor) if err != nil { return nil, fmt.Errorf("reading history cursor for %s: %w", protocolID, err) } - currentStateCursor := fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) + currentStateCursor := protocolCurrentStateCursorName(protocolID) currentStateVal, err := m.models.IngestStore.Get(ctx, currentStateCursor) if err != nil { return nil, fmt.Errorf("reading current state cursor for %s: %w", protocolID, err) @@ -150,8 +150,8 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, // No previous ledger to form an expected cursor value; skip CAS for this ledger. 
continue } - historyCursor := fmt.Sprintf("protocol_%s_history_cursor", protocolID) - currentStateCursor := fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) + historyCursor := protocolHistoryCursorName(protocolID) + currentStateCursor := protocolCurrentStateCursorName(protocolID) expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) next := strconv.FormatUint(uint64(ledgerSeq), 10) @@ -287,7 +287,7 @@ func (m *ingestService) ingestLiveLedgers(ctx context.Context, startLedger uint3 currentLedger := startLedger log.Ctx(ctx).Infof("Starting ingestion from ledger: %d", currentLedger) for { - ledgerMeta, ledgerErr := m.getLedgerWithRetry(ctx, m.ledgerBackend, currentLedger) + ledgerMeta, ledgerErr := getLedgerWithRetry(ctx, m.ledgerBackend, currentLedger) if ledgerErr != nil { return fmt.Errorf("fetching ledger %d: %w", currentLedger, ledgerErr) } diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index 5b0ab82f7..ac17ae74f 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -678,13 +678,7 @@ func Test_analyzeBatchResults(t *testing.T) { } } -func Test_ingestService_getLedgerWithRetry(t *testing.T) { - dbt := dbtest.Open(t) - defer dbt.Close() - dbConnectionPool, err := db.OpenDBConnectionPool(dbt.DSN) - require.NoError(t, err) - defer dbConnectionPool.Close() - +func Test_getLedgerWithRetry(t *testing.T) { ctx := context.Background() testCases := []struct { @@ -740,41 +734,14 @@ func Test_ingestService_getLedgerWithRetry(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - mockMetricsService := metrics.NewMockMetricsService() - mockMetricsService.On("RegisterPoolMetrics", "ledger_indexer", mock.Anything).Return() - mockMetricsService.On("RegisterPoolMetrics", "backfill", mock.Anything).Return() - defer mockMetricsService.AssertExpectations(t) - - models, err := data.NewModels(dbConnectionPool, mockMetricsService) - require.NoError(t, err) - mockLedgerBackend 
:= &LedgerBackendMock{} tc.setupBackend(mockLedgerBackend) defer mockLedgerBackend.AssertExpectations(t) - mockRPCService := &RPCServiceMock{} - mockRPCService.On("NetworkPassphrase").Return(network.TestNetworkPassphrase).Maybe() - - svc, err := NewIngestService(IngestServiceConfig{ - IngestionMode: IngestionModeBackfill, - Models: models, - LatestLedgerCursorName: "latest_ledger_cursor", - OldestLedgerCursorName: "oldest_ledger_cursor", - AppTracker: &apptracker.MockAppTracker{}, - RPCService: mockRPCService, - LedgerBackend: mockLedgerBackend, - MetricsService: mockMetricsService, - GetLedgersLimit: defaultGetLedgersLimit, - Network: network.TestNetworkPassphrase, - NetworkPassphrase: network.TestNetworkPassphrase, - Archive: &HistoryArchiveMock{}, - }) - require.NoError(t, err) - testCtx, cancel := tc.ctxFunc() defer cancel() - ledger, err := svc.getLedgerWithRetry(testCtx, mockLedgerBackend, 100) + ledger, err := getLedgerWithRetry(testCtx, mockLedgerBackend, 100) if tc.wantErr { require.Error(t, err) if tc.wantErrContains != "" { @@ -2840,11 +2807,11 @@ func setupProtocolCursors(t *testing.T, ctx context.Context, pool db.ConnectionP t.Helper() _, err := pool.ExecContext(ctx, `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, - fmt.Sprintf("protocol_%s_history_cursor", protocolID), historyCursor) + protocolHistoryCursorName(protocolID), historyCursor) require.NoError(t, err) _, err = pool.ExecContext(ctx, `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, - fmt.Sprintf("protocol_%s_current_state_cursor", protocolID), currentStateCursor) + protocolCurrentStateCursorName(protocolID), currentStateCursor) require.NoError(t, err) } diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go index 77f3feb50..df1aad8de 100644 --- a/internal/services/protocol_migrate_history.go +++ b/internal/services/protocol_migrate_history.go @@ -9,7 +9,6 @@ import ( "github.com/jackc/pgx/v5" 
"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/support/log" - "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/wallet-backend/internal/data" "github.com/stellar/wallet-backend/internal/db" @@ -59,16 +58,9 @@ type ProtocolMigrateHistoryConfig struct { // NewProtocolMigrateHistoryService creates a new protocolMigrateHistoryService from the given config. func NewProtocolMigrateHistoryService(cfg ProtocolMigrateHistoryConfig) (*protocolMigrateHistoryService, error) { - ppMap := make(map[string]ProtocolProcessor, len(cfg.Processors)) - for i, p := range cfg.Processors { - if p == nil { - return nil, fmt.Errorf("protocol processor at index %d is nil", i) - } - id := p.ProtocolID() - if _, exists := ppMap[id]; exists { - return nil, fmt.Errorf("duplicate protocol processor ID %q", id) - } - ppMap[id] = p + ppMap, err := buildProtocolProcessorMap(cfg.Processors) + if err != nil { + return nil, err } return &protocolMigrateHistoryService{ @@ -188,7 +180,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, // Initialize trackers: read/initialize cursor for each protocol trackers := make([]*protocolTracker, 0, len(protocolIDs)) for _, pid := range protocolIDs { - cursorName := fmt.Sprintf("protocol_%s_history_cursor", pid) + cursorName := protocolHistoryCursorName(pid) cursorValue, readErr := s.ingestStore.Get(ctx, cursorName) if readErr != nil { return fmt.Errorf("reading history cursor for %s: %w", pid, readErr) @@ -278,7 +270,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, } // Fetch ledger ONCE for all protocols - ledgerMeta, fetchErr := s.getLedgerWithRetry(ctx, seq) + ledgerMeta, fetchErr := getLedgerWithRetry(ctx, s.ledgerBackend, seq) if fetchErr != nil { return fmt.Errorf("fetching ledger %d: %w", seq, fetchErr) } @@ -377,35 +369,3 @@ func allHandedOff(trackers []*protocolTracker) bool { } return true } - -// getLedgerWithRetry fetches a 
 ledger with exponential backoff retry logic. -func (s *protocolMigrateHistoryService) getLedgerWithRetry(ctx context.Context, ledgerSeq uint32) (xdr.LedgerCloseMeta, error) { - var lastErr error - for attempt := 0; attempt < maxLedgerFetchRetries; attempt++ { - select { - case <-ctx.Done(): - return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled: %w", ctx.Err()) - default: - } - - ledgerMeta, err := s.ledgerBackend.GetLedger(ctx, ledgerSeq) - if err == nil { - return ledgerMeta, nil - } - lastErr = err - - backoff := time.Duration(1<<attempt) * time.Second - if backoff > maxRetryBackoff { - backoff = maxRetryBackoff - } - log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...", - ledgerSeq, attempt+1, maxLedgerFetchRetries, err, backoff) - - select { - case <-ctx.Done(): - return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled during backoff: %w", ctx.Err()) - case <-time.After(backoff): - } - } - return xdr.LedgerCloseMeta{}, fmt.Errorf("failed after %d attempts: %w", maxLedgerFetchRetries, lastErr) -} diff --git a/internal/services/protocol_migrate_history_test.go b/internal/services/protocol_migrate_history_test.go index 7440fad3d..7dc48000d 100644 --- a/internal/services/protocol_migrate_history_test.go +++ b/internal/services/protocol_migrate_history_test.go @@ -95,7 +95,7 @@ func (p *cursorAdvancingProcessor) ProcessLedger(ctx context.Context, input Prot _, _ = p.dbPool.ExecContext(ctx, `UPDATE ingest_store SET value = $1 WHERE key = $2`, strconv.FormatUint(uint64(p.advanceAtSeq+100), 10), - fmt.Sprintf("protocol_%s_history_cursor", p.id)) + protocolHistoryCursorName(p.id)) } return p.recordingProcessor.ProcessLedger(ctx, input) } From 69faa5f3d6ec03bd91188bd17795034a72820de1 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 20 Mar 2026 12:13:57 -0600 Subject: [PATCH 39/52] Fix convergence poll to distinguish timeout from transient RPC errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 
convergence poll in processAllProtocols treated any error from PrepareRange/GetLedger as convergence, including transient RPC failures like connection refused. This could prematurely mark protocols as StatusSuccess during network blips. Now discriminates three cases: poll deadline exceeded (converged), parent context cancelled (propagate), anything else (transient — retry). --- internal/services/protocol_migrate_history.go | 25 +++- .../services/protocol_migrate_history_test.go | 132 ++++++++++++++++++ 2 files changed, 152 insertions(+), 5 deletions(-) diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go index df1aad8de..a51d0a9ff 100644 --- a/internal/services/protocol_migrate_history.go +++ b/internal/services/protocol_migrate_history.go @@ -343,17 +343,32 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, // At tip — poll briefly for convergence pollCtx, cancel := context.WithTimeout(ctx, convergencePollTimeout) - if err := s.ledgerBackend.PrepareRange(pollCtx, ledgerbackend.UnboundedRange(latestLedger+1)); err != nil { + prepErr := s.ledgerBackend.PrepareRange(pollCtx, ledgerbackend.UnboundedRange(latestLedger+1)) + if prepErr != nil { cancel() - log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger) - return nil + if ctx.Err() != nil { + return fmt.Errorf("context cancelled during convergence poll: %w", ctx.Err()) + } + if pollCtx.Err() == context.DeadlineExceeded { + log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger) + return nil + } + log.Ctx(ctx).Warnf("Transient error during convergence poll PrepareRange: %v, retrying", prepErr) + continue } _, getLedgerErr := s.ledgerBackend.GetLedger(pollCtx, latestLedger+1) cancel() if getLedgerErr != nil { - log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger) - return nil + if ctx.Err() != nil { + return fmt.Errorf("context cancelled during convergence poll: %w", ctx.Err()) + } + if pollCtx.Err() == 
context.DeadlineExceeded { + log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger) + return nil + } + log.Ctx(ctx).Warnf("Transient error during convergence poll GetLedger: %v, retrying", getLedgerErr) + continue } // New ledger available, loop again diff --git a/internal/services/protocol_migrate_history_test.go b/internal/services/protocol_migrate_history_test.go index 7dc48000d..929a08019 100644 --- a/internal/services/protocol_migrate_history_test.go +++ b/internal/services/protocol_migrate_history_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strconv" + "sync/atomic" "testing" "github.com/jackc/pgx/v5" @@ -54,6 +55,36 @@ func (b *multiLedgerBackend) Close() error { return nil } +// transientErrorBackend wraps multiLedgerBackend and injects transient errors +// on convergence-poll calls (unbounded PrepareRange, missing-ledger GetLedger) +// before delegating normally. This simulates RPC blips that should not be +// mistaken for convergence. +type transientErrorBackend struct { + multiLedgerBackend + // unboundedPrepareFailsLeft counts how many unbounded PrepareRange calls + // (convergence polls) should return a transient error before succeeding. + unboundedPrepareFailsLeft atomic.Int32 + // missingGetLedgerFailsLeft counts how many GetLedger calls for missing + // ledgers should return a transient error instead of blocking. 
+ missingGetLedgerFailsLeft atomic.Int32 +} + +func (b *transientErrorBackend) PrepareRange(ctx context.Context, r ledgerbackend.Range) error { + if !r.Bounded() && b.unboundedPrepareFailsLeft.Add(-1) >= 0 { + return fmt.Errorf("transient RPC error: connection refused") + } + return b.multiLedgerBackend.PrepareRange(ctx, r) +} + +func (b *transientErrorBackend) GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, error) { + if _, ok := b.multiLedgerBackend.ledgers[sequence]; !ok { + if b.missingGetLedgerFailsLeft.Add(-1) >= 0 { + return xdr.LedgerCloseMeta{}, fmt.Errorf("transient RPC error: connection reset") + } + } + return b.multiLedgerBackend.GetLedger(ctx, sequence) +} + // recordingProcessor is a test double that records all ProcessLedger inputs // and writes per-ledger sentinel keys to ingest_store during PersistHistory, // proving that PersistHistory actually committed data inside the transaction. @@ -782,6 +813,107 @@ func TestProtocolMigrateHistory(t *testing.T) { err = svc.Run(ctx, []string{"testproto"}) require.NoError(t, err) // No-op, nothing to do }) + + t.Run("transient PrepareRange error retries then converges", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 101) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processor := &recordingProcessor{id: "testproto", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, 
+ }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusSuccess).Return(nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + + backend := &transientErrorBackend{ + multiLedgerBackend: multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + }, + }, + } + // First PrepareRange call for the convergence poll will fail transiently. + // The bounded-range PrepareRange calls (for processing) always succeed because + // the counter is only 1 and multiLedgerBackend.PrepareRange is a no-op. + backend.unboundedPrepareFailsLeft.Store(1) + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + Processors: []ProtocolProcessor{processor}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.NoError(t, err) + + // Verify all ledgers were processed — the transient error did not cause premature convergence. 
+ cursorVal := getIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor") + assert.Equal(t, uint32(101), cursorVal) + assert.Equal(t, []uint32{100, 101}, processor.persistedSeqs) + }) + + t.Run("transient GetLedger error retries then converges", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 101) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processor := &recordingProcessor{id: "testproto", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusSuccess).Return(nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + + backend := &transientErrorBackend{ + multiLedgerBackend: multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + }, + }, + } + // First GetLedger call for the convergence poll (ledger 102, which doesn't exist) + // will fail transiently instead of blocking until context done. 
+ backend.missingGetLedgerFailsLeft.Store(1) + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + Processors: []ProtocolProcessor{processor}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.NoError(t, err) + + // Verify all ledgers were processed — the transient error did not cause premature convergence. + cursorVal := getIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor") + assert.Equal(t, uint32(101), cursorVal) + assert.Equal(t, []uint32{100, 101}, processor.persistedSeqs) + }) } func TestNewProtocolMigrateHistoryService(t *testing.T) { From 987ec5a1e71b41c2790f04d66063eeb483e86ad4 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 20 Mar 2026 13:12:06 -0600 Subject: [PATCH 40/52] Fix hardcoded cursor names in protocol history migration The history migration service read cursor positions using hardcoded constants (data.OldestLedgerCursorName, data.LatestLedgerCursorName), ignoring operator overrides via CLI flags. Add configurable cursor name fields with defaults matching the ingest command, so operators who override --latest-ledger-cursor-name or --oldest-ledger-cursor-name get consistent behavior across live ingestion and history migration. 
--- cmd/protocol_data_migrate.go | 24 ++++--- .../integrationtests/data_migration_test.go | 16 ++--- internal/services/protocol_migrate_history.go | 63 ++++++++++++------- 3 files changed, 63 insertions(+), 40 deletions(-) diff --git a/cmd/protocol_data_migrate.go b/cmd/protocol_data_migrate.go index 769b18047..823c21725 100644 --- a/cmd/protocol_data_migrate.go +++ b/cmd/protocol_data_migrate.go @@ -42,6 +42,8 @@ func (c *protocolMigrateCmd) historyCommand() *cobra.Command { var networkPassphrase string var protocolIDs []string var logLevel string + var latestLedgerCursorName string + var oldestLedgerCursorName string cfgOpts := config.ConfigOptions{ utils.DatabaseURLOption(&databaseURL), @@ -75,7 +77,7 @@ func (c *protocolMigrateCmd) historyCommand() *cobra.Command { return nil }, RunE: func(_ *cobra.Command, _ []string) error { - return c.RunHistory(databaseURL, rpcURL, networkPassphrase, protocolIDs) + return c.RunHistory(databaseURL, rpcURL, networkPassphrase, protocolIDs, latestLedgerCursorName, oldestLedgerCursorName) }, } @@ -85,11 +87,13 @@ func (c *protocolMigrateCmd) historyCommand() *cobra.Command { cmd.Flags().StringSliceVar(&protocolIDs, "protocol-id", nil, "Protocol ID(s) to migrate (required, repeatable)") cmd.Flags().StringVar(&logLevel, "log-level", "", `Log level: "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", "PANIC"`) + cmd.Flags().StringVar(&latestLedgerCursorName, "latest-ledger-cursor-name", data.LatestLedgerCursorName, "Name of the latest ledger cursor in the ingest store. Must match the value used by the ingest service.") + cmd.Flags().StringVar(&oldestLedgerCursorName, "oldest-ledger-cursor-name", data.OldestLedgerCursorName, "Name of the oldest ledger cursor in the ingest store. 
Must match the value used by the ingest service.") return cmd } -func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase string, protocolIDs []string) error { +func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase string, protocolIDs []string, latestLedgerCursorName, oldestLedgerCursorName string) error { ctx := context.Background() // Build processors from protocol IDs using the dynamic registry @@ -136,13 +140,15 @@ func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase s }() service, err := services.NewProtocolMigrateHistoryService(services.ProtocolMigrateHistoryConfig{ - DB: dbPool, - LedgerBackend: ledgerBackend, - ProtocolsModel: models.Protocols, - ProtocolContractsModel: models.ProtocolContracts, - IngestStore: models.IngestStore, - NetworkPassphrase: networkPassphrase, - Processors: processors, + DB: dbPool, + LedgerBackend: ledgerBackend, + ProtocolsModel: models.Protocols, + ProtocolContractsModel: models.ProtocolContracts, + IngestStore: models.IngestStore, + NetworkPassphrase: networkPassphrase, + Processors: processors, + LatestLedgerCursorName: latestLedgerCursorName, + OldestLedgerCursorName: oldestLedgerCursorName, }) if err != nil { return fmt.Errorf("creating protocol migrate history service: %w", err) diff --git a/internal/integrationtests/data_migration_test.go b/internal/integrationtests/data_migration_test.go index fe29d5390..9e9c06f34 100644 --- a/internal/integrationtests/data_migration_test.go +++ b/internal/integrationtests/data_migration_test.go @@ -279,13 +279,15 @@ func (s *DataMigrationTestSuite) newHistoryMigrationService( processor services.ProtocolProcessor, ) services.ProtocolMigrateHistoryService { svc, err := services.NewProtocolMigrateHistoryService(services.ProtocolMigrateHistoryConfig{ - DB: pool, - LedgerBackend: ledgerBackend, - ProtocolsModel: models.Protocols, - ProtocolContractsModel: models.ProtocolContracts, - IngestStore: models.IngestStore, - 
NetworkPassphrase: "Test SDF Network ; September 2015", - Processors: []services.ProtocolProcessor{processor}, + DB: pool, + LedgerBackend: ledgerBackend, + ProtocolsModel: models.Protocols, + ProtocolContractsModel: models.ProtocolContracts, + IngestStore: models.IngestStore, + NetworkPassphrase: "Test SDF Network ; September 2015", + Processors: []services.ProtocolProcessor{processor}, + LatestLedgerCursorName: data.LatestLedgerCursorName, + OldestLedgerCursorName: data.OldestLedgerCursorName, }) s.Require().NoError(err) return svc diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go index a51d0a9ff..4a626a86b 100644 --- a/internal/services/protocol_migrate_history.go +++ b/internal/services/protocol_migrate_history.go @@ -36,24 +36,28 @@ type ProtocolMigrateHistoryService interface { var _ ProtocolMigrateHistoryService = (*protocolMigrateHistoryService)(nil) type protocolMigrateHistoryService struct { - db db.ConnectionPool - ledgerBackend ledgerbackend.LedgerBackend - protocolsModel data.ProtocolsModelInterface - protocolContractsModel data.ProtocolContractsModelInterface - ingestStore *data.IngestStoreModel - networkPassphrase string - processors map[string]ProtocolProcessor + db db.ConnectionPool + ledgerBackend ledgerbackend.LedgerBackend + protocolsModel data.ProtocolsModelInterface + protocolContractsModel data.ProtocolContractsModelInterface + ingestStore *data.IngestStoreModel + networkPassphrase string + processors map[string]ProtocolProcessor + latestLedgerCursorName string + oldestLedgerCursorName string } // ProtocolMigrateHistoryConfig holds the configuration for creating a protocolMigrateHistoryService. 
type ProtocolMigrateHistoryConfig struct { - DB db.ConnectionPool - LedgerBackend ledgerbackend.LedgerBackend - ProtocolsModel data.ProtocolsModelInterface - ProtocolContractsModel data.ProtocolContractsModelInterface - IngestStore *data.IngestStoreModel - NetworkPassphrase string - Processors []ProtocolProcessor + DB db.ConnectionPool + LedgerBackend ledgerbackend.LedgerBackend + ProtocolsModel data.ProtocolsModelInterface + ProtocolContractsModel data.ProtocolContractsModelInterface + IngestStore *data.IngestStoreModel + NetworkPassphrase string + Processors []ProtocolProcessor + LatestLedgerCursorName string + OldestLedgerCursorName string } // NewProtocolMigrateHistoryService creates a new protocolMigrateHistoryService from the given config. @@ -63,14 +67,25 @@ func NewProtocolMigrateHistoryService(cfg ProtocolMigrateHistoryConfig) (*protoc return nil, err } + latestCursor := cfg.LatestLedgerCursorName + if latestCursor == "" { + latestCursor = data.LatestLedgerCursorName + } + oldestCursor := cfg.OldestLedgerCursorName + if oldestCursor == "" { + oldestCursor = data.OldestLedgerCursorName + } + return &protocolMigrateHistoryService{ - db: cfg.DB, - ledgerBackend: cfg.LedgerBackend, - protocolsModel: cfg.ProtocolsModel, - protocolContractsModel: cfg.ProtocolContractsModel, - ingestStore: cfg.IngestStore, - networkPassphrase: cfg.NetworkPassphrase, - processors: ppMap, + db: cfg.DB, + ledgerBackend: cfg.LedgerBackend, + protocolsModel: cfg.ProtocolsModel, + protocolContractsModel: cfg.ProtocolContractsModel, + ingestStore: cfg.IngestStore, + networkPassphrase: cfg.NetworkPassphrase, + processors: ppMap, + latestLedgerCursorName: latestCursor, + oldestLedgerCursorName: oldestCursor, }, nil } @@ -169,7 +184,7 @@ func (s *protocolMigrateHistoryService) validate(ctx context.Context, protocolID // Each ledger is fetched once and processed by all eligible protocols, avoiding redundant RPC calls. 
func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, protocolIDs []string) error { // Read oldest_ingest_ledger - oldestLedger, err := s.ingestStore.Get(ctx, data.OldestLedgerCursorName) + oldestLedger, err := s.ingestStore.Get(ctx, s.oldestLedgerCursorName) if err != nil { return fmt.Errorf("reading oldest ingest ledger: %w", err) } @@ -220,7 +235,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, return nil } - latestLedger, err := s.ingestStore.Get(ctx, data.LatestLedgerCursorName) + latestLedger, err := s.ingestStore.Get(ctx, s.latestLedgerCursorName) if err != nil { return fmt.Errorf("reading latest ingest ledger: %w", err) } @@ -333,7 +348,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, } // Check if tip has advanced - newLatest, err := s.ingestStore.Get(ctx, data.LatestLedgerCursorName) + newLatest, err := s.ingestStore.Get(ctx, s.latestLedgerCursorName) if err != nil { return fmt.Errorf("re-reading latest ingest ledger: %w", err) } From 91413ebbc55dd9b9e5d78f37d8630ccf0159f5dd Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 20 Mar 2026 13:43:04 -0600 Subject: [PATCH 41/52] Document and test BoundedRange/UnboundedRange transition on shared ledger backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The outer loop in protocol history migration transitions the same LedgerBackend instance between BoundedRange and UnboundedRange without explicit reset. This works because captive core internally closes the subprocess before opening a new range, but that behavior is an implementation detail not guaranteed by the LedgerBackend interface. Add an explanatory comment at the transition point and a new integration test (rangeTrackingBackend) that verifies the Bounded→Unbounded→Bounded PrepareRange sequence when the tip advances during the convergence poll. 
--- internal/services/protocol_migrate_history.go | 9 +- .../services/protocol_migrate_history_test.go | 117 ++++++++++++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go index 4a626a86b..fcb96be75 100644 --- a/internal/services/protocol_migrate_history.go +++ b/internal/services/protocol_migrate_history.go @@ -356,7 +356,14 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, continue } - // At tip — poll briefly for convergence + // At tip — poll briefly for convergence. + // + // This transitions the backend from BoundedRange (line 264) to UnboundedRange + // on the same instance. The captive core implementation handles this internally + // by closing the existing subprocess before starting a new one (see + // CaptiveStellarCore.startPreparingRange). If the poll succeeds and a new ledger + // is detected, the outer loop iterates again and re-prepares a BoundedRange — + // the same implicit close-and-reopen applies in that direction too. pollCtx, cancel := context.WithTimeout(ctx, convergencePollTimeout) prepErr := s.ledgerBackend.PrepareRange(pollCtx, ledgerbackend.UnboundedRange(latestLedger+1)) if prepErr != nil { diff --git a/internal/services/protocol_migrate_history_test.go b/internal/services/protocol_migrate_history_test.go index 929a08019..1a97fd5ef 100644 --- a/internal/services/protocol_migrate_history_test.go +++ b/internal/services/protocol_migrate_history_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strconv" + "sync" "sync/atomic" "testing" @@ -85,6 +86,46 @@ func (b *transientErrorBackend) GetLedger(ctx context.Context, sequence uint32) return b.multiLedgerBackend.GetLedger(ctx, sequence) } +// rangeTrackingBackend wraps multiLedgerBackend and records the sequence of +// PrepareRange calls, capturing whether each was bounded or unbounded. 
+// An optional onUnbounded callback fires synchronously on the first unbounded +// PrepareRange, allowing tests to inject new ledgers deterministically before +// the subsequent GetLedger call. +type rangeTrackingBackend struct { + multiLedgerBackend + mu sync.Mutex + ranges []rangeCall + onUnbounded func() + onUnboundedOnce sync.Once +} + +type rangeCall struct { + bounded bool + r ledgerbackend.Range +} + +func (b *rangeTrackingBackend) PrepareRange(ctx context.Context, r ledgerbackend.Range) error { + b.mu.Lock() + b.ranges = append(b.ranges, rangeCall{bounded: r.Bounded(), r: r}) + b.mu.Unlock() + if !r.Bounded() && b.onUnbounded != nil { + b.onUnboundedOnce.Do(b.onUnbounded) + } + return b.multiLedgerBackend.PrepareRange(ctx, r) +} + +// GetLedger checks for ledgers under the mutex (supporting ledgers added by +// the onUnbounded callback), then falls back to the base blocking behavior. +func (b *rangeTrackingBackend) GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, error) { + b.mu.Lock() + meta, ok := b.multiLedgerBackend.ledgers[sequence] + b.mu.Unlock() + if ok { + return meta, nil + } + return b.multiLedgerBackend.GetLedger(ctx, sequence) +} + // recordingProcessor is a test double that records all ProcessLedger inputs // and writes per-ledger sentinel keys to ingest_store during PersistHistory, // proving that PersistHistory actually committed data inside the transaction. 
@@ -914,6 +955,82 @@ func TestProtocolMigrateHistory(t *testing.T) { assert.Equal(t, uint32(101), cursorVal) assert.Equal(t, []uint32{100, 101}, processor.persistedSeqs) }) + + t.Run("tip advances during convergence poll triggers bounded-unbounded-bounded transition", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 101) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processor := &recordingProcessor{id: "testproto", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusSuccess).Return(nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + + backend := &rangeTrackingBackend{ + multiLedgerBackend: multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + }, + }, + } + + // When the service reaches the convergence poll and calls + // PrepareRange(UnboundedRange), this callback fires synchronously + // to simulate tip advancement: it adds new ledgers and updates the + // ingest store before GetLedger is called. 
+ backend.onUnbounded = func() { + backend.mu.Lock() + backend.multiLedgerBackend.ledgers[102] = dummyLedgerMeta(102) + backend.multiLedgerBackend.ledgers[103] = dummyLedgerMeta(103) + backend.mu.Unlock() + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 103) + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + Processors: []ProtocolProcessor{processor}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.NoError(t, err) + + // Verify Bounded → Unbounded → Bounded range transition sequence + backend.mu.Lock() + ranges := make([]rangeCall, len(backend.ranges)) + copy(ranges, backend.ranges) + backend.mu.Unlock() + + require.GreaterOrEqual(t, len(ranges), 3, "expected at least 3 PrepareRange calls, got %d", len(ranges)) + assert.True(t, ranges[0].bounded, "first PrepareRange should be bounded") + assert.False(t, ranges[1].bounded, "second PrepareRange should be unbounded (convergence poll)") + assert.True(t, ranges[2].bounded, "third PrepareRange should be bounded (re-entered loop after tip advance)") + + // Verify all ledgers 100-103 were processed and persisted + cursorVal := getIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor") + assert.Equal(t, uint32(103), cursorVal) + assert.Equal(t, []uint32{100, 101, 102, 103}, processor.persistedSeqs) + + for _, seq := range []uint32{100, 101, 102, 103} { + val, ok := getHistorySentinel(t, ctx, dbPool, "testproto", seq) + require.True(t, ok, "sentinel for ledger %d should exist", seq) + assert.Equal(t, seq, val, "sentinel value for ledger %d", seq) + } + }) } func TestNewProtocolMigrateHistoryService(t *testing.T) { From 417d9110aed0665c06b39fa619bc0f7d89b6d14c Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 20 Mar 
2026 13:53:31 -0600 Subject: [PATCH 42/52] Fix bulk StatusFailed marking to exclude handed-off protocols When processAllProtocols fails, the Run() method was marking all active protocols as StatusFailed, including ones already handed off to live ingestion via CAS failure. This caused handed-off protocols to be re-processed on the next Run(), conflicting with live ingestion's cursor ownership. Change processAllProtocols to return handed-off protocol IDs alongside the error, then split the status update: handed-off protocols get StatusSuccess (live ingestion owns them), while only non-handed-off protocols get StatusFailed. --- internal/services/protocol_migrate_history.go | 95 +++++++++++++------ .../services/protocol_migrate_history_test.go | 79 +++++++++++++++ 2 files changed, 147 insertions(+), 27 deletions(-) diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go index fcb96be75..200e861ff 100644 --- a/internal/services/protocol_migrate_history.go +++ b/internal/services/protocol_migrate_history.go @@ -109,15 +109,30 @@ func (s *protocolMigrateHistoryService) Run(ctx context.Context, protocolIDs []s } // Phase 2: Process each protocol - if err := s.processAllProtocols(ctx, activeProtocolIDs); err != nil { - // Best-effort set status to failed + handedOffIDs, err := s.processAllProtocols(ctx, activeProtocolIDs) + if err != nil { cleanupCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - if txErr := db.RunInPgxTransaction(cleanupCtx, s.db, func(dbTx pgx.Tx) error { - return s.protocolsModel.UpdateHistoryMigrationStatus(cleanupCtx, dbTx, activeProtocolIDs, data.StatusFailed) - }); txErr != nil { - log.Ctx(ctx).Errorf("error setting history migration status to failed: %v", txErr) + + // Mark handed-off protocols as success — live ingestion owns them now + if len(handedOffIDs) > 0 { + if txErr := db.RunInPgxTransaction(cleanupCtx, s.db, func(dbTx pgx.Tx) error { + return 
s.protocolsModel.UpdateHistoryMigrationStatus(cleanupCtx, dbTx, handedOffIDs, data.StatusSuccess) + }); txErr != nil { + log.Ctx(ctx).Errorf("error setting handed-off protocols to success: %v", txErr) + } } + + // Mark only non-handed-off protocols as failed + failedIDs := subtract(activeProtocolIDs, handedOffIDs) + if len(failedIDs) > 0 { + if txErr := db.RunInPgxTransaction(cleanupCtx, s.db, func(dbTx pgx.Tx) error { + return s.protocolsModel.UpdateHistoryMigrationStatus(cleanupCtx, dbTx, failedIDs, data.StatusFailed) + }); txErr != nil { + log.Ctx(ctx).Errorf("error setting history migration status to failed: %v", txErr) + } + } + return fmt.Errorf("processing protocols: %w", err) } @@ -182,14 +197,14 @@ func (s *protocolMigrateHistoryService) validate(ctx context.Context, protocolID // processAllProtocols runs history migration for all protocols using ledger-first iteration. // Each ledger is fetched once and processed by all eligible protocols, avoiding redundant RPC calls. -func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, protocolIDs []string) error { +func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, protocolIDs []string) ([]string, error) { // Read oldest_ingest_ledger oldestLedger, err := s.ingestStore.Get(ctx, s.oldestLedgerCursorName) if err != nil { - return fmt.Errorf("reading oldest ingest ledger: %w", err) + return nil, fmt.Errorf("reading oldest ingest ledger: %w", err) } if oldestLedger == 0 { - return fmt.Errorf("ingestion has not started yet (oldest_ingest_ledger is 0)") + return nil, fmt.Errorf("ingestion has not started yet (oldest_ingest_ledger is 0)") } // Initialize trackers: read/initialize cursor for each protocol @@ -198,7 +213,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, cursorName := protocolHistoryCursorName(pid) cursorValue, readErr := s.ingestStore.Get(ctx, cursorName) if readErr != nil { - return fmt.Errorf("reading history cursor 
for %s: %w", pid, readErr) + return nil, fmt.Errorf("reading history cursor for %s: %w", pid, readErr) } if cursorValue == 0 { @@ -206,7 +221,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, if initErr := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { return s.ingestStore.Update(ctx, dbTx, cursorName, initValue) }); initErr != nil { - return fmt.Errorf("initializing history cursor for %s: %w", pid, initErr) + return nil, fmt.Errorf("initializing history cursor for %s: %w", pid, initErr) } cursorValue = initValue } @@ -225,19 +240,19 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, for _, t := range trackers { contracts, err := s.protocolContractsModel.GetByProtocolID(ctx, t.protocolID) if err != nil { - return fmt.Errorf("loading contracts for %s: %w", t.protocolID, err) + return nil, fmt.Errorf("loading contracts for %s: %w", t.protocolID, err) } contractsByProtocol[t.protocolID] = contracts } for { if allHandedOff(trackers) { - return nil + return handedOffProtocolIDs(trackers), nil } latestLedger, err := s.ingestStore.Get(ctx, s.latestLedgerCursorName) if err != nil { - return fmt.Errorf("reading latest ingest ledger: %w", err) + return handedOffProtocolIDs(trackers), fmt.Errorf("reading latest ingest ledger: %w", err) } // Find minimum cursor among non-handed-off trackers @@ -256,19 +271,19 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, startLedger := minCursor + 1 if startLedger > latestLedger { log.Ctx(ctx).Infof("All protocols at or past tip %d, migration complete", latestLedger) - return nil + return handedOffProtocolIDs(trackers), nil } log.Ctx(ctx).Infof("Processing ledgers %d to %d for %d protocol(s)", startLedger, latestLedger, len(protocolIDs)) if err := s.ledgerBackend.PrepareRange(ctx, ledgerbackend.BoundedRange(startLedger, latestLedger)); err != nil { - return fmt.Errorf("preparing ledger range [%d, %d]: %w", startLedger, 
latestLedger, err) + return handedOffProtocolIDs(trackers), fmt.Errorf("preparing ledger range [%d, %d]: %w", startLedger, latestLedger, err) } for seq := startLedger; seq <= latestLedger; seq++ { select { case <-ctx.Done(): - return fmt.Errorf("context cancelled: %w", ctx.Err()) + return handedOffProtocolIDs(trackers), fmt.Errorf("context cancelled: %w", ctx.Err()) default: } @@ -287,7 +302,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, // Fetch ledger ONCE for all protocols ledgerMeta, fetchErr := getLedgerWithRetry(ctx, s.ledgerBackend, seq) if fetchErr != nil { - return fmt.Errorf("fetching ledger %d: %w", seq, fetchErr) + return handedOffProtocolIDs(trackers), fmt.Errorf("fetching ledger %d: %w", seq, fetchErr) } // Process each eligible tracker @@ -304,7 +319,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, NetworkPassphrase: s.networkPassphrase, } if err := t.processor.ProcessLedger(ctx, input); err != nil { - return fmt.Errorf("processing ledger %d for protocol %s: %w", seq, t.protocolID, err) + return handedOffProtocolIDs(trackers), fmt.Errorf("processing ledger %d for protocol %s: %w", seq, t.protocolID, err) } // CAS + persist in a transaction @@ -323,7 +338,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, } return nil }); err != nil { - return fmt.Errorf("persisting ledger %d for protocol %s: %w", seq, t.protocolID, err) + return handedOffProtocolIDs(trackers), fmt.Errorf("persisting ledger %d for protocol %s: %w", seq, t.protocolID, err) } if !swapped { @@ -335,7 +350,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, } if allHandedOff(trackers) { - return nil + return handedOffProtocolIDs(trackers), nil } if seq%100 == 0 { @@ -344,13 +359,13 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, } if allHandedOff(trackers) { - return nil + return handedOffProtocolIDs(trackers), 
nil } // Check if tip has advanced newLatest, err := s.ingestStore.Get(ctx, s.latestLedgerCursorName) if err != nil { - return fmt.Errorf("re-reading latest ingest ledger: %w", err) + return handedOffProtocolIDs(trackers), fmt.Errorf("re-reading latest ingest ledger: %w", err) } if newLatest > latestLedger { continue @@ -369,11 +384,11 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, if prepErr != nil { cancel() if ctx.Err() != nil { - return fmt.Errorf("context cancelled during convergence poll: %w", ctx.Err()) + return handedOffProtocolIDs(trackers), fmt.Errorf("context cancelled during convergence poll: %w", ctx.Err()) } if pollCtx.Err() == context.DeadlineExceeded { log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger) - return nil + return handedOffProtocolIDs(trackers), nil } log.Ctx(ctx).Warnf("Transient error during convergence poll PrepareRange: %v, retrying", prepErr) continue @@ -383,11 +398,11 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, cancel() if getLedgerErr != nil { if ctx.Err() != nil { - return fmt.Errorf("context cancelled during convergence poll: %w", ctx.Err()) + return handedOffProtocolIDs(trackers), fmt.Errorf("context cancelled during convergence poll: %w", ctx.Err()) } if pollCtx.Err() == context.DeadlineExceeded { log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger) - return nil + return handedOffProtocolIDs(trackers), nil } log.Ctx(ctx).Warnf("Transient error during convergence poll GetLedger: %v, retrying", getLedgerErr) continue @@ -406,3 +421,29 @@ func allHandedOff(trackers []*protocolTracker) bool { } return true } + +// handedOffProtocolIDs returns the IDs of trackers that have been handed off to live ingestion. 
+func handedOffProtocolIDs(trackers []*protocolTracker) []string { + var ids []string + for _, t := range trackers { + if t.handedOff { + ids = append(ids, t.protocolID) + } + } + return ids +} + +// subtract returns all elements in `all` that are not in `remove`. +func subtract(all, remove []string) []string { + removeSet := make(map[string]struct{}, len(remove)) + for _, id := range remove { + removeSet[id] = struct{}{} + } + var result []string + for _, id := range all { + if _, ok := removeSet[id]; !ok { + result = append(result, id) + } + } + return result +} diff --git a/internal/services/protocol_migrate_history_test.go b/internal/services/protocol_migrate_history_test.go index 1a97fd5ef..fbffc8658 100644 --- a/internal/services/protocol_migrate_history_test.go +++ b/internal/services/protocol_migrate_history_test.go @@ -172,6 +172,20 @@ func (p *cursorAdvancingProcessor) ProcessLedger(ctx context.Context, input Prot return p.recordingProcessor.ProcessLedger(ctx, input) } +// errorAtSeqProcessor wraps recordingProcessor and returns an error when +// ProcessLedger is called for a specific ledger sequence. 
+type errorAtSeqProcessor struct { + recordingProcessor + errorAtSeq uint32 +} + +func (p *errorAtSeqProcessor) ProcessLedger(ctx context.Context, input ProtocolProcessorInput) error { + if input.LedgerSequence == p.errorAtSeq { + return fmt.Errorf("simulated error at ledger %d", p.errorAtSeq) + } + return p.recordingProcessor.ProcessLedger(ctx, input) +} + func getHistorySentinel(t *testing.T, ctx context.Context, dbPool db.ConnectionPool, protocolID string, seq uint32) (uint32, bool) { t.Helper() var val uint32 @@ -827,6 +841,71 @@ func TestProtocolMigrateHistory(t *testing.T) { } }) + t.Run("multi-protocol failure with handoff — handed-off gets success, other gets failed", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 102) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('proto1', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + _, err = dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('proto2', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + + // proto1: hands off via CAS failure at ledger 100 + proc1 := &cursorAdvancingProcessor{ + recordingProcessor: recordingProcessor{id: "proto1", ingestStore: ingestStore}, + dbPool: dbPool, + advanceAtSeq: 100, + } + // proto2: errors at ledger 101 + proc2 := &errorAtSeqProcessor{ + recordingProcessor: recordingProcessor{id: "proto2", ingestStore: ingestStore}, + errorAtSeq: 101, + } + + protocolsModel.On("GetByIDs", ctx, []string{"proto1", "proto2"}).Return([]data.Protocols{ + {ID: "proto1", ClassificationStatus: data.StatusSuccess, 
HistoryMigrationStatus: data.StatusNotStarted}, + {ID: "proto2", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"proto1", "proto2"}, data.StatusInProgress).Return(nil) + // proto1 should be marked success (handed off to live ingestion) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"proto1"}, data.StatusSuccess).Return(nil) + // proto2 should be marked failed (ProcessLedger error) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"proto2"}, data.StatusFailed).Return(nil) + + protocolContractsModel.On("GetByProtocolID", mock.Anything, "proto1").Return([]data.ProtocolContracts{}, nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "proto2").Return([]data.ProtocolContracts{}, nil) + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + 102: dummyLedgerMeta(102), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + + Processors: []ProtocolProcessor{proc1, proc2}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"proto1", "proto2"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "simulated error at ledger 101") + + // Verify the mock expectations — proto1 got StatusSuccess, proto2 got StatusFailed + protocolsModel.AssertExpectations(t) + }) + t.Run("already success — skips without error", func(t *testing.T) { ctx := context.Background() dbPool, ingestStore := setupTestDB(t) From b96e8f4c37545f17c9b6dd8b355cbe978f74d392 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Fri, 20 Mar 
2026 15:25:42 -0600 Subject: [PATCH 43/52] De-duplicate protocolIDs early in validate() If the caller passes duplicate protocol IDs (e.g. --protocol-id foo --protocol-id foo), duplicate trackers would be created for the same protocol, causing self-induced CAS failures and incorrect handoff detection. Add order-preserving deduplication as the first operation in validate(), which is the single choke-point for both Run() and processAllProtocols(). --- internal/services/protocol_migrate_history.go | 11 ++++ .../services/protocol_migrate_history_test.go | 52 +++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go index 200e861ff..8b046d04c 100644 --- a/internal/services/protocol_migrate_history.go +++ b/internal/services/protocol_migrate_history.go @@ -150,6 +150,17 @@ func (s *protocolMigrateHistoryService) Run(ctx context.Context, protocolIDs []s // validate checks that all protocol IDs are valid and ready for history migration. // Returns the list of protocol IDs that need processing (excludes already-success ones). func (s *protocolMigrateHistoryService) validate(ctx context.Context, protocolIDs []string) ([]string, error) { + // De-duplicate protocolIDs, preserving order. 
+ seen := make(map[string]struct{}, len(protocolIDs)) + unique := make([]string, 0, len(protocolIDs)) + for _, pid := range protocolIDs { + if _, dup := seen[pid]; !dup { + seen[pid] = struct{}{} + unique = append(unique, pid) + } + } + protocolIDs = unique + // Check each protocol has a registered processor for _, pid := range protocolIDs { if _, ok := s.processors[pid]; !ok { diff --git a/internal/services/protocol_migrate_history_test.go b/internal/services/protocol_migrate_history_test.go index fbffc8658..5384406aa 100644 --- a/internal/services/protocol_migrate_history_test.go +++ b/internal/services/protocol_migrate_history_test.go @@ -440,6 +440,58 @@ func TestProtocolMigrateHistory(t *testing.T) { assert.Contains(t, err.Error(), "no processor registered") }) + t.Run("duplicate protocol IDs are deduplicated — each processed once", func(t *testing.T) { + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 101) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processor := &recordingProcessor{id: "testproto", ingestStore: ingestStore} + + // Mock expects the deduplicated slice (single element), not the duplicated input. 
+ protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusSuccess).Return(nil) + + backend := &multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + Processors: []ProtocolProcessor{processor}, + }) + require.NoError(t, err) + + // Pass duplicate IDs — should be deduplicated internally. + err = svc.Run(ctx, []string{"testproto", "testproto", "testproto"}) + require.NoError(t, err) + + // Single cursor write — only one tracker was created. + cursorVal := getIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor") + assert.Equal(t, uint32(101), cursorVal) + + // Each ledger processed exactly once. 
+ require.Len(t, processor.processedInputs, 2) + assert.Equal(t, uint32(100), processor.processedInputs[0].LedgerSequence) + assert.Equal(t, uint32(101), processor.processedInputs[1].LedgerSequence) + assert.Equal(t, []uint32{100, 101}, processor.persistedSeqs) + }) + t.Run("resume from cursor — cursor already at N, process from N+1", func(t *testing.T) { ctx := context.Background() dbPool, ingestStore := setupTestDB(t) From 791ae798e2e4e613c69a15a809109b3f1199b540 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 23 Mar 2026 09:32:45 -0600 Subject: [PATCH 44/52] tweaks style and formatting to abide by the linter --- ...ol_data_migrate.go => protocol_migrate.go} | 22 ++++---- .../integrationtests/data_migration_test.go | 18 +++---- internal/services/protocol_migrate_history.go | 54 +++++++++---------- .../services/protocol_migrate_history_test.go | 6 ++- 4 files changed, 52 insertions(+), 48 deletions(-) rename cmd/{protocol_data_migrate.go => protocol_migrate.go} (91%) diff --git a/cmd/protocol_data_migrate.go b/cmd/protocol_migrate.go similarity index 91% rename from cmd/protocol_data_migrate.go rename to cmd/protocol_migrate.go index 823c21725..8eb1f754e 100644 --- a/cmd/protocol_data_migrate.go +++ b/cmd/protocol_migrate.go @@ -27,7 +27,9 @@ func (c *protocolMigrateCmd) Command() *cobra.Command { Short: "Data migration commands for protocol state", Long: "Parent command for protocol data migrations. 
Use subcommands to run specific migration tasks.", Run: func(cmd *cobra.Command, args []string) { - _ = cmd.Help() + if err := cmd.Help(); err != nil { + log.Fatalf("Error calling help command: %s", err.Error()) + } }, } @@ -140,15 +142,15 @@ func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase s }() service, err := services.NewProtocolMigrateHistoryService(services.ProtocolMigrateHistoryConfig{ - DB: dbPool, - LedgerBackend: ledgerBackend, - ProtocolsModel: models.Protocols, - ProtocolContractsModel: models.ProtocolContracts, - IngestStore: models.IngestStore, - NetworkPassphrase: networkPassphrase, - Processors: processors, - LatestLedgerCursorName: latestLedgerCursorName, - OldestLedgerCursorName: oldestLedgerCursorName, + DB: dbPool, + LedgerBackend: ledgerBackend, + ProtocolsModel: models.Protocols, + ProtocolContractsModel: models.ProtocolContracts, + IngestStore: models.IngestStore, + NetworkPassphrase: networkPassphrase, + Processors: processors, + LatestLedgerCursorName: latestLedgerCursorName, + OldestLedgerCursorName: oldestLedgerCursorName, }) if err != nil { return fmt.Errorf("creating protocol migrate history service: %w", err) diff --git a/internal/integrationtests/data_migration_test.go b/internal/integrationtests/data_migration_test.go index 9e9c06f34..1911c2a0b 100644 --- a/internal/integrationtests/data_migration_test.go +++ b/internal/integrationtests/data_migration_test.go @@ -279,15 +279,15 @@ func (s *DataMigrationTestSuite) newHistoryMigrationService( processor services.ProtocolProcessor, ) services.ProtocolMigrateHistoryService { svc, err := services.NewProtocolMigrateHistoryService(services.ProtocolMigrateHistoryConfig{ - DB: pool, - LedgerBackend: ledgerBackend, - ProtocolsModel: models.Protocols, - ProtocolContractsModel: models.ProtocolContracts, - IngestStore: models.IngestStore, - NetworkPassphrase: "Test SDF Network ; September 2015", - Processors: []services.ProtocolProcessor{processor}, - 
LatestLedgerCursorName: data.LatestLedgerCursorName, - OldestLedgerCursorName: data.OldestLedgerCursorName, + DB: pool, + LedgerBackend: ledgerBackend, + ProtocolsModel: models.Protocols, + ProtocolContractsModel: models.ProtocolContracts, + IngestStore: models.IngestStore, + NetworkPassphrase: "Test SDF Network ; September 2015", + Processors: []services.ProtocolProcessor{processor}, + LatestLedgerCursorName: data.LatestLedgerCursorName, + OldestLedgerCursorName: data.OldestLedgerCursorName, }) s.Require().NoError(err) return svc diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go index 8b046d04c..fbaa16ab4 100644 --- a/internal/services/protocol_migrate_history.go +++ b/internal/services/protocol_migrate_history.go @@ -36,28 +36,28 @@ type ProtocolMigrateHistoryService interface { var _ ProtocolMigrateHistoryService = (*protocolMigrateHistoryService)(nil) type protocolMigrateHistoryService struct { - db db.ConnectionPool - ledgerBackend ledgerbackend.LedgerBackend - protocolsModel data.ProtocolsModelInterface - protocolContractsModel data.ProtocolContractsModelInterface - ingestStore *data.IngestStoreModel - networkPassphrase string - processors map[string]ProtocolProcessor - latestLedgerCursorName string - oldestLedgerCursorName string + db db.ConnectionPool + ledgerBackend ledgerbackend.LedgerBackend + protocolsModel data.ProtocolsModelInterface + protocolContractsModel data.ProtocolContractsModelInterface + ingestStore *data.IngestStoreModel + networkPassphrase string + processors map[string]ProtocolProcessor + latestLedgerCursorName string + oldestLedgerCursorName string } // ProtocolMigrateHistoryConfig holds the configuration for creating a protocolMigrateHistoryService. 
type ProtocolMigrateHistoryConfig struct { - DB db.ConnectionPool - LedgerBackend ledgerbackend.LedgerBackend - ProtocolsModel data.ProtocolsModelInterface - ProtocolContractsModel data.ProtocolContractsModelInterface - IngestStore *data.IngestStoreModel - NetworkPassphrase string - Processors []ProtocolProcessor - LatestLedgerCursorName string - OldestLedgerCursorName string + DB db.ConnectionPool + LedgerBackend ledgerbackend.LedgerBackend + ProtocolsModel data.ProtocolsModelInterface + ProtocolContractsModel data.ProtocolContractsModelInterface + IngestStore *data.IngestStoreModel + NetworkPassphrase string + Processors []ProtocolProcessor + LatestLedgerCursorName string + OldestLedgerCursorName string } // NewProtocolMigrateHistoryService creates a new protocolMigrateHistoryService from the given config. @@ -77,15 +77,15 @@ func NewProtocolMigrateHistoryService(cfg ProtocolMigrateHistoryConfig) (*protoc } return &protocolMigrateHistoryService{ - db: cfg.DB, - ledgerBackend: cfg.LedgerBackend, - protocolsModel: cfg.ProtocolsModel, - protocolContractsModel: cfg.ProtocolContractsModel, - ingestStore: cfg.IngestStore, - networkPassphrase: cfg.NetworkPassphrase, - processors: ppMap, - latestLedgerCursorName: latestCursor, - oldestLedgerCursorName: oldestCursor, + db: cfg.DB, + ledgerBackend: cfg.LedgerBackend, + protocolsModel: cfg.ProtocolsModel, + protocolContractsModel: cfg.ProtocolContractsModel, + ingestStore: cfg.IngestStore, + networkPassphrase: cfg.NetworkPassphrase, + processors: ppMap, + latestLedgerCursorName: latestCursor, + oldestLedgerCursorName: oldestCursor, }, nil } diff --git a/internal/services/protocol_migrate_history_test.go b/internal/services/protocol_migrate_history_test.go index 5384406aa..8d7e69fa1 100644 --- a/internal/services/protocol_migrate_history_test.go +++ b/internal/services/protocol_migrate_history_test.go @@ -164,10 +164,12 @@ type cursorAdvancingProcessor struct { func (p *cursorAdvancingProcessor) ProcessLedger(ctx 
context.Context, input ProtocolProcessorInput) error { if input.LedgerSequence == p.advanceAtSeq { - _, _ = p.dbPool.ExecContext(ctx, + if _, err := p.dbPool.ExecContext(ctx, `UPDATE ingest_store SET value = $1 WHERE key = $2`, strconv.FormatUint(uint64(p.advanceAtSeq+100), 10), - protocolHistoryCursorName(p.id)) + protocolHistoryCursorName(p.id)); err != nil { + return fmt.Errorf("advancing cursor for test: %w", err) + } } return p.recordingProcessor.ProcessLedger(ctx, input) } From 209b853a9de5f5ed25f00ff0c0868845248854a2 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 23 Mar 2026 11:30:15 -0600 Subject: [PATCH 45/52] Extract generic utilities from ingest helpers and eliminate helper file Move reusable logic into internal/utils/ as generic functions (RetryWithBackoff[T], BuildMap[T]) and move cursor name helpers to ingestion_utils.go. Inline all call sites in services to use utils directly and delete the ingest helpers file entirely. Also fix variable shadow lint errors in ingest_live.go and protocol_migrate_history.go. 
--- .../2026-03-09.1-protocol_wasms.sql | 9 -- .../2026-03-09.2-protocol_contracts.sql | 12 --- internal/services/ingest.go | 18 +++- internal/services/ingest_backfill.go | 12 ++- internal/services/ingest_helpers.go | 71 -------------- internal/services/ingest_live.go | 23 +++-- internal/services/ingest_test.go | 88 +---------------- internal/services/protocol_migrate_history.go | 37 +++++-- .../services/protocol_migrate_history_test.go | 3 +- internal/utils/collections.go | 17 ++++ internal/utils/collections_test.go | 37 +++++++ internal/utils/ingestion_utils.go | 11 +++ internal/utils/retry.go | 51 ++++++++++ internal/utils/retry_test.go | 96 +++++++++++++++++++ 14 files changed, 288 insertions(+), 197 deletions(-) delete mode 100644 internal/db/migrations/2026-03-09.1-protocol_wasms.sql delete mode 100644 internal/db/migrations/2026-03-09.2-protocol_contracts.sql delete mode 100644 internal/services/ingest_helpers.go create mode 100644 internal/utils/collections.go create mode 100644 internal/utils/collections_test.go create mode 100644 internal/utils/retry.go create mode 100644 internal/utils/retry_test.go diff --git a/internal/db/migrations/2026-03-09.1-protocol_wasms.sql b/internal/db/migrations/2026-03-09.1-protocol_wasms.sql deleted file mode 100644 index e3eac642e..000000000 --- a/internal/db/migrations/2026-03-09.1-protocol_wasms.sql +++ /dev/null @@ -1,9 +0,0 @@ --- +migrate Up -CREATE TABLE protocol_wasms ( - wasm_hash BYTEA PRIMARY KEY, - protocol_id TEXT REFERENCES protocols(id), - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - --- +migrate Down -DROP TABLE IF EXISTS protocol_wasms; diff --git a/internal/db/migrations/2026-03-09.2-protocol_contracts.sql b/internal/db/migrations/2026-03-09.2-protocol_contracts.sql deleted file mode 100644 index a3d055e38..000000000 --- a/internal/db/migrations/2026-03-09.2-protocol_contracts.sql +++ /dev/null @@ -1,12 +0,0 @@ --- +migrate Up -CREATE TABLE protocol_contracts ( - contract_id BYTEA PRIMARY KEY, - 
wasm_hash BYTEA NOT NULL REFERENCES protocol_wasms(wasm_hash), - name TEXT, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - -CREATE INDEX idx_protocol_contracts_wasm_hash ON protocol_contracts(wasm_hash); - --- +migrate Down -DROP TABLE IF EXISTS protocol_contracts; diff --git a/internal/services/ingest.go b/internal/services/ingest.go index ff2b868f2..4e202aec0 100644 --- a/internal/services/ingest.go +++ b/internal/services/ingest.go @@ -22,6 +22,7 @@ import ( "github.com/stellar/wallet-backend/internal/indexer/types" "github.com/stellar/wallet-backend/internal/metrics" "github.com/stellar/wallet-backend/internal/signing/store" + "github.com/stellar/wallet-backend/internal/utils" ) const ( @@ -130,6 +131,12 @@ type ingestService struct { eligibleProtocolProcessors map[string]ProtocolProcessor } +// SetEligibleProtocolProcessorsForTest sets the eligible protocol processors for testing. +// In production, this is set by ingestLiveLedgers before each PersistLedgerData call. +func (m *ingestService) SetEligibleProtocolProcessorsForTest(processors map[string]ProtocolProcessor) { + m.eligibleProtocolProcessors = processors +} + func NewIngestService(cfg IngestServiceConfig) (*ingestService, error) { // Create worker pool for the ledger indexer (parallel transaction processing within a ledger) ledgerIndexerPool := pond.NewPool(0) @@ -145,9 +152,16 @@ func NewIngestService(cfg IngestServiceConfig) (*ingestService, error) { cfg.MetricsService.RegisterPoolMetrics("backfill", backfillPool) // Build protocol processor map from slice - ppMap, err := buildProtocolProcessorMap(cfg.ProtocolProcessors) + for i, p := range cfg.ProtocolProcessors { + if p == nil { + return nil, fmt.Errorf("protocol processor at index %d is nil", i) + } + } + ppMap, err := utils.BuildMap(cfg.ProtocolProcessors, func(p ProtocolProcessor) string { + return p.ProtocolID() + }) if err != nil { - return nil, err + return nil, fmt.Errorf("building protocol processor map: %w", err) } var ppCache 
*protocolContractCache diff --git a/internal/services/ingest_backfill.go b/internal/services/ingest_backfill.go index eeee7a7ec..23f41754e 100644 --- a/internal/services/ingest_backfill.go +++ b/internal/services/ingest_backfill.go @@ -12,11 +12,13 @@ import ( "github.com/jackc/pgx/v5/pgxpool" "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/wallet-backend/internal/data" "github.com/stellar/wallet-backend/internal/db" "github.com/stellar/wallet-backend/internal/indexer" "github.com/stellar/wallet-backend/internal/indexer/types" + "github.com/stellar/wallet-backend/internal/utils" ) // BackfillMode indicates the purpose of backfilling. @@ -521,7 +523,15 @@ func (m *ingestService) processLedgersInBatch( } for ledgerSeq := batch.StartLedger; ledgerSeq <= batch.EndLedger; ledgerSeq++ { - ledgerMeta, err := getLedgerWithRetry(ctx, backend, ledgerSeq) + ledgerMeta, err := utils.RetryWithBackoff(ctx, maxLedgerFetchRetries, maxRetryBackoff, + func(ctx context.Context) (xdr.LedgerCloseMeta, error) { + return backend.GetLedger(ctx, ledgerSeq) + }, + func(attempt int, err error, backoff time.Duration) { + log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...", + ledgerSeq, attempt+1, maxLedgerFetchRetries, err, backoff) + }, + ) if err != nil { return ledgersProcessed, nil, startTime, endTime, fmt.Errorf("getting ledger %d: %w", ledgerSeq, err) } diff --git a/internal/services/ingest_helpers.go b/internal/services/ingest_helpers.go deleted file mode 100644 index f196f555b..000000000 --- a/internal/services/ingest_helpers.go +++ /dev/null @@ -1,71 +0,0 @@ -package services - -import ( - "context" - "fmt" - "time" - - "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" - "github.com/stellar/go-stellar-sdk/support/log" - "github.com/stellar/go-stellar-sdk/xdr" -) - -// getLedgerWithRetry fetches a ledger with exponential backoff 
retry logic. -// It respects context cancellation and limits retries to maxLedgerFetchRetries attempts. -func getLedgerWithRetry(ctx context.Context, backend ledgerbackend.LedgerBackend, ledgerSeq uint32) (xdr.LedgerCloseMeta, error) { - var lastErr error - for attempt := 0; attempt < maxLedgerFetchRetries; attempt++ { - select { - case <-ctx.Done(): - return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled: %w", ctx.Err()) - default: - } - - ledgerMeta, err := backend.GetLedger(ctx, ledgerSeq) - if err == nil { - return ledgerMeta, nil - } - lastErr = err - - backoff := time.Duration(1<<attempt) * time.Second - if backoff > maxRetryBackoff { - backoff = maxRetryBackoff - } - log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...", - ledgerSeq, attempt+1, maxLedgerFetchRetries, err, backoff) - - select { - case <-ctx.Done(): - return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled during backoff: %w", ctx.Err()) - case <-time.After(backoff): - } - } - return xdr.LedgerCloseMeta{}, fmt.Errorf("failed after %d attempts: %w", maxLedgerFetchRetries, lastErr) -} - -// buildProtocolProcessorMap converts a slice of ProtocolProcessors into a map keyed by protocol ID, -// validating that no entries are nil and no IDs are duplicated. -func buildProtocolProcessorMap(processors []ProtocolProcessor) (map[string]ProtocolProcessor, error) { - ppMap := make(map[string]ProtocolProcessor, len(processors)) - for i, p := range processors { - if p == nil { - return nil, fmt.Errorf("protocol processor at index %d is nil", i) - } - id := p.ProtocolID() - if _, exists := ppMap[id]; exists { - return nil, fmt.Errorf("duplicate protocol processor ID %q", id) - } - ppMap[id] = p - } - return ppMap, nil -} - -// protocolHistoryCursorName returns the ingest_store key for a protocol's history migration cursor. 
-func protocolHistoryCursorName(protocolID string) string { - return fmt.Sprintf("protocol_%s_history_cursor", protocolID) -} - -// protocolCurrentStateCursorName returns the ingest_store key for a protocol's current state cursor. -func protocolCurrentStateCursorName(protocolID string) string { - return fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) -} diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 445841d7e..a66b8f57d 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -18,6 +18,7 @@ import ( "github.com/stellar/wallet-backend/internal/db" "github.com/stellar/wallet-backend/internal/indexer" "github.com/stellar/wallet-backend/internal/indexer/types" + "github.com/stellar/wallet-backend/internal/utils" ) const ( @@ -53,13 +54,13 @@ func (m *ingestService) protocolProcessorsEligibleForProduction(ctx context.Cont eligible := make(map[string]ProtocolProcessor, len(m.protocolProcessors)) for protocolID, processor := range m.protocolProcessors { - historyCursor := protocolHistoryCursorName(protocolID) + historyCursor := utils.ProtocolHistoryCursorName(protocolID) historyVal, err := m.models.IngestStore.Get(ctx, historyCursor) if err != nil { return nil, fmt.Errorf("reading history cursor for %s: %w", protocolID, err) } - currentStateCursor := protocolCurrentStateCursorName(protocolID) + currentStateCursor := utils.ProtocolCurrentStateCursorName(protocolID) currentStateVal, err := m.models.IngestStore.Get(ctx, currentStateCursor) if err != nil { return nil, fmt.Errorf("reading current state cursor for %s: %w", protocolID, err) @@ -150,8 +151,8 @@ func (m *ingestService) PersistLedgerData(ctx context.Context, ledgerSeq uint32, // No previous ledger to form an expected cursor value; skip CAS for this ledger. 
continue } - historyCursor := protocolHistoryCursorName(protocolID) - currentStateCursor := protocolCurrentStateCursorName(protocolID) + historyCursor := utils.ProtocolHistoryCursorName(protocolID) + currentStateCursor := utils.ProtocolCurrentStateCursorName(protocolID) expected := strconv.FormatUint(uint64(ledgerSeq-1), 10) next := strconv.FormatUint(uint64(ledgerSeq), 10) @@ -287,7 +288,15 @@ func (m *ingestService) ingestLiveLedgers(ctx context.Context, startLedger uint3 currentLedger := startLedger log.Ctx(ctx).Infof("Starting ingestion from ledger: %d", currentLedger) for { - ledgerMeta, ledgerErr := getLedgerWithRetry(ctx, m.ledgerBackend, currentLedger) + ledgerMeta, ledgerErr := utils.RetryWithBackoff(ctx, maxLedgerFetchRetries, maxRetryBackoff, + func(ctx context.Context) (xdr.LedgerCloseMeta, error) { + return m.ledgerBackend.GetLedger(ctx, currentLedger) + }, + func(attempt int, err error, backoff time.Duration) { + log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...", + currentLedger, attempt+1, maxLedgerFetchRetries, err, backoff) + }, + ) if ledgerErr != nil { return fmt.Errorf("fetching ledger %d: %w", currentLedger, ledgerErr) } @@ -309,8 +318,8 @@ func (m *ingestService) ingestLiveLedgers(ctx context.Context, startLedger uint3 // Run protocol state production (in-memory analysis before DB transaction) only // for processors that may actually persist this ledger. 
- if err := m.produceProtocolStateForProcessors(ctx, ledgerMeta, currentLedger, eligibleProcessors); err != nil { - return fmt.Errorf("producing protocol state for ledger %d: %w", currentLedger, err) + if produceErr := m.produceProtocolStateForProcessors(ctx, ledgerMeta, currentLedger, eligibleProcessors); produceErr != nil { + return fmt.Errorf("producing protocol state for ledger %d: %w", currentLedger, produceErr) } // All DB operations in a single atomic transaction with retry diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index ac17ae74f..8214f2222 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -27,6 +27,7 @@ import ( "github.com/stellar/wallet-backend/internal/indexer/types" "github.com/stellar/wallet-backend/internal/metrics" "github.com/stellar/wallet-backend/internal/signing/store" + "github.com/stellar/wallet-backend/internal/utils" ) var ( @@ -391,7 +392,7 @@ func Test_NewIngestService_ProtocolProcessorValidation(t *testing.T) { cfg.ProtocolProcessors = []ProtocolProcessor{p1, p2} _, err := NewIngestService(cfg) require.Error(t, err) - assert.Contains(t, err.Error(), `duplicate protocol processor ID "dup-id"`) + assert.Contains(t, err.Error(), `duplicate key "dup-id"`) }) } @@ -678,87 +679,6 @@ func Test_analyzeBatchResults(t *testing.T) { } } -func Test_getLedgerWithRetry(t *testing.T) { - ctx := context.Background() - - testCases := []struct { - name string - setupBackend func(*LedgerBackendMock) - ctxFunc func() (context.Context, context.CancelFunc) - wantErr bool - wantErrContains string - }{ - { - name: "success_on_first_try", - setupBackend: func(lb *LedgerBackendMock) { - var meta xdr.LedgerCloseMeta - err := xdr.SafeUnmarshalBase64(ledgerMetadataWith0Tx, &meta) - require.NoError(t, err) - lb.On("GetLedger", mock.Anything, uint32(100)).Return(meta, nil).Once() - }, - ctxFunc: func() (context.Context, context.CancelFunc) { - return context.WithCancel(ctx) - }, - wantErr: 
false, - }, - { - name: "success_after_retries", - setupBackend: func(lb *LedgerBackendMock) { - var meta xdr.LedgerCloseMeta - err := xdr.SafeUnmarshalBase64(ledgerMetadataWith0Tx, &meta) - require.NoError(t, err) - // Fail twice, then succeed - lb.On("GetLedger", mock.Anything, uint32(100)).Return(xdr.LedgerCloseMeta{}, fmt.Errorf("temporary error")).Twice() - lb.On("GetLedger", mock.Anything, uint32(100)).Return(meta, nil).Once() - }, - ctxFunc: func() (context.Context, context.CancelFunc) { - return context.WithCancel(ctx) - }, - wantErr: false, - }, - { - name: "context_cancelled_immediately", - setupBackend: func(lb *LedgerBackendMock) { - // May or may not be called depending on timing - lb.On("GetLedger", mock.Anything, uint32(100)).Return(xdr.LedgerCloseMeta{}, fmt.Errorf("error")).Maybe() - }, - ctxFunc: func() (context.Context, context.CancelFunc) { - cancelledCtx, cancel := context.WithCancel(ctx) - cancel() // Cancel immediately - return cancelledCtx, cancel - }, - wantErr: true, - wantErrContains: "context cancelled", - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - mockLedgerBackend := &LedgerBackendMock{} - tc.setupBackend(mockLedgerBackend) - defer mockLedgerBackend.AssertExpectations(t) - - testCtx, cancel := tc.ctxFunc() - defer cancel() - - ledger, err := getLedgerWithRetry(testCtx, mockLedgerBackend, 100) - if tc.wantErr { - require.Error(t, err) - if tc.wantErrContains != "" { - assert.Contains(t, err.Error(), tc.wantErrContains) - } - } else { - require.NoError(t, err) - - var meta xdr.LedgerCloseMeta - err := xdr.SafeUnmarshalBase64(ledgerMetadataWith0Tx, &meta) - require.NoError(t, err) - assert.Equal(t, meta, ledger) - } - }) - } -} - func Test_ingestService_setupBatchBackend(t *testing.T) { dbt := dbtest.Open(t) defer dbt.Close() @@ -2807,11 +2727,11 @@ func setupProtocolCursors(t *testing.T, ctx context.Context, pool db.ConnectionP t.Helper() _, err := pool.ExecContext(ctx, `INSERT INTO ingest_store 
(key, value) VALUES ($1, $2)`, - protocolHistoryCursorName(protocolID), historyCursor) + utils.ProtocolHistoryCursorName(protocolID), historyCursor) require.NoError(t, err) _, err = pool.ExecContext(ctx, `INSERT INTO ingest_store (key, value) VALUES ($1, $2)`, - protocolCurrentStateCursorName(protocolID), currentStateCursor) + utils.ProtocolCurrentStateCursorName(protocolID), currentStateCursor) require.NoError(t, err) } diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go index fbaa16ab4..e6d846c64 100644 --- a/internal/services/protocol_migrate_history.go +++ b/internal/services/protocol_migrate_history.go @@ -9,9 +9,11 @@ import ( "github.com/jackc/pgx/v5" "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/wallet-backend/internal/data" "github.com/stellar/wallet-backend/internal/db" + "github.com/stellar/wallet-backend/internal/utils" ) const ( @@ -62,9 +64,16 @@ type ProtocolMigrateHistoryConfig struct { // NewProtocolMigrateHistoryService creates a new protocolMigrateHistoryService from the given config. 
func NewProtocolMigrateHistoryService(cfg ProtocolMigrateHistoryConfig) (*protocolMigrateHistoryService, error) { - ppMap, err := buildProtocolProcessorMap(cfg.Processors) + for i, p := range cfg.Processors { + if p == nil { + return nil, fmt.Errorf("protocol processor at index %d is nil", i) + } + } + ppMap, err := utils.BuildMap(cfg.Processors, func(p ProtocolProcessor) string { + return p.ProtocolID() + }) if err != nil { - return nil, err + return nil, fmt.Errorf("building protocol processor map: %w", err) } latestCursor := cfg.LatestLedgerCursorName @@ -102,10 +111,10 @@ func (s *protocolMigrateHistoryService) Run(ctx context.Context, protocolIDs []s return nil } - if err := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { + if txErr := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { return s.protocolsModel.UpdateHistoryMigrationStatus(ctx, dbTx, activeProtocolIDs, data.StatusInProgress) - }); err != nil { - return fmt.Errorf("setting history migration status to in_progress: %w", err) + }); txErr != nil { + return fmt.Errorf("setting history migration status to in_progress: %w", txErr) } // Phase 2: Process each protocol @@ -137,10 +146,10 @@ func (s *protocolMigrateHistoryService) Run(ctx context.Context, protocolIDs []s } // Phase 3: Set status to success - if err := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { + if txErr := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { return s.protocolsModel.UpdateHistoryMigrationStatus(ctx, dbTx, activeProtocolIDs, data.StatusSuccess) - }); err != nil { - return fmt.Errorf("setting history migration status to success: %w", err) + }); txErr != nil { + return fmt.Errorf("setting history migration status to success: %w", txErr) } log.Ctx(ctx).Infof("History migration completed successfully for protocols: %v", activeProtocolIDs) @@ -221,7 +230,7 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, // Initialize trackers: read/initialize cursor 
for each protocol trackers := make([]*protocolTracker, 0, len(protocolIDs)) for _, pid := range protocolIDs { - cursorName := protocolHistoryCursorName(pid) + cursorName := utils.ProtocolHistoryCursorName(pid) cursorValue, readErr := s.ingestStore.Get(ctx, cursorName) if readErr != nil { return nil, fmt.Errorf("reading history cursor for %s: %w", pid, readErr) @@ -311,7 +320,15 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, } // Fetch ledger ONCE for all protocols - ledgerMeta, fetchErr := getLedgerWithRetry(ctx, s.ledgerBackend, seq) + ledgerMeta, fetchErr := utils.RetryWithBackoff(ctx, maxLedgerFetchRetries, maxRetryBackoff, + func(ctx context.Context) (xdr.LedgerCloseMeta, error) { + return s.ledgerBackend.GetLedger(ctx, seq) + }, + func(attempt int, err error, backoff time.Duration) { + log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...", + seq, attempt+1, maxLedgerFetchRetries, err, backoff) + }, + ) if fetchErr != nil { return handedOffProtocolIDs(trackers), fmt.Errorf("fetching ledger %d: %w", seq, fetchErr) } diff --git a/internal/services/protocol_migrate_history_test.go b/internal/services/protocol_migrate_history_test.go index 8d7e69fa1..c159bd057 100644 --- a/internal/services/protocol_migrate_history_test.go +++ b/internal/services/protocol_migrate_history_test.go @@ -19,6 +19,7 @@ import ( "github.com/stellar/wallet-backend/internal/db" "github.com/stellar/wallet-backend/internal/db/dbtest" "github.com/stellar/wallet-backend/internal/metrics" + "github.com/stellar/wallet-backend/internal/utils" ) // multiLedgerBackend is a test double that serves ledger meta for a range of ledgers. 
@@ -167,7 +168,7 @@ func (p *cursorAdvancingProcessor) ProcessLedger(ctx context.Context, input Prot if _, err := p.dbPool.ExecContext(ctx, `UPDATE ingest_store SET value = $1 WHERE key = $2`, strconv.FormatUint(uint64(p.advanceAtSeq+100), 10), - protocolHistoryCursorName(p.id)); err != nil { + utils.ProtocolHistoryCursorName(p.id)); err != nil { return fmt.Errorf("advancing cursor for test: %w", err) } } diff --git a/internal/utils/collections.go b/internal/utils/collections.go new file mode 100644 index 000000000..3ad3c05b2 --- /dev/null +++ b/internal/utils/collections.go @@ -0,0 +1,17 @@ +package utils + +import "fmt" + +// BuildMap converts a slice into a map keyed by keyFn. Returns an error if +// any two elements produce the same key. +func BuildMap[T any](items []T, keyFn func(T) string) (map[string]T, error) { + result := make(map[string]T, len(items)) + for _, item := range items { + key := keyFn(item) + if _, exists := result[key]; exists { + return nil, fmt.Errorf("duplicate key %q", key) + } + result[key] = item + } + return result, nil +} diff --git a/internal/utils/collections_test.go b/internal/utils/collections_test.go new file mode 100644 index 000000000..f8af571b7 --- /dev/null +++ b/internal/utils/collections_test.go @@ -0,0 +1,37 @@ +package utils + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuildMap_Success(t *testing.T) { + type item struct { + id string + name string + } + items := []item{ + {id: "a", name: "Alice"}, + {id: "b", name: "Bob"}, + } + result, err := BuildMap(items, func(i item) string { return i.id }) + require.NoError(t, err) + assert.Len(t, result, 2) + assert.Equal(t, "Alice", result["a"].name) + assert.Equal(t, "Bob", result["b"].name) +} + +func TestBuildMap_EmptySlice(t *testing.T) { + result, err := BuildMap([]string{}, func(s string) string { return s }) + require.NoError(t, err) + assert.Empty(t, result) +} + +func TestBuildMap_DuplicateKey(t 
*testing.T) { + items := []string{"x", "y", "x"} + _, err := BuildMap(items, func(s string) string { return s }) + require.Error(t, err) + assert.Contains(t, err.Error(), `duplicate key "x"`) +} diff --git a/internal/utils/ingestion_utils.go b/internal/utils/ingestion_utils.go index a8bd58546..937c4f431 100644 --- a/internal/utils/ingestion_utils.go +++ b/internal/utils/ingestion_utils.go @@ -1,6 +1,7 @@ package utils import ( + "fmt" "strconv" "github.com/stellar/go-stellar-sdk/xdr" @@ -41,3 +42,13 @@ func Memo(memo xdr.Memo, txHash string) (*string, string) { // sentry.CaptureException(fmt.Errorf("failed to parse memo for type %q and transaction %s", memoType.String(), txHash)) return nil, memoType.String() } + +// ProtocolHistoryCursorName returns the ingest_store key for a protocol's history migration cursor. +func ProtocolHistoryCursorName(protocolID string) string { + return fmt.Sprintf("protocol_%s_history_cursor", protocolID) +} + +// ProtocolCurrentStateCursorName returns the ingest_store key for a protocol's current state cursor. +func ProtocolCurrentStateCursorName(protocolID string) string { + return fmt.Sprintf("protocol_%s_current_state_cursor", protocolID) +} diff --git a/internal/utils/retry.go b/internal/utils/retry.go new file mode 100644 index 000000000..d82f973f5 --- /dev/null +++ b/internal/utils/retry.go @@ -0,0 +1,51 @@ +package utils + +import ( + "context" + "fmt" + "time" +) + +// RetryWithBackoff calls fn up to maxRetries times with exponential backoff +// capped at maxBackoff. It respects context cancellation between attempts. +// onRetry, if non-nil, is called before each backoff wait with the attempt +// number (0-indexed), the error, and the backoff duration. 
+func RetryWithBackoff[T any]( + ctx context.Context, + maxRetries int, + maxBackoff time.Duration, + fn func(ctx context.Context) (T, error), + onRetry func(attempt int, err error, backoff time.Duration), +) (T, error) { + var zero T + var lastErr error + for attempt := 0; attempt < maxRetries; attempt++ { + select { + case <-ctx.Done(): + return zero, fmt.Errorf("context cancelled: %w", ctx.Err()) + default: + } + + result, err := fn(ctx) + if err == nil { + return result, nil + } + lastErr = err + + backoff := time.Duration(1< maxBackoff { + backoff = maxBackoff + } + + if onRetry != nil { + onRetry(attempt, err, backoff) + } + + select { + case <-ctx.Done(): + return zero, fmt.Errorf("context cancelled during backoff: %w", ctx.Err()) + case <-time.After(backoff): + } + } + return zero, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr) +} diff --git a/internal/utils/retry_test.go b/internal/utils/retry_test.go new file mode 100644 index 000000000..4cb23c7ac --- /dev/null +++ b/internal/utils/retry_test.go @@ -0,0 +1,96 @@ +package utils + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestRetryWithBackoff_SucceedsFirstAttempt(t *testing.T) { + result, err := RetryWithBackoff(context.Background(), 3, 10*time.Second, + func(ctx context.Context) (string, error) { + return "ok", nil + }, nil) + require.NoError(t, err) + assert.Equal(t, "ok", result) +} + +func TestRetryWithBackoff_SucceedsAfterRetries(t *testing.T) { + attempts := 0 + result, err := RetryWithBackoff(context.Background(), 5, 1*time.Second, + func(ctx context.Context) (int, error) { + attempts++ + if attempts < 3 { + return 0, errors.New("not yet") + } + return 42, nil + }, nil) + require.NoError(t, err) + assert.Equal(t, 42, result) + assert.Equal(t, 3, attempts) +} + +func TestRetryWithBackoff_ExhaustsRetries(t *testing.T) { + sentinel := errors.New("persistent failure") + attempts := 0 + 
_, err := RetryWithBackoff(context.Background(), 3, 1*time.Second, + func(ctx context.Context) (string, error) { + attempts++ + return "", sentinel + }, nil) + require.Error(t, err) + assert.ErrorIs(t, err, sentinel) + assert.Contains(t, err.Error(), "failed after 3 attempts") + assert.Equal(t, 3, attempts) +} + +func TestRetryWithBackoff_RespectsContextCancellation(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + _, err := RetryWithBackoff(ctx, 5, 10*time.Second, + func(ctx context.Context) (string, error) { + return "", errors.New("should not reach") + }, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "context cancelled") +} + +func TestRetryWithBackoff_CallsOnRetry(t *testing.T) { + var retryAttempts []int + sentinel := errors.New("fail") + + _, err := RetryWithBackoff(context.Background(), 3, 1*time.Second, + func(ctx context.Context) (string, error) { + return "", sentinel + }, + func(attempt int, err error, backoff time.Duration) { + retryAttempts = append(retryAttempts, attempt) + assert.ErrorIs(t, err, sentinel) + assert.Greater(t, backoff, time.Duration(0)) + }) + require.Error(t, err) + assert.Equal(t, []int{0, 1, 2}, retryAttempts) +} + +func TestRetryWithBackoff_CapsBackoff(t *testing.T) { + maxBackoff := 2 * time.Second + var observedBackoffs []time.Duration + + _, err := RetryWithBackoff(context.Background(), 5, maxBackoff, + func(ctx context.Context) (string, error) { + return "", errors.New("fail") + }, + func(attempt int, err error, backoff time.Duration) { + observedBackoffs = append(observedBackoffs, backoff) + }) + require.Error(t, err) + + for _, b := range observedBackoffs { + assert.LessOrEqual(t, b, maxBackoff) + } +} From a22f382f445e3adb3a3b0d3eb59fd992002a1b25 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 30 Mar 2026 12:59:11 -0600 Subject: [PATCH 46/52] Add migration for protocol_wasms and protocol_contracts tables These tables were referenced by model code and tests 
but had no corresponding migration, causing test failures after rebase. --- ...026-03-09.1-protocol_wasms_and_contracts.sql | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 internal/db/migrations/2026-03-09.1-protocol_wasms_and_contracts.sql diff --git a/internal/db/migrations/2026-03-09.1-protocol_wasms_and_contracts.sql b/internal/db/migrations/2026-03-09.1-protocol_wasms_and_contracts.sql new file mode 100644 index 000000000..80dc75b50 --- /dev/null +++ b/internal/db/migrations/2026-03-09.1-protocol_wasms_and_contracts.sql @@ -0,0 +1,17 @@ +-- +migrate Up +CREATE TABLE protocol_wasms ( + wasm_hash BYTEA PRIMARY KEY, + protocol_id TEXT REFERENCES protocols(id), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE TABLE protocol_contracts ( + contract_id BYTEA PRIMARY KEY, + wasm_hash BYTEA NOT NULL REFERENCES protocol_wasms(wasm_hash), + name TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- +migrate Down +DROP TABLE IF EXISTS protocol_contracts; +DROP TABLE IF EXISTS protocol_wasms; From 3c7714fb58b5d19a02f30884d9db46ccd0d8cbf4 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 13 Apr 2026 11:00:05 -0600 Subject: [PATCH 47/52] feature(internal): single UnboundedRange for history migration Fixes the RPCLedgerBackend re-PrepareRange bug would fail on the second PrepareRange call, whether triggered by tip advancement between batches or by the convergence poll. - Replace the outer loop + convergence-poll PrepareRange block with one PrepareRange(UnboundedRange(startLedger)) wrapped in RetryWithBackoff. - Replace the per-batch Bounded iteration with a single forward loop whose GetLedger uses a per-call deadline; a DeadlineExceeded without parent-ctx cancellation signals convergence at the RPC tip. - Drop latestLedgerCursorName from the service config/CLI; convergence no longer depends on live ingestion's tip cursor. Per-protocol CAS still owns the handoff signal. 
- Extract initTrackers, loadContracts, processTrackerAtLedger, minNonHandedOffCursor, anyTrackerNeedsLedger helpers (addresses review comment #7 on function length). - Inject ConvergencePollTimeout for fast unit tests (default 5s). - Add tests: PrepareRange-called-once guard, tip-advances-mid-run, context-cancelled-during-fetch, oldest_ingest_ledger=0 (review comment #8) --- cmd/protocol_migrate.go | 7 +- .../integrationtests/data_migration_test.go | 4 +- internal/serve/graphql/generated/generated.go | 5 +- .../graphql/resolvers/account.resolvers.go | 3 +- .../graphql/resolvers/mutations.resolvers.go | 3 +- .../graphql/resolvers/queries.resolvers.go | 3 +- .../resolvers/statechange.resolvers.go | 20 +- internal/services/protocol_migrate_history.go | 380 ++++++++++-------- .../services/protocol_migrate_history_test.go | 277 +++++++++++-- 9 files changed, 459 insertions(+), 243 deletions(-) diff --git a/cmd/protocol_migrate.go b/cmd/protocol_migrate.go index 8eb1f754e..1bfa0cf33 100644 --- a/cmd/protocol_migrate.go +++ b/cmd/protocol_migrate.go @@ -44,7 +44,6 @@ func (c *protocolMigrateCmd) historyCommand() *cobra.Command { var networkPassphrase string var protocolIDs []string var logLevel string - var latestLedgerCursorName string var oldestLedgerCursorName string cfgOpts := config.ConfigOptions{ @@ -79,7 +78,7 @@ func (c *protocolMigrateCmd) historyCommand() *cobra.Command { return nil }, RunE: func(_ *cobra.Command, _ []string) error { - return c.RunHistory(databaseURL, rpcURL, networkPassphrase, protocolIDs, latestLedgerCursorName, oldestLedgerCursorName) + return c.RunHistory(databaseURL, rpcURL, networkPassphrase, protocolIDs, oldestLedgerCursorName) }, } @@ -89,13 +88,12 @@ func (c *protocolMigrateCmd) historyCommand() *cobra.Command { cmd.Flags().StringSliceVar(&protocolIDs, "protocol-id", nil, "Protocol ID(s) to migrate (required, repeatable)") cmd.Flags().StringVar(&logLevel, "log-level", "", `Log level: "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", 
"PANIC"`) - cmd.Flags().StringVar(&latestLedgerCursorName, "latest-ledger-cursor-name", data.LatestLedgerCursorName, "Name of the latest ledger cursor in the ingest store. Must match the value used by the ingest service.") cmd.Flags().StringVar(&oldestLedgerCursorName, "oldest-ledger-cursor-name", data.OldestLedgerCursorName, "Name of the oldest ledger cursor in the ingest store. Must match the value used by the ingest service.") return cmd } -func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase string, protocolIDs []string, latestLedgerCursorName, oldestLedgerCursorName string) error { +func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase string, protocolIDs []string, oldestLedgerCursorName string) error { ctx := context.Background() // Build processors from protocol IDs using the dynamic registry @@ -149,7 +147,6 @@ func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase s IngestStore: models.IngestStore, NetworkPassphrase: networkPassphrase, Processors: processors, - LatestLedgerCursorName: latestLedgerCursorName, OldestLedgerCursorName: oldestLedgerCursorName, }) if err != nil { diff --git a/internal/integrationtests/data_migration_test.go b/internal/integrationtests/data_migration_test.go index 1911c2a0b..23487e53c 100644 --- a/internal/integrationtests/data_migration_test.go +++ b/internal/integrationtests/data_migration_test.go @@ -286,8 +286,10 @@ func (s *DataMigrationTestSuite) newHistoryMigrationService( IngestStore: models.IngestStore, NetworkPassphrase: "Test SDF Network ; September 2015", Processors: []services.ProtocolProcessor{processor}, - LatestLedgerCursorName: data.LatestLedgerCursorName, OldestLedgerCursorName: data.OldestLedgerCursorName, + // Short poll so the integration test converges quickly once the + // migration reaches the RPC tip. 
+ ConvergencePollTimeout: 500 * time.Millisecond, }) s.Require().NoError(err) return svc diff --git a/internal/serve/graphql/generated/generated.go b/internal/serve/graphql/generated/generated.go index ccbc9cf07..3c4ce2d0a 100644 --- a/internal/serve/graphql/generated/generated.go +++ b/internal/serve/graphql/generated/generated.go @@ -14,11 +14,10 @@ import ( "github.com/99designs/gqlgen/graphql" "github.com/99designs/gqlgen/graphql/introspection" - gqlparser "github.com/vektah/gqlparser/v2" - "github.com/vektah/gqlparser/v2/ast" - "github.com/stellar/wallet-backend/internal/indexer/types" "github.com/stellar/wallet-backend/internal/serve/graphql/scalars" + gqlparser "github.com/vektah/gqlparser/v2" + "github.com/vektah/gqlparser/v2/ast" ) // region ************************** generated!.gotpl ************************** diff --git a/internal/serve/graphql/resolvers/account.resolvers.go b/internal/serve/graphql/resolvers/account.resolvers.go index 63235b21a..7cdbc0100 100644 --- a/internal/serve/graphql/resolvers/account.resolvers.go +++ b/internal/serve/graphql/resolvers/account.resolvers.go @@ -10,11 +10,10 @@ import ( "strings" "time" - "github.com/vektah/gqlparser/v2/gqlerror" - "github.com/stellar/wallet-backend/internal/indexer/types" graphql1 "github.com/stellar/wallet-backend/internal/serve/graphql/generated" "github.com/stellar/wallet-backend/internal/utils" + "github.com/vektah/gqlparser/v2/gqlerror" ) // Address is the resolver for the address field. 
diff --git a/internal/serve/graphql/resolvers/mutations.resolvers.go b/internal/serve/graphql/resolvers/mutations.resolvers.go index 9b2a6390e..4b5d1086c 100644 --- a/internal/serve/graphql/resolvers/mutations.resolvers.go +++ b/internal/serve/graphql/resolvers/mutations.resolvers.go @@ -10,14 +10,13 @@ import ( "fmt" "github.com/stellar/go-stellar-sdk/txnbuild" - "github.com/vektah/gqlparser/v2/gqlerror" - "github.com/stellar/wallet-backend/internal/entities" graphql1 "github.com/stellar/wallet-backend/internal/serve/graphql/generated" "github.com/stellar/wallet-backend/internal/services" "github.com/stellar/wallet-backend/internal/signing" "github.com/stellar/wallet-backend/internal/signing/store" "github.com/stellar/wallet-backend/pkg/sorobanauth" + "github.com/vektah/gqlparser/v2/gqlerror" ) // BuildTransaction is the resolver for the buildTransaction field. diff --git a/internal/serve/graphql/resolvers/queries.resolvers.go b/internal/serve/graphql/resolvers/queries.resolvers.go index f5b9ed775..ced5dbec6 100644 --- a/internal/serve/graphql/resolvers/queries.resolvers.go +++ b/internal/serve/graphql/resolvers/queries.resolvers.go @@ -11,11 +11,10 @@ import ( "sync" "github.com/stellar/go-stellar-sdk/support/log" - "github.com/vektah/gqlparser/v2/gqlerror" - "github.com/stellar/wallet-backend/internal/indexer/types" graphql1 "github.com/stellar/wallet-backend/internal/serve/graphql/generated" "github.com/stellar/wallet-backend/internal/utils" + "github.com/vektah/gqlparser/v2/gqlerror" ) // TransactionByHash is the resolver for the transactionByHash field. 
diff --git a/internal/serve/graphql/resolvers/statechange.resolvers.go b/internal/serve/graphql/resolvers/statechange.resolvers.go index b9a4ededf..895f00708 100644 --- a/internal/serve/graphql/resolvers/statechange.resolvers.go +++ b/internal/serve/graphql/resolvers/statechange.resolvers.go @@ -431,14 +431,12 @@ func (r *Resolver) TrustlineChange() graphql1.TrustlineChangeResolver { return &trustlineChangeResolver{r} } -type ( - accountChangeResolver struct{ *Resolver } - balanceAuthorizationChangeResolver struct{ *Resolver } - flagsChangeResolver struct{ *Resolver } - metadataChangeResolver struct{ *Resolver } - reservesChangeResolver struct{ *Resolver } - signerChangeResolver struct{ *Resolver } - signerThresholdsChangeResolver struct{ *Resolver } - standardBalanceChangeResolver struct{ *Resolver } - trustlineChangeResolver struct{ *Resolver } -) +type accountChangeResolver struct{ *Resolver } +type balanceAuthorizationChangeResolver struct{ *Resolver } +type flagsChangeResolver struct{ *Resolver } +type metadataChangeResolver struct{ *Resolver } +type reservesChangeResolver struct{ *Resolver } +type signerChangeResolver struct{ *Resolver } +type signerThresholdsChangeResolver struct{ *Resolver } +type standardBalanceChangeResolver struct{ *Resolver } +type trustlineChangeResolver struct{ *Resolver } diff --git a/internal/services/protocol_migrate_history.go b/internal/services/protocol_migrate_history.go index e6d846c64..d934f81e8 100644 --- a/internal/services/protocol_migrate_history.go +++ b/internal/services/protocol_migrate_history.go @@ -2,6 +2,7 @@ package services import ( "context" + "errors" "fmt" "strconv" "time" @@ -17,8 +18,9 @@ import ( ) const ( - // convergencePollTimeout is the timeout for polling for new ledgers at the tip. - convergencePollTimeout = 5 * time.Second + // defaultConvergencePollTimeout bounds how long a single GetLedger call may + // block waiting for a new ledger at the tip. Exceeding it signals convergence. 
+ defaultConvergencePollTimeout = 5 * time.Second ) // protocolTracker holds per-protocol state for the ledger-first migration loop. @@ -45,8 +47,8 @@ type protocolMigrateHistoryService struct { ingestStore *data.IngestStoreModel networkPassphrase string processors map[string]ProtocolProcessor - latestLedgerCursorName string oldestLedgerCursorName string + convergencePollTimeout time.Duration } // ProtocolMigrateHistoryConfig holds the configuration for creating a protocolMigrateHistoryService. @@ -58,8 +60,10 @@ type ProtocolMigrateHistoryConfig struct { IngestStore *data.IngestStoreModel NetworkPassphrase string Processors []ProtocolProcessor - LatestLedgerCursorName string OldestLedgerCursorName string + // ConvergencePollTimeout optionally overrides the per-ledger fetch deadline + // used to detect convergence at the RPC tip. Zero uses defaultConvergencePollTimeout. + ConvergencePollTimeout time.Duration } // NewProtocolMigrateHistoryService creates a new protocolMigrateHistoryService from the given config. 
@@ -76,15 +80,16 @@ func NewProtocolMigrateHistoryService(cfg ProtocolMigrateHistoryConfig) (*protoc return nil, fmt.Errorf("building protocol processor map: %w", err) } - latestCursor := cfg.LatestLedgerCursorName - if latestCursor == "" { - latestCursor = data.LatestLedgerCursorName - } oldestCursor := cfg.OldestLedgerCursorName if oldestCursor == "" { oldestCursor = data.OldestLedgerCursorName } + pollTimeout := cfg.ConvergencePollTimeout + if pollTimeout == 0 { + pollTimeout = defaultConvergencePollTimeout + } + return &protocolMigrateHistoryService{ db: cfg.DB, ledgerBackend: cfg.LedgerBackend, @@ -93,8 +98,8 @@ func NewProtocolMigrateHistoryService(cfg ProtocolMigrateHistoryConfig) (*protoc ingestStore: cfg.IngestStore, networkPassphrase: cfg.NetworkPassphrase, processors: ppMap, - latestLedgerCursorName: latestCursor, oldestLedgerCursorName: oldestCursor, + convergencePollTimeout: pollTimeout, }, nil } @@ -217,8 +222,14 @@ func (s *protocolMigrateHistoryService) validate(ctx context.Context, protocolID // processAllProtocols runs history migration for all protocols using ledger-first iteration. // Each ledger is fetched once and processed by all eligible protocols, avoiding redundant RPC calls. +// +// The backend is prepared exactly once with an UnboundedRange starting at the +// minimum tracker cursor + 1. Convergence with live ingestion is detected in +// two ways: +// - Per-protocol CAS failure (live ingestion advanced the cursor past us) → handoff. +// - GetLedger blocks past convergencePollTimeout because no newer ledger is +// available at the RPC tip → all remaining work is done. 
func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, protocolIDs []string) ([]string, error) { - // Read oldest_ingest_ledger oldestLedger, err := s.ingestStore.Get(ctx, s.oldestLedgerCursorName) if err != nil { return nil, fmt.Errorf("reading oldest ingest ledger: %w", err) @@ -227,7 +238,121 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, return nil, fmt.Errorf("ingestion has not started yet (oldest_ingest_ledger is 0)") } - // Initialize trackers: read/initialize cursor for each protocol + trackers, err := s.initTrackers(ctx, protocolIDs, oldestLedger) + if err != nil { + return nil, err + } + + contractsByProtocol, err := s.loadContracts(ctx, trackers) + if err != nil { + return nil, err + } + + startLedger := minNonHandedOffCursor(trackers) + 1 + + log.Ctx(ctx).Infof("Processing ledgers starting at %d (unbounded) for %d protocol(s)", startLedger, len(protocolIDs)) + + prepareFn := func(ctx context.Context) (struct{}, error) { + return struct{}{}, s.ledgerBackend.PrepareRange(ctx, ledgerbackend.UnboundedRange(startLedger)) + } + if _, prepErr := utils.RetryWithBackoff(ctx, maxLedgerFetchRetries, maxRetryBackoff, prepareFn, + func(attempt int, err error, backoff time.Duration) { + log.Ctx(ctx).Warnf("Error preparing unbounded range from %d (attempt %d/%d): %v, retrying in %v...", + startLedger, attempt+1, maxLedgerFetchRetries, err, backoff) + }, + ); prepErr != nil { + return handedOffProtocolIDs(trackers), fmt.Errorf("preparing unbounded range from %d: %w", startLedger, prepErr) + } + + for seq := startLedger; ; seq++ { + if err := ctx.Err(); err != nil { + return handedOffProtocolIDs(trackers), fmt.Errorf("context cancelled: %w", err) + } + if allHandedOff(trackers) { + return handedOffProtocolIDs(trackers), nil + } + + // Skip if no non-handed-off tracker needs this ledger. 
+ if !anyTrackerNeedsLedger(trackers, seq) { + continue + } + + ledgerMeta, fetchErr := s.fetchLedgerOrConverge(ctx, seq) + if errors.Is(fetchErr, errConverged) { + log.Ctx(ctx).Infof("Converged at ledger %d (no new ledger within %v)", seq-1, s.convergencePollTimeout) + return handedOffProtocolIDs(trackers), nil + } + if fetchErr != nil { + return handedOffProtocolIDs(trackers), fmt.Errorf("fetching ledger %d: %w", seq, fetchErr) + } + + for _, t := range trackers { + if t.handedOff || t.cursorValue >= seq { + continue + } + if err := s.processTrackerAtLedger(ctx, t, seq, ledgerMeta, contractsByProtocol[t.protocolID]); err != nil { + return handedOffProtocolIDs(trackers), err + } + } + + if seq%100 == 0 { + log.Ctx(ctx).Infof("Progress: processed ledger %d", seq) + } + } +} + +// errConverged signals that a GetLedger poll hit its deadline without a new +// ledger arriving — migration has caught up to the RPC tip. +var errConverged = errors.New("converged: no new ledger within poll timeout") + +// fetchLedgerOrConverge fetches a single ledger, retrying on transient RPC errors +// with exponential backoff. Returns errConverged when the per-call deadline +// elapses without a response (signaling we've caught up to the tip). Returns +// the wrapped parent-context error if the caller's context is cancelled. 
+func (s *protocolMigrateHistoryService) fetchLedgerOrConverge(ctx context.Context, seq uint32) (xdr.LedgerCloseMeta, error) {
+	var lastErr error
+	for attempt := 0; attempt < maxLedgerFetchRetries; attempt++ {
+		if err := ctx.Err(); err != nil {
+			return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled: %w", err)
+		}
+
+		pollCtx, cancel := context.WithTimeout(ctx, s.convergencePollTimeout)
+		meta, err := s.ledgerBackend.GetLedger(pollCtx, seq)
+		pollDeadline := pollCtx.Err() == context.DeadlineExceeded
+		cancel()
+
+		if err == nil {
+			return meta, nil
+		}
+		if ctx.Err() != nil {
+			return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled: %w", ctx.Err())
+		}
+		// If the per-call deadline fired with no result, treat as convergence.
+		if pollDeadline || errors.Is(err, context.DeadlineExceeded) {
+			return xdr.LedgerCloseMeta{}, errConverged
+		}
+		lastErr = err
+
+		backoff := time.Duration(1<<attempt) * time.Second
+		if backoff > maxRetryBackoff {
+			backoff = maxRetryBackoff
+		}
+		log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...",
+			seq, attempt+1, maxLedgerFetchRetries, err, backoff)
+
+		select {
+		case <-ctx.Done():
+			return xdr.LedgerCloseMeta{}, fmt.Errorf("context cancelled during backoff: %w", ctx.Err())
+		case <-time.After(backoff):
+		}
+	}
+	return xdr.LedgerCloseMeta{}, fmt.Errorf("failed after %d attempts: %w", maxLedgerFetchRetries, lastErr)
+}
+
+// initTrackers reads (or initializes) each protocol's history cursor and builds
+// the per-protocol tracker slice. Freshly-seen protocols have their cursor set
+// to oldestLedger-1 so the first processed ledger is oldestLedger.
+func (s *protocolMigrateHistoryService) initTrackers(ctx context.Context, protocolIDs []string, oldestLedger uint32) ([]*protocolTracker, error) { trackers := make([]*protocolTracker, 0, len(protocolIDs)) for _, pid := range protocolIDs { cursorName := utils.ProtocolHistoryCursorName(pid) @@ -253,9 +378,13 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, processor: s.processors[pid], }) } + return trackers, nil +} - // Load contracts once — all relevant contracts are in the DB before migration starts - // (validate() requires ClassificationStatus == StatusSuccess). +// loadContracts preloads each protocol's contract set once up front. validate() +// already enforces ClassificationStatus == StatusSuccess, so the DB is the +// source of truth for the full contract set. +func (s *protocolMigrateHistoryService) loadContracts(ctx context.Context, trackers []*protocolTracker) (map[string][]data.ProtocolContracts, error) { contractsByProtocol := make(map[string][]data.ProtocolContracts, len(trackers)) for _, t := range trackers { contracts, err := s.protocolContractsModel.GetByProtocolID(ctx, t.protocolID) @@ -264,180 +393,83 @@ func (s *protocolMigrateHistoryService) processAllProtocols(ctx context.Context, } contractsByProtocol[t.protocolID] = contracts } + return contractsByProtocol, nil +} - for { - if allHandedOff(trackers) { - return handedOffProtocolIDs(trackers), nil - } - - latestLedger, err := s.ingestStore.Get(ctx, s.latestLedgerCursorName) - if err != nil { - return handedOffProtocolIDs(trackers), fmt.Errorf("reading latest ingest ledger: %w", err) - } +// processTrackerAtLedger runs one protocol's processor on the given ledger and, +// on success, performs the CAS+persist transaction that atomically commits the +// cursor advance and the processor's history rows. A failed CAS (cursor already +// advanced by live ingestion) marks the tracker as handed off. 
+func (s *protocolMigrateHistoryService) processTrackerAtLedger( + ctx context.Context, + t *protocolTracker, + seq uint32, + ledgerMeta xdr.LedgerCloseMeta, + contracts []data.ProtocolContracts, +) error { + input := ProtocolProcessorInput{ + LedgerSequence: seq, + LedgerCloseMeta: ledgerMeta, + ProtocolContracts: contracts, + NetworkPassphrase: s.networkPassphrase, + } + if err := t.processor.ProcessLedger(ctx, input); err != nil { + return fmt.Errorf("processing ledger %d for protocol %s: %w", seq, t.protocolID, err) + } - // Find minimum cursor among non-handed-off trackers - var minCursor uint32 - first := true - for _, t := range trackers { - if t.handedOff { - continue - } - if first || t.cursorValue < minCursor { - minCursor = t.cursorValue - first = false - } - } + expected := strconv.FormatUint(uint64(seq-1), 10) + next := strconv.FormatUint(uint64(seq), 10) - startLedger := minCursor + 1 - if startLedger > latestLedger { - log.Ctx(ctx).Infof("All protocols at or past tip %d, migration complete", latestLedger) - return handedOffProtocolIDs(trackers), nil + var swapped bool + if err := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { + var casErr error + swapped, casErr = s.ingestStore.CompareAndSwap(ctx, dbTx, t.cursorName, expected, next) + if casErr != nil { + return fmt.Errorf("CAS history cursor for %s: %w", t.protocolID, casErr) } - - log.Ctx(ctx).Infof("Processing ledgers %d to %d for %d protocol(s)", startLedger, latestLedger, len(protocolIDs)) - - if err := s.ledgerBackend.PrepareRange(ctx, ledgerbackend.BoundedRange(startLedger, latestLedger)); err != nil { - return handedOffProtocolIDs(trackers), fmt.Errorf("preparing ledger range [%d, %d]: %w", startLedger, latestLedger, err) - } - - for seq := startLedger; seq <= latestLedger; seq++ { - select { - case <-ctx.Done(): - return handedOffProtocolIDs(trackers), fmt.Errorf("context cancelled: %w", ctx.Err()) - default: - } - - // Skip if no tracker needs this ledger - needsFetch := false 
- for _, t := range trackers { - if !t.handedOff && t.cursorValue < seq { - needsFetch = true - break - } - } - if !needsFetch { - continue - } - - // Fetch ledger ONCE for all protocols - ledgerMeta, fetchErr := utils.RetryWithBackoff(ctx, maxLedgerFetchRetries, maxRetryBackoff, - func(ctx context.Context) (xdr.LedgerCloseMeta, error) { - return s.ledgerBackend.GetLedger(ctx, seq) - }, - func(attempt int, err error, backoff time.Duration) { - log.Ctx(ctx).Warnf("Error fetching ledger %d (attempt %d/%d): %v, retrying in %v...", - seq, attempt+1, maxLedgerFetchRetries, err, backoff) - }, - ) - if fetchErr != nil { - return handedOffProtocolIDs(trackers), fmt.Errorf("fetching ledger %d: %w", seq, fetchErr) - } - - // Process each eligible tracker - for _, t := range trackers { - if t.handedOff || t.cursorValue >= seq { - continue - } - - contracts := contractsByProtocol[t.protocolID] - input := ProtocolProcessorInput{ - LedgerSequence: seq, - LedgerCloseMeta: ledgerMeta, - ProtocolContracts: contracts, - NetworkPassphrase: s.networkPassphrase, - } - if err := t.processor.ProcessLedger(ctx, input); err != nil { - return handedOffProtocolIDs(trackers), fmt.Errorf("processing ledger %d for protocol %s: %w", seq, t.protocolID, err) - } - - // CAS + persist in a transaction - expected := strconv.FormatUint(uint64(seq-1), 10) - next := strconv.FormatUint(uint64(seq), 10) - - var swapped bool - if err := db.RunInPgxTransaction(ctx, s.db, func(dbTx pgx.Tx) error { - var casErr error - swapped, casErr = s.ingestStore.CompareAndSwap(ctx, dbTx, t.cursorName, expected, next) - if casErr != nil { - return fmt.Errorf("CAS history cursor for %s: %w", t.protocolID, casErr) - } - if swapped { - return t.processor.PersistHistory(ctx, dbTx) - } - return nil - }); err != nil { - return handedOffProtocolIDs(trackers), fmt.Errorf("persisting ledger %d for protocol %s: %w", seq, t.protocolID, err) - } - - if !swapped { - log.Ctx(ctx).Infof("Protocol %s: CAS failed at ledger %d, handoff to 
live ingestion detected", t.protocolID, seq) - t.handedOff = true - } else { - t.cursorValue = seq - } - } - - if allHandedOff(trackers) { - return handedOffProtocolIDs(trackers), nil - } - - if seq%100 == 0 { - log.Ctx(ctx).Infof("Progress: processed ledger %d / %d", seq, latestLedger) - } + if swapped { + return t.processor.PersistHistory(ctx, dbTx) } + return nil + }); err != nil { + return fmt.Errorf("persisting ledger %d for protocol %s: %w", seq, t.protocolID, err) + } - if allHandedOff(trackers) { - return handedOffProtocolIDs(trackers), nil - } + if !swapped { + log.Ctx(ctx).Infof("Protocol %s: CAS failed at ledger %d, handoff to live ingestion detected", t.protocolID, seq) + t.handedOff = true + } else { + t.cursorValue = seq + } + return nil +} - // Check if tip has advanced - newLatest, err := s.ingestStore.Get(ctx, s.latestLedgerCursorName) - if err != nil { - return handedOffProtocolIDs(trackers), fmt.Errorf("re-reading latest ingest ledger: %w", err) - } - if newLatest > latestLedger { +// minNonHandedOffCursor returns the smallest cursorValue among trackers that +// have not yet been handed off. If every tracker is handed off, it returns 0. +func minNonHandedOffCursor(trackers []*protocolTracker) uint32 { + var minCursor uint32 + first := true + for _, t := range trackers { + if t.handedOff { continue } - - // At tip — poll briefly for convergence. - // - // This transitions the backend from BoundedRange (line 264) to UnboundedRange - // on the same instance. The captive core implementation handles this internally - // by closing the existing subprocess before starting a new one (see - // CaptiveStellarCore.startPreparingRange). If the poll succeeds and a new ledger - // is detected, the outer loop iterates again and re-prepares a BoundedRange — - // the same implicit close-and-reopen applies in that direction too. 
- pollCtx, cancel := context.WithTimeout(ctx, convergencePollTimeout) - prepErr := s.ledgerBackend.PrepareRange(pollCtx, ledgerbackend.UnboundedRange(latestLedger+1)) - if prepErr != nil { - cancel() - if ctx.Err() != nil { - return handedOffProtocolIDs(trackers), fmt.Errorf("context cancelled during convergence poll: %w", ctx.Err()) - } - if pollCtx.Err() == context.DeadlineExceeded { - log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger) - return handedOffProtocolIDs(trackers), nil - } - log.Ctx(ctx).Warnf("Transient error during convergence poll PrepareRange: %v, retrying", prepErr) - continue + if first || t.cursorValue < minCursor { + minCursor = t.cursorValue + first = false } + } + return minCursor +} - _, getLedgerErr := s.ledgerBackend.GetLedger(pollCtx, latestLedger+1) - cancel() - if getLedgerErr != nil { - if ctx.Err() != nil { - return handedOffProtocolIDs(trackers), fmt.Errorf("context cancelled during convergence poll: %w", ctx.Err()) - } - if pollCtx.Err() == context.DeadlineExceeded { - log.Ctx(ctx).Infof("Converged at ledger %d", latestLedger) - return handedOffProtocolIDs(trackers), nil - } - log.Ctx(ctx).Warnf("Transient error during convergence poll GetLedger: %v, retrying", getLedgerErr) - continue +// anyTrackerNeedsLedger reports whether at least one non-handed-off tracker +// still needs to process the given ledger sequence. +func anyTrackerNeedsLedger(trackers []*protocolTracker, seq uint32) bool { + for _, t := range trackers { + if !t.handedOff && t.cursorValue < seq { + return true } - - // New ledger available, loop again } + return false } // allHandedOff returns true if every tracker has been handed off to live ingestion. 
diff --git a/internal/services/protocol_migrate_history_test.go b/internal/services/protocol_migrate_history_test.go index c159bd057..77ce0edc8 100644 --- a/internal/services/protocol_migrate_history_test.go +++ b/internal/services/protocol_migrate_history_test.go @@ -7,6 +7,7 @@ import ( "sync" "sync/atomic" "testing" + "time" "github.com/jackc/pgx/v5" "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" @@ -22,6 +23,11 @@ import ( "github.com/stellar/wallet-backend/internal/utils" ) +// testConvergenceTimeout keeps tests fast: 200ms is long enough for a block to +// land in a local goroutine and short enough that "no new ledger" cases don't +// dominate test runtime. +const testConvergenceTimeout = 200 * time.Millisecond + // multiLedgerBackend is a test double that serves ledger meta for a range of ledgers. type multiLedgerBackend struct { ledgers map[uint32]xdr.LedgerCloseMeta @@ -58,21 +64,21 @@ func (b *multiLedgerBackend) Close() error { } // transientErrorBackend wraps multiLedgerBackend and injects transient errors -// on convergence-poll calls (unbounded PrepareRange, missing-ledger GetLedger) -// before delegating normally. This simulates RPC blips that should not be -// mistaken for convergence. +// on the initial PrepareRange call and on missing-ledger GetLedger calls +// before delegating normally. Simulates RPC blips that should not be mistaken +// for convergence. type transientErrorBackend struct { multiLedgerBackend - // unboundedPrepareFailsLeft counts how many unbounded PrepareRange calls - // (convergence polls) should return a transient error before succeeding. - unboundedPrepareFailsLeft atomic.Int32 + // prepareFailsLeft counts how many PrepareRange calls should return a + // transient error before succeeding. + prepareFailsLeft atomic.Int32 // missingGetLedgerFailsLeft counts how many GetLedger calls for missing // ledgers should return a transient error instead of blocking. 
missingGetLedgerFailsLeft atomic.Int32 } func (b *transientErrorBackend) PrepareRange(ctx context.Context, r ledgerbackend.Range) error { - if !r.Bounded() && b.unboundedPrepareFailsLeft.Add(-1) >= 0 { + if b.prepareFailsLeft.Add(-1) >= 0 { return fmt.Errorf("transient RPC error: connection refused") } return b.multiLedgerBackend.PrepareRange(ctx, r) @@ -87,17 +93,16 @@ func (b *transientErrorBackend) GetLedger(ctx context.Context, sequence uint32) return b.multiLedgerBackend.GetLedger(ctx, sequence) } -// rangeTrackingBackend wraps multiLedgerBackend and records the sequence of -// PrepareRange calls, capturing whether each was bounded or unbounded. -// An optional onUnbounded callback fires synchronously on the first unbounded -// PrepareRange, allowing tests to inject new ledgers deterministically before -// the subsequent GetLedger call. +// rangeTrackingBackend wraps multiLedgerBackend and records every +// PrepareRange call. It also exposes an onMiss hook that fires synchronously +// when GetLedger is called for a ledger that's not yet in the map, letting +// tests inject new ledgers mid-run to simulate tip advancement. type rangeTrackingBackend struct { multiLedgerBackend - mu sync.Mutex - ranges []rangeCall - onUnbounded func() - onUnboundedOnce sync.Once + mu sync.Mutex + ranges []rangeCall + onMiss func(sequence uint32) + onMissOnce sync.Once } type rangeCall struct { @@ -109,14 +114,12 @@ func (b *rangeTrackingBackend) PrepareRange(ctx context.Context, r ledgerbackend b.mu.Lock() b.ranges = append(b.ranges, rangeCall{bounded: r.Bounded(), r: r}) b.mu.Unlock() - if !r.Bounded() && b.onUnbounded != nil { - b.onUnboundedOnce.Do(b.onUnbounded) - } return b.multiLedgerBackend.PrepareRange(ctx, r) } -// GetLedger checks for ledgers under the mutex (supporting ledgers added by -// the onUnbounded callback), then falls back to the base blocking behavior. 
+// GetLedger checks the map, runs the onMiss hook (once) if the ledger isn't +// present so tests can inject new ledgers, then re-checks. If still missing, +// falls back to the base blocking behavior. func (b *rangeTrackingBackend) GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, error) { b.mu.Lock() meta, ok := b.multiLedgerBackend.ledgers[sequence] @@ -124,9 +127,34 @@ func (b *rangeTrackingBackend) GetLedger(ctx context.Context, sequence uint32) ( if ok { return meta, nil } + if b.onMiss != nil { + b.onMissOnce.Do(func() { b.onMiss(sequence) }) + } + b.mu.Lock() + meta, ok = b.multiLedgerBackend.ledgers[sequence] + b.mu.Unlock() + if ok { + return meta, nil + } return b.multiLedgerBackend.GetLedger(ctx, sequence) } +// prepareEnforcingBackend wraps multiLedgerBackend and errors on any second +// PrepareRange call. Guards against regressing into the pre-refactor behavior +// where the service called PrepareRange multiple times (which RPCLedgerBackend +// rejects in production). +type prepareEnforcingBackend struct { + multiLedgerBackend + prepareCalls atomic.Int32 +} + +func (b *prepareEnforcingBackend) PrepareRange(ctx context.Context, r ledgerbackend.Range) error { + if n := b.prepareCalls.Add(1); n > 1 { + return fmt.Errorf("PrepareRange called %d times; RPCLedgerBackend only accepts one", n) + } + return b.multiLedgerBackend.PrepareRange(ctx, r) +} + // recordingProcessor is a test double that records all ProcessLedger inputs // and writes per-ledger sentinel keys to ingest_store during PersistHistory, // proving that PersistHistory actually committed data inside the transaction. 
@@ -281,6 +309,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processor}, }) @@ -349,6 +378,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processorMock}, }) @@ -380,6 +410,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processorMock}, }) @@ -407,6 +438,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processorMock}, }) @@ -433,6 +465,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processorMock}, }) @@ -476,7 +509,8 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: 
protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", - Processors: []ProtocolProcessor{processor}, + ConvergencePollTimeout: testConvergenceTimeout, + Processors: []ProtocolProcessor{processor}, }) require.NoError(t, err) @@ -530,6 +564,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processor}, }) @@ -589,6 +624,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processorMock}, }) @@ -635,6 +671,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processorMock}, }) @@ -678,6 +715,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processor}, }) @@ -730,6 +768,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 
2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{proc1, proc2}, }) @@ -798,6 +837,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{proc1, proc2}, }) @@ -869,6 +909,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{proc1, proc2}, }) @@ -948,6 +989,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{proc1, proc2}, }) @@ -980,6 +1022,7 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, Processors: []ProtocolProcessor{processorMock}, }) @@ -1018,16 +1061,16 @@ func TestProtocolMigrateHistory(t *testing.T) { }, }, } - // First PrepareRange call for the convergence poll will fail transiently. - // The bounded-range PrepareRange calls (for processing) always succeed because - // the counter is only 1 and multiLedgerBackend.PrepareRange is a no-op. 
- backend.unboundedPrepareFailsLeft.Store(1) + // First PrepareRange call fails transiently; RetryWithBackoff must retry + // until success. A second call would be the retry and will succeed. + backend.prepareFailsLeft.Store(1) svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", - Processors: []ProtocolProcessor{processor}, + ConvergencePollTimeout: testConvergenceTimeout, + Processors: []ProtocolProcessor{processor}, }) require.NoError(t, err) @@ -1077,7 +1120,8 @@ func TestProtocolMigrateHistory(t *testing.T) { DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", - Processors: []ProtocolProcessor{processor}, + ConvergencePollTimeout: testConvergenceTimeout, + Processors: []ProtocolProcessor{processor}, }) require.NoError(t, err) @@ -1090,12 +1134,11 @@ func TestProtocolMigrateHistory(t *testing.T) { assert.Equal(t, []uint32{100, 101}, processor.persistedSeqs) }) - t.Run("tip advances during convergence poll triggers bounded-unbounded-bounded transition", func(t *testing.T) { + t.Run("tip advances mid-run — PrepareRange called once, new ledgers picked up", func(t *testing.T) { ctx := context.Background() dbPool, ingestStore := setupTestDB(t) setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) - setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 101) _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) require.NoError(t, err) @@ -1120,41 +1163,40 @@ func TestProtocolMigrateHistory(t *testing.T) { }, } - // When the service reaches the convergence poll and 
calls - // PrepareRange(UnboundedRange), this callback fires synchronously - // to simulate tip advancement: it adds new ledgers and updates the - // ingest store before GetLedger is called. - backend.onUnbounded = func() { + // When the service first calls GetLedger for a missing sequence (102), + // inject ledgers 102 and 103 synchronously. The refactored design fetches + // them on the retry path inside the same GetLedger loop; no extra + // PrepareRange is needed. + backend.onMiss = func(_ uint32) { backend.mu.Lock() backend.multiLedgerBackend.ledgers[102] = dummyLedgerMeta(102) backend.multiLedgerBackend.ledgers[103] = dummyLedgerMeta(103) backend.mu.Unlock() - setIngestStoreValue(t, ctx, dbPool, "latest_ingest_ledger", 103) } svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ DB: dbPool, LedgerBackend: backend, ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", - Processors: []ProtocolProcessor{processor}, + ConvergencePollTimeout: testConvergenceTimeout, + Processors: []ProtocolProcessor{processor}, }) require.NoError(t, err) err = svc.Run(ctx, []string{"testproto"}) require.NoError(t, err) - // Verify Bounded → Unbounded → Bounded range transition sequence + // The refactor guarantees PrepareRange is called exactly once with an + // UnboundedRange — anything else would regress the RPCLedgerBackend fix. 
backend.mu.Lock() ranges := make([]rangeCall, len(backend.ranges)) copy(ranges, backend.ranges) backend.mu.Unlock() - require.GreaterOrEqual(t, len(ranges), 3, "expected at least 3 PrepareRange calls, got %d", len(ranges)) - assert.True(t, ranges[0].bounded, "first PrepareRange should be bounded") - assert.False(t, ranges[1].bounded, "second PrepareRange should be unbounded (convergence poll)") - assert.True(t, ranges[2].bounded, "third PrepareRange should be bounded (re-entered loop after tip advance)") + require.Len(t, ranges, 1, "PrepareRange must be called exactly once") + assert.False(t, ranges[0].bounded, "PrepareRange must be unbounded") - // Verify all ledgers 100-103 were processed and persisted + // All ledgers 100-103 processed despite the tip advancing mid-run. cursorVal := getIngestStoreValue(t, ctx, dbPool, "protocol_testproto_history_cursor") assert.Equal(t, uint32(103), cursorVal) assert.Equal(t, []uint32{100, 101, 102, 103}, processor.persistedSeqs) @@ -1165,6 +1207,155 @@ func TestProtocolMigrateHistory(t *testing.T) { assert.Equal(t, seq, val, "sentinel value for ledger %d", seq) } }) + + t.Run("PrepareRange called exactly once — guards against RPCLedgerBackend re-prepare error", func(t *testing.T) { + // Simulates RPCLedgerBackend's single-prepare constraint. If the service + // ever regresses and calls PrepareRange a second time, this backend + // returns an error and Run fails. 
+ ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, ctx, dbPool, "oldest_ingest_ledger", 100) + + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processor := &recordingProcessor{id: "testproto", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusSuccess).Return(nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + + backend := &prepareEnforcingBackend{ + multiLedgerBackend: multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + 101: dummyLedgerMeta(101), + 102: dummyLedgerMeta(102), + }, + }, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, + Processors: []ProtocolProcessor{processor}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.NoError(t, err) + + assert.Equal(t, int32(1), backend.prepareCalls.Load(), "PrepareRange must be called exactly once") + }) + + t.Run("context cancelled 
during fetch — returns context error, status failed", func(t *testing.T) { + // Aditya review comment #8: the ctx.Done() select inside the fetch loop + // had no test coverage. Deterministic: we cancel the parent context from + // inside a GetLedger onMiss hook (ledger 101 is absent), so cancellation + // fires while fetchLedgerOrConverge is trying to serve 101 — before any + // tracker processes it. + parentCtx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + setIngestStoreValue(t, parentCtx, dbPool, "oldest_ingest_ledger", 100) + + _, err := dbPool.ExecContext(parentCtx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processor := &recordingProcessor{id: "testproto", ingestStore: ingestStore} + + protocolsModel.On("GetByIDs", mock.Anything, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusFailed).Return(nil) + protocolContractsModel.On("GetByProtocolID", mock.Anything, "testproto").Return([]data.ProtocolContracts{}, nil) + + cancelCtx, cancel := context.WithCancel(parentCtx) + backend := &rangeTrackingBackend{ + multiLedgerBackend: multiLedgerBackend{ + ledgers: map[uint32]xdr.LedgerCloseMeta{ + 100: dummyLedgerMeta(100), + }, + }, + } + // On the first miss (seq 101 is absent), cancel the parent context. + // fetchLedgerOrConverge then sees ctx.Err() != nil and returns a + // "context cancelled" error. 
+ backend.onMiss = func(_ uint32) { cancel() } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, + Processors: []ProtocolProcessor{processor}, + }) + require.NoError(t, err) + + err = svc.Run(cancelCtx, []string{"testproto"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "context") + + // Ledger 100 was served before cancellation and should be committed. + // Ledger 101 was the trigger for cancellation and must not be committed. + assert.Equal(t, []uint32{100}, processor.persistedSeqs, "only ledger 100 persisted") + cursorVal := getIngestStoreValue(t, parentCtx, dbPool, "protocol_testproto_history_cursor") + assert.Equal(t, uint32(100), cursorVal, "cursor advances only for committed ledger 100") + }) + + t.Run("oldest_ingest_ledger is 0 — returns error, does not call backend", func(t *testing.T) { + // Aditya review comment #8: the "ingestion has not started yet" guard + // has no test coverage. + ctx := context.Background() + dbPool, ingestStore := setupTestDB(t) + + // Do NOT set oldest_ingest_ledger; the ingest store returns 0 by default. 
+ + _, err := dbPool.ExecContext(ctx, `INSERT INTO protocols (id, classification_status) VALUES ('testproto', 'success') ON CONFLICT (id) DO UPDATE SET classification_status = 'success'`) + require.NoError(t, err) + + protocolsModel := data.NewProtocolsModelMock(t) + protocolContractsModel := data.NewProtocolContractsModelMock(t) + processorMock := NewProtocolProcessorMock(t) + + processorMock.On("ProtocolID").Return("testproto") + protocolsModel.On("GetByIDs", ctx, []string{"testproto"}).Return([]data.Protocols{ + {ID: "testproto", ClassificationStatus: data.StatusSuccess, HistoryMigrationStatus: data.StatusNotStarted}, + }, nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusInProgress).Return(nil) + protocolsModel.On("UpdateHistoryMigrationStatus", mock.Anything, mock.Anything, []string{"testproto"}, data.StatusFailed).Return(nil) + + backend := &prepareEnforcingBackend{ + multiLedgerBackend: multiLedgerBackend{ledgers: map[uint32]xdr.LedgerCloseMeta{}}, + } + + svc, err := NewProtocolMigrateHistoryService(ProtocolMigrateHistoryConfig{ + DB: dbPool, LedgerBackend: backend, + ProtocolsModel: protocolsModel, ProtocolContractsModel: protocolContractsModel, + IngestStore: ingestStore, NetworkPassphrase: "Test SDF Network ; September 2015", + ConvergencePollTimeout: testConvergenceTimeout, + Processors: []ProtocolProcessor{processorMock}, + }) + require.NoError(t, err) + + err = svc.Run(ctx, []string{"testproto"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "ingestion has not started yet") + + // Critical: the backend must not be touched if oldest_ingest_ledger is 0. 
+ assert.Equal(t, int32(0), backend.prepareCalls.Load(), "PrepareRange should not be called when oldest_ingest_ledger is 0") + }) } func TestNewProtocolMigrateHistoryService(t *testing.T) { From a5a260c5a8ea9c116584b36b27435cbf165d8079 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 13 Apr 2026 11:07:43 -0600 Subject: [PATCH 48/52] refactor(internal): remove test-only receiver methods from ingestService MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Aditya review comments #3034067996 and #3034073695 — methods defined on receivers solely to be called from tests violate the Go practice of keeping the public API of a type production-oriented. - ingest.go: delete SetEligibleProtocolProcessorsForTest — dead code, tests already access svc.eligibleProtocolProcessors directly. - ingest_live.go: delete the produceProtocolState wrapper; its single test caller now calls produceProtocolStateForProcessors directly with svc.protocolProcessors. --- internal/services/ingest.go | 6 ------ internal/services/ingest_live.go | 8 +++----- internal/services/ingest_test.go | 2 +- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/internal/services/ingest.go b/internal/services/ingest.go index 4e202aec0..b9fcba77a 100644 --- a/internal/services/ingest.go +++ b/internal/services/ingest.go @@ -131,12 +131,6 @@ type ingestService struct { eligibleProtocolProcessors map[string]ProtocolProcessor } -// SetEligibleProtocolProcessorsForTest sets the eligible protocol processors for testing. -// In production, this is set by ingestLiveLedgers before each PersistLedgerData call. 
-func (m *ingestService) SetEligibleProtocolProcessorsForTest(processors map[string]ProtocolProcessor) { - m.eligibleProtocolProcessors = processors -} - func NewIngestService(cfg IngestServiceConfig) (*ingestService, error) { // Create worker pool for the ledger indexer (parallel transaction processing within a ledger) ledgerIndexerPool := pond.NewPool(0) diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index a66b8f57d..653dd5a0a 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -347,11 +347,9 @@ func (m *ingestService) ingestLiveLedgers(ctx context.Context, startLedger uint3 } } -// produceProtocolState runs all registered protocol processors against a ledger. -func (m *ingestService) produceProtocolState(ctx context.Context, ledgerMeta xdr.LedgerCloseMeta, ledgerSeq uint32) error { - return m.produceProtocolStateForProcessors(ctx, ledgerMeta, ledgerSeq, m.protocolProcessors) -} - +// produceProtocolStateForProcessors runs the given protocol processors against +// a ledger. Callers pass either `m.protocolProcessors` (all registered) or a +// filtered subset (e.g., live ingestion scopes to `eligibleProtocolProcessors`). 
func (m *ingestService) produceProtocolStateForProcessors(ctx context.Context, ledgerMeta xdr.LedgerCloseMeta, ledgerSeq uint32, processors map[string]ProtocolProcessor) error { if len(processors) == 0 { return nil diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index 8214f2222..90b27f0a1 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -3024,7 +3024,7 @@ func Test_ingestService_produceProtocolState_RecordsMetrics(t *testing.T) { }, } - err := svc.produceProtocolState(ctx, xdr.LedgerCloseMeta{}, 123) + err := svc.produceProtocolStateForProcessors(ctx, xdr.LedgerCloseMeta{}, 123, svc.protocolProcessors) require.NoError(t, err) mockMetrics.AssertExpectations(t) } From 9a4f29b0dfdb93425c6682e0498ccd1f4f8bd4fb Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 13 Apr 2026 13:31:57 -0600 Subject: [PATCH 49/52] refactor(internal): restore BatchGetByProtocolIDs for cache refresh Reverts the per-protocol GetByProtocolID fallback introduced during the protocol-state pipeline merge. Per-protocol and batch share the same failure domain (one pool, one DB), so the "partial failure tolerance" argument doesn't hold for two SELECT variants against the same backend. Batch is strictly simpler: one round trip, atomic snapshot, one error path. 
--- internal/data/mocks.go | 8 ++++++ internal/data/protocol_contracts.go | 43 +++++++++++++++++++++++++++++ internal/services/ingest_live.go | 38 ++++++++++++------------- internal/services/ingest_test.go | 33 +++++++++++----------- 4 files changed, 84 insertions(+), 38 deletions(-) diff --git a/internal/data/mocks.go b/internal/data/mocks.go index 4d2a1d640..c1406eac1 100644 --- a/internal/data/mocks.go +++ b/internal/data/mocks.go @@ -312,3 +312,11 @@ func (m *ProtocolContractsModelMock) GetByProtocolID(ctx context.Context, protoc } return args.Get(0).([]ProtocolContracts), args.Error(1) } + +func (m *ProtocolContractsModelMock) BatchGetByProtocolIDs(ctx context.Context, protocolIDs []string) (map[string][]ProtocolContracts, error) { + args := m.Called(ctx, protocolIDs) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(map[string][]ProtocolContracts), args.Error(1) +} diff --git a/internal/data/protocol_contracts.go b/internal/data/protocol_contracts.go index 33a16754d..148d46e7f 100644 --- a/internal/data/protocol_contracts.go +++ b/internal/data/protocol_contracts.go @@ -26,6 +26,7 @@ type ProtocolContracts struct { type ProtocolContractsModelInterface interface { BatchInsert(ctx context.Context, dbTx pgx.Tx, contracts []ProtocolContracts) error GetByProtocolID(ctx context.Context, protocolID string) ([]ProtocolContracts, error) + BatchGetByProtocolIDs(ctx context.Context, protocolIDs []string) (map[string][]ProtocolContracts, error) } // ProtocolContractsModel implements ProtocolContractsModelInterface. @@ -121,3 +122,45 @@ func (m *ProtocolContractsModel) GetByProtocolID(ctx context.Context, protocolID m.MetricsService.IncDBQuery("GetByProtocolID", "protocol_contracts") return contracts, nil } + +// BatchGetByProtocolIDs returns all contracts for the given protocol IDs in a single query, +// grouped by protocol ID. 
+func (m *ProtocolContractsModel) BatchGetByProtocolIDs(ctx context.Context, protocolIDs []string) (map[string][]ProtocolContracts, error) { + if len(protocolIDs) == 0 { + return nil, nil + } + + const query = ` + SELECT pw.protocol_id, pc.contract_id, pc.wasm_hash, pc.name, pc.created_at + FROM protocol_contracts pc + JOIN protocol_wasms pw ON pc.wasm_hash = pw.wasm_hash + WHERE pw.protocol_id = ANY($1) + ` + + start := time.Now() + rows, err := m.DB.PgxPool().Query(ctx, query, protocolIDs) + if err != nil { + m.MetricsService.IncDBQueryError("BatchGetByProtocolIDs", "protocol_contracts", utils.GetDBErrorType(err)) + return nil, fmt.Errorf("batch querying contracts for protocols: %w", err) + } + defer rows.Close() + + result := make(map[string][]ProtocolContracts, len(protocolIDs)) + for rows.Next() { + var protocolID string + var c ProtocolContracts + if err := rows.Scan(&protocolID, &c.ContractID, &c.WasmHash, &c.Name, &c.CreatedAt); err != nil { + m.MetricsService.IncDBQueryError("BatchGetByProtocolIDs", "protocol_contracts", utils.GetDBErrorType(err)) + return nil, fmt.Errorf("scanning batch protocol contract row: %w", err) + } + result[protocolID] = append(result[protocolID], c) + } + if err := rows.Err(); err != nil { + m.MetricsService.IncDBQueryError("BatchGetByProtocolIDs", "protocol_contracts", utils.GetDBErrorType(err)) + return nil, fmt.Errorf("iterating batch protocol contract rows: %w", err) + } + + m.MetricsService.ObserveDBQueryDuration("BatchGetByProtocolIDs", "protocol_contracts", time.Since(start).Seconds()) + m.MetricsService.IncDBQuery("BatchGetByProtocolIDs", "protocol_contracts") + return result, nil +} diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 653dd5a0a..98519545a 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -397,11 +397,13 @@ func (m *ingestService) getProtocolContracts(ctx context.Context, protocolID str return 
m.protocolContractCache.contractsByProtocol[protocolID] } -// refreshProtocolContractCache reloads all protocol contracts from the DB. -// The write lock is held only to check staleness and swap the new data in, -// keeping DB queries outside the lock to avoid blocking concurrent readers. +// refreshProtocolContractCache reloads all protocol contracts from the DB in a +// single batch query. The write lock is held only to check staleness and swap +// the new data in, keeping the DB query outside the lock to avoid blocking +// readers. On failure, the previous cache is preserved wholesale — a single +// SELECT has the same failure domain as N per-protocol SELECTs. func (m *ingestService) refreshProtocolContractCache(ctx context.Context, currentLedger uint32) { - // 1. Check staleness under write lock, copy previous data for fallback + // 1. Check staleness under write lock, snapshot previous data for fallback. m.protocolContractCache.mu.Lock() stale := m.protocolContractCache.lastRefreshLedger == 0 || (currentLedger-m.protocolContractCache.lastRefreshLedger) >= protocolContractRefreshInterval @@ -409,35 +411,29 @@ func (m *ingestService) refreshProtocolContractCache(ctx context.Context, curren m.protocolContractCache.mu.Unlock() return } - // Snapshot previous entries for fallback on partial failure prevContracts := m.protocolContractCache.contractsByProtocol m.protocolContractCache.mu.Unlock() - // 2. Fetch new data outside the lock + // 2. Fetch new data outside the lock. 
start := time.Now() - newMap := make(map[string][]data.ProtocolContracts, len(m.protocolProcessors)) - allSucceeded := true + protocolIDs := make([]string, 0, len(m.protocolProcessors)) for protocolID := range m.protocolProcessors { - contracts, err := m.models.ProtocolContracts.GetByProtocolID(ctx, protocolID) - if err != nil { - log.Ctx(ctx).Warnf("Error refreshing protocol contract cache for %s: %v; preserving previous entry", protocolID, err) - allSucceeded = false - if prev, ok := prevContracts[protocolID]; ok { - newMap[protocolID] = prev - } - continue - } - newMap[protocolID] = contracts + protocolIDs = append(protocolIDs, protocolID) + } + newMap, err := m.models.ProtocolContracts.BatchGetByProtocolIDs(ctx, protocolIDs) + if err != nil { + log.Ctx(ctx).Warnf("Error refreshing protocol contract cache: %v; preserving previous entries", err) + newMap = prevContracts } - // 3. Swap under write lock + // 3. Swap under write lock. m.protocolContractCache.mu.Lock() defer m.protocolContractCache.mu.Unlock() m.protocolContractCache.contractsByProtocol = newMap m.protocolContractCache.lastRefreshLedger = currentLedger m.metricsService.ObserveProtocolContractCacheRefreshDuration(time.Since(start).Seconds()) - if !allSucceeded { - log.Ctx(ctx).Warnf("Protocol contract cache partially refreshed at ledger %d; will retry at next interval", currentLedger) + if err != nil { + log.Ctx(ctx).Warnf("Protocol contract cache refresh failed at ledger %d; will retry at next interval", currentLedger) } else { log.Ctx(ctx).Infof("Refreshed protocol contract cache at ledger %d", currentLedger) } diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index 90b27f0a1..14234474b 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "reflect" + "slices" "testing" "time" @@ -3039,7 +3040,8 @@ func Test_ingestService_getProtocolContracts_RefreshesAndRecordsMetrics(t *testi 
protocolContractsModel := data.NewProtocolContractsModelMock(t) expectedContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1), WasmHash: types.HashBytea(txHash2)}} - protocolContractsModel.On("GetByProtocolID", ctx, "testproto").Return(expectedContracts, nil).Once() + protocolContractsModel.On("BatchGetByProtocolIDs", ctx, []string{"testproto"}). + Return(map[string][]data.ProtocolContracts{"testproto": expectedContracts}, nil).Once() svc := &ingestService{ metricsService: mockMetrics, @@ -3059,7 +3061,7 @@ func Test_ingestService_getProtocolContracts_RefreshesAndRecordsMetrics(t *testi mockMetrics.AssertExpectations(t) } -func Test_ingestService_refreshProtocolContractCache_PartialFailure_StillUpdatesLedger(t *testing.T) { +func Test_ingestService_refreshProtocolContractCache_Failure_StillUpdatesLedger(t *testing.T) { t.Parallel() ctx := context.Background() @@ -3067,10 +3069,9 @@ func Test_ingestService_refreshProtocolContractCache_PartialFailure_StillUpdates mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Once() protocolContractsModel := data.NewProtocolContractsModelMock(t) - protocolContractsModel.On("GetByProtocolID", ctx, "proto_ok"). - Return([]data.ProtocolContracts{{ContractID: types.HashBytea(txHash1)}}, nil).Once() - protocolContractsModel.On("GetByProtocolID", ctx, "proto_fail"). 
- Return(nil, fmt.Errorf("db error")).Once() + protocolContractsModel.On("BatchGetByProtocolIDs", ctx, mock.MatchedBy(func(ids []string) bool { + return len(ids) == 2 && slices.Contains(ids, "proto_ok") && slices.Contains(ids, "proto_fail") + })).Return(nil, fmt.Errorf("db error")).Once() svc := &ingestService{ metricsService: mockMetrics, @@ -3086,17 +3087,18 @@ func Test_ingestService_refreshProtocolContractCache_PartialFailure_StillUpdates svc.refreshProtocolContractCache(ctx, 200) - // lastRefreshLedger must advance despite partial failure + // lastRefreshLedger must advance despite batch failure so we don't + // hammer the DB on every subsequent ledger. assert.Equal(t, uint32(200), svc.protocolContractCache.lastRefreshLedger) // Calling again at currentLedger+1 should be a no-op (not stale yet). - // The .Once() expectations on the mock ensure no extra DB calls happen. + // The .Once() expectation on the mock ensures no extra DB call happens. svc.refreshProtocolContractCache(ctx, 201) mockMetrics.AssertExpectations(t) } -func Test_ingestService_refreshProtocolContractCache_PartialFailure_PreservesPreviousEntries(t *testing.T) { +func Test_ingestService_refreshProtocolContractCache_Failure_PreservesPreviousEntries(t *testing.T) { t.Parallel() ctx := context.Background() @@ -3104,13 +3106,11 @@ func Test_ingestService_refreshProtocolContractCache_PartialFailure_PreservesPre mockMetrics.On("ObserveProtocolContractCacheRefreshDuration", mock.Anything).Return().Once() previousContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash1)}} - newContracts := []data.ProtocolContracts{{ContractID: types.HashBytea(txHash2)}} protocolContractsModel := data.NewProtocolContractsModelMock(t) - protocolContractsModel.On("GetByProtocolID", ctx, "proto_ok"). - Return(newContracts, nil).Once() - protocolContractsModel.On("GetByProtocolID", ctx, "proto_fail"). 
- Return(nil, fmt.Errorf("db error")).Once() + protocolContractsModel.On("BatchGetByProtocolIDs", ctx, mock.MatchedBy(func(ids []string) bool { + return len(ids) == 2 && slices.Contains(ids, "proto_ok") && slices.Contains(ids, "proto_fail") + })).Return(nil, fmt.Errorf("db error")).Once() svc := &ingestService{ metricsService: mockMetrics, @@ -3130,9 +3130,8 @@ func Test_ingestService_refreshProtocolContractCache_PartialFailure_PreservesPre svc.refreshProtocolContractCache(ctx, 300) - // Successful protocol gets new data - assert.Equal(t, newContracts, svc.protocolContractCache.contractsByProtocol["proto_ok"]) - // Failed protocol retains previous entries + // Batch failure → both protocols' previous entries are preserved wholesale. + assert.Equal(t, previousContracts, svc.protocolContractCache.contractsByProtocol["proto_ok"]) assert.Equal(t, previousContracts, svc.protocolContractCache.contractsByProtocol["proto_fail"]) mockMetrics.AssertExpectations(t) From 64de5eceaeb1b860f8bae2f07242d92c72853014 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 13 Apr 2026 13:41:25 -0600 Subject: [PATCH 50/52] refactor(internal): drop mutex from protocolContractCache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cache is accessed only from the single-goroutine live ingestion loop (getProtocolContracts and refreshProtocolContractCache are both called inline from the same loop). With one accessor, the lock/unlock dance optimizes nothing — no reader to shield during the DB fetch, no swap to coordinate, no concurrent lastRefreshLedger update. 
--- internal/services/ingest_live.go | 47 ++++++-------------------------- internal/services/ingest_test.go | 7 ++--- 2 files changed, 11 insertions(+), 43 deletions(-) diff --git a/internal/services/ingest_live.go b/internal/services/ingest_live.go index 98519545a..8daf352e4 100644 --- a/internal/services/ingest_live.go +++ b/internal/services/ingest_live.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "strconv" - "sync" "time" set "github.com/deckarep/golang-set/v2" @@ -29,8 +28,8 @@ const ( ) // protocolContractCache caches classified protocol contracts to avoid per-ledger DB queries. +// Accessed only from the single-goroutine live ingestion loop; no locking needed. type protocolContractCache struct { - mu sync.RWMutex contractsByProtocol map[string][]data.ProtocolContracts lastRefreshLedger uint32 } @@ -377,66 +376,38 @@ func (m *ingestService) getProtocolContracts(ctx context.Context, protocolID str if m.protocolContractCache == nil { return nil } - m.protocolContractCache.mu.RLock() stale := m.protocolContractCache.lastRefreshLedger == 0 || (currentLedger-m.protocolContractCache.lastRefreshLedger) >= protocolContractRefreshInterval - m.protocolContractCache.mu.RUnlock() if stale { m.metricsService.IncProtocolContractCacheAccess(protocolID, "miss") + m.refreshProtocolContractCache(ctx, currentLedger) } else { m.metricsService.IncProtocolContractCacheAccess(protocolID, "hit") } - if stale { - m.refreshProtocolContractCache(ctx, currentLedger) - } - - m.protocolContractCache.mu.RLock() - defer m.protocolContractCache.mu.RUnlock() return m.protocolContractCache.contractsByProtocol[protocolID] } // refreshProtocolContractCache reloads all protocol contracts from the DB in a -// single batch query. The write lock is held only to check staleness and swap -// the new data in, keeping the DB query outside the lock to avoid blocking -// readers. 
On failure, the previous cache is preserved wholesale — a single -// SELECT has the same failure domain as N per-protocol SELECTs. +// single batch query. On failure, the existing cache is left untouched — a +// single SELECT has the same failure domain as N per-protocol SELECTs, so +// there's no partial-failure path to handle. func (m *ingestService) refreshProtocolContractCache(ctx context.Context, currentLedger uint32) { - // 1. Check staleness under write lock, snapshot previous data for fallback. - m.protocolContractCache.mu.Lock() - stale := m.protocolContractCache.lastRefreshLedger == 0 || - (currentLedger-m.protocolContractCache.lastRefreshLedger) >= protocolContractRefreshInterval - if !stale { - m.protocolContractCache.mu.Unlock() - return - } - prevContracts := m.protocolContractCache.contractsByProtocol - m.protocolContractCache.mu.Unlock() - - // 2. Fetch new data outside the lock. start := time.Now() protocolIDs := make([]string, 0, len(m.protocolProcessors)) for protocolID := range m.protocolProcessors { protocolIDs = append(protocolIDs, protocolID) } newMap, err := m.models.ProtocolContracts.BatchGetByProtocolIDs(ctx, protocolIDs) - if err != nil { - log.Ctx(ctx).Warnf("Error refreshing protocol contract cache: %v; preserving previous entries", err) - newMap = prevContracts - } - - // 3. Swap under write lock. 
- m.protocolContractCache.mu.Lock() - defer m.protocolContractCache.mu.Unlock() - m.protocolContractCache.contractsByProtocol = newMap m.protocolContractCache.lastRefreshLedger = currentLedger m.metricsService.ObserveProtocolContractCacheRefreshDuration(time.Since(start).Seconds()) if err != nil { - log.Ctx(ctx).Warnf("Protocol contract cache refresh failed at ledger %d; will retry at next interval", currentLedger) - } else { - log.Ctx(ctx).Infof("Refreshed protocol contract cache at ledger %d", currentLedger) + log.Ctx(ctx).Warnf("Protocol contract cache refresh failed at ledger %d; preserving previous entries, will retry at next interval: %v", currentLedger, err) + return } + m.protocolContractCache.contractsByProtocol = newMap + log.Ctx(ctx).Infof("Refreshed protocol contract cache at ledger %d", currentLedger) } // ingestProcessedDataWithRetry wraps PersistLedgerData with retry logic. diff --git a/internal/services/ingest_test.go b/internal/services/ingest_test.go index 14234474b..a9ad4fb7d 100644 --- a/internal/services/ingest_test.go +++ b/internal/services/ingest_test.go @@ -3088,13 +3088,10 @@ func Test_ingestService_refreshProtocolContractCache_Failure_StillUpdatesLedger( svc.refreshProtocolContractCache(ctx, 200) // lastRefreshLedger must advance despite batch failure so we don't - // hammer the DB on every subsequent ledger. + // hammer the DB on every subsequent ledger (staleness is gated by + // getProtocolContracts against this value). assert.Equal(t, uint32(200), svc.protocolContractCache.lastRefreshLedger) - // Calling again at currentLedger+1 should be a no-op (not stale yet). - // The .Once() expectation on the mock ensures no extra DB call happens. 
- svc.refreshProtocolContractCache(ctx, 201) - mockMetrics.AssertExpectations(t) } From 4ee42c9d7a466e1bd0cd907f35b2c32358931e44 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 13 Apr 2026 13:49:50 -0600 Subject: [PATCH 51/52] feature(cmd): use datastore backend for protocol history migration RPC retention windows can drop the oldest ledgers a backfill needs; datastore carries the full archive and is the right default for a migration command that targets anywhere in the retention window. - protocol_migrate.go: add --ledger-backend-type (default datastore), --datastore-config-path, --get-ledgers-limit flags. Keep --rpc-url as an optional escape hatch. Reuse ingest.NewLedgerBackend so both CLIs share one selection path. - Collapse RunHistory's positional args into historyCmdOpts. - Close the backend via io.Closer assertion since the LedgerBackend interface doesn't expose Close directly. The service itself required no changes: the earlier refactor's single PrepareRange(UnboundedRange(...)) already works with BufferedStorageBackend (verified via startPreparingRange). 
--- cmd/protocol_migrate.go | 126 ++++++++++++++++++++++++++++++---------- 1 file changed, 96 insertions(+), 30 deletions(-) diff --git a/cmd/protocol_migrate.go b/cmd/protocol_migrate.go index 1bfa0cf33..128ca0b6f 100644 --- a/cmd/protocol_migrate.go +++ b/cmd/protocol_migrate.go @@ -3,17 +3,19 @@ package cmd import ( "context" "fmt" + "go/types" + "io" _ "github.com/lib/pq" "github.com/sirupsen/logrus" "github.com/spf13/cobra" - "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/support/config" "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/wallet-backend/cmd/utils" "github.com/stellar/wallet-backend/internal/data" "github.com/stellar/wallet-backend/internal/db" + "github.com/stellar/wallet-backend/internal/ingest" "github.com/stellar/wallet-backend/internal/metrics" "github.com/stellar/wallet-backend/internal/services" internalutils "github.com/stellar/wallet-backend/internal/utils" @@ -38,18 +40,58 @@ func (c *protocolMigrateCmd) Command() *cobra.Command { return cmd } +// historyCmdOpts holds the resolved flag values for `protocol-migrate history`. +type historyCmdOpts struct { + databaseURL string + rpcURL string + networkPassphrase string + protocolIDs []string + logLevel string + oldestLedgerCursorName string + ledgerBackendType string + datastoreConfigPath string + getLedgersLimit int +} + func (c *protocolMigrateCmd) historyCommand() *cobra.Command { - var databaseURL string - var rpcURL string - var networkPassphrase string - var protocolIDs []string - var logLevel string - var oldestLedgerCursorName string + var opts historyCmdOpts cfgOpts := config.ConfigOptions{ - utils.DatabaseURLOption(&databaseURL), - utils.RPCURLOption(&rpcURL), - utils.NetworkPassphraseOption(&networkPassphrase), + utils.DatabaseURLOption(&opts.databaseURL), + utils.NetworkPassphraseOption(&opts.networkPassphrase), + // RPC URL is only required when --ledger-backend-type=rpc; validated in PersistentPreRunE. 
+ { + Name: "rpc-url", + Usage: "The URL of the RPC Server. Required when --ledger-backend-type=rpc.", + OptType: types.String, + ConfigKey: &opts.rpcURL, + FlagDefault: "", + Required: false, + }, + { + Name: "ledger-backend-type", + Usage: "Type of ledger backend to use for fetching historical ledgers. Options: 'rpc' or 'datastore' (default). Datastore is recommended for migrations because it can reach ledgers outside the RPC retention window.", + OptType: types.String, + ConfigKey: &opts.ledgerBackendType, + FlagDefault: string(ingest.LedgerBackendTypeDatastore), + Required: false, + }, + { + Name: "datastore-config-path", + Usage: "Path to TOML config file for datastore backend. Required when --ledger-backend-type=datastore.", + OptType: types.String, + ConfigKey: &opts.datastoreConfigPath, + FlagDefault: "config/datastore-pubnet.toml", + Required: false, + }, + { + Name: "get-ledgers-limit", + Usage: "Per-request ledger buffer size for the RPC backend. Ignored for datastore.", + OptType: types.Int, + ConfigKey: &opts.getLedgersLimit, + FlagDefault: 10, + Required: false, + }, } cmd := &cobra.Command{ @@ -64,21 +106,35 @@ func (c *protocolMigrateCmd) historyCommand() *cobra.Command { return fmt.Errorf("setting values of config options: %w", err) } - if logLevel != "" { - ll, err := logrus.ParseLevel(logLevel) + if opts.logLevel != "" { + ll, err := logrus.ParseLevel(opts.logLevel) if err != nil { - return fmt.Errorf("invalid log level %q: %w", logLevel, err) + return fmt.Errorf("invalid log level %q: %w", opts.logLevel, err) } log.DefaultLogger.SetLevel(ll) } - if len(protocolIDs) == 0 { + if len(opts.protocolIDs) == 0 { return fmt.Errorf("at least one --protocol-id is required") } + + // Per-backend required-field validation. 
+ switch opts.ledgerBackendType { + case string(ingest.LedgerBackendTypeRPC): + if opts.rpcURL == "" { + return fmt.Errorf("--rpc-url is required when --ledger-backend-type=rpc") + } + case string(ingest.LedgerBackendTypeDatastore): + if opts.datastoreConfigPath == "" { + return fmt.Errorf("--datastore-config-path is required when --ledger-backend-type=datastore") + } + default: + return fmt.Errorf("invalid --ledger-backend-type %q, must be 'rpc' or 'datastore'", opts.ledgerBackendType) + } return nil }, RunE: func(_ *cobra.Command, _ []string) error { - return c.RunHistory(databaseURL, rpcURL, networkPassphrase, protocolIDs, oldestLedgerCursorName) + return c.RunHistory(opts) }, } @@ -86,19 +142,19 @@ func (c *protocolMigrateCmd) historyCommand() *cobra.Command { log.Fatalf("Error initializing a config option: %s", err.Error()) } - cmd.Flags().StringSliceVar(&protocolIDs, "protocol-id", nil, "Protocol ID(s) to migrate (required, repeatable)") - cmd.Flags().StringVar(&logLevel, "log-level", "", `Log level: "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", "PANIC"`) - cmd.Flags().StringVar(&oldestLedgerCursorName, "oldest-ledger-cursor-name", data.OldestLedgerCursorName, "Name of the oldest ledger cursor in the ingest store. Must match the value used by the ingest service.") + cmd.Flags().StringSliceVar(&opts.protocolIDs, "protocol-id", nil, "Protocol ID(s) to migrate (required, repeatable)") + cmd.Flags().StringVar(&opts.logLevel, "log-level", "", `Log level: "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", "PANIC"`) + cmd.Flags().StringVar(&opts.oldestLedgerCursorName, "oldest-ledger-cursor-name", data.OldestLedgerCursorName, "Name of the oldest ledger cursor in the ingest store. 
Must match the value used by the ingest service.") return cmd } -func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase string, protocolIDs []string, oldestLedgerCursorName string) error { +func (c *protocolMigrateCmd) RunHistory(opts historyCmdOpts) error { ctx := context.Background() // Build processors from protocol IDs using the dynamic registry var processors []services.ProtocolProcessor - for _, pid := range protocolIDs { + for _, pid := range opts.protocolIDs { factory, ok := services.GetProcessor(pid) if !ok { return fmt.Errorf("unknown protocol ID %q — no processor registered", pid) @@ -111,7 +167,7 @@ func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase s } // Open DB connection - dbPool, err := db.OpenDBConnectionPool(databaseURL) + dbPool, err := db.OpenDBConnectionPool(opts.databaseURL) if err != nil { return fmt.Errorf("opening database connection: %w", err) } @@ -128,14 +184,24 @@ func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase s return fmt.Errorf("creating models: %w", err) } - // Create ledger backend for fetching historical ledgers - ledgerBackend := ledgerbackend.NewRPCLedgerBackend(ledgerbackend.RPCLedgerBackendOptions{ - RPCServerURL: rpcURL, - BufferSize: 10, + // Build a ledger backend using the same selector the ingest service uses, + // so protocol-migrate inherits the datastore path (recommended for + // backfills — unbounded history, unlike RPC retention windows). 
+ ledgerBackend, err := ingest.NewLedgerBackend(ctx, ingest.Configs{ + LedgerBackendType: ingest.LedgerBackendType(opts.ledgerBackendType), + DatastoreConfigPath: opts.datastoreConfigPath, + NetworkPassphrase: opts.networkPassphrase, + RPCURL: opts.rpcURL, + GetLedgersLimit: opts.getLedgersLimit, }) + if err != nil { + return fmt.Errorf("creating ledger backend: %w", err) + } defer func() { - if closeErr := ledgerBackend.Close(); closeErr != nil { - log.Ctx(ctx).Errorf("error closing ledger backend: %v", closeErr) + if closer, ok := ledgerBackend.(io.Closer); ok { + if closeErr := closer.Close(); closeErr != nil { + log.Ctx(ctx).Errorf("error closing ledger backend: %v", closeErr) + } } }() @@ -145,15 +211,15 @@ func (c *protocolMigrateCmd) RunHistory(databaseURL, rpcURL, networkPassphrase s ProtocolsModel: models.Protocols, ProtocolContractsModel: models.ProtocolContracts, IngestStore: models.IngestStore, - NetworkPassphrase: networkPassphrase, + NetworkPassphrase: opts.networkPassphrase, Processors: processors, - OldestLedgerCursorName: oldestLedgerCursorName, + OldestLedgerCursorName: opts.oldestLedgerCursorName, }) if err != nil { return fmt.Errorf("creating protocol migrate history service: %w", err) } - if err := service.Run(ctx, protocolIDs); err != nil { + if err := service.Run(ctx, opts.protocolIDs); err != nil { return fmt.Errorf("running protocol migrate history: %w", err) } From 73c382106b0f3e335908f21b3a2937e4c4f74ca6 Mon Sep 17 00:00:00 2001 From: Aristides Staffieri Date: Mon, 13 Apr 2026 14:10:35 -0600 Subject: [PATCH 52/52] fix(graphql): reorder imports to satisfy goimports -local CI's goimports step was failing on four generated files because github.com/vektah/gqlparser/v2/gqlerror was grouped with the local wallet-backend imports instead of the third-party group. gqlgen regeneration preserves the corrected layout (verified by rerunning go generate), so this stays fixed unless someone manually re-mixes them. 
--- internal/serve/graphql/generated/generated.go | 5 +++-- internal/serve/graphql/resolvers/account.resolvers.go | 3 ++- internal/serve/graphql/resolvers/mutations.resolvers.go | 3 ++- internal/serve/graphql/resolvers/queries.resolvers.go | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/internal/serve/graphql/generated/generated.go b/internal/serve/graphql/generated/generated.go index 3c4ce2d0a..ccbc9cf07 100644 --- a/internal/serve/graphql/generated/generated.go +++ b/internal/serve/graphql/generated/generated.go @@ -14,10 +14,11 @@ import ( "github.com/99designs/gqlgen/graphql" "github.com/99designs/gqlgen/graphql/introspection" - "github.com/stellar/wallet-backend/internal/indexer/types" - "github.com/stellar/wallet-backend/internal/serve/graphql/scalars" gqlparser "github.com/vektah/gqlparser/v2" "github.com/vektah/gqlparser/v2/ast" + + "github.com/stellar/wallet-backend/internal/indexer/types" + "github.com/stellar/wallet-backend/internal/serve/graphql/scalars" ) // region ************************** generated!.gotpl ************************** diff --git a/internal/serve/graphql/resolvers/account.resolvers.go b/internal/serve/graphql/resolvers/account.resolvers.go index 7cdbc0100..63235b21a 100644 --- a/internal/serve/graphql/resolvers/account.resolvers.go +++ b/internal/serve/graphql/resolvers/account.resolvers.go @@ -10,10 +10,11 @@ import ( "strings" "time" + "github.com/vektah/gqlparser/v2/gqlerror" + "github.com/stellar/wallet-backend/internal/indexer/types" graphql1 "github.com/stellar/wallet-backend/internal/serve/graphql/generated" "github.com/stellar/wallet-backend/internal/utils" - "github.com/vektah/gqlparser/v2/gqlerror" ) // Address is the resolver for the address field. 
diff --git a/internal/serve/graphql/resolvers/mutations.resolvers.go b/internal/serve/graphql/resolvers/mutations.resolvers.go index 4b5d1086c..9b2a6390e 100644 --- a/internal/serve/graphql/resolvers/mutations.resolvers.go +++ b/internal/serve/graphql/resolvers/mutations.resolvers.go @@ -10,13 +10,14 @@ import ( "fmt" "github.com/stellar/go-stellar-sdk/txnbuild" + "github.com/vektah/gqlparser/v2/gqlerror" + "github.com/stellar/wallet-backend/internal/entities" graphql1 "github.com/stellar/wallet-backend/internal/serve/graphql/generated" "github.com/stellar/wallet-backend/internal/services" "github.com/stellar/wallet-backend/internal/signing" "github.com/stellar/wallet-backend/internal/signing/store" "github.com/stellar/wallet-backend/pkg/sorobanauth" - "github.com/vektah/gqlparser/v2/gqlerror" ) // BuildTransaction is the resolver for the buildTransaction field. diff --git a/internal/serve/graphql/resolvers/queries.resolvers.go b/internal/serve/graphql/resolvers/queries.resolvers.go index ced5dbec6..f5b9ed775 100644 --- a/internal/serve/graphql/resolvers/queries.resolvers.go +++ b/internal/serve/graphql/resolvers/queries.resolvers.go @@ -11,10 +11,11 @@ import ( "sync" "github.com/stellar/go-stellar-sdk/support/log" + "github.com/vektah/gqlparser/v2/gqlerror" + "github.com/stellar/wallet-backend/internal/indexer/types" graphql1 "github.com/stellar/wallet-backend/internal/serve/graphql/generated" "github.com/stellar/wallet-backend/internal/utils" - "github.com/vektah/gqlparser/v2/gqlerror" ) // TransactionByHash is the resolver for the transactionByHash field.