diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 731203f..3297242 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,11 +10,11 @@ This project uses Protocol Buffers for consensus and node communication (except - `make buf-lint` - Lint protobuf files to ensure they follow best practices - `make buf-format` - Format protobuf files (this is included in `make lint`) -- `make buf-breaking` - Check for breaking changes against the master branch +- `make buf-breaking` - Check for breaking changes against the main branch ### Before Committing Changes -If you modify any `.proto` files, always run `make buf-lint` and `make buf-breaking` to ensure your changes don't introduce linting issues or breaking changes. The `buf-breaking` command compares your changes against the master branch to detect any backwards-incompatible modifications. Breaking changes should be carefully reviewed and documented as they can impact existing deployments. +If you modify any `.proto` files, always run `make buf-lint` and `make buf-breaking` to ensure your changes don't introduce linting issues or breaking changes. The `buf-breaking` command compares your changes against the main branch to detect any backwards-incompatible modifications. Breaking changes should be carefully reviewed and documented as they can impact existing deployments. ### CI diff --git a/README.md b/README.md index 19e594c..84014da 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ git config fetch.recurseSubmodules on-demand ### Prerequisites +- [Rust](https://rustup.rs/) +- [Docker](https://docs.docker.com/get-started/get-docker/) - [Node.js](https://nodejs.org/) - [Foundry](https://getfoundry.sh/) - [Hardhat](https://hardhat.org/) @@ -178,7 +180,7 @@ For more details, see our [Contributing Guide](CONTRIBUTING.md). 
## Resources - [Arc Network](https://www.arc.network/) - Official Arc Network website -- [Arc Documentation](https://www.arc.network/) - Official Arc developer documentation +- [Arc Documentation](https://docs.arc.network/) - Official Arc developer documentation - [Reth](https://github.com/paradigmxyz/reth) - The underlying execution layer framework - [Malachite](https://github.com/circlefin/malachite) - BFT consensus engine - [Local Documentation](docs/) - Implementation guides and references diff --git a/crates/evm/src/handler.rs b/crates/evm/src/handler.rs index 831e508..a667d2a 100644 --- a/crates/evm/src/handler.rs +++ b/crates/evm/src/handler.rs @@ -89,10 +89,9 @@ where let beneficiary = ctx.block().beneficiary(); let basefee = ctx.block().basefee() as u128; let effective_gas_price = ctx.tx().effective_gas_price(basefee); - let gas_used = exec_result.gas().used() as u128; + let gas_used = exec_result.gas().used(); - // Calculate total fee (base fee + priority fee) instead of just priority fee - let total_fee_amount = U256::from(effective_gas_price * gas_used); + let total_fee_amount = U256::from(effective_gas_price) * U256::from(gas_used); // Transfer the total fee to the beneficiary (both base fee and priority fee) evm.ctx_mut() @@ -505,6 +504,62 @@ mod tests { ); } + #[test] + fn test_reward_beneficiary_large_values_no_overflow() { + let beneficiary = address!("1100000000000000000000000000000000000011"); + let caller = address!("2200000000000000000000000000000000000022"); + // Values that would overflow u128 when multiplied: (u128::MAX / 1000) * 2000 > u128::MAX + let gas_price = u128::MAX / 1000; + let gas_used = 2000u64; + + let db: CacheDB> = CacheDB::new(EmptyDB::default()); + let mut evm = Context::mainnet().with_db(db).build_mainnet(); + + evm.block.beneficiary = beneficiary; + evm.block.basefee = 0; + evm.tx.caller = caller; + evm.tx.gas_price = gas_price; + + let interpreter_result = InterpreterResult::new( + InstructionResult::Return, + 
alloy_primitives::Bytes::new(), + Gas::new_spent(gas_used), + ); + let call_outcome = CallOutcome::new(interpreter_result, 0..0); + let mut exec_result = FrameResult::Call(call_outcome); + + let initial_balance = evm + .journaled_state + .load_account(beneficiary) + .unwrap() + .info + .balance; + + let handler: ArcEvmHandler<_, EVMError> = + ArcEvmHandler::new(ArcHardforkFlags::default()); + let result = handler.reward_beneficiary(&mut evm, &mut exec_result); + + assert!( + result.is_ok(), + "reward_beneficiary should succeed with large values" + ); + + let expected_fee = U256::from(gas_price) * U256::from(gas_used); + + let final_balance = evm + .journaled_state + .load_account(beneficiary) + .unwrap() + .info + .balance; + let balance_increase = final_balance - initial_balance; + + assert_eq!( + balance_increase, expected_fee, + "Beneficiary should receive correct fee even with large values that would overflow u128" + ); + } + #[derive(Debug)] struct BlocklistTestCase { name: &'static str, diff --git a/crates/precompiles/src/native_coin_authority.rs b/crates/precompiles/src/native_coin_authority.rs index 927e3c5..d470e1e 100644 --- a/crates/precompiles/src/native_coin_authority.rs +++ b/crates/precompiles/src/native_coin_authority.rs @@ -246,6 +246,11 @@ stateful!(run_native_coin_authority, precompile_input, hardfork_flags; { &mut gas_counter, )?; + // Reject minting to zero address (Zero5+) + if hardfork_flags.is_active(ArcHardfork::Zero5) && args.to == Address::ZERO { + return Err(PrecompileErrorOrRevert::new_reverted(gas_counter, ERR_ZERO_ADDRESS)); + } + // Check blocklist if is_blocklisted(&mut precompile_input.internals, args.to, &mut gas_counter, hardfork_flags)? 
{ return Err(PrecompileErrorOrRevert::new_reverted(gas_counter, ERR_BLOCKED_ADDRESS)); @@ -351,6 +356,11 @@ stateful!(run_native_coin_authority, precompile_input, hardfork_flags; { &mut gas_counter, )?; + // Reject burning from zero address (Zero5+) + if hardfork_flags.is_active(ArcHardfork::Zero5) && args.from == Address::ZERO { + return Err(PrecompileErrorOrRevert::new_reverted(gas_counter, ERR_ZERO_ADDRESS)); + } + // Check blocklist if is_blocklisted(&mut precompile_input.internals, args.from, &mut gas_counter, hardfork_flags)? { return Err(PrecompileErrorOrRevert::new_reverted(gas_counter, ERR_BLOCKED_ADDRESS)); @@ -1034,6 +1044,29 @@ mod tests { bytecode_address: NATIVE_COIN_AUTHORITY_ADDRESS, ..Default::default() }, + // No auth SLOAD, zero-address check precedes blocklist SLOADs + NativeCoinAuthorityTest { + name: "mint() to zero address reverts (Zero5+)", + caller: ALLOWED_CALLER_ADDRESS, + calldata: INativeCoinAuthority::mintCall { + to: Address::ZERO, + amount: U256::from(1), + } + .abi_encode() + .into(), + gas_limit: MINT_GAS_COST, + pre_zero5_gas_limit: None, + expected_revert_str: Some(ERR_ZERO_ADDRESS), + expected_result: InstructionResult::Revert, + return_data: None, + blocklisted_addresses: None, + gas_used: 0, + pre_zero5_gas_used: None, + target_address: NATIVE_COIN_AUTHORITY_ADDRESS, + bytecode_address: NATIVE_COIN_AUTHORITY_ADDRESS, + eip7708_only: true, + ..Default::default() + }, // No auth SLOAD, reverts immediately NativeCoinAuthorityTest { name: "burn() with unauthorized caller reverts", @@ -1210,6 +1243,29 @@ mod tests { bytecode_address: NATIVE_COIN_AUTHORITY_ADDRESS, ..Default::default() }, + // No auth SLOAD, zero-address check precedes blocklist SLOADs + NativeCoinAuthorityTest { + name: "burn() from zero address reverts (Zero5+)", + caller: ALLOWED_CALLER_ADDRESS, + calldata: INativeCoinAuthority::burnCall { + from: Address::ZERO, + amount: U256::from(1), + } + .abi_encode() + .into(), + gas_limit: BURN_GAS_COST, + 
pre_zero5_gas_limit: None, + expected_revert_str: Some(ERR_ZERO_ADDRESS), + expected_result: InstructionResult::Revert, + return_data: None, + blocklisted_addresses: None, + gas_used: 0, + pre_zero5_gas_used: None, + target_address: NATIVE_COIN_AUTHORITY_ADDRESS, + bytecode_address: NATIVE_COIN_AUTHORITY_ADDRESS, + eip7708_only: true, + ..Default::default() + }, // No auth SLOAD, reverts immediately NativeCoinAuthorityTest { name: "transfer() with unauthorized caller reverts", @@ -1342,7 +1398,7 @@ mod tests { bytecode_address: ADDRESS_B, ..Default::default() }, - // Zero address checks (Zero5+) happen before blocklist SLOADs, so gas_used = 0 + // No auth SLOAD, zero-address check precedes blocklist SLOADs NativeCoinAuthorityTest { name: "transfer() to zero address reverts (Zero5+)", caller: ALLOWED_CALLER_ADDRESS, @@ -1360,7 +1416,7 @@ mod tests { return_data: None, blocklisted_addresses: None, gas_used: 0, - pre_zero5_gas_used: Some(PRECOMPILE_SLOAD_GAS_COST), + pre_zero5_gas_used: None, target_address: NATIVE_COIN_AUTHORITY_ADDRESS, bytecode_address: NATIVE_COIN_AUTHORITY_ADDRESS, eip7708_only: true, @@ -1383,7 +1439,7 @@ mod tests { return_data: None, blocklisted_addresses: None, gas_used: 0, - pre_zero5_gas_used: Some(PRECOMPILE_SLOAD_GAS_COST), + pre_zero5_gas_used: None, target_address: NATIVE_COIN_AUTHORITY_ADDRESS, bytecode_address: NATIVE_COIN_AUTHORITY_ADDRESS, eip7708_only: true, @@ -1406,7 +1462,7 @@ mod tests { return_data: None, blocklisted_addresses: None, gas_used: 0, - pre_zero5_gas_used: Some(PRECOMPILE_SLOAD_GAS_COST), + pre_zero5_gas_used: None, target_address: NATIVE_COIN_AUTHORITY_ADDRESS, bytecode_address: NATIVE_COIN_AUTHORITY_ADDRESS, eip7708_only: true, diff --git a/crates/quake/README.md b/crates/quake/README.md index 4e86d1e..58946a7 100644 --- a/crates/quake/README.md +++ b/crates/quake/README.md @@ -742,6 +742,34 @@ The workflow runs **10 parallel jobs** (matrix indices 0–9). 
Each job computes PRs labeled `test-random` will also trigger this workflow. +### The `clean` command + +By default, `clean` removes all node data and configuration. The following flags control what is removed: + +| Flag | Short | Description | +|------|-------|-------------| +| `--all` | `-a` | Remove everything, including monitoring services and their data. Cannot be combined with other flags. | +| `--monitoring` | `-m` | Stop monitoring services and remove their data only. | +| `--data` | `-d` | Remove only execution and consensus layer data, preserving configuration. Cannot be combined with `--execution-data` or `--consensus-data`. | +| `--execution-data` | `-x` | Remove only execution layer (Reth) data. Cannot be combined with `--data`; combining it with `--consensus-data` is equivalent to `--data`. | +| `--consensus-data` | `-c` | Remove only consensus layer (Malachite) data. Cannot be combined with `--data`; combining it with `--execution-data` is equivalent to `--data`. | + +```bash +# Remove node data only (keep config, monitoring intact) +./quake clean --data + +# Remove only execution layer data +./quake clean --execution-data + +# Remove only consensus layer data +./quake clean --consensus-data + +# Remove node data and monitoring +./quake clean --data --monitoring + +# Remove everything including monitoring +./quake clean --all +``` ## Manifest File Format diff --git a/crates/quake/src/clean.rs b/crates/quake/src/clean.rs new file mode 100644 index 0000000..04e3a80 --- /dev/null +++ b/crates/quake/src/clean.rs @@ -0,0 +1,233 @@ +// Copyright 2026 Circle Internet Group, Inc. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::testnet::Testnet; +use std::fs; +use tracing::{debug, info, warn}; + +pub const RETH_DATA_SUBDIRS: [&str; 4] = ["db", "static_files", "blobstore", "invalid_block_hooks"]; +pub const MALACHITE_DATA_SUBDIRS: [&str; 2] = ["store.db", "wal"]; + +use crate::infra::InfraType; + +/// Controls which node data [`crate::testnet::Testnet::clean`] removes. +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum CleanScope { + /// Don't remove any node data. + Skip, + /// Remove both data and configuration from consensus and execution layer. + /// On remote infrastructure, also destroys AWS resources. + Full, + /// Remove only execution layer data, preserving configuration. + ExecutionData, + /// Remove only consensus layer data, preserving configuration. + ConsensusData, + /// Remove both consensus and execution layer data, preserving configuration. + Data, +} + +/// Derive a [`CleanScope`] from individual boolean flags. +pub fn clean_scope( + data: bool, + execution_data: bool, + consensus_data: bool, + monitoring: bool, +) -> CleanScope { + match (data || execution_data, data || consensus_data) { + (true, true) => CleanScope::Data, + (true, false) => CleanScope::ExecutionData, + (false, true) => CleanScope::ConsensusData, + // No data flags: clean only monitoring without touching node data. + (false, false) if monitoring => CleanScope::Skip, + (false, false) => CleanScope::Full, + } +} + +/// Clean up testnet-related files, directories, infrastructure, and running processes. +/// +/// `mode` controls which node data is removed. 
See [`CleanScope`] for the different strategies. +/// `include_monitoring` is orthogonal — any mode can be combined with monitoring cleanup. +pub async fn clean(testnet: &Testnet, mode: CleanScope, include_monitoring: bool) { + // Stop containers first + if let Err(err) = testnet.infra.down(&[]) { + warn!(%err, "⚠️ Failed to stop and remove containers"); + } else { + debug!("✅ Testnet is down"); + } + if include_monitoring { + match testnet.infra_data.infra_type { + InfraType::Local => { + if let Ok(local_infra) = testnet.local_infra() { + match local_infra.monitoring.stop() { + Ok(()) => debug!("✅ Monitoring containers stopped"), + Err(err) => warn!("⚠️ Failed to stop monitoring containers: {err:#}"), + } + match local_infra.monitoring.clean() { + Ok(()) => { + debug!(dir=%local_infra.monitoring.dir.display(), "✅ Monitoring data removed") + } + Err(err) => warn!("⚠️ Failed to remove monitoring data: {err:#}"), + } + } + } + InfraType::Remote => { + if let Ok(remote_infra) = testnet.remote_infra() { + match remote_infra.stop_monitoring() { + Ok(output) => info!(%output, "✅ Monitoring stopped on CC"), + Err(err) => warn!("⚠️ Failed to stop monitoring on CC: {err:#}"), + } + match remote_infra.clean_monitoring_data() { + Ok(output) => info!(%output, "✅ Monitoring data removed on CC"), + Err(err) => warn!("⚠️ Failed to remove monitoring data on CC: {err:#}"), + } + } + } + } + } + + match mode { + // Nothing to do. 
+ CleanScope::Skip => {} + CleanScope::Full => { + if matches!(testnet.infra_data.infra_type, InfraType::Remote) { + if let Ok(remote_infra) = testnet.remote_infra() { + if let Err(err) = remote_infra.ssm_tunnels.stop().await { + warn!(%err, "⚠️ Failed to terminate SSM sessions"); + } + if remote_infra.terraform.has_state() { + debug!("⬇️ Destroying remote infrastructure..."); + if let Err(err) = remote_infra.terraform.destroy(true) { + warn!(%err, "⚠️ Failed to destroy remote infrastructure"); + } else { + info!("✅ Remote infrastructure destroyed"); + } + } else { + info!("No Terraform state found; skipping infrastructure destroy"); + } + } else { + warn!("⚠️ No configuration for remote infrastructure found"); + } + } + if testnet.dir.exists() { + debug!(dir=%testnet.dir.display(), "🗑️ Removing testnet data"); + if let Err(err) = fs::remove_dir_all(&testnet.dir) { + warn!(dir=%testnet.dir.display(), "Failed to remove testnet data: {err}"); + } else { + debug!(dir=%testnet.dir.display(), "✅ Testnet data removed"); + } + } + } + CleanScope::ExecutionData | CleanScope::ConsensusData | CleanScope::Data => { + match testnet.infra_data.infra_type { + InfraType::Local => clean_node_data(testnet, &mode), + InfraType::Remote => clean_remote_node_data(testnet, &mode), + } + } + } +} + +/// Remove per-node data subdirectories according to `mode`, leaving all configuration intact. 
+fn clean_node_data(testnet: &Testnet, mode: &CleanScope) { + let Ok(local_infra) = testnet.local_infra() else { + warn!("⚠️ Cannot access local infrastructure to clean node data"); + return; + }; + for name in testnet.nodes_metadata.node_names() { + match mode { + CleanScope::ExecutionData => { + local_infra.clean_reth_data(&name); + } + CleanScope::ConsensusData => { + local_infra.clean_malachite_data(&name); + } + CleanScope::Data => { + local_infra.clean_reth_data(&name); + local_infra.clean_malachite_data(&name); + } + _ => unreachable!("clean_node_data called with unexpected mode"), + } + } +} + +/// Remove per-node data on remote nodes according to `mode`, leaving all configuration intact. +fn clean_remote_node_data(testnet: &Testnet, mode: &CleanScope) { + let remote_infra = match testnet.remote_infra() { + Ok(r) => r, + Err(err) => { + warn!(%err, "⚠️ Cannot access remote infrastructure to clean node data"); + return; + } + }; + match mode { + CleanScope::ExecutionData => { + remote_infra.clean_reth_data(); + } + CleanScope::ConsensusData => { + remote_infra.clean_malachite_data(); + } + CleanScope::Data => { + remote_infra.clean_reth_data(); + remote_infra.clean_malachite_data(); + } + _ => unreachable!("clean_remote_node_data called with unexpected mode"), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_clean_scope_no_flags() { + assert_eq!(clean_scope(false, false, false, false), CleanScope::Full); + } + + #[test] + fn test_clean_scope_data() { + assert_eq!(clean_scope(true, false, false, false), CleanScope::Data); + } + + #[test] + fn test_clean_scope_execution_data() { + assert_eq!( + clean_scope(false, true, false, false), + CleanScope::ExecutionData + ); + } + + #[test] + fn test_clean_scope_consensus_data() { + assert_eq!( + clean_scope(false, false, true, false), + CleanScope::ConsensusData + ); + } + + #[test] + fn test_clean_scope_monitoring_only() { + assert_eq!(clean_scope(false, false, false, true), 
CleanScope::Skip); + } + + #[test] + fn test_clean_scope_data_and_monitoring() { + assert_eq!(clean_scope(true, false, false, true), CleanScope::Data); + } + + #[test] + fn test_clean_scope_execution_and_consensus_data() { + assert_eq!(clean_scope(false, true, true, false), CleanScope::Data); + } +} diff --git a/crates/quake/src/infra/local.rs b/crates/quake/src/infra/local.rs index 05b83a5..03aa79d 100644 --- a/crates/quake/src/infra/local.rs +++ b/crates/quake/src/infra/local.rs @@ -17,10 +17,13 @@ use color_eyre::eyre::{eyre, Result}; use std::fs; use std::path::{Path, PathBuf}; +use tracing::{debug, warn}; +use crate::clean; use crate::infra::{docker, BuildProfile, InfraProvider, COMPOSE_PROJECT_NAME}; use crate::node::{Container, ContainerName, NodeName, SubnetName}; use crate::nodes::NodeOrContainerName; +use crate::shell; pub(crate) const COMPOSE_FILENAME: &str = "compose.yaml"; pub(crate) const COMPOSE_BUILD_FILENAME: &str = "arc_builders.yaml"; @@ -71,6 +74,7 @@ macro_rules! args { /// Local infrastructure provider, with nodes and other services deployed locally as Docker containers. pub(crate) struct LocalInfra { root_dir: PathBuf, + testnet_dir: PathBuf, compose_path: PathBuf, compose_build_path: PathBuf, pub monitoring: MonitoringManager, @@ -82,12 +86,47 @@ impl LocalInfra { let compose_build_path = testnet_dir.join(COMPOSE_BUILD_FILENAME); Ok(Self { root_dir: root_dir.to_path_buf(), + testnet_dir: testnet_dir.to_path_buf(), compose_path, compose_build_path, monitoring, }) } + /// Clean Reth data for a node, preserving nodekey and jwt.hex. 
+ pub fn clean_reth_data(&self, name: &str) { + let reth_dir = self.testnet_dir.join(name).join("reth"); + let paths: Vec<String> = clean::RETH_DATA_SUBDIRS + .iter() + .map(|s| reth_dir.join(s).to_string_lossy().into_owned()) + .collect(); + let args: Vec<&str> = std::iter::once("-rf") + .chain(paths.iter().map(|s| s.as_str())) + .collect(); + if let Err(err) = shell::exec("rm", args, &self.root_dir, None, false) { + warn!(%err, "⚠️ Failed to remove Reth data for {name}"); + } else { + debug!("✅ Reth data removed for {name}"); + } + } + + /// Clean Malachite data for a node, preserving config/. + pub fn clean_malachite_data(&self, name: &str) { + let malachite_dir = self.testnet_dir.join(name).join("malachite"); + let paths: Vec<String> = clean::MALACHITE_DATA_SUBDIRS + .iter() + .map(|s| malachite_dir.join(s).to_string_lossy().into_owned()) + .collect(); + let args: Vec<&str> = std::iter::once("-rf") + .chain(paths.iter().map(|s| s.as_str())) + .collect(); + if let Err(err) = shell::exec("rm", args, &self.root_dir, None, false) { + warn!(%err, "⚠️ Failed to remove Malachite data for {name}"); + } else { + debug!("✅ Malachite data removed for {name}"); + } + } + fn docker_exec(&self, args: Vec<&str>) -> Result<()> { docker::exec(&self.root_dir, args) } @@ -123,7 +162,8 @@ impl InfraProvider for LocalInfra { } fn is_setup(&self, _nodes: &[NodeName]) -> Result<()> { - docker::compose_file_exists(&self.compose_path) + docker::compose_file_exists(&self.compose_path)?; + docker::compose_file_exists(&self.monitoring.compose_path) } fn start(&self, names: &[NodeOrContainerName]) -> Result<()> { diff --git a/crates/quake/src/infra/remote.rs b/crates/quake/src/infra/remote.rs index 4f6de0f..81b1307 100644 --- a/crates/quake/src/infra/remote.rs +++ b/crates/quake/src/infra/remote.rs @@ -20,6 +20,7 @@ use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; use tracing::{debug, info, warn}; +use crate::clean::{MALACHITE_DATA_SUBDIRS, RETH_DATA_SUBDIRS}; use 
crate::infra::export::SSH_KEY_FILENAME; use crate::infra::terraform::Terraform; use crate::infra::{ssm, BuildProfile, InfraData, InfraProvider}; @@ -220,6 +221,22 @@ impl RemoteInfra { .wrap_err_with(|| format!("Failed to run '{cmd}' on {nodes:?}")) } + /// Run the same command on the given nodes in parallel by calling pssh.sh in CC and return its stdout. + fn pssh_single_cmd_with_output(&self, nodes: &[&NodeName], cmd: &str) -> Result<String> { + if nodes.is_empty() { + return Ok("".to_string()); + } + + let node_ips = nodes + .iter() + .map(|node| self.node_private_ip(node).map(String::as_str)) + .collect::<Result<Vec<_>>>()?; + + let pssh_cmd = format!("./pssh.sh '{cmd}' {}", node_ips.join(" ")); + self.ssh_cc_with_output(&pssh_cmd) + .wrap_err_with(|| format!("Failed to run '{cmd}' on {nodes:?}")) + } + /// Given a list of container names, return a list of pairs (node name, /// list of remote container names). A remote container name is either /// `cl` or `el`. @@ -455,6 +472,62 @@ impl RemoteInfra { info!("✅ Provisioning for remote infrastructure completed"); Ok(()) } + + /// Clean Reth data on all remote nodes. + pub fn clean_reth_data(&self) { + let paths = RETH_DATA_SUBDIRS + .map(|s| format!("~/data/reth/{s}")) + .join(" "); + let cmd = format!("sudo rm -rf {paths}"); + info!("Removing Reth data on remote nodes..."); + match self.pssh_single_cmd_with_output(&self.infra_data.node_names(), cmd.as_str()) { + Ok(output) => { + info!(%output, "✅ Reth data removed on remote nodes."); + } + Err(err) => { + warn!("⚠️ Failed to remove Reth data on remote nodes: {err:#}"); + } + } + } + + /// Clean Malachite data on all remote nodes. 
+ pub fn clean_malachite_data(&self) { + let paths = MALACHITE_DATA_SUBDIRS + .map(|s| format!("~/data/malachite/{s}")) + .join(" "); + let cmd = format!("sudo rm -rf {paths}"); + info!("Removing Malachite data on remote nodes..."); + match self.pssh_single_cmd_with_output(&self.infra_data.node_names(), cmd.as_str()) { + Ok(output) => { + info!(%output, "✅ Malachite data removed on remote nodes."); + } + Err(err) => { + warn!("⚠️ Failed to remove Malachite data on remote nodes: {err:#}"); + } + } + } + + /// Start monitoring services on the CC server. + pub fn start_monitoring(&self) -> Result<String> { + self.ssh_cc_with_output("docker compose -f ~/monitoring/compose.yaml up -d") + .wrap_err("Failed to start monitoring services on CC") + } + + /// Stop monitoring services on the CC server. + pub fn stop_monitoring(&self) -> Result<String> { + self.ssh_cc_with_output( + "docker compose -f ~/monitoring/compose.yaml down --volumes --timeout 5", + ) + .wrap_err("Failed to stop monitoring services on CC") + } + + /// Remove monitoring data directories on the CC server. 
+ pub fn clean_monitoring_data(&self) -> Result<String> { + self.ssh_cc_with_output( + "sudo rm -rf ~/monitoring/data-prometheus ~/monitoring/data-grafana ~/monitoring/blockscout/db ~/monitoring/blockscout/logs ~/monitoring/blockscout/dets", + ) + .wrap_err("Failed to remove monitoring data on CC") + } } impl InfraProvider for RemoteInfra { diff --git a/crates/quake/src/main.rs b/crates/quake/src/main.rs index f5adaf8..f6f9696 100644 --- a/crates/quake/src/main.rs +++ b/crates/quake/src/main.rs @@ -26,6 +26,7 @@ use std::time::Duration; use tracing::{debug, info, warn}; use tracing_subscriber::EnvFilter; +use clean::{clean_scope, CleanScope}; use perturb::Perturbation; use testnet::{Testnet, TestnetError}; @@ -36,6 +37,7 @@ use crate::perturb::{PERTURB_MAX_TIME_OFF, PERTURB_MIN_TIME_OFF}; use crate::valset::ValidatorPowerUpdate; mod build; +mod clean; mod genesis; mod info; mod infra; @@ -346,9 +348,35 @@ pub(crate) struct InfraArgs { #[derive(Args)] struct CleanArgs { - /// Also stop monitoring services and remove their data + /// Remove all data, including the testnet directory and monitoring services #[clap(short = 'a', long, default_value = "false")] + #[clap(conflicts_with_all = ["data", "execution_data", "consensus_data", "monitoring"])] all: bool, + + /// Stop monitoring services and remove their data + #[clap(short = 'm', long, default_value = "false")] + monitoring: bool, + /// Remove only execution and consensus layer data, preserving configuration + #[clap(short = 'd', long, default_value = "false")] + #[clap(conflicts_with_all = ["execution_data", "consensus_data"])] + data: bool, + /// Remove only execution layer data, preserving configuration + #[clap(short = 'x', long, default_value = "false")] + execution_data: bool, + /// Remove only consensus layer data, preserving configuration + #[clap(short = 'c', long, default_value = "false")] + consensus_data: bool, +} + +impl CleanArgs { + fn scope(&self) -> CleanScope { + clean_scope( + self.data, + 
self.execution_data, + self.consensus_data, + self.monitoring, + ) + } } #[derive(Debug, Subcommand, PartialEq)] @@ -658,12 +686,18 @@ async fn main() -> Result<()> { Commands::Stop { nodes_or_containers, } => testnet.stop(nodes_or_containers).await?, - Commands::Clean { clean_args } => testnet.clean(clean_args.all).await?, + Commands::Clean { clean_args } => { + testnet + .clean(clean_args.scope(), clean_args.all || clean_args.monitoring) + .await? + } Commands::Restart { clean_args, start_args, } => { - testnet.clean(clean_args.all).await?; + testnet + .clean(clean_args.scope(), clean_args.all || clean_args.monitoring) + .await?; pre_start( &mut testnet, &start_args, diff --git a/crates/quake/src/mcp.rs b/crates/quake/src/mcp.rs index cfa654f..c03ab5c 100644 --- a/crates/quake/src/mcp.rs +++ b/crates/quake/src/mcp.rs @@ -34,6 +34,7 @@ use serde::Deserialize; use tokio::sync::RwLock; use tracing::info; +use crate::clean::{clean_scope, CleanScope}; use crate::infra::remote; use crate::perturb::Perturbation; use crate::rpc; @@ -448,8 +449,12 @@ impl QuakeMcpServer { /// Cleans up testnet data and infrastructure. /// - /// Both modes remove testnet data (databases and generated files). If `all` is true, - /// monitoring services are also stopped and their data is removed. + /// By default (no flags), removes all node data and configuration. Partial flags: + /// - `data`: remove both execution and consensus layer data, preserving configuration. + /// - `execution_data`: remove only Reth (execution layer) data. + /// - `consensus_data`: remove only Malachite (consensus layer) data. + /// - `monitoring`: stop monitoring services and remove their data (combinable with data flags). + /// - `all`: remove everything including monitoring; cannot be combined with other flags. 
#[tool( name = "clean_testnet", annotations(read_only_hint = false, open_world_hint = false) @@ -459,12 +464,28 @@ impl QuakeMcpServer { params: Parameters<CleanParams>, ) -> Result<CallToolResult, rmcp::ErrorData> { self.ensure_ssm_tunnels().await?; - let all = params.0.all.unwrap_or(false); + let p = &params.0; + let all = p.all.unwrap_or(false); + let monitoring = p.monitoring.unwrap_or(false); + let mode = if all { + CleanScope::Full + } else { + clean_scope( + p.data.unwrap_or(false), + p.execution_data.unwrap_or(false), + p.consensus_data.unwrap_or(false), + monitoring, + ) + }; + let scope = if matches!(mode, CleanScope::Full) { + "full" + } else { + "partial" + }; let testnet = self.testnet.read().await; - testnet.clean(all).await.map_err(|e| { + testnet.clean(mode, all || monitoring).await.map_err(|e| { rmcp::ErrorData::internal_error(format!("Failed to clean testnet: {e}"), None) })?; - let scope = if all { "full" } else { "partial" }; Ok(CallToolResult::success(vec![Content::text(format!( "Testnet cleaned ({scope})" ))])) @@ -952,9 +973,18 @@ struct NodeNamesParams { /// Parameters for the clean_testnet tool. #[derive(Debug, Deserialize, JsonSchema)] struct CleanParams { - /// If true, also stop monitoring services and remove monitoring data. - /// Testnet data (including generated files) is removed in both modes. + /// If true, remove all data, including the testnet directory and monitoring services. all: Option<bool>, + + /// If true, also stop monitoring services and remove monitoring data. + monitoring: Option<bool>, + /// If true, remove both Reth and Malachite data, preserving testnet configuration. + /// The testnet can be restarted immediately without re-running setup. + data: Option<bool>, + /// If true, remove only Reth (execution layer) data. + execution_data: Option<bool>, + /// If true, remove only Malachite (consensus layer) data. + consensus_data: Option<bool>, } /// Parameters for timed perturbation tools (disconnect, kill, pause). 
diff --git a/crates/quake/src/testnet.rs b/crates/quake/src/testnet.rs index a9a4899..7d542c1 100644 --- a/crates/quake/src/testnet.rs +++ b/crates/quake/src/testnet.rs @@ -44,6 +44,8 @@ use crate::{InfoSubcommand, RemoteSubcommand, SSMSubcommand}; pub(crate) const QUAKE_DIR: &str = ".quake"; pub(crate) const LAST_MANIFEST_FILENAME: &str = ".last_manifest"; +pub use crate::clean::{clean, CleanScope}; + /// Stores the nodes upgraded using the 'quake upgrade' command on the running /// testnet, one per line. e.g.,: /// @@ -552,6 +554,13 @@ impl Testnet { self.start_from_manifest().await?; } + if let Ok(remote_infra) = self.remote_infra() { + match remote_infra.start_monitoring() { + Ok(output) => info!(%output, "✅ Monitoring started on CC"), + Err(err) => warn!("⚠️ Failed to start monitoring on CC: {err:#}"), + } + } + info!(dir=%self.dir.display(), "✅ Testnet started"); println!("📁 Testnet files: {}", self.dir.display()); self.print_monitoring_info(); @@ -971,60 +980,17 @@ impl Testnet { Ok(()) } - /// Clean up testnet-related files, directories, infrastructure, and running processes - pub async fn clean(&self, all: bool) -> Result<()> { - // Take down the testnet infrastructure - match self.infra_data.infra_type { - InfraType::Local => { - if let Err(err) = self.infra.down(&[]) { - warn!(%err, "⚠️ Failed to stop and remove containers"); - } else { - debug!("✅ Testnet is down"); - } - } - InfraType::Remote => { - let remote_infra = self.remote_infra()?; - if let Err(err) = remote_infra.ssm_tunnels.stop().await { - warn!(%err, "⚠️ Failed to terminate SSM sessions"); - } - - if remote_infra.terraform.has_state() { - debug!("⬇️ Destroying remote infrastructure..."); - if let Err(err) = remote_infra.terraform.destroy(true) { - warn!(%err, "⚠️ Failed to destroy remote infrastructure"); - } else { - info!("✅ Remote infrastructure destroyed"); - } - } else { - info!("No Terraform state found; skipping infrastructure destroy"); - } - } - } - - // Remove testnet local data - 
if self.dir.exists() { - debug!(dir=%self.dir.display(), "🗑️ Removing testnet data"); - if let Err(err) = fs::remove_dir_all(&self.dir) { - warn!(dir=%self.dir.display(), "Failed to remove testnet data: {err}"); - } else { - debug!(dir=%self.dir.display(), "✅ Testnet data removed"); - } - } - - // Take down local monitoring services and remove their data, if requested - if let Ok(local_infra) = self.local_infra() { - if all { - if local_infra.monitoring.stop().is_ok() { - debug!("✅ Monitoring services stopped"); - } - if local_infra.monitoring.clean().is_ok() { - debug!(dir=%local_infra.monitoring.dir.display(), "✅ Monitoring data removed"); - } - } else { - warn!( - "Monitoring services are still running; run `quake clean --all` to stop and clean them" + /// Clean up testnet-related files, directories, infrastructure, and running processes. + /// + /// `mode` controls which node data is removed. See [`CleanScope`] for the different strategies. + /// `include_monitoring` is orthogonal — any mode can be combined with monitoring cleanup. 
+ pub async fn clean(&self, mode: CleanScope, include_monitoring: bool) -> Result<()> { + clean(self, mode, include_monitoring).await; + + if matches!(mode, CleanScope::Full) && !include_monitoring { + warn!( + "Monitoring services are still running; run `quake clean --monitoring` to stop and clean them" ); - } } let _ = fs::remove_file(self.quake_dir.join(UPGRADED_CONTAINERS_FILENAME)); @@ -1225,7 +1191,7 @@ impl Testnet { } } - fn local_infra(&self) -> Result<Arc<LocalInfra>> { + pub(crate) fn local_infra(&self) -> Result<Arc<LocalInfra>> { if self.is_local() { Ok(Arc::downcast::<LocalInfra>(self.infra.clone()).unwrap()) } else { diff --git a/docs/running-an-arc-node.md b/docs/running-an-arc-node.md index ebf76ff..cb8b7c0 100644 --- a/docs/running-an-arc-node.md +++ b/docs/running-an-arc-node.md @@ -114,6 +114,7 @@ The Execution Layer (EL) is deployed by the `arc-node-execution` binary and star arc-node-execution node \ --chain arc-testnet \ --datadir $ARC_EXECUTION \ + --full \ --ipcpath $ARC_RUN/reth.ipc \ --auth-ipc --auth-ipc.path $ARC_RUN/auth.ipc \ --http --http.addr 127.0.0.1 --http.port 8545 \ @@ -124,6 +125,12 @@ arc-node-execution node \ --enable-arc-rpc ``` +> **Note on `--full` and snapshots:** The `--full` flag is required on the +> first start when bootstrapping from a pruned snapshot. It reconciles internal +> database tables that would otherwise fail a consistency check. After the +> initial startup completes, you may restart without `--full` if you prefer to +> run without pruning. + The `--chain` parameter configures the genesis file. By using `--chain arc-testnet`, the genesis configuration bundled in the binary is adopted. Replace with `--chain /path/to/genesis.json` if you have a custom genesis file. 
@@ -143,6 +150,7 @@ After starting the [execution layer](#start-execution-layer), in a different ter ```sh arc-node-consensus start \ --home $ARC_CONSENSUS \ + --full \ --eth-socket $ARC_RUN/reth.ipc \ --execution-socket $ARC_RUN/auth.ipc \ --rpc.addr 127.0.0.1:31000 \ @@ -307,6 +315,7 @@ WorkingDirectory=$HOME/.arc ExecStart=/usr/local/bin/arc-node-execution node \ --chain arc-testnet \ --datadir $HOME/.arc/execution \ + --full \ --disable-discovery \ --ipcpath /run/arc/reth.ipc \ --auth-ipc \ @@ -350,6 +359,7 @@ Environment=RUST_LOG=info WorkingDirectory=$HOME/.arc ExecStart=/usr/local/bin/arc-node-consensus start \ --home $HOME/.arc/consensus \ + --full \ --eth-socket /run/arc/reth.ipc \ --execution-socket /run/arc/auth.ipc \ --rpc.addr 127.0.0.1:31000 \ @@ -410,4 +420,13 @@ For production monitoring, scrape the Prometheus metrics endpoints with Grafana: ### Pruning -The `--full` flag is accepted by both the CL and EL and will enable pruning. However, EL pruning is currently considered unstable and is not recommended at this time. +The `--full` flag is accepted by both the CL and EL and will enable pruning. +When bootstrapping from a pruned snapshot, `--full` is **required** on the +first EL start to reconcile the database (see the note in +[Start execution layer](#start-execution-layer)). After that initial run you +can restart without `--full`. + +> **Caution:** EL pruning increases memory usage and may cause out-of-memory +> issues on constrained machines. If you encounter memory pressure, enable +> backpressure (see the [System Requirements](#system-requirements) section) and remove +> `--full` after the first successful start.
diff --git a/scripts/run-upgrade-test.sh b/scripts/run-upgrade-test.sh index 4945bdc..5c5fae5 100755 --- a/scripts/run-upgrade-test.sh +++ b/scripts/run-upgrade-test.sh @@ -30,7 +30,7 @@ WORKFLOW="nightly-upgrade.yml" FROM_VERSION="" TO_VERSION="" HARDFORK="" -BRANCH="master" +BRANCH="main" REMOTE="upstream" DRY_RUN=false @@ -49,7 +49,7 @@ Options: --from Starting version for EL and CL images --to Upgrade target version for EL and CL images --hardfork Set el_init_hardfork (e.g. zero4, zero5) - --branch Branch to run the workflow on (default: master) + --branch Branch to run the workflow on (default: main) --remote Git remote to resolve the repository from (default: upstream) --dry-run Print the command without executing it --help Show this help message