From f9bd16a07791ecdfccd6f90de168eef1af34cd39 Mon Sep 17 00:00:00 2001 From: Julien Nioche Date: Tue, 17 Mar 2026 11:12:13 +0000 Subject: [PATCH] New Networking module, fixes #135 Signed-off-by: Julien Nioche --- docs/src/howto/config_modules.md | 2 +- docs/src/modules.md | 21 +++ .../spruce/modules/Networking.java | 103 +++++++++++++ .../spruce/modules/ccf/Networking.java | 2 + src/main/resources/default-config.json | 8 +- .../spruce/modules/NetworkingTest.java | 138 ++++++++++++++++++ 6 files changed, 271 insertions(+), 3 deletions(-) create mode 100644 src/main/java/com/digitalpebble/spruce/modules/Networking.java create mode 100644 src/test/java/com/digitalpebble/spruce/modules/NetworkingTest.java diff --git a/docs/src/howto/config_modules.md b/docs/src/howto/config_modules.md index 3530607..1e005b8 100644 --- a/docs/src/howto/config_modules.md +++ b/docs/src/howto/config_modules.md @@ -6,7 +6,7 @@ The enrichment modules are configured in a file called `default-config.json`. Th {{#include ../../../src/main/resources/default-config.json}} ``` -This determines which modules are used and in what order but also configures their behaviour. For instance, the default coefficient set for the ccf.Networking module is _0.001_ kWh/Gb. +This determines which modules are used and in what order but also configures their behaviour. For instance, the Networking module uses different coefficients for intra-region, inter-region, and external data transfers, all configurable via the `network_coefficients_kwh_gb` map. ## Change the configuration diff --git a/docs/src/modules.md b/docs/src/modules.md index 3e72245..28d6fb5 100644 --- a/docs/src/modules.md +++ b/docs/src/modules.md @@ -29,6 +29,8 @@ See [methodology](https://www.cloudcarbonfootprint.org/docs/methodology#storage) Provides an estimate of energy used for networking in and out of data centres. 
Applies a flat coefficient of _0.001 kWh/Gb_ by default, see [methodology](https://www.cloudcarbonfootprint.org/docs/methodology#networking) for more details. The coefficient can be changed via configuration as shown in [Configure the modules](howto/config_modules.md). +**Note**: this module has been replaced by [Networking](#networking) in the default configuration, which distinguishes between transfer types. + **Output column**: `operational_energy_kwh`. ### ccf.Accelerators @@ -94,6 +96,25 @@ The World Resource Institute's Aqueduct tool is licensed through Creative Common **Output columns**: `water_cooling_l`, `water_electricity_production_l`, and `water_consumption_stress_area_l`. +## Networking + +Provides an estimate of energy used for networking in and out of data centres. Unlike `ccf.Networking`, which applies a single flat coefficient, this module distinguishes between three transfer types with separate coefficients (in kWh/Gb): + +| Transfer type | Key | Default | Description | +|---|---|---|---| +| Intra-region | `intra` | 0.001 | Traffic within the same region | +| Inter-region | `inter` | 0.0015 | Traffic between AWS regions | +| External | `extra` | 0.059 | Traffic to/from the internet (AWS Inbound / Outbound) | + +The coefficients are taken from the Boavizta Cloud Emissions Working Group and can be overridden via the `network_coefficients_kwh_gb` configuration map. + +The relevance and usefulness of attributing emissions for networking based on usage is a +subject of debate, as the energy use of networking is fairly constant regardless of +traffic. The consequences of reducing networking are probably negligible but since the +approach in SPRUCE is attributional, we do the same for networking in order to be consistent. + +**Output column**: `operational_energy_kwh`. + ## Serverless Provides an estimate of energy for the memory and vCPU usage of serverless services like Fargate or EMR. 
diff --git a/src/main/java/com/digitalpebble/spruce/modules/Networking.java b/src/main/java/com/digitalpebble/spruce/modules/Networking.java
new file mode 100644
index 0000000..42d371b
--- /dev/null
+++ b/src/main/java/com/digitalpebble/spruce/modules/Networking.java
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: Apache-2.0
+
+package com.digitalpebble.spruce.modules;
+
+import com.digitalpebble.spruce.Column;
+import com.digitalpebble.spruce.EnrichmentModule;
+import org.apache.spark.sql.Row;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+
+import static com.digitalpebble.spruce.CURColumn.*;
+import static com.digitalpebble.spruce.SpruceColumn.ENERGY_USED;
+
+/**
+ * Provides an estimate of energy used for networking in and out of data centres.
+ * Only rows whose service code is {@code AWSDataTransfer} are enriched; the coefficient
+ * applied (in kWh/Gb) depends on the {@code transfer_type} attribute of the product:
+ * - Intra-region: 0.001
+ * - Inter-region: 0.0015
+ * - External (AWS Inbound / AWS Outbound): 0.059
+ * The defaults are taken from the Boavizta Cloud Emissions Working Group and can be
+ * overridden with the {@code network_coefficients_kwh_gb} configuration map.
+ *
+ * The relevance and usefulness of attributing emissions for networking based on usage is
+ * a subject of debate as the energy use of networking is fairly constant regardless of
+ * traffic. The consequences of reducing networking are probably negligible but since the
+ * approach in SPRUCE is attributional, we do the same for networking in order to be consistent.
+ **/
+public class Networking implements EnrichmentModule {
+
+    private static final Logger LOG = LoggerFactory.getLogger(Networking.class);
+
+    // estimated kWh/Gb per transfer type; package-private so that NetworkingTest can read them
+    double network_coefficient_intra = 0.001;
+    double network_coefficient_inter = 0.0015;
+    double network_coefficient_extra = 0.059;
+
+    /**
+     * Reads the optional {@code network_coefficients_kwh_gb} map and replaces the default of
+     * every transfer type ({@code intra}, {@code inter}, {@code extra}) it defines.
+     *
+     * @param params module configuration; keys that are absent keep their default value
+     */
+    @Override
+    public void init(Map params) {
+        Map network_coefficients = (Map) params.get("network_coefficients_kwh_gb");
+        if (network_coefficients != null) {
+            network_coefficient_intra = coefficient(network_coefficients, "intra", network_coefficient_intra);
+            network_coefficient_inter = coefficient(network_coefficients, "inter", network_coefficient_inter);
+            network_coefficient_extra = coefficient(network_coefficients, "extra", network_coefficient_extra);
+        }
+        LOG.info("network_coefficients_kwh_gb: intra={}, inter={}, extra={}", network_coefficient_intra, network_coefficient_inter, network_coefficient_extra);
+    }
+
+    /** Returns the numeric value configured for {@code key}, or {@code fallback} when there is none. */
+    private static double coefficient(Map coefficients, String key, double fallback) {
+        Number configured = (Number) coefficients.get(key);
+        return configured != null ? configured.doubleValue() : fallback;
+    }
+
+    /** @return the columns read by {@link #enrich(Row, Map)} */
+    @Override
+    public Column[] columnsNeeded() {
+        return new Column[]{PRODUCT_SERVICE_CODE, PRODUCT, USAGE_AMOUNT};
+    }
+
+    /** @return the column written by {@link #enrich(Row, Map)} */
+    @Override
+    public Column[] columnsAdded() {
+        return new Column[]{ENERGY_USED};
+    }
+
+    /**
+     * Adds {@code ENERGY_USED} (usage amount multiplied by the coefficient of the transfer
+     * type) for {@code AWSDataTransfer} rows. Rows for any other service, without product
+     * attributes or with an unrecognized transfer type are left untouched.
+     *
+     * @param row            line item being processed
+     * @param enrichedValues receives the estimate, keyed by {@code ENERGY_USED}
+     */
+    @Override
+    public void enrich(Row row, Map enrichedValues) {
+        String service_code = PRODUCT_SERVICE_CODE.getString(row);
+        if (!"AWSDataTransfer".equals(service_code)) {
+            return;
+        }
+        int index = PRODUCT.resolveIndex(row);
+        if (row.isNullAt(index)) {
+            // no product attributes: the transfer type cannot be determined
+            return;
+        }
+        Map productMap = row.getJavaMap(index);
+        // plain get(): getOrDefault would still return null for a key mapped to null
+        String transfer_type = (String) productMap.get("transfer_type");
+        if (transfer_type == null) {
+            transfer_type = "";
+        }
+
+        final double network_coefficient;
+        if (transfer_type.startsWith("Inter")) {
+            network_coefficient = network_coefficient_inter;
+        } else if (transfer_type.startsWith("IntraRegion")) {
+            network_coefficient = network_coefficient_intra;
+        } else if (transfer_type.startsWith("AWS Inbound") || transfer_type.startsWith("AWS Outbound")) {
+            network_coefficient = network_coefficient_extra;
+        } else {
+            LOG.info("Transfer type not recognized: {}", transfer_type);
+            return;
+        }
+
+        // get the amount of data transferred
+        double amount_gb = USAGE_AMOUNT.getDouble(row);
+        double energy_kwh = amount_gb * network_coefficient;
+
+        enrichedValues.put(ENERGY_USED, energy_kwh);
+    }
+}
diff --git a/src/main/java/com/digitalpebble/spruce/modules/ccf/Networking.java b/src/main/java/com/digitalpebble/spruce/modules/ccf/Networking.java
index 5a83f9e..4a5055f 100644
--- a/src/main/java/com/digitalpebble/spruce/modules/ccf/Networking.java
+++ b/src/main/java/com/digitalpebble/spruce/modules/ccf/Networking.java
@@ -19,6 +19,9 @@
  *
  * @see CCF methodology
  * @see resource file
+ *
+ * @deprecated use {@link com.digitalpebble.spruce.modules.Networking}, a more accurate replacement
  **/
+@Deprecated
 public class Networking implements EnrichmentModule {
 
diff --git a/src/main/resources/default-config.json b/src/main/resources/default-config.json
index 179d306..1bcd1fa 100644
--- a/src/main/resources/default-config.json
+++ b/src/main/resources/default-config.json
@@ -11,9 +11,13 @@
       }
     },
     {
-      "className": "com.digitalpebble.spruce.modules.ccf.Networking",
+      "className": "com.digitalpebble.spruce.modules.Networking",
       "config": {
-        "network_coefficient": 0.001
+        "network_coefficients_kwh_gb": {
+          "intra": 0.001,
+          "inter": 0.0015,
+          "extra": 0.059
+        }
       }
     },
     {
diff --git a/src/test/java/com/digitalpebble/spruce/modules/NetworkingTest.java b/src/test/java/com/digitalpebble/spruce/modules/NetworkingTest.java
new file mode 100644
index 0000000..4c60b9a
--- /dev/null
+++ b/src/test/java/com/digitalpebble/spruce/modules/NetworkingTest.java
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: Apache-2.0
+
+package com.digitalpebble.spruce.modules;
+
+import com.digitalpebble.spruce.Column;
+import com.digitalpebble.spruce.Utils;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
+import org.apache.spark.sql.types.StructType;
+import org.junit.jupiter.api.Test;
+import scala.collection.JavaConverters;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static com.digitalpebble.spruce.SpruceColumn.ENERGY_USED;
+import static org.junit.jupiter.api.Assertions.*;
+
+class NetworkingTest {
+
+    private final Networking networking = new Networking();
+    private final StructType schema = Utils.getSchema(networking);
+
+    @Test
+    void processNoValues() {
+        Object[] values = new Object[] {null, null, null, null};
+        Row row = new GenericRowWithSchema(values, schema);
+        Map enriched = new HashMap<>();
+        networking.enrich(row, enriched);
+        assertFalse(enriched.containsKey(ENERGY_USED));
+    }
+
+    @Test
+    void processNonDataTransferService() {
+        Map product = new HashMap<>();
+        Object[] values = new Object[] {"AmazonEC2", JavaConverters.asScala(product), 10d, null};
+        Row row = new GenericRowWithSchema(values, schema);
+        Map enriched = new HashMap<>();
+        networking.enrich(row, enriched);
+        assertFalse(enriched.containsKey(ENERGY_USED));
+    }
+
+    @Test
+    void processIntraRegion() {
+        Map product = new HashMap<>();
+        product.put("transfer_type", "IntraRegion");
+        Object[] values = new Object[] {"AWSDataTransfer", JavaConverters.asScala(product), 10d, null};
+        Row row = new GenericRowWithSchema(values, schema);
+        Map enriched = new HashMap<>();
+        networking.enrich(row, enriched);
+        double expected = networking.network_coefficient_intra * 10;
+        assertEquals(expected, (Double) enriched.get(ENERGY_USED));
+    }
+
+    @Test
+    void processInterRegion() {
+        Map product = new HashMap<>();
+        product.put("transfer_type", "InterRegion");
+        Object[] values = new Object[] {"AWSDataTransfer", JavaConverters.asScala(product), 10d, null};
+        Row row = new GenericRowWithSchema(values, schema);
+        Map enriched = new HashMap<>();
+        networking.enrich(row, enriched);
+        double expected = networking.network_coefficient_inter * 10;
+        assertEquals(expected, (Double) enriched.get(ENERGY_USED));
+    }
+
+    @Test
+    void processAWSOutbound() {
+        Map product = new HashMap<>();
+        product.put("transfer_type", "AWS Outbound");
+        Object[] values = new Object[] {"AWSDataTransfer", JavaConverters.asScala(product), 5d, null};
+        Row row = new GenericRowWithSchema(values, schema);
+        Map enriched = new HashMap<>();
+        networking.enrich(row, enriched);
+        double expected = networking.network_coefficient_extra * 5;
+        assertEquals(expected, (Double) enriched.get(ENERGY_USED));
+    }
+
+    @Test
+    void processAWSInbound() {
+        Map product = new HashMap<>();
+        product.put("transfer_type", "AWS Inbound");
+        Object[] values = new Object[] {"AWSDataTransfer", JavaConverters.asScala(product), 5d, null};
+        Row row = new GenericRowWithSchema(values, schema);
+        Map enriched = new HashMap<>();
+        networking.enrich(row, enriched);
+        double expected = networking.network_coefficient_extra * 5;
+        assertEquals(expected, (Double) enriched.get(ENERGY_USED));
+    }
+
+    @Test
+    void processUnknownTransferType() {
+        Map product = new HashMap<>();
+        product.put("transfer_type", "SomethingElse");
+        Object[] values = new Object[] {"AWSDataTransfer", JavaConverters.asScala(product), 10d, null};
+        Row row = new GenericRowWithSchema(values, schema);
+        Map enriched = new HashMap<>();
+        networking.enrich(row, enriched);
+        assertFalse(enriched.containsKey(ENERGY_USED));
+    }
+
+    @Test
+    void processMissingProduct() {
+        Object[] values = new Object[] {"AWSDataTransfer", null, 10d, null};
+        Row row = new GenericRowWithSchema(values, schema);
+        Map enriched = new HashMap<>();
+        networking.enrich(row, enriched);
+        assertFalse(enriched.containsKey(ENERGY_USED));
+    }
+
+    @Test
+    void processNullTransferType() {
+        Map product = new HashMap<>();
+        product.put("transfer_type", null);
+        Object[] values = new Object[] {"AWSDataTransfer", JavaConverters.asScala(product), 10d, null};
+        Row row = new GenericRowWithSchema(values, schema);
+        Map enriched = new HashMap<>();
+        networking.enrich(row, enriched);
+        assertFalse(enriched.containsKey(ENERGY_USED));
+    }
+
+    @Test
+    void initWithCustomCoefficients() {
+        Networking custom = new Networking();
+        Map params = new HashMap<>();
+        Map coefficients = new HashMap<>();
+        coefficients.put("intra", 0.002);
+        coefficients.put("inter", 0.003);
+        coefficients.put("extra", 0.1);
+        params.put("network_coefficients_kwh_gb", coefficients);
+        custom.init(params);
+        assertEquals(0.002, custom.network_coefficient_intra);
+        assertEquals(0.003, custom.network_coefficient_inter);
+        assertEquals(0.1, custom.network_coefficient_extra);
+    }
+
+    @Test
+    void initWithNoCoefficientsKeepsDefaults() {
+        Networking custom = new Networking();
+        custom.init(new HashMap<>());
+        assertEquals(0.001, custom.network_coefficient_intra);
+        assertEquals(0.0015, custom.network_coefficient_inter);
+        assertEquals(0.059, custom.network_coefficient_extra);
+    }
+
+    @Test
+    void initWithPartialCoefficients() {
+        Networking custom = new Networking();
+        Map params = new HashMap<>();
+        Map coefficients = new HashMap<>();
+        coefficients.put("inter", 0.005);
+        params.put("network_coefficients_kwh_gb", coefficients);
+        custom.init(params);
+        assertEquals(0.001, custom.network_coefficient_intra);
+        assertEquals(0.005, custom.network_coefficient_inter);
+        assertEquals(0.059, custom.network_coefficient_extra);
+    }
+}