1010import csv
1111import io
1212import pickle
13+ import re
1314from collections import defaultdict
1415from multiprocessing import Pool , cpu_count
1516from pathlib import Path
@@ -172,7 +173,6 @@ def __init__(
172173 :param geography_mapping: Optional aggregation mapping for scenario regions.
173174 :type geography_mapping: dict | str | None
174175 :param activities_mapping: Optional reclassification mapping for activities.
175- :type activities_mapping: dict | str | None
176176 :param ecoinvent_version: Ecoinvent version string used when selecting LCIA data.
177177 :type ecoinvent_version: str
178178 :param classification_system: Ecoinvent classification system to use.
@@ -192,6 +192,10 @@ def __init__(
192192 self .classification_system = classification_system
193193 self ._load_classifications ()
194194
195+ # Apply activities_mapping to aggregate classifications
196+ if activities_mapping :
197+ self ._apply_activities_mapping (activities_mapping )
198+
195199 self .lca_results = None
196200 self .lcia_methods = get_lcia_method_names (self .ei_version )
197201 self .units = load_units_conversion ()
@@ -295,6 +299,82 @@ def _load_classifications(self):
295299 self .classifications [key ] = code_for_system
296300 added_keys += 1
297301
302+ def _extract_description (self , classification_code : str ) -> str :
303+ """Extract description from classification code.
304+
305+ Classification codes have format like '2011:Manufacture of basic chemicals'
306+ This extracts 'Manufacture of basic chemicals'.
307+
308+ :param classification_code: Full classification code with number prefix.
309+ :type classification_code: str
310+ :returns: Description part after the colon.
311+ :rtype: str
312+ """
313+ if ':' in classification_code :
314+ return classification_code .split (':' , 1 )[1 ].strip ()
315+ return classification_code .strip ()
316+
317+ def _normalize_text (self , text : str ) -> str :
318+ """Normalize text for fuzzy matching.
319+
320+ Converts to lowercase, removes punctuation, normalizes whitespace.
321+
322+ :param text: Text to normalize.
323+ :type text: str
324+ :returns: Normalized text.
325+ :rtype: str
326+ """
327+ text = text .lower ()
328+ text = re .sub (r'[,\-\(\)\.]' , ' ' , text ) # Replace punctuation with spaces
329+ text = ' ' .join (text .split ()) # Normalize whitespace
330+ return text
331+
def _apply_activities_mapping(self, activities_mapping):
    """Replace every classification code with its aggregated category.

    The mapping (as written in YAML) pairs an activity description with
    an aggregated category::

        "Manufacture of basic chemicals": "Chemicals and Fertilizers"

    whereas the stored classification codes look like::

        "2011:Manufacture of basic chemicals"

    For each entry of ``self.classifications`` the description part of the
    code is extracted, normalized, and looked up among the normalized
    mapping keys. A hit overwrites the entry with the mapped category; a
    miss overwrites it with the literal string ``"unclassified"``. The
    dictionary is updated in place, so no original code survives the call.

    When ``self.debug`` is truthy, a summary with both counts is logged.

    :param activities_mapping: Path to YAML file or dict with mapping;
        passed as-is to ``load_mapping``.
    :type activities_mapping: str | dict
    """
    raw_mapping = load_mapping(activities_mapping)

    # Index the categories by normalized description so that case,
    # punctuation and spacing differences do not prevent a match.
    category_by_description = {}
    for label, category in raw_mapping.items():
        category_by_description[self._normalize_text(label)] = category

    matched = 0
    unmatched = 0

    # Only values of existing keys are reassigned, so the dictionary size
    # never changes while it is being iterated.
    for activity_key, full_code in self.classifications.items():
        lookup_key = self._normalize_text(self._extract_description(full_code))
        try:
            category = category_by_description[lookup_key]
        except KeyError:
            self.classifications[activity_key] = "unclassified"
            unmatched += 1
        else:
            self.classifications[activity_key] = category
            matched += 1

    if self.debug:
        logging.info(
            f"Activities mapping applied: {matched} aggregated, "
            f"{unmatched} unclassified"
        )
377+
298378 def _get_scenarios (self , scenario_data : pd .DataFrame ) -> xr .DataArray :
299379 """Convert the datapackage scenario table into a harmonized ``xarray`` object.
300380
0 commit comments