Skip to content

Commit d64a82d

Browse files
committed
fix #15
1 parent 65839d1 commit d64a82d

2 files changed

Lines changed: 87 additions & 2 deletions

File tree

pathways/pathways.py

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import csv
1111
import io
1212
import pickle
13+
import re
1314
from collections import defaultdict
1415
from multiprocessing import Pool, cpu_count
1516
from pathlib import Path
@@ -172,7 +173,6 @@ def __init__(
172173
:param geography_mapping: Optional aggregation mapping for scenario regions.
173174
:type geography_mapping: dict | str | None
174175
:param activities_mapping: Optional reclassification mapping for activities.
175-
:type activities_mapping: dict | str | None
176176
:param ecoinvent_version: Ecoinvent version string used when selecting LCIA data.
177177
:type ecoinvent_version: str
178178
:param classification_system: Ecoinvent classification system to use.
@@ -192,6 +192,10 @@ def __init__(
192192
self.classification_system = classification_system
193193
self._load_classifications()
194194

195+
# Apply activities_mapping to aggregate classifications
196+
if activities_mapping:
197+
self._apply_activities_mapping(activities_mapping)
198+
195199
self.lca_results = None
196200
self.lcia_methods = get_lcia_method_names(self.ei_version)
197201
self.units = load_units_conversion()
@@ -295,6 +299,82 @@ def _load_classifications(self):
295299
self.classifications[key] = code_for_system
296300
added_keys += 1
297301

302+
def _extract_description(self, classification_code: str) -> str:
303+
"""Extract description from classification code.
304+
305+
Classification codes have format like '2011:Manufacture of basic chemicals'
306+
This extracts 'Manufacture of basic chemicals'.
307+
308+
:param classification_code: Full classification code with number prefix.
309+
:type classification_code: str
310+
:returns: Description part after the colon.
311+
:rtype: str
312+
"""
313+
if ':' in classification_code:
314+
return classification_code.split(':', 1)[1].strip()
315+
return classification_code.strip()
316+
317+
def _normalize_text(self, text: str) -> str:
318+
"""Normalize text for fuzzy matching.
319+
320+
Converts to lowercase, removes punctuation, normalizes whitespace.
321+
322+
:param text: Text to normalize.
323+
:type text: str
324+
:returns: Normalized text.
325+
:rtype: str
326+
"""
327+
text = text.lower()
328+
text = re.sub(r'[,\-\(\)\.]', ' ', text) # Replace punctuation with spaces
329+
text = ' '.join(text.split()) # Normalize whitespace
330+
return text
331+
332+
def _apply_activities_mapping(self, activities_mapping):
    """Collapse ``self.classifications`` into the categories of a mapping.

    The mapping associates a classification description with an
    aggregated category, e.g.::

        "Manufacture of basic chemicals": "Chemicals and Fertilizers"

    while the values stored in ``self.classifications`` look like::

        "2011:Manufacture of basic chemicals"

    For every entry the description is extracted from the code, normalized,
    and looked up among the normalized mapping keys. On a hit the entry is
    overwritten with the aggregated category; otherwise it is overwritten
    with ``"unclassified"``. The dictionary is modified in place.

    :param activities_mapping: Path to YAML file or dict with mapping.
    :type activities_mapping: str | dict
    """
    raw_mapping = load_mapping(activities_mapping)

    # Build the lookup table keyed by normalized description so that
    # case, punctuation and spacing differences do not prevent a match.
    category_by_description = {}
    for label, category in raw_mapping.items():
        category_by_description[self._normalize_text(label)] = category

    aggregated_count = 0
    unclassified_count = 0

    # Only values of existing keys are reassigned, so the dictionary size
    # does not change while it is being iterated.
    for key, code in self.classifications.items():
        lookup_key = self._normalize_text(self._extract_description(code))

        if lookup_key not in category_by_description:
            self.classifications[key] = "unclassified"
            unclassified_count += 1
            continue

        self.classifications[key] = category_by_description[lookup_key]
        aggregated_count += 1

    if self.debug:
        logging.info(
            f"Activities mapping applied: {aggregated_count} aggregated, "
            f"{unclassified_count} unclassified"
        )
377+
298378
def _get_scenarios(self, scenario_data: pd.DataFrame) -> xr.DataArray:
299379
"""Convert the datapackage scenario table into a harmonized ``xarray`` object.
300380

pathways/utils.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,13 @@ def create_lca_results_array(
280280
raise ValueError("Empty list of scenarios")
281281

282282
# Define the coordinates for the xarray DataArray
283+
# Get unique categories, only add "unclassified" if not already present
284+
act_categories = list(set(classifications.values()))
285+
if "unclassified" not in act_categories:
286+
act_categories.append("unclassified")
287+
283288
coords = {
284-
"act_category": list(set(list(classifications.values()))) + ["unclassified"],
289+
"act_category": act_categories,
285290
"variable": list(mapping.keys()),
286291
"year": years,
287292
"region": regions,

0 commit comments

Comments
 (0)