Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ dmypy.json
*.zip
# Test data
test_wildfire_vector
test_wildfire_classes_vector

# Ruff
.ruff_cache
Expand Down
74 changes: 51 additions & 23 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
{
"hashed_secret": "5204df45fc8c724684bbc61cd4107a726a6b9204",
"is_verified": false,
"line_number": 96,
"line_number": 95,
"type": "Secret Keyword",
"verified_result": null
},
Expand Down Expand Up @@ -153,72 +153,100 @@
],
"docs/examples/labels_to_data.ipynb": [
{
"hashed_secret": "ea7bf9657ae460e16f1f5a09be13331ef369e647",
"hashed_secret": "b4c98c8116583474b0ce624687462f16212d3650",
"is_verified": false,
"line_number": 155,
"line_number": 159,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "ccfcfcee2a2f7039e65ad59276d2df5daff216f3",
"hashed_secret": "2d55e648f84ed1a7ebb0049f13aaaa8feac4c656",
"is_verified": false,
"line_number": 654,
"line_number": 699,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "040bd74da546d56544a69f11bb291e0911b32415",
"hashed_secret": "f01d3fe6a4fac4d8acdd27cf69de63dfb3cb90ae",
"is_verified": false,
"line_number": 673,
"line_number": 1075,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "0af104c47bac851d6aea383735fb00c038075527",
"hashed_secret": "5c3f245e2a875b184c38437fc79312a460e46f64",
"is_verified": false,
"line_number": 759,
"line_number": 1094,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "28e34a8419ecb7d29f43f0bbc270b16d52e24318",
"hashed_secret": "6eb312da2c4cd2a6af47cfbfced9f7537d29e3ca",
"is_verified": false,
"line_number": 769,
"line_number": 1188,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "78084293061a8f6970060ebca17f3c888aa6145f",
"hashed_secret": "7a115f1ced67eb4bdb8c61bfa843ff6805e83483",
"is_verified": false,
"line_number": 968,
"line_number": 1276,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "21b0b4bef5092046ae7550022244e107908c622f",
"hashed_secret": "8eacdf4b2e1225fde9c0e61e379b6e64dc64f910",
"is_verified": false,
"line_number": 1033,
"line_number": 1286,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "9822c5a483cf4b897a24d2002c20eaf371177428",
"hashed_secret": "86cee4c3f71528e41a7ecdf652934f07d50b64a3",
"is_verified": false,
"line_number": 1052,
"line_number": 1388,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "c1dea1d5fe85fbf50d35ca3274348a534d0075e3",
"hashed_secret": "22c4272dc742fbf283fe572cd686439c29a6af06",
"is_verified": false,
"line_number": 1112,
"line_number": 1398,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "793b0144acd585c78b9f9ccd53855b47e199e1c0",
"hashed_secret": "2ef95f955e08b6e0cd9384f27bedac942b9bc98e",
"is_verified": false,
"line_number": 1122,
"line_number": 1644,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "dcad3d7e0c43d75e00654218eeddf560c94e0d05",
"is_verified": false,
"line_number": 1709,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "9030aa541bbe3cd57e8121bc4df512b24e28addf",
"is_verified": false,
"line_number": 1728,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "719cffec4ca2382c1ec69049456d9114dd50ac84",
"is_verified": false,
"line_number": 1788,
"type": "Base64 High Entropy String",
"verified_result": null
},
{
"hashed_secret": "cff83a50da163ee1acd619111e345870745574d9",
"is_verified": false,
"line_number": 1798,
"type": "Base64 High Entropy String",
"verified_result": null
}
Expand Down Expand Up @@ -280,7 +308,7 @@
{
"hashed_secret": "34fd2a7f5faa004cd1b9e4a22aa09b16d521661b",
"is_verified": false,
"line_number": 227,
"line_number": 288,
"type": "Hex High Entropy String",
"verified_result": null
}
Expand Down Expand Up @@ -314,7 +342,7 @@
{
"hashed_secret": "34fd2a7f5faa004cd1b9e4a22aa09b16d521661b",
"is_verified": false,
"line_number": 62,
"line_number": 68,
"type": "Hex High Entropy String",
"verified_result": null
}
Expand Down
17 changes: 17 additions & 0 deletions docs/download_data.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,23 @@ The shapefile `{dataset_name}_labels.shp` must contain a `datetime` field and `g
### Keep files: `keep_files`
Flag to preserve shapefiles in the working directory once they have been used by the download data step. Downloaded files will not be removed. Set to `True` to ensure shapefiles remain in place.

### Set No Data: `set_no_data`
Controls how label rasterization handles the background (no-data) pixels. When set to `True`, background pixels are assigned a no-data value (-1), allowing label class 0 to be used for actual labels. When set to `False` (default), background pixels are assigned value 0, which means label classes must start from 1 to avoid conflicts.

**Important:** If your labels use class 0 and `set_no_data=False`, TerraKit will raise a `TerrakitValueError` because class 0 would conflict with the background class. In this case, you must either:
- Set `set_no_data=True` to use -1 for background pixels, or
- Ensure your label classes start from 1 instead of 0

Example with multi-class labels using class 0:
```python
queried_data = download_data(
data_sources=config["download"]["data_sources"],
date_allowance=config["download"]["date_allowance"],
set_no_data=True, # Required when using class 0
transform=config["download"]["transform"],
)
```

## Data Connectors
Data connectors are classes which enable a user to search for data and query data from a particular data source using a common set of functions. Check out the [TerraKit Data Connectors](#data-connectors) section for more information.

Expand Down
974 changes: 825 additions & 149 deletions docs/examples/labels_to_data.ipynb

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion docs/process_labels.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,17 @@ EMSR801_AOI01_DEL_MONIT02_observedEventA_v1.json,2025-04-23
TerraKit will look for a file called `metadata.csv` in the `labels_folder`.

### label_type
`label_type`: Set to either `raster` or `vector`. TerraKit expects label data in either vector or raster format.
`label_type`: Set to either `raster` or `vector`. TerraKit expects label data in either vector or raster format.

### Multi-class Labels
For multi-class label datasets, TerraKit supports automatic class detection through filename patterns. Include `_CLASS_<number>_` in your label filenames to specify the class:

```
EMSR801_AOI01_DEL_MONIT02_CLASS_0_observedEventA_v1_2025-04-23.json
EMSR801_AOI01_DEL_MONIT02_CLASS_1_observedEventA_v1_2025-04-23.json
```

The class number will be extracted from the filename and used during rasterization. If no `_CLASS_` pattern is found, the label defaults to class 1. This enables visualization with distinct colors for each class and proper handling of multi-class segmentation tasks.

## Download example labels
To download a set of example labels, use the `rapid_mapping_geojson_downloader` function to get started:
Expand Down
2 changes: 1 addition & 1 deletion terrakit/chip/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# SPDX-License-Identifier: Apache-2.0


from terrakit.chip import tiling
from terrakit.chip import tiling as tiling
69 changes: 60 additions & 9 deletions terrakit/download/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class DownloadCls:
active (bool): Flag to activate/deactivate data download.
max_cloud_cover (int): Maximum cloud cover percentage for data selection.
keep_files (bool): Flag to keep shapefiles once they have been used. Downloaded files will not be removed.
set_no_data (bool): Flag to set non-labeled data as no-data. Default False
datetime_bbox_shp_file (str): Path to shapefile containing datetime and bounding boxes to be downloaded.
labels_shp_file (str): Path to shapefile containing labels.

Expand Down Expand Up @@ -107,6 +108,7 @@ def __init__(
active: bool = True,
max_cloud_cover: int = 80,
keep_files: bool = False,
set_no_data: bool = False,
datetime_bbox_shp_file: str = "./tmp/terrakit_curated_dataset_all_bboxes.shp",
labels_shp_file: str = "./tmp/terrakit_curated_dataset_labels.shp",
):
Expand All @@ -122,6 +124,7 @@ def __init__(
active (bool): Flag to activate/deactivate data download.
max_cloud_cover (int): Maximum cloud cover percentage for data selection.
keep_files (bool): Flag to keep shapefiles once they have been used. Downloaded files will not be removed.
set_no_data (bool): Flag to set non-labeled data as no-data. Default False
datetime_bbox_shp_file (str): Path to shapefile containing datetime bounding boxes.
labels_shp_file (str): Path to shapefile containing labels.
"""
Expand All @@ -132,6 +135,7 @@ def __init__(
self.active = active
self.max_cloud_cover = max_cloud_cover
self.keep_files = keep_files
self.set_no_data = set_no_data
self.datetime_bbox_shp_file = datetime_bbox_shp_file
self.labels_shp_file = labels_shp_file
self.data_sources = data_sources
Expand Down Expand Up @@ -226,9 +230,15 @@ def find_and_query_data_for_matching_dates(
)
grouped_bbox_gdf = self._read_shp_file(bbox_shp_file)

# Deduplicate by datetime and geometry to avoid downloading same tile multiple times
# This happens when multiple label classes exist for the same date/location
grouped_bbox_gdf_unique = grouped_bbox_gdf.drop_duplicates(
subset=["datetime", "geometry"], keep="first"
).reset_index(drop=True)

queried_data = []
for li in range(0, len(grouped_bbox_gdf)):
l = grouped_bbox_gdf.loc[li] # noqa
for li in range(0, len(grouped_bbox_gdf_unique)):
l = grouped_bbox_gdf_unique.loc[li] # noqa

from_date = (
datetime.strptime(l.datetime, "%Y-%m-%d")
Expand Down Expand Up @@ -301,9 +311,6 @@ def find_and_query_data_for_matching_dates(
f"Error while transforming data... {e}"
) from e

for t in da.time.values: # type: ignore[union-attr]
date = t.astype(str)[:10]

for i, t in enumerate(da.time.values): # type: ignore[union-attr]
date = t.astype(str)[:10]
queried_data.append(
Expand All @@ -316,7 +323,9 @@ def find_and_query_data_for_matching_dates(
logging.info(f"Queried data: {queried_data}")
return queried_data

def rasterize_vectors_to_the_queried_data(self, queried_data: list) -> int:
def rasterize_vectors_to_the_queried_data(
self, queried_data: list, set_no_data: bool
) -> int:
"""
Rasterize vector data to the queried raster data.

Expand All @@ -332,23 +341,58 @@ def rasterize_vectors_to_the_queried_data(self, queried_data: list) -> int:
label_gdf = self._read_shp_file(labels_shp_file)

logging.info("Rasterizing vectors to the queried data")

# Verify label classes
if "labelclass" in label_gdf.columns:
label_classes = np.sort(label_gdf["labelclass"].unique())
logger.info(f"Label classes being used: {label_classes}")
if not set_no_data and 0 in label_classes:
raise TerrakitValueError(
"Labels are using class 0 which conflicts with the background class. "
"Either use set_no_data=True or ensure label classes start from 1.",
details={
"label_classes": label_classes.tolist(),
"set_no_data": set_no_data,
},
)

start_index = 0 if set_no_data else 1
# Check if continuous and otherwise provide a warning
if not (
start_index in label_classes
and label_classes[-1] == start_index + len(label_classes) - 1
):
logger.warning(
"Label classes are not a continuous list of indices, is this correct?"
)

background_value = -1 if set_no_data else 0 # 0 is rasterize default
file_save_count = 0
for q in queried_data:
with rasterio.open(q, "r") as src:
out_meta = src.meta
out_meta.update({"count": 1})
label_column = label_gdf.get(
"labelclass", [1] * len(label_gdf)
) # Default 1 if not set
image = rasterio.features.rasterize(
((g, 1) for g in label_gdf.geometry),
(
(g, class_id)
for g, class_id in zip(label_gdf.geometry, label_column)
),
out_shape=src.shape,
transform=src.transform,
fill=background_value,
)
if set_no_data:
out_meta.update({"nodata": -1})
# Write the burned image to geotiff
logging.info(f"Writing to {q.replace('.tif', '')}_labels.tif")
with rasterio.open(
f"{q.replace('.tif', '')}_labels.tif", "w", **out_meta
) as dst:
dst.write(image, indexes=1)
file_save_count = +1
file_save_count += 1
return file_save_count


Expand All @@ -362,6 +406,7 @@ def download_validation(
datetime_bbox_shp_file: str = "./tmp/terrakit_curated_dataset_all_bboxes.shp",
labels_shp_file: str = "./tmp/terrakit_curated_dataset_labels.shp",
keep_files: bool = False,
set_no_data: bool = False,
) -> tuple[DownloadCls, DownloadModel]:
"""
Validate and initialize the download process.
Expand All @@ -376,6 +421,7 @@ def download_validation(
datetime_bbox_shp_file (str): Path to shapefile containing datetime bounding boxes.
labels_shp_file (str): Path to shapefile containing labels.
keep_files (bool): Flag to keep shapefiles once they have been used. Downloaded files will not be removed.
set_no_data (bool): Flag to set non-labeled data as no-data. Default False.

Returns:
DownloadCls: Initialized DownloadCls object.
Expand Down Expand Up @@ -445,6 +491,7 @@ def download_validation(
max_cloud_cover=max_cloud_cover,
datetime_bbox_shp_file=datetime_bbox_shp_file,
keep_files=keep_files,
set_no_data=set_no_data,
data_sources=data_source_list,
date_allowance=date_allowance,
labels_shp_file=labels_shp_file,
Expand Down Expand Up @@ -473,6 +520,7 @@ def download_data(
datetime_bbox_shp_file: str = "./tmp/terrakit_curated_dataset_all_bboxes.shp",
labels_shp_file: str = "./tmp/terrakit_curated_dataset_labels.shp",
keep_files: bool = False,
set_no_data: bool = False,
) -> list:
"""
Download and preprocess geospatial data.
Expand All @@ -488,6 +536,7 @@ def download_data(
datetime_bbox_shp_file (str): Path to shapefile containing datetime bounding boxes.
labels_shp_file (str): Path to shapefile containing labels.
keep_files (bool): Flag to keep shapefiles once they have been used. Downloaded files will not be removed.
set_no_data (bool): Flag to set non-labeled data as no-data. Default False

Returns:
list: List of queried data file paths.
Expand Down Expand Up @@ -551,6 +600,7 @@ def download_data(
datetime_bbox_shp_file=datetime_bbox_shp_file,
labels_shp_file=labels_shp_file,
keep_files=keep_files,
set_no_data=set_no_data,
)

logging.info("Listing collections..")
Expand All @@ -573,7 +623,8 @@ def download_data(

# Rasterize
file_save_count = download.rasterize_vectors_to_the_queried_data(
queried_data=queried_data
queried_data=queried_data,
set_no_data=set_no_data,
)

if file_save_count > 0:
Expand Down
Loading