diff --git a/pyproject.toml b/pyproject.toml
index b2c378d..3382a06 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "kintsugi"
-version = "0.9.0"
+version = "0.10.0"
 description = "Commonly used datasets and functions"
 readme = "README.md"
 authors = [
diff --git a/src/kintsugi/_data.py b/src/kintsugi/_data.py
deleted file mode 100644
index 210c3be..0000000
--- a/src/kintsugi/_data.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import hashlib
-import logging
-import os
-import shutil
-import tempfile
-from pathlib import Path
-
-import requests
-from platformdirs import user_cache_path
-from requests.adapters import HTTPAdapter, Retry
-
-logger = logging.getLogger("kintsugi")
-logger.addHandler(logging.NullHandler())
-
-BASE_URL = "https://raw.githubusercontent.com/bansallab/kintsugi-data/main/data"
-DATASETS = {
-    "county_neighbors/county_adjacency2010.txt": "7edda309ad38a4dfc6a6c6c30e1753e5490b9c8a3aa4563188841989b4fe9a96",
-    "county_neighbors/county_adjacency2023.txt": "2dbf9a8bae1b7c50db3a9db4864b073d2423c3e6e63518c97d649453e2809843",
-    "county_neighbors/county_adjacency2024.txt": "20cffeb48ba46972fb949c453d3fbf62620115039c45c88bc80c094885650816",
-    "county_neighbors/county_adjacency2025.txt": "27046a5f09f66205fd9869afbfb6dcae744e1c9b85cb19dd767c580211fb4575",
-    "crosswalk/county/ct_cou_to_cousub_crosswalk.txt": "13ad002564e30e6dcd1df112fdaa985f2cd0cdb908f7f3017cd2723db294cff8",
-    "crosswalk/county/sub-est2024_9.csv": "e5bd2cb1b10cf12d741572eef9e8eff19a1f6c5e08eb910d6f44f299c7ca83df",
-    "crosswalk/county_to_zip/county_to_zip_2016.parquet": "f13d79b059b272c9ac4ff02dbe4e0e26d56acffad837adeb0f2d86d3101fd9ad",
-    "crosswalk/county_to_zip/county_to_zip_2017.parquet": "009141988a3b902179d6beed80260faf2474a4cda8d6af0667d88ab4f0140b0b",
-    "crosswalk/county_to_zip/county_to_zip_2018.parquet": "697425d6ffba4c8f95c80a961814d2291f61507336010349a4d78c10ce5cfdba",
-    "crosswalk/county_to_zip/county_to_zip_2019.parquet": "3a06ca1a4d21638a4d014cbb4bc29b7d6b254a937dab1536058152d226073d62",
-    "crosswalk/county_to_zip/county_to_zip_2020.parquet": "06cb2bfa55085d9c5f0f652628a61d1bf64fe14ea190df5bdc361844f5204250",
-    "crosswalk/county_to_zip/county_to_zip_2021.parquet": "fdd59a6b8ab06a534d1c9c4765101105ce8bc08d9685eb9441c58701bffd96ae",
-    "crosswalk/county_to_zip/county_to_zip_2022.parquet": "c3875e66bff03ff71ad73b89277fc892a294a5d71be08b4eb4ac2c431d715b83",
-    "crosswalk/county_to_zip/county_to_zip_2023.parquet": "2f7c7564092c43d3a964ecf76bc61083916186d140c3cabed63aba5f91b56f1a",
-    "crosswalk/county_to_zip/county_to_zip_2024.parquet": "907321abfa45437a13d4cfa9047b687456e883a096b863c8874d168fb8cc57c0",
-    "crosswalk/county_to_zip/county_to_zip_2025.parquet": "e5008518558a4aca7ae2d40226f56bdcdcdf01bdb3879241a0b8d69a14097239",
-    "crosswalk/PUMA/geocorr_puma_2010_to_county_2020_with_afact2.csv": "b66f23aa27be3daf1f6456607478848df3383d4c7111d6f935f792fc9542b807",
-    "crosswalk/PUMA/puma2010-to-puma2020.csv": "bc6d0116aa39ea3b2af85095b49bac00f4612f591d2f4684cc2cc872c210a666",
-    "crosswalk/zip_to_county/zip_to_county_2016.parquet": "982913988b4915e9c4787d4f639985635cae938b447944e0ef04868bb5c784d1",
-    "crosswalk/zip_to_county/zip_to_county_2017.parquet": "e8dd16dcee07b3a6b52a30c49157d707a1f1e8f1cabf7da126cb094e88cfd0eb",
-    "crosswalk/zip_to_county/zip_to_county_2018.parquet": "8ab399f97bb8d81eaabca7adf512e801c8958745fe6f0e82992313fbcfaaa942",
-    "crosswalk/zip_to_county/zip_to_county_2019.parquet": "78ac9cdda7c5e00b82315ceaece4f3722da905e2ec4f68774a135d327c257435",
-    "crosswalk/zip_to_county/zip_to_county_2020.parquet": "17f518b2a6de5ee58b134c0dc5fbf10f6d48c45b14fdbfcfe58a4269e08eaee2",
-    "crosswalk/zip_to_county/zip_to_county_2021.parquet": "655c6ce8b257e942ac1c7dfa068d93ad2a1f139c9cd0c110efbce752ab27cea4",
-    "crosswalk/zip_to_county/zip_to_county_2022.parquet": "9f125642aeb5aba3c8f2bdf616c186f975fc6afc1b5543d55204ad9b2a10b252",
-    "crosswalk/zip_to_county/zip_to_county_2023.parquet": "89f50d28acb226a5777ededc1f8301dc1f028c65dd04b860355d23efe24b89c5",
-    "crosswalk/zip_to_county/zip_to_county_2024.parquet": "8d76e09fa52923c0901c0afad26ba2d32075210bb0ab5b82caa14ce5465b3d80",
-    "crosswalk/zip_to_county/zip_to_county_2025.parquet": "2683272f9759646d0a6234de06076f274852b85fcd2fd9e3d2dfcb3aa10def80",
-    "geo/cb_2020_us_county_5m.zip": "187e7118304428e5450083beb375e67c2c516c58a01ce52db95aaf24f18df3ba",
-    "geo/cb_2020_us_state_5m.zip": "aedc60e0d1924a9030ee6d39ff0ed27ad7d1b0bc86807ea809391a6b9008ffb3",
-    "geo/cb_2024_us_county_5m.zip": "a867f8734059b45d1d54a0ba56189dd7e73c42eb451418fa56de44c35232614b",
-    "geo/cb_2024_us_state_5m.zip": "c9db0e395c11a1f94a8017fde4f4c7cbee1dca6eb37ba8f1ccaab927df70885f",
-    "pop/county_cc/county_pop_2016.parquet": "74caad19bf5eed856ad9b6f63c65f7fceca612dec680d0768890de2265116607",
-    "pop/county_cc/county_pop_2017.parquet": "d93d027929861e115cf34b15f1ff7c697c8eaa327b73cd8132710a11860a63d5",
-    "pop/county_cc/county_pop_2018.parquet": "be3d3bab642a9f6f111c792a431f940b1753373194993885e4d47c136feed91a",
-    "pop/county_cc/county_pop_2019.parquet": "98801f118cd795c026a8269d5ac6674f98b9d47e0207c6a2721a5b7f4b6e5c08",
-    "pop/county_cc/county_pop_2020.parquet": "f1e4f282d297dc5498b6f839412c0815ca6f9e0a15d83d5d3867f2d70aa8413d",
-    "pop/county_cc/county_pop_2021.parquet": "3af369564ebb0e1fda25b440e5bf133ecb2d2eab60ab40f5db1f0a0955db713b",
-    "pop/county_cc/county_pop_2022.parquet": "977856eb5fffd508442ccedaa54c92e338b037135e5a9be55a03c7132863d9ca",
-    "pop/county_cc/county_pop_2023.parquet": "a4d66c302a557c1565ec9f43bad5ea9d4267576d1fbd17d8939e5a858a3d73e7",
-    "pop/county_cc/county_pop_2024.parquet": "12b16c7c20329a3df2f4120f6ec9a9a7313147fad0fd03bc360b1de5769c8abd",
-    "pop/state/state_pop_2016.parquet": "bac51c5ba4a9ff7305e92b3b2804c854fc20b9cbcf01156e5439d92668c0c81e",
-    "pop/state/state_pop_2017.parquet": "6fb950b1b78409af8130317b08b437b742c0906ff9d5c38655c1189103b8dddc",
-    "pop/state/state_pop_2018.parquet": "913fca35299028a842325000e58e33cd3912c1e900d480f00b468095398e57f8",
-    "pop/state/state_pop_2019.parquet": "7ca2c87065f24857178bb33a7512cb799a92890596bac6fff1cbeb3c69f6fc36",
-    "pop/state/state_pop_2020.parquet": "275b861e07f1c2327fb5382a28e84a5fb7ac4f896ae9f91b06612f6197af9611",
-    "pop/state/state_pop_2021.parquet": "8b47a5c9fdca838954c8ddac8265ad00d590281c7b444019070c81b9942a727e",
-    "pop/state/state_pop_2022.parquet": "ea113b3766c44bbf250e01b0b9509e810590119b3b9470b13dc347d43aed042b",
-    "pop/state/state_pop_2023.parquet": "e96a982342510fe6a1ba90fc85a9bd6fbdd8687bceaf76e6e117606429d2d160",
-    "pop/state/state_pop_2024.parquet": "b79bca471a68b8c3742ec30d41a2b65ab1227152e81239faf00763188752c6ff",
-    "county_groups.parquet": "7d7c150b5efd5596e0eaaed27abd6dc86137f08ff677c2606d402b9d165b87fa",
-    "state.txt": "bea4e03f71a1fa0045ae732aabad11fa541e5932b071c2369bb0d325e8cba5a0",
-}
-
-
-class GetDatasetError(Exception):
-    pass
-
-
-def get_dataset(file_name: str) -> Path:
-    """
-    Ensure valid dataset file is present in cache and return its path
-    """
-    if file_name not in DATASETS:
-        raise ValueError(f"{file_name} not in dataset")
-
-    file_cached = get_cache_dir() / file_name
-    if not file_cached.is_file() or not file_valid(file_name, file_cached):
-        logger.debug(
-            f"{file_name} not in cache or cached file is invalid. Getting file from kintsugi-data repository."
-        )
-        try:
-            download_dataset(file_name, file_cached)
-        except GetDatasetError as err:
-            logger.exception(f"Unable to get dataset: {file_name}", exc_info=err)
-            raise
-    else:
-        logger.debug(f"{file_name} already exists in cache")
-
-    return file_cached
-
-
-def get_cache_dir() -> Path:
-    """
-    Ensure cache directory exists and return its absolute path
-    """
-    cache_dir = (
-        Path(os.getenv("KINTSUGI_CACHE", user_cache_path("kintsugi-data")))
-        .expanduser()
-        .resolve()
-    )
-    try:
-        cache_dir.mkdir(parents=True, exist_ok=True)
-    except (OSError, PermissionError) as err:
-        logger.exception(
-            f"Error while setting up cache directory at {cache_dir}", exc_info=err
-        )
-        raise
-
-    return cache_dir
-
-
-def download_dataset(file_name: str, file_cached: Path) -> None:
-    """
-    Download dataset file, verify integrity via checksum, and save to cache
-    """
-    url = f"{BASE_URL}/{file_name}"
-    # 5 total retries, sleep for [0.0, 0.2, 0.4, 0.8, ...] seconds between retries after second try
-    retries = Retry(
-        total=5,
-        backoff_factor=0.1,
-        status_forcelist=[500, 502, 503, 504],
-        raise_on_status=True,
-    )
-    adapter = HTTPAdapter(max_retries=retries)
-
-    with requests.Session() as s:
-        s.mount("https://raw.githubusercontent.com/", adapter)
-        try:
-            res = s.get(url)
-        except requests.exceptions.RetryError as err:
-            raise GetDatasetError(
-                f"Error getting data file {file_name} despite retry strategy"
-            ) from err
-
-    with tempfile.NamedTemporaryFile("wb", delete_on_close=False) as fp:
-        fp.write(res.content)
-        fp.close()
-
-        tmp_file = Path(fp.name)
-        if not file_valid(file_name, tmp_file):
-            raise GetDatasetError(
-                f"Checksum for {file_name} is invalid. File not copied to cache"
-            )
-
-        logger.info(f"File {file_name} valid. Copying to cache")
-        file_cached.parent.mkdir(parents=True, exist_ok=True)
-        shutil.copyfile(tmp_file, file_cached)
-
-
-def file_valid(file_name: str, file: Path) -> bool:
-    """
-    Validate dataset file via sha256 checksum
-    """
-    with open(file, "rb") as f:
-        digest = hashlib.file_digest(f, "sha256")
-
-    return digest.hexdigest() == DATASETS[file_name]
diff --git a/src/kintsugi/_data/__init__.py b/src/kintsugi/_data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/kintsugi/_data/checksums.json b/src/kintsugi/_data/checksums.json
new file mode 100644
index 0000000..235adcb
--- /dev/null
+++ b/src/kintsugi/_data/checksums.json
@@ -0,0 +1,74 @@
+{
+    "county_neighbors": {
+        "county_adjacency2010.txt": "7edda309ad38a4dfc6a6c6c30e1753e5490b9c8a3aa4563188841989b4fe9a96",
+        "county_adjacency2023.txt": "2dbf9a8bae1b7c50db3a9db4864b073d2423c3e6e63518c97d649453e2809843",
+        "county_adjacency2024.txt": "20cffeb48ba46972fb949c453d3fbf62620115039c45c88bc80c094885650816",
+        "county_adjacency2025.txt": "27046a5f09f66205fd9869afbfb6dcae744e1c9b85cb19dd767c580211fb4575"
+    },
+    "crosswalk": {
+        "county": {
+            "ct_cou_to_cousub_crosswalk.txt": "13ad002564e30e6dcd1df112fdaa985f2cd0cdb908f7f3017cd2723db294cff8",
+            "sub-est2024_9.csv": "e5bd2cb1b10cf12d741572eef9e8eff19a1f6c5e08eb910d6f44f299c7ca83df"
+        },
+        "county_to_zip": {
+            "county_to_zip_2016.parquet": "f13d79b059b272c9ac4ff02dbe4e0e26d56acffad837adeb0f2d86d3101fd9ad",
+            "county_to_zip_2017.parquet": "009141988a3b902179d6beed80260faf2474a4cda8d6af0667d88ab4f0140b0b",
+            "county_to_zip_2018.parquet": "697425d6ffba4c8f95c80a961814d2291f61507336010349a4d78c10ce5cfdba",
+            "county_to_zip_2019.parquet": "3a06ca1a4d21638a4d014cbb4bc29b7d6b254a937dab1536058152d226073d62",
+            "county_to_zip_2020.parquet": "06cb2bfa55085d9c5f0f652628a61d1bf64fe14ea190df5bdc361844f5204250",
+            "county_to_zip_2021.parquet": "fdd59a6b8ab06a534d1c9c4765101105ce8bc08d9685eb9441c58701bffd96ae",
+            "county_to_zip_2022.parquet": "c3875e66bff03ff71ad73b89277fc892a294a5d71be08b4eb4ac2c431d715b83",
+            "county_to_zip_2023.parquet": "2f7c7564092c43d3a964ecf76bc61083916186d140c3cabed63aba5f91b56f1a",
+            "county_to_zip_2024.parquet": "907321abfa45437a13d4cfa9047b687456e883a096b863c8874d168fb8cc57c0",
+            "county_to_zip_2025.parquet": "e5008518558a4aca7ae2d40226f56bdcdcdf01bdb3879241a0b8d69a14097239"
+        },
+        "PUMA": {
+            "geocorr_puma_2010_to_county_2020_with_afact2.csv": "b66f23aa27be3daf1f6456607478848df3383d4c7111d6f935f792fc9542b807",
+            "puma2010-to-puma2020.csv": "bc6d0116aa39ea3b2af85095b49bac00f4612f591d2f4684cc2cc872c210a666"
+        },
+        "zip_to_county": {
+            "zip_to_county_2016.parquet": "982913988b4915e9c4787d4f639985635cae938b447944e0ef04868bb5c784d1",
+            "zip_to_county_2017.parquet": "e8dd16dcee07b3a6b52a30c49157d707a1f1e8f1cabf7da126cb094e88cfd0eb",
+            "zip_to_county_2018.parquet": "8ab399f97bb8d81eaabca7adf512e801c8958745fe6f0e82992313fbcfaaa942",
+            "zip_to_county_2019.parquet": "78ac9cdda7c5e00b82315ceaece4f3722da905e2ec4f68774a135d327c257435",
+            "zip_to_county_2020.parquet": "17f518b2a6de5ee58b134c0dc5fbf10f6d48c45b14fdbfcfe58a4269e08eaee2",
+            "zip_to_county_2021.parquet": "655c6ce8b257e942ac1c7dfa068d93ad2a1f139c9cd0c110efbce752ab27cea4",
+            "zip_to_county_2022.parquet": "9f125642aeb5aba3c8f2bdf616c186f975fc6afc1b5543d55204ad9b2a10b252",
+            "zip_to_county_2023.parquet": "89f50d28acb226a5777ededc1f8301dc1f028c65dd04b860355d23efe24b89c5",
+            "zip_to_county_2024.parquet": "8d76e09fa52923c0901c0afad26ba2d32075210bb0ab5b82caa14ce5465b3d80",
+            "zip_to_county_2025.parquet": "2683272f9759646d0a6234de06076f274852b85fcd2fd9e3d2dfcb3aa10def80"
+        }
+    },
+    "geo": {
+        "cb_2020_us_county_5m.zip": "187e7118304428e5450083beb375e67c2c516c58a01ce52db95aaf24f18df3ba",
+        "cb_2020_us_state_5m.zip": "aedc60e0d1924a9030ee6d39ff0ed27ad7d1b0bc86807ea809391a6b9008ffb3",
+        "cb_2024_us_county_5m.zip": "a867f8734059b45d1d54a0ba56189dd7e73c42eb451418fa56de44c35232614b",
+        "cb_2024_us_state_5m.zip": "c9db0e395c11a1f94a8017fde4f4c7cbee1dca6eb37ba8f1ccaab927df70885f"
+    },
+    "pop": {
+        "county_cc": {
+            "county_pop_2016.parquet": "74caad19bf5eed856ad9b6f63c65f7fceca612dec680d0768890de2265116607",
+            "county_pop_2017.parquet": "d93d027929861e115cf34b15f1ff7c697c8eaa327b73cd8132710a11860a63d5",
+            "county_pop_2018.parquet": "be3d3bab642a9f6f111c792a431f940b1753373194993885e4d47c136feed91a",
+            "county_pop_2019.parquet": "98801f118cd795c026a8269d5ac6674f98b9d47e0207c6a2721a5b7f4b6e5c08",
+            "county_pop_2020.parquet": "f1e4f282d297dc5498b6f839412c0815ca6f9e0a15d83d5d3867f2d70aa8413d",
+            "county_pop_2021.parquet": "3af369564ebb0e1fda25b440e5bf133ecb2d2eab60ab40f5db1f0a0955db713b",
+            "county_pop_2022.parquet": "977856eb5fffd508442ccedaa54c92e338b037135e5a9be55a03c7132863d9ca",
+            "county_pop_2023.parquet": "a4d66c302a557c1565ec9f43bad5ea9d4267576d1fbd17d8939e5a858a3d73e7",
+            "county_pop_2024.parquet": "12b16c7c20329a3df2f4120f6ec9a9a7313147fad0fd03bc360b1de5769c8abd"
+        },
+        "state": {
+            "state_pop_2016.parquet": "bac51c5ba4a9ff7305e92b3b2804c854fc20b9cbcf01156e5439d92668c0c81e",
+            "state_pop_2017.parquet": "6fb950b1b78409af8130317b08b437b742c0906ff9d5c38655c1189103b8dddc",
+            "state_pop_2018.parquet": "913fca35299028a842325000e58e33cd3912c1e900d480f00b468095398e57f8",
+            "state_pop_2019.parquet": "7ca2c87065f24857178bb33a7512cb799a92890596bac6fff1cbeb3c69f6fc36",
+            "state_pop_2020.parquet": "275b861e07f1c2327fb5382a28e84a5fb7ac4f896ae9f91b06612f6197af9611",
+            "state_pop_2021.parquet": "8b47a5c9fdca838954c8ddac8265ad00d590281c7b444019070c81b9942a727e",
+            "state_pop_2022.parquet": "ea113b3766c44bbf250e01b0b9509e810590119b3b9470b13dc347d43aed042b",
+            "state_pop_2023.parquet": "e96a982342510fe6a1ba90fc85a9bd6fbdd8687bceaf76e6e117606429d2d160",
+            "state_pop_2024.parquet": "b79bca471a68b8c3742ec30d41a2b65ab1227152e81239faf00763188752c6ff"
+        }
+    },
+    "county_groups.parquet": "7d7c150b5efd5596e0eaaed27abd6dc86137f08ff677c2606d402b9d165b87fa",
+    "state.txt": "bea4e03f71a1fa0045ae732aabad11fa541e5932b071c2369bb0d325e8cba5a0"
+}
diff --git a/src/kintsugi/_data/data.py b/src/kintsugi/_data/data.py
new file mode 100644
index 0000000..8dfca39
--- /dev/null
+++ b/src/kintsugi/_data/data.py
@@ -0,0 +1,144 @@
+import hashlib
+import importlib.resources
+import json
+import logging
+import os
+import shutil
+import tempfile
+from functools import lru_cache
+from pathlib import Path
+from typing import cast
+
+import requests
+from platformdirs import user_cache_path
+from requests.adapters import HTTPAdapter, Retry
+
+logger = logging.getLogger("kintsugi")
+logger.addHandler(logging.NullHandler())  # no-op handler: this module sets up no log output itself
+# Root URL of the hosted dataset files; download_dataset() appends "/<file_name>" to it
+BASE_URL = "https://raw.githubusercontent.com/winter-again/kintsugi-data/main/data"
+
+
+class GetDatasetError(Exception):
+    """Raised when a dataset file cannot be fetched or fails checksum validation"""
+
+
+type ChecksumTable = dict[str, str | ChecksumTable]  # file -> sha256 hex, dir -> nested table
+
+
+@lru_cache(maxsize=1)
+def load_checksums() -> ChecksumTable:
+    """Parse the packaged checksums.json; the result is memoized by lru_cache"""
+    resource = importlib.resources.files("kintsugi") / "_data/checksums.json"
+    with resource.open(encoding="utf-8") as f:  # explicit: don't depend on locale encoding
+        return cast(ChecksumTable, json.load(f))
+
+
+def get_dataset(file_name: str) -> Path:
+    """
+    Ensure valid dataset file is present in cache and return its path.
+    Raises ValueError when file_name is not a file listed in the checksum table.
+    """
+    # Only a str leaf is a file: a dict is a directory, None a missing path component
+    node: object = load_checksums()
+    for key in file_name.split("/"):
+        node = node.get(key) if isinstance(node, dict) else None
+    if not isinstance(node, str):
+        raise ValueError(f"{file_name} not in dataset")
+
+    file_cached = get_cache_dir() / file_name
+    if file_cached.is_file() and file_valid(file_name, file_cached):
+        logger.debug(f"{file_name} already exists in cache")
+        return file_cached
+
+    logger.debug(
+        f"{file_name} not in cache or cached file is invalid. Getting file from kintsugi-data repository."
+    )
+    try:
+        download_dataset(file_name, file_cached)
+    except GetDatasetError as err:
+        logger.exception(f"Unable to get dataset: {file_name}", exc_info=err)
+        raise
+    return file_cached
+
+
+def get_cache_dir() -> Path:
+    """
+    Return the absolute path of the cache directory, creating it when missing.
+    The KINTSUGI_CACHE environment variable overrides the platformdirs default.
+    """
+    configured = os.getenv("KINTSUGI_CACHE", user_cache_path("kintsugi-data"))
+    cache_dir = Path(configured).expanduser().resolve()
+
+    try:
+        cache_dir.mkdir(parents=True, exist_ok=True)
+    # PermissionError is a subclass of OSError, so OSError alone covers both
+    except OSError as err:
+        logger.exception(
+            f"Error while setting up cache directory at {cache_dir}", exc_info=err
+        )
+        raise
+
+    return cache_dir
+
+
+def download_dataset(file_name: str, file_cached: Path) -> None:
+    """
+    Download dataset file, verify integrity via checksum, and save to cache.
+    Raises GetDatasetError on exhausted retries, an HTTP error status, or a bad checksum.
+    """
+    url = f"{BASE_URL}/{file_name}"
+    # 5 total retries, sleep for [0.0, 0.2, 0.4, 0.8, ...] seconds between retries after second try
+    retries = Retry(
+        total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504], raise_on_status=True
+    )
+    with requests.Session() as s:
+        s.mount("https://raw.githubusercontent.com/", HTTPAdapter(max_retries=retries))
+        try:
+            # requests applies no timeout by default; without one a stalled server hangs the call
+            res = s.get(url, timeout=30)
+        except requests.exceptions.RetryError as err:
+            raise GetDatasetError(
+                f"Error getting data file {file_name} despite retry strategy"
+            ) from err
+    # Statuses outside status_forcelist (e.g. 404) are returned, not raised; don't hash an error page
+    if not res.ok:
+        raise GetDatasetError(
+            f"Error getting data file {file_name}: HTTP status {res.status_code}"
+        )
+
+    with tempfile.NamedTemporaryFile("wb", delete_on_close=False) as fp:
+        fp.write(res.content)
+        fp.close()  # flush and release the handle so file_valid() can reopen it by name
+        tmp_file = Path(fp.name)
+        if not file_valid(file_name, tmp_file):
+            raise GetDatasetError(
+                f"Checksum for {file_name} is invalid. File not copied to cache"
+            )
+        logger.info(f"File {file_name} valid. Copying to cache")
+        file_cached.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copyfile(tmp_file, file_cached)
+
+
+def file_valid(file_name: str, file: Path) -> bool:
+    """
+    Return True when the sha256 digest of file matches the checksum recorded for file_name
+    """
+    with open(file, "rb") as stream:
+        actual = hashlib.file_digest(stream, "sha256").hexdigest()
+
+    expected = get_checksum(file_name)
+    return actual == expected
+
+
+def get_checksum(file_name: str) -> str:
+    """
+    Return the sha256 hex digest recorded for file_name ("/"-separated table path).
+    Raises ValueError when the path is missing or names a directory, not a file.
+    """
+    node: object = load_checksums()
+    for key in file_name.split("/"):
+        node = node.get(key) if isinstance(node, dict) else None  # None: missing component
+    if not isinstance(node, str):
+        raise ValueError(f"{file_name} not in dataset")
+    return node
diff --git a/src/kintsugi/county_groups.py b/src/kintsugi/county_groups.py
index f8a9a42..bc52dd8 100644
--- a/src/kintsugi/county_groups.py
+++ b/src/kintsugi/county_groups.py
@@ -3,7 +3,7 @@
 import pandas as pd
 import polars as pl
 
-from ._data import get_dataset
+from ._data.data import get_dataset
 from .population import county_pop
 
 
diff --git a/src/kintsugi/crosswalk.py b/src/kintsugi/crosswalk.py
index 6c92501..b6e4774 100644
--- a/src/kintsugi/crosswalk.py
+++ b/src/kintsugi/crosswalk.py
@@ -3,7 +3,7 @@
 import pandas as pd
 import polars as pl
 
-from ._data import get_dataset
+from ._data.data import get_dataset
 
 
 @overload
diff --git a/src/kintsugi/geo.py b/src/kintsugi/geo.py
index 3075786..bd1fc53 100644
--- a/src/kintsugi/geo.py
+++ b/src/kintsugi/geo.py
@@ -2,7 +2,7 @@
 
 import geopandas as gpd
 
-from ._data import get_dataset
+from ._data.data import get_dataset
 
 type ShapefileYear = Literal[2020, 2024]
 
diff --git a/src/kintsugi/metadata.py b/src/kintsugi/metadata.py
index b15962a..828bbcc 100644
--- a/src/kintsugi/metadata.py
+++ b/src/kintsugi/metadata.py
@@ -3,7 +3,7 @@
 import pandas as pd
 import polars as pl
 
-from ._data import get_dataset
+from ._data.data import get_dataset
 
 
 @overload
diff --git a/src/kintsugi/neighbors.py b/src/kintsugi/neighbors.py
index 8bc9842..13fde41 100644
--- a/src/kintsugi/neighbors.py
+++ b/src/kintsugi/neighbors.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import polars as pl
 
-from ._data import get_dataset
+from ._data.data import get_dataset
 from .geo import ShapefileYear, county_geo
 
 type NeighborsYear = Literal[2010, 2023, 2024, 2025]
diff --git a/src/kintsugi/population.py b/src/kintsugi/population.py
index 558201c..5d5b57b 100644
--- a/src/kintsugi/population.py
+++ b/src/kintsugi/population.py
@@ -3,7 +3,7 @@
 import pandas as pd
 import polars as pl
 
-from ._data import get_dataset
+from ._data.data import get_dataset
 
 type VintageYear = Literal[
     2016,
diff --git a/tests/_data_test.py b/tests/_data/data_test.py
similarity index 56%
rename from tests/_data_test.py
rename to tests/_data/data_test.py
index 4578c50..563809b 100644
--- a/tests/_data_test.py
+++ b/tests/_data/data_test.py
@@ -7,13 +7,15 @@
 import requests
 from platformdirs import user_cache_path
 
-from kintsugi._data import (
-    DATASETS,
+from kintsugi._data.data import (
+    ChecksumTable,
     GetDatasetError,
     download_dataset,
     file_valid,
     get_cache_dir,
+    get_checksum,
     get_dataset,
+    load_checksums,
 )
 
 
@@ -25,6 +27,25 @@ def _get_cache_dir() -> Path:
     yield _get_cache_dir
 
 
+def test_get_checksum() -> None:
+    """
+    Test that a "/"-separated file name resolves to the checksum stored at
+    the matching leaf of the nested checksum table
+    """
+    expected = "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
+    table = {
+        "test": {"diamonds.csv": expected},
+    }
+
+    with patch("kintsugi._data.data.load_checksums", return_value=table):
+        assert get_checksum("test/diamonds.csv") == expected
+
+
+def test_get_checksum_not_exists() -> None:
+    with pytest.raises(ValueError, match="not in dataset$"):  # packaged table has no "foo" key
+        get_checksum("foo/bar.csv")
+
+
 def test_get_dataset(mock_get_cache_dir: Callable[[], Path]) -> None:
     """
     Test basic dataset fetch. Cached file doesn't exist.
@@ -34,14 +55,15 @@ def test_get_dataset(mock_get_cache_dir: Callable[[], Path]) -> None:
     assert not file_cached.is_file()
 
     with (
-        patch.dict(
-            "kintsugi._data.DATASETS",
-            {
-                file_name: "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
-            },
-        ),
-        patch("kintsugi._data.get_cache_dir", mock_get_cache_dir),
+        patch("kintsugi._data.data.load_checksums") as mock_load_checksums,
+        patch("kintsugi._data.data.get_cache_dir", mock_get_cache_dir),
     ):
+        mock_load_checksums.return_value = {
+            "test": {
+                "diamonds.csv": "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
+            }
+        }
+
         assert get_dataset(file_name) == file_cached
         assert file_cached.is_file()
 
@@ -50,8 +72,24 @@ def test_get_all_datasets(mock_get_cache_dir: Callable[[], Path]) -> None:
     """
     Test fetching all real datasets. Cached files don't exist.
     """
-    with patch("kintsugi._data.get_cache_dir", mock_get_cache_dir):
-        for file_name in DATASETS.keys():
+
+    def flatten_keys(
+        d: ChecksumTable, k_parent: str = "", sep: str = "/"
+    ) -> dict[str, str]:
+        items: list[tuple[str, str]] = []
+        for k, v in d.items():
+            k_next = k_parent + sep + k if k_parent else k
+            if isinstance(v, str):
+                items.append((k_next, v))
+            else:
+                items.extend(flatten_keys(v, k_next, sep).items())
+
+        return dict(items)
+
+    datasets = flatten_keys(load_checksums())
+
+    with patch("kintsugi._data.data.get_cache_dir", mock_get_cache_dir):
+        for file_name in datasets.keys():
             file_cached = mock_get_cache_dir() / file_name
             assert not file_cached.is_file()
             assert get_dataset(file_name) == file_cached
@@ -66,15 +104,16 @@ def test_get_dataset_no_cached(mock_get_cache_dir: Callable[[], Path]) -> None:
     file_cached = mock_get_cache_dir() / file_name
 
     with (
-        patch.dict(
-            "kintsugi._data.DATASETS",
-            {
-                file_name: "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
-            },
-        ),
-        patch("kintsugi._data.get_cache_dir", mock_get_cache_dir),
-        patch("kintsugi._data.download_dataset") as mock_download_dataset,
+        patch("kintsugi._data.data.load_checksums") as mock_load_checksums,
+        patch("kintsugi._data.data.get_cache_dir", mock_get_cache_dir),
+        patch("kintsugi._data.data.download_dataset") as mock_download_dataset,
     ):
+        mock_load_checksums.return_value = {
+            "test": {
+                "diamonds.csv": "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
+            }
+        }
+
         get_dataset(file_name)
         mock_download_dataset.assert_called_once_with(file_name, file_cached)
 
@@ -92,15 +131,16 @@ def test_get_dataset_cache_invalid(mock_get_cache_dir: Callable[[], Path]) -> No
     assert file_cached.is_file()
 
     with (
-        patch.dict(
-            "kintsugi._data.DATASETS",
-            {
-                file_name: "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
-            },
-        ),
-        patch("kintsugi._data.get_cache_dir", mock_get_cache_dir),
-        patch("kintsugi._data.download_dataset") as mock_download_dataset,
+        patch("kintsugi._data.data.load_checksums") as mock_load_checksums,
+        patch("kintsugi._data.data.get_cache_dir", mock_get_cache_dir),
+        patch("kintsugi._data.data.download_dataset") as mock_download_dataset,
     ):
+        mock_load_checksums.return_value = {
+            "test": {
+                "diamonds.csv": "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
+            }
+        }
+
         get_dataset(file_name)
         mock_download_dataset.assert_called_once_with(file_name, file_cached)
 
@@ -134,14 +174,15 @@ def test_download_dataset(mock_get_cache_dir: Callable[[], Path]) -> None:
     file_cached = mock_get_cache_dir() / file_name
 
     with (
-        patch.dict(
-            "kintsugi._data.DATASETS",
-            {
-                file_name: "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
-            },
-        ),
-        patch("kintsugi._data.get_cache_dir", mock_get_cache_dir),
+        patch("kintsugi._data.data.load_checksums") as mock_load_checksums,
+        patch("kintsugi._data.data.get_cache_dir", mock_get_cache_dir),
     ):
+        mock_load_checksums.return_value = {
+            "test": {
+                "diamonds.csv": "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
+            }
+        }
+
         download_dataset(file_name, file_cached)
         assert file_cached.is_file()
 
@@ -151,9 +192,9 @@ def test_download_dataset_retry_error(mock_get_cache_dir: Callable[[], Path]) ->
     Test that RetryError during download_dataset() leads to GetDatasetError
     """
     with (
-        patch("kintsugi._data.get_cache_dir", mock_get_cache_dir),
+        patch("kintsugi._data.data.get_cache_dir", mock_get_cache_dir),
         patch(
-            "kintsugi._data.requests.Session.get",
+            "kintsugi._data.data.requests.Session.get",
             side_effect=requests.exceptions.RetryError,
         ),
         pytest.raises(GetDatasetError, match="^Error getting data file"),
@@ -171,10 +212,11 @@ def test_file_valid_content_correct(mock_get_cache_dir: Callable[[], Path]) -> N
     with open(file_cached, "w") as f:
         f.write("Valid content for test.txt")
 
-    with patch.dict(
-        "kintsugi._data.DATASETS",
-        {file_name: "9227f3934df8fd3b9cde4a201195921c90b3d15d174af1a6831b11cdc78ee5b8"},
-    ):
+    with patch("kintsugi._data.data.load_checksums") as mock_load_checksum:
+        mock_load_checksum.return_value = {
+            file_name: "9227f3934df8fd3b9cde4a201195921c90b3d15d174af1a6831b11cdc78ee5b8"
+        }
+
         assert file_valid(file_name, file_cached) is True
 
 
@@ -188,8 +230,11 @@ def test_file_valid_content_incorrect(mock_get_cache_dir: Callable[[], Path]) ->
     with open(file_cached, "w") as f:
         f.write("Invalid content for diamonds.csv")
 
-    with patch.dict(
-        "kintsugi._data.DATASETS",
-        {file_name: "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"},
-    ):
+    with patch("kintsugi._data.data.load_checksums") as mock_load_checksums:
+        mock_load_checksums.return_value = {
+            "test": {
+                "diamonds.csv": "9574730b03aba241d899c4a97511c5061b19358fab89510774fb6c24168345c4"
+            }
+        }
+
         assert file_valid(file_name, file_cached) is False
diff --git a/uv.lock b/uv.lock
index 9ec42ad..b3323e7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -200,7 +200,7 @@ wheels = [
 
 [[package]]
 name = "kintsugi"
-version = "0.9.0"
+version = "0.10.0"
 source = { editable = "." }
 dependencies = [
     { name = "pandas" },