Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,23 @@ if [[ $error -eq 1 ]]; then
fi

echo
echo "All dependencies installed. Run:"
echo " uv sync # Install Python packages"
echo " direnv allow # Enable automatic venv activation"
echo "All prerequisites found. Setting up project..."

# Install Python packages
uv sync
echo "[OK] Python packages installed"

# Copy workflow config template if it doesn't exist yet
if [[ ! -f workflow/config.yaml ]]; then
cp workflow/config.template.yaml workflow/config.yaml
echo "[OK] Created workflow/config.yaml from template (edit as needed)"
else
echo "[OK] workflow/config.yaml already exists"
fi

# Install pre-commit hooks
uv run pre-commit install
echo "[OK] Pre-commit hooks installed"

echo
echo "Setup complete. Run 'direnv allow' to enable automatic venv activation."
33 changes: 21 additions & 12 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
@@ -1,24 +1,33 @@
from os.path import join as pjoin

# You can use config file to set various variables including parameters
# instead defining everything here.
#
# configfile: "config.yaml" # uncomment to use config file.
configfile: "config.yaml"

DATA_DIR = "../data/"
DATA_DIR = config.get("data_dir", "../data/")
RAW_DATA_DIR = pjoin(DATA_DIR, "raw")
DERIVED_DATA_DIR = pjoin(DATA_DIR, "derived")
RESULTS_DIR = "../results/"


"""
rule all:
input:
FIG1
pjoin(RESULTS_DIR, "summary.txt"),


rule process_data:
"""Example rule: process raw data into a derived dataset."""
input:
pjoin(RAW_DATA_DIR, "{dataset}.csv"),
output:
pjoin(DERIVED_DATA_DIR, "{dataset}.processed.csv"),
shell:
"python scripts/process_data.py {input} {output}"


rule figure1:
rule summarize:
"""Example rule: generate a summary from processed data."""
input:
SOME_DATASET
pjoin(DERIVED_DATA_DIR, "example.processed.csv"),
output:
FIG1
pjoin(RESULTS_DIR, "summary.txt"),
shell:
"python scripts/plot_fig1.py {input} {output}"
"""
"python scripts/summarize.py {input} {output}"
25 changes: 25 additions & 0 deletions workflow/scripts/process_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Example script: process a raw CSV dataset.

Usage (called by Snakemake):
python scripts/process_data.py <input.csv> <output.csv>

Replace this with your own data-processing logic.
"""

import csv
import sys


def main(input_path: str, output_path: str) -> None:
    """Copy a CSV from *input_path* to *output_path* row by row.

    Reads the input with ``csv.DictReader`` and writes it back out with the
    same header via ``csv.DictWriter``. Rows currently pass through
    unchanged — this is the placeholder spot for real processing logic.

    Args:
        input_path: Path to the raw input CSV (must have a header row).
        output_path: Path where the processed CSV is written.

    Raises:
        ValueError: If the input file is empty (no header row).
    """
    with open(input_path, newline="") as fin, open(output_path, "w", newline="") as fout:
        reader = csv.DictReader(fin)
        # An explicit raise instead of `assert`: assertions are stripped
        # under `python -O`, and a missing header is a real input error.
        if reader.fieldnames is None:
            raise ValueError(f"{input_path}: empty file, no CSV header row")
        writer = csv.DictWriter(fout, fieldnames=reader.fieldnames)
        writer.writeheader()
        for row in reader:
            # Example: pass rows through unchanged. Add your processing here.
            writer.writerow(row)


if __name__ == "__main__":
    # Fail with a usage message rather than an IndexError on bad invocation.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <input.csv> <output.csv>")
    main(sys.argv[1], sys.argv[2])
25 changes: 25 additions & 0 deletions workflow/scripts/summarize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Example script: summarize a processed CSV dataset.

Usage (called by Snakemake):
python scripts/summarize.py <input.csv> <output.txt>

Replace this with your own analysis/summarization logic.
"""

import csv
import sys


def main(input_path: str, output_path: str) -> None:
    """Write a plain-text summary of a processed CSV dataset.

    The summary contains the total data-row count and, when at least one
    row exists, the column names (taken from the first row). Streams the
    input instead of loading every row into memory, so arbitrarily large
    files are summarized in O(1) space with identical output.

    Args:
        input_path: Path to the processed input CSV (with a header row).
        output_path: Path where the text summary is written.
    """
    row_count = 0
    first_row = None
    with open(input_path, newline="") as fin:
        for row in csv.DictReader(fin):
            if first_row is None:
                first_row = row
            row_count += 1

    with open(output_path, "w") as fout:
        fout.write(f"Total rows: {row_count}\n")
        # Match the original contract: the Columns line is emitted only
        # when the file contains at least one data row.
        if first_row is not None:
            fout.write(f"Columns: {', '.join(first_row.keys())}\n")


if __name__ == "__main__":
    # Fail with a usage message rather than an IndexError on bad invocation.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <input.csv> <output.txt>")
    main(sys.argv[1], sys.argv[2])
Loading