Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,23 @@ if [[ $error -eq 1 ]]; then
fi

echo
echo "All dependencies installed. Run:"
echo " uv sync # Install Python packages"
echo " direnv allow # Enable automatic venv activation"
echo "All prerequisites found. Setting up project..."

# Install Python packages
uv sync
echo "[OK] Python packages installed"

# Copy workflow config template if it doesn't exist yet
if [[ ! -f workflow/config.yaml ]]; then
cp workflow/config.template.yaml workflow/config.yaml
echo "[OK] Created workflow/config.yaml from template (edit as needed)"
else
echo "[OK] workflow/config.yaml already exists"
fi

# Install pre-commit hooks
uv run pre-commit install
echo "[OK] Pre-commit hooks installed"

echo
echo "Setup complete. Run 'direnv allow' to enable automatic venv activation."
33 changes: 21 additions & 12 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
@@ -1,24 +1,33 @@
from os.path import join as pjoin

# You can use config file to set various variables including parameters
# instead defining everything here.
#
# configfile: "config.yaml" # uncomment to use config file.
configfile: "config.yaml"

DATA_DIR = "../data/"
DATA_DIR = config.get("data_dir", "../data/")
RAW_DATA_DIR = pjoin(DATA_DIR, "raw")
DERIVED_DATA_DIR = pjoin(DATA_DIR, "derived")
RESULTS_DIR = "../results/"


"""
rule all:
input:
FIG1
pjoin(RESULTS_DIR, "summary.txt"),


rule process_data:
"""Example rule: process raw data into a derived dataset."""
input:
pjoin(RAW_DATA_DIR, "{dataset}.csv"),
output:
pjoin(DERIVED_DATA_DIR, "{dataset}.processed.csv"),
shell:
"python scripts/process_data.py {input} {output}"


rule figure1:
rule summarize:
"""Example rule: generate a summary from processed data."""
input:
SOME_DATASET
pjoin(DERIVED_DATA_DIR, "example.processed.csv"),
output:
FIG1
pjoin(RESULTS_DIR, "summary.txt"),
shell:
"python scripts/plot_fig1.py {input} {output}"
"""
"python scripts/summarize.py {input} {output}"
25 changes: 25 additions & 0 deletions workflow/scripts/process_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Example script: process a raw CSV dataset.

Usage (called by Snakemake):
python scripts/process_data.py <input.csv> <output.csv>

Replace this with your own data-processing logic.
"""

import csv
import sys


def main(input_path: str, output_path: str) -> None:
    """Copy a CSV from *input_path* to *output_path* row by row.

    Reads the input with ``csv.DictReader`` and writes it back out with the
    same header via ``csv.DictWriter``. Rows currently pass through
    unchanged — this is the placeholder spot for real processing logic.

    Args:
        input_path: Path to the raw input CSV (must have a header row).
        output_path: Path where the processed CSV is written.

    Raises:
        ValueError: If the input file is empty (no header row).
    """
    with open(input_path, newline="") as fin, open(output_path, "w", newline="") as fout:
        reader = csv.DictReader(fin)
        # An explicit raise instead of `assert`: assertions are stripped
        # under `python -O`, and a missing header is a real input error.
        if reader.fieldnames is None:
            raise ValueError(f"{input_path}: empty file, no CSV header row")
        writer = csv.DictWriter(fout, fieldnames=reader.fieldnames)
        writer.writeheader()
        for row in reader:
            # Example: pass rows through unchanged. Add your processing here.
            writer.writerow(row)


if __name__ == "__main__":
    # Fail with a usage message rather than an IndexError on bad invocation.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <input.csv> <output.csv>")
    main(sys.argv[1], sys.argv[2])
25 changes: 25 additions & 0 deletions workflow/scripts/summarize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Example script: summarize a processed CSV dataset.

Usage (called by Snakemake):
python scripts/summarize.py <input.csv> <output.txt>

Replace this with your own analysis/summarization logic.
"""

import csv
import sys


def main(input_path: str, output_path: str) -> None:
    """Write a plain-text summary of a processed CSV dataset.

    The summary contains the total data-row count and, when at least one
    row exists, the column names (taken from the first row). Streams the
    input instead of loading every row into memory, so arbitrarily large
    files are summarized in O(1) space with identical output.

    Args:
        input_path: Path to the processed input CSV (with a header row).
        output_path: Path where the text summary is written.
    """
    row_count = 0
    first_row = None
    with open(input_path, newline="") as fin:
        for row in csv.DictReader(fin):
            if first_row is None:
                first_row = row
            row_count += 1

    with open(output_path, "w") as fout:
        fout.write(f"Total rows: {row_count}\n")
        # Match the original contract: the Columns line is emitted only
        # when the file contains at least one data row.
        if first_row is not None:
            fout.write(f"Columns: {', '.join(first_row.keys())}\n")


if __name__ == "__main__":
    # Fail with a usage message rather than an IndexError on bad invocation.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <input.csv> <output.txt>")
    main(sys.argv[1], sys.argv[2])
Loading