From dd8c598f476db46bde7ec947ffaa56dcae563119 Mon Sep 17 00:00:00 2001
From: Claude
Date: Sun, 15 Feb 2026 01:53:25 +0000
Subject: [PATCH] setup.sh runs install steps; Snakefile has working example rules

- setup.sh now runs uv sync, copies config template to config.yaml,
  and installs pre-commit hooks after prerequisite checks pass; it
  stops at the first step that fails instead of reporting "[OK]"
- Replace commented-out Snakefile placeholder with functional example
  rules (process_data, summarize) that use config.yaml
- Add example workflow scripts (process_data.py, summarize.py) so the
  pipeline is runnable once <data_dir>/raw/example.csv exists; both
  scripts check their arguments and read/write UTF-8

https://claude.ai/code/session_012TNv3qDVee7nBFUViVtRvM
---
Note: this patch was re-wrapped and edited by hand. Whitespace in context
and removed lines was restored by convention, so use
`git apply --ignore-whitespace` if an exact match fails. The post-image
blob ids on the "index" lines were not recomputed.

 setup.sh                         | 33 ++++++++++++++++++++++++---
 workflow/Snakefile               | 33 +++++++++++++++++----------
 workflow/scripts/process_data.py | 39 ++++++++++++++++++++++++++++++++
 workflow/scripts/summarize.py    | 36 +++++++++++++++++++++++++++++
 4 files changed, 126 insertions(+), 15 deletions(-)
 create mode 100644 workflow/scripts/process_data.py
 create mode 100644 workflow/scripts/summarize.py

diff --git a/setup.sh b/setup.sh
index ee31faf..9ddace6 100755
--- a/setup.sh
+++ b/setup.sh
@@ -42,6 +42,33 @@ if [[ $error -eq 1 ]]; then
 fi
 
 echo
-echo "All dependencies installed. Run:"
-echo " uv sync # Install Python packages"
-echo " direnv allow # Enable automatic venv activation"
+echo "All prerequisites found. Setting up project..."
+
+# Install Python packages. Abort on failure so an "[OK]" line is only ever
+# printed for a step that actually succeeded.
+if ! uv sync; then
+    echo "[ERROR] 'uv sync' failed" >&2
+    exit 1
+fi
+echo "[OK] Python packages installed"
+
+# Copy workflow config template if it doesn't exist yet
+if [[ ! -f workflow/config.yaml ]]; then
+    if ! cp workflow/config.template.yaml workflow/config.yaml; then
+        echo "[ERROR] Could not create workflow/config.yaml from template" >&2
+        exit 1
+    fi
+    echo "[OK] Created workflow/config.yaml from template (edit as needed)"
+else
+    echo "[OK] workflow/config.yaml already exists"
+fi
+
+# Install pre-commit hooks
+if ! uv run pre-commit install; then
+    echo "[ERROR] 'uv run pre-commit install' failed" >&2
+    exit 1
+fi
+echo "[OK] Pre-commit hooks installed"
+
+echo
+echo "Setup complete. Run 'direnv allow' to enable automatic venv activation."
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 46492a7..4318bf9 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -1,24 +1,33 @@
 from os.path import join as pjoin
 
-# You can use config file to set various variables including parameters
-# instead defining everything here.
-#
-# configfile: "config.yaml" # uncomment to use config file.
+configfile: "config.yaml"
 
-DATA_DIR = "../data/"
+DATA_DIR = config.get("data_dir", "../data/")
 RAW_DATA_DIR = pjoin(DATA_DIR, "raw")
 DERIVED_DATA_DIR = pjoin(DATA_DIR, "derived")
+RESULTS_DIR = config.get("results_dir", "../results/")
+
 
-"""
 rule all:
     input:
-        FIG1
+        pjoin(RESULTS_DIR, "summary.txt"),
+
+
+rule process_data:
+    """Example rule: process raw data into a derived dataset."""
+    input:
+        pjoin(RAW_DATA_DIR, "{dataset}.csv"),
+    output:
+        pjoin(DERIVED_DATA_DIR, "{dataset}.processed.csv"),
+    shell:
+        "python scripts/process_data.py {input} {output}"
+
 
-rule figure1:
+rule summarize:
+    """Example rule: generate a summary from processed data."""
     input:
-        SOME_DATASET
+        pjoin(DERIVED_DATA_DIR, "example.processed.csv"),
     output:
-        FIG1
+        pjoin(RESULTS_DIR, "summary.txt"),
     shell:
-        "python scripts/plot_fig1.py {input} {output}"
-"""
+        "python scripts/summarize.py {input} {output}"
diff --git a/workflow/scripts/process_data.py b/workflow/scripts/process_data.py
new file mode 100644
index 0000000..07da9fd
--- /dev/null
+++ b/workflow/scripts/process_data.py
@@ -0,0 +1,39 @@
+"""Example script: process a raw CSV dataset.
+
+Usage (called by Snakemake):
+    python scripts/process_data.py INPUT_CSV OUTPUT_CSV
+
+Replace this with your own data-processing logic.
+"""
+
+import csv
+import sys
+
+
+def main(input_path: str, output_path: str) -> None:
+    """Copy the CSV at ``input_path`` to ``output_path`` row by row.
+
+    Raises:
+        ValueError: If the input has no header row (e.g. an empty file).
+    """
+    with open(input_path, newline="", encoding="utf-8") as fin:
+        reader = csv.DictReader(fin)
+        # Explicit check rather than ``assert``: asserts are stripped by
+        # ``python -O``, and this is input validation, not an invariant.
+        if reader.fieldnames is None:
+            raise ValueError(f"{input_path}: no CSV header row found")
+        # Open the output only after the header is validated, so bad input
+        # does not leave an empty output file behind.
+        with open(output_path, "w", newline="", encoding="utf-8") as fout:
+            writer = csv.DictWriter(fout, fieldnames=reader.fieldnames)
+            writer.writeheader()
+            for row in reader:
+                # Example: pass rows through unchanged.
+                # Add your processing here.
+                writer.writerow(row)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        sys.exit(f"usage: {sys.argv[0]} INPUT_CSV OUTPUT_CSV")
+    main(sys.argv[1], sys.argv[2])
diff --git a/workflow/scripts/summarize.py b/workflow/scripts/summarize.py
new file mode 100644
index 0000000..7df8e1e
--- /dev/null
+++ b/workflow/scripts/summarize.py
@@ -0,0 +1,36 @@
+"""Example script: summarize a processed CSV dataset.
+
+Usage (called by Snakemake):
+    python scripts/summarize.py INPUT_CSV OUTPUT_TXT
+
+Replace this with your own analysis/summarization logic.
+"""
+
+import csv
+import sys
+
+
+def main(input_path: str, output_path: str) -> None:
+    """Write the row count and column names of a CSV file to a text file.
+
+    The column names come from the CSV header, so they are reported even
+    when the file has a header but no data rows.
+    """
+    with open(input_path, newline="", encoding="utf-8") as fin:
+        reader = csv.DictReader(fin)
+        # Read the header before counting; ``fieldnames`` is None for an
+        # empty file.
+        columns = reader.fieldnames or []
+        # Stream the rows: only the count is needed, not the rows.
+        row_count = sum(1 for _ in reader)
+
+    with open(output_path, "w", encoding="utf-8") as fout:
+        fout.write(f"Total rows: {row_count}\n")
+        if columns:
+            fout.write(f"Columns: {', '.join(columns)}\n")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        sys.exit(f"usage: {sys.argv[0]} INPUT_CSV OUTPUT_TXT")
+    main(sys.argv[1], sys.argv[2])