Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
*.fastq.gz
.nextflow*
**/work/*
input/human_genome
nextflow/kneaddata
nextflow/tutorial-inprogress.nf
input/humann_databases/full_chocophlan.v201901_v31.tar.gz
229 changes: 221 additions & 8 deletions components.typ
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@

#v(2em)

Kevin Bonham, PhD
Kevin Bonham \
Sagun Maharjan \
Emily Green

2025-07-11
]
Expand All @@ -30,14 +32,15 @@
1. Tasks (processes, rules, etc): single step in a workflow (typically one command)
- inputs
- outputs
- command
- command#pause
2. Workflow: Order of steps (instructions to build computational graph)
- Also a way to find / specify inputs#pause
3. Parameters: variables that may change from run to run
- may be global (eg output directory)
- or task-specific (eg memory to allocate)


== Workflows builds computational graph
== Workflows build computational graph

#let bent-edge(from, to, ..args) = {
let midpoint = (from, 50%, to)
Expand All @@ -50,10 +53,12 @@
edge(..vertices, "-|>", ..args)
}

#set text(16pt)
#text(16pt)[
#diagram(
node-stroke: luma(80%),
node-stroke: blue,
edge-corner-radius: none,
edge-stroke: purple.darken(50%),
label-sep:0.2em,
spacing: (10pt, 47pt),

// Nodes
Expand All @@ -67,7 +72,7 @@
node((0,2), [Task 3 output 1], name: <d>),
node((1,2), [Task 3 output 2], name: <e>),

node((2.5,2), [Task3 output], name: <f>),
node((2.5,2), [Task 4 output], name: <f>),

node((3.5,3), [Report], name: <h>),

Expand All @@ -76,17 +81,225 @@

node((8,2), [outputs...], name: <u>),

edge((-0.3,3),(0.1,3), "-|>"),
node((0.4,3), [Task], stroke:none),
node((0,3.5), [], width:1em,height:1em),
node((0.4,3.5), [File ], stroke: none),

// Edges
bent-edge(<a>, <b>, [_Task 1_]),
bent-edge(<a>, <c>, [_Task 2_]),
bent-edge(<b>, <d>, [_Task 3_]),
bent-edge(<b>, <e>),
edge(<c>, <f>, "-|>", [_Task 4_]),
edge(<f>, <h>, "-|>"),
edge(<e>, <h>, "-|>"),
edge(<d>, <h>, "-|>"),
edge(<x>, <y>, "--|>", [Tasks...]),
edge(<w>, <u>, "--|>", [Tasks...]),
edge(<y>, <h>, "-|>"),
edge(<u>, <h>, "-|>"),
)
]

== In anadama, tasks are added to workflows

// Two-part slide: the first content block shows an AnADAMA `workflow.add_task`
// call, the second redraws the start of the workflow graph with the file nodes
// for that task emphasised.
// NOTE(review): `composer: (40%,60%)` presumably sets the relative widths of the
// two content blocks; confirm against the slide package in use.
#slide(composer: (40%,60%))[
#set text(16pt)
```python
workflow.add_task(
"cat [depends[0]] > [targets[0]]",
depends = input_files,
targets = output_files
)
```

][
#set text(16pt)
#diagram(
node-stroke: blue,
edge-corner-radius: none,
edge-stroke: purple.darken(50%),
label-sep:0.2em,
spacing: (10pt, 47pt),

// Nodes
// <a> and <b> carry a heavier 3pt stroke and bold labels: they are the
// depends/targets files of the task shown in the code block.
node((1.5,0), [*input_file*], stroke:3pt, name: <a>),
node((0.5,1), [*output_file*], stroke:3pt, name: <b>),
node((2.5,1), [Task 2 output], name: <c>),
node((0,2), [Task 3 output 1], name: <d>),
node((1,2), [Task 3 output 2], name: <e>),

node((2.5,2), [Task 4 output], name: <f>),
// Edges: each edge is a task; the bold label marks the task from the code block.
bent-edge(<a>, <b>, [*_task1_*]),
bent-edge(<a>, <c>, [_Task 2_]),
bent-edge(<b>, <d>, [_Task 3_]),
bent-edge(<b>, <e>),
edge(<c>, <f>, "-|>", [_Task 4_]),

// Teal outline drawn around <a> and <b> to frame the highlighted task.
node(stroke:teal+2pt, enclose:(<a>,<b>))
)
]
== In nextflow, tasks are `process`es

// Two-part slide: the first content block shows `task1` written as a Nextflow
// process, the second redraws the start of the workflow graph with that task's
// input and output files emphasised.
// The example uses a `script:` block because its body is a double-quoted string
// with `$var` interpolation, which is `script:` semantics; Nextflow's `shell:`
// block expects single-quoted strings with `!{var}` placeholders.
#slide(composer: (40%,60%))[
#set text(16pt)

```groovy
process task1 {
input:
path input_file

output:
path output_file

script:
"""
cat $input_file > $output_file
"""
}
```

][
#set text(16pt)
#diagram(
node-stroke: blue,
edge-corner-radius: none,
edge-stroke: purple.darken(50%),
label-sep:0.2em,
spacing: (10pt, 47pt),

// Nodes
// <a> and <b> carry a heavier 3pt stroke and bold labels: they are the
// input/output paths of the process shown in the code block.
node((1.5,0), [*input_file*], stroke:3pt, name: <a>),
node((0.5,1), [*output_file*], stroke:3pt, name: <b>),
node((2.5,1), [Task 2 output], name: <c>),
node((0,2), [Task 3 output 1], name: <d>),
node((1,2), [Task 3 output 2], name: <e>),

node((2.5,2), [Task 4 output], name: <f>),
// Edges: each edge is a task; the bold label marks the process from the code block.
bent-edge(<a>, <b>, [*_task1_*]),
bent-edge(<a>, <c>, [_Task 2_]),
bent-edge(<b>, <d>, [_Task 3_]),
bent-edge(<b>, <e>),
edge(<c>, <f>, "-|>", [_Task 4_]),

// Teal outline drawn around <a> and <b> to frame the highlighted process.
node(stroke:teal+2pt, enclose:(<a>,<b>))
)
]

// Two-part slide: the first content block shows `task3`, a process with one
// input and two outputs; the second redraws the graph with that input and both
// outputs emphasised.
// The command reads `$in_from_1`, the only input the process declares.
// `script:` is used because the body is a double-quoted string with `$var`
// interpolation; Nextflow's `shell:` block expects single quotes and `!{var}`.
#slide(composer: (40%,60%))[

#set text(16pt)
```groovy
process task3 {
input:
path in_from_1

output:
path out1
path out2

script:
"""
cat $in_from_1 > $out1
echo "I'm done!" > $out2
"""
}
```

][
#set text(16pt)
#diagram(
node-stroke: blue,
edge-corner-radius: none,
edge-stroke: purple.darken(50%),
label-sep:0.2em,
spacing: (10pt, 47pt),

// Nodes
// <b>, <d> and <e> carry a heavier 3pt stroke and bold labels: they are the
// input and the two outputs of the process shown in the code block.
node((1.5,0), [Input file 1], name: <a>),
node((0.5,1), [*in_from_1*], name: <b>, stroke:3pt),
node((3.5,1), [Task 2 output], name: <c>),
node((0,2), [*out1*], name: <d>, stroke:3pt),
node((1.5,2), [*out2*], name: <e>, stroke:3pt),

node((3.5,2), [Task 4 output], name: <f>),
// Edges: each edge is a task; the bold label marks the process from the code block.
bent-edge(<a>, <b>, [_Task 1_]),
bent-edge(<a>, <c>, [_Task 2_]),
bent-edge(<b>, <d>, [*_task3_*], label-pos:0.6),
bent-edge(<b>, <e>),
edge(<c>, <f>, "-|>", [_Task 4_]),
// Teal outline drawn around <b>, <d> and <e> to frame the highlighted process.
node(stroke:teal+2pt, enclose:(<b>,<d>,<e>))
)
]

== Processes are called like functions

// Two-part slide: the first content block shows a Nextflow `workflow` that
// calls processes like functions, the second shows the resulting graph with
// nodes named after the channel variables used in the code.
// The report step takes `t3_out[1].collect()`: index 1 selects task3's second
// output channel, and `collect()` gathers that output from every input file
// into one list. This matches the diagram, where only `t3_out[1]` feeds Report.
#slide(composer: (43%,50%))[
#set text(15pt)
```groovy
workflow {
input_ch = Channel
.fromPath("inputs/*.txt")

t1_out = task1(input_ch)
t2_out = task2(input_ch)

t3_out = task3(t1_out)
t4_out = task4(t2_out)

report = report_task(
t3_out[1].collect()
)

}
```

][
#set text(13pt)
#diagram(
node-stroke: blue,
edge-corner-radius: none,
edge-stroke: purple.darken(50%),
label-sep:0.2em,
spacing: (10pt, 47pt),

// Nodes
// First column group: the full chain for the first item of input_ch.
node((1.5,0), [input_ch[0]], name: <a>),
node((0.5,1), [t1_out], name: <b>),
node((2.5,1), [t2_out], name: <c>),
node((6, 0), [input_ch[n]], name:<w>),

// Unboxed ellipsis standing in for the remaining channel items.
node((5.5, 1), [...], stroke:none),

node((0,2), [t3_out[0]], name: <d>),
node((1,2), [t3_out[1]], name: <e>),

node((2.5,2), [t4_out], name: <f>),

node((2.5,3), [Report], name: <h>),

node((5,0), [input_ch[1]], name: <x>),
node((5,2), [outputs...], name: <y>),

node((6,2), [outputs...], name: <u>),

// Legend: an arrow labelled "Task" and an empty box labelled "File".
edge((-0.3,3),(0.1,3), "-|>"),
node((0.4,3), [Task], stroke:none),
node((0,3.5), [], width:1em,height:1em),
node((0.4,3.5), [File ], stroke: none),

// Edges
bent-edge(<a>, <b>, [_Task 1_]),
bent-edge(<a>, <c>, [_Task 2_]),
bent-edge(<b>, <d>, [_Task 3_]),
bent-edge(<b>, <e>),
edge(<c>, <f>, "-|>", [_Task 4_]),
edge(<e>, <h>, "-|>"),
// Dashed edges abbreviate the same chain of tasks for the other inputs.
edge(<x>, <y>, "--|>", []),
edge(<w>, <u>, "--|>", []),
edge(<y>, <h>, "-|>"),
edge(<u>, <h>, "-|>"),
)
]


77 changes: 77 additions & 0 deletions nextflow/.tutorial-answers.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl=2

// Entry workflow: run every gzipped FASTQ file in ../input through
// kneaddata -> metaphlan -> humann, passing each step's outputs on by position.
workflow {
    // One channel item per file matching the glob (relative to the launch directory).
    fastq_files = Channel.fromPath("../input/*.fastq.gz")

    // kneaddata outputs used here: [0] sample name, [1] main cleaned reads file.
    cleaned = kneaddata(fastq_files)

    // metaphlan outputs: [0] sample name, [1] the reads passed through, [2] profile.
    profiled = metaphlan(cleaned[0], cleaned[1])

    // Final step; its outputs are not consumed further in this workflow.
    humann(profiled[0], profiled[1], profiled[2])
}

// Run KneadData on one unpaired reads file and gzip the resulting FASTQ files.
//
// Input:
//   file - a single reads file; the sample name is its file name without a
//          trailing ".fastq.gz".
// Outputs (by position):
//   [0] sample name
//   [1] "<sample>_kneaddata.fastq.gz"
//   [2] every file matching "<sample>_kneaddata*.fastq.gz"
//   [3] "<sample>_kneaddata.log"
// Requires params.kneaddata_db to point at the reference database.
process kneaddata {
    publishDir "kneaddata/"

    input:
    path file

    output:
    val sample
    path "${sample}_kneaddata.fastq.gz"
    path "${sample}_kneaddata*.fastq.gz"
    path "${sample}_kneaddata.log"

    script:
    // replaceAll takes a regular expression, so the dots are escaped and the
    // pattern is anchored: only a literal ".fastq.gz" suffix is removed, not any
    // "<char>fastq<char>gz" sequence elsewhere in the name.
    // `sample` is assigned without `def` so the output block above can refer to it.
    sample = file.name.replaceAll('\\.fastq\\.gz$', '')

    """
    kneaddata --unpaired $file --output ./ --output-prefix ${sample}_kneaddata \
        --reference-db ${params.kneaddata_db}

    gzip *.fastq
    """
}

// Build a MetaPhlAn taxonomic profile from one sample's cleaned reads.
//
// Inputs:
//   sample    - sample name, used to name the profile
//   knead_out - reads file to profile (FASTQ)
// Outputs (by position):
//   [0] sample name (passed through)
//   [1] the input reads file (passed through for the next step)
//   [2] "<sample>_profile.tsv"
process metaphlan {
    // The publish mode must be the quoted string 'copy'; a bare `copy` is an
    // undefined identifier, not the mode name.
    publishDir "metaphlan/", mode: 'copy'

    input:
    val sample
    path knead_out

    output:
    val sample
    path knead_out
    path "${sample}_profile.tsv"

    // `script:` rather than `shell:`: the command is a double-quoted string using
    // `$var` interpolation, which is script-block semantics.
    script:
    """
    metaphlan $knead_out -o ${sample}_profile.tsv \
        --input_type fastq
    """
}

// Run HUMAnN functional profiling on one sample, reusing its taxonomic profile.
//
// Inputs:
//   sample    - sample name, used as the output basename
//   knead_out - reads file to analyse
//   profile   - taxonomic profile passed via --taxonomic-profile
// Outputs (by position):
//   [0] "<sample>_genefamilies.tsv"
//   [1] "<sample>_pathabundance.tsv"
//   [2] "<sample>_pathcoverage.tsv"
process humann {
    // The publish mode must be the quoted string 'copy'; a bare `copy` is an
    // undefined identifier, not the mode name.
    publishDir "humann/", mode: 'copy'

    input:
    val sample
    path knead_out
    path profile

    output:
    path "${sample}_genefamilies.tsv"
    path "${sample}_pathabundance.tsv"
    path "${sample}_pathcoverage.tsv"

    // `script:` rather than `shell:`: the command is a double-quoted string using
    // `$var` interpolation, which is script-block semantics.
    script:
    """
    humann --input $knead_out -o ./ --taxonomic-profile $profile \
        --remove-temp-output --search-mode uniref90 \
        --output-basename $sample
    """
}

Loading