diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..997504b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# SCM syntax highlighting & preventing 3-way merges +pixi.lock merge=binary linguist-language=YAML linguist-generated=true -diff diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..15d1f88 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,21 @@ +name: Test + +on: + pull_request: + branches: [main] + +jobs: + test-notebooks: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - uses: prefix-dev/setup-pixi@v0 + with: + # add separate env for each test + environments: >- + circlize + cache: true + + - name: test-circlize + run: pixi run -e circlize test-circlize diff --git a/.gitignore b/.gitignore index 5f26d04..a66edbe 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ *.Rproj output/* *.tar.gz +# pixi environments +.pixi/* +!.pixi/config.toml diff --git a/README.md b/README.md index 0a4cc45..f981bf6 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ This repository is a collection of reusable, self-contained code chunks and exam - [Plot sequence logos with `logomaker` (python)](https://MPUSP.github.io/bioinfo-code-chunks/plot_logos.html) - [Plot coverage tracks (R)](https://MPUSP.github.io/bioinfo-code-chunks/plot_coverage.nb.html) - [Plot Circos genomes with `pycircos` (python)](https://MPUSP.github.io/bioinfo-code-chunks/plot_circos.html) +- [Plot Circos genomes with `circlize` (R)](https://MPUSP.github.io/bioinfo-code-chunks/plot_circos.nb.html) - [Homology search for protein sequences (python)](https://MPUSP.github.io/bioinfo-code-chunks/homology_search.html) - [ENA fastq data submission (python)](https://MPUSP.github.io/bioinfo-code-chunks/ena_submission.html) diff --git a/docs/plot_circos.nb.html b/docs/plot_circos.nb.html new file mode 100644 index 0000000..1ad8acb --- /dev/null +++ b/docs/plot_circos.nb.html @@ -0,0 +1,2056 @@ + + 
+ + +
+ + + + + + + + + +circlize
+circlize is a powerful R package to plot circular
+visualizations, so-called ‘Circos’ plots
+circlize can be installed from within R
+tidyverse,
+GenomicFeatures, GenomicRanges, and
+rtracklayer
+install.packages("circlize")
+
+
+
+pixi init
+pixi add r-circlize
+...
+
+
+
+pixi run -e circlize test-circlize
+
+
+
+pixi shell --environment circlize
+
+
+
+suppressPackageStartupMessages({
+ library(tidyverse)
+ library(circlize)
+ library(Biostrings)
+ library(GenomicRanges)
+ library(GenomicFeatures)
+ library(rtracklayer)
+})
+
+
+
+validate_genomic_input takes as input two data frames,
+one with genomic coordinates and one with chromosome information, and
+checks whether the coordinates correspond
+plot_circlize takes as input two objects, a DNA
+sequence as DNAStringSet and a GRangesList
+with genomic features
+source("../source/circlize.R")
+
+
+
+*.fasta and a *.gff file
+corresponding to the same genome assembly
+fasta <- Biostrings::readDNAStringSet("../data/spyogenes_genome.fna")
+# Read the GFF annotation file.
+gff <- rtracklayer::import(con = "../data/spyogenes_genome.gff")
+
+# Cut every FASTA header at the first space or "|" and keep only the leading
+# piece as the sequence name.
+names(fasta) <- stringr::str_split_i(
+  string = names(fasta),
+  pattern = "[ \\|]",
+  i = 1
+)
+
+
+
+# gene annotation: keep only the GFF records whose type is "gene"
+genes <- gff[gff$type == "gene"]
+
+# genome info: one row per sequence in `fasta`, running from 0 to its width
+df_chroms <- data.frame(
+  name = names(fasta),
+  start = numeric(length(fasta)),
+  end = width(fasta)
+)
+
+# coordinates of the selected genes as a tibble (sequence name, start, end)
+df_genes <- tibble::tibble(
+  chr = as.character(seqnames(genes)),
+  start = start(genes),
+  end = end(genes)
+)
+
+# check the gene coordinates against the chromosome table; the validated
+# result replaces `df_genes`
+df_genes <- validate_genomic_input(df_genes, df_chroms)
+
+
+
+# Two demo tracks built from every 10th gene (rows 1, 11, 21, ...).
+
+# Fix the RNG seed so that the simulated values and the sampled colors -- and
+# with them the rendered figure -- are the same on every run.
+set.seed(1)
+
+# Select the rows once and reuse the index. Unlike `seq(1, n, by = 10)` this
+# yields an empty index instead of an error when `df_genes` has no rows.
+gene_idx <- seq_len(nrow(df_genes))
+gene_idx <- gene_idx[gene_idx %% 10 == 1]
+n_sel <- length(gene_idx)
+
+# NOTE(review): the sequence name is hard-coded; presumably it matches the
+# single sequence in `fasta` -- adjust when reusing this with another genome.
+extra <- list(
+  # track 1: simulated measurements drawn as points
+  experiment = list(
+    data = data.frame(
+      chr = rep("NC_002737.2", n_sel),
+      start = df_genes$start[gene_idx],
+      end = df_genes$end[gene_idx],
+      value = rnorm(n_sel, mean = 10, sd = 5)
+    ),
+    type = "points",
+    color = "#96389f",
+    height = 0.07,
+    ylim = c(0, 20)
+  )
+)
+
+# track 2: one rectangle per selected gene, each with a random color
+extra[["experiment2"]] <- list(
+  data = data.frame(
+    chr = rep("NC_002737.2", n_sel),
+    start = df_genes$start[gene_idx],
+    end = df_genes$end[gene_idx],
+    value = rep(1, n_sel)
+  ),
+  type = "rect",
+  # `colors()` is a finite set; fall back to sampling with replacement only
+  # when more colors are requested than exist, where `sample()` would
+  # otherwise stop with an error.
+  color = sample(colors(), n_sel, replace = n_sel > length(colors()))
+)
+
+
+
+# Create the output directory if it is missing: `png()` cannot open a file
+# inside a directory that does not exist, and `output/*` is git-ignored, so a
+# fresh checkout may not contain it.
+dir.create("../output", showWarnings = FALSE, recursive = TRUE)
+
+# Render the plot into a 2000 x 2000 px PNG at 300 dpi.
+png("../output/circlize.png", width = 2000, height = 2000, res = 300)
+
+# Close the PNG device even when plotting fails, so that no graphics device
+# stays open after an error. `invisible()` keeps the return values of the plot
+# call and of `dev.off()` out of the notebook output.
+invisible(tryCatch(
+  plot_circlize(fasta, gff, extra = extra),
+  finally = dev.off()
+))
+
+
+
+
+
+
+