Skip to content

Commit f9221b0

Browse files
committed
documentation
1 parent 8840ccd commit f9221b0

16 files changed

Lines changed: 148 additions & 270 deletions

vagrantDNA/DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: vagrantDNA
22
Type: Package
33
Title: Estimating the proportion of vagrant DNA in a genome
4-
Version: 1.0.1
4+
Version: 1.1.0
55
Author: Richard Nichols and Hannes Becher
66
Maintainer: The package maintainer <r.a.nichols@qmul.ac.uk>
77
Description: The package provides two functions, rainbowPlot and divEst. They produce two different estimates of the proportion of DNA from a particular vagrant genome. They exploit low coverage data from multiple individuals in which the vagrant DNA has taken residence (see Becher and Nichols 2022).

vagrantDNA/R/Rainbow_Plot.R

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,20 @@
2121
#'
2222
#' @param data A data.frame with at least the following columns.
2323
#' \describe{
24-
#' \item{AltProp}{A numberic vector giving the proportion (of reads mapping to the exogenous genome) that carry the non-standard alleles thought to be in the vagrant copies}
25-
#' \item{Position}{A factor (or structure that can be coerced to a factor),
26-
#' giving a unique name for each SNP location.}
2724
#' \item{Sample}{A factor (or structure that can be coerced to a factor),
2825
#' giving a unique name for each sample.}
26+
#' \item{Position}{A factor (or structure that can be coerced to a factor),
27+
#' giving a unique name for each SNP location.}
28+
#' \item{AltProp}{A numeric vector giving the proportion (of reads mapping to
29+
#' the exogenous genome) that carry the non-standard alleles (thought to be in
30+
#' the vagrant copies)}
31+
#' \item{DP}{A numeric vector giving the mapping depth at each site}
32+
#' \item{nMapped}{A numeric vector giving the number of base pairs in this
33+
#' sample's sequencing data that were successfully aligned to the extranuclear
34+
#' reference}
35+
#' \item{nTot}{A numeric vector giving the total number of base pairs of the sample's
36+
#' mapping data (ideally after quality control, filtering, read trimming, etc.)}
2937
#' \item{ylog}{A numeric vector giving the log(AltProp)}
30-
#' \item{DP}{A numeric vector giving the mapping depth of at each sites}
31-
#' \item{nMapped}{A numeric vector giving the mapping depth of at each sites}
32-
#' \item{xnqlogis}{A numeric vector giving log(m/N);
33-
#' where m is the number of nucleotides mapping to the exogenous genome and
34-
#' N is the remaining nucleotides in the sequencing data.}
3538
#' }
3639
#' @param nloci The number of loci to be selected for the analysis. Default, 400.
3740
#' @param minWt The minimum average allele frequency of SNPs to be included in the analysis.
@@ -40,12 +43,12 @@
4043
#' in the analysis. Default, 0.7.
4144
#' @param minSamples The minimum number of samples in which a SNP should be called in order to
4245
#' be included in the analysis.
43-
#' @param filterHard If filterHard is TRUE, the SNP loci with slopes in the outer quartiles are
44-
#' discarded. Otherwise the outliers identified by the default method of boxplot.stats function
45-
#' are discarded. Default, TRUE.
4646
#' @param seed Random number seed.
4747
#' @param title User-supplied title for the rainbow plot.
48-
#' @param printout If printout is TRUE, the function prints the estimates. Default, TRUE.
48+
#' @param printout If printout is TRUE, the function prints the estimates. Default, `TRUE`.
49+
#' @param correctForDepth Logical, whether or not to correct for uneven insertion rates along the extranuclear sequence. Not usually required. Default is `FALSE`.
50+
#' @param weigh Logical. Whether or not to select loci with high allele frequencies. Default is `TRUE`.
51+
#' @param extraNucLen Numeric. Length of the extranuclear genome reference. Only required when `correctForDepth` is set to `TRUE`. Default is 16000.
4952
#' @return An invisible list with the following elements.
5053
#' \describe{
5154
#' \item{$intercepts}{A vector giving the intercept estimate,
@@ -68,7 +71,7 @@
6871
#' ## n.b. hopperDF is too large to be included in the package's data
6972
#' ## but can be accessed as follows
7073
#' \dontrun{
71-
#' download.file("t.ly/6hXO", destfile = "hopper.csv")
74+
#' download.file("https://tinyurl.com/4mtrbkzc", destfile = "hopper.csv")
7275
#' hopperDF <- read.table("hopper.csv")}
7376
#'
7477
#' ## (the tinyurl.com/ link is to the csv file at
@@ -91,11 +94,9 @@ rainbowPlot <- function(data,
9194
minWt = 0.01,
9295
maxFreq = 0.7,
9396
minSamples = 10,
94-
#filterHard = TRUE,
9597
seed,
9698
title = "",
9799
printout = TRUE,
98-
#perBpDep=F,
99100
correctForDepth=F,
100101
weigh=T,
101102
extraNucLen=16000
@@ -105,6 +106,7 @@ rainbowPlot <- function(data,
105106
data$Position <- as.factor(data$Position)
106107
data$Sample <- as.factor(data$Sample)
107108
if(correctForDepth){
109+
cat("Option correctForDepth is set to true. Make sure to specify the length or the extranulcear genome reference using the argument extraNucLen!\n")
108110
p <- exp(data$ylog)
109111
nma <- p * data$DP
110112
ma <- (1-p) * data$DP

vagrantDNA/R/data.R

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#' NUMT datasets
2+
#'
3+
#' These data sets give allele frequencies generated from whole-genome
4+
#' sequencing data mapped against mitochondrial genome references.
5+
#'
6+
#' @format Each is a data.frame.
7+
#' [species]DF just contain variant calling data.
8+
#' The columns are:
9+
#' \describe{
10+
#' \item{Sample}{A character vector giving a unique name for each sample}
11+
#' \item{Position}{A numeric vector giving the position of each SNP}
12+
#' \item{AltProp}{A numeric vector giving the proportion (of reads mapping to the exogenous genome) that carry the non-standard alleles (thought to be in the vagrant copies)}
13+
#' \item{DP}{A numeric vector giving the mapping depth at each site}
14+
#' \item{nMapped}{A numeric vector giving the number of base pairs in this sample's sequencing data that were successfully aligned to the extranuclear reference}
15+
#' \item{nTot}{A numeric vector giving the total number of base pairs of the sample's mapping data (ideally after quality control, filtering, read trimming, etc.)}
16+
#' \item{ylog}{A numeric vector giving the log of `AltProp`}
17+
#' }
18+
#' [species]FX were generated from species with diverged populations. These
19+
#' contain information on sites with fixed differences. The columns are:
20+
#' \describe{
21+
#' \item{pos}{A vector giving the site IDs}
22+
#' \item{sample}{A vector, giving a unique name for each individual genotyped}
23+
#' \item{g1}{A numeric vector giving the allele count of allele 1}
24+
#' \item{g2}{A numeric vector giving the allele count of allele 2}
25+
#' \item{g3}{A numeric vector giving the allele count of allele 3}
26+
#' \item{g4}{A numeric vector giving the allele count of allele 4}
27+
#' \item{N}{A numeric vector giving the number of bp in the read data that did not map to the vagrant DNA reference in this individual}
28+
#' \item{M}{A numeric vector giving the number of bp in the read data that did map to the vagrant DNA reference in this individual}
29+
#' \item{pop}{A factor (or structure that can be coerced to a factor), of "A" and "B" denoting which population the individual belongs to}
30+
#' \item{A}{A numeric vector giving the number of the major allele at this site in population A}
31+
#' \item{B}{A numeric vector giving the number of the major allele at this site in population B}
32+
#' }
33+
#'
34+
#' @examples
35+
#' humanDF
36+
#' parrotDF
37+
#' hopperFX
38+
#' parrotFX
39+
"humanDF"
40+
41+
#' @rdname humanDF
42+
#' @format NULL
43+
"parrotDF"
44+
45+
#' @rdname humanDF
46+
#' @format NULL
47+
"hopperFX"
48+
49+
#' @rdname humanDF
50+
#' @format NULL
51+
"parrotFX"

vagrantDNA/data-raw/DATASET.R

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,29 @@
11
## code to prepare humanDF, hopperDF and ParrotDF
22

33
# human
4-
download.file("https://raw.githubusercontent.com/SBCSnicholsLab/pseudogene_quantification/main/data/human/transformedData.csv",
4+
download.file("https://raw.githubusercontent.com/SBCSnicholsLab/pseudogene_quantification/main/data/human02/transformedData.csv",
55
destfile = "human.csv")
66
humanDF <- read.table("human.csv")
77

8+
89
#hopper
910
download.file("https://raw.githubusercontent.com/SBCSnicholsLab/pseudogene_quantification/main/data/grasshopper/transformedData.csv",
1011
destfile = "hopper.csv")
1112
hopperDF <- read.table("hopper.csv")
1213

14+
download.file("https://raw.githubusercontent.com/SBCSnicholsLab/pseudogene_quantification/main/data/grasshopper/hopperFixed.csv",
15+
destfile = "hopperFixed.csv")
16+
hopperFX <- read.table("hopperFixed.csv")
17+
18+
1319
#parrot
1420
download.file("https://raw.githubusercontent.com/SBCSnicholsLab/pseudogene_quantification/main/data/parrot/transformedData.csv",
1521
destfile = "parrot.csv")
1622
parrotDF <- read.table("parrot.csv")
1723

18-
usethis::use_data(humanDF, hopperDF, parrotDF, overwrite = TRUE)
24+
download.file("https://raw.githubusercontent.com/SBCSnicholsLab/pseudogene_quantification/main/data/parrot/parrotFixed.csv",
25+
destfile = "parrotFixed.csv")
26+
parrotFX <- read.table("parrotFixed.csv")
27+
28+
29+
usethis::use_data(humanDF, parrotDF, parrotFX, hopperFX, overwrite = TRUE, compress = "bzip2")

vagrantDNA/data/hopperFX.rda

8.17 KB
Binary file not shown.

vagrantDNA/data/humanDF.rda

312 KB
Binary file not shown.

vagrantDNA/data/parrotDF.rda

-155 KB
Binary file not shown.

vagrantDNA/data/parrotFX.rda

5.25 KB
Binary file not shown.

vagrantDNA/man/hopperFX.Rd

Lines changed: 0 additions & 38 deletions
This file was deleted.

vagrantDNA/man/humanDF.Rd

Lines changed: 48 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)