# mitre/example.cfg -- example configuration file from the gerberlab/mitre repository.

# Note: comments placed on their own lines may start with ';' or '#', but
# inline comments should start only with ';' - this restriction is imposed by
# Python's ConfigParser module.

[description]
# Short label identifying this analysis.
# NOTE(review): presumably used to name the output files -- confirm.
tag = david_diet

[data]
# Locations of the input files. The /path/to/your_data/... values below are
# placeholders; replace them with the paths to your own files.
abundance_data = /path/to/your_data/abundance.csv
sequence_key = /path/to/your_data/sequence_key.fa
sample_metadata = /path/to/your_data/sample_metadata.csv
subject_data = /path/to/your_data/subject_data.csv
jplace_file = /path/to/your_data/placements.jplace
# Outcome to be modeled.
# NOTE(review): presumably outcome_variable names a field in subject_data and
# outcome_positive_value is the value of that field treated as the positive
# outcome -- confirm.
outcome_variable = diet
outcome_positive_value = Plant
# NOTE(review): the accepted values of taxonomy_source are not shown in this
# file; 'hybrid' presumably draws on both the pplacer tables and the
# placement table listed below -- confirm.
taxonomy_source = hybrid
pplacer_taxa_table = /path/to/your_data/taxaTable.csv
pplacer_seq_info = /path/to/your_data/seq_info.csv
placement_table = /path/to/your_data/mothur_placements.csv

[preprocessing]
# Filters and transformations applied to the data before modeling.
#
# Note that the order of these options in the configuration file is
# not important. Those filters and transformations selected will be
# applied in the following order:
# overall abundance filter, sample depth filter, window trimming,
# temporal sampling density filter, relative abundance transformation,
# phylogenetic aggregation, log transform, temporal abundance filter,
# surplus internal node filtering.

# Drop RSVs with less than a certain number of reads across all samples
min_overall_abundance = 10
# Drop samples with less than a certain total number of reads
min_sample_reads = 5000
# No need to trim the time window here, -5 to 10 is okay
# (window trimming is left disabled by keeping both options commented out)
# trim_start = 20
# trim_stop = 30
# Discard subjects with insufficiently dense temporal sampling:
# divide the window up into n_intervals equal pieces,
# then require at least n_samples within every n_consecutive consecutive
# such pieces
# We don't actually need to do this here, the sampling density is good for all 20 subjects.
# (the density filter is left disabled by keeping all three options commented out)
# density_filter_n_samples = 1
# density_filter_n_intervals = 12
# density_filter_n_consecutive = 2
# Select which transformations should be applied to the data.
take_relative_abundance = True
aggregate_on_phylogeny = True
log_transform = False
# Temporal abundance filtration: keep only those taxa which
# exceed a threshold abundance in multiple consecutive observations
# at least once in a minimum number of subjects. Note that the
# threshold should be on a log scale if the log transform has been
# performed.
# With the settings above (relative abundance on, log transform off), the
# threshold below is a relative abundance, not a log-scale value.
temporal_abundance_threshold = 0.001
temporal_abundance_consecutive_samples = 2
temporal_abundance_n_subjects = 4
# Discard taxa representing nodes in the phylogenetic tree not
# required to maintain the topological relationships among the other
# nodes of the tree still included in the model.
discard_surplus_internal_nodes = True
# Save this dataset as a pickle file?
pickle_dataset = True

[model]
# Settings controlling the time windows over which rules may apply.
#
# Divide the experiment into this many equal segments and use them
# as the atomic time windows
n_intervals = 10
# NOTE(review): t_min and t_max are presumably expressed in the same time
# units as the sample time points -- confirm.
# Allow rules to apply only to time windows longer than t_min
t_min = 1.0
# Allow rules to apply only to time windows shorter than t_max
t_max = 7.0
# NOTE(review): presumably saves the model object as a pickle file, by
# analogy with pickle_dataset in [preprocessing] -- confirm.
pickle_model = True

[sampling]
# NOTE(review): presumably the total number of samples to draw from the
# model, including any later discarded as burn-in -- confirm.
total_samples = 50000

[postprocessing]
# Toggles selecting which outputs to produce after sampling.
# NOTE(review): the exact contents of each output are not shown in this
# file -- see the program documentation.
quick_summary = True
full_summary = True
gui_output = True
# NOTE(review): presumably the fraction of samples discarded from the start
# of the run as burn-in before summarizing -- confirm.
burnin_fraction = 0.1