forked from diepthihoang/mpboot
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnnisearch.h
More file actions
254 lines (209 loc) · 7.34 KB
/
nnisearch.h
File metadata and controls
254 lines (209 loc) · 7.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#ifndef NNISEARCH_H
#define NNISEARCH_H
#include <stdio.h>
#include <stdlib.h>
#include "tools.h"
#include <string>
#include <sstream>
#include <set>
#include <algorithm>
#include <map>
#include <vector>
//#include <unordered_set>
extern "C" {
#include "pllrepo/src/pllInternal.h"
}
/* This is the info you need to copy the vector*/
typedef struct
{
int node_number;
int num_partitions;
size_t *partition_sizes;
double **lh_values;
int **expVector;
} LH_VECTOR;
typedef struct {
nodeptr p;
int nniType;
char* idString;
double z0[PLL_NUM_BRANCHES]; // p
double z1[PLL_NUM_BRANCHES]; // p->next
double z2[PLL_NUM_BRANCHES]; // p->next->next
double z3[PLL_NUM_BRANCHES]; // q->next
double z4[PLL_NUM_BRANCHES]; // q->next->next
double likelihood;
double loglDelta;
double negLoglDelta;
} pllNNIMove;
inline bool comparePLLNNIMove(const pllNNIMove &a, const pllNNIMove &b)
{
return a.likelihood < b.likelihood;
}
void countDistinctTrees(pllInstance* pllInst, partitionList *pllPartitions);
static int cmp_nni(const void* nni1, const void* nni2);
int compareDouble(const void * a, const void * b);
pllNNIMove *getNonConflictNNIList(pllInstance* tr);
void _update(pllInstance *tr, partitionList *pr, nodeptr p);
#define MAX_NUM_DELTA 10000
typedef struct {
double delta[MAX_NUM_DELTA];
int num_delta;
double delta_min;
int doNNICut;
} NNICUT;
typedef struct {
// FOR GENERAL TREE SEARCH
bool speednni;
vector<pllNNIMove> posNNIList; // positive NNIs
unordered_set<string> aBranches; // Set of branches that are affected by the previous NNIs
double curLogl; // Current tree log-likelihood
int curIter; // Current iteration number
double curPerStrength; // Current perturbation strength
// FOR NNI SEARCH
NNI_Type nni_type;
int numAppliedNNIs; // total number of applied NNIs sofar
int curNumAppliedNNIs; // number of applied NNIs at the current step
int curNumNNISteps;
set<double> deltaLogl; // logl differences between nni1 and nni5
} SearchInfo;
/**
* get all the nodes affected by the NNI
* @param p
* @return
*/
set<int> getAffectedNodes(pllInstance* tr, nodeptr p);
string getBranString(nodeptr p);
bool containsAffectedNodes(nodeptr p, SearchInfo &searchinfo);
void updateBranchLengthForNNI(pllInstance* tr, partitionList *pr, pllNNIMove &nni);
void pllEvalAllNNIs(pllInstance *tr, partitionList *pr, SearchInfo &searchinfo);
double pllDoRandomNNIs(pllInstance *tr, partitionList *pr, int numNNI);
/**
* Compute the possible best logl improvement of NNI5 over NNI1
* @param serachinfo contains the logl improvement collected in previous iterations
* @return a logl delta that is larger than 95% of the collected values
*/
double estBestLoglImp(SearchInfo* searchinfo);
/**
* Evaluate NNI moves for the current internal branch
* @param tr the current tree data structure
* @param pr partition data structure
* @param p the node representing the current branch
* @return number of positive NNIs found
*/
int evalNNIForBran(pllInstance* tr, partitionList *pr, nodeptr p, SearchInfo &searchinfo);
/**
* Perturb the best tree
*
* Given the best tree, apply some NNIs to escape local optimum
* @param tr
* @param pr
* @param nnis list of all NNI to apply
* @param numNNI size of the array nnis
* @return
*/
double pllPerturbTree(pllInstance *tr, partitionList *pr, vector<pllNNIMove> &nnis);
/**
* do 1 round of fastNNI
* return new tree log-likelihood if found improving NNI otherwise -1.0
*
* @param[in] tr the tree data structure
* @param[in] pr partition data structure
* @param[out] nniList list containing information about the 2(n-3) evaluated NNIs
* @param[in/out] tabuNNIs tabu list
* @param[out] nni_count number of NNI that have been applied
* @param[out] deltaNNI average improvement made by one NNI
*/
double pllDoNNISearch(pllInstance* tr, partitionList *pr, SearchInfo &searchinfo);
void pllUpdateTabuList(pllInstance *tr, SearchInfo &searchinfo);
void pllSaveQuartetForSubTree(pllInstance* tr, nodeptr p, SearchInfo &searchinfo);
/**
* @brief Do 1 NNI move.
* @param[in] tr: the tree data structure
* @param[in] pr partition data structure
* @param[in] swap: represents one of the 2 NNI moves. Could be either 0 or 1
* @param[in] NNI_Type
*/
double doOneNNI(pllInstance * tr, partitionList *pr, nodeptr p, int swap, NNI_Type nni_type, SearchInfo *searchinfo = NULL);
void pllGetAllInBran(pllInstance *tr, vector<nodeptr> &branlist);
void pllGetAllInBranForSubtree(pllInstance *tr, nodeptr p, vector<nodeptr> &branlist);
string convertQuartet2String(nodeptr p);
/**
* Go through all 2(n-3) internal branches of the tree and
* evaluate all possible NNI moves
*/
void evalAllNNI(pllInstance* tr);
/**
* @brief evaluate all NNIs within the subtree specified by node p
* populates the list containing all possible NNI moves
*
* @param[in] tr: the tree data structure
* @param[in] pr partition data structure
* @param[in] p node pointer that specify the subtree
*/
void evalNNIForSubtree(pllInstance* tr, partitionList *pr, nodeptr p, SearchInfo &searchinfo);
/*
* @brief return the array which can be used to store evaluated NNI moves
*
* @param[in] tr: the tree data structure
*/
pllNNIMove *getNNIList(pllInstance* tr);
/*
* ****************************************************************************
* pllUFBoot area
* ****************************************************************************
*/
/**
* DTH:
* pllUFBootData struct
* This one keeps all info necessary to run UFBoot in PLL mode
*/
typedef struct{
int max_candidate_trees;
int treels_size;
int save_all_trees;
pllBoolean save_all_br_lens;
double logl_cutoff;
int duplication_counter;
int n_patterns;
struct pllHashTable * treels;
unsigned int candidate_trees_count; /* counter of trees in pllHashTable */
double * treels_logl; // maintain size == treels_size
char ** treels_newick; // maintain size == treels_size
double ** treels_ptnlh; // maintain size == treels_size
int ** boot_samples;
double * boot_logl;
int * boot_counts;
int * boot_trees;
} pllUFBootData;
/**
* DTH:
* The PLL version of saveCurrentTree function
* @param tr: the tree (a pointer to a pllInstance)
* @param pr: pointer to a partitionList (this one keeps tons of tree info)
* @param p: root?
*/
void pllSaveCurrentTree(pllInstance* tr, partitionList *pr, nodeptr p);
/**
* DTH:
* Extract the array of site log likelihood to be kept in ptnlh
* And update *cur_log
* @param tr: the tree (pointer to an pllInstance)
* @param ptnlh: to-be-kept array of site log likelihood
* @param cur_logl: pointer to current tree log likelihood
*/
void pllComputePatternLikelihood(pllInstance* tr, double * ptnlh, double * cur_logl);
/**
* DTH:
* Resize some of the arrays in UFBootData if they're full
* Along with update treels_size (to track the size of these arrays)
*/
void pllResizeUFBootData();
/**
* DTH:
* (Based on function Tree2StringREC of PLL)
* Print out the tree topology with IQTree taxa ID (starts at 0) instead of PLL taxa ID (starts at 1)
* @param All are the same as in PLL's
*/
static char *pllTree2StringREC(char *treestr, pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean printBranchLengths, pllBoolean printNames,
pllBoolean printLikelihood, pllBoolean rellTree, pllBoolean finalPrint, int perGene, pllBoolean branchLabelSupport, pllBoolean printSHSupport);
#endif