// msa.h - shared constants, the Sequence struct, and function prototypes.
#ifndef MSA_H
#define MSA_H
#include <string>
#include <vector>
#include <map>
#include <memory>
#include <list>
#include <cmath>
#include <limits.h>
#include <iomanip>
#include <ios>
#include <iostream>
#include <float.h>
#include <fstream>
#include <chrono>
#include <immintrin.h>
#include <omp.h>
#include <mpi.h>
using namespace std;
// Alphabet size (20 amino acids); also the extent of each blosum dimension.
constexpr int NUM_LETTERS = 20;

// blosum matrix: only declared here, the definition lives elsewhere.
extern int blosum[NUM_LETTERS][NUM_LETTERS];

// Presumably the longest sequence the program handles - confirm at use sites.
constexpr int MAX_SEQ_LEN = 200;

// Index used when accessing argv.
constexpr int FILENAME = 1;

// Substitution-matrix geometry: a row spans 'Y' - 'A' (24 in ASCII) slots and
// the flattened matrix holds 24 * 24 + 24 entries.
constexpr int ROW_LEN = 24;
constexpr int MATRIX_SIZE = ROW_LEN * ROW_LEN + ROW_LEN;
static_assert(MATRIX_SIZE == 600, "MATRIX_SIZE must stay 24 * 24 + 24");

// Added to a letter's ASCII code so that 'A' lands on index 0.
constexpr int ASCII_OFFSET = -65;

// Penalty for adding a gap.
constexpr int GAP = -3;

// Error codes.
constexpr int CLI_ERROR = 1;
constexpr int FILE_ERROR = 2;
// One input sequence together with its bookkeeping data.
// Plain aggregate: members can be brace-initialized in declaration order
// (seq, id, index). `index` has no default value, so a default-constructed
// Sequence leaves it uninitialized until the caller assigns it.
struct Sequence {
    std::string seq;  // the sequence itself
    std::string id;   // name of the sequence
    int index;        // position of this sequence in the matrix
};
// Prototype only; the definition is outside this header.
// Takes two clusters (c1, c2), a point count and the distance matrix, and
// returns a float.
// NOTE(review): presumably the average pairwise distance between the members
// of c1 and c2 - confirm against the definition.
// All three containers are passed by non-const reference.
float mean_difference(std::vector<Sequence>& c1,
                      std::vector<Sequence>& c2,
                      const int numPoints,
                      std::vector<float>& distanceMatrix);
// Prototype only; the definition is outside this header.
// Takes a file name (by value) and returns a vector of Sequence records.
// NOTE(review): presumably parses a FASTA file into one Sequence per entry -
// confirm against the definition.
std::vector<Sequence> read_fasta_file(std::string fileName);
// Prototype only; the definition is outside this header.
// Receives the cluster list, the distance matrix and the substitution matrix,
// all by non-const reference, and returns nothing.
// NOTE(review): presumably merges clusters in place following the UPGMA
// procedure - confirm against the definition.
void UPGMA(std::vector<std::vector<Sequence>>& clusters,
           std::vector<float>& distanceMatrix,
           std::vector<int>& subMatrix);
// Prototype only; the definition is outside this header.
// Takes the cluster list and returns nothing.
// NOTE(review): `clusters` is received by value, so every call copies all
// sequences; switching to a const reference would also require changing the
// definition.
void print_seqs(std::vector<std::vector<Sequence>> clusters);
// Prototype only; the definition is outside this header.
// Takes no arguments and returns a vector<int>.
// NOTE(review): presumably the flattened substitution matrix of MATRIX_SIZE
// entries - confirm against the definition.
std::vector<int> make_sub_matrix();
// Prototype only; the definition is outside this header.
// Inputs: the number of clusters, the cluster list, the number of sequences
// and the distance matrix.
// Outputs: cToMerge1 / cToMerge2 (non-const references) and idxC1 / idxC2
// (raw int pointers).
// NOTE(review): presumably writes the closest pair of clusters and their
// positions in `clusters` through the output parameters; whether null
// pointers are tolerated cannot be told from here - confirm against the
// definition.
void find_closest_clusters(int numClusters,
                           std::vector<std::vector<Sequence>>& clusters,
                           int numSeqs,
                           std::vector<float>& distanceMatrix,
                           std::vector<Sequence>& cToMerge1,
                           int* idxC1,
                           std::vector<Sequence>& cToMerge2,
                           int* idxC2);
// Prototype only; the definition is outside this header.
// Takes two clusters by non-const reference and returns a new vector of
// Sequence records.
// NOTE(review): presumably the combined cluster; whether the inputs are
// modified cannot be told from here - confirm against the definition.
std::vector<Sequence> merge_clusters(std::vector<Sequence>& cToMerge1,
                                     std::vector<Sequence>& cToMerge2);
// Prototype only; the definition is outside this header.
// Takes the indices of two sequences, the distance matrix (non-const
// reference), a chunk count and the number of sequences; returns a float.
// NOTE(review): presumably looks up the distance between the two indexed
// sequences; the role of chunkCount is not visible here - confirm against
// the definition.
float seq_to_seq_distance(int seq1Index,
                          int seq2Index,
                          std::vector<float>& distanceMatrix,
                          int chunkCount,
                          int numSeqs);
#endif