-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataManipulate.h
More file actions
87 lines (60 loc) · 3.45 KB
/
dataManipulate.h
File metadata and controls
87 lines (60 loc) · 3.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include <cmath>
#include <vector>
#include <numeric>
#include <random>
#include <functional>
#include <ctime>
#include <algorithm>
#include <chrono>
#include <fstream>
#include <cassert>
#include <map>
#include <set>
#include <queue>
#include <iostream>
#include <string>
#include <regex>
#include <sstream>
// Marker macro announcing that this header has been included.
// NOTE(review): presumably other project files test this macro before
// including dataManipulate.h, mirroring the wrappers used below for
// Linear_Algebra.h and Statistics.h — confirm before renaming. Identifiers
// containing a double underscore are reserved for the implementation, so the
// name is kept for compatibility only.
#define __dataManipulate
// A single include-once directive covers the whole file; it must stand alone
// on its line, because any trailing tokens make the directive ill-formed.
#pragma once
// Pull in Linear_Algebra.h at most once, keyed on the project-wide marker macro.
#ifndef __Linear_Algebra
#define __Linear_Algebra
#include "Linear_Algebra.h"
#endif
// Pull in Statistics.h at most once, keyed on the project-wide marker macro.
#ifndef __Statistics
#define __Statistics
#include "Statistics.h"
#endif
// NOTE(review): a using-directive at header scope is visible to every file
// that includes this header. It is kept because the declarations below use
// unqualified standard-library names.
using namespace std;

/*
 * dataManipulate: declarations of data-loading and data-preparation helpers.
 *
 * Only the signatures are visible in this header. Every behavioral remark
 * below that is marked "presumably" is inferred from names alone and must be
 * confirmed against the corresponding definition.
 */
namespace dataManipulate
{
    // ---------------------------------------------------------------------
    // Data loading
    // ---------------------------------------------------------------------

    // Presumably reads the file at `path` into X and Y, converting text fields
    // with `encoder` — confirm. `dim` and `start_index` both default to 0.
    // The meaning of `cmd` and of the int result is not visible here.
    int load_Data_With_Bias(
        string path,
        vector<vector<double>>& X,
        vector<double>& Y,
        const function<double(const string&)>& encoder,
        string cmd,
        int dim = 0,
        int start_index = 0);

    // Variant whose targets Y are vectors rather than scalars; presumably the
    // neural-network loader without a bias column — confirm.
    int load_Data_NoBias_NN(
        string path,
        vector<vector<double>>& X,
        vector<vector<double>>& Y,
        const function<double(const string&)>& encoder,
        string cmd,
        int input_dim,
        int output_dim);

    // Takes an already-constructed input stream plus the X/Y containers;
    // presumably fills them from the stream — confirm.
    void init_NoBias_vector(
        ifstream& iData,
        vector<vector<double>>& X,
        vector<vector<double>>& Y,
        string cmd,
        int input_dim,
        int output_dim);

    // Presumably converts the text fields in `result` into rows of X and
    // entries of Y using `encoder` — confirm. `result` is a non-const reference.
    void data2vector(
        vector<string>& result,
        vector<vector<double>>& X,
        vector<double>& Y,
        const function<double(const string&)>& encoder,
        string cmd,
        int bias,
        int dim,
        int start_index);

    // Same containers as data2vector, but `result` is taken by value (copied
    // at every call) and there is no `bias` parameter.
    void init_vector(
        vector<vector<double>>& X,
        vector<double>& Y,
        const function<double(const string&)>& encoder,
        string cmd,
        int dim,
        vector<string> result,
        int start_index);

    // Presumably reads `dim` values from `iData` into `readData` — confirm.
    void readData_for_NN(
        ifstream& iData,
        vector<double>& readData,
        int dim);

    // The two container parameters are unnamed in this declaration; presumably
    // attribute maps and labels for the decision-tree code — confirm.
    int readData_for_tree(
        string path,
        vector<map<string, string>>&,
        vector<string>&,
        string cmd);

    // Presumably loads the text of the file at `path` into `paragraph` — confirm.
    void readParagraph(
        string path,
        string& paragraph);

    // Presumably loads one row of string fields per user into X — confirm.
    int load_users_information(
        string path,
        vector<vector<string>>& X);

    // Presumably appends message texts to `mail` and matching flags to
    // `is_spam` — confirm how `file_name`, `spam` and `num_file` are used.
    void load_mail(
        string path,
        string file_name,
        vector<string>& mail,
        vector<bool>& is_spam,
        bool spam,
        int num_file);

    // ---------------------------------------------------------------------
    // Data processing
    // ---------------------------------------------------------------------

    // Returns a vector of strings derived from `source`; presumably the pieces
    // separated by `delim` — confirm. `delim` defaults to '\n'.
    vector<string> string_partition(
        const string &source,
        char delim = '\n');

    // split_data overloads: presumably distribute `data` between `train` and
    // `test` according to `trainSize` — confirm. Only the numeric overload
    // defaults trainSize (0.8); the other two require it explicitly.
    void split_data(
        vector< pair<vector<double>, double> >& data,
        vector<pair<vector<double>, double>>& train,
        vector<pair<vector<double>, double>>& test,
        double trainSize = 0.8);

    void split_data(
        vector<pair<map<string, string>, string>>& data,
        vector<pair<map<string, string>, string>>& train,
        vector<pair<map<string, string>, string>>& test,
        double trainSize);

    void split_data(
        vector< pair<string, bool> >& data,
        vector<pair<string, bool>>& train,
        vector<pair<string, bool>>& test,
        double trainSize);

    // train_test_split overloads: same idea with X and Y held in separate
    // containers. The numeric and map/string overloads default trainSize to
    // 0.8; the string/bool overload requires it explicitly.
    void train_test_split(
        vector<vector<double> >& X,
        vector<double>& Y,
        vector<vector<double> >& X_train,
        vector<double>& Y_train,
        vector<vector<double> >& X_test,
        vector<double>& Y_test,
        double trainSize = 0.8);

    void train_test_split(
        vector<map<string, string>>& X,
        vector<string>& Y,
        vector<map<string, string>>& X_train,
        vector<string>& Y_train,
        vector<map<string, string>>& X_test,
        vector<string>& Y_test,
        double trainSize = 0.8);

    void train_test_split(
        vector<string>& X,
        vector<bool>& Y,
        vector<string>& X_train,
        vector<bool>& Y_train,
        vector<string>& X_test,
        vector<bool>& Y_test,
        double trainSize);

    // Returns a vector<T> built from `data`; presumably a bootstrap resample
    // — confirm. Only the declaration appears here, so the definition must be
    // visible (or explicitly instantiated) wherever the template is used.
    template<typename T>
    vector<T> bootstrap_Xi(const vector<T>& data);

    // U appears only in the return type, so it cannot be deduced from the
    // arguments; callers have to name the template arguments explicitly.
    // Presumably applies `stats_fn` to `num_bootstrap` resamples — confirm.
    template<typename T, typename U, typename V>
    vector<U> bootstrap_statisticXi(
        vector<T>& data,
        int num_bootstrap,
        function<V(T)>stats_fn);

    // Returns (feature row, target) pairs; presumably resampled jointly from
    // X and Y — confirm.
    vector<pair<vector<double>, double>> bootstrap_sample(
        vector<vector<double>>& X,
        vector<double>& Y);

    // NOTE(review): `word` is passed by value and nothing is returned, so
    // through this signature the caller's string cannot be changed — check
    // the definition and call sites for the intended effect.
    void to_lower(string word);

    // Conversions between text and numbers; parsing and formatting rules are
    // not visible here.
    int to_int(string data);
    double to_double(string data);
    string to_word(int Val);
}