diff --git a/include/gwmodel.h b/include/gwmodel.h index 20966182..a1937616 100644 --- a/include/gwmodel.h +++ b/include/gwmodel.h @@ -40,5 +40,6 @@ #include "gwmodelpp/GWAverage.h" #include "gwmodelpp/GWCorrelation.h" #include "gwmodelpp/GWPCA.h" +#include "gwmodelpp/GTWR.h" -#endif // GWMODEL_H \ No newline at end of file +#endif // GWMODEL_H diff --git a/include/gwmodelpp/GWPCA.h b/include/gwmodelpp/GWPCA.h index 826d92ef..ab2ce2ce 100644 --- a/include/gwmodelpp/GWPCA.h +++ b/include/gwmodelpp/GWPCA.h @@ -19,7 +19,7 @@ namespace gwm class GWPCA: public SpatialMonoscaleAlgorithm, public IMultivariableAnalysis { private: - typedef arma::mat (GWPCA::*Solver)(const arma::mat&, arma::cube&, arma::mat&); //!< \~english Calculator to solve \~chinese 模型求解函数 + typedef arma::mat (GWPCA::*Solver)(const arma::mat&, arma::cube&, arma::cube&, arma::mat&); //!< \~english Calculator to solve \~chinese 模型求解函数 public: // Constructors and Deconstructors @@ -62,6 +62,20 @@ class GWPCA: public SpatialMonoscaleAlgorithm, public IMultivariableAnalysis */ void setKeepComponents(int k) { mK = k; } + /** + * @brief \~english Get the Robust flag. \~chinese 获取是否使用鲁棒模式。 + * + * @return bool \~english Robust flag \~chinese 是否使用鲁棒模式 + */ + bool robust() { return mRobust; } + + /** + * @brief \~english Set the Robust flag. \~chinese 设置是否使用鲁棒模式。 + * + * @param robust \~english Robust flag \~chinese 是否使用鲁棒模式 + */ + void setRobust(bool robust) { mRobust = robust; mSolver = robust ? &GWPCA::solveRobustSerial : &GWPCA::solveSerial; } + /** * @brief \~english Get the Local Principle Values matrix. \~chinese 获取局部主成分值。 * @@ -86,7 +100,7 @@ class GWPCA: public SpatialMonoscaleAlgorithm, public IMultivariableAnalysis /** * @brief \~english Get the Scores matrix. \~chinese 获取得分矩阵。 * - * @return arma::mat \~english Scores matrix \~chinese 得分矩阵 + * @return arma::mat \~english Scores matrix \~chinese 得分矩阵1 */ const arma::cube& scores() { return mScores; } @@ -105,52 +119,77 @@ class GWPCA: public SpatialMonoscaleAlgorithm, public IMultivariableAnalysis * * @param x \~english Symmetric data matrix \~chinese 对称数据矩阵 * @param loadings [out] \~english Out reference to loadings matrix \~chinese 载荷矩阵 + * @param scores [out] \~english Out reference to scores matrix \~chinese 得分矩阵 * @param sdev [out] \~english Out reference to standard deviation matrix \~chinese 标准差 * @return arma::mat \~english Principle values matrix \~chinese 主成分值矩阵 */ - arma::mat pca(const arma::mat& x, arma::cube& loadings, arma::mat& sdev) + arma::mat pca(const arma::mat& x, arma::cube& loadings, arma::cube& scores, arma::mat& sdev) { - return (this->*mSolver)(x, loadings, sdev); + return (this->*mSolver)(x, loadings, scores, sdev); } /** - * @brief \~english Serial version of PCA funtion. \~chinese 单线程 PCA 函数。 + * @brief \~english Serial version of PCA funtion. \~chinese 单线程 PCA 函数。1 + * + * @param x \~english Symmetric data matrix \~chinese 对称数据矩阵1 + * @param loadings [out] \~english Out reference to loadings matrix \~chinese 载荷矩阵1 + * @param scores [out] \~english Out reference to scores matrix \~chinese 得分矩阵1 + * @param sdev [out] \~english Out reference to standard deviation matrix \~chinese 标准差1 + * @return arma::mat \~english Principle values matrix \~chinese 主成分值矩阵1 + */ + arma::mat solveSerial(const arma::mat& x, arma::cube& loadings, arma::cube& scores, arma::mat& sdev); + + /** + * @brief \~english Robust serial version of PCA function. \~chinese 鲁棒单线程 PCA 函数。 * * @param x \~english Symmetric data matrix \~chinese 对称数据矩阵 * @param loadings [out] \~english Out reference to loadings matrix \~chinese 载荷矩阵 + * @param scores [out] \~english Out reference to scores matrix \~chinese 得分矩阵 * @param sdev [out] \~english Out reference to standard deviation matrix \~chinese 标准差 * @return arma::mat \~english Principle values matrix \~chinese 主成分值矩阵 */ - arma::mat solveSerial(const arma::mat& x, arma::cube& loadings, arma::mat& sdev); + arma::mat solveRobustSerial(const arma::mat& x, arma::cube& loadings, arma::cube& scores, arma::mat& sdev); /** * @brief \~english Function to carry out weighted PCA. \~chinese 执行加权PCA的函数。 * * @param x \~english Symmetric data matrix \~chinese 对称数据矩阵 * @param w \~english Weight vector \~chinese 权重向量 - * @param V [out] \~english Right orthogonal matrix \~chinese 右边的正交矩阵 - * @param d [out] \~english Rectangular diagonal matri \~chinese 矩形对角阵 + * @param U [out] \~english Left orthogonal matrix (scores) \~chinese 左正交矩阵(得分) + * @param V [out] \~english Right orthogonal matrix (loadings) \~chinese 右正交矩阵(载荷) + * @param d [out] \~english Rectangular diagonal matrix \~chinese 矩形对角阵 + */ + void wpca(const arma::mat& x, const arma::vec& w, arma::mat& U, arma::mat& V, arma::vec & d); + + /** + * @brief \~english Function to carry out robust weighted PCA. \~chinese 执行鲁棒加权PCA的函数。 + * + * @param x \~english Symmetric data matrix \~chinese 对称数据矩阵 + * @param w \~english Weight vector \~chinese 权重向量 + * @param U [out] \~english Left orthogonal matrix (scores) \~chinese 左正交矩阵(得分) + * @param V [out] \~english Right orthogonal matrix (loadings) \~chinese 右正交矩阵(载荷) + * @param d [out] \~english Rectangular diagonal matrix \~chinese 矩形对角阵 */ - void wpca(const arma::mat& x, const arma::vec& w, arma::mat& V, arma::vec & d); + void rwpca(const arma::mat& x, const arma::vec& w, arma::mat& U, arma::mat& V, arma::vec & d); private: // Algorithm Parameters int mK = 2; //!< \~english Number of components to be kept \~chinese 要保留的主成分数量 - // bool mRobust = false; + bool mRobust = false; //!< \~english Robust mode flag \~chinese 鲁棒模式标志 private: // Algorithm Results - arma::mat mLocalPV; //!< \~english Local principle component values \~chinese 局部主成分值 - arma::cube mLoadings; //!< \~english Loadings for each component \~chinese 局部载荷矩阵 - arma::mat mSDev; //!< \~english Standard Deviation \~chinese 标准差矩阵 - arma::cube mScores; //!< \~english Scores for each variable \~chinese 得分矩阵 - arma::uvec mWinner; //!< \~english Winner variable at each sample \~chinese 优胜变量索引值 + arma::mat mLocalPV; //!< \~english Local principle component values \~chinese 局部主成分值1 + arma::cube mLoadings; //!< \~english Loadings for each component \~chinese 局部载荷矩阵1 + arma::mat mSDev; //!< \~english Standard Deviation \~chinese 标准差矩阵1 + arma::cube mScores; //!< \~english Scores for each variable \~chinese 得分矩阵1 + arma::uvec mWinner; //!< \~english Winner variable at each sample \~chinese 优胜变量索引值1 private: // Algorithm Runtime Variables - arma::mat mX; //!< \~english Variable matrix \~chinese 变量矩阵 - arma::vec mLatestWt; //!< \~english Latest weigths \~chinese 最新的权重 + arma::mat mX; //!< \~english Variable matrix \~chinese 变量矩阵1 + arma::vec mLatestWt; //!< \~english Latest weigths \~chinese 最新的权重1 - Solver mSolver = &GWPCA::solveSerial; //!< \~english Calculator to solve \~chinese 模型求解函数 + Solver mSolver = &GWPCA::solveSerial; //!< \~english Calculator to solve \~chinese 模型求解函数1 }; } -#endif // GWPCA_H \ No newline at end of file +#endif // GWPCA_H diff --git a/src/gwmodelpp/GWPCA.cpp b/src/gwmodelpp/GWPCA.cpp index a2a954ac..beae8598 100644 --- a/src/gwmodelpp/GWPCA.cpp +++ b/src/gwmodelpp/GWPCA.cpp @@ -10,47 +10,120 @@ void GWPCA::run() GWM_LOG_STOP_RETURN(mStatus, void()); GWM_LOG_STAGE("Solving"); - mLocalPV = pca(mX, mLoadings, mSDev); + mLocalPV = pca(mX, mLoadings, mScores, mSDev); GWM_LOG_STOP_RETURN(mStatus, void()); - + mWinner = index_max(mLoadings.slice(0), 1); } -mat GWPCA::solveSerial(const mat& x, cube& loadings, mat& sdev) +mat GWPCA::solveSerial(const mat& x, cube& loadings, cube& scores, mat& sdev) { uword nDp = mCoords.n_rows, nVar = mX.n_cols; mat d_all(nVar, nDp, arma::fill::zeros); - vec w0; + loadings = cube(nDp, nVar, mK, arma::fill::zeros); + scores = cube(nDp, mK, nDp, arma::fill::zeros); + for (uword i = 0; i < nDp; i++) { GWM_LOG_STOP_BREAK(mStatus); + vec w = mSpatialWeight.weightVector(i); - mat V; + uvec positive = find(w > 0); + vec newWt = w.elem(positive); + mat newX = x.rows(positive); + if (newWt.n_rows <= 5) + { + break; + } + + mat U, V; vec d; - wpca(x, w, V, d); - w0 = w; + wpca(newX, newWt, U, V, d); + + mLatestWt = newWt; d_all.col(i) = d; + for (int j = 0; j < mK; j++) { loadings.slice(j).row(i) = arma::trans(V.col(j)); } + + mat scorei(nDp, mK, arma::fill::zeros); + for (int j = 0; j < mK; j++) + { + mat score = newX.each_row() % arma::trans(V.col(j)); + scorei.col(j) = sum(score, 1); + } + scores.slice(i) = scorei; + GWM_LOG_PROGRESS(i + 1, nDp); } + d_all = trans(d_all); - mat variance = (d_all / sqrt(sum(w0))) % (d_all / sqrt(sum(w0))); - sdev = sqrt(variance); + mat variance = (d_all / pow(sum(mLatestWt), 0.5)) % (d_all / pow(sum(mLatestWt), 0.5)); + sdev = arma::sqrt(variance); mat pv = variance.cols(0, mK - 1).each_col() % (1.0 / sum(variance, 1)) * 100.0; return pv; } -void GWPCA::wpca(const mat& x, const vec& w, mat& V, vec & d) +void GWPCA::wpca(const mat& x, const vec& w, mat& U, mat& V, vec & d) { - mat xw = x.each_col() % w, U; + mat xw = x.each_col() % w; mat centerized = (x.each_row() - sum(xw) / sum(w)).each_col() % sqrt(w); svd(U, d, V, centerized); } +void GWPCA::rwpca(const mat& x, const vec& w, mat& U, mat& V, vec & d) +{ + mat mids = x; + uword medianIdx = (abs(w - 0.5)).index_min(); + mids = mids.each_row() - x.row(medianIdx); + mat weighted = mids.each_col() % w; + mat score; + vec tsquared; + princomp(V, score, d, tsquared, weighted); + U = score; +} + +mat GWPCA::solveRobustSerial(const mat& x, cube& loadings, cube& scores, mat& sdev) +{ + uword nDp = mCoords.n_rows, nVar = mX.n_cols; + mat d_all(nVar, nDp, arma::fill::zeros); + vec w0; + loadings = cube(nDp, nVar, mK, arma::fill::zeros); + scores = cube(nDp, nDp, mK, arma::fill::zeros); + for (uword i = 0; i < nDp; i++) + { + GWM_LOG_STOP_BREAK(mStatus); + vec w = mSpatialWeight.weightVector(i); + uvec positive = find(w > 0); + vec newWt = w.elem(positive); + mat newX = x.rows(positive); + if (newWt.n_rows <= 5) + { + continue; + } + mat U, V; + vec d; + rwpca(newX, newWt, U, V, d); + w0 = newWt; + d_all.col(i) = d; + for (int j = 0; j < mK; j++) + { + loadings.slice(j).row(i) = arma::trans(V.col(j)); + mat scoreAll = x.each_row() % arma::trans(V.col(j)); + scores.slice(j).col(i) = sum(scoreAll, 1); + } + GWM_LOG_PROGRESS(i + 1, nDp); + } + d_all = trans(d_all); + mat variance = (d_all / pow(sum(w0), 0.5)) % (d_all / pow(sum(w0), 0.5)); + sdev = sqrt(variance); + mat pv = variance.cols(0, mK - 1).each_col() % (1.0 / sum(variance, 1)) * 100.0; + return pv; +} + bool GWPCA::isValid() { if (SpatialAlgorithm::isValid()) diff --git a/test/CMakeFind/main.cpp b/test/CMakeFind/main.cpp index 1fb93361..e5939e88 100644 --- a/test/CMakeFind/main.cpp +++ b/test/CMakeFind/main.cpp @@ -6,14 +6,12 @@ using namespace arma; int main() { mat coords(100, 2, fill::randu); - mat x = join_rows(vec(100, fill::ones), mat(100, 2, fill::randu)); - mat betas = mat(100, 3, fill::randu); - vec eps(100, fill::randu); - vec y = sum(x % betas, 1) + eps; + mat x = mat(100, 3, fill::randu); BandwidthWeight bw(36.0, true, BandwidthWeight::Gaussian); CRSDistance dist(false); SpatialWeight sw(&bw, &dist); - GWRBasic algorithm(x, y, coords, sw); - algorithm.fit(); + GWPCA algorithm(x, coords, sw); + algorithm.setKeepComponents(2); + algorithm.run(); return 0; } diff --git a/test/testGWPCA.cpp b/test/testGWPCA.cpp index 67d8bd8a..2c00b1a1 100644 --- a/test/testGWPCA.cpp +++ b/test/testGWPCA.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include "gwmodelpp/GWPCA.h" #include "gwmodelpp/spatialweight/CRSDistance.h" @@ -23,7 +24,6 @@ TEST_CASE("GWPCA: basic flow") { FAIL("Cannot load londonhp100 data."); } - CRSDistance distance(false); BandwidthWeight bandwidth(36, true, BandwidthWeight::Gaussian); SpatialWeight spatial(&bandwidth, &distance); @@ -78,6 +78,121 @@ TEST_CASE("GWPCA: basic flow") REQUIRE(approx_equal(loadings_pc2_q, loadings_pc2_q0, "absdiff", 1e-8)); } +TEST_CASE("GWPCA: summary statistics") +{ + mat londonhp100_coord, londonhp100_data; + vector londonhp100_fields; + if (!read_londonhp100(londonhp100_coord, londonhp100_data, londonhp100_fields)) + { + FAIL("Cannot load londonhp100 data."); + } + + CRSDistance distance(false); + BandwidthWeight bandwidth(15, true, BandwidthWeight::Gaussian); + SpatialWeight spatial(&bandwidth, &distance); + + mat x = londonhp100_data.cols(1, 3); + + GWPCA algorithm; + algorithm.setCoords(londonhp100_coord); + algorithm.setVariables(x); + algorithm.setSpatialWeight(spatial); + algorithm.setKeepComponents(2); + REQUIRE_NOTHROW(algorithm.run()); + + vec p = {0.0, 0.25, 0.5, 0.75, 1.0}; + + mat localPV = algorithm.localPV(); + mat sdev = algorithm.sdev(); + mat localVariance = sdev % sdev; + + BandwidthWeight* bw_weight = algorithm.spatialWeight().weight(); + double bw_value = bw_weight->bandwidth(); + bool bw_adaptive = bw_weight->adaptive(); + BandwidthWeight::KernelFunctionType kernel_type = bw_weight->kernel(); + const char* bw_kernel_name = "Unknown"; + switch (kernel_type) + { + case BandwidthWeight::Gaussian: bw_kernel_name = "Gaussian"; break; + case BandwidthWeight::Exponential: bw_kernel_name = "Exponential"; break; + case BandwidthWeight::Bisquare: bw_kernel_name = "Bisquare"; break; + case BandwidthWeight::Tricube: bw_kernel_name = "Tricube"; break; + case BandwidthWeight::Boxcar: bw_kernel_name = "Boxcar"; break; + } + + printf("\n"); + printf("Summary of GWPCA information\n"); + printf("=====================================\n\n"); + printf("Bandwidth: %.0f (%s, %s kernel)\n", bw_value, bw_adaptive ? "adaptive" : "fixed", bw_kernel_name); + printf("\n"); + + printf("Local variance:\n"); + printf("%-15s %15s %15s\n", "", "Comp.1", "Comp.2"); + mat variance_q = quantile(localVariance, p, 0); + printf("%-15s %15.3f %15.3f\n", "Min", variance_q(0, 0), variance_q(0, 1)); + printf("%-15s %15.3f %15.3f\n", "1st Qu", variance_q(1, 0), variance_q(1, 1)); + printf("%-15s %15.3f %15.3f\n", "Median", variance_q(2, 0), variance_q(2, 1)); + printf("%-15s %15.3f %15.3f\n", "3rd Qu", variance_q(3, 0), variance_q(3, 1)); + printf("%-15s %15.3f %15.3f\n", "Max", variance_q(4, 0), variance_q(4, 1)); + printf("\n"); + + printf("Local Proportion of Variance:\n"); + printf("%-15s %15s %15s\n", "", "Comp.1", "Comp.2"); + mat pv_q = quantile(localPV, p, 0); + printf("%-15s %15.3f %15.3f\n", "Min", pv_q(0, 0), pv_q(0, 1)); + printf("%-15s %15.3f %15.3f\n", "1st Qu", pv_q(1, 0), pv_q(1, 1)); + printf("%-15s %15.3f %15.3f\n", "Median", pv_q(2, 0), pv_q(2, 1)); + printf("%-15s %15.3f %15.3f\n", "3rd Qu", pv_q(3, 0), pv_q(3, 1)); + printf("%-15s %15.3f %15.3f\n", "Max", pv_q(4, 0), pv_q(4, 1)); + + vec cumulative = sum(localPV, 1); + vec cum_q = quantile(cumulative, p); + printf("%-15s %15.3f %15s\n", "Cumulative", cum_q(0), ""); + printf("%-15s %15.3f %15s\n", "", cum_q(1), ""); + printf("%-15s %15.3f %15s\n", "", cum_q(2), ""); + printf("%-15s %15.3f %15s\n", "", cum_q(3), ""); + printf("%-15s %15.3f %15s\n", "", cum_q(4), ""); + printf("\n"); + + cube scores = algorithm.scores(); + if (scores.n_elem > 0) + { + printf("Local Scores:\n"); + printf("%-15s %15s %15s\n", "", "Comp.1", "Comp.2"); + + mat scores_pc1(scores.n_rows, scores.n_cols); + mat scores_pc2(scores.n_rows, scores.n_cols); + for (uword i = 0; i < scores.n_rows; i++) + { + for (uword j = 0; j < scores.n_cols; j++) + { + scores_pc1(i, j) = scores(i, j, 0); + if (scores.n_slices > 1) + { + scores_pc2(i, j) = scores(i, j, 1); + } + } + } + + vec scores_pc1_flat = vectorise(scores_pc1); + vec scores_pc2_flat = vectorise(scores_pc2); + + vec scores_pc1_q = quantile(scores_pc1_flat, p); + vec scores_pc2_q = quantile(scores_pc2_flat, p); + + printf("%-15s %15.3f %15.3f\n", "Min", scores_pc1_q(0), scores_pc2_q(0)); + printf("%-15s %15.3f %15.3f\n", "1st Qu", scores_pc1_q(1), scores_pc2_q(1)); + printf("%-15s %15.3f %15.3f\n", "Median", scores_pc1_q(2), scores_pc2_q(2)); + printf("%-15s %15.3f %15.3f\n", "3rd Qu", scores_pc1_q(3), scores_pc2_q(3)); + printf("%-15s %15.3f %15.3f\n", "Max", scores_pc1_q(4), scores_pc2_q(4)); + printf("\n"); + } + else + { + printf("Local Scores: (not yet implemented)\n\n"); + } +} + TEST_CASE("GWSS: cancel") {