-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample_code.m
More file actions
90 lines (69 loc) · 2.48 KB
/
example_code.m
File metadata and controls
90 lines (69 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
%%% Usage template: assumes an oracle (known) number of clusters and features.
%%% This section is kept entirely as comments because the placeholder
%%% assignments (e.g. "X = ;") are not valid MATLAB and would prevent the
%%% script from running. Copy it, fill in the values, and uncomment to use it.
%
% Input:
%   X     : continuous/Gaussian data, p * n
%   y     : supervising auxiliary variable
%             1 * n for Gaussian, binary, categorical, or count data
%             2 * n for survival data (survival time and censoring
%             indicator; the original note calls these "columns" although
%             the stated size is 2 * n -- confirm the expected orientation
%             against scc)
%   type  : type of supervising auxiliary variable; one of
%           "gaussian", "binary", "categorical", "count", "survival"
%   K     : desired number of clusters
%   Z_cov : additional covariates, n * p_cov (the examples in this file
%           call scc both with and without this argument)
%
% X = ; y = ; Z_cov = ;
% type = "";
% K = ;
%
% SCC
% [class_id] = scc(X,y,type,K,Z_cov);
%
% Output:
%   class_id : cluster assignment
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Example Code; Example data from simulation in paper
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Without covariates
%%% Spherical Data + Gaussian Supervising Auxiliary Variable
% Load the example data; the files are expected to provide X (p * n) and
% Y (1 * n) in the workspace.
load('gaus_S1_X_R_1.mat')
load('gaus_S1_Y_R_1.mat')
K = 3;
[class_id] = scc(X,Y,"gaussian",K);
%%% Visualize Clustering result
% Draw in a dedicated figure so an already-open figure is not overwritten.
figure
% pca expects observations in rows, hence the transpose of the p * n matrix.
% Only the scores are needed for the plot.
[~,score] = pca(X');
% One RGB row per sample: cluster 1 -> red, 2 -> green, 3 -> blue.
% Samples with any other label stay black (all zeros).
c = double(class_id(:) == 1:3);
sz = 25;
scatter(score(:,1),score(:,2),sz,c,'filled');
title("PCA plot in X")
%%% Spherical Data + Categorical Supervising Auxiliary Variable
% Load the example data; the files are expected to provide X (p * n) and
% Y (1 * n) in the workspace.
load('cate_S1_X_R_1.mat')
load('cate_S1_Y_R_1.mat')
K = 3;
[class_id] = scc(X,Y,"categorical",K);
%%% Visualize Clustering result
% Open a new figure first: subplot removes axes it overlaps in the current
% figure, which would otherwise destroy a plot drawn earlier in the session.
figure
% Both panels show the same projection and differ only in colouring, so the
% PCA is computed once. pca expects observations in rows, hence X'.
[~,score] = pca(X');
sz = 25;
% Left panel: colour by estimated cluster (1 -> red, 2 -> green, 3 -> blue;
% any other label stays black).
subplot(1,2,1)
c = double(class_id(:) == 1:3);
scatter(score(:,1),score(:,2),sz,c,'filled');
title("PCA plot in X, colors are estimated cluster labels")
% Right panel: colour by the supervising auxiliary variable Y, using the
% same colour convention.
subplot(1,2,2)
c = double(Y(:) == 1:3);
scatter(score(:,1),score(:,2),sz,c,'filled');
title("PCA plot in X, colors are labels from supervising auxiliary variable")
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% With covariates
% Example with a Gaussian supervising auxiliary variable plus additional
% covariates passed to scc as the fifth argument.
% NOTE(review): the scc call needs X, Y and Z_cov in the workspace;
% presumably they are defined by the three .mat files loaded here --
% confirm the variable names stored in those files.
load('gaus_cov_X_R_1.mat');
load('gaus_cov_Y_R_1.mat');
load('gaus_cov_Z_R_1.mat');
K = 3;  % number of clusters requested
class_id = scc(X, Y, "gaussian", K, Z_cov);