Skip to content

Commit cf2bf53

Browse files
committed
Merge branch 'devel'
2 parents 8e63c53 + 499d727 commit cf2bf53

29 files changed

Lines changed: 525 additions & 221 deletions

DESCRIPTION

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,30 @@ Maintainer: Pavlo Lutsik <p.lutsik@mx.uni-saarland.de>
55
Authors@R: c(
66
person ( "Pavlo", "Lutsik", email="p.lutsik@mx.uni-saarland.de", role = c("cre")),
77
person ( "Martin", "Slawski", email="ms@cs.uni-saarland.de", role = c("aut")),
8+
person ( "Nik", "Vedeneev", email="nikitavedeneev@gmail.com", role = c("aut")),
89
person ( "Gilles", "Gasparoni", email="gillesgasparoni@gmail.com", role = c("aut")),
910
person ( "Matthias", "Hein", email="hein@cs.uni-saarland.de", role = c("aut")),
1011
person ( "Joern", "Walter", email="j.walter@mx.uni-saarland.de", role = c("aut"))
1112
)
12-
Date: 2016-06-30
13-
Version: 0.11
13+
Date: 2016-11-02
14+
Version: 0.2
1415
Depends:
1516
R (>= 3.2.0),
1617
Rcpp,
1718
pracma,
1819
gtools,
1920
gplots,
20-
parallel,
21+
parallel
2122
Suggests:
2223
BiocStyle,
2324
knitr,
2425
rmarkdown
26+
Imports:
27+
Rcpp (>= 0.12),
28+
RcppEigen (>= 0.3)
29+
LinkingTo:
30+
Rcpp,
31+
RcppEigen
2532
VignetteBuilder: knitr
2633
License: GPL-3
27-
LinkingTo: Rcpp
2834
RoxygenNote: 5.0.1

NEWS

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
MeDeCom 0.2
2+
=============
3+
4+
* Added improved optimization routines (cppTAfact by Nik Vedeneev) which are now used by default
5+
* Improved R-side parallelization in the single machine mode
6+
* Multiple minor fixes and improvements
7+
8+
MeDeCom 0.1
9+
=============
10+
11+
* Initial release

R/RcppExports.R

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
# This file was generated by Rcpp::compileAttributes
1+
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

4-
RHLasso <- function(Ginp, Winp, Ainp, l) {
5-
.Call('MeDeCom_RHLasso', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, l)
4+
cppTAfact <- function(mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda = 0.0, itersMax = 1000L, tol = 1e-8, tolA = 1e-7, tolT = 1e-7) {
5+
.Call('MeDeCom_cppTAfact', PACKAGE = 'MeDeCom', mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda, itersMax, tol, tolA, tolT)
66
}
77

8-
matMult <- function(Ainp, Binp) {
9-
.Call('MeDeCom_matMult', PACKAGE = 'MeDeCom', Ainp, Binp)
8+
RHLasso <- function(Ginp, Winp, Ainp, l) {
9+
.Call('MeDeCom_RHLasso', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, l)
1010
}
1111

1212
RQuadHC <- function(Ginp, Winp, Ainp, otol, lconstr, uconstr) {

R/allGlobals.R

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
#######################################################################################################################
23
# GLOBALS
34
#######################################################################################################################
@@ -7,7 +8,9 @@ ALGORITHMS<-c(
78
"regression",
89
"houseman2012",
910
"houseman2016",
10-
"MeDeCom",
11+
"MeDeCom",
12+
"MeDeCom.quadPen",
13+
"MeDeCom.cppTAfact",
1114
"HLasso",
1215
"IntFac",
1316
"IntEmpirical",
@@ -19,7 +22,9 @@ T_METHODS<-c(
1922
NA,
2023
NA,
2124
NA,
22-
"quadPen",
25+
"quadPen",
26+
"quadPen",
27+
"cppTAfact",
2328
"Hlasso",
2429
"integer",
2530
"empirical",
@@ -33,14 +38,16 @@ ALGORITHM.COLS=c(
3338
"regression"="blue",
3439
"houseman2012"="deepskyblue",
3540
"houseman2016"="skyblue",
36-
"MeDeCom"="red",
41+
"MeDeCom"="red",
42+
"MeDeCom.quadPen"="red",
43+
"MeDeCom.cppTAfact"="tomato",
3744
"HLasso"="orange",
3845
"IntFac"="green",
3946
"IntEmpirical"="plum",
4047
"Resample"="magenta",
4148
"VertexSearch"="brown")
4249

43-
ALGORITHM.PCH=c(15,0,0,0,2,3,4,5,6,1)
50+
ALGORITHM.PCH=c(15,0,0,0,2,2,2,3,4,5,6,1)
4451

4552
names(ALGORITHM.PCH)<-ALGORITHMS
4653

R/factorizations.R

Lines changed: 122 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ updateT_gini<-function(G, W, Tk, lambdaT, tol, f0, lower=0, upper=1){
180180
return(list(Tk1, fk1, iter))
181181
}
182182

183+
183184
#######################################################################################################################
184185

185186
updateT_empirical<-function(G, W, Poss, lambda){
@@ -239,7 +240,25 @@ updateT_empirical<-function(G, W, Poss, lambda){
239240
# return(Tnew)
240241
#
241242
#}
243+
#######################################################################################################################
242244

245+
updateT_multicore<-function(method="gini", G, W, Tk, lambdaT, tol, f0, lower=0, upper=1, ncores=2){
246+
247+
if(method=="gini"){
248+
update.func<-updateT_gini
249+
}
250+
251+
row.partition<-lapply(0:(ncores-1), function(chunck) (1:(ncol(W)%/%ncores))+chunck*(ncol(W)%/%ncores))
252+
253+
opt_res<-mclapply(row.partition, function(row.ind) update.func(G, W[,row.ind], Tk[,row.ind], lambdaT, tol, f0, lower=0, upper=1), mc.cores=ncores)
254+
#opt_res<-lapply(row.partition, function(row.ind) update.func(G, W[,row.ind], Tk[,row.ind], lambdaT, tol, f0, lower=0, upper=1))
255+
256+
Tk<-do.call("cbind", lapply(opt_res, el, 1))
257+
fk<-sum(sapply(opt_res, el, 2))
258+
avg_iter<-mean(sapply(opt_res, el, 3))
259+
260+
return(list(Tk,fk,avg_iter))
261+
}
243262

244263
#######################################################################################################################
245264
# Wrappers for the T update methods
@@ -393,7 +412,8 @@ onerun.alternate<-function(
393412
eps=1e-8,
394413
blocks=NULL,
395414
na.values=FALSE,
396-
verbose=TRUE){
415+
verbose=TRUE,
416+
ncores=1){
397417

398418
k<-ncol(T0)
399419
if(!is.null(Tfix)){
@@ -586,7 +606,11 @@ onerun.alternate<-function(
586606
G <- A %*% t(A); W <- A %*% Dt4T
587607
##%%%mexHCLasso(G,W,T',lambda);
588608

589-
res <- updateT_gini(G, W, Tstart, lambda, eps, f - norm_val, lower=qp.rangeT[1], upper=qp.rangeT[2]);
609+
if(ncores>1){
610+
res <- updateT_multicore("gini", G, W, Tstart, lambda, eps, f - norm_val, lower=qp.rangeT[1], upper=qp.rangeT[2], ncores=ncores);
611+
}else{
612+
res <- updateT_gini(G, W, Tstart, lambda, eps, f - norm_val, lower=qp.rangeT[1], upper=qp.rangeT[2]);
613+
}
590614
Trecov <- t(res[[1]]); ftemp<-res[[2]]
591615

592616
if(!is.null(Tfix)){
@@ -750,13 +774,86 @@ onerun.alternate<-function(
750774
}
751775

752776
rmse<-sqrt(sum((D-TT%*%A)^2)/ncol(D)/nrow(D))
753-
result<-list("T" = TT, "A" = A, "Fval" = Fval, "Conv" = Conv, "rmse"=rmse)
777+
result<-list("T" = TT, "A" = A, "Fval" = Fval, "Conv" = Conv, "rmse" = rmse)
754778
if(!is.null(Tfix)){
755779
result$Afix<-Afix
756780
}
757781
return(result)
758782
}
759783

784+
785+
786+
#######################################################################################################################
787+
# Implemenation of the alternating optimization scheme
788+
#######################################################################################################################
789+
#
790+
# a wrapper for cppTAfact
791+
#
792+
# cppTAfact - alternating optimization framework to solve the following
793+
# problem:
794+
# find T, A such that 0.5 * (||D - TA||_F)^2 + lambda ,
795+
# where D is a mxn matrix with entries between 0 and 1,
796+
# T is a mxr matrix with entries between 0 and 1,
797+
# A is a rxn matrix with nonnegative values and columns summing up
798+
# to 1.
799+
#
800+
# author: Nikita Vedeneev
801+
#
802+
# R port by Pavlo Lutsik
803+
#
804+
805+
onerun.cppTAfact<-function(
806+
D,
807+
T0,
808+
A0,
809+
Tfix=NULL,
810+
Tpartial=NULL,
811+
Tpartial.rows=NULL,
812+
Apartial=NULL,
813+
Apartial.cols=NULL,
814+
t.method="quadPen",
815+
lambda = 0,
816+
t.Poss = NULL,
817+
normD = NULL,
818+
itermax=100,
819+
qp.rangeT=c(0,1),
820+
#qp.Alower=rep(0,ncol(T0)),
821+
#qp.Aupper=rep(1,ncol(T0)),
822+
qp.Alower=NULL,
823+
qp.Aupper=NULL,
824+
emp.dim=500,
825+
emp.resample=TRUE,
826+
emp.vsf=1,
827+
emp.borders=c(0,1),
828+
trace=FALSE,
829+
eps=1e-8,
830+
blocks=NULL,
831+
na.values=FALSE,
832+
verbose=TRUE,
833+
ncores=1){
834+
835+
res<-cppTAfact(
836+
t(D), #- a transposed D matrix,
837+
t(T0), #-Ttinit - a transposed init for T matrix,
838+
A0, # - an initial value for A matrix,
839+
lambda,# - regularizer parameter (0.0 by default),
840+
itermax, #itersMax, - a max number of alternations (1000 by default),
841+
eps, #tol - tolerance for alternations (1e-8 by default),
842+
10*eps, #tolA - tolerance for opt wrt A (1e-7 by default),
843+
10*eps #tolT - tolerance for opt wrt T (1e-7 by default)
844+
)
845+
### TODO: modify cppTAfact to output the list is identical to the output of onerun.alternate
846+
#
847+
#cppTAfact returns a named list where:
848+
#res$Tt - a transposed estimated of T matrix,
849+
#res$A - an estimate of A matrix,
850+
#res$niter - a total number of alternations
851+
#res$objF - objective value at res$Tt and res$A
852+
#
853+
result<-list("T" = t(res$Tt), "A" = res$A, "Fval" = res$objF, "Conv" = res$niter, "rmse"= res$rmse)
854+
return(result)
855+
}
856+
760857
#######################################################################################################################
761858
#'
762859
#' factorize.alternate
@@ -823,7 +920,8 @@ onerun.alternate<-function(
823920
#' @export
824921
#'
825922
factorize.alternate<-function(D,
826-
k,
923+
k,
924+
method="MeDeCom.quadPen",
827925
t.method="quadPen",
828926
Tfix=NULL,
829927
Tpartial=NULL,
@@ -849,9 +947,10 @@ factorize.alternate<-function(D,
849947
ncores=1,
850948
pheno=NULL,
851949
na.values=FALSE,
950+
seed=NULL,
852951
verbosity=0L){
853952

854-
if(!t.method %in% c("integer", "empirical", "resample", "Hlasso", "optim", "quadPen")){
953+
if(!t.method %in% c("integer", "empirical", "resample", "Hlasso", "optim", "quadPen", "cppTAfact")){
855954
stop("supplied optimization method for T is not implemented")
856955
}
857956

@@ -944,9 +1043,11 @@ factorize.alternate<-function(D,
9441043
}
9451044
}
9461045

947-
9481046
if(init=="random"){
9491047
numruns <- opt
1048+
if(!is.null(seed)){
1049+
set.seed(seed)
1050+
}
9501051
}else if(init=="fixed"){
9511052
numruns <- 1
9521053
}else{
@@ -1008,8 +1109,15 @@ factorize.alternate<-function(D,
10081109
}
10091110
}
10101111
}
1112+
10111113
# solve the topic model
1012-
onerun <- onerun.alternate(
1114+
if(method == "MeDeCom.cppTAfact"){
1115+
onerun.function<-onerun.cppTAfact
1116+
}else{
1117+
onerun.function<-onerun.alternate
1118+
}
1119+
1120+
onerun <- onerun.function(
10131121
D, T0, A0,
10141122
Tfix = Tfix,
10151123
Tpartial=NULL,
@@ -1032,7 +1140,8 @@ factorize.alternate<-function(D,
10321140
eps=eps,
10331141
blocks=blocks,
10341142
na.values=na.values,
1035-
verbose=verbosity>1L);
1143+
verbose=verbosity>1L,
1144+
ncores=ncores);
10361145

10371146
# Ts[[run]]<<-onerun[[1]]
10381147
# As[[run]]<<-onerun[[2]]
@@ -1050,7 +1159,8 @@ factorize.alternate<-function(D,
10501159
# pcoordinates <- foreach(target = targets) %dopar%
10511160
# tryCatch(RnBeads::rnb.execute.dreduction(rnb.set, target = target), error = function(e) { e$message } )
10521161

1053-
if(ncores>1){
1162+
#if(ncores>1){
1163+
if(FALSE){
10541164
#require(doMC)
10551165
#registerDoMC(N_CORES)
10561166
#cl<-makeCluster(N_CORES)
@@ -1084,7 +1194,8 @@ factorize.alternate<-function(D,
10841194
Conv <- Convs[[idx]]
10851195

10861196
if(!na.values){
1087-
rmse<-sqrt(sum((D-TT%*%A)^2)/ncol(D)/nrow(D))
1197+
#rmse<-sqrt(sum((D-TT%*%A)^2)/ncol(D)/nrow(D))
1198+
rmse <- Fval - lambda*sum(TT-TT^2)
10881199
}else{
10891200
diff_mat<-D-TT%*%A
10901201
rmse<-sqrt(sum(diff_mat[!is.na(diff_mat)]^2)/ncol(D)/nrow(D))
@@ -1614,4 +1725,4 @@ factorize.exact<-function(D, r, V=c(0,1), opt=NULL, zeroprob = (0 %in% V)){
16141725
return(list(T=T, A=A, status=status))
16151726

16161727
}
1617-
#######################################################################################################################
1728+
#######################################################################################################################

0 commit comments

Comments
 (0)