##########################################################
# Syntren R Code #########################################
##########################################################

library(snow)
library(snowfall)
source("sourceROCR.R")

Wmatrix.o <-function(data,delta,gamma,n)  #data$x: p*n
{       x<-t(data$x)
        y<-data$y
        
        if (is.null(n)) n<-nrow(data$x)
        left <- cor(x)
        diag(left) <- 0
        covs <- cor(x, y)
        right <-(covs%*%t(covs))
        W<-( (abs(left)^delta)*sign(left) ) * ( (abs(right)^gamma)*sign(right) )
        W.p<-apply(W, 2, function(x) {ifelse(x < 0, 0, x)})   ##make negative to zero
        eig<-eigen(W)
        eig.p<-eigen(W.p)
        list(eigenvectors=eig$vectors,eigenvectors.p=eig.p$vectors)
}


get.par<-function(rm,SNR,alpha)
{
  beta<-sqrt(rm/(1-rm))
  sig.sq<-(alpha/SNR)^2
  rh<-alpha/sqrt(alpha^2+sig.sq)
  ri<-alpha*beta/sqrt((1+beta^2)*(alpha^2+sig.sq))
  return(list(alpha=alpha,beta=beta,v.e=sig.sq,ri=ri,rh=rh))
  ## In presence of func indep, sig.sq=v.e+v.fi
}

##############################################
print("args = commandArgs(trailingOnly = TRUE);");
args = commandArgs(trailingOnly = TRUE);

print("ncpus = args[1];");
ncpus = args[1];

print("outFile <- paste0(make.names(date()), \".Rout\")");
outFile <- paste0(make.names(date()), ".Rout")


print("sink(outFile, split = TRUE)");
sink(outFile, split = TRUE)

# ------------------------------------------------------------------------
# initialize parallel mode using sockets and command-line args
# ------------------------------------------------------------------------
print("sfInit( parallel=TRUE, cpus=ncpus , type=\"SOCK\", socketHosts=c(args[-1]));");
sfInit( parallel=TRUE, cpus=ncpus , type="SOCK", socketHosts=c(args[c(-1,-2)]));


sfLibrary(superpc)
sfLibrary(cluster)
sfLibrary(MASS)
sfLibrary(Matrix)
sfLibrary(ROCR)
sfExportAll()


results.all <- sfSapply(c(1,2,10:15), function(rep.num){
  simul.name=paste0("Eco",rep.num,collapse="")
  setwd(paste0("/ifs/user/xiweiche/syn/results/",simul.name))
  SNR.seq=seq(.2,.6,.1)
  Nbp<-100
  Nrep<-50
  Nsample<-400
  name="nn30_nbgr300_hop0.3_bionoise0.1_expnoise0.1_corrnoise0.1_clustAdd_"
  external=read.table(paste0(name,"external.txt",collapse=""),header=TRUE)
  X.raw=read.table(paste0(name,"unnormalized_dataset.txt",collapse=""),header=TRUE)
  #X.raw=read.table(paste0(name,"maxExpr1_dataset.txt",collapse=""),header=TRUE)
  
  X=X.raw[,-1]
  rownames(X)=X.raw[,1]
  total=nrow(X)
  
  FH.truth=which(!is.na(match(rownames(X),(c(external)$Regulator)))) #levels
  nFH=30
  nI=300
  NH.truth=c(1:nFH)[-FH.truth]
  NI.truth<-c((nFH+1):total)
  
  
  delta<-1
  truth<-c(rep(1,nFH),rep(0,total-nFH))
  a<-nFH/(total-nFH)
  
  options(stringsAsFactors = FALSE)
  
  ###########
  
  S<-Nsample
  Nc<-length(SNR.seq)
  FPR<-TPR<-array(NA,dim=c(Nrep,8,Nc))
  
  for (i in 1:Nc)
  {
    par<-get.par(rm=.3,SNR=SNR.seq[i],alpha=1) #
    alpha=par$alpha
    beta=par$beta
    V.e=par$v.e
    
    for (j in 1:Nrep)
    {
      set.seed(j)
      X.sub=X[,seq(j,by=Nrep,length.out=Nsample)]
      X.t=t(X.sub)
      
      if (nrow(external)==1) y<-as.numeric(as.character((alpha*X.t[,FH.truth]/sd(X.t[,FH.truth]))+rnorm(S,0,sd=sqrt(V.e))))
      if (nrow(external)>1) y<-as.numeric(as.character(rowSums(alpha*X.t[,FH.truth]/sd(X.t[,FH.truth]))+rnorm(S,0,sd=sqrt(V.e))))
      
      data<-list(x=as.matrix(X.sub),y=c(y))
      #X:p*n
      cor.y=cor(t(data$x),data$y)
      data$x[is.na(cor.y),]=data$x[is.na(cor.y),]+rnorm(S,mean=0,sd=.001)
      cor.y=cor(t(data$x),data$y)
      
      Wall=Wmatrix.o(data,delta=1,gamma=1,n=NULL)
      ee=Wall$eigenvectors[,1]
      ee.p=Wall$eigenvectors.p[,1]
      
      # K 6
      k.fit <- kmeans(x=data$x,centers=2,iter.max=100,nstart=25)
      ModuleSignificance<-tapply(abs( cor.y), k.fit$cluster, mean, na.rm=T)
      u<-(k.fit$cluster==which.max(ModuleSignificance))*1
      pred <- prediction( u*1, truth)
      
      perf1 <- performance(pred, "tpr", "fpr")
      FPR[j,1,i]<-unlist(perf1@x.values)[2]
      TPR[j,1,i]<-unlist(perf1@y.values)[2]
      
      # WGC.cor: 6
      pam=pam(as.dist((1-abs(cor(t(data$x))))), 2)  ##a measure of dissimilarity
      ModuleSignificance<-tapply(abs( cor.y), pam$clustering, mean, na.rm=T)
      u<-(pam$clustering==which.max(ModuleSignificance))*1  
      
      pred <- prediction( u*1, truth)
      perf1 <- performance(pred, "tpr", "fpr")
      FPR[j,2,i]<-unlist(perf1@x.values)[2]
      TPR[j,2,i]<-unlist(perf1@y.values)[2]
      
      # WGC.cor^6: 6
      pam=pam(as.dist((1-abs(cor(t(data$x)))^6)), 2)   ##a measure of dissimilarity
      ModuleSignificance<-tapply(abs( cor.y), pam$clustering, mean, na.rm=T)
      u<-(pam$clustering==which.max(ModuleSignificance))*1  
      
      pred <- prediction( u*1, truth)
      perf1 <- performance(pred, "tpr", "fpr")
      FPR[j,3,i]<-unlist(perf1@x.values)[2]
      TPR[j,3,i]<-unlist(perf1@y.values)[2]
      
      # # superpc
      train.obj<- superpc.train(data, type="regression")
      cv.obj<-superpc.cv(train.obj, data)
      th<-cv.obj$thresholds[which.max(cv.obj$scor[1,])]  ## thresholds for the 1st component
      u <- (abs(train.obj$feature.scores) > th)*1
      pred <- prediction( u*1, truth)
      perf1 <- performance(pred, "tpr", "fpr")
      FPR[j,4,i]<-unlist(perf1@x.values)[2]
      TPR[j,4,i]<-unlist(perf1@y.values)[2]
      
      ###############################  W : original ############################## 
      # W e
      k.fit=kmeans(ee.p,2,iter.max=100,nstart=25)
      ModuleSignificance<-tapply(abs( cor.y), k.fit$cluster, mean, na.rm=T)
      
      u<-(k.fit$cluster==which.max(ModuleSignificance))*1
      pred <- prediction( u*1, truth)
      perf1 <- performance(pred, "tpr", "fpr")
      FPR[j,5,i]<-unlist(perf1@x.values)[2]
      TPR[j,5,i]<-unlist(perf1@y.values)[2]
      
      k.fit=kmeans(ee,2,iter.max=100,nstart=25)
      ModuleSignificance<-tapply(abs( cor.y), k.fit$cluster, mean, na.rm=T)
      
      u<-(k.fit$cluster==which.max(ModuleSignificance))*1
      pred <- prediction( u*1, truth)
      perf1 <- performance(pred, "tpr", "fpr")
      FPR[j,6,i]<-unlist(perf1@x.values)[2]
      TPR[j,6,i]<-unlist(perf1@y.values)[2]
      
      # W boostrap
      bt.func.p<-function(data){  # data$x: p*n
        bt<-sample(1:dim(data$x)[2],replace = TRUE)
        datanew<-list(x=data$x[,bt],y=data$y[bt])
        junk<-Wmatrix.o(datanew,delta=1,gamma=1,n=NULL)
        
        e.bt<-junk$eigenvectors.p
        r.bt<-apply(e.bt,2,function(x) cor(x,ee))
        xi<-which.max(abs(r.bt))
        bt.p<-e.bt[,xi]*sign(r.bt[xi])
        
        e.bt<-junk$eigenvectors
        r.bt<-apply(e.bt,2,function(x) cor(x,ee))
        xi<-which.max(abs(r.bt))
        bt.o<-e.bt[,xi]*sign(r.bt[xi])
        return(cbind(bt.p,bt.o))
      }
      eigen.bt<-replicate(Nbp,bt.func.p(data)) #  total*Nbp
      
      mean.bt<-apply(eigen.bt[,1,],1,mean)
      k.fit=kmeans(mean.bt,2,iter.max=100,nstart=25)
      ModuleSignificance<-tapply(abs( cor.y), k.fit$cluster, mean, na.rm=T)
      u<-(k.fit$cluster==which.max(ModuleSignificance))*1
      pred <- prediction( u*1, truth)
      perf1 <- performance(pred, "tpr", "fpr")
      FPR[j,7,i]<-unlist(perf1@x.values)[2]
      TPR[j,7,i]<-unlist(perf1@y.values)[2]
      
      mean.bt<-apply(eigen.bt[,2,],1,mean)
      k.fit=kmeans(mean.bt,2,iter.max=100,nstart=25)
      ModuleSignificance<-tapply(abs( cor.y), k.fit$cluster, mean, na.rm=T)
      u<-(k.fit$cluster==which.max(ModuleSignificance))*1
      pred <- prediction( u*1, truth)
      perf1 <- performance(pred, "tpr", "fpr")
      FPR[j,8,i]<-unlist(perf1@x.values)[2]
      TPR[j,8,i]<-unlist(perf1@y.values)[2]
      
      print(paste0("i=",i,", j=",j))
    }
    
  }
  FDR<-1/(1 + a*TPR/FPR)
  
  names=c("K","WGC","WGC6","superpc","W.e.pos","W.e","W.bt.pos","W.bt")
  
  TPR.m<-apply(TPR,3,function(x) apply(x,2,function(z) mean(z,na.rm=TRUE)))
  FPR.m<-apply(FPR,3,function(x) apply(x,2,function(z) mean(z,na.rm=TRUE)))
  FDR.m<-apply(FDR,3,function(x) apply(x,2,function(z) mean(z,na.rm=TRUE)))
  
  TPR.sd<-apply(TPR,3,function(x) apply(x,2,function(z) sd(z,na.rm=TRUE)))
  FPR.sd<-apply(FPR,3,function(x) apply(x,2,function(z) sd(z,na.rm=TRUE)))
  FDR.sd<-apply(FDR,3,function(x) apply(x,2,function(z) sd(z,na.rm=TRUE)))
  
  
  rownames(TPR.m)<-rownames(FPR.m)<-rownames(FDR.m)<-names
  colnames(TPR.m)<-colnames(FPR.m)<-colnames(FDR.m)<-sapply(SNR.seq,function(x) paste0("SNR=",x))
  rownames(TPR.sd)<-rownames(FPR.sd)<-rownames(FDR.sd)<-names
  colnames(TPR.sd)<-colnames(FPR.sd)<-colnames(FDR.sd)<-sapply(SNR.seq,function(x) paste0("SNR=",x))
  
  mean.all<-list(TPR.m,FPR.m,FDR.m)
  sd.all<-list(TPR.sd,FPR.sd,FDR.sd)
  
  save(mean.all,file=paste0("/ifs/user/xiweiche/syn/out/Mean_",simul.name,".RData"))
  save(sd.all,file=paste0("/ifs/user/xiweiche/syn/out/Sd_",simul.name,".RData"))
  
  sink(paste0("/ifs/user/xiweiche/syn/out/Mean_",simul.name,".csv"), type="output")
  invisible(lapply(names(result.mean), 
                   function(x) { print(x)
                                 dput(write.csv(mean.all[[x]])) } ))
  sink()
  
  sink(paste0("/ifs/user/xiweiche/syn/out/Sd_",simul.name,".csv"), type="output")
  invisible(lapply(names(result.sd), 
                   function(x) { print(x)
                                 dput(write.csv(sd.all[[x]])) } ))
  sink()
})

sfStop()





