#######################################################
### Simulate Yeast Data
### initial files external and X.raw are created in
### syntrencode.R
#######################################################

rm(list=ls())
require(Matrix)
require(MASS)
require(cluster)
require(ROCR)
require(superpc)


Nsample<-400

# read data
external=read.table("external.txt",header=TRUE)
X.raw=read.table("sampled_bionoise_0.10_expnoise_0.10_inputnoise_0.10_burnin_1000_experiments_1_samples_400_unnormalized_dataset.txt",header=TRUE)
X=X.raw[,-1]
rownames(X)=X.raw[,1]
total=nrow(X)

## standardize
X.t=apply(X,1,function(tt) {
  if (length(unique(tt))==1) {rnorm(Nsample,mean=0,sd=.00001)}
    else{(tt-mean(tt))/sd(tt)}
})

X.t[is.na(X.t)]=rnorm(1,mean=0,sd=.001)

FH.truth=which(!is.na(match(rownames(X),(c(external)$Regulator)))) #levels
nFH=30
nI=300
NH.truth=c(1:nFH)[-FH.truth]
NI.truth<-c((nFH+1):total)
Nbp<-100
delta<-1
truth<-c(rep(1,nFH),rep(0,total-nFH))
a<-nFH/(total-nFH)

Wmatrix.o <-function(data,delta,gamma,n)  #data$x: p*n
{       x<-t(data$x)
        y<-data$y
        
        if (is.null(n)) n<-nrow(data$x)
        left <- cor(x)
        diag(left) <- 0
        covs <- cor(x, y)
        right <-(covs%*%t(covs))
        W<-( (abs(left)^delta)*sign(left) ) * ( (abs(right)^gamma)*sign(right) )
        # W<-apply(W, 2, function(x) {ifelse(x < 0, 0, x)})   ##make negative to zero
        eig<-eigen(W)
        list(eigenvalues=eig$values,eigenvectors=eig$vectors,W=W,xmat=left,right=right,covs=covs)
}


get.par<-function(rm,SNR,alpha)
{
  beta<-sqrt(rm/(1-rm))
  sig.sq<-(alpha/SNR)^2
  rh<-alpha/sqrt(alpha^2+sig.sq)
  ri<-alpha*beta/sqrt((1+beta^2)*(alpha^2+sig.sq))
  return(list(alpha=alpha,beta=beta,v.e=sig.sq,ri=ri,rh=rh))
  ## In presence of func indep, sig.sq=v.e+v.fi
}
options(stringsAsFactors = FALSE)

###########

S<-Nsample
SNR.seq=seq(.2,.3,.1)
Nc<-length(SNR.seq)
FPR<-TPR<-array(NA,dim=c(6,Nc))

for (i in 1:Nc)
{
  par<-get.par(rm=.3,SNR=SNR.seq[i],alpha=1) #
  alpha=par$alpha
  beta=par$beta
  V.e=par$v.e
  
  set.seed(12345)
  
  if (nrow(external)==1) y<-as.numeric(as.character((alpha*X.t[,FH.truth])+rnorm(S,0,sd=sqrt(V.e))))
  if (nrow(external)>1) y<-as.numeric(as.character(rowSums(alpha*X.t[,FH.truth])+rnorm(S,0,sd=sqrt(V.e))))
  
  
  data<-list(x=as.matrix(t(X.t)),y=c(y))
  #X:p*n
  cor.y=cor(t(data$x),data$y)
  Wall=Wmatrix.o(data,delta=1,gamma=1,n=NULL)
  ee=Wall$eigenvectors[,1]
  
  # K 6
  k.fit <- kmeans(x=data$x,centers=2,iter.max=100,nstart=25)
  ModuleSignificance<-tapply(abs( cor.y), k.fit$cluster, mean, na.rm=T)
  u<-(k.fit$cluster==which.max(ModuleSignificance))*1
  pred <- prediction( u*1, truth)
  
  perf1 <- performance(pred, "tpr", "fpr")
  FPR[1,i]<-unlist(perf1@x.values)[2]
  TPR[1,i]<-unlist(perf1@y.values)[2]
  
  # WGC.cor: 6
  pam=pam(as.dist((1-abs(cor(t(data$x))))), 2)  ##a measure of dissimilarity
  ModuleSignificance<-tapply(abs( cor.y), pam$clustering, mean, na.rm=T)
  u<-(pam$clustering==which.max(ModuleSignificance))*1  
  
  pred <- prediction( u*1, truth)
  perf1 <- performance(pred, "tpr", "fpr")
  FPR[2,i]<-unlist(perf1@x.values)[2]
  TPR[2,i]<-unlist(perf1@y.values)[2]
  
  # WGC.cor^6: 6
  pam=pam(as.dist((1-abs(cor(t(data$x)))^6)), 2)   ##a measure of dissimilarity
  ModuleSignificance<-tapply(abs( cor.y), pam$clustering, mean, na.rm=T)
  u<-(pam$clustering==which.max(ModuleSignificance))*1  
  
  pred <- prediction( u*1, truth)
  perf1 <- performance(pred, "tpr", "fpr")
  FPR[3,i]<-unlist(perf1@x.values)[2]
  TPR[3,i]<-unlist(perf1@y.values)[2]
  
  # # superpc
  train.obj<- superpc.train(data, type="regression")
  cv.obj<-superpc.cv(train.obj, data)
  th<-cv.obj$thresholds[which.max(cv.obj$scor[1,])]  ## thresholds for the 1st component
  u <- (abs(train.obj$feature.scores) > th)*1
  pred <- prediction( u*1, truth)
  perf1 <- performance(pred, "tpr", "fpr")
  FPR[4,i]<-unlist(perf1@x.values)[2]
  TPR[4,i]<-unlist(perf1@y.values)[2]
  
  ###############################  W : original ############################## 
  # W e
  Wall=Wmatrix.o(data,delta=1,gamma=1,n=NULL)
  ee=Wall$eigenvectors[,1]
  
  k.fit=kmeans(ee,2,iter.max=100,nstart=25)
  ModuleSignificance<-tapply(abs( cor.y), k.fit$cluster, mean, na.rm=T)
  
  u<-(k.fit$cluster==which.max(ModuleSignificance))*1
  pred <- prediction( u*1, truth)
  perf1 <- performance(pred, "tpr", "fpr")
  FPR[5,i]<-unlist(perf1@x.values)[2]
  TPR[5,i]<-unlist(perf1@y.values)[2]
  
  
  # W boostrap
  bt.func.o<-function(data){  # data$x: p*n
    bt<-sample(1:dim(data$x)[2],replace = TRUE)
    datanew<-list(x=data$x[,bt],y=data$y[bt])
    e.bt<-Wmatrix.o(datanew,delta=1,gamma=1,n=NULL)$eigenvectors
    r.bt<-apply(e.bt,2,function(x) cor(x,ee))
    xi<-which.max(abs(r.bt))
    return(e.bt[,xi]*sign(r.bt[xi]))
  }
  eigen.bt<-replicate(Nbp,bt.func.o(data)) #  total*Nbp
  mean.bt<-apply(eigen.bt,1,mean)
  k.fit=kmeans(mean.bt,2,iter.max=100,nstart=25)
  ModuleSignificance<-tapply(abs( cor.y), k.fit$cluster, mean, na.rm=T)
  
  u<-(k.fit$cluster==which.max(ModuleSignificance))*1
  pred <- prediction( u*1, truth)
  perf1 <- performance(pred, "tpr", "fpr")
  FPR[6,i]<-unlist(perf1@x.values)[2]
  TPR[6,i]<-unlist(perf1@y.values)[2]
}

names=c("K","WGC","WGC6","superpc","W.e","W.bt")
rownames(TPR)<-names
colnames(TPR)<-SNR.seq
rownames(FPR)<-names
colnames(FPR)<-SNR.seq
FDR<-1/(1 + a*TPR/FPR)

all<-cbind(TPR,FPR,FDR)

## every simulation: every 6 rows correspond to 6 methods, first Nc columns correspond to TPR, then FPR and FDR
write.table(all,file="Yeast_results.txt",col.names=FALSE,row.names=FALSE,append=TRUE)

write.table(all,file="/ifs/user/xiweiche/syn/out/Yeast_results.txt",col.names=FALSE,row.names=FALSE,append=TRUE)




