### Build a simulation that looks at
### LHS/RHS kFWER using truth/parametric and empirical kFWERs
### for a variety of k's.

### use the functions from BUMsims.R

### for the empirical kFWERs keep track
### of all the estimates and show the shading
### between the 5th and 95th percentiles

rm(list=ls())

source("KFWERfuns.R")
library(locfdr)
library(MASS)

# 500 genes,
# 1000 MC simulations

N=500
nMC = 1000
grid = seq(-5,5, length=500)
pi0=.95

# Make figure showing distribution of z scores.#############################
 myind = rbinom(N,size=1,prob=pi0)
  N0 = sum(myind)
  myp=c()
  myp[myind==1] = runif(N0)
  myp[myind==0] = rbeta((N-N0),shape1=.3, shape2=5)
# now make LHSsideprob and RHSsideprob dictate whether the alternatives
# are on the LHS or the RHS, i.e. large negative test stats or large positive test
# stats, see page 30 of Efron textbook - equation 3.10
LHSsideprob = 0.50
RHSsideprob = 1-LHSsideprob
LHSorRHSind = rbinom(length(myp[myind==0]),size=1,prob=LHSsideprob)
myzNULL = qnorm(myp)
mypsig = myp[myind==0]
myzLHS = qnorm(mypsig[LHSorRHSind==1]/2)
myzRHS = qnorm(1-mypsig[LHSorRHSind==0]/2)

myz = c(myzNULL,myzLHS,myzRHS)
#myz = qnorm(myp)


postscript("BUMzscoreshist.ps", paper="letter")

truehist(myz, xlab="z values", ylim=c(0,.5), prob=T,nbins=50,xlim=c(-5,5))
par = dnorm(grid)
lines(grid,par,type="l", lwd=2)

emp = EMPf0hat(myz)
test=emp
scalef=trapz(test$x[test$x<10],test$y[test$x<10])
lines(emp$x,emp$y/scalef, type="l", col="red",lwd=2)

dev.off()
system("ps2pdf BUMzscoreshist.ps")
###########################################################################


LHS.MCresults = RHS.MCresults = matrix(NA, nrow=nMC, ncol=length(grid))
EMPLHS.K1.MCresults = EMPRHS.K1.MCresults = matrix(NA, nrow=nMC, ncol=length(grid))
EMPLHS.K5.MCresults = EMPRHS.K5.MCresults = matrix(NA, nrow=nMC, ncol=length(grid))
EMPLHS.K20.MCresults = EMPRHS.K20.MCresults = matrix(NA, nrow=nMC, ncol=length(grid))
EMPLHS.K50.MCresults = EMPRHS.K50.MCresults = matrix(NA, nrow=nMC, ncol=length(grid))


PARLHS.K1.MCresults = PARRHS.K1.MCresults = matrix(NA, nrow=nMC, ncol=length(grid))
PARLHS.K5.MCresults = PARRHS.K5.MCresults = matrix(NA, nrow=nMC, ncol=length(grid))
PARLHS.K20.MCresults = PARRHS.K20.MCresults = matrix(NA, nrow=nMC, ncol=length(grid))
PARLHS.K50.MCresults = PARRHS.K50.MCresults = matrix(NA, nrow=nMC, ncol=length(grid))


for(i in 1:nMC){
  
  myind = rbinom(N,size=1,prob=pi0)
  N0 = sum(myind)
  myp=c()
  myp[myind==1] = runif(N0)
  myp[myind==0] = rbeta((N-N0),shape1=.3, shape2=5)
  myz = qnorm(myp)

  for(j in 1:length(grid)){
    LHS.MCresults[i,j] = sum(myz[myind==1] <= grid[j])
    RHS.MCresults[i,j] = sum(myz[myind==1] >= grid[j])
    EMPLHS.K1.MCresults[i,j] = sEMPfullkfwerfunLHS(z=grid[j],k=1,N=N,zz=myz)
    EMPRHS.K1.MCresults[i,j] = sEMPfullkfwerfunRHS(z=grid[j],k=1,N=N,zz=myz)
    EMPLHS.K5.MCresults[i,j] = sEMPfullkfwerfunLHS(z=grid[j],k=5,N=N,zz=myz)
    EMPRHS.K5.MCresults[i,j] = sEMPfullkfwerfunRHS(z=grid[j],k=5,N=N,zz=myz)
    EMPLHS.K20.MCresults[i,j] = sEMPfullkfwerfunLHS(z=grid[j],k=20,N=N,zz=myz)
    EMPRHS.K20.MCresults[i,j] = sEMPfullkfwerfunRHS(z=grid[j],k=20,N=N,zz=myz)
    EMPLHS.K50.MCresults[i,j] = sEMPfullkfwerfunLHS(z=grid[j],k=50,N=N,zz=myz)
    EMPRHS.K50.MCresults[i,j] = sEMPfullkfwerfunRHS(z=grid[j],k=50,N=N,zz=myz)

   PARLHS.K1.MCresults[i,j] = sPARfullkfwerfunLHS(z=grid[j],k=1,N=N,zz=myz)
    PARRHS.K1.MCresults[i,j] = sPARfullkfwerfunRHS(z=grid[j],k=1,N=N,zz=myz)
    PARLHS.K5.MCresults[i,j] = sPARfullkfwerfunLHS(z=grid[j],k=5,N=N,zz=myz)
    PARRHS.K5.MCresults[i,j] = sPARfullkfwerfunRHS(z=grid[j],k=5,N=N,zz=myz)
    PARLHS.K20.MCresults[i,j] = sPARfullkfwerfunLHS(z=grid[j],k=20,N=N,zz=myz)
    PARRHS.K20.MCresults[i,j] = sPARfullkfwerfunRHS(z=grid[j],k=20,N=N,zz=myz)
    PARLHS.K50.MCresults[i,j] = sPARfullkfwerfunLHS(z=grid[j],k=50,N=N,zz=myz)
    PARRHS.K50.MCresults[i,j] = sPARfullkfwerfunRHS(z=grid[j],k=50,N=N,zz=myz)

          }
  print(i)
  }

#### Now get the quantiles for the empirical kFWER LHS and kFWER RHS
 
LHSk1ninetyfivepercent =LHSk1fivepercent = RHSk1ninetyfivepercent = RHSk1fivepercent =c()
LHSk5ninetyfivepercent =LHSk5fivepercent = RHSk5ninetyfivepercent = RHSk5fivepercent =c()
LHSk20ninetyfivepercent =LHSk20fivepercent = RHSk20ninetyfivepercent = RHSk20fivepercent =c()
LHSk50ninetyfivepercent =LHSk50fivepercent = RHSk50ninetyfivepercent = RHSk50fivepercent =c()


for(i in 1:dim(EMPLHS.K1.MCresults)[2]){
#k =1
LHSmyvec = EMPLHS.K1.MCresults[,i]
RHSmyvec = EMPRHS.K1.MCresults[,i]

LHSk1fivepercent[i] = quantile(LHSmyvec,.05, na.rm=T)
LHSk1ninetyfivepercent[i] = quantile(LHSmyvec,.95, na.rm=T)
RHSk1fivepercent[i] = quantile(RHSmyvec,.05, na.rm=T)
RHSk1ninetyfivepercent[i] = quantile(RHSmyvec,.95, na.rm=T)

#k =5
LHSmyvec = EMPLHS.K5.MCresults[,i]
RHSmyvec = EMPRHS.K5.MCresults[,i]

LHSk5fivepercent[i] = quantile(LHSmyvec,.05, na.rm=T)
LHSk5ninetyfivepercent[i] = quantile(LHSmyvec,.95, na.rm=T)
RHSk5fivepercent[i] = quantile(RHSmyvec,.05, na.rm=T)
RHSk5ninetyfivepercent[i] = quantile(RHSmyvec,.95, na.rm=T)

# k = 20
LHSmyvec = EMPLHS.K20.MCresults[,i]
RHSmyvec = EMPRHS.K20.MCresults[,i]

LHSk20fivepercent[i] = quantile(LHSmyvec,.05, na.rm=T)
LHSk20ninetyfivepercent[i] = quantile(LHSmyvec,.95, na.rm=T)
RHSk20fivepercent[i] = quantile(RHSmyvec,.05, na.rm=T)
RHSk20ninetyfivepercent[i] = quantile(RHSmyvec,.95, na.rm=T)

# k = 50
LHSmyvec = EMPLHS.K50.MCresults[,i]
RHSmyvec = EMPRHS.K50.MCresults[,i]

LHSk50fivepercent[i] = quantile(LHSmyvec,.05, na.rm=T)
LHSk50ninetyfivepercent[i] = quantile(LHSmyvec,.95, na.rm=T)
RHSk50fivepercent[i] = quantile(RHSmyvec,.05, na.rm=T)
RHSk50ninetyfivepercent[i] = quantile(RHSmyvec,.95, na.rm=T)
}


# add Sidak/Bonf cutpoints

f.bin.jcm = function(N, k, con.rate = .95){
   f. = function(alpha.tilde) pbinom(k-1,N,alpha.tilde) - con.rate  
   uniroot(f.,interval=c(.00001/N,1))$root
}

returnpcut = function(N,K, alpha){
 f.bin.jcm(N,K, con.rate=1-alpha)
}

##############################################################
##############################################################
### Now make figure
##############################################################
##############################################################

# for k = 1 ############################################ 

k=1
LHSestkFWER = colMeans(PARLHS.K1.MCresults)
RHSestkFWER = colMeans(PARRHS.K1.MCresults)

LHSkfwer=RHSkfwer=c()
for(i in 1:dim(LHS.MCresults)[2]){
  LHSkfwer[i] = mean(LHS.MCresults[,i] >= k)
  RHSkfwer[i] = mean(RHS.MCresults[,i] >= k)
}

postscript("kfwerfigWITHEMP.ps", paper="letter")
par(mfrow=c(2,2))

plot(grid,LHSkfwer,type="n", main="k=1", xlab="z-values", ylab = "k-FWER")
axis(side=4)
# and EMP's and shading
polygon(c(grid[grid<0],sort(grid[grid<0], decreasing=T)), c(LHSk1ninetyfivepercent[grid<0],sort(LHSk1fivepercent[grid<0], decreasing=T)), col="gray", border=F)
polygon(c(sort(grid[grid>0], decreasing=T),grid[grid>0]), c(sort(RHSk1ninetyfivepercent[grid>0], decreasing=F),RHSk1fivepercent[grid>0]), col="gray", border=F)

lines(grid[grid<0], LHSkfwer[grid<0],col=1,lwd=2, lty=1)
lines(grid[grid>0], RHSkfwer[grid>0],col=2,lwd=2, lty=1)
lines(grid[grid<0], LHSestkFWER[grid<0],col=1,lwd=2, lty=2)
lines(grid[grid>0], RHSestkFWER[grid>0],col=2,lwd=2, lty=2)

###########
pboncut = 1*0.2/N
psidakcut = returnpcut(N=N,alpha=0.2,K=1)

LHSzboncut= qnorm(pboncut)
LHSzsidakcut = qnorm(psidakcut)

RHSzboncut= qnorm(1-pboncut)
RHSzsidakcut = qnorm(1-psidakcut)

# boncut line LHS ################################
#ycut = min(LHSkfwer[grid>LHSzboncut & grid<0])
#segments(x0 = LHSzboncut, x1=LHSzboncut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=LHSzboncut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)
# sidak cut LHS ##################################
#ycut = min(LHSkfwer[grid>LHSzsidakcut & grid<0])
#segments(x0 = LHSzsidakcut, x1=LHSzsidakcut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=LHSzsidakcut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)

# boncut line RHS ###############################
#ycut = min(RHSkfwer[grid>RHSzboncut & grid<0])
#segments(x0 = RHSzboncut, x1=RHSzboncut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=RHSzboncut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)
# sidak cut RHS ##################################
#ycut = min(RHSkfwer[grid>RHSzsidakcut & grid<0])
#segments(x0 = RHSzsidakcut, x1=RHSzsidakcut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=RHSzsidakcut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)





# for k = 5 ############################################

k=5
LHSestkFWER = colMeans(PARLHS.K5.MCresults)
RHSestkFWER = colMeans(PARRHS.K5.MCresults)


LHSkfwer=RHSkfwer=c()
for(i in 1:dim(LHS.MCresults)[2]){
  LHSkfwer[i] = mean(LHS.MCresults[,i] >= k)
  RHSkfwer[i] = mean(RHS.MCresults[,i] >= k)
}

plot(grid,LHSkfwer,type="n", main="k=5", xlab="z-values", ylab = "k-FWER")
# and EMP's and shading
polygon(c(grid[grid<0],sort(grid[grid<0], decreasing=T)), c(LHSk5ninetyfivepercent[grid<0],sort(LHSk5fivepercent[grid<0], decreasing=T)), col="gray", border=F)
polygon(c(sort(grid[grid>0], decreasing=T),grid[grid>0]), c(sort(RHSk5ninetyfivepercent[grid>0], decreasing=F),RHSk5fivepercent[grid>0]), col="gray", border=F)

lines(grid[grid<0], LHSkfwer[grid<0],col=1,lwd=2, lty=1)
lines(grid[grid>0], RHSkfwer[grid>0],col=2,lwd=2, lty=1)
lines(grid[grid<0], LHSestkFWER[grid<0],col=1,lwd=2, lty=2)
lines(grid[grid>0], RHSestkFWER[grid>0],col=2,lwd=2, lty=2)

###########
pboncut = 5*0.2/N
psidakcut = returnpcut(N=N,alpha=0.2,K=5)

LHSzboncut= qnorm(pboncut)
LHSzsidakcut = qnorm(psidakcut)

RHSzboncut= qnorm(1-pboncut)
RHSzsidakcut = qnorm(1-psidakcut)

# boncut line #################################
#ycut = min(LHSkfwer[grid>LHSzboncut & grid<0])
#segments(x0 = LHSzboncut, x1=LHSzboncut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=LHSzboncut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)
# sidak cut ###################################
#ycut = min(LHSkfwer[grid>LHSzsidakcut & grid<0])
#segments(x0 = LHSzsidakcut, x1=LHSzsidakcut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=LHSzsidakcut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)


# for k = 20 ###########################################

k=20
LHSestkFWER = colMeans(PARLHS.K20.MCresults)
RHSestkFWER = colMeans(PARRHS.K20.MCresults)

LHSkfwer=RHSkfwer=c()
for(i in 1:dim(LHS.MCresults)[2]){
  LHSkfwer[i] = mean(LHS.MCresults[,i] >= k)
  RHSkfwer[i] = mean(RHS.MCresults[,i] >= k)
}

plot(grid,LHSkfwer,type="n", main="k=20", xlab="z-values", ylab = "k-FWER")
# and EMP's and shading
polygon(c(grid[grid<0],sort(grid[grid<0], decreasing=T)), c(LHSk20ninetyfivepercent[grid<0],sort(LHSk20fivepercent[grid<0], decreasing=T)), col="gray", border=F)
polygon(c(sort(grid[grid>0], decreasing=T),grid[grid>0]), c(sort(RHSk20ninetyfivepercent[grid>0], decreasing=F),RHSk20fivepercent[grid>0]), col="gray", border=F)

lines(grid[grid<0], LHSkfwer[grid<0],col=1,lwd=2, lty=1)
lines(grid[grid>0], RHSkfwer[grid>0],col=2,lwd=2, lty=1)
lines(grid[grid<0], LHSestkFWER[grid<0],col=1,lwd=2, lty=2)
lines(grid[grid>0], RHSestkFWER[grid>0],col=2,lwd=2, lty=2)

###########
pboncut = 20*0.2/N
psidakcut = returnpcut(N=N,alpha=0.2,K=20)

LHSzboncut= qnorm(pboncut)
LHSzsidakcut = qnorm(psidakcut)

RHSzboncut= qnorm(1-pboncut)
RHSzsidakcut = qnorm(1-psidakcut)

# boncut line ##########################################
#ycut = min(LHSkfwer[grid>LHSzboncut & grid<0])
#segments(x0 = LHSzboncut, x1=LHSzboncut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=LHSzboncut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)
# sidak cut ############################################
#ycut = min(LHSkfwer[grid>LHSzsidakcut & grid<0])
#segments(x0 = LHSzsidakcut, x1=LHSzsidakcut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=LHSzsidakcut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)


# for k = 50 ##########################################

k=50
LHSestkFWER = colMeans(PARLHS.K50.MCresults)
RHSestkFWER = colMeans(PARRHS.K50.MCresults)

LHSkfwer=RHSkfwer=c()
for(i in 1:dim(LHS.MCresults)[2]){
  LHSkfwer[i] = mean(LHS.MCresults[,i] >= k)
  RHSkfwer[i] = mean(RHS.MCresults[,i] >= k)
}

plot(grid,LHSkfwer,type="n", main="k=50", xlab="z-values", ylab = "k-FWER")
# and EMP's and shading
polygon(c(grid[grid<0],sort(grid[grid<0], decreasing=T)), c(LHSk50ninetyfivepercent[grid<0],sort(LHSk50fivepercent[grid<0], decreasing=T)), col="gray", border=F)
polygon(c(sort(grid[grid>0], decreasing=T),grid[grid>0]), c(sort(RHSk50ninetyfivepercent[grid>0], decreasing=F),RHSk50fivepercent[grid>0]), col="gray", border=F)

lines(grid[grid<0], LHSkfwer[grid<0],col=1,lwd=2, lty=1)
lines(grid[grid>0], RHSkfwer[grid>0],col=2,lwd=2, lty=1)
lines(grid[grid<0], LHSestkFWER[grid<0],col=1,lwd=2, lty=2)
lines(grid[grid>0], RHSestkFWER[grid>0],col=2,lwd=2, lty=2)

###########
pboncut = 50*0.2/N
psidakcut = returnpcut(N=N,alpha=0.2,K=50)

LHSzboncut= qnorm(pboncut)
LHSzsidakcut = qnorm(psidakcut)

RHSzboncut= qnorm(1-pboncut)
RHSzsidakcut = qnorm(1-psidakcut)

# boncut line #################################
#ycut = min(LHSkfwer[grid>LHSzboncut & grid<0])
#segments(x0 = LHSzboncut, x1=LHSzboncut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=LHSzboncut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)
# sidak cut #################################
#ycut = min(LHSkfwer[grid>LHSzsidakcut & grid<0])
#segments(x0 = LHSzsidakcut, x1=LHSzsidakcut,y0=0,  y1= ycut, lty=2,col="black",lwd=2)
#segments(x0 = -5, x1=LHSzsidakcut,y0=ycut,  y1= ycut, lty=2,col="black",lwd=2)


dev.off()
system("ps2pdf kfwerfigWITHEMP.ps")

save.image(file = "BUMsims.Rdata")


