##########################################
# Run the code in FULLBUMsims.R for
# Efron's code
##########################################

# work with the following data

#prostz.Rda
#leukz.Rda

# use with load("data.Rda")

# obtain the vector of z-values from each experiment and then use tools to
# produce local (parametric and empirical) k-FWER curves.

rm(list=ls())

load("prostz.Rda")
# work with prostz

load("leukz.Rda")
# work with leukz

#########################################
## Now analyze them for local k-FWER
#########################################

source("KFWERfuns.R")
library(locfdr)
library(MASS)

# DO each analysis for 4 different k-values, k=1,5,20,50
# put figs in ExampleFigs

# put empirical null and parametric null on graphs

########### Prostate DATA ########################################
## Designed with GOOD agreement between empirical and parametric
##################################################################

grid = seq(-5,5,length=100)

postscript("prostatezhist.ps", paper="letter")

truehist(prostz, xlab="z values", ylim=c(0,0.45), ylab="density",prob=T, cex.lab=1.3)
#truehist(prostz, xlab="z values", ylim=c(0,0.45), ylab="frequency",prob=T, axes=F)
#axis(side=1)
#axis(side=2,at=c(0,0.1,0.2,0.3,0.4),labels=c(0,100,200,300,400))

par = dnorm(grid)
lines(grid,par,type="l", lwd=2)

emp = EMPf0hat(prostz)
test=emp
scalef=trapz(test$x[test$x<10],test$y[test$x<10])
lines(emp$x,emp$y/scalef, type="l", col="red",lwd=2)
dev.off()
system("ps2pdf prostatezhist.ps")



####################################

# red is the empirical, black is the parametric

# now produce k-FWER figures

### EMPIRICAL and PARAMETRIC

N =length(prostz)
grid = seq(-5,5,length=200)

empLHSk1 = empRHSk1 = empLHSk5 = empRHSk5 = empLHSk20 = empRHSk20 = empLHSk50 = empRHSk50 =c()
parLHSk1 = parRHSk1 = parLHSk5 = parRHSk5 = parLHSk20 = parRHSk20 = parLHSk50 = parRHSk50 =c()

for(j in 1:length(grid)){

# CHOOSING pi0hat of 1 created NaN error because of log(0)
  
empLHSk1[j] = sEMPfullkfwerfunLHS(z=grid[j],k=1,N=N,zz=prostz)
empRHSk1[j] = sEMPfullkfwerfunRHS(z=grid[j],k=1,N=N,zz=prostz)
parLHSk1[j] = sPARfullkfwerfunLHS(z=grid[j],k=1,N=N,zz=prostz)
parRHSk1[j] = sPARfullkfwerfunRHS(z=grid[j],k=1,N=N,zz=prostz)

empLHSk5[j] = sEMPfullkfwerfunLHS(z=grid[j],k=5,N=N,zz=prostz)
empRHSk5[j] = sEMPfullkfwerfunRHS(z=grid[j],k=5,N=N,zz=prostz)
parLHSk5[j]= sPARfullkfwerfunLHS(z=grid[j],k=5,N=N,zz=prostz)
parRHSk5[j]= sPARfullkfwerfunRHS(z=grid[j],k=5,N=N,zz=prostz)

empLHSk20[j] = sEMPfullkfwerfunLHS(z=grid[j],k=20,N=N,zz=prostz)
empRHSk20[j] = sEMPfullkfwerfunRHS(z=grid[j],k=20,N=N,zz=prostz)
parLHSk20[j]= sPARfullkfwerfunLHS(z=grid[j],k=20,N=N,zz=prostz)
parRHSk20[j]= sPARfullkfwerfunRHS(z=grid[j],k=20,N=N,zz=prostz)

#empLHSk50[j] = EMPfullkfwerfunLHS(z=grid[j],k=50,N=N,zz=prostz)
#empRHSk50[j] = EMPfullkfwerfunRHS(z=grid[j],k=50,N=N,zz=prostz)
#parLHSk50[j]=PARfullkfwerfunLHS(z=grid[j],k=50,N=N,pi0hat=1)
#parRHSk50[j]=PARfullkfwerfunRHS(z=grid[j],k=50,N=N,pi0hat=1)
print(j)

}

save.image("prostateresults.Rdata")

load("prostateresults.Rdata")

#### Determine the number of significant genes under
#### a given setting, e.g. k=5, alpha=0.10

LHSzcutemp = max(grid[empLHSk5<0.1 & grid<0])
RHSzcutemp = max(grid[empRHSk5<0.1 & grid>0])

sum(prostz<LHSzcutemp)
# 15
sum(prostz>RHSzcutemp)
# 1

LHSzcutpar = max(grid[parLHSk5<0.1 & grid<0])
RHSzcutpar = max(grid[parRHSk5<0.1 & grid>0])
sum(prostz<LHSzcutpar)
# 25
sum(prostz>RHSzcutpar)
# 1


# create the partial figure... Runs very very slowly...

postscript("prostateex.ps", paper="letter")

# prostate example

plot(grid, empLHSk1, xlim=c(-4,4),ylim=c(0,1),type="n", xlab="z values", ylab="k-FWER")
#k=1
lines(grid[grid<0], empLHSk1[grid<0], col=2, lwd=3, lty=1)
lines(grid[grid<0], parLHSk1[grid<0], col=1, lwd=3, lty=1)
lines(grid[grid>0], empRHSk1[grid>0], col=2, lwd=3, lty=1)
lines(grid[grid>0], parRHSk1[grid>0], col=1, lwd=3, lty=1)

#k=5
lines(grid[grid<0], empLHSk5[grid<0], col=2, lwd=3, lty=2)
lines(grid[grid<0], parLHSk5[grid<0], col=1, lwd=3, lty=2)
lines(grid[grid>0], empRHSk5[grid>0], col=2, lwd=3, lty=2)
lines(grid[grid>0], parRHSk5[grid>0], col=1, lwd=3, lty=2)

#k=20
lines(grid[grid<0], empLHSk20[grid<0], col=2, lwd=3, lty=3)
lines(grid[grid<0], parLHSk20[grid<0], col=1, lwd=3, lty=3)
lines(grid[grid>0], empRHSk20[grid>0], col=2, lwd=3, lty=3)
lines(grid[grid>0], parRHSk20[grid>0], col=1, lwd=3, lty=3)

dev.off()

system("ps2pdf prostateex.ps")

########### LEUKEMIA DATA ########################################
## Chosen for poor agreement between empirical and parametric
##################################################################

postscript("leukemiazhist.ps", paper="letter")

truehist(leukz, xlab="z values", ylim=c(0,0.45), ylab="density",prob=T, cex.lab=1.3)
#truehist(leukz, xlab="z values", ylim=c(0,0.45), ylab="frequency",prob=T, axes=F)
#axis(side=1)
#axis(side=2,at=c(0,0.0575, 0.115,0.1725,0.23),labels=c(0,200,400,600,800))

par = dnorm(grid)
lines(grid,par,type="l", lwd=2)

emp = EMPf0hat(leukz)
test=emp
scalef=trapz(test$x[test$x<10],test$y[test$x<10])
lines(emp$x,emp$y/scalef, type="l", col="red",lwd=2)
dev.off()
system("ps2pdf leukemiazhist.ps")

# red is the empirical, black is the parametric

# now produce k-FWER figures

### EMPIRICAL and PARAMETRIC

N =length(leukz)
grid = seq(-8,8,length=400)

empLHSk1 = empRHSk1 = empLHSk5 = empRHSk5 = empLHSk20 = empRHSk20 = empLHSk50 = empRHSk50 =c()
parLHSk1 = parRHSk1 = parLHSk5 = parRHSk5 = parLHSk20 = parRHSk20 = parLHSk50 = parRHSk50 =c()

for(j in 1:length(grid)){

# CHOOSING pi0hat of 1 created NaN error because of log(0)
  
empLHSk1[j] = sEMPfullkfwerfunLHS(z=grid[j],k=1,N=N,zz=leukz)
empRHSk1[j] = sEMPfullkfwerfunRHS(z=grid[j],k=1,N=N,zz=leukz)
parLHSk1[j] = sPARfullkfwerfunLHS(z=grid[j],k=1,N=N,zz=leukz)
parRHSk1[j] = sPARfullkfwerfunRHS(z=grid[j],k=1,N=N,zz=leukz)

empLHSk5[j] = sEMPfullkfwerfunLHS(z=grid[j],k=5,N=N,zz=leukz)
empRHSk5[j] = sEMPfullkfwerfunRHS(z=grid[j],k=5,N=N,zz=leukz)
parLHSk5[j]= sPARfullkfwerfunLHS(z=grid[j],k=5,N=N,zz=leukz)
parRHSk5[j]= sPARfullkfwerfunRHS(z=grid[j],k=5,N=N,zz=leukz)

empLHSk20[j] = sEMPfullkfwerfunLHS(z=grid[j],k=20,N=N,zz=leukz)
empRHSk20[j] = sEMPfullkfwerfunRHS(z=grid[j],k=20,N=N,zz=leukz)
parLHSk20[j]= sPARfullkfwerfunLHS(z=grid[j],k=20,N=N,zz=leukz)
parRHSk20[j]= sPARfullkfwerfunRHS(z=grid[j],k=20,N=N,zz=leukz)

#empLHSk50[j] = EMPfullkfwerfunLHS(z=grid[j],k=50,N=N,zz=leukz)
#empRHSk50[j] = EMPfullkfwerfunRHS(z=grid[j],k=50,N=N,zz=leukz)
#parLHSk50[j]=PARfullkfwerfunLHS(z=grid[j],k=50,N=N,pi0hat=1)
#parRHSk50[j]=PARfullkfwerfunRHS(z=grid[j],k=50,N=N,pi0hat=1)
print(j)

}

save.image("leukemiaresults.Rdata")

#load("leukemiaresults.Rdata")

#### Determine the number of significant genes under
#### a given setting, e.g. k=5, alpha=0.10

LHSzcutemp = max(grid[empLHSk5<0.1 & grid<0])
RHSzcutemp = max(grid[empRHSk5<0.1 & grid>0])

sum(leukz<LHSzcutemp)
# 83
sum(leukz>RHSzcutemp)
# 0

LHSzcutpar = max(grid[parLHSk5<0.1 & grid<0])
RHSzcutpar = max(grid[parRHSk5<0.1 & grid>0])
sum(leukz<LHSzcutpar)
# 440
sum(leukz>RHSzcutpar)
# 0


# create the partial figure... Runs very very slowly...

postscript("leukemiaex.ps", paper="letter")

# leukemia example

plot(grid, empLHSk1, xlim=c(-8,8),ylim=c(0,1),type="n", xlab="z values", ylab="k-FWER")
#k=1
lines(grid[grid<0], empLHSk1[grid<0], col=2, lwd=3, lty=1)
lines(grid[grid<0], parLHSk1[grid<0], col=1, lwd=3, lty=1)
lines(grid[grid>0], empRHSk1[grid>0], col=2, lwd=3, lty=1)
lines(grid[grid>0], parRHSk1[grid>0], col=1, lwd=3, lty=1)

#k=5
lines(grid[grid<0], empLHSk5[grid<0], col=2, lwd=3, lty=2)
lines(grid[grid<0], parLHSk5[grid<0], col=1, lwd=3, lty=2)
lines(grid[grid>0], empRHSk5[grid>0], col=2, lwd=3, lty=2)
lines(grid[grid>0], parRHSk5[grid>0], col=1, lwd=3, lty=2)

#k=20
lines(grid[grid<0], empLHSk20[grid<0], col=2, lwd=3, lty=3)
lines(grid[grid<0], parLHSk20[grid<0], col=1, lwd=3, lty=3)
lines(grid[grid>0], empRHSk20[grid>0], col=2, lwd=3, lty=3)
lines(grid[grid>0], parRHSk20[grid>0], col=1, lwd=3, lty=3)

dev.off()

system("ps2pdf leukemiaex.ps")





