
#options(java.parameters = "-Xmx12g")
gc()
require(reshape)  # this should be removed in favor of reshape2 at some point.
require(reshape2)
require(spatstat)
require(stringr)
require(gplots)
require(ggplot2)
require(ggtext)
#require(xlsx)
require(openxlsx)
#require(openxlsx2)
require(NMF)
require(corpcor)
require(corrplot)

require(igraph)
require(qgraph)
require(class)
require(kohonen)
require(fdrtool)
require(randomForest)
require(grid)
require(RColorBrewer)
require(scales)
require(glue)
require(dplyr)
require(tidyverse)
require(tibble)


# Start from an empty workspace so nothing from an earlier run leaks into this
# one. This has to happen BEFORE anything the script wants to keep is created:
# previously `opar` was assigned first and then wiped by this very call.
rm(list = ls())

# Plot geometry used by the script, then a snapshot of the resulting
# (settable) graphics parameters.
# NOTE(review): the snapshot is taken after the two par() changes, as in the
# original statement order, so par(opar) returns to this configured state
# rather than to R's defaults -- confirm that is the intent.
par(pin = c(5, 5))
par(plt = c(.263, .737, 1.198, 2.198))
opar <- par(no.readonly = TRUE)

# Remove the output files left behind by a previous run.
file.remove(list.files("concept/", full.names = TRUE))
file.remove(list.files("picts/", full.names = TRUE))

# Load the project helper functions (sourced unconditionally on every run).
source("mine.R")
source("PortalFunctions1.R")
#source("4Correlation analysis.R")
#source("4Correlation analysis.R")

### setup the data as needed from input files & make maps of it to be put into spreadsheet
# Wall-clock start; reused for the report stamp and the elapsed-time output.
starttime <- Sys.time()
#numLab <- 1
# Lab index; feeds the file token and ListNum inside the modes loop.
Lab <- 1

#for(Lab in 1:numLab) {   # labs cycle starts here
dataFile <- "Lab1-4297Fixed.tsv"   # for manual running

# Input path, relative to the working directory.
fileIn <- paste0("data/RR Datasets/4297/", dataFile)

#------------------- start analysis/data.R

# Load the input table and derive every dataset used by the report.
# Stop right away when the input is missing: nothing further down can run
# without these objects, and failing here gives a clear message instead of a
# later, unrelated "object not found" error.
if (!file.exists(fileIn)) {
  stop("Input file not found: ", fileIn, call. = FALSE)
}

inFile <- read.delim(file = fileIn, header = TRUE, stringsAsFactors = TRUE)

# deSign associates metadata for experimental design
deSign <- as.data.frame(createDesign(inFile))   # experimental design
colnames(deSign) <- c("Sample", "UserID", "ExperimentalGroup", "GroupFactor") # "Conc", "SampleID")

# files associates files and their attributes with samples
files <- as.data.frame(createFiles(inFile))    # Files used

# lookup associates all metadata concerning compounds found
lookup <- as.data.frame(createLookup(inFile))  # compounds found

# Now build data matrices for all datasets.
# NOTE(review): the second argument looks like the source column of inFile for
# each measure -- confirm against ts2swAUC()/ts2swR().
dataRaw <- ts2swAUC(inFile, 19)              # Raw C12
dataC13Raw <- ts2swAUC(inFile, 20)           # Raw C13
dataRatio <- ts2swR(inFile, 21)              # Apical Ratio
dataSC <- ts2swAUC(inFile, 22)               # Suppression C12 Corrected
dataC13SC <- ts2swAUC(inFile, 23)            # SC C13 == Norm C13
dataNormalized <- ts2swAUC(inFile, 24)       # NormC12Inten
dataZ <- ts2swR(inFile, 28)                  # ZScore

# MSTUS subsets: keep only the columns whose raw value exceeds 1 in every row.
# The count is compared with nrow(deSign), so this assumes one data row per
# design row -- TODO confirm.
test <- colSums(dataRaw > 1)
keepForMSTUS <- test == nrow(deSign)
# drop = FALSE keeps the two-dimensional shape even when a single column
# survives; otherwise the subset collapses to a vector and rowSums() fails.
MSTUSdataRaw <- dataRaw[, keepForMSTUS, drop = FALSE]
MSTUSdataSC <- dataSC[, keepForMSTUS, drop = FALSE]
MSTUSdataNormalized <- dataNormalized[, keepForMSTUS, drop = FALSE]
MSTUSdataC13Raw <- dataC13Raw[, keepForMSTUS, drop = FALSE]
MSTUSdataC13SC <- dataC13SC[, keepForMSTUS, drop = FALSE]
MSTUSdataRatio <- dataRatio[, keepForMSTUS, drop = FALSE]

# One minus the raw / suppression-corrected C13 quotient.
dataPctSupp <- 1 - (dataC13Raw[, ] / dataC13SC[, ])

# NOTE(review): renormalize() takes no arguments, so it presumably reads the
# objects built above from the global environment -- confirm in its definition.
MSTUSdataNormalized <- renormalize()

# Modes the report loop iterates over.
# Alternative full set: c("Raw", "SC", "Norm")
Modes <- "Raw"   # for MSTUS Report

for(mode in Modes) {     # start of modes cycle
  #  mode = "Norm" # to debug loop

  # DATA is generally used in the analyses; assigning it per mode makes it easy
  # to swap the underlying dataset.
  if (mode == "Raw") {
    DATA <- dataRaw
    MDATA <- MSTUSdataRaw
    Labcol <- 9
    tkn <- paste0("_L", Lab, "R")       # token embedded in output file names
    ListNum <- ((Lab - 1) * 3) + 1
  } else {
    # Without this guard an unconfigured mode would silently reuse DATA, tkn,
    # etc. from the previous iteration and overwrite that iteration's outputs.
    stop("Unsupported mode '", mode, "': only \"Raw\" is configured",
         call. = FALSE)
  }

  #colors <- c(addTrans("red",200), addTrans("blue",200), addTrans("green",200), addTrans("darkmagenta",200), addTrans("orange",200), addTrans("purple",200) )

  # Fixed group palette (from Haley). A rainbow() palette used to be computed
  # here first but was overwritten immediately, so it has been dropped.
  # Note: `list` is a plain character vector that shadows the name of
  # base::list; calls such as list(...) still resolve to the function.
  group.colors <- c("red", "blue", "gold", "purple", "darkorange", "green", "brown")
  list <- group.colors

  # One colour per row of deSign, picked by the group in column 3.
  listByGroup <- list[as.factor(deSign[, 3])]

  # list of groups
  #groups <- levels(deSign$GroupFactor)
  groups <- c("GroupA", "GroupB", "GroupC")  # original RR group

  conditions <- factor(deSign[, 3])  #levels(deSign[,3])

  # Capped at 4.
  # NOTE(review): length(conditions) is the number of rows in deSign, not the
  # number of distinct groups -- confirm which one knum is meant to be.
  knum <- if (length(conditions) > 4) 4 else length(conditions)

# ---------------------------Start plot/data

###  Runs QA/QC on dataset - mostly for experimental page but reused elsewhere
# univariate stuff for Experimental deSign & QC

# Row-wise signal totals (MSTUS) for the raw, suppression-corrected and
# normalized MSTUS subsets; the plot below treats each row as one sample.
SumDataRaw <- rowSums(MSTUSdataRaw, na.rm = FALSE)
SumDataSC <- rowSums(MSTUSdataSC, na.rm = FALSE)
SumDataNorm <- rowSums(MSTUSdataNormalized, na.rm = FALSE)

# Common y-range with headroom so that all three series fit on one plot.
maxData <- max(SumDataRaw, SumDataNorm, SumDataSC)
minData <- min(SumDataRaw, SumDataNorm, SumDataSC)
rawMin <- 0.6 * minData  #.75
rawMax <- 1.1 * maxData  #1.25

# Legend labels carrying the relative standard deviation (sd / mean) of each series.
SDRaw <- paste("Raw Data (", percent((sd(SumDataRaw) / mean(SumDataRaw)), digits = 2), " RSD)", sep = "")
SDSC <- paste("SC Data (", percent((sd(SumDataSC) / mean(SumDataSC)), digits = 2), " RSD)", sep = "")
SDNorm <- paste("Norm Data (", percent((sd(SumDataNorm) / mean(SumDataNorm)), digits = 2), " RSD)", sep = "")

BoxPlot <- paste0("picts/BoxPlotOverallSamples", tkn, ".png")
png(BoxPlot, 1500, 600)  # Used in excel
plot(SumDataRaw, col = "#CC3333", pch = 8, cex = 1.6, ylim = c(rawMin, rawMax),
     main = "Summed IROA signal (MSTUS) in samples", yaxt = "n", ylab = "",
     xlab = "Samples (in original experimental order)", cex.main = 1)
# The axis range is fixed by plot() above. ylim/yaxt were dropped from the
# points()/lines() calls because they have no effect there (ylim is not a
# graphical parameter and only triggers warnings).
lines(SumDataRaw, col = "#CC3333", lty = 3)
points(SumDataNorm, col = "#3333FF", pch = 15, cex = 1.5)
lines(SumDataNorm, col = "#3333FF", lty = 3)
points(SumDataSC, col = "#336600", pch = 19, cex = 1.3)
lines(SumDataSC, col = "#336600", lty = 3)
# Legend order is Raw, SC, Norm; the symbols must match the ones drawn above
# (Norm is plotted with pch 15, it was shown as 19 before).
legend("topleft", legend = c(SDRaw, SDSC, SDNorm), pch = c(8, 19, 15),
       col = c("#CC3333", "#336600", "#3333FF"), cex = 1, horiz = FALSE)
axis(2, cex.axis = .75)
mtext("MSTUS values", side = 2, line = 2, cex = 2)
devOff()

# Standalone colour key for the groups in deSign column 3, written to its own
# image so it can be placed in the spreadsheet.
# `list` here is the colour vector assigned earlier in the loop, not base::list.
png("picts/legend.png", width = 1000, height = 800)  # used in excel
plot.new()
legend(
  "topleft",
  legend = levels(deSign[, 3]),
  col = list,
  pch = 20,
  cex = 20 / length(list),
  horiz = FALSE
)
devOff()

# ------------------------------------end plot/data

# Start a new, empty workbook; every sheet of the report is added to it below.
wb <- createWorkbook()

# Design - fills in the Experimental design tab and associated data tabs
addWorksheet(wb, "Design")

# Caption, then the design table without its first column.
SampDesign <- as.data.frame("The Sample design is below")
writeData(wb, "Design", SampDesign,
          startRow = 2, startCol = 2, colNames = FALSE, rowNames = FALSE)
writeData(wb, "Design", deSign[-1],
          startRow = 3, startCol = 2, colNames = TRUE, rowNames = TRUE)
addStyle(wb, "Design", style = createStyle(numFmt = "0.00"),
         rows = 1:35, cols = 5, gridExpand = FALSE, stack = TRUE)

# The injections table sits below the design table.
injectionsRow <- nrow(deSign) + 5
Injections <- as.data.frame("These injections for each sample were examined")
writeData(wb, "Design", Injections,
          startRow = injectionsRow, startCol = 2, colNames = FALSE, rowNames = FALSE)
writeData(wb, "Design", files,
          startRow = injectionsRow + 1, startCol = 2, colNames = TRUE, rowNames = FALSE)

# Provenance line: when the run started, which mode, and which input file.
timeStamp <- paste("This report was compiled on", starttime, " Mode = ", mode, " Based on file = ", fileIn)
Stamp <- as.data.frame(timeStamp)
writeData(wb, "Design", Stamp,
          startRow = 26, startCol = ncol(deSign) + 12, colNames = FALSE, rowNames = FALSE)

# QC overview plot and group colour key, placed to the right of the tables.
imageCol <- ncol(deSign) + 4
insertImage(wb, "Design", BoxPlot, startRow = 2, startCol = imageCol)
insertImage(wb, "Design", "picts/legend.png", startRow = 20, startCol = imageCol)

# One worksheet per dataset, all with the same layout: a caption in cell A1
# and the data (with row and column names) starting at row 2, column 2.
# The entries are processed in order, so the sheets appear in this sequence.
# (A character vector named `list` exists in this script; list(...) calls
# still resolve to the base function.)
datasetSheets <- list(
  list(name = "C12-Clean", caption = "The IROA Clean (Raw) dataset", data = dataRaw),
  list(name = "C12-SC", caption = "The C-12 Suppression Corrected (SC) dataset", data = dataSC),
  list(name = "C12-Norm", caption = "The C-12 Normalized (Norm) dataset", data = dataNormalized),
  list(name = "Ratio", caption = "The peak ratios (ratio) dataset", data = dataRatio),
  list(name = "C12-MSTUS", caption = "The C12 IS MSTUS dataset", data = MSTUSdataRaw),
  list(name = "C13-MSTUS", caption = "The C13 IS MSTUS dataset", data = MSTUSdataC13Raw)
)

for (sheetSpec in datasetSheets) {
  addWorksheet(wb, sheetSpec$name)
  Stamp <- as.data.frame(sheetSpec$caption)
  writeData(wb, sheetSpec$name, Stamp,
            startRow = 1, startCol = 1, colNames = FALSE, rowNames = FALSE)
  writeData(wb, sheetSpec$name, sheetSpec$data,
            startRow = 2, startCol = 2, colNames = TRUE, rowNames = TRUE)
}

# Three-decimal format for the ratio values. `:` binds tighter than `+`, so
# these are rows 3..nrow+2 and columns 3..ncol+2, i.e. the cells below the
# header row and to the right of the row-name column.
addStyle(wb, "Ratio", style = createStyle(numFmt = "0.000"),
         rows = (1:nrow(dataRatio)) + 2, cols = (1:ncol(dataRatio)) + 2,
         gridExpand = TRUE)

# Cmpds - the compound metadata table (lookup) with a caption in cell A1.
addWorksheet(wb, "Cmpds")
Stamp <- as.data.frame(paste("The compound names associated with each Bin dataset"))
writeData(wb, "Cmpds", Stamp, startRow = 1, startCol = 1, colNames = FALSE, rowNames = FALSE)
writeData(wb, "Cmpds", lookup, startRow = 2, startCol = 2, rowNames = FALSE, colNames = TRUE)

# The header lands on sheet row 2, so the lookup records occupy rows
# 3 .. nrow(lookup) + 2. The earlier `3:nrow(lookup)+3` parsed as
# `(3:nrow(lookup)) + 3`, which started at row 6 and left the first three
# records unformatted. seq_len() also copes with an empty lookup table.
cmpdRows <- seq_len(nrow(lookup)) + 2
addStyle(wb, "Cmpds", style = createStyle(numFmt = "0.00"),
         rows = cmpdRows, cols = 7, gridExpand = TRUE, stack = TRUE)  # for NMFSmpls
addStyle(wb, "Cmpds", style = createStyle(numFmt = "0.0000"),
         rows = cmpdRows, cols = 6, gridExpand = TRUE, stack = TRUE)
setColWidths(wb, "Cmpds", cols = 3:5, widths = 22)
setColWidths(wb, "Cmpds", cols = 7, widths = 7)

gc()
print(paste("token finished", tkn))

# Write the workbook to disk. The report name reuses the input file name with
# its last four characters (the ".tsv" suffix of dataFile) removed.
file <- str_sub(dataFile, 1, nchar(dataFile) - 4)
reportPath <- paste0("reports/DualMSTUSResults", file, ".xlsx")
saveWorkbook(wb, reportPath, overwrite = TRUE)
#saveWorkbook(wb, paste("reports/DualMSTUSRR_TEST.xlsx", sep=""), overwrite = T )

}  # end of modes
   
# Elapsed wall-clock time since `starttime`, followed by the current time.
# Both are bare top-level expressions, so their values are displayed only
# where R auto-prints them (e.g. at the console or under Rscript; a plain
# source() call without echo/print.eval shows nothing).
Sys.time()-starttime
Sys.time()



