## ----setup, include=FALSE---------------------------------------------------------------------------------------------
oldopts <- options(width = 120)

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = " ",
  fig.width=7,
  fig.height=5
)


## ----loadlib, eval=TRUE, message=FALSE, warning=FALSE-----------------------------------------------------------------
library(osktnorm)


## ----load-excel-pkgs, message=FALSE, warning=FALSE--------------------------------------------------------------------
if (!requireNamespace("readxl", quietly = TRUE)) install.packages("readxl")
if (!requireNamespace("writexl", quietly = TRUE)) install.packages("writexl")

library(readxl)
library(writexl)


## ----read-excel, message=FALSE, warning=FALSE-------------------------------------------------------------------------
file_path <- system.file("extdata", "sativapheno.xlsx", package = "osktnorm")
pheno <- read_excel(file_path)

# Select some variables from the pheno
phenoset <- as.data.frame(pheno[, c(4,5,6,7,9,10,12,13,14)])
phenoset <- phenoset[, sapply(phenoset, is.numeric)]

head(phenoset)


## ----box-plots, message=FALSE, warning=FALSE--------------------------------------------------------------------------
oldpar <- par(no.readonly = TRUE)
nvar <- ncol(phenoset)
nrow_plot <- ceiling(sqrt(nvar))
ncol_plot <- ceiling(nvar / nrow_plot)

par(mfrow = c(nrow_plot, ncol_plot), mar = c(4, 4, 3, 1))
cols <- rainbow(nvar)

for(i in seq_len(nvar)) {
  x <- phenoset[[i]]
  x <- x[!is.na(x)]
  
  if(length(x) == 0) next
  h_calc <- hist(x, plot = FALSE) 
  d <- density(x)   
  max_y <- max(c(h_calc$density, d$y)) * 1.1
  hist(x, 
       breaks = 15, 
       freq = FALSE, 
       col = adjustcolor(cols[i], 0.5), 
       border = "white",
       main = colnames(phenoset)[i],
       xlab = "Transformed Value", 
       ylab = "Density",
       ylim = c(0, max_y))
  
  lines(d, col = "black", lwd = 2)
  curve(dnorm(x, mean = mean(x), sd = sd(x)), add = TRUE, col = "red", lty = 2, lwd = 2)
}
par(oldpar)

## ----densplots, message=FALSE, warning=FALSE--------------------------------------------------------------------------
shapiro_results <- data.frame(
  Trait = colnames(phenoset),
  W     = NA_real_,
  pval  = NA_real_,
  IsNormal = NA_character_,
  stringsAsFactors = FALSE
)

for(i in seq_len(ncol(phenoset))) {
  x <- phenoset[[i]]
  x <- x[!is.na(x)]
  
  if(length(x) >= 3) {
    test <- shapiro.test(x)
    shapiro_results$W[i] <- test$statistic
    shapiro_results$pval[i] <- test$p.value
    shapiro_results$IsNormal[i] <- ifelse(test$p.value > 0.05, "YES", "NO")
  } else {
    shapiro_results$W[i] <- NA
    shapiro_results$pval[i] <- NA
    shapiro_results$IsNormal[i] <- NA
  }
}
shapiro_results


## ----batch-normalization, message=FALSE, warning=FALSE----------------------------------------------------------------

# Normalization only 

norm_res1 <- osktnorm(phenoset, normtests = FALSE)
phenonormal <- norm_res1$normalized
head(phenonormal)


## ----boxplots, message=FALSE, warning=FALSE---------------------------------------------------------------------------
oldpar <- par(no.readonly = TRUE)
par(mar = c(8,4,4,2))
boxplot(
  phenonormal,
  col = rainbow(ncol(phenonormal)),
  border = "black",
  outline = TRUE,
  las = 2,
  main = "OSKT Normalized Values",
  ylab = "Values",
  cex.axis = 0.8
)
par(oldpar)

## ----histograms, message=FALSE, warning=FALSE-------------------------------------------------------------------------
oldpar <- par(no.readonly = TRUE)
nvar <- ncol(phenonormal)
nrow_plot <- ceiling(sqrt(nvar))
ncol_plot <- ceiling(nvar / nrow_plot)

par(mfrow = c(nrow_plot, ncol_plot), mar = c(4,4,3,1))
cols <- rainbow(nvar)

for(i in seq_len(nvar)) {
  x <- phenonormal[[i]]
  x <- x[!is.na(x)]
  
  if(length(x) == 0) next
  
  d <- density(x)
  h_calc <- hist(x, plot = FALSE) 
  
  max_y <- max(d$y, h_calc$density) * 1.1
  
  hist(x, freq = FALSE, 
       col = adjustcolor(cols[i], 0.6),
       border = "white",
       main = colnames(phenonormal)[i],
       xlab = "", ylab = "Density",
       ylim = c(0, max_y))
  
  lines(d, col = "black", lwd = 2)
}
par(oldpar)


## ----optparams, message=FALSE, warning=FALSE--------------------------------------------------------------------------
norm_res2 <- osktnorm(phenoset, normtests = "all")

phenonormal <- norm_res2$normalized
paramstable <- norm_res2$parameters
paramstable

## ----testresults, message=FALSE, warning=FALSE------------------------------------------------------------------------
testtable <- norm_res2$normtests
testtable


## ----write-excel, eval=FALSE, echo=TRUE, message=FALSE, warning=FALSE-------------------------------------------------
# writexl::write_xlsx(phenonormal, path = "sativapheno_normalized.xlsx")
# 

## ----final-checks, message=FALSE, warning=FALSE-------------------------------
 options(oldopts)

