####################################################################################
#
# TIME
#
####################################################################################

library(plyr)
library(stringr)

#source("CI-Functions.R")
source("CI-Functions-Bonferroni.R")

# Work relative to this script's folder so the relative input/output paths
# below resolve. The RStudio API only answers inside a running RStudio session
# with a saved active document; in any other situation (e.g. Rscript, or the
# console being the active pane) the current working directory is kept as is.
defaultpath <- getwd()
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  active_doc <- rstudioapi::getActiveDocumentContext()$path
  if (length(active_doc) == 1 && nzchar(active_doc)) {
    defaultpath <- dirname(active_doc)
  }
}
setwd(defaultpath)

# Folder (relative to the working directory) that receives the result tables
path <- 'plots/3_per_task/raw/'

# Select phase to analyze, for the analysis per task we consider only the main phase
phase <- '_main'
#phase <- '_training'

# Input file name is derived from the selected phase
file_data <- paste0("aggregated_data", phase, ".csv")

# Drop any table left over from a previous run so that a failed read cannot
# silently leave stale data behind.
if (exists("mydata")) {
  rm(mydata)
}
# Comma-separated file with a header row. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
mydata <- read.table(file_data, header = TRUE, sep = ",")


# Collapse the twelve single-letter question codes into four task labels:
#   A, B, C -> "CO"    D, E, F -> "MA"    G, H, I -> "SI"    J, K, L -> "CH"
# The passes must run in this order: no label contains a letter that a later
# pass matches (e.g. "CH" is produced last because C and H are themselves
# source letters), so an already substituted label is never rewritten again.
mydata$question_code <- local({
  recode_rules <- list(
    c("[ABC]", "CO"),
    c("[DEF]", "MA"),
    c("[GHI]", "SI"),
    c("[JKL]", "CH")
  )
  Reduce(
    function(codes, rule) gsub(rule[1], rule[2], codes),
    recode_rules,
    mydata$question_code
  )
})


##########################################
# analysis of each task individually     #
##########################################


# Do the analysis per task: for every task label, write one table with the
# mean time per technique and one table with the PAOH - NSL difference.
tasks <- c("CO", "MA", "SI", "CH")

# write.table() does not create missing folders, so make sure the output
# folder exists before the first file is written.
dir.create(path, showWarnings = FALSE, recursive = TRUE)

for (current_task in tasks) {

  print ("--------------------")
  print (paste("Current task: ", current_task))

  # select only data for this task
  elements <- mydata[which(mydata$question_code == current_task), ]
  # order for the transpose
  elements <- elements[order(elements$user, elements$vis), ]

  # keep only columns needed
  myvars <- c("user", "vis", "mean_time")
  elements <- elements[myvars]

  # aggregating all cases per participant (3 repetitions): one mean time per
  # user and visualization
  statstable_time <- ddply(elements,
                           c("user", "vis"),
                           summarise,
                           time = mean(mean_time)
  )
  elements <- statstable_time

  # one row per user, one "time.<vis>" column per visualization
  elements <- reshape(elements, timevar = "vis", idvar = c("user"), direction = "wide")
  # strip the literal "time." prefix added by reshape (anchored and escaped so
  # that nothing else in a column name can match)
  colnames(elements) <- sub("^time\\.", "", colnames(elements))

  # drop rows (users) that have a missing value for any visualization
  elements <- na.omit(elements)


  #########
  # stats #
  #########

  data <- elements

  # bootstrapMeanCI is not defined in this file (presumably it comes from the
  # sourced CI-Functions-Bonferroni.R). Its result is used below as
  # [1] point estimate, [2] CI minimum, [3] CI maximum.
  techniqueA <- bootstrapMeanCI(data$storylines)
  techniqueB <- bootstrapMeanCI(data$paohvis)

  analysisData <- list()
  analysisData$name <- c("PAOH", "NSL")
  analysisData$pointEstimate <- c(techniqueB[1], techniqueA[1])
  analysisData$ci.max <- c(techniqueB[3], techniqueA[3])
  analysisData$ci.min <- c(techniqueB[2], techniqueA[2])

  datatoprint <- data.frame(factor(analysisData$name),
                            analysisData$pointEstimate,
                            analysisData$ci.min,
                            analysisData$ci.max)
  # Column names are the ones the plotting step parses. "upperBound_CI" is
  # written without a trailing blank so it matches the difference table below.
  colnames(datatoprint) <- c("vis", "mean_time", "lowerBound_CI", "upperBound_CI")

  filename <- paste0("time_means_task_", current_task, phase)

  write.table(datatoprint, paste0(path, "printed_", filename, ".txt"), sep = ",", row.names = FALSE)

  # CIs with adapted alpha value for multiple comparisons.
  # bootstrapMeanCI_corr is not defined in this file either; its result is used
  # below as [1] point estimate, [2]/[3] CI minimum/maximum, [4] level,
  # [5]/[6] corrected CI minimum/maximum.
  # NOTE(review): the second argument (1) is presumably the number of
  # comparisons being corrected for -- confirm against the helper.
  diffBA <- bootstrapMeanCI_corr(data$paohvis - data$storylines, 1)

  analysisData <- list()
  analysisData$name <- c("PAOH-NSL")
  analysisData$pointEstimate <- c(diffBA[1])
  analysisData$ci.max <- c(diffBA[3])
  analysisData$ci.min <- c(diffBA[2])
  analysisData$level <- c(diffBA[4])
  analysisData$ci_corr.max <- c(diffBA[6])
  analysisData$ci_corr.min <- c(diffBA[5])

  # Values are listed minimum first, maximum second so that each one lands
  # under the matching lowerBound_* / upperBound_* column name, in the same
  # layout as the means table above.
  datatoprint <- data.frame(factor(analysisData$name),
                            analysisData$pointEstimate,
                            analysisData$ci.min,
                            analysisData$ci.max,
                            analysisData$level,
                            analysisData$ci_corr.min,
                            analysisData$ci_corr.max)
  # Column names are the ones the plotting step parses
  colnames(datatoprint) <- c("technique", "mean_time", "lowerBound_CI", "upperBound_CI", "corrected_CI", "lowerBound_CI_corr", "upperBound_CI_corr")

  filename <- paste0("time_diffs_task_", current_task, phase)

  write.table(datatoprint, paste0(path, "printed_", filename, ".txt"), sep = ",", row.names = FALSE)

} # end of loop for each task