####################################################################################
#
# LEARNING TIME PER TASK
#
####################################################################################


library(plyr)
library(stringr)

#source("CI-Functions.R")
source("CI-Functions-Bonferroni.R")

# Run everything relative to the folder containing this script.
# NOTE: rstudioapi::getActiveDocumentContext() only works inside an RStudio
# session; when running with Rscript, set the working directory manually.
defaultpath <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(defaultpath)

# Load the raw answers.
file <- "./data/user_answers_ms.csv"
tmp <- read.csv(file, stringsAsFactors = FALSE)

# Keep only the non-training trials.
# read.csv() converts a column that contains nothing but True/False into a
# logical vector; comparing that against the string "False" never matches.
# Going through as.character()/toupper() accepts both the character form
# ("False") and the logical form (FALSE).
is_training_flag <- toupper(as.character(tmp$is_training))
tmp <- tmp[which(is_training_flag == "FALSE"), ]

# error = 1 for a wrong answer, 0 for a correct one (same normalisation as
# above so that a logical is_correct column is handled too). Any other value
# is passed through as.numeric() unchanged, as before.
is_correct_flag <- toupper(as.character(tmp$is_correct))
tmp$error <- as.character(tmp$is_correct)
tmp$error[which(is_correct_flag == "FALSE")] <- 1
tmp$error[which(is_correct_flag == "TRUE")] <- 0
tmp$error <- as.numeric(tmp$error)

# Replace the internal visualization codes with their display names.
tmp$vis[tmp$vis == "vis1"] <- "GlyMa"
tmp$vis[tmp$vis == "vis2"] <- "CoordV"
tmp$vis[tmp$vis == "vis3"] <- "LocSto"

# One row per user x visualization x question x map size.
mydata <- aggregated_table <- ddply(
  tmp,
  c("user", "vis", "question_code", "question_internal_id", "map_size"),
  summarise,
  mean_error = mean(error) * 100,  # turn [0,1] values to percentages
  mean_time = mean(time) / 1000,   # turn milliseconds to seconds
  mean_confidence = mean(confidence),
  mean_difficulty = mean(difficulty)
)

# Output folder for the tables and plots written below; create it up front so
# that write.table()/ggsave() do not fail on a fresh checkout.
path <- "plots/per_task_mapSize/raw/"
dir.create(path, recursive = TRUE, showWarnings = FALSE)


#############################
# analysis of all questions individually #
#############################


#do the analysis per task
tasks <- c("A", "B", "C", "D", "E")
vis_all <- c("CoordV", "GlyMa", "LocSto")

#vis <- 'storylines'
#current_task <- 'CO'

# For every visualization and task: compute the per-user mean completion time
# for each map size, bootstrap CIs of the two means and of their paired
# difference, then write each result table to a text file and save its plot.
for (vis in vis_all) {

  for (current_task in tasks) {

    print ("--------------------")
    print (paste("Current task: ", current_task))

    # select only data for this task and visualization
    elements <- mydata[which(mydata$question_code == current_task & mydata$vis == vis), ]
    # order for the transpose
    elements <- elements[order(elements$user, elements$map_size), ]

    # keep only columns needed
    myvars <- c("user", "map_size", "mean_time")
    elements <- elements[myvars]

    # aggregate all questions of this task into one value per user and map size
    statstable_time <- ddply(elements,
                             c("user", "map_size"),
                             summarise,
                             time = mean(mean_time)
    )
    elements <- statstable_time

    # Long -> wide: one row per user, one time column per map size.
    # reshape() orders the new columns by first appearance of each map size,
    # which depends on the first user's data. Select the columns explicitly in
    # sorted level order so the positional renaming below is deterministic.
    # NOTE(review): this keeps the original assumption that the first sorted
    # map_size level is the "big" map and the second the "small" one
    # (e.g. levels "big"/"small") - confirm against the data.
    size_levels <- sort(unique(elements$map_size))
    if (length(size_levels) != 2) {
      stop("Expected exactly 2 map sizes for task ", current_task,
           " / ", vis, ", found ", length(size_levels), call. = FALSE)
    }
    elements <- reshape(elements, timevar = "map_size", idvar = c("user"), direction = "wide")
    elements <- elements[, c("user", paste0("time.", size_levels))]
    colnames(elements) <- c("user", "big", "small")

    # drop users (rows) that miss a value for one of the two map sizes, so the
    # paired difference below is defined for every remaining user
    elements <- na.omit(elements)

    #########
    # stats #
    #########

    data <- elements

    # Results are indexed below as [1] = point estimate, [2] = lower CI bound,
    # [3] = upper CI bound.
    techniqueA <- bootstrapMeanCI(data$small)
    techniqueB <- bootstrapMeanCI(data$big)

    analysisData <- list()
    analysisData$name <- c("big", "small")
    analysisData$pointEstimate <- c(techniqueB[1], techniqueA[1])
    analysisData$ci.max <- c(techniqueB[3], techniqueA[3])
    analysisData$ci.min <- c(techniqueB[2], techniqueA[2])

    datatoprint <- data.frame(factor(analysisData$name),
                              analysisData$pointEstimate,
                              analysisData$ci.min,
                              analysisData$ci.max)
    # Column names shared with the differences table below (previously
    # "upperBound_CI " carried a stray trailing space here).
    colnames(datatoprint) <- c("phase", "mean_time", "lowerBound_CI", "upperBound_CI")

    filename <- paste0("phase_time_means_task_", current_task, "_", vis)

    write.table(datatoprint, paste0(path, "printed_", filename, ".txt"), sep = ",", row.names = FALSE)
    barChart(datatoprint, analysisData$name, nbTechs = 2, ymin = 0, ymax = 350, mycolor = "dodgerblue2", "", "")
    #SAVE
    ggsave(paste0(path, "plot_", filename, ".pdf"), device = "pdf", width = 5, height = 2)

    # CIs with adapted alpha value for multiple comparisons
    # Result is indexed as [1] estimate, [2]/[3] lower/upper CI, [4] level,
    # [5]/[6] lower/upper corrected CI.
    diffBA <- bootstrapMeanCI_corr(data$big - data$small, 1)

    analysisData <- list()
    analysisData$name <- c("big-small")
    analysisData$pointEstimate <- c(diffBA[1])
    analysisData$ci.max <- c(diffBA[3])
    analysisData$ci.min <- c(diffBA[2])
    analysisData$level <- c(diffBA[4])
    analysisData$ci_corr.max <- c(diffBA[6])
    analysisData$ci_corr.min <- c(diffBA[5])

    # Lower bounds must come before upper bounds to match the column names
    # (they were previously passed as max, min and therefore mislabelled).
    datatoprint <- data.frame(factor(analysisData$name),
                              analysisData$pointEstimate,
                              analysisData$ci.min,
                              analysisData$ci.max,
                              analysisData$level,
                              analysisData$ci_corr.min,
                              analysisData$ci_corr.max)
    # "mean_time" holds the mean big-small difference here; the name is kept
    # so this table has the same layout as the means table for plotting.
    colnames(datatoprint) <- c("phase", "mean_time", "lowerBound_CI", "upperBound_CI", "corrected_CI", "lowerBound_CI_corr", "upperBound_CI_corr")

    filename <- paste0("phase_time_diffs_task_", current_task, "_", vis)

    write.table(datatoprint, paste0(path, "printed_", filename, ".txt"), sep = ",", row.names = FALSE)

    barChart(datatoprint, analysisData$name, nbTechs = 1, ymin = -350, ymax = 350, mycolor = "dodgerblue2", "", "")
    ggsave(paste0(path, "plot_", filename, ".pdf"), device = "pdf", width = 5, height = 2)


  } # end of loop for each task
} # end of loop for each visualization