library(plyr)
library(tidyverse)
library(gridExtra)
library(cowplot)
library(data.table)

#source("CI-Functions.R")
source("CI-Functions-Bonferroni.R")

# Run from the folder that contains this script so that the relative input and
# output paths used below resolve.
# The RStudio API only works inside an RStudio session, and the active document
# has an empty path until it is saved; in both cases keep the current working
# directory instead of failing.
defaultpath <- getwd()
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  document_path <- rstudioapi::getActiveDocumentContext()$path
  if (length(document_path) == 1 && nzchar(document_path)) {
    defaultpath <- dirname(document_path)
  }
}
setwd(defaultpath)

# Select the phase to analyze; the per-task analysis only considers the main phase.
phase <- '_main'
#phase <- '_training'

# Input file name is derived from the phase suffix, e.g. "aggregated_data_main.csv".
file_data <- paste0("aggregated_data", phase, ".csv")

# Drop any data left over from a previous run first, so that a failed read
# cannot be silently followed by an analysis of stale data.
# inherits = FALSE restricts the check to the current environment, which is the
# only place rm() removes from.
if (exists("mydata", inherits = FALSE)) {
  rm(mydata)
}
mydata <- read.table(file_data, header = TRUE, sep = ",")

# Rewrite the letter question codes (A-L) as "<level>_<task>" codes, where the
# level is 1-3 and the task is one of CO, MA, SI, CH.
# The substitutions are applied one after another to the already-rewritten
# strings, so the order of this table matters: a letter must be replaced before
# any replacement text that contains the same letter is inserted (e.g. "C"
# before "1_CO", "I" before "1_SI").
recoding <- c(
  C = "3_CO", A = "1_CO", B = "2_CO",
  D = "1_MA", E = "2_MA", F = "3_MA",
  I = "3_SI", G = "1_SI", H = "2_SI",
  J = "1_CH", K = "2_CH", L = "3_CH"
)
for (letter in names(recoding)) {
  mydata$question_code <- gsub(letter, recoding[[letter]], mydata$question_code)
}


# Copy of the complete data set, sorted by participant (user) and then by
# visualization (vis). No rows are filtered out here.
tmp <- mydata[order(mydata$user, mydata$vis), ]


# ALL TASKS
# Task codes (the suffix of question_code) and their display labels; the two
# vectors are index-aligned.
tasks <- c("CO", "CH", "SI", "MA")
tasks_names <- c(
  "Find \n relationships",
  "Characterize \n relationships",
  "Similar \n entities",
  "Massive \n events"
)

# Level codes (the prefix of question_code) and the matching column headings.
levels <- c("1", "2", "3")
levels_names <- c(
  "People + Time",
  "People + Locations",
  "People + Locations + Time"
)

# Rows of the final figure; filled in further down.
plots <- list()

#theme_set(theme_gray(base_size = 12))

# Folder that receives the raw tables and the final figure.
path <- "plots/6_per_complexity/"

# Header cells for the figure: the first slot is left empty (NULL) and is
# followed by one centred label per entry of levels_names.
titles <- c(
  list(NULL),
  lapply(levels_names, function(heading) {
    ggdraw() + draw_label(heading, x = 0.6)
  })
)


# First row of the figure: the header cells laid out side by side, with a
# narrow first column.
header_row <- cowplot::plot_grid(
  plotlist = titles,
  nrow = 1,
  rel_widths = c(0.3, 2, 2, 2)
)
plots[[1]] <- header_row
# Show the header row when the script is run interactively.
plots[[1]]


# Bootstrap the mean and its confidence interval for one technique.
#
# values: numeric vector with one value per participant.
# Returns a vector that this script reads as
#   [1] point estimate, [2] CI lower bound, [3] CI upper bound.
# When the sample mean is exactly 0 the bootstrap is skipped and zeros are
# returned (presumably because bootstrapMeanCI cannot handle such data --
# TODO confirm against CI-Functions-Bonferroni.R).
mean_ci_or_zero <- function(values) {
  if (mean(values) == 0) {
    return(c(0, 0, 0))
  }
  bootstrapMeanCI(values)
}

# Analyse one measure (time or error) for one task/level combination: write
# the per-technique means and the PAOH-HSL difference to text files and return
# the two corresponding plots stacked in one column.
#
# elements:     data frame with columns user, vis and `measure`; vis must
#               contain the values "paohvis" and "storylines".
# measure:      name of the column to analyse ("mean_time" or "mean_error").
# label:        prefix used in the output file names ("time" or "error").
# current_task: question code such as "1_CO", used in the file names.
# phase_suffix: phase suffix such as "_main", used in the file names.
# out_dir:      output folder (with trailing slash); files go to its raw/ subfolder.
# y_limit:      upper axis limit of the means plot; the difference plot uses
#               -y_limit to y_limit.
# bar_colour:   colour handed to the plotting helpers.
build_measure_box <- function(elements, measure, label, current_task,
                              phase_suffix, out_dir, y_limit, bar_colour) {
  # One row per participant, one column per visualization technique.
  wide <- reshape(elements[c("user", "vis", measure)],
                  timevar = "vis", idvar = "user", direction = "wide")
  colnames(wide) <- gsub(paste0(measure, "."), "", colnames(wide), fixed = TRUE)

  ## MEANS
  # The bootstrap calls stay in the order storylines, paohvis, difference so
  # that the sequence of random draws is the same as before.
  hsl_ci <- mean_ci_or_zero(wide$storylines)
  paoh_ci <- mean_ci_or_zero(wide$paohvis)

  technique_names <- c("PAOH", "HSL")
  means_table <- data.frame(
    factor(technique_names),
    c(paoh_ci[1], hsl_ci[1]),
    c(paoh_ci[2], hsl_ci[2]),
    c(paoh_ci[3], hsl_ci[3])
  )
  # The value column is called mean_time for both measures (also for the error
  # rate); the name is only there so the plotting code can parse the table.
  # The last header used to carry a trailing space ("upperBound_CI ").
  colnames(means_table) <- c("vis", "mean_time", "lowerBound_CI", "upperBound_CI")

  means_file <- paste0(out_dir, "raw/printed_", label, "_means_task_",
                       current_task, phase_suffix, ".txt")
  write.table(means_table, means_file, sep = ",", row.names = FALSE)
  mean_plot <- barChart(means_table, technique_names, nbTechs = 2,
                        ymin = 0, ymax = y_limit, "", "", mycolor = bar_colour)

  ## DIFFS
  if (mean(wide$paohvis) == 0 && mean(wide$storylines) == 0) {
    # Both techniques are all-zero: no bootstrap, fixed placeholder row.
    diff_ci <- c(0, 0, 0, 0.98, 0, 0)
  } else {
    # The second argument (3) is presumably the number of comparisons used for
    # the Bonferroni correction -- TODO confirm in CI-Functions-Bonferroni.R.
    diff_ci <- bootstrapMeanCI_corr(wide$paohvis - wide$storylines, 3)
  }

  # diff_ci is read as: [1] point estimate, [2] CI min, [3] CI max,
  # [4] corrected level, [5] corrected CI min, [6] corrected CI max.
  # The columns are emitted in that order so that each value sits under the
  # matching header; previously max/min were swapped relative to the
  # lowerBound/upperBound headers (and relative to the means table).
  diff_name <- c("PAOH-HSL")
  diff_table <- data.frame(
    factor(diff_name),
    diff_ci[1],
    diff_ci[2],
    diff_ci[3],
    diff_ci[4],
    diff_ci[5],
    diff_ci[6]
  )
  colnames(diff_table) <- c("technique", "mean_time", "lowerBound_CI",
                            "upperBound_CI", "corrected_CI",
                            "lowerBound_CI_corr", "upperBound_CI_corr")

  diffs_file <- paste0(out_dir, "raw/printed_", label, "_diffs_task_",
                       current_task, phase_suffix, ".txt")
  write.table(diff_table, diffs_file, sep = ",", row.names = FALSE)
  diff_plot <- barChart_corr(diff_table, diff_name, nbTechs = 1,
                             ymin = -y_limit, ymax = y_limit, "", "",
                             mycolor = bar_colour)

  # Means on top, difference below.
  cowplot::plot_grid(mean_plot, diff_plot, nrow = 2)
}

# write.table() does not create missing folders, so make sure the raw/ output
# folder exists before the first table is written.
dir.create(paste0(path, "raw"), recursive = TRUE, showWarnings = FALSE)

# One figure row per task: a label cell followed by one cell per level, each
# cell holding the time plots (left) and the error plots (right).
# Row 1 of `plots` is the header row, so the task rows start at index 2.
i <- 2
for (j in seq_along(tasks)) {
  task <- tasks[j]
  task_name <- tasks_names[j]

  tasks_grid <- list()
  tasks_grid[[1]] <- ggdraw() + draw_label(task_name)
  n <- 2
  for (level in levels) {
    current_task <- paste0(level, "_", task)

    # Rows of this task/level only, ordered by participant and technique,
    # reduced to the columns needed below.
    elements <- mydata[which(mydata$question_code == current_task), ]
    elements <- elements[order(elements$user, elements$vis), ]
    elements <- elements[c("user", "vis", "mean_time", "mean_error")]

    time_box <- build_measure_box(elements, "mean_time", "time", current_task,
                                  phase, path, 200, "dodgerblue2")
    error_box <- build_measure_box(elements, "mean_error", "error", current_task,
                                   phase, path, 100, "darkorange3")

    tasks_grid[[n]] <- plot_grid(time_box, error_box, ncol = 2)
    n <- n + 1
  }

  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for the
  # border of element_rect(); `size` is kept for compatibility with older versions.
  plots[[i]] <- cowplot::plot_grid(plotlist = tasks_grid, nrow = 1, rel_widths = c(0.3, 1, 1, 1, 1)) +
    theme(plot.background = element_rect(fill = NA, colour = "grey92", size = 5))
  i <- i + 1
}


#margin = theme(plot.margin = unit(c(2,2,2,2), "cm"))

# Stack all rows collected in `plots` (a short first row followed by four
# equally tall rows) and save the result as a PDF named after the phase.
row_heights <- c(0.3, rep(1, 4))
p <- cowplot::plot_grid(
  plotlist = plots,
  nrow = 5,
  rel_heights = row_heights
)
filename <- paste0("plots/6_per_complexity/per_complexity_all_CI", phase, ".pdf")
ggsave(
  filename = filename,
  plot = p,
  device = "pdf",
  width = 25,
  height = 20
)