library(plyr)
library(stringr)

#source("CI-Functions.R")
source("CI-Functions-Bonferroni.R")

# Run from the directory that contains this script so the relative paths used
# below (the input CSV and the "plots/" output folder) resolve.
# rstudioapi only works inside a running RStudio session with a saved
# document; in any other case we keep the current working directory instead of
# failing before any data has been read.
defaultpath <- getwd()
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  active_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
  if (nzchar(active_dir)) {
    defaultpath <- active_dir
  }
}
setwd(defaultpath)

# Drop any `mydata` left over from a previous interactive run so that a failed
# read below cannot silently leave stale data in place.
if (exists("mydata")) {
  rm(mydata)
}
mydata <- read.table("aggregated_data.csv", header = TRUE, sep = ",")


####################################################################################
#
# Error analysis
#
####################################################################################


#############################
# analysis of all questions individually #
#############################


# Per-task error analysis.
#
# For every task code in `tasks` this loop:
#   1. averages `mean_error` per participant and visualization,
#   2. reshapes to one row per participant with one column per visualization,
#   3. computes a CI of the mean per visualization and writes a table + plot,
#   4. computes a CI of the pairwise differences and writes a table + plot.
# Outputs go to "plots/printed_<name>.txt" and "plots/plot_<name>.pdf".
# `bootstrapMeanCI`, `bootstrapMeanCI_corr` and `barChart` are not defined in
# this file (presumably they come from the sourced CI-Functions file).
tasks <- c("B", "C", "D", "E", "F", "G", "H", "I")

for (current_task in tasks) {

  print ("--------------------")
  print (paste("Current task: ", current_task))

  # Select only the rows for this task (which() also drops NA task codes).
  elements <- mydata[which(mydata$question_code == current_task), ]
  if (nrow(elements) == 0) {
    warning("No data for task ", current_task, "; skipping.", call. = FALSE)
    next
  }

  # Order for the reshape below.
  elements <- elements[order(elements$user, elements$vis), ]

  # Keep only the columns needed.
  elements <- elements[c("user", "vis", "mean_error")]

  # Aggregate all cases per participant (3 repetitions).
  elements <- ddply(elements,
                    c("user", "vis"),
                    summarise,
                    error = mean(mean_error)
  )

  # One row per participant, one "error.<vis>" column per visualization;
  # then strip the "error." prefix so the columns are named after the vis.
  elements <- reshape(elements, timevar = "vis", idvar = c("user"), direction = "wide")
  colnames(elements) <- sub("^error\\.", "", colnames(elements))

  # Drop participants (rows) that have an NA for any visualization.
  elements <- na.omit(elements)
  if (nrow(elements) == 0) {
    warning("No complete participants for task ", current_task, "; skipping.",
            call. = FALSE)
    next
  }


  #########
  # stats #
  #########

  data <- elements

  # When the mean error is exactly 0 the bootstrap is skipped and an all-zero
  # (estimate, bound, bound) triple is used instead.
  if (mean(data$Barchart) == 0) {
    techniqueA <- c(0.000000, 0.000000, 0.000000)
  } else {
    techniqueA <- bootstrapMeanCI(data$Barchart)
  }
  if (mean(data$Dorling) == 0) {
    techniqueB <- c(0.000000, 0.000000, 0.000000)
  } else {
    techniqueB <- bootstrapMeanCI(data$Dorling)
  }
  if (mean(data$Symbol) == 0) {
    techniqueC <- c(0.000000, 0.000000, 0.000000)
  } else {
    techniqueC <- bootstrapMeanCI(data$Symbol)
  }

  # Element 1 is used as the point estimate, 2 as the lower and 3 as the upper
  # CI bound -- presumably matching what bootstrapMeanCI returns; TODO confirm.
  analysisData <- list()
  analysisData$name <- c("Gly", "Dor", "Bar") # Symbol name has been changed in paper to Glyph
#  analysisData$name <- c("Symbol","Dorling","Barchart")
  analysisData$pointEstimate <- c(techniqueC[1], techniqueB[1], techniqueA[1])
  analysisData$ci.max <- c(techniqueC[3], techniqueB[3], techniqueA[3])
  analysisData$ci.min <- c(techniqueC[2], techniqueB[2], techniqueA[2])

  # The mean column is called "mean_time" even though it holds an error value;
  # the name is only there so the plotting code can parse the data.
  datatoprint <- data.frame(factor(analysisData$name), analysisData$pointEstimate, analysisData$ci.min, analysisData$ci.max)
  colnames(datatoprint) <- c("vis", "mean_time", "lowerBound_CI", "upperBound_CI")

  path <- "plots/"
  # Make sure the output folder exists before writing into it.
  dir.create(path, showWarnings = FALSE, recursive = TRUE)
  filename <- paste0("error_means_task_", current_task)

  write.table(datatoprint, paste0(path, "printed_", filename, ".txt"), sep = ",", row.names = FALSE)
  barChart(datatoprint, analysisData$name, nbTechs = 3, ymin = 0, ymax = 50, "", "", mycolor="darkorange3")
  ggsave(paste0(path, "plot_", filename, ".pdf"), device = pdf, width = 5, height = 2)

  # CIs with adapted alpha value for multiple comparisons not needed here
  # (hence the 1 passed to bootstrapMeanCI_corr).
  # When both techniques of a pair have a mean error of exactly 0 the bootstrap
  # is skipped and a placeholder vector is used instead.
  # NOTE(review): the meaning of the 8 in the 4th placeholder slot is not
  # visible from this file -- confirm against bootstrapMeanCI_corr.
  if (mean(data$Barchart) == 0 && mean(data$Dorling) == 0) {
    diffBA <- c(0.000000, 0.000000, 0.000000, 8, 0.000000, 0.000000)
  } else {
    diffBA <- bootstrapMeanCI_corr(data$Dorling - data$Barchart, 1)
  }
  if (mean(data$Dorling) == 0 && mean(data$Symbol) == 0) {
    diffCB <- c(0.000000, 0.000000, 0.000000, 8, 0.000000, 0.000000)
  } else {
    diffCB <- bootstrapMeanCI_corr(data$Symbol - data$Dorling, 1)
  }
  # Both means must be compared against 0 here, like the two pairs above.
  if (mean(data$Symbol) == 0 && mean(data$Barchart) == 0) {
    diffCA <- c(0.000000, 0.000000, 0.000000, 8, 0.000000, 0.000000)
  } else {
    diffCA <- bootstrapMeanCI_corr(data$Symbol - data$Barchart, 1)
  }


  analysisData <- list()
#  analysisData$name <- c("Symbol - Barchart","Dorling - Barchart", "Symbol - Dorling")
  analysisData$name <- c("Gly-Bar", "Dor-Bar", "Gly-Dor") # Symbol name has been changed in paper to Glyph
  analysisData$pointEstimate <- c(diffCA[1], diffBA[1], diffCB[1])
  analysisData$ci.max <- c(diffCA[3], diffBA[3], diffCB[3])
  analysisData$ci.min <- c(diffCA[2], diffBA[2], diffCB[2])
  analysisData$level <- c(diffCA[4], diffBA[4], diffCB[4])
  analysisData$ci_corr.max <- c(diffCA[6], diffBA[6], diffCB[6])
  analysisData$ci_corr.min <- c(diffCA[5], diffBA[5], diffCB[5])

  # NOTE(review): unlike the means table above, the *.max vectors are placed
  # in the "lowerBound_*" columns and the *.min vectors in the "upperBound_*"
  # columns. Left unchanged because the return order of bootstrapMeanCI_corr
  # is not visible here -- confirm and swap if the labels are wrong.
  # As above, "mean_time" holds an error value; the name is only for parsing.
  datatoprint <- data.frame(factor(analysisData$name), analysisData$pointEstimate, analysisData$ci.max, analysisData$ci.min, analysisData$level, analysisData$ci_corr.max, analysisData$ci_corr.min)
  colnames(datatoprint) <- c("technique", "mean_time", "lowerBound_CI", "upperBound_CI", "corrected_CI", "lowerBound_CI_corr", "upperBound_CI_corr")

  path <- "plots/"
  filename <- paste0("error_diffs_task_", current_task)

  write.table(datatoprint, paste0(path, "printed_", filename, ".txt"), sep = ",", row.names = FALSE)

  barChart(datatoprint, analysisData$name, nbTechs = 3, ymin = -50, ymax = 50, "", "", mycolor="darkorange3")
  #barChart_corr(datatoprint, analysisData$name, nbTechs = 3, ymin = -50, ymax = 50, "", "", mycolor="darkorange3")
  ggsave(paste0(path, "plot_", filename, ".pdf"), device = pdf, width = 5, height = 2)


} # end of loop for each task