# ---------------------------------------------------------------------------
# Setup: load packages and CI helpers, read the aggregated study data, recode
# the question codes to "<level>_<task>", and prepare the constants and the
# header row used by the per-task / per-complexity plot grid.
# ---------------------------------------------------------------------------
library(plyr)
library(tidyverse)
library(gridExtra)
library(cowplot)
library(data.table)

# source("CI-Functions.R")
# NOTE: sourced relative to the working directory in effect *before* the
# setwd() below, exactly as in the original script.
source("CI-Functions-Bonferroni.R")

# Run relative to the script's own folder when executed from RStudio. Outside
# RStudio (Rscript, plain R) or for an unsaved document there is no active
# document path, so the current working directory is left untouched instead
# of failing.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  defaultpath <- dirname(rstudioapi::getActiveDocumentContext()$path)
  if (nzchar(defaultpath)) {
    setwd(defaultpath)
  }
}

# Select the phase to analyze; for the per-task analysis only the main phase
# is considered.
phase <- '_main'
# phase <- '_training'

file_data <- paste0("aggregated_data", phase, ".csv")

# Drop any data left over from a previous run so that a failed read cannot be
# silently masked by stale data.
if (exists("mydata")) {
  rm(mydata)
}
mydata <- read.table(file_data, header = TRUE, sep = ",")

# Recode the single-letter question codes into "<level>_<task>".
# ORDER MATTERS: the replacement strings themselves contain the letters
# C, A, I and H ("CO"/"CH", "MA", "SI", "CH"), so each of those letters has to
# be substituted before any replacement containing it is inserted.
question_recode <- c(
  C = "3_CO", A = "1_CO", B = "2_CO",
  D = "1_MA", E = "2_MA", F = "3_MA",
  I = "3_SI", G = "1_SI", H = "2_SI",
  J = "1_CH", K = "2_CH", L = "3_CH"
)
for (letter in names(question_recode)) {
  mydata$question_code <- gsub(
    letter, question_recode[[letter]], mydata$question_code
  )
}

# ALL TASKS
tasks <- c('CO', 'CH', 'SI', 'MA')
levels <- c('1', '2', '3')

tasks_names <- c('Find \n relationships', 'Characterize \n relationships',
                 'Similar \n entities', 'Massive \n events')
levels_names <- c('People + Time', 'People + Locations',
                  'People + Locations + Time')

plots <- list()

# theme_set(theme_gray(base_size = 12))
path <- 'plots/6_per_complexity/'

# Header row of the final grid: an empty (NULL) cell sitting above the column
# of task labels, followed by one title per complexity level.
titles <- c(
  list(NULL),
  lapply(levels_names, function(level) ggdraw() + draw_label(level, x = 0.6))
)
# ---------------------------------------------------------------------------
# Per-task x per-complexity analysis.
#
# For every task (row) and complexity level (column) this computes, for both
# completion time and error, the bootstrapped mean + CI per technique and the
# PAOH - HSL difference with its corrected CI, writes the numbers to
# <path>/raw/printed_*.txt, and assembles all charts into one PDF.
#
# Relies on objects defined earlier in the script: mydata, tasks, tasks_names,
# levels, titles, plots, path, phase; and on bootstrapMeanCI(),
# bootstrapMeanCI_corr(), barChart() and barChart_corr(), presumably provided
# by the sourced CI-Functions-Bonferroni.R.
# ---------------------------------------------------------------------------

# Mean and CI for one technique as a length-3 vector (estimate, lower, upper).
# An all-zero mean short-circuits to zeros instead of bootstrapping
# (presumably because the bootstrap helper cannot handle it -- TODO confirm).
mean_ci_or_zero <- function(values) {
  if (mean(values) == 0) {
    return(c(0, 0, 0))
  }
  bootstrapMeanCI(values)
}

# Analyse one measure ("mean_time" or "mean_error") for one task/level cell.
#
# per_user     data frame with columns user, vis and `measure`, one row per
#              user x vis.
# measure      name of the column to analyse.
# label        prefix used in the output file names ("time" / "error").
# current_task "<level>_<task>" identifier used in the output file names.
# mean_ymax    upper y limit of the means chart (lower limit is 0).
# diff_ylim    c(ymin, ymax) of the differences chart.
# colour       colour passed to the chart helpers.
# out_dir      output folder (with trailing slash); files go to <out_dir>raw/.
# phase        phase suffix appended to the output file names.
#
# Writes the means table and the differences table as CSV text files and
# returns a two-row grid: means chart on top, differences chart below.
analyse_measure <- function(per_user, measure, label, current_task,
                            mean_ymax, diff_ylim, colour, out_dir, phase) {
  # One row per user, one column per technique.
  wide <- reshape(per_user[, c("user", "vis", measure)],
                  timevar = "vis", idvar = "user", direction = "wide")
  colnames(wide) <- sub(paste0("^", measure, "\\."), "", colnames(wide))

  ## MEANS
  # Same evaluation order as before (storylines first, then paohvis) so the
  # random-number stream is consumed identically.
  hsl <- mean_ci_or_zero(wide$storylines)
  paoh <- mean_ci_or_zero(wide$paohvis)

  technique_names <- c("PAOH", "HSL")
  # The value column is called "mean_time" even for the error measure: that is
  # the column name the plotting helpers parse.
  # The last column used to be named "upperBound_CI " (trailing space); the
  # stray space is removed here.
  means_table <- data.frame(
    vis = factor(technique_names),
    mean_time = c(paoh[1], hsl[1]),
    lowerBound_CI = c(paoh[2], hsl[2]),
    upperBound_CI = c(paoh[3], hsl[3])
  )
  write.table(
    means_table,
    paste0(out_dir, "raw/printed_", label, "_means_task_", current_task,
           phase, ".txt"),
    sep = ",", row.names = FALSE
  )
  plot_mean <- barChart(means_table, technique_names, nbTechs = 2,
                        ymin = 0, ymax = mean_ymax, "", "", mycolor = colour)

  ## DIFFS
  if (mean(wide$paohvis) == 0 && mean(wide$storylines) == 0) {
    # 0.98 stands in for the corrected confidence level that
    # bootstrapMeanCI_corr() would otherwise report (assumed -- TODO confirm).
    diff_ci <- c(0, 0, 0, 0.98, 0, 0)
  } else {
    diff_ci <- bootstrapMeanCI_corr(wide$paohvis - wide$storylines, 3)
  }

  diff_name <- "PAOH-HSL"
  # Element 2/5 is treated as the lower and 3/6 as the upper bound, matching
  # how the means table above maps bootstrapMeanCI() output. The original
  # script put the upper bounds into the "lowerBound_*" columns (and vice
  # versa), so the written files had the bounds mislabelled.
  # NOTE(review): barChart_corr() is not visible here; the error bar spans the
  # same interval either way, but confirm it does not rely on the old swap.
  diffs_table <- data.frame(
    technique = factor(diff_name),
    mean_time = diff_ci[1],
    lowerBound_CI = diff_ci[2],
    upperBound_CI = diff_ci[3],
    corrected_CI = diff_ci[4],
    lowerBound_CI_corr = diff_ci[5],
    upperBound_CI_corr = diff_ci[6]
  )
  write.table(
    diffs_table,
    paste0(out_dir, "raw/printed_", label, "_diffs_task_", current_task,
           phase, ".txt"),
    sep = ",", row.names = FALSE
  )
  plot_diff <- barChart_corr(diffs_table, diff_name, nbTechs = 1,
                             ymin = diff_ylim[1], ymax = diff_ylim[2],
                             "", "", mycolor = colour)

  cowplot::plot_grid(plot_mean, plot_diff, nrow = 2)
}

# Header row: empty corner cell followed by the complexity-level titles.
plots[[1]] <- cowplot::plot_grid(plotlist = titles, nrow = 1,
                                 rel_widths = c(0.3, 2, 2, 2))
plots[[1]]  # auto-prints the header row when run interactively

# Make sure the folder for the raw tables exists before writing into it.
dir.create(paste0(path, "raw"), recursive = TRUE, showWarnings = FALSE)

for (j in seq_along(tasks)) {
  task <- tasks[j]
  task_name <- tasks_names[j]

  # First cell of the row is the task label; one cell per level follows.
  tasks_grid <- list(ggdraw() + draw_label(task_name))

  for (level in levels) {
    current_task <- paste0(level, '_', task)

    # Select only the rows and columns needed for this task/level.
    elements <- mydata[which(mydata$question_code == current_task),
                       c("user", "vis", "mean_time", "mean_error")]

    # Aggregate all repetitions per participant and technique. The original
    # computed this table but never used it, so reshape() would have kept only
    # the first repetition if a user had several rows per technique. With one
    # row per user x vis the values are unchanged.
    per_user <- plyr::ddply(elements, c("user", "vis"), plyr::summarise,
                            mean_time = mean(mean_time),
                            mean_error = mean(mean_error))
    # Stable user/vis order for the wide reshape.
    per_user <- per_user[order(per_user$user, per_user$vis), ]

    time_box <- analyse_measure(per_user, "mean_time", "time", current_task,
                                mean_ymax = 200, diff_ylim = c(-200, 200),
                                colour = "dodgerblue2",
                                out_dir = path, phase = phase)
    error_box <- analyse_measure(per_user, "mean_error", "error", current_task,
                                 mean_ymax = 100, diff_ylim = c(-100, 100),
                                 colour = "darkorange3",
                                 out_dir = path, phase = phase)

    tasks_grid[[length(tasks_grid) + 1]] <-
      plot_grid(time_box, error_box, ncol = 2)
  }

  # Four columns per row (label + three levels), in the same proportions as
  # the header row.
  plots[[j + 1]] <- cowplot::plot_grid(plotlist = tasks_grid, nrow = 1,
                                       rel_widths = c(0.3, 1, 1, 1)) +
    theme(plot.background = element_rect(fill = NA, colour = "grey92",
                                         size = 5))
}

# margin = theme(plot.margin = unit(c(2,2,2,2), "cm"))
p <- cowplot::plot_grid(plotlist = plots, nrow = 5,
                        rel_heights = c(0.3, 1, 1, 1, 1))

filename <- paste0(path, "per_complexity_all_CI", phase, ".pdf")
ggsave(filename = filename, plot = p, device = "pdf", width = 25, height = 20)