# Per-task summary figure.
#
# For every task this script builds one row of charts: completion time and
# error rate (per-technique means plus the pairwise difference, read from
# pre-computed text files) next to Likert charts of the self-reported
# confidence and difficulty ratings (computed from the aggregated CSV).
# The task rows are stacked under a header row and saved as a single PDF.

library(plyr)
library(tidyverse)
library(gridExtra)
library(cowplot)
library(data.table)
# psych and likert used to be attached inside the per-task loop; attaching
# them once up front keeps the search path identical for every iteration.
library(psych)
library(likert)

#source("CI-Functions.R")
# Presumably defines barChart(), which is not defined in this file.
# NOTE(review): this is sourced *before* setwd(), so it resolves against the
# working directory the script is started from - confirm that is intended.
source("CI-Functions-Bonferroni.R")

defaultpath <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(defaultpath)

# Phase to analyze: "_main" or "_training". The per-task analysis is meant
# for the main phase; the training phase is what is currently selected.
#phase <- '_main'
phase <- '_training'

# Y-axis ranges of the completion-time charts depend on the phase, so they
# are derived from `phase` instead of being toggled by hand.
time_limits <- if (phase == "_main") {
  list(mean = c(0, 150), diff = c(-50, 50))
} else {
  list(mean = c(0, 200), diff = c(-100, 100))
}

# ---- Load and recode the aggregated trial data ------------------------------

file_data <- paste0("aggregated_data", phase, ".csv")

# Drop any copy left over from a previous interactive run so that a failed
# read cannot silently fall back to stale data.
if (exists("mydata")) {
  rm(mydata)
}
mydata <- read.table(file_data, header = TRUE, sep = ",")

# Collapse the per-question letter codes into the four task codes.
# The replacements are substring based and applied in sequence, so the order
# matters: "C" is rewritten before any "CO"/"CH" exists, "A" before "MA",
# "I" before the other "SI" sources, and "H" before "CH".
recode_steps <- list(
  c("C", "CO"), c("A", "CO"), c("B", "CO"),
  c("D", "MA"), c("E", "MA"), c("F", "MA"),
  c("I", "SI"), c("G", "SI"), c("H", "SI"),
  c("J", "CH"), c("K", "CH"), c("L", "CH")
)
for (step in recode_steps) {
  mydata$question_code <- gsub(
    step[1], step[2], mydata$question_code,
    fixed = TRUE
  )
}

theme_set(theme_gray(base_size = 12))

# ---- Helpers ----------------------------------------------------------------

# Read one pre-computed summary table for a task.
#   metric: "time" or "error"; kind: "means" or "diffs".
# As in the original script, only the "means" tables get their mangled
# "upperBound_CI." header repaired.
read_printed_table <- function(raw_dir, metric, kind, task, phase) {
  file_name <- paste0(
    "printed_", metric, "_", kind, "_task_", task, phase, ".txt"
  )
  tab <- read.table(file.path(raw_dir, file_name), header = TRUE, sep = ",")
  if (kind == "means") {
    colnames(tab)[colnames(tab) == "upperBound_CI."] <- "upperBound_CI"
  }
  tab
}

# Bundle the columns of a summary table into a list. `labels` is the vector
# of technique names; `corrected = TRUE` also copies the corrected-CI columns
# that the "diffs" tables are expected to carry.
ci_summary <- function(tab, labels, corrected = FALSE) {
  out <- list()
  out$name <- labels
  out$pointEstimate <- tab$mean_time
  out$ci.max <- tab$upperBound_CI
  out$ci.min <- tab$lowerBound_CI
  if (corrected) {
    out$level <- tab$corrected_CI
    out$ci_corr.max <- tab$upperBound_CI_corr
    out$ci_corr.min <- tab$lowerBound_CI_corr
  }
  out
}

# Build the Likert bar chart of one rating column (`value_col`) for one task.
# Rows are reshaped to one row per user/question with one column per
# technique, incomplete rows are dropped, and both techniques are turned into
# ordered 1-5 factors before being handed to likert().
# TODO: MAKE IT PRETTY
likert_plot_for_task <- function(data, task, value_col) {
  id_cols <- c("user", "question_code", "question_internal_id")
  likert_levels <- c("1", "2", "3", "4", "5")

  rows <- data[which(data$question_code == task), ]
  rows <- rows[order(rows$user, rows$vis), ]
  rows <- rows[c("user", "vis", "question_code", "question_internal_id",
                 value_col)]

  wide <- reshape(rows, timevar = "vis", idvar = id_cols, direction = "wide")
  # reshape() names the new columns "<value_col>.<vis>"; strip the prefix.
  colnames(wide) <- gsub(paste0(value_col, "."), "", colnames(wide),
                         fixed = TRUE)
  wide <- na.omit(wide)
  setnames(wide, old = c("storylines", "paohvis"), new = c("HSL", "PAOH"))

  likert_table <- wide[c("HSL", "PAOH")]
  # NOTE(review): values outside "1".."5" (e.g. non-integer means) become NA
  # here - confirm the ratings are always whole numbers.
  likert_table$HSL <- factor(likert_table$HSL, levels = likert_levels,
                             ordered = TRUE)
  likert_table$PAOH <- factor(likert_table$PAOH, levels = likert_levels,
                              ordered = TRUE)
  str(likert_table)

  big_black <- element_text(size = 18, color = 'black')
  plot(likert(likert_table), type = "bar", group.order = c("HSL", "PAOH"),
       text.size = 6) +
    theme(legend.position = 'none',
          axis.title.x = element_blank(),
          plot.background = element_blank(),
          panel.background = element_blank(),
          text = big_black,
          axis.text = big_black,
          strip.text = big_black,
          legend.text = big_black) +
    ggtitle('')
}

# Wrap plots in a grid cell outlined in grey.
framed_cell <- function(..., ncol = 1, rel_widths = 1) {
  plot_grid(..., ncol = ncol, rel_widths = rel_widths) +
    theme(plot.background = element_rect(color = "#999999"))
}

# ---- Header row -------------------------------------------------------------

# ALL TASKS
tasks <- c('CO', 'CH', 'SI', 'MA')
tasks_names <- c('Find \n relationships', 'Characterize \n relationships',
                 'Similar entities', 'Massive events')
stopifnot(length(tasks) == length(tasks_names))

# NOTE(review): the column titled 'Easiness' is drawn from `mean_difficulty`
# below - confirm the direction of that scale matches the title.
metrics <- c('Completion Time', 'Error Rate', 'Confidence', 'Easiness')
col_widths <- c(2, 6, 6, 6, 6)

# The leading NULL leaves the cell above the task-name column empty.
titles <- c(
  list(NULL),
  lapply(metrics, function(m) ggdraw() + draw_label(m, x = 0.6, size = 20))
)

# Slot 1 holds the header row, slots 2.. hold one row per task.
plots <- vector("list", length(tasks) + 1)
plots[[1]] <- cowplot::plot_grid(plotlist = titles, nrow = 1,
                                 rel_widths = col_widths)
plots[[1]]

# ---- One row per task -------------------------------------------------------

raw_dir <- file.path("plots", "3_per_task", "raw")

for (i in seq_along(tasks)) {
  task <- tasks[i]
  task_name <- tasks_names[i]

  # READ TIME
  printed_time <- read_printed_table(raw_dir, "time", "means", task, phase)
  printed_timediff <- read_printed_table(raw_dir, "time", "diffs", task, phase)

  # READ ERROR
  printed_error <- read_printed_table(raw_dir, "error", "means", task, phase)
  printed_errordiff <- read_printed_table(raw_dir, "error", "diffs", task,
                                          phase)

  # PLOT LABEL
  df_label <- data.frame(x = c(0), y = c(0), text = c(task_name))
  plot_label <- ggplot(df_label, aes(x, y)) +
    geom_text(aes(label = text), size = 7) +
    ylim(0, 0) +
    theme_void()

  # NOTE(review): only `analysisData$name` is visibly passed to barChart();
  # the remaining fields are kept in case the sourced helpers read the global.
  # The two positional "" arguments are presumably the axis labels.

  # PLOT TIME
  analysisData <- ci_summary(printed_time, printed_time$vis)
  plot_time <- barChart(printed_time, analysisData$name, nbTechs = 2,
                        ymin = time_limits$mean[1], ymax = time_limits$mean[2],
                        "", "", mycolor = 'dodgerblue2')

  ## TIME DIFF
  analysisData <- ci_summary(printed_timediff, printed_timediff$technique,
                             corrected = TRUE)
  plot_timediff <- barChart(printed_timediff, analysisData$name, nbTechs = 1,
                            ymin = time_limits$diff[1],
                            ymax = time_limits$diff[2],
                            "", "", mycolor = 'dodgerblue2')

  # PLOT ERROR
  analysisData <- ci_summary(printed_error, printed_error$vis)
  plot_error <- barChart(printed_error, analysisData$name, nbTechs = 2,
                         ymin = 0, ymax = 40, "", "", mycolor = 'darkorange3')

  ## ERROR DIFF
  analysisData <- ci_summary(printed_errordiff, printed_errordiff$technique,
                             corrected = TRUE)
  plot_errordiff <- barChart(printed_errordiff, analysisData$name, nbTechs = 1,
                             ymin = -40, ymax = 40, "", "",
                             mycolor = 'darkorange3')

  #### CONFIDENCE
  confidence_plot <- likert_plot_for_task(mydata, task, "mean_confidence")

  #### DIFFICULTY
  difficulty_plot <- likert_plot_for_task(mydata, task, "mean_difficulty")

  # make grids: label | time | error | confidence | difficulty
  this_grid <- plot_grid(
    # label
    plot_grid(plot_label, ncol = 1),
    # time
    framed_cell(plot_time, plot_timediff, ncol = 2, rel_widths = c(2, 3)),
    # error
    framed_cell(plot_error, plot_errordiff, ncol = 2, rel_widths = c(2, 3)),
    # confidence
    framed_cell(confidence_plot, ncol = 1),
    # difficulty
    framed_cell(difficulty_plot, ncol = 1),
    # config
    ncol = 5,
    rel_widths = col_widths
  )

  plots[[i + 1]] <- this_grid
}

# ---- Assemble and save ------------------------------------------------------

# Header row (height 1) on top of one row per task (height 5 each).
final_grid <- plot_grid(
  plotlist = plots,
  nrow = length(plots),
  rel_heights = c(1, rep(5, length(tasks)))
)

filename <- paste0("plots/3_per_task/all_metrics_per_task_phase", phase,
                   ".pdf")
ggsave(filename = filename, plot = final_grid, device = "pdf",
       width = 37, height = 12)