####################################################################################
#
# LEARNING TIME PER TASK
#
####################################################################################
#
# For every visualisation and every task, this script:
#   1. averages the completion time per participant and map size,
#   2. bootstraps a confidence interval of the mean time for each map size,
#   3. bootstraps a (multiple-comparison corrected) confidence interval of the
#      per-participant difference big - small,
#   4. writes each result table as a .txt file and each plot as a .pdf file.
#
# bootstrapMeanCI(), bootstrapMeanCI_corr() and barChart() are expected to be
# defined by the sourced CI-Functions-Bonferroni.R file.

library(plyr)
library(stringr)
#source("CI-Functions.R")
source("CI-Functions-Bonferroni.R")

# NOTE(review): this only works when the script is run from within RStudio,
# because the script location is obtained through rstudioapi.
defaultpath <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(defaultpath)

file <- "./data/user_answers_ms.csv"
tmp <- read.csv(file, stringsAsFactors = FALSE)

# keep the measured trials only (drop the training trials)
tmp <- tmp[which(tmp$is_training == "False"), ]

# error indicator: 1 for an incorrect answer, 0 for a correct one
tmp$error <- tmp$is_correct
tmp$error[tmp$is_correct == "False"] <- 1
tmp$error[tmp$is_correct == "True"] <- 0
tmp$error <- as.numeric(as.character(tmp$error))

# replace the internal visualisation ids by the names used in the outputs
tmp$vis[tmp$vis == "vis1"] <- "GlyMa"
tmp$vis[tmp$vis == "vis2"] <- "CoordV"
tmp$vis[tmp$vis == "vis3"] <- "LocSto"

mydata <- aggregated_table <- ddply(
  tmp,
  c("user", "vis", "question_code", "question_internal_id", "map_size"),
  summarise,
  mean_error = mean(error) * 100, # turn [0,1] values to percentages
  mean_time = mean(time) / 1000, # turn milliseconds to seconds
  mean_confidence = mean(confidence),
  mean_difficulty = mean(difficulty)
)

path <- paste0("plots/per_task_mapSize/raw/")
# make sure the output directory exists before anything is written into it
dir.create(path, recursive = TRUE, showWarnings = FALSE)

#############################
# analysis of all questions individually #
#############################

# do the analysis per task
tasks <- c("A", "B", "C", "D", "E")
vis_all <- c("CoordV", "GlyMa", "LocSto")

#vis <- 'storylines'
#current_task <- 'CO'

for (vis in vis_all) {
  for (current_task in tasks) {
    print("--------------------")
    print(paste("Current task: ", current_task))

    # select only data for this task and this visualisation
    elements <- mydata[which(mydata$question_code == current_task & mydata$vis == vis), ]

    # order for the transpose
    elements <- elements[order(elements$user, elements$map_size), ]

    # keep only columns needed
    myvars <- c("user", "map_size", "mean_time")
    elements <- elements[myvars]

    # aggregating all cases per participant (3 repetitions)
    statstable_time <- ddply(elements, c("user", "map_size"), summarise,
      time = mean(mean_time)
    )

    # one row per participant, one time column per map size
    elements <- statstable_time
    elements <- reshape(elements, timevar = "map_size", idvar = c("user"), direction = "wide")

    # The positional rename below is only valid for exactly two map sizes.
    if (ncol(elements) != 3) {
      stop(
        "Expected exactly two map sizes for task ", current_task,
        " and visualisation ", vis, ", got ", ncol(elements) - 1,
        call. = FALSE
      )
    }
    # NOTE(review): assumes the first reshaped column is the big map and the
    # second the small map (rows are ordered by map_size above) - confirm
    # against the map_size values in the data.
    colnames(elements) <- c("user", "big", "small")

    # drop participants with N/A in one of the two conditions
    elements <- na.omit(elements)

    #########
    # stats #
    #########

    data <- elements

    techniqueA <- bootstrapMeanCI(data$small)
    techniqueB <- bootstrapMeanCI(data$big)

    # elements 1, 2 and 3 of each result are used as point estimate,
    # CI minimum and CI maximum respectively
    analysisData <- c()
    analysisData$name <- c("big", "small")
    analysisData$pointEstimate <- c(techniqueB[1], techniqueA[1])
    analysisData$ci.max <- c(techniqueB[3], techniqueA[3])
    analysisData$ci.min <- c(techniqueB[2], techniqueA[2])

    datatoprint <- data.frame(
      factor(analysisData$name),
      analysisData$pointEstimate,
      analysisData$ci.min,
      analysisData$ci.max
    )
    # The generic column names are kept so that the data can be parsed for the plot.
    colnames(datatoprint) <- c("phase", "mean_time", "lowerBound_CI", "upperBound_CI")

    filename <- paste0("phase_time_means_task_", current_task, "_", vis)
    write.table(datatoprint, paste0(path, "printed_", filename, ".txt"),
      sep = ",", row.names = FALSE
    )

    barChart(datatoprint, analysisData$name, nbTechs = 2, ymin = 0, ymax = 350, mycolor = "dodgerblue2", "", "")

    # SAVE
    ggsave(paste0(path, "plot_", filename, ".pdf"), device = "pdf", width = 5, height = 2)

    # CIs with adapted alpha value for multiple comparisons
    diffBA <- bootstrapMeanCI_corr(data$big - data$small, 1)

    # NOTE(review): assumes bootstrapMeanCI_corr returns, in this order:
    # estimate, CI min, CI max, level, corrected CI min, corrected CI max
    # (this is what the assignments below imply) - confirm against
    # CI-Functions-Bonferroni.R.
    analysisData <- c()
    analysisData$name <- c("big-small")
    analysisData$pointEstimate <- c(diffBA[1])
    analysisData$ci.max <- c(diffBA[3])
    analysisData$ci.min <- c(diffBA[2])
    analysisData$level <- c(diffBA[4])
    analysisData$ci_corr.max <- c(diffBA[6])
    analysisData$ci_corr.min <- c(diffBA[5])

    # minimum before maximum, so that the values match the
    # lowerBound_* / upperBound_* column names assigned below
    datatoprint <- data.frame(
      factor(analysisData$name),
      analysisData$pointEstimate,
      analysisData$ci.min,
      analysisData$ci.max,
      analysisData$level,
      analysisData$ci_corr.min,
      analysisData$ci_corr.max
    )
    # The value column is called mean_time even though it holds a difference
    # of mean times here; the name is kept to parse the data for the plot.
    colnames(datatoprint) <- c(
      "phase", "mean_time", "lowerBound_CI", "upperBound_CI",
      "corrected_CI", "lowerBound_CI_corr", "upperBound_CI_corr"
    )

    filename <- paste0("phase_time_diffs_task_", current_task, "_", vis)
    write.table(datatoprint, paste0(path, "printed_", filename, ".txt"),
      sep = ",", row.names = FALSE
    )

    barChart(datatoprint, analysisData$name, nbTechs = 1, ymin = -350, ymax = 350, mycolor = "dodgerblue2", "", "")

    ggsave(paste0(path, "plot_", filename, ".pdf"), device = "pdf", width = 5, height = 2)
  } # end of loop for each task
}