library(plyr)
library(dplyr)
library(MPDiR)
library(quickpsy)
library(fitdistrplus)
library(ggplot2)
library(rstudioapi)

# Run relative to the directory that contains this script so that the relative
# paths below resolve. The active-document lookup only works inside RStudio and
# returns an empty path for an unsaved document or the console, so in those
# cases (e.g. Rscript, R CMD BATCH) the current working directory is kept
# instead of failing.
defaultpath <- getwd()
if (rstudioapi::isAvailable()) {
  active_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(active_path)) {
    defaultpath <- dirname(active_path)
  }
}
setwd(defaultpath)

# Input file with one row per answered trial (training and main trials mixed).
file <- "./data/user_answers_ms.csv"

# Drop leftovers from a previous interactive run, if present.
if (exists("aggregated_data_training")) {
  rm(aggregated_data_training)
}
if (exists("aggregated_data_main")) {
  rm(aggregated_data_main)
}

dataFile <- read.csv(file, stringsAsFactors = FALSE)


## TRAINING
# Average error rate, time, confidence and difficulty per participant and
# question, computed over training trials only (is_training is true) and
# restricted to rows with Marked == 0 (only keep the first try).
#
# read.csv() converts a column that consists solely of "True"/"False" into a
# logical vector; comparing such a column against the string "True" matches
# nothing. Normalising through toupper(as.character(.)) accepts both the
# logical and the character representation.
training_flag <- toupper(as.character(dataFile$is_training))
tmp <- dataFile[which(training_flag == "TRUE" & dataFile$Marked == 0), ]

# Error indicator: 1 for an incorrect answer, 0 for a correct one, NA for any
# other value. Building it as a numeric column directly avoids the
# character -> numeric round trip and its "NAs introduced by coercion" warning.
correct_flag <- toupper(as.character(tmp$is_correct))
tmp$error <- rep(NA_real_, nrow(tmp))
tmp$error[which(correct_flag == "FALSE")] <- 1
tmp$error[which(correct_flag == "TRUE")] <- 0

aggregated_table <- ddply(
  tmp,
  c("user", "vis", "question_code", "question_internal_id"),
  summarise,
  mean_error = mean(error) * 100, # turn [0,1] values to percentages
  mean_time = mean(time) / 1000,  # turn milliseconds to seconds
  mean_confidence = mean(confidence),
  mean_difficulty = mean(difficulty)
)

write.csv(aggregated_table, file = "aggregated_data_training.csv")

## MAIN
# Re-read the input so this section can be run on its own.
dataFile <- read.csv(file, stringsAsFactors = FALSE)

# Average error rate, time, confidence and difficulty per participant and
# question, computed over non-training trials only (is_training is false).
# Unlike the training section, no filter on Marked is applied here.
#
# read.csv() converts a column that consists solely of "True"/"False" into a
# logical vector; comparing such a column against the string "False" matches
# nothing. Normalising through toupper(as.character(.)) accepts both the
# logical and the character representation.
training_flag <- toupper(as.character(dataFile$is_training))
tmp <- dataFile[which(training_flag == "FALSE"), ]

# Error indicator: 1 for an incorrect answer, 0 for a correct one, NA for any
# other value. Building it as a numeric column directly avoids the
# character -> numeric round trip and its "NAs introduced by coercion" warning.
correct_flag <- toupper(as.character(tmp$is_correct))
tmp$error <- rep(NA_real_, nrow(tmp))
tmp$error[which(correct_flag == "FALSE")] <- 1
tmp$error[which(correct_flag == "TRUE")] <- 0

aggregated_table <- ddply(
  tmp,
  c("user", "vis", "question_code", "question_internal_id"),
  summarise,
  mean_error = mean(error) * 100, # turn [0,1] values to percentages
  mean_time = mean(time) / 1000,  # turn milliseconds to seconds
  mean_confidence = mean(confidence),
  mean_difficulty = mean(difficulty)
)

write.csv(aggregated_table, file = "aggregated_data_main.csv")