# ========================================================================
# Subject:    ESTIMATION OF A PLACKETT-LUCE MODEL OF SECTORAL PRIORITIES
# Authors:    Aldo Benini, Attilio Benini
# Date:       2019-01-03
# Data source:Bangladesh, Rohingya - NPM Site Assessment Round 11, 2018    
# ========================================================================
# Recommended preparations for running this script:
# 1. Create a working directory 
# 2. Unzip the demo zip-file into the working directory 
# 3. Open RStudio from within the subdir "R", by 
#    double-clicking "190103_0459AB_PlackettLuce_RelativePaths.Rproj"
# 4. Only then open this script "190103_0500AB_PlackettLuce.R".

# Recommended preparations for the general case:
# 1. Create a working directory 
# 2. Inside, create three subdirectories: "data", "output", "R"
# 3. If you wish to make the project portable (relative paths),
#    copy "190103_0459AB_PlackettLuce_RelativePaths.Rproj" into subdir "R"
#    Open RStudio by double-clicking this file.
# 4. Open and adapt script "190103_0500AB_PlackettLuce.R" as desired.
# 5. Create a comma-delimited data file, with the variables for the 
#    Plackett-Luce model to be prefixed with "l_" ("l" in honor of "Luce");
#    place it in the subdir "data".
# ========================================================================
# Packages required - install and activate
# ------------------------------------------------------------------------
# Packages this script depends on:
packages_required <- c("foreign", "PlackettLuce", "tidyverse", "qvcalc")
# Determine new packages to install. installed.packages() returns a matrix
# with one row per installed package; only its row names are package names.
# Matching against the whole matrix would also compare with the other columns
# (Version, Imports, ...) and could wrongly report a package as installed.
new_packages <- packages_required[
  !packages_required %in% rownames(installed.packages())
]
# If any new ones are indeed required, install them:
if (length(new_packages) > 0) {
  install.packages(new_packages)
}
# Load / activate the required packages (lapply = apply to list).
# invisible() suppresses printing of the list that lapply() returns.
invisible(lapply(X = packages_required, FUN = library, character.only = TRUE))
# ========================================================================
# Set paths:
# ------------------------------------------------------------------------
# path.wd     <- setwd("[User-selected path]")
# Better: Start RStudio with "...\R\190103_0459AB_PlackettLuce_RelativePaths.Rproj"
# This enables relative paths and thus portability of the entire directory.
path.wd <- getwd()
# Working directory: holds this and other project-related R-scripts.
# It is expected to be the subdirectory "R" of the project folder.
if (toupper(basename(path.wd)) != "R") {
  warning("The working directory '", path.wd, "' is not a subdirectory ",
          "named 'R'; the project paths derived from it may be wrong.",
          call. = FALSE)
}

path.base <- dirname(path.wd)
# Path to the project folder, i.e. the parent of the working directory.
# dirname() drops the last path component, whatever its name or length,
# instead of blindly clipping a fixed number of characters off the path.
dir.exists(path.base)
# Check whether the path leads to an existing folder.

path.data <- file.path(path.base, "data")
# Path to the input data.
dir.exists(path.data)
# Check whether the path leads to an existing folder.

path.output <- file.path(path.base, "output")
# Path to the output data.
dir.exists(path.output)
# Check whether the path leads to an existing folder.
# ========================================================================
# Read data:
# ------------------------------------------------------------------------
# Show what is available in the data directory:
list.files(path.data)
# Full path of the input file, built once and reused below:
data_file <- file.path(path.data,
                       "181227_2027AB_NPM11_Priorities_PlackettLuce.csv")
# Check that the input file is where it is expected:
file.exists(data_file)
# Non-numeric columns are read as factors. This is requested explicitly,
# because read.csv() defaults to stringsAsFactors = FALSE since R 4.0.0.
# (The former call to default.stringsAsFactors() was dropped: that function
# is deprecated since R 4.1.0 and defunct in later releases, where calling
# it stops the script.)
data_csv <- read.csv(file             = data_file,
                     header           = TRUE,
                     sep              = ",",
                     stringsAsFactors = TRUE)
# Structure of the data:
str(data_csv)
# Arguments to the Plackett-Luce model have been prefixed "l_".
# ========================================================================
# Analyze data:
# -----------------------------------------------------------------------
# Terminology: in the needs assessment context the "worth" coefficients of
# the Plackett-Luce model are called "intensities" (an arbitrary choice).
#
# Fit the model on all columns whose names start with "l_".
intensities <- data_csv %>%
  dplyr::select(starts_with("l_")) %>%
  PlackettLuce()
# Expect the following messages at this point:
#   Recoded rankings that are not in dense form
#   Removed rankings with less than 2 items
# They are specific to this dataset and can be ignored.

# Components of the fitted model object:
names(intensities)
# The raw coefficients are not needed for what follows:
# intensities_coef <- coef(intensities)
# intensities_coef

# Summary of intensities.
# With "ref = NULL" the mean of all intensities serves as the reference value;
# otherwise the first item would be the reference, with its coefficient
# constrained to 0.
PL_est <- summary(object = intensities, ref = NULL)
PL_est # The key output.
# ========================================================================
# Preparations for confidence intervals and exporting output to .csv files
# ------------------------------------------------------------------------
# Prepares a table for export, stripping the prefix "l_" from the item names.
# The anchored pattern "^l_" with sub() removes the leading prefix only; an
# unanchored gsub("l_", ...) would also delete any "l_" inside a name
# (e.g. "l_Fuel_x" would become "Fuex").
# The row names are passed to sub() directly rather than through a pipe, so
# that the coefficient matrix itself is not handed to sub() as an extra
# positional argument.
items <- sub(pattern     = "^l_",
             replacement = "",
             x           = rownames(PL_est$coefficients))
# Levels in order of appearance: ensures items will be listed in the sequence
# of the arguments, not alphabetically.
items <- factor(x = items, levels = items)
PL_est_z.values <- data.frame(items_num = as.integer(items),
                              items     = items,
                              PL_est$coefficients)
# PL_est_z.values

# In order to obtain confidence intervals, quasi-variances
# of the coefficients are needed:
qv <- qvcalc(intensities, ref = NULL)
summary(qv)

# Strips the prefix "l_" from the item names in preparation for export.
# The anchored pattern "^l_" with sub() removes the leading prefix only; an
# unanchored gsub("l_", ...) would also delete any "l_" inside a name.
# The row names are passed to sub() directly rather than through a pipe, so
# that the qv table itself is not handed to sub() as an extra positional
# argument.
items <- sub(pattern     = "^l_",
             replacement = "",
             x           = rownames(qv$qvframe))
# Levels in order of appearance: ensures items will be listed in the sequence
# of the arguments, not alphabetically.
items <- factor(x = items, levels = items)

# Computes additional columns needed for 95%-CIs.
# Exponentiates coefficient and CI bound estimates, in accordance with the
# PL-model.
qv_estim_CI <- data.frame(items_num = as.integer(items),
                          items     = items,
                          qv$qvframe) %>% # qv table with item labels ...
  mutate(quasiSD  = sqrt(quasiVar),
         # 95% bounds: estimate -/+ standard normal 97.5% quantile * quasi-SD
         quasiLCI = estimate - quasiSD * qnorm(0.975, mean = 0, sd = 1),
         quasiUCI = estimate + quasiSD * qnorm(0.975, mean = 0, sd = 1),
         expLCI   = exp(quasiLCI),
         expEst   = exp(estimate),
         expUCI   = exp(quasiUCI))
# qv_estim_CI
# ========================================================================
# Export output:
# ------------------------------------------------------------------------
# Coefficient table (estimates, standard errors, z-values, p-values).
# row.names = FALSE: the row names are not written to the file; the item
# labels are already contained in the column "items".
write.csv(x         = PL_est_z.values,
          file      = file.path(path.output, "PL_coef_table.csv"),
          row.names = FALSE)

# Estimates with quasi-variance based 95% confidence intervals.
write.csv(x         = qv_estim_CI,
          file      = file.path(path.output, "PL_estimates_CI.csv"),
          row.names = FALSE)
# ========================================================================
# Optional end-of-program house-cleaning commands:

# Empties environment completely:
# rm(list = ls())

# Explicit removal of named objects only:
# rm(object_1, .... object_N)
# ========================================================================
# End of script
# ========================================================================