Appendix

Functions

# This function was created to more easily download different indeces
downloadDem <- function(data_source, fh_data = "FIH_hist", dd_data = "col_sheet",
                        remove_file = TRUE) {

  # Freedom House
    # There are different FH datasets and the fh_data parameter specifies which one to download
  if(tolower(data_source) == "fh") {
    if(fh_data == "FIH_hist") {
      download.file(url =
"https://freedomhouse.org/sites/default/files/2021-02/Country_and_Territory_Ratings_and_Statuses_FIW1973-2021.xlsx",
                    destfile = "fh.xlsx", method = "curl")

      temp <- read.xlsx(file = "fh.xlsx", sheetIndex = 2, startRow = 3)

      colnames(temp) <- c("country", apply(expand.grid(c("PR","CL", "Status"),
                                                       c(1972:1981,1983:2020)),
                                           1, paste, collapse="."))
    }

    if(fh_data == "FIH_recent") {
      download.file(url =
"https://freedomhouse.org/sites/default/files/2021-02/All_data_FIW_2013-2021.xlsx",
destfile = "fh.xlsx", method = "curl")

      temp <- read.xlsx(file = "fh.xlsx", sheetIndex = 2, startRow = 2)
    }

    if(fh_data == "FIH_aggregate") {
      download.file(url =
"https://freedomhouse.org/sites/default/files/2021-02/Aggregate_Category_and_Subcategory_Scores_FIW_2003-2021.xlsx",
destfile = "fh.xlsx", method = "curl")

      temp <- read.xlsx(file = "fh.xlsx", sheetIndex = 2)
    }

    if(remove_file == TRUE) {
      file.remove("fh.xlsx")
    }
  }

  # V-Dem
  if(tolower(data_source) == "vdem") {
    download.file(url = "https://v-dem.net/media/datasets/Country_Year_V-Dem_Full_others_R_v11.1.zip",
                  destfile = "vdem.zip", method = "curl")
    unzip("vdem.zip")

    temp <- readRDS("Country_Year_V-Dem_Full+others_R_v11.1/V-Dem-CY-Full+Others-v11.1.rds")

    if(remove_file == TRUE) {
      unlink("Country_Year_V-Dem_Full+others_R_v11.1/", recursive = TRUE)
      file.remove("vdem.zip")
    }
  }

  # PolityV
  if(tolower(data_source) == "polityv") {
    download.file(url = "http://www.systemicpeace.org/inscr/p5v2018.sav",
                  destfile = "polityV.sav", method = "curl")

    temp <- read.spss("polityV.sav", to.data.frame = TRUE)

    if(remove_file == TRUE) {
      file.remove("polityV.sav")
    }
  }

  # UDS
  if(tolower(data_source) == "uds") {
    download.file(url = "http://www.unified-democracy-scores.net/files/20140312/z/uds_summary.csv.gz",
                  destfile = "uds_summary.csv.gz")

    temp <- read.table("uds_summary.csv.gz", header = T, sep = ",")

    if(remove_file == TRUE) {
      file.remove("uds_summary.csv.gz")
    }
  }

  # DD
    # There are different DD datasets and the dd_data parameter specifies which one to download
  if(tolower(data_source) == "dd") {

    download.file(url =
"http://www.christianbjoernskov.com/wp-content/uploads/2020/09/Bj%C3%B8rnskov-Rode-integrated-dataset-v3.2.xlsx",
destfile = "dd.xlsx", method = "curl")

    if(dd_data == "reg_char") {
      temp <- read_excel(path = "dd.xlsx", sheet = 1)
    } else {
      temp <- read_excel(path = "dd.xlsx", sheet = 3)
    }

    if(remove_file == TRUE) {
      file.remove("dd.xlsx")
    }
  }

  return(temp)

}

# This function just facilitates the prinint of how far along a loop is
print_progress <- function(c_state, max) {
  print(paste(round((c_state/max)*100, digits = 2), "%"))
}

# This function runs the SVM regression
  # It needs identifying and support variables, as well as varaibles for the SVM
  # regression, a dataset with the input data and a dataset with the priming
  # observations
SVM_index <- function(svm_dataframe, id_vars, support_vars, svm_data_vars,
                      priming_selec, save_path = "") {
  # Combining all variables
  var_SVM <- c(id_vars,support_vars, svm_data_vars)

  # Extranting the priming observation from the dataset with input variables
  priming_df <- svm_dataframe %>%
    filter(iso_year %in% priming_selec$iso_year) %>%
    select(-all_of(c(id_vars,support_vars)))

  # Creating dataset to be filled with the data from the 2000 iterative runs of
  # the SVM regression
  result_full_df <- svm_dataframe %>% select(all_of(id_vars))

  # Loop that takes a sample of the priming data, trains the dataset, applies it
  # to the overall input data, iteratively 2000 times
  for(i in 1:2000) {
    sample <- sample.split(1:nrow(priming_df), SplitRatio = runif(1, min=0.2, max=0.5))

    train <- subset(priming_df, sample==TRUE)

    train_x <- train %>%
      select(-v2x_polyarchy)

    train_y <- train[,"v2x_polyarchy"]

    model1 <- svm(x = train_x, y = train_y, scale = TRUE, kernel = "radial",
                  cost = 1, na.action = na.omit, epsilon = 0.025)

    res1 <- predict(model1, svm_df %>% select(-all_of(c(id_vars,support_vars,
                                                        "v2x_polyarchy"))))

    result_full_df <- cbind(result_full_df, res1)

    colnames(result_full_df)[ncol(result_full_df)] <- paste("iter",i,sep = "_")

    print_progress(i, 2000)
  }

  # Creating a reult dataset that will include the output data
  result_output_df <- svm_dataframe %>% select(all_of(c(id_vars, "v2x_polyarchy")))

  temp_output_df <- data.frame()

  # Loop that takes the 2000 prediction for each observation, takes the median
  # as value of the SVM index, calcualtes the standard deviation and all percentiles
  for(j in 1:nrow(result_full_df)) {
    res_vec <- as.numeric(result_full_df[j,7:ncol(result_full_df)])

    temp_obs_df <- data.frame(med = median(res_vec), sd = sd(res_vec),
                              t(quantile(res_vec, probs = seq(from = 0, to = 1, by = 0.01)
                                         )
                                )
                              )

    colnames(temp_obs_df)[3:103] <- paste(seq(from = 0, to = 100, by = 1), "p",
                                          sep = "")

    temp_output_df <- rbind(temp_output_df, temp_obs_df)

    print_progress(j, nrow(result_full_df))
  }

  # Binding the columns of the output with its identifying variables
  result_output_df <- cbind(result_output_df, temp_output_df)

  # Creating a list that includes the predicted values for all 2000 iterations
  # for each observation in the Full_Result and the output data in the Output
  result_SVM <- list(
    Full_Result = result_full_df,
    Output = result_output_df
  )

  # If statement that saves the result to an RDS file, should a path or filename
  # be specified
  if (save_path != "") {
    saveRDS(result_SVM, save_path)
  }

  # Returns the result
  return(result_SVM)
}

R Script for priming data, SVM regression, and analysis

#### Libraries ####
library(dplyr)
library(tidyr)
library(readxl)
library(e1071)
library(caTools)
library(countrycode)
library(haven)
library(descr)
library(ggplot2)
library(zoo)
library(ggplot2)
library(ggpubr)

#### Standard variables/datasets ####

vDem <- downloadDem(data_source = "vdem") # Custom function created to easily download datasets

start_year <- 1919

set.seed(44)

#### Section 4: Priming data ####

# This variable contains the variable names for all of the indicators used to
# create the priming data with the extremes of the UDS and the EDI as well as
# the variables used to check for consensus
var_vDem <- c("country_name","country_text_id","country_id","year","COWcode",
              "v2x_polyarchy","e_uds_mean","e_boix_regime", "e_democracy_omitteddata",
              "e_lexical_index","e_p_polity","e_fh_status", "e_fh_cl", "e_fh_pr")


  priming_selection <- vDem %>%
  select(all_of(var_vDem)) %>%
  filter(year >= 1919)

# Finding the decile of the UDS scale
ten_p_uds <- -(min(priming_selection$e_uds_mean, na.rm = TRUE) -
                 max(priming_selection$e_uds_mean, na.rm = TRUE)) * 0.1

decile_UDS <- c(
  min(priming_selection$e_uds_mean, na.rm = TRUE)+ten_p_uds,
  max(priming_selection$e_uds_mean, na.rm = TRUE)-ten_p_uds
)


priming_selection <- priming_selection %>%
  # Filtering by the deciles of both scales
  filter(
    (e_uds_mean < decile_UDS[1] | e_uds_mean > decile_UDS[2]) |
      (v2x_polyarchy < 0.1 | v2x_polyarchy > 0.9)
  ) %>%
  # Correcting missing histoprical iso3c codes
  mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
  mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
  # Creating a dichotomous variable to idedntify democracies and autocracies
  # based on their EDI and UDS scores
  mutate(prim_dem = 0) %>%
  mutate(prim_dem = ifelse(v2x_polyarchy > 0.9 &
                             !is.na(v2x_polyarchy), 1, prim_dem)) %>%
  mutate(prim_dem = ifelse(e_uds_mean > decile_UDS[2] &
                             !is.na(e_uds_mean), 1, prim_dem)) %>%
  # Creating identifying variable and moving certain variables closer to the
  # first columns
  mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
  relocate(c("iso3c","iso_year","prim_dem"), .after = COWcode) %>%
  # Creating the FH score by calculating it using the CL and PR variables
  mutate(fh_avg = (e_fh_cl + e_fh_pr)/2)

# Downloading the DD dataset
dd_index <- downloadDem("dd", dd_data = "reg_char") %>%
  # Correcting disparities in nomenclature
  mutate(country = ifelse(country == "Korea, People's Republic",
                          "North Korea", country)) %>%
  # Creating the iso3c codes and using them to create unique observation identifiers
  mutate(iso3c = countryname(country, destination = "iso3c")) %>%
  mutate(iso_year = paste(iso3c, year, sep = "_"))
# Removing columns with NA (due to the format of the file some columns are just blank)
dd_index <- dd_index[colSums(!is.na(dd_index)) > 0]

# Creating the final priming_selection dataset by combining the priming_selection
# with the DD dataset
priming_selection <- priming_selection %>%
  left_join(dd_index %>% select(iso_year, Democracy), by = "iso_year") %>%
  rename(dd_index = Democracy)

  # Section 4.1: Consensus #+
  # The following lines create tables used to check for consensus between
  # indicators

    ## Boix ##
crosstab(priming_selection$prim_dem, priming_selection$e_democracy_omitteddata)

    ## DD ##
crosstab(priming_selection$prim_dem, priming_selection$dd_index)

    ## Lexical ##
crosstab(priming_selection$prim_dem, priming_selection$e_lexical_index)

    ## FH ##
crosstab(priming_selection$prim_dem, priming_selection$e_fh_status)

    ## Polity ##
crosstab(priming_selection$prim_dem, priming_selection$e_p_polity)


  # Section 4.2: Representativeness #

# Downloading the DD dataset that contains regime information
dd_reg_char <- downloadDem("dd", dd_data = "reg_char")
# Removing empty columns (again due to the formatting of the original file)
dd_reg_char <- dd_reg_char[colSums(!is.na(dd_reg_char)) > 0] %>%
  # Correcting disparities in nomenclature
  mutate(country = ifelse(country == "Korea, People's Republic",
                          "North Korea", country)) %>%
  mutate(iso3c = countryname(country, destination = "iso3c")) %>%
  mutate(iso_year = paste(iso3c, year, sep = "_"))

# Creating a new file based on the priming_selection that just contains identifying
# variables and the dichotomous variable created in the previous section
priming_repr <- priming_selection %>%
  select(country_name, iso3c, year, iso_year, prim_dem) %>%
  # Joining the regime info dataset with the priming_selection
  left_join(dd_reg_char, by = "iso_year") %>%
  rename(dd_index = Democracy) %>%
  # Removing observations that are not in the DD dataset
  filter(!is.na(`DD category`)) %>%
  # Creating a character variable for regime type from the original numeric
  mutate(regime_type = ifelse(
    `DD regime` == 0, "Parliamentary democracy",
    ifelse(`DD regime` == 1, "Mixed democracy",
           ifelse(`DD regime` == 2, "Presidential democracy",
                  ifelse(`DD regime` == 3, "Civilian autocracy",
                         ifelse(`DD regime` == 4, "Military dictatorship",
                                ifelse(`DD regime` == 5, "Royal dictatorship", NA
                                )
                         )
                  )
           )
    )
  )
  ) %>%
  mutate(regime_type = ifelse(`DD regime` <= 3 & Communist == 1,
                              "Communist dictatorship", regime_type)) %>%
  # Creating a character variable for election type from the original numeric
  mutate(
    election_type = ifelse(electoral == 0, "No elections",
                           ifelse(electoral == 1, "Single-party elections",
                                  ifelse(electoral == 2,
                                         "Non-democratic multi-party elections",
                                         ifelse(electoral == 3,
                                                "Democratic elections", NA)
                                  )
                           )
    )
  ) %>%
  # Creating a character variable for whether there are elections from the original numeric
  mutate(election_type2 = ifelse(election_type == "No elections",
                                 "No elections", "Elections")) %>%
  # Creating a character variable for voting system from the original numeric
  mutate(voting_system = ifelse(`proportional voting` == 1,
                                "Proportional", "Majoritarian")
         )

# Creating dataset to check for regime length
reg_length <- read_excel(
  "data/Autocracy Dataset/GWF Autocratic Regimes 1.2/GWF All Political Regimes Case List.xls",
  sheet = 1)
# Removing uneeded columns
reg_length <- reg_length[,1:5] %>%
  # Correcting disparities in nomenclature
  mutate(Country = ifelse(Country == "Lietchtenstein", "Liechtenstein", Country)) %>%
  mutate(iso3c = countryname(Country, destination = "iso3c", warn = F)) %>%
  mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
  mutate(iso3c = ifelse(Country == "Cen African Rep", "CAF", iso3c)) %>%
  # Removing empty rows
  filter(!is.na(Country)) %>%
  # Establishing 2014 as ending year
  mutate(`end yr` = ifelse(is.na(`end yr`), 2014, `end yr`)) %>%
  # Calculating total length
  mutate(tot_length = `end yr` - `beg yr`)

# Creating the final regime length dataset by running a loop that creates a
# country and year observation for each regime
reg_length_final <- data.frame()
for(i in 1:nrow(reg_length)) {
  year_seq <- seq(from=reg_length$`beg yr`[i], to=reg_length$`end yr`[i])

  temp_df <- data.frame(
    Country = reg_length$Country[i],
    iso3c = reg_length$iso3c[i],
    year = year_seq,
    tot_length = reg_length$tot_length[i]
  ) %>%
    # creating regime length up to the date of observation
    mutate(cur_length = year - reg_length$`beg yr`[i])

  reg_length_final <- rbind(reg_length_final, temp_df)
}

# Creating identifying variable for the regime length dataset
reg_length_final <- reg_length_final %>%
  mutate(iso_year = paste(iso3c, year, sep = "_"))

# Joining the regime length dataset with the priming_selection
priming_selection <- priming_selection %>%
  left_join(
    reg_length_final %>%
      mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
      select(iso_year, tot_length, cur_length),
    by = "iso_year"
  )

# In the following section the countries are checked for their regime characteristics
# Two tables are calculated: one with absolute numbers and one with percentages
# for both democracies and autocracies

    ## Autocracies ##
      ### Dictatorship type: ###
table(priming_repr$regime_type[priming_repr$prim_dem==0])
round(prop.table(
  table(priming_repr$regime_type[priming_repr$prim_dem==0])
)*100, digits = 2)

      ### Electoral Systems: ###
# Whether there are election or not
table(priming_repr$election_type2[priming_repr$prim_dem==0])
round(prop.table(
  table(priming_repr$election_type2[priming_repr$prim_dem==0])
)*100, digits = 2)

# Type of election system
table(priming_repr$election_type[priming_repr$prim_dem==0])
round(prop.table(
  table(priming_repr$election_type[priming_repr$prim_dem==0])
)*100, digits = 2)

    ## Democracies:
      ###Democracy type:
table(priming_repr$regime_type[priming_repr$prim_dem==1])
round(prop.table(
  table(priming_repr$regime_type[priming_repr$prim_dem==1])
)*100, digits = 2)

      ###Amount of chambers:
table(priming_repr$`No. of chambers in parliament`[priming_repr$prim_dem==1])
round(prop.table(
  table(priming_repr$`No. of chambers in parliament`[priming_repr$prim_dem==1])
)*100, digits = 2)

      ###Voting System:
table(priming_repr$voting_system[priming_repr$prim_dem==1])
round(prop.table(
  table(priming_repr$voting_system[priming_repr$prim_dem==1])
)*100, digits = 2)

    ##Length for Autocracies and Democracies:
# The length is checked by running a summary function for democracies and autocracies
# separately
      ###Total length:
tapply(priming_selection$tot_length,priming_selection$prim_dem, summary)

      ###Current length:
tapply(priming_selection$cur_length,priming_selection$prim_dem, summary)

####  Section 5.1: Aggregation comparison with EDI ####

# List of mid-level indices and their constituting low-level indices
polyarchy_list <- list(
  v2x_frassoc_thick = c("v2psparban_ord", "v2psbars_ord", "v2psoppaut_ord",
                        "v2elmulpar_ord", "v2cseeorgs","v2csreprss"),

  v2xel_frefair = c("v2elembaut", "v2elembcap", "v2elrgstry_ord", "v2elvotbuy_ord",
                    "v2elirreg_ord", "v2elintim_ord", "v2elpeace_ord",
                    "v2elfrfair_ord"),

  v2x_freexp_altinf = c("v2mecenefm", "v2meharjrn", "v2mebias", "v2meslfcen",
                        "v2mecrit", "v2merange", "v2cldiscm", "v2cldiscw",
                        "v2clacfree")
)

# Identifying variables
id_vars <- c("country_name","country_text_id","country_id","iso3c","year",
             "iso_year")

# Variables used for the SVM regression
svm_data_vars <- c("v2x_polyarchy",

                    "v2psparban_ord", "v2psbars_ord", "v2psoppaut_ord",
                   "v2elmulpar_ord", "v2cseeorgs","v2csreprss",

                    "v2x_suffr",

                    "v2elembaut", "v2elembcap", "v2elrgstry_ord", "v2elvotbuy_ord",
                   "v2elirreg_ord", "v2elintim_ord", "v2elpeace_ord",
                   "v2elfrfair_ord",

                    "v2x_elecoff",

                    "v2mecenefm", "v2meharjrn", "v2mebias", "v2meslfcen",
                   "v2mecrit", "v2merange", "v2cldiscm", "v2cldiscw",
                   "v2clacfree"
)

# Support variables needed for adaptations to V-Dem's coding rules
support_vars <- c("v2x_elecreg")

# Complete variable list
var_SVM <- c(id_vars,support_vars, svm_data_vars)

# Creating the dataset to be used in the SVM regression from the original V-Dem
# dataset
svm_df <- vDem %>%
  # Creating iso3c identifying variable
  mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
  # Correcting disparities in nomenclature
  mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
  # Filtering for the start year and removing observations without an EDI score
  filter(year >= start_year, !is.na(v2x_polyarchy)) %>%
  # Creating identifying variable for each observation
  mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
  # Selecting the needed variables
  select(all_of(var_SVM)) %>%
  # Relocating certain variables for better visualization
  relocate(c("iso3c", "iso_year"), .after = country_id) %>%
  # Grouping by country name so that operations aren't conducted cross country
  group_by(country_name) %>%
  # Filling missing electoral variables as mentioned by V-Dem's coding rules
  mutate(v2elmulpar_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elmulpar_ord, na.rm = F), 0)) %>%
  mutate(v2elrgstry_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elrgstry_ord, na.rm = F), 0)) %>%
  mutate(v2elvotbuy_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elvotbuy_ord, na.rm = F), 0)) %>%
  mutate(v2elirreg_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elirreg_ord, na.rm = F), 0)) %>%
  mutate(v2elintim_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elintim_ord, na.rm = F), 0)) %>%
  mutate(v2elpeace_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elpeace_ord, na.rm = F), 0)) %>%
  mutate(v2elfrfair_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elfrfair_ord, na.rm = F), 0)) %>%
  # Filling up electoral variables that did not have an election in the first year
  # of the dataset
  fill(c("v2elmulpar_ord","v2elrgstry_ord", "v2elvotbuy_ord", "v2elirreg_ord",
         "v2elintim_ord", "v2elpeace_ord", "v2elfrfair_ord"), .direction = "up") %>%
  # Filling missing party variables as mentioned by V-Dem's coding rules
  mutate(v2psoppaut_ord = ifelse(v2psbars_ord <= 1 & is.na(v2psoppaut_ord), 0,
                                 v2psoppaut_ord)) %>%
  mutate(v2elembcap = ifelse(v2x_elecreg == 0 & is.na(v2elembcap), 0,
                             v2elembcap)) %>%
  mutate(v2elembaut = ifelse(v2x_elecreg == 0 & is.na(v2elembaut), 0,
                             v2elembaut)) %>%
  # Interpolating missing data
  mutate(v2mebias = na.spline(v2mebias)) %>%
  mutate(v2merange = na.spline(v2merange)) %>%
  mutate(v2meharjrn = na.spline(v2meharjrn)) %>%
  mutate(v2mecenefm = na.spline(v2mecenefm)) %>%
  mutate(v2elembcap = na.spline(v2elembcap)) %>%
  mutate(v2mecrit = na.spline(v2mecrit)) %>%
  mutate(v2x_suffr = na.spline(v2x_suffr)) %>%
  # Ungrouping before SVM regression
  ungroup() %>%
  # Removing still missing data
  drop_na()

# Running SVM function created for the purpose of this contribution
SVM_index(svm_dataframe = svm_df, id_vars = id_vars, support_vars = support_vars,
          svm_data_vars = svm_data_vars, priming_selec = priming_selection,
          save_path = "result/result_SVM_polyarchy3.RDS")

# Reading saved result
result_EDI <- readRDS("result/result_SVM_polyarchy3.RDS")$Output

# Creating dataset with the delta between indices and input data
df_output_polyarchy <- result_EDI[,1:8] %>%
  mutate(diff = med - v2x_polyarchy) %>%
  left_join(svm_df)

# OLS regression of all of the data
lm1 <- lm(v2x_polyarchy ~ . , df_output_polyarchy[,c(7,11:35)])
summary(lm1)
lm2 <- lm(med ~ . , df_output_polyarchy[,c(8,11:35)])
summary(lm2)

# OLS regression of the data with a delta larger than 0.01
lm1 <- lm(v2x_polyarchy ~ . , df_output_polyarchy[df_output_polyarchy$diff > 0.01 | df_output_polyarchy$diff < -0.01,c(7,11:35)])
summary(lm1)
lm2 <- lm(med ~ . , df_output_polyarchy[df_output_polyarchy$diff > 0.01 | df_output_polyarchy$diff < -0.01,c(8,11:35)])
summary(lm2)

# Creating OLS regression table with stargazer in HTML format
stargazer(lm1,lm2, summary = T, dep.var.labels = c("EDI", "SVM Ind."), type = "html", notes.label = "Signif. codes:")

# Creating a dataset with the results of the SVM regression, its delta from
# the EDI, and the input variables
graph_EDI <- result_EDI[,1:8] %>%
  mutate(diff = med - v2x_polyarchy) %>%
  left_join(svm_df)

# Scaling the data for graphical representation later on
graph_EDI_scaled <- graph_EDI
graph_EDI_scaled[,11:35] <- scale(graph_EDI[,11:35], center = F, scale = T)

# Keeping variables that will not be averaged into mid-level indices
graph_EDI_mid_level <- graph_EDI_scaled[,c("country_name","iso3c","year",
                                           "iso_year","v2x_polyarchy","med",
                                           "diff","v2x_suffr","v2x_elecoff")]
# This loop creates mid-level indices out of the scaled low-level indices
# (only for graphical representation)
for(i in 1:length(polyarchy_list)){
  temp <- data.frame(rowMeans(graph_EDI_scaled[,polyarchy_list[[i]]]))
  colnames(temp) <- names(polyarchy_list)[i]

  graph_EDI_mid_level <- cbind(graph_EDI_mid_level,temp)
}

  # Figures #

    ## Figure 1 ##
agg_plot_1 <- ggplot(graph_EDI, aes(x = med, y = v2x_polyarchy, color = diff)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "green") +
  scale_color_gradient(low="blue", high="red") +
  labs(y = "EDI", x = "SVM Index") +
  guides(color = guide_legend(title = "Delta"))

agg_plot_1

# Saving plot as a png
png("img/figure1.png", res = 120, width = 700, height = 700)
agg_plot_1
dev.off()

    ## Figure 2 ##
agg_plot_2 <- graph_EDI %>%
  filter(iso3c == "DEU") %>%
  select(iso3c, year, v2x_polyarchy, med) %>%
  rename(EDI = v2x_polyarchy, `SVM Ind` = med) %>%
  pivot_longer(cols = c("EDI","SVM Ind")) %>%
  as.data.frame(.) %>%
  ggplot(aes(x = year,y = value, color = name)) +
  #ggplot(aes(x = year,y = value, linetype = name)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(y = "Score", x = "Year") +
  guides(color = guide_legend(title = "Index")) +
  scale_color_manual(values=c("#990000", "#000066"))

agg_plot_2

png("img/figure2.png", res = 120, width = 700, height = 700)
agg_plot_2
dev.off()

    ## Figure 3 ##
agg_plot_3 <- graph_EDI %>%
  filter(iso3c == "RUS") %>%
  select(iso3c, year, v2x_polyarchy, med) %>%
  rename(EDI = v2x_polyarchy, `SVM Ind` = med) %>%
  pivot_longer(cols = c("EDI","SVM Ind")) %>%
  as.data.frame(.) %>%
  ggplot(aes(x = year,y = value, color = name)) +
  #ggplot(aes(x = year,y = value, linetype = name)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(y = "Score", x = "Year") +
  guides(color = guide_legend(title = "Index")) +
  scale_color_manual(values=c("#990000", "#000066"))

agg_plot_3

png("img/figure3.png", res = 120, width = 700, height = 700)
agg_plot_3
dev.off()

  ## Figure 4 ##

agg_plot_4 <- graph_EDI_mid_level %>%
  filter(iso3c == "RUS") %>%
  select(c("year","v2x_suffr","v2x_elecoff","v2x_frassoc_thick","v2xel_frefair",
           "v2x_freexp_altinf")) %>%
  pivot_longer(cols = c("v2x_suffr","v2x_elecoff","v2x_frassoc_thick",
                        "v2xel_frefair","v2x_freexp_altinf")) %>%
  as.data.frame(.) %>%
  ggplot(aes(x = year,y = value, color = name)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(y = "Value", x = "Year") +
  guides(color = guide_legend(title = "Variable"))

agg_plot_4

png("img/figure4.png", res = 120, width = 700, height = 700)
agg_plot_4
dev.off()

    ## Figure 5 ##
agg_plot_5 <- graph_EDI %>%
  filter(iso3c == "COL") %>%
  select(iso3c, year, v2x_polyarchy, med) %>%
  rename(EDI = v2x_polyarchy, `SVM Ind` = med) %>%
  pivot_longer(cols = c("EDI","SVM Ind")) %>%
  as.data.frame(.) %>%
  ggplot(aes(x = year,y = value, color = name)) +
  #ggplot(aes(x = year,y = value, linetype = name)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(y = "Score", x = "Year") +
  guides(color = guide_legend(title = "Index")) +
  scale_color_manual(values=c("#990000", "#000066"))

agg_plot_5

png("img/figure5.png", res = 120, width = 700, height = 700)
agg_plot_5
dev.off()

    ## Figure 6 ##

agg_plot_6 <- graph_EDI_mid_level %>%
  filter(iso3c == "COL") %>%
  select(c("year","v2x_suffr","v2x_elecoff","v2x_frassoc_thick","v2xel_frefair",
           "v2x_freexp_altinf")) %>%
  pivot_longer(cols = c("v2x_suffr","v2x_elecoff","v2x_frassoc_thick","v2xel_frefair",
                        "v2x_freexp_altinf")) %>%
  as.data.frame(.) %>%
  ggplot(aes(x = year,y = value, color = name)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(y = "Value", x = "Year") +
  guides(color = guide_legend(title = "Variable"))

agg_plot_6

png("img/figure6.png", res = 120, width = 700, height = 700)
agg_plot_6
dev.off()

#### Section 5.2 SVM Indices ####

# Loading the variable list as they relate to Embedded Democracy
svm_data_table <- read_excel("data/SVM_variables.xlsx")

  # Internally embedded #

# Variables used for the SVM regression
svm_data_vars <- c(svm_data_table$Code[svm_data_table$Embeddedness == "Internal"],
                   "v2x_polyarchy")

# Identifying variables
id_vars <- c("country_name","country_text_id","country_id","iso3c","year",
             "iso_year")

# Variables used for the SVM regression
support_vars <- c("v2x_elecreg", "v2exhoshog", "v2lgbicam")

# Complete variable list
var_SVM <- c(id_vars,support_vars, svm_data_vars)

# Creating the dataset to be used in the SVM regression from the original V-Dem
# dataset
svm_df <- vDem %>%
  # Creating iso3c identifying variable
  mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
  # Correcting disparities in nomenclature
  mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
  # Removing empty variables related to the DDR before its formal founding in 1949
  filter(!(iso3c == "DDR") | year > 1948) %>%
  # Filtering for the start year
  filter(year >= start_year) %>%
  # Creating identifying variable for each observation
  mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
  # Selecting the needed variables
  select(all_of(var_SVM)) %>%
  # Relocating certain variables for better visualization
  relocate(c("iso3c", "iso_year"), .after = country_id) %>%
  # Grouping by country name so that operations aren't conducted cross country
  group_by(country_name) %>%
  # Filling missing electoral variables as mentioned by V-Dem's coding rules
  mutate(v2elmulpar_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elmulpar_ord, na.rm = F), 0)) %>%
  mutate(v2elrgstry_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elrgstry_ord, na.rm = F), 0)) %>%
  mutate(v2elvotbuy_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elvotbuy_ord, na.rm = F), 0)) %>%
  mutate(v2elirreg_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elirreg_ord, na.rm = F), 0)) %>%
  mutate(v2elintim_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elintim_ord, na.rm = F), 0)) %>%
  mutate(v2elpeace_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elpeace_ord, na.rm = F), 0)) %>%
  mutate(v2elfrfair_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elfrfair_ord, na.rm = F), 0)) %>%
  # If HoS and HoG are the same, both have the same value
  mutate(v2exctlhg_0 = ifelse(v2exhoshog == 1, v2exctlhs_0, v2exctlhg_0)) %>%
  # Filling up electoral variables that did not have an election in the first year
  # of the dataset
  fill(c("v2elmulpar_ord","v2elrgstry_ord", "v2elvotbuy_ord", "v2elirreg_ord",
         "v2elintim_ord", "v2elpeace_ord", "v2elfrfair_ord"), .direction = "up") %>%
  # Filling missing party and legislature variables as mentioned by V-Dem's coding rules
  mutate(v2psoppaut_ord = ifelse(v2psbars_ord <= 1 & is.na(v2psoppaut_ord), 0,
                                 v2psoppaut_ord)) %>%
  mutate(v2lgqstexp_ord = ifelse(v2lgbicam == 0,0,v2lgqstexp_ord)) %>%
  mutate(v2lginvstp_ord = ifelse(v2lgbicam == 0,0,v2lginvstp_ord)) %>%
  # Interpolating missing data
  mutate(v2lgbicam = na.approx(v2lgbicam, na.rm = F)) %>%
  mutate(v2x_elecoff = na.approx(v2x_elecoff, na.rm = F)) %>%
  mutate(v2x_suffr = na.spline(v2x_suffr, na.rm = F)) %>%
  mutate(v2elrstrct = na.approx(v2elrstrct, na.rm = F)) %>%
  mutate(v2elembcap = na.spline(v2elembcap, na.rm = F)) %>%
  mutate(v2elembaut = na.spline(v2elembaut, na.rm = F)) %>%
  mutate(v2meharjrn = na.spline(v2meharjrn)) %>%
  mutate(v2mecenefm = na.spline(v2mecenefm)) %>%
  mutate(v2meslfcen = na.spline(v2meslfcen, na.rm = F)) %>%
  mutate(v2exrescon = na.spline(v2exrescon, na.rm = F)) %>%
  mutate(v2jucomp = na.spline(v2jucomp, na.rm = F)) %>%
  mutate(v2juhccomp = na.spline(v2juhccomp, na.rm = F)) %>%
  mutate(v2juhcind = na.spline(v2juhcind, na.rm = F)) %>%
  mutate(v2juncind = na.spline(v2juncind, na.rm = F)) %>%
  mutate(v2lgqstexp_ord = na.approx(v2lgqstexp_ord, na.rm = F)) %>%
  mutate(v2lginvstp_ord = na.approx(v2lginvstp_ord, na.rm = F)) %>%
  # Ungrouping before SVM regression
  ungroup() %>%
  # Removing still missing data
  drop_na()

# Running SVM function created for the purpose of this contribution
SVM_index(svm_dataframe = svm_df, id_vars = id_vars, support_vars = support_vars,
          svm_data_vars = svm_data_vars, priming_selec = priming_selection,
          save_path = "result/result_SVM_intern_emb4.RDS")




  # Electoral Regime #

# Variables used for the SVM regression
svm_data_vars <- c(svm_data_table$Code[svm_data_table$Regime == "Electoral"],
                   "v2x_polyarchy")

# Identifying variables
id_vars <- c("country_name","country_text_id","country_id","iso3c","year","iso_year")

# Variables used for the SVM regression
support_vars <- c("v2x_elecreg", "v2exhoshog", "v2lgbicam")

# Complete variable list
var_SVM <- c(id_vars,support_vars, svm_data_vars)

# Creating the dataset to be used in the SVM regression from the original V-Dem
# dataset
svm_df <- vDem %>%
  #  Creating iso3c identifying variable
  mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
  # Correcting disparities in nomenclature
  mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
  # Filtering for the start year
  filter(year >= start_year) %>%
  # Creating identifying variable for each observation
  mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
  # Selecting the needed variables
  select(all_of(var_SVM)) %>%
  # Relocating certain variables for better visualization
  relocate(c("iso3c", "iso_year"), .after = country_id) %>%
  # Grouping by country name so that operations aren't conducted cross country
  group_by(country_name) %>%
  # Filling missing electoral variables as mentioned by V-Dem's coding rules
  mutate(v2elrgstry_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elrgstry_ord, na.rm = F), 0)) %>%
  mutate(v2elvotbuy_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elvotbuy_ord, na.rm = F), 0)) %>%
  mutate(v2elirreg_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elirreg_ord, na.rm = F), 0)) %>%
  mutate(v2elintim_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elintim_ord, na.rm = F), 0)) %>%
  mutate(v2elpeace_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elpeace_ord, na.rm = F), 0)) %>%
  mutate(v2elfrfair_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elfrfair_ord, na.rm = F), 0)) %>%
  # Filling up electoral variables that did not have an election in the first year
  # of the dataset
  fill(c("v2elrgstry_ord", "v2elvotbuy_ord", "v2elirreg_ord", "v2elintim_ord",
         "v2elpeace_ord", "v2elfrfair_ord"), .direction = "up") %>%
  # Interpolating missing data
  mutate(v2x_elecoff = na.approx(v2x_elecoff, na.rm = F)) %>%
  mutate(v2x_suffr = na.spline(v2x_suffr, na.rm = F)) %>%
  mutate(v2elrstrct = na.approx(v2elrstrct, na.rm = F)) %>%
  mutate(v2elembcap = na.spline(v2elembcap, na.rm = F)) %>%
  mutate(v2elembaut = na.spline(v2elembaut, na.rm = F)) %>%
  # Ungrouping before SVM regression
  ungroup() %>%
  # Removing still missing data
  drop_na()

# Running SVM function created for the purpose of this contribution
SVM_index(svm_dataframe = svm_df, id_vars = id_vars, support_vars = support_vars,
          svm_data_vars = svm_data_vars, priming_selec = priming_selection,
          save_path = "result/result_SVM_elec_reg4.RDS"
)





  # Fully Embedded #

# Variables used for the SVM regression
svm_data_vars <- c(svm_data_table$Code,"v2x_polyarchy")

# Identifying variables
id_vars <- c("country_name","country_text_id","country_id","iso3c","year",
             "iso_year")

# Variables used for the SVM regression
support_vars <- c("v2x_elecreg", "v2exhoshog", "v2lgbicam")

# Complete variable list
var_SVM <- c(id_vars,support_vars, svm_data_vars)

# Creating the dataset to be used in the SVM regression from the original V-Dem
# dataset
svm_df <- vDem %>%
  # Creating iso3c identifying variable
  mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
  # Correcting disparities in nomenclature
  mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
  mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
  # Removing empty variables related to the DDR before its formal founding in 1949
  filter(!(iso3c == "DDR") | year > 1948) %>%
  # Filtering for the start year
  filter(year >= start_year) %>%
  # Creating identifying variable for each observation
  mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
  # Selecting the needed variables
  select(all_of(var_SVM)) %>%
  # Relocating certain variables for better visualization
  relocate(c("iso3c", "iso_year"), .after = country_id) %>%
  # Grouping by country name so that operations aren't conducted cross country
  group_by(country_name) %>%
  # Filling missing electoral variables as mentioned by V-Dem's coding rules
  mutate(v2elmulpar_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elmulpar_ord, na.rm = F), 0)) %>%
  mutate(v2elrgstry_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elrgstry_ord, na.rm = F), 0)) %>%
  mutate(v2elvotbuy_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elvotbuy_ord, na.rm = F), 0)) %>%
  mutate(v2elirreg_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elirreg_ord, na.rm = F), 0)) %>%
  mutate(v2elintim_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elintim_ord, na.rm = F), 0)) %>%
  mutate(v2elpeace_ord = ifelse(v2x_elecreg == 1,
                                na.locf(v2elpeace_ord, na.rm = F), 0)) %>%
  mutate(v2elfrfair_ord = ifelse(v2x_elecreg == 1,
                                 na.locf(v2elfrfair_ord, na.rm = F), 0)) %>%
  # If HoS and HoG are the same, both have the same value
  mutate(v2exctlhg_0 = ifelse(v2exhoshog == 1, v2exctlhs_0, v2exctlhg_0)) %>%
  # Filling up electoral variables that did not have an election in the first year
  # of the dataset
  fill(c("v2elmulpar_ord","v2elrgstry_ord", "v2elvotbuy_ord", "v2elirreg_ord",
         "v2elintim_ord", "v2elpeace_ord", "v2elfrfair_ord"), .direction = "up") %>%
  # Filling missing party and legislature variables as mentioned by V-Dem's coding rules
  mutate(v2psoppaut_ord = ifelse(v2psbars_ord <= 1 & is.na(v2psoppaut_ord), 0,
                                 v2psoppaut_ord)) %>%
  mutate(v2lgqstexp_ord = ifelse(v2lgbicam == 0,0,v2lgqstexp_ord)) %>%
  mutate(v2lginvstp_ord = ifelse(v2lgbicam == 0,0,v2lginvstp_ord)) %>%
  # Interpolating missing data
  mutate(v2meharjrn = na.spline(v2meharjrn)) %>%
  mutate(v2mecenefm = na.spline(v2mecenefm)) %>%
  mutate(e_peedgini = na.spline(e_peedgini, na.rm = F)) %>%
  mutate(e_migdppc = na.spline(e_migdppc, na.rm = F)) %>%
  mutate(e_pelifeex = na.spline(e_pelifeex, na.rm = F)) %>%
  mutate(v2csprtcpt = na.spline(v2csprtcpt, na.rm = F)) %>%
  mutate(v2elembcap = na.spline(v2elembcap, na.rm = F)) %>%
  mutate(v2elembaut = na.spline(v2elembaut, na.rm = F)) %>%
  mutate(v2elrstrct = na.approx(v2elrstrct, na.rm = F)) %>%
  mutate(v2x_elecoff = na.approx(v2x_elecoff, na.rm = F)) %>%
  mutate(v2lgbicam = na.approx(v2lgbicam, na.rm = F)) %>%
  mutate(v2x_suffr = na.spline(v2x_suffr, na.rm = F)) %>%
  mutate(v2meslfcen = na.spline(v2meslfcen, na.rm = F)) %>%
  mutate(v2exrescon = na.spline(v2exrescon, na.rm = F)) %>%
  mutate(v2jucomp = na.spline(v2jucomp, na.rm = F)) %>%
  mutate(v2juhccomp = na.spline(v2juhccomp, na.rm = F)) %>%
  mutate(v2juhcind = na.spline(v2juhcind, na.rm = F)) %>%
  mutate(v2juncind = na.spline(v2juncind, na.rm = F)) %>%
  mutate(v2lgqstexp_ord = na.approx(v2lgqstexp_ord, na.rm = F)) %>%
  mutate(v2lginvstp_ord = na.approx(v2lginvstp_ord, na.rm = F)) %>%
  mutate(v2pscnslnl = na.spline(v2pscnslnl, na.rm = F)) %>%
  # Ungrouping before SVM regression
  ungroup() %>%
  # Removing still missing data
  drop_na()

# Running SVM function created for the purpose of this contribution
SVM_index(svm_dataframe = svm_df, id_vars = id_vars, support_vars = support_vars,
          svm_data_vars = svm_data_vars, priming_selec = priming_selection,
          save_path = "result/result_SVM_extern_emb4.RDS")

# Comparison

# Reading individual SVM dataset created in this section
result_int_emb <- readRDS("result/result_SVM_intern_emb4.RDS")$Output

result_elec_reg <- readRDS("result/result_SVM_elec_reg4.RDS")$Output

result_full_emb <- readRDS("result/result_SVM_extern_emb4.RDS")$Output

# Reading SVMDI
SVMDI <- read_excel("data/Gruendler/ML indices (1).xlsx") %>%
  mutate(iso_year = paste(ISO, Year, sep = "_"))

# Joining all datasets
final_df <- result_elec_reg[,c(id_vars, "med")] %>%
  rename(`Elec. Reg.` = med) %>%
  full_join(
    result_int_emb[,c(id_vars, "med")] %>%
      rename(`Intern. Emb.` = med),
    by = id_vars
  ) %>%
  full_join(
    result_full_emb[,c(id_vars, "med")] %>%
      rename(`Fully Emb.` = med),
    by = id_vars
  ) %>%
  full_join(
    SVMDI[,c("iso_year","C_MLDI")] %>%
      rename(SVMDI = C_MLDI),
    by = "iso_year"
  ) %>%
  pivot_longer(cols = c("Elec. Reg.","Intern. Emb.","Fully Emb.","SVMDI"))

  # Figures

    ## Figure 7

agg_plot_7 <- final_df %>%
  filter(iso3c == "DEU") %>%
  ggplot(aes(x = year, y = value, color = name)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(y = "Score", x = "Year") +
  guides(color = guide_legend(title = "Index"))

agg_plot_7

png("img/figure7.png", res = 120, width = 700, height = 700)
agg_plot_7
dev.off()

    ## Figure 8

agg_plot_8 <- final_df %>%
  filter(iso3c == "HUN" & year > 2000) %>%
  ggplot(aes(x = year, y = value, color = name)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(y = "Score", x = "Year") +
  guides(color = guide_legend(title = "Index"))

agg_plot_8

png("img/figure8.png", res = 120, width = 700, height = 700)
agg_plot_8
dev.off()

    ## Figure 9

agg_plot_9 <- final_df %>%
  filter(iso3c == "POL" & year > 2000) %>%
  ggplot(aes(x = year, y = value, color = name)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(y = "Score", x = "Year") +
  guides(color = guide_legend(title = "Index"))

agg_plot_9

png("img/figure9.png", res = 120, width = 700, height = 700)
agg_plot_9
dev.off()