#### Libraries ####
library(dplyr)
library(tidyr)
library(readxl)
library(e1071)
library(caTools)
library(countrycode)
library(haven)
library(descr)
library(ggplot2)
library(zoo)
library(ggplot2)
library(ggpubr)
#### Standard variables/datasets ####
vDem <- downloadDem(data_source = "vdem") # Custom function created to easily download datasets
start_year <- 1919
set.seed(44)
#### Section 4: Priming data ####
# This variable contains the variable names for all of the indicators used to
# create the priming data with the extremes of the UDS and the EDI as well as
# the variables used to check for consensus
var_vDem <- c("country_name","country_text_id","country_id","year","COWcode",
"v2x_polyarchy","e_uds_mean","e_boix_regime", "e_democracy_omitteddata",
"e_lexical_index","e_p_polity","e_fh_status", "e_fh_cl", "e_fh_pr")
priming_selection <- vDem %>%
select(all_of(var_vDem)) %>%
filter(year >= 1919)
# Finding the decile of the UDS scale
ten_p_uds <- -(min(priming_selection$e_uds_mean, na.rm = TRUE) -
max(priming_selection$e_uds_mean, na.rm = TRUE)) * 0.1
decile_UDS <- c(
min(priming_selection$e_uds_mean, na.rm = TRUE)+ten_p_uds,
max(priming_selection$e_uds_mean, na.rm = TRUE)-ten_p_uds
)
priming_selection <- priming_selection %>%
# Filtering by the deciles of both scales
filter(
(e_uds_mean < decile_UDS[1] | e_uds_mean > decile_UDS[2]) |
(v2x_polyarchy < 0.1 | v2x_polyarchy > 0.9)
) %>%
# Correcting missing histoprical iso3c codes
mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
# Creating a dichotomous variable to idedntify democracies and autocracies
# based on their EDI and UDS scores
mutate(prim_dem = 0) %>%
mutate(prim_dem = ifelse(v2x_polyarchy > 0.9 &
!is.na(v2x_polyarchy), 1, prim_dem)) %>%
mutate(prim_dem = ifelse(e_uds_mean > decile_UDS[2] &
!is.na(e_uds_mean), 1, prim_dem)) %>%
# Creating identifying variable and moving certain variables closer to the
# first columns
mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
relocate(c("iso3c","iso_year","prim_dem"), .after = COWcode) %>%
# Creating the FH score by calculating it using the CL and PR variables
mutate(fh_avg = (e_fh_cl + e_fh_pr)/2)
# Downloading the DD dataset
dd_index <- downloadDem("dd", dd_data = "reg_char") %>%
# Correcting disparities in nomenclature
mutate(country = ifelse(country == "Korea, People's Republic",
"North Korea", country)) %>%
# Creating the iso3c codes and using them to create unique observation identifiers
mutate(iso3c = countryname(country, destination = "iso3c")) %>%
mutate(iso_year = paste(iso3c, year, sep = "_"))
# Removing columns with NA (due to the format of the file some columns are just blank)
dd_index <- dd_index[colSums(!is.na(dd_index)) > 0]
# Creating the final priming_selection dataset by combining the priming_selection
# with the DD dataset
priming_selection <- priming_selection %>%
left_join(dd_index %>% select(iso_year, Democracy), by = "iso_year") %>%
rename(dd_index = Democracy)
# Section 4.1: Consensus #+
# The following lines create tables used to check for consensus between
# indicators
## Boix ##
crosstab(priming_selection$prim_dem, priming_selection$e_democracy_omitteddata)
## DD ##
crosstab(priming_selection$prim_dem, priming_selection$dd_index)
## Lexical ##
crosstab(priming_selection$prim_dem, priming_selection$e_lexical_index)
## FH ##
crosstab(priming_selection$prim_dem, priming_selection$e_fh_status)
## Polity ##
crosstab(priming_selection$prim_dem, priming_selection$e_p_polity)
# Section 4.2: Representativeness #
# Downloading the DD dataset that contains regime information
dd_reg_char <- downloadDem("dd", dd_data = "reg_char")
# Removing empty columns (again due to the formatting of the original file)
dd_reg_char <- dd_reg_char[colSums(!is.na(dd_reg_char)) > 0] %>%
# Correcting disparities in nomenclature
mutate(country = ifelse(country == "Korea, People's Republic",
"North Korea", country)) %>%
mutate(iso3c = countryname(country, destination = "iso3c")) %>%
mutate(iso_year = paste(iso3c, year, sep = "_"))
# Creating a new file based on the priming_selection that just contains identifying
# variables and the dichotomous variable created in the previous section
priming_repr <- priming_selection %>%
select(country_name, iso3c, year, iso_year, prim_dem) %>%
# Joining the regime info dataset with the priming_selection
left_join(dd_reg_char, by = "iso_year") %>%
rename(dd_index = Democracy) %>%
# Removing observations that are not in the DD dataset
filter(!is.na(`DD category`)) %>%
# Creating a character variable for regime type from the original numeric
mutate(regime_type = ifelse(
`DD regime` == 0, "Parliamentary democracy",
ifelse(`DD regime` == 1, "Mixed democracy",
ifelse(`DD regime` == 2, "Presidential democracy",
ifelse(`DD regime` == 3, "Civilian autocracy",
ifelse(`DD regime` == 4, "Military dictatorship",
ifelse(`DD regime` == 5, "Royal dictatorship", NA
)
)
)
)
)
)
) %>%
mutate(regime_type = ifelse(`DD regime` <= 3 & Communist == 1,
"Communist dictatorship", regime_type)) %>%
# Creating a character variable for election type from the original numeric
mutate(
election_type = ifelse(electoral == 0, "No elections",
ifelse(electoral == 1, "Single-party elections",
ifelse(electoral == 2,
"Non-democratic multi-party elections",
ifelse(electoral == 3,
"Democratic elections", NA)
)
)
)
) %>%
# Creating a character variable for whether there are elections from the original numeric
mutate(election_type2 = ifelse(election_type == "No elections",
"No elections", "Elections")) %>%
# Creating a character variable for voting system from the original numeric
mutate(voting_system = ifelse(`proportional voting` == 1,
"Proportional", "Majoritarian")
)
# Creating dataset to check for regime length
reg_length <- read_excel(
"data/Autocracy Dataset/GWF Autocratic Regimes 1.2/GWF All Political Regimes Case List.xls",
sheet = 1)
# Removing uneeded columns
reg_length <- reg_length[,1:5] %>%
# Correcting disparities in nomenclature
mutate(Country = ifelse(Country == "Lietchtenstein", "Liechtenstein", Country)) %>%
mutate(iso3c = countryname(Country, destination = "iso3c", warn = F)) %>%
mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
mutate(iso3c = ifelse(Country == "Cen African Rep", "CAF", iso3c)) %>%
# Removing empty rows
filter(!is.na(Country)) %>%
# Establishing 2014 as ending year
mutate(`end yr` = ifelse(is.na(`end yr`), 2014, `end yr`)) %>%
# Calculating total length
mutate(tot_length = `end yr` - `beg yr`)
# Creating the final regime length dataset by running a loop that creates a
# country and year observation for each regime
reg_length_final <- data.frame()
for(i in 1:nrow(reg_length)) {
year_seq <- seq(from=reg_length$`beg yr`[i], to=reg_length$`end yr`[i])
temp_df <- data.frame(
Country = reg_length$Country[i],
iso3c = reg_length$iso3c[i],
year = year_seq,
tot_length = reg_length$tot_length[i]
) %>%
# creating regime length up to the date of observation
mutate(cur_length = year - reg_length$`beg yr`[i])
reg_length_final <- rbind(reg_length_final, temp_df)
}
# Creating identifying variable for the regime length dataset
reg_length_final <- reg_length_final %>%
mutate(iso_year = paste(iso3c, year, sep = "_"))
# Joining the regime length dataset with the priming_selection
priming_selection <- priming_selection %>%
left_join(
reg_length_final %>%
mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
select(iso_year, tot_length, cur_length),
by = "iso_year"
)
# In the following section the countries are checked for their regime characteristics
# Two tables are calculated: one with absolute numbers and one with percentages
# for both democracies and autocracies
## Autocracies ##
### Dictatorship type: ###
table(priming_repr$regime_type[priming_repr$prim_dem==0])
round(prop.table(
table(priming_repr$regime_type[priming_repr$prim_dem==0])
)*100, digits = 2)
### Electoral Systems: ###
# Whether there are election or not
table(priming_repr$election_type2[priming_repr$prim_dem==0])
round(prop.table(
table(priming_repr$election_type2[priming_repr$prim_dem==0])
)*100, digits = 2)
# Type of election system
table(priming_repr$election_type[priming_repr$prim_dem==0])
round(prop.table(
table(priming_repr$election_type[priming_repr$prim_dem==0])
)*100, digits = 2)
## Democracies:
###Democracy type:
table(priming_repr$regime_type[priming_repr$prim_dem==1])
round(prop.table(
table(priming_repr$regime_type[priming_repr$prim_dem==1])
)*100, digits = 2)
###Amount of chambers:
table(priming_repr$`No. of chambers in parliament`[priming_repr$prim_dem==1])
round(prop.table(
table(priming_repr$`No. of chambers in parliament`[priming_repr$prim_dem==1])
)*100, digits = 2)
###Voting System:
table(priming_repr$voting_system[priming_repr$prim_dem==1])
round(prop.table(
table(priming_repr$voting_system[priming_repr$prim_dem==1])
)*100, digits = 2)
##Length for Autocracies and Democracies:
# The length is checked by running a summary function for democracies and autocracies
# separately
###Total length:
tapply(priming_selection$tot_length,priming_selection$prim_dem, summary)
###Current length:
tapply(priming_selection$cur_length,priming_selection$prim_dem, summary)
#### Section 5.1: Aggregation comparison with EDI ####
# List of mid-level indices and their constituting low-level indices
polyarchy_list <- list(
v2x_frassoc_thick = c("v2psparban_ord", "v2psbars_ord", "v2psoppaut_ord",
"v2elmulpar_ord", "v2cseeorgs","v2csreprss"),
v2xel_frefair = c("v2elembaut", "v2elembcap", "v2elrgstry_ord", "v2elvotbuy_ord",
"v2elirreg_ord", "v2elintim_ord", "v2elpeace_ord",
"v2elfrfair_ord"),
v2x_freexp_altinf = c("v2mecenefm", "v2meharjrn", "v2mebias", "v2meslfcen",
"v2mecrit", "v2merange", "v2cldiscm", "v2cldiscw",
"v2clacfree")
)
# Identifying variables
id_vars <- c("country_name","country_text_id","country_id","iso3c","year",
"iso_year")
# Variables used for the SVM regression
svm_data_vars <- c("v2x_polyarchy",
"v2psparban_ord", "v2psbars_ord", "v2psoppaut_ord",
"v2elmulpar_ord", "v2cseeorgs","v2csreprss",
"v2x_suffr",
"v2elembaut", "v2elembcap", "v2elrgstry_ord", "v2elvotbuy_ord",
"v2elirreg_ord", "v2elintim_ord", "v2elpeace_ord",
"v2elfrfair_ord",
"v2x_elecoff",
"v2mecenefm", "v2meharjrn", "v2mebias", "v2meslfcen",
"v2mecrit", "v2merange", "v2cldiscm", "v2cldiscw",
"v2clacfree"
)
# Support variables needed for adaptations to V-Dem's coding rules
support_vars <- c("v2x_elecreg")
# Complete variable list
var_SVM <- c(id_vars,support_vars, svm_data_vars)
# Creating the dataset to be used in the SVM regression from the original V-Dem
# dataset
svm_df <- vDem %>%
# Creating iso3c identifying variable
mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
# Correcting disparities in nomenclature
mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
# Filtering for the start year and removing observations without an EDI score
filter(year >= start_year, !is.na(v2x_polyarchy)) %>%
# Creating identifying variable for each observation
mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
# Selecting the needed variables
select(all_of(var_SVM)) %>%
# Relocating certain variables for better visualization
relocate(c("iso3c", "iso_year"), .after = country_id) %>%
# Grouping by country name so that operations aren't conducted cross country
group_by(country_name) %>%
# Filling missing electoral variables as mentioned by V-Dem's coding rules
mutate(v2elmulpar_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elmulpar_ord, na.rm = F), 0)) %>%
mutate(v2elrgstry_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elrgstry_ord, na.rm = F), 0)) %>%
mutate(v2elvotbuy_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elvotbuy_ord, na.rm = F), 0)) %>%
mutate(v2elirreg_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elirreg_ord, na.rm = F), 0)) %>%
mutate(v2elintim_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elintim_ord, na.rm = F), 0)) %>%
mutate(v2elpeace_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elpeace_ord, na.rm = F), 0)) %>%
mutate(v2elfrfair_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elfrfair_ord, na.rm = F), 0)) %>%
# Filling up electoral variables that did not have an election in the first year
# of the dataset
fill(c("v2elmulpar_ord","v2elrgstry_ord", "v2elvotbuy_ord", "v2elirreg_ord",
"v2elintim_ord", "v2elpeace_ord", "v2elfrfair_ord"), .direction = "up") %>%
# Filling missing party variables as mentioned by V-Dem's coding rules
mutate(v2psoppaut_ord = ifelse(v2psbars_ord <= 1 & is.na(v2psoppaut_ord), 0,
v2psoppaut_ord)) %>%
mutate(v2elembcap = ifelse(v2x_elecreg == 0 & is.na(v2elembcap), 0,
v2elembcap)) %>%
mutate(v2elembaut = ifelse(v2x_elecreg == 0 & is.na(v2elembaut), 0,
v2elembaut)) %>%
# Interpolating missing data
mutate(v2mebias = na.spline(v2mebias)) %>%
mutate(v2merange = na.spline(v2merange)) %>%
mutate(v2meharjrn = na.spline(v2meharjrn)) %>%
mutate(v2mecenefm = na.spline(v2mecenefm)) %>%
mutate(v2elembcap = na.spline(v2elembcap)) %>%
mutate(v2mecrit = na.spline(v2mecrit)) %>%
mutate(v2x_suffr = na.spline(v2x_suffr)) %>%
# Ungrouping before SVM regression
ungroup() %>%
# Removing still missing data
drop_na()
# Running SVM function created for the purpose of this contribution
SVM_index(svm_dataframe = svm_df, id_vars = id_vars, support_vars = support_vars,
svm_data_vars = svm_data_vars, priming_selec = priming_selection,
save_path = "result/result_SVM_polyarchy3.RDS")
# Reading saved result
result_EDI <- readRDS("result/result_SVM_polyarchy3.RDS")$Output
# Creating dataset with the delta between indices and input data
df_output_polyarchy <- result_EDI[,1:8] %>%
mutate(diff = med - v2x_polyarchy) %>%
left_join(svm_df)
# OLS regression of all of the data
lm1 <- lm(v2x_polyarchy ~ . , df_output_polyarchy[,c(7,11:35)])
summary(lm1)
lm2 <- lm(med ~ . , df_output_polyarchy[,c(8,11:35)])
summary(lm2)
# OLS regression of the data with a delta larger than 0.01
lm1 <- lm(v2x_polyarchy ~ . , df_output_polyarchy[df_output_polyarchy$diff > 0.01 | df_output_polyarchy$diff < -0.01,c(7,11:35)])
summary(lm1)
lm2 <- lm(med ~ . , df_output_polyarchy[df_output_polyarchy$diff > 0.01 | df_output_polyarchy$diff < -0.01,c(8,11:35)])
summary(lm2)
# Creating OLS regression table with stargazer in HTML format
stargazer(lm1,lm2, summary = T, dep.var.labels = c("EDI", "SVM Ind."), type = "html", notes.label = "Signif. codes:")
# Creating a dataset with the results of the SVM regression, its delta from
# the EDI, and the input variables
graph_EDI <- result_EDI[,1:8] %>%
mutate(diff = med - v2x_polyarchy) %>%
left_join(svm_df)
# Scaling the data for graphical representation later on
graph_EDI_scaled <- graph_EDI
graph_EDI_scaled[,11:35] <- scale(graph_EDI[,11:35], center = F, scale = T)
# Keeping variables that will not be averaged into mid-level indices
graph_EDI_mid_level <- graph_EDI_scaled[,c("country_name","iso3c","year",
"iso_year","v2x_polyarchy","med",
"diff","v2x_suffr","v2x_elecoff")]
# This loop creates mid-level indices out of the scaled low-level indices
# (only for graphical representation)
for(i in 1:length(polyarchy_list)){
temp <- data.frame(rowMeans(graph_EDI_scaled[,polyarchy_list[[i]]]))
colnames(temp) <- names(polyarchy_list)[i]
graph_EDI_mid_level <- cbind(graph_EDI_mid_level,temp)
}
# Figures #
## Figure 1 ##
agg_plot_1 <- ggplot(graph_EDI, aes(x = med, y = v2x_polyarchy, color = diff)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", color = "green") +
scale_color_gradient(low="blue", high="red") +
labs(y = "EDI", x = "SVM Index") +
guides(color = guide_legend(title = "Delta"))
agg_plot_1
# Saving plot as a png
png("img/figure1.png", res = 120, width = 700, height = 700)
agg_plot_1
dev.off()
## Figure 2 ##
agg_plot_2 <- graph_EDI %>%
filter(iso3c == "DEU") %>%
select(iso3c, year, v2x_polyarchy, med) %>%
rename(EDI = v2x_polyarchy, `SVM Ind` = med) %>%
pivot_longer(cols = c("EDI","SVM Ind")) %>%
as.data.frame(.) %>%
ggplot(aes(x = year,y = value, color = name)) +
#ggplot(aes(x = year,y = value, linetype = name)) +
geom_line(size = 1.2, alpha = 0.6) +
labs(y = "Score", x = "Year") +
guides(color = guide_legend(title = "Index")) +
scale_color_manual(values=c("#990000", "#000066"))
agg_plot_2
png("img/figure2.png", res = 120, width = 700, height = 700)
agg_plot_2
dev.off()
## Figure 3 ##
agg_plot_3 <- graph_EDI %>%
filter(iso3c == "RUS") %>%
select(iso3c, year, v2x_polyarchy, med) %>%
rename(EDI = v2x_polyarchy, `SVM Ind` = med) %>%
pivot_longer(cols = c("EDI","SVM Ind")) %>%
as.data.frame(.) %>%
ggplot(aes(x = year,y = value, color = name)) +
#ggplot(aes(x = year,y = value, linetype = name)) +
geom_line(size = 1.2, alpha = 0.6) +
labs(y = "Score", x = "Year") +
guides(color = guide_legend(title = "Index")) +
scale_color_manual(values=c("#990000", "#000066"))
agg_plot_3
png("img/figure3.png", res = 120, width = 700, height = 700)
agg_plot_3
dev.off()
## Figure 4 ##
agg_plot_4 <- graph_EDI_mid_level %>%
filter(iso3c == "RUS") %>%
select(c("year","v2x_suffr","v2x_elecoff","v2x_frassoc_thick","v2xel_frefair",
"v2x_freexp_altinf")) %>%
pivot_longer(cols = c("v2x_suffr","v2x_elecoff","v2x_frassoc_thick",
"v2xel_frefair","v2x_freexp_altinf")) %>%
as.data.frame(.) %>%
ggplot(aes(x = year,y = value, color = name)) +
geom_line(size = 1.2, alpha = 0.6) +
labs(y = "Value", x = "Year") +
guides(color = guide_legend(title = "Variable"))
agg_plot_4
png("img/figure4.png", res = 120, width = 700, height = 700)
agg_plot_4
dev.off()
## Figure 5 ##
agg_plot_5 <- graph_EDI %>%
filter(iso3c == "COL") %>%
select(iso3c, year, v2x_polyarchy, med) %>%
rename(EDI = v2x_polyarchy, `SVM Ind` = med) %>%
pivot_longer(cols = c("EDI","SVM Ind")) %>%
as.data.frame(.) %>%
ggplot(aes(x = year,y = value, color = name)) +
#ggplot(aes(x = year,y = value, linetype = name)) +
geom_line(size = 1.2, alpha = 0.6) +
labs(y = "Score", x = "Year") +
guides(color = guide_legend(title = "Index")) +
scale_color_manual(values=c("#990000", "#000066"))
agg_plot_5
png("img/figure5.png", res = 120, width = 700, height = 700)
agg_plot_5
dev.off()
## Figure 6 ##
agg_plot_6 <- graph_EDI_mid_level %>%
filter(iso3c == "COL") %>%
select(c("year","v2x_suffr","v2x_elecoff","v2x_frassoc_thick","v2xel_frefair",
"v2x_freexp_altinf")) %>%
pivot_longer(cols = c("v2x_suffr","v2x_elecoff","v2x_frassoc_thick","v2xel_frefair",
"v2x_freexp_altinf")) %>%
as.data.frame(.) %>%
ggplot(aes(x = year,y = value, color = name)) +
geom_line(size = 1.2, alpha = 0.6) +
labs(y = "Value", x = "Year") +
guides(color = guide_legend(title = "Variable"))
agg_plot_6
png("img/figure6.png", res = 120, width = 700, height = 700)
agg_plot_6
dev.off()
#### Section 5.2 SVM Indices ####
# Loading the variable list as they relate to Embedded Democracy
svm_data_table <- read_excel("data/SVM_variables.xlsx")
# Internally embedded #
# Variables used for the SVM regression
svm_data_vars <- c(svm_data_table$Code[svm_data_table$Embeddedness == "Internal"],
"v2x_polyarchy")
# Identifying variables
id_vars <- c("country_name","country_text_id","country_id","iso3c","year",
"iso_year")
# Variables used for the SVM regression
support_vars <- c("v2x_elecreg", "v2exhoshog", "v2lgbicam")
# Complete variable list
var_SVM <- c(id_vars,support_vars, svm_data_vars)
# Creating the dataset to be used in the SVM regression from the original V-Dem
# dataset
svm_df <- vDem %>%
# Creating iso3c identifying variable
mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
# Correcting disparities in nomenclature
mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
# Removing empty variables related to the DDR before its formal founding in 1949
filter(!(iso3c == "DDR") | year > 1948) %>%
# Filtering for the start year
filter(year >= start_year) %>%
# Creating identifying variable for each observation
mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
# Selecting the needed variables
select(all_of(var_SVM)) %>%
# Relocating certain variables for better visualization
relocate(c("iso3c", "iso_year"), .after = country_id) %>%
# Grouping by country name so that operations aren't conducted cross country
group_by(country_name) %>%
# Filling missing electoral variables as mentioned by V-Dem's coding rules
mutate(v2elmulpar_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elmulpar_ord, na.rm = F), 0)) %>%
mutate(v2elrgstry_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elrgstry_ord, na.rm = F), 0)) %>%
mutate(v2elvotbuy_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elvotbuy_ord, na.rm = F), 0)) %>%
mutate(v2elirreg_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elirreg_ord, na.rm = F), 0)) %>%
mutate(v2elintim_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elintim_ord, na.rm = F), 0)) %>%
mutate(v2elpeace_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elpeace_ord, na.rm = F), 0)) %>%
mutate(v2elfrfair_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elfrfair_ord, na.rm = F), 0)) %>%
# If HoS and HoG are the same, both have the same value
mutate(v2exctlhg_0 = ifelse(v2exhoshog == 1, v2exctlhs_0, v2exctlhg_0)) %>%
# Filling up electoral variables that did not have an election in the first year
# of the dataset
fill(c("v2elmulpar_ord","v2elrgstry_ord", "v2elvotbuy_ord", "v2elirreg_ord",
"v2elintim_ord", "v2elpeace_ord", "v2elfrfair_ord"), .direction = "up") %>%
# Filling missing party and legislature variables as mentioned by V-Dem's coding rules
mutate(v2psoppaut_ord = ifelse(v2psbars_ord <= 1 & is.na(v2psoppaut_ord), 0,
v2psoppaut_ord)) %>%
mutate(v2lgqstexp_ord = ifelse(v2lgbicam == 0,0,v2lgqstexp_ord)) %>%
mutate(v2lginvstp_ord = ifelse(v2lgbicam == 0,0,v2lginvstp_ord)) %>%
# Interpolating missing data
mutate(v2lgbicam = na.approx(v2lgbicam, na.rm = F)) %>%
mutate(v2x_elecoff = na.approx(v2x_elecoff, na.rm = F)) %>%
mutate(v2x_suffr = na.spline(v2x_suffr, na.rm = F)) %>%
mutate(v2elrstrct = na.approx(v2elrstrct, na.rm = F)) %>%
mutate(v2elembcap = na.spline(v2elembcap, na.rm = F)) %>%
mutate(v2elembaut = na.spline(v2elembaut, na.rm = F)) %>%
mutate(v2meharjrn = na.spline(v2meharjrn)) %>%
mutate(v2mecenefm = na.spline(v2mecenefm)) %>%
mutate(v2meslfcen = na.spline(v2meslfcen, na.rm = F)) %>%
mutate(v2exrescon = na.spline(v2exrescon, na.rm = F)) %>%
mutate(v2jucomp = na.spline(v2jucomp, na.rm = F)) %>%
mutate(v2juhccomp = na.spline(v2juhccomp, na.rm = F)) %>%
mutate(v2juhcind = na.spline(v2juhcind, na.rm = F)) %>%
mutate(v2juncind = na.spline(v2juncind, na.rm = F)) %>%
mutate(v2lgqstexp_ord = na.approx(v2lgqstexp_ord, na.rm = F)) %>%
mutate(v2lginvstp_ord = na.approx(v2lginvstp_ord, na.rm = F)) %>%
# Ungrouping before SVM regression
ungroup() %>%
# Removing still missing data
drop_na()
# Running SVM function created for the purpose of this contribution
SVM_index(svm_dataframe = svm_df, id_vars = id_vars, support_vars = support_vars,
svm_data_vars = svm_data_vars, priming_selec = priming_selection,
save_path = "result/result_SVM_intern_emb4.RDS")
# Electoral Regime #
# Variables used for the SVM regression
svm_data_vars <- c(svm_data_table$Code[svm_data_table$Regime == "Electoral"],
"v2x_polyarchy")
# Identifying variables
id_vars <- c("country_name","country_text_id","country_id","iso3c","year","iso_year")
# Variables used for the SVM regression
support_vars <- c("v2x_elecreg", "v2exhoshog", "v2lgbicam")
# Complete variable list
var_SVM <- c(id_vars,support_vars, svm_data_vars)
# Creating the dataset to be used in the SVM regression from the original V-Dem
# dataset
svm_df <- vDem %>%
# Creating iso3c identifying variable
mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
# Correcting disparities in nomenclature
mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
# Filtering for the start year
filter(year >= start_year) %>%
# Creating identifying variable for each observation
mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
# Selecting the needed variables
select(all_of(var_SVM)) %>%
# Relocating certain variables for better visualization
relocate(c("iso3c", "iso_year"), .after = country_id) %>%
# Grouping by country name so that operations aren't conducted cross country
group_by(country_name) %>%
# Filling missing electoral variables as mentioned by V-Dem's coding rules
mutate(v2elrgstry_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elrgstry_ord, na.rm = F), 0)) %>%
mutate(v2elvotbuy_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elvotbuy_ord, na.rm = F), 0)) %>%
mutate(v2elirreg_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elirreg_ord, na.rm = F), 0)) %>%
mutate(v2elintim_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elintim_ord, na.rm = F), 0)) %>%
mutate(v2elpeace_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elpeace_ord, na.rm = F), 0)) %>%
mutate(v2elfrfair_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elfrfair_ord, na.rm = F), 0)) %>%
# Filling up electoral variables that did not have an election in the first year
# of the dataset
fill(c("v2elrgstry_ord", "v2elvotbuy_ord", "v2elirreg_ord", "v2elintim_ord",
"v2elpeace_ord", "v2elfrfair_ord"), .direction = "up") %>%
# Interpolating missing data
mutate(v2x_elecoff = na.approx(v2x_elecoff, na.rm = F)) %>%
mutate(v2x_suffr = na.spline(v2x_suffr, na.rm = F)) %>%
mutate(v2elrstrct = na.approx(v2elrstrct, na.rm = F)) %>%
mutate(v2elembcap = na.spline(v2elembcap, na.rm = F)) %>%
mutate(v2elembaut = na.spline(v2elembaut, na.rm = F)) %>%
# Ungrouping before SVM regression
ungroup() %>%
# Removing still missing data
drop_na()
# Running SVM function created for the purpose of this contribution
SVM_index(svm_dataframe = svm_df, id_vars = id_vars, support_vars = support_vars,
svm_data_vars = svm_data_vars, priming_selec = priming_selection,
save_path = "result/result_SVM_elec_reg4.RDS"
)
# Fully Embedded #
# Variables used for the SVM regression
svm_data_vars <- c(svm_data_table$Code,"v2x_polyarchy")
# Identifying variables
id_vars <- c("country_name","country_text_id","country_id","iso3c","year",
"iso_year")
# Variables used for the SVM regression
support_vars <- c("v2x_elecreg", "v2exhoshog", "v2lgbicam")
# Complete variable list
var_SVM <- c(id_vars,support_vars, svm_data_vars)
# Creating the dataset to be used in the SVM regression from the original V-Dem
# dataset
svm_df <- vDem %>%
# Creating iso3c identifying variable
mutate(iso3c = countryname(country_name, destination = "iso3c")) %>%
# Correcting disparities in nomenclature
mutate(iso3c = ifelse(iso3c == "Yugoslavia", "SRB", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Czechoslovakia", "CZE", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Republic of Vietnam", "VDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Yemen People's Republic", "YMD", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "German Democratic Republic", "DDR", iso3c)) %>%
mutate(iso3c = ifelse(iso3c == "Zanzibar", "EAZ", iso3c)) %>%
# Removing empty variables related to the DDR before its formal founding in 1949
filter(!(iso3c == "DDR") | year > 1948) %>%
# Filtering for the start year
filter(year >= start_year) %>%
# Creating identifying variable for each observation
mutate(iso_year = paste(iso3c, year, sep = "_")) %>%
# Selecting the needed variables
select(all_of(var_SVM)) %>%
# Relocating certain variables for better visualization
relocate(c("iso3c", "iso_year"), .after = country_id) %>%
# Grouping by country name so that operations aren't conducted cross country
group_by(country_name) %>%
# Filling missing electoral variables as mentioned by V-Dem's coding rules
mutate(v2elmulpar_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elmulpar_ord, na.rm = F), 0)) %>%
mutate(v2elrgstry_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elrgstry_ord, na.rm = F), 0)) %>%
mutate(v2elvotbuy_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elvotbuy_ord, na.rm = F), 0)) %>%
mutate(v2elirreg_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elirreg_ord, na.rm = F), 0)) %>%
mutate(v2elintim_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elintim_ord, na.rm = F), 0)) %>%
mutate(v2elpeace_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elpeace_ord, na.rm = F), 0)) %>%
mutate(v2elfrfair_ord = ifelse(v2x_elecreg == 1,
na.locf(v2elfrfair_ord, na.rm = F), 0)) %>%
# If HoS and HoG are the same, both have the same value
mutate(v2exctlhg_0 = ifelse(v2exhoshog == 1, v2exctlhs_0, v2exctlhg_0)) %>%
# Filling up electoral variables that did not have an election in the first year
# of the dataset
fill(c("v2elmulpar_ord","v2elrgstry_ord", "v2elvotbuy_ord", "v2elirreg_ord",
"v2elintim_ord", "v2elpeace_ord", "v2elfrfair_ord"), .direction = "up") %>%
# Filling missing party and legislature variables as mentioned by V-Dem's coding rules
mutate(v2psoppaut_ord = ifelse(v2psbars_ord <= 1 & is.na(v2psoppaut_ord), 0,
v2psoppaut_ord)) %>%
mutate(v2lgqstexp_ord = ifelse(v2lgbicam == 0,0,v2lgqstexp_ord)) %>%
mutate(v2lginvstp_ord = ifelse(v2lgbicam == 0,0,v2lginvstp_ord)) %>%
# Interpolating missing data
mutate(v2meharjrn = na.spline(v2meharjrn)) %>%
mutate(v2mecenefm = na.spline(v2mecenefm)) %>%
mutate(e_peedgini = na.spline(e_peedgini, na.rm = F)) %>%
mutate(e_migdppc = na.spline(e_migdppc, na.rm = F)) %>%
mutate(e_pelifeex = na.spline(e_pelifeex, na.rm = F)) %>%
mutate(v2csprtcpt = na.spline(v2csprtcpt, na.rm = F)) %>%
mutate(v2elembcap = na.spline(v2elembcap, na.rm = F)) %>%
mutate(v2elembaut = na.spline(v2elembaut, na.rm = F)) %>%
mutate(v2elrstrct = na.approx(v2elrstrct, na.rm = F)) %>%
mutate(v2x_elecoff = na.approx(v2x_elecoff, na.rm = F)) %>%
mutate(v2lgbicam = na.approx(v2lgbicam, na.rm = F)) %>%
mutate(v2x_suffr = na.spline(v2x_suffr, na.rm = F)) %>%
mutate(v2meslfcen = na.spline(v2meslfcen, na.rm = F)) %>%
mutate(v2exrescon = na.spline(v2exrescon, na.rm = F)) %>%
mutate(v2jucomp = na.spline(v2jucomp, na.rm = F)) %>%
mutate(v2juhccomp = na.spline(v2juhccomp, na.rm = F)) %>%
mutate(v2juhcind = na.spline(v2juhcind, na.rm = F)) %>%
mutate(v2juncind = na.spline(v2juncind, na.rm = F)) %>%
mutate(v2lgqstexp_ord = na.approx(v2lgqstexp_ord, na.rm = F)) %>%
mutate(v2lginvstp_ord = na.approx(v2lginvstp_ord, na.rm = F)) %>%
mutate(v2pscnslnl = na.spline(v2pscnslnl, na.rm = F)) %>%
# Ungrouping before SVM regression
ungroup() %>%
# Removing still missing data
drop_na()
# Running SVM function created for the purpose of this contribution
SVM_index(svm_dataframe = svm_df, id_vars = id_vars, support_vars = support_vars,
svm_data_vars = svm_data_vars, priming_selec = priming_selection,
save_path = "result/result_SVM_extern_emb4.RDS")
# Comparison
# Reading individual SVM dataset created in this section
result_int_emb <- readRDS("result/result_SVM_intern_emb4.RDS")$Output
result_elec_reg <- readRDS("result/result_SVM_elec_reg4.RDS")$Output
result_full_emb <- readRDS("result/result_SVM_extern_emb4.RDS")$Output
# Reading SVMDI
SVMDI <- read_excel("data/Gruendler/ML indices (1).xlsx") %>%
mutate(iso_year = paste(ISO, Year, sep = "_"))
# Joining all datasets
final_df <- result_elec_reg[,c(id_vars, "med")] %>%
rename(`Elec. Reg.` = med) %>%
full_join(
result_int_emb[,c(id_vars, "med")] %>%
rename(`Intern. Emb.` = med),
by = id_vars
) %>%
full_join(
result_full_emb[,c(id_vars, "med")] %>%
rename(`Fully Emb.` = med),
by = id_vars
) %>%
full_join(
SVMDI[,c("iso_year","C_MLDI")] %>%
rename(SVMDI = C_MLDI),
by = "iso_year"
) %>%
pivot_longer(cols = c("Elec. Reg.","Intern. Emb.","Fully Emb.","SVMDI"))
# Figures
## Figure 7
agg_plot_7 <- final_df %>%
filter(iso3c == "DEU") %>%
ggplot(aes(x = year, y = value, color = name)) +
geom_line(size = 1.2, alpha = 0.6) +
labs(y = "Score", x = "Year") +
guides(color = guide_legend(title = "Index"))
agg_plot_7
png("img/figure7.png", res = 120, width = 700, height = 700)
agg_plot_7
dev.off()
## Figure 8
agg_plot_8 <- final_df %>%
filter(iso3c == "HUN" & year > 2000) %>%
ggplot(aes(x = year, y = value, color = name)) +
geom_line(size = 1.2, alpha = 0.6) +
labs(y = "Score", x = "Year") +
guides(color = guide_legend(title = "Index"))
agg_plot_8
png("img/figure8.png", res = 120, width = 700, height = 700)
agg_plot_8
dev.off()
## Figure 9
agg_plot_9 <- final_df %>%
filter(iso3c == "POL" & year > 2000) %>%
ggplot(aes(x = year, y = value, color = name)) +
geom_line(size = 1.2, alpha = 0.6) +
labs(y = "Score", x = "Year") +
guides(color = guide_legend(title = "Index"))
agg_plot_9
png("img/figure9.png", res = 120, width = 700, height = 700)
agg_plot_9
dev.off()