
library("tidyverse")
library(readxl)
library(foreign)
library(lubridate)

# Set your working directory to the folder that contains the two CPDS input
# files before running this script, for example by un-commenting and editing
# the line below. The call is left commented out because setwd() without a
# path stops the script with an error ("argument "dir" is missing").
# setwd("path/to/your/cpds/files")


# #################################################################################
#     READ ME
# #################################################################################
#   
#   Short description:
#   
#   
# With the help of this R-File it is possible to convert the data structure of the CPDS 
# from country-year to country-government structure. Please consider that the conversion only 
# begins with the first cabinet for which a date of investiture is available. 
# 
# In the following R-file the variable "unemp" was used as an example. 
# You can add the variables of interest in section "MERGE WITH STANDARD CPDS DATASET", as described.
# 
# 
#   You will need the following files:
#   
#     - Comparative Political Data Set (in Stata form)
#     - Government Composition Data (as Excel file)
# 
# These can be found on the website of the CPDS
# http://www.cpds-data.org/index.php/data
# 
# 
#   Please cite as: 
#  
# Klaus Armingeon, Sarah Engler and Lucas Leemann. 2022. 
# Comparative Political Data Set 1960-2020. 
# Zurich: Department of Political Science, University of Zurich.
# 
# Klaus Armingeon, Sarah Engler and Lucas Leemann. 2022. 
# Supplement to the Comparative Political Data Set - Government Composition 1960-2020. 
# Zurich: Department of Political Science, University of Zurich.
# 
#   Corresponding address: 
#
# Angela Odermatt  
# angelacarmen.odermatt@uzh.ch
# 
#   Authors: 
#   
# Klaus Armingeon, Sarah Engler, Lucas Leemann



# #################################################################################
# FUNCTION TO HELP COLLAPSE GOVERNMENT STRUCTURE
# #################################################################################

# Collapse one country sheet of the government composition data so that every
# government (column `gov`) is described by the rows returned here instead of
# by one row per spell.
#
# Args:
#   df: Data frame with one country sheet as read from the Excel workbook. It
#       must have exactly 53 columns in the order listed in `column_names`.
#
# Returns:
#   A data frame keyed by `gov` in which
#     - `days` is the sum of `days` over all rows of the government,
#     - the variables in `collapse_means` are means weighted by `days`,
#     - all other variables come from the rows of the government whose
#       investiture date is set (rows with termination == 0 are treated as
#       reshuffles and do not count as an investiture).
#   Rows are ordered by investiture; only rows with an investiture date whose
#   year is 1960 or later are kept. Missing party, family and ideology entries
#   are replaced by "".
help_collapse_government_structure <- function(df) {

  column_names <- c("year", "date", "gov", "empty", "days", "termination", "rightmin", "centmin", "leftmin", "totalmin",
                    "govright1", "govcent1", "govleft1", "govtot1", "govright3", "govcent3", "govleft3", "govsup",
                    "govtype", "elect", "investiture", "govparty1", "govfam1", "govideo1", "govseats1", "govparty2",
                    "govfam2", "govideo2", "govseats2", "govparty3", "govfam3", "govideo3", "govseats3", "govparty4",
                    "govfam4", "govideo4", "govseats4", "govparty5", "govfam5", "govideo5", "govseats5",
                    "govparty6", "govfam6", "govideo6", "govseats6", "govparty7", "govfam7", "govideo7", "govseats7",
                    "govparty8", "govfam8", "govideo8", "govseats8")

  # Fail early with a clear message if the sheet range does not match the layout
  stopifnot(ncol(df) == length(column_names))

  cn <- df
  colnames(cn) <- column_names

  # deal with reshuffles: a row with termination == 0 is not a new investiture
  cn$investiture[cn$termination == 0] <- NA

  # round year
  cn$year <- round(cn$year)

  # variables that are averaged (weighted by days) per government
  collapse_means <- c("totalmin", "govright1", "govcent1", "govleft1", "govtot1",
                      "govright3", "govcent3", "govleft3", "govsup", "govseats1",
                      "govseats2", "govseats3", "govseats4", "govseats5", "govseats6",
                      "govseats7", "govseats8")

  # variables that are taken over unchanged from the investiture row;
  # setdiff() keeps the original column order
  carried_over <- setdiff(names(cn),
                          c(collapse_means, "gov", "days", "year", "empty", "date"))

  wmeans <- cn[, c("gov", "days", collapse_means)]
  # lapply() (not sapply()) so that a sheet with a single row still yields one
  # value per column instead of a collapsed vector
  wmeans[collapse_means] <- lapply(wmeans[collapse_means], as.numeric)

  wmeans <- wmeans %>%
    mutate(weights = days) %>%
    pivot_longer(cols = -c(gov, weights), names_to = "var", values_to = "value") %>%
    mutate(type = ifelse(str_detect(var, "days"), "sum", "wmean")) %>%
    group_by(gov, var) %>%
    summarise(
      # days are summed, everything else is a days-weighted mean
      wmean = if (type[1] == "sum") sum(value) else weighted.mean(value, weights),
      .groups = "drop"
    ) %>%
    pivot_wider(names_from = var, values_from = wmean, names_sort = TRUE)

  # rows that carry an investiture date deliver the non-averaged variables
  needed <- cn[!is.na(cn[, "investiture"]), ]
  needed <- needed[, c("gov", carried_over)]

  merge(wmeans, needed, by = "gov") %>%
    mutate(across(starts_with(c("govparty", "govfam", "govideo")),
                  ~ replace(.x, is.na(.x), ""))) %>%
    arrange(investiture) %>%
    # the first four characters of the date are the year; compared as text
    filter(substr(investiture, 1, 4) >= "1960", !is.na(investiture))
}


# #################################################################################
# Import CPDS Government Dataset (Excel-Sheets) to R
# #################################################################################

# Read the workbook sheet by sheet, collapse every sheet to government level
# and stack the countries.
#
# Every sheet is read from cell A5 to column BA. `last_row` is the last row of
# the range read for that sheet. The position of a country in this table is
# its numeric code `countryn` (1 = Australia, ..., 36 = United States).
composition_file <- "Government_Composition_1960-2020_Update_2022.xlsx"

country_sheets <- data.frame(
  country = c("Australia", "Austria", "Belgium", "Bulgaria", "Canada", "Croatia",
              "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France",
              "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Italy",
              "Japan", "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands",
              "New Zealand", "Norway", "Poland", "Portugal", "Romania", "Slovakia",
              "Slovenia", "Spain", "Sweden", "Switzerland", "United Kingdom",
              "United States"),
  last_row = c(136L, 131L, 138L, 77L, 131L, 54L,
               104L, 74L, 131L, 66L, 142L, 146L,
               134L, 147L, 71L, 131L, 134L, 153L,
               145L, 69L, 69L, 129L, 116L, 137L,
               135L, 131L, 104L, 101L, 97L, 75L,
               71L, 94L, 129L, 129L, 133L,
               143L),
  stringsAsFactors = FALSE
)

country_tables <- lapply(seq_len(nrow(country_sheets)), function(k) {
  sheet_name <- country_sheets$country[k]

  sheet_data <- read_excel(composition_file,
                           sheet = sheet_name,
                           range = paste0("A5:BA", country_sheets$last_row[k]))

  help_collapse_government_structure(sheet_data) %>%
    mutate(country = sheet_name, countryn = as.numeric(k))
})

# Stack all countries in one call. rbind() is used on purpose: it coerces a
# column whose type differs between sheets instead of stopping. The list is
# unnamed so that the row names stay automatic.
government_composition_data <- do.call(rbind, country_tables)

government_composition_data <- arrange(government_composition_data, country, countryn, investiture)


write.csv(government_composition_data, "Government_Composition.csv")

rm(composition_file, country_sheets, country_tables)


#############################################################################################################################################################################################
# Calculate Years of Influence
#############################################################################################################################################################################################

# Attributes each government to the years in which it effectively had influence.

# A year in which the government changes is attributed to the government that was
# in office for the longest time during the first 8 months of that year.
# This ASSUMPTION can be changed by adapting the number of months in m_influence.

# Length of the influence window: in months and, with 30 days per month, in days
m_influence <- 8
d_influence <- 30 * m_influence

# Per country: number the governments in their current row order, derive the
# end date of a government as the day before the next investiture (missing
# for the last government of a country) and extract the calendar years of
# investiture and end date. The data stay grouped by country.
government_composition_data <- government_composition_data %>%
  group_by(country) %>%
  mutate(
    govnr = seq_len(n()),
    enddate = investiture[govnr + 1] - days(1),
    minyear = year(investiture),
    maxyear = year(enddate)
  ) %>%
  arrange(countryn, govnr)

# Empty placeholder columns for up to eight years of influence per government
for (influence_col in paste0("yearinfluence", 1:8)) {
  government_composition_data[influence_col] <- NA
}
rm(influence_col)

countries <- unique(government_composition_data[["country"]])


# Attribute calendar years to governments, one country at a time. For every
# country a government-by-year matrix (gov_years) is set to 1 where a
# government is credited with a year; the credited years of each government
# are then written into its yearinfluence columns.
# NOTE(review): the last year (2020) and the column positions 8:15 (c_group)
# and 57:64 (government_composition_data) are hard-coded; they have to match
# the column layout produced earlier in this script.
for (c in 1:length(countries)) {

  # Governments of the current country with the columns needed below, ordered
  # by govnr. A missing maxyear (no end date) is set to 2020.
  c_group <- filter(government_composition_data, country == countries[c]) %>%
    select(c("country", "gov","govnr","investiture", "enddate", "minyear", "maxyear", starts_with("yearinfluence"))) %>%
    arrange(govnr) %>%
    mutate(maxyear = ifelse(is.na(maxyear == TRUE), 2020, maxyear))

  # All calendar years from the earliest investiture year of the country to 2020
  years <- min(c_group$minyear):2020

  # One row per government (named by gov), one column per year, initially NA
  gov_years <- matrix(ncol = length(years), nrow = nrow(c_group))
  colnames(gov_years) <- years
  rownames(gov_years) <- c_group$gov


# Step 1: years strictly between the investiture year and the end year of a
# government are credited to that government (head/tail drop the first and
# the last year of the range).
  for (g in 1:nrow(c_group)) {
    if(c_group[g, "maxyear"] - c_group[g, "minyear"] >= 2){
      between_years <- head(tail(as.numeric(c_group[g, "minyear"]):as.numeric(c_group[g, "maxyear"]), -1), -1)
      gov_years[g, as.character(between_years)] <- 1
    }
  }


# Step 2: for every year collect the governments that end in that year
# (maxyear) followed by those that start in that year (minyear).
  time_govs <- vector(mode = "list", length = length(years))
  names(time_govs) <- years

  for(i in 1:length(years)){
    max <- which(c_group$maxyear == years[i])
    min <- which(c_group$minyear == years[i])
    time_govs[i] <- c_group[c(max, min), "gov"]
  }


  # Number of governments already credited with each year (from step 1)
  c_sums <- colSums(gov_years, na.rm = T)

  # Step 3: years not credited in step 1 go to the candidate government with
  # the largest duration value computed below.
  for (i in 1:ncol(gov_years)) {
    if(c_sums[i] == 0){
      y <- colnames(gov_years)[i]
      govs <- unique(unlist(time_govs[y]))
      durations <- vector(length = length(govs))

      # First candidate: days from 1 January of its end year up to and
      # including its end date, capped at d_influence
      durations[1] <- c_group[[which(c_group$gov == govs[1]), "enddate"]] - floor_date(c_group[[which(c_group$gov == govs[1]), "enddate"]], "year") + 1
      durations[1] <- ifelse(durations[1] > d_influence, d_influence, durations[1])

      # In the first year column the first candidate is instead measured from
      # the first day of its investiture month to its end date, again capped
      if(i == 1 & year(floor_date(c_group[[which(c_group$gov == govs[1]), "investiture"]], "year")) == c_group[[which(c_group$gov == govs[1]), "minyear"]]){
        durations[1] <- c_group[[which(c_group$gov == govs[1]), "enddate"]] - floor_date(c_group[[which(c_group$gov == govs[1]), "investiture"]], "month") + 1
        durations[1] <- ifelse(durations[1] > d_influence, d_influence, durations[1])
      }

      # Last candidate: days from its investiture to the date that lies
      # (12 - m_influence) months before the next 1 January, or 0 if it was
      # invested after month m_influence. With a single candidate this
      # overwrites durations[1].
      durations[length(durations)] <- case_when(
        month(c_group[[which(c_group$gov == tail(govs, n=1)), "investiture"]]) <= m_influence ~ 
          as.numeric(ceiling_date(c_group[[which(c_group$gov == tail(govs, n=1)), "investiture"]], "year") - months(12-m_influence) - c_group[[which(c_group$gov == tail(govs, n=1)), "investiture"]]),
        month(c_group[[which(c_group$gov == tail(govs, n=1)), "investiture"]]) > m_influence ~ 0
      )

      # Candidates between the first and the last one
      between_govs <- head(tail(govs, -1), -1)

      if(length(between_govs) != 0){
        for (j in 1:length(between_govs)) {
          # 0 if invested after month m_influence; investiture to end date if
          # the end date falls in month m_influence or earlier; otherwise
          # investiture to the date (12 - m_influence) months before the next
          # 1 January
          durations[j+1] <- case_when(
            month(c_group[[which(c_group$gov == between_govs[j]), "investiture"]]) > m_influence ~ 0,
            month(c_group[[which(c_group$gov == between_govs[j]), "investiture"]]) <= m_influence & month(c_group[[which(c_group$gov == between_govs[j]), "enddate"]]) <= m_influence ~ 
              as.numeric(c_group[[which(c_group$gov == between_govs[j]), "enddate"]] - c_group[[which(c_group$gov == between_govs[j]), "investiture"]]),
            month(c_group[[which(c_group$gov == between_govs[j]), "investiture"]]) <= m_influence & month(c_group[[which(c_group$gov == between_govs[j]), "enddate"]]) > m_influence ~ 
              as.numeric(ceiling_date(c_group[[which(c_group$gov == between_govs[j]), "investiture"]], "year") - months(12-m_influence) - c_group[[which(c_group$gov == between_govs[j]), "investiture"]])
            )
        }
      }
      # Credit the year to the candidate with the largest duration; the matrix
      # row is addressed by government name
      gov_years[govs[which.max(durations)], y] <- 1
    }
  }


  # Step 4: write the credited years of every government into columns 8:15 of
  # c_group. `length<-` pads the vector with NA or cuts it to the number of
  # those columns.
  for (g in 1:nrow(gov_years)) {
    infyears <- which(gov_years[g, ] == 1)
    infyears <- years[infyears]
    length(infyears) <- length(c_group[g, 8:15])
    c_group[g, 8:15] <- as.list(infyears)
  }

  # Copy the result into columns 57:64 of the rows of this country
  government_composition_data[which(government_composition_data$country == countries[c]), 57:64] <- c_group[8:15]

}



# Remove the temporary objects of the loop from the workspace
rm(c, c_group, gov_years, time_govs, between_govs, between_years, c_sums, countries, durations, g, govs, i, infyears, j, max, min, y, years)

# The year-of-influence columns are addressed by name, so that this step does
# not depend on their position in the data.
influence_cols <- paste0("yearinfluence", 1:8)

# party, family, ideology and seats of government parties 1 to 8, in the order
# govparty1, govfam1, govideo1, govseats1, govparty2, ...
party_cols <- paste0(c("govparty", "govfam", "govideo", "govseats"), rep(1:8, each = 4))

# Generate variable for number of influence years (= number of filled columns)
government_composition_data$influenceyears <-
  length(influence_cols) - rowSums(is.na(government_composition_data[, influence_cols]))

# Drop if government has no influenceyear
government_composition_data <- government_composition_data %>%
  filter(!is.na(yearinfluence1))

# Order Variables
government_composition_data <- arrange(government_composition_data, countryn, govnr)

government_composition_data <- government_composition_data[, c("country", "countryn", "govnr", "gov", "investiture", "enddate", "minyear", "maxyear", "days", "influenceyears",
                                influence_cols,
                                "totalmin",
                                "govright1", "govcent1", "govleft1", "govtot1", "govright3", "govcent3", "govleft3", "govsup",
                                "govtype", "elect",
                                party_cols)]

rm(influence_cols, party_cols)


write.csv(government_composition_data, "Government_Composition2.csv")


#############################################################################################################################################################################################
# MERGE WITH STANDARD CPDS DATASET
#############################################################################################################################################################################################

# The year-of-influence columns are addressed by name instead of by position.
influence_cols <- paste0("yearinfluence", 1:8)

# party, family, ideology and seats of government parties 1 to 8, in the order
# govparty1, govfam1, govideo1, govseats1, govparty2, ...
party_cols <- paste0(c("govparty", "govfam", "govideo", "govseats"), rep(1:8, each = 4))

# First and last year of influence of every government
government_composition_data$minyearinfluence <- apply(government_composition_data[, influence_cols], 1, FUN = min, na.rm = TRUE)
government_composition_data$maxyearinfluence <- apply(government_composition_data[, influence_cols], 1, FUN = max, na.rm = TRUE)
# min()/max() return Inf/-Inf for a row without any year; turn those into NA
government_composition_data[sapply(government_composition_data, is.infinite)] <- NA

# Eight rows per government: one for each possible year of influence
government_composition_data <- government_composition_data[rep(seq_len(nrow(government_composition_data)), each = 8), ]

# A government is identified by (countryn, govnr); the text label `gov` is not
# used as key because it need not be unique across countries.
# The eight years start at the first year of influence. Starting at the
# investiture year would drop the last year(s) of a government whose influence
# begins after its investiture year. The investiture year is only the fallback
# when no year of influence is known.
government_composition_data <- government_composition_data %>%
  arrange(countryn, govnr) %>%
  group_by(countryn, govnr) %>%
  mutate(govyearnr = seq_len(n())) %>%
  arrange(countryn, govnr, govyearnr, gov) %>%
  mutate(year = ifelse(is.na(minyearinfluence), minyear, minyearinfluence) + (govyearnr - 1))

# The CPDS uses the country name "USA"
government_composition_data["country"][government_composition_data["country"] == "United States"] <- "USA"

cpds <- read.dta("CPDS_1960-2020_Update_2022.dta")
cpds <- select(cpds, country, year, unemp) ###### add here all variables of interest from the CPDS (or merge with further data of interest) ######

cpds_by_government <- merge(government_composition_data, cpds, by = c("country", "year"))
cpds_by_government <- filter(cpds_by_government, !(is.na(country)))

# Keep the country-years inside the influence window of the government and
# number them per government (the merge returns them sorted by country, year)
cpds_by_government <- cpds_by_government %>%
  filter(year >= minyearinfluence | is.na(minyearinfluence)) %>%
  filter(year <= maxyearinfluence | is.na(maxyearinfluence)) %>%
  group_by(countryn, govnr) %>%
  mutate(govyearinfnr = seq_len(n()))


#########################################	Prepare variables of interest (unemp as example) #########################################
# Generate variables for each year of influence:
# unemp_yK is the value of unemp in the K-th year of influence of the government
# and NA if the government has no K-th year of influence.

cpds_by_government <- cpds_by_government %>%
  group_by(countryn, govnr) %>%
  mutate(
    unemp_y1 = ifelse(!is.na(yearinfluence1), unemp[govyearinfnr == 1], NA),
    unemp_y2 = ifelse(!is.na(yearinfluence2), unemp[govyearinfnr == 2], NA),
    unemp_y3 = ifelse(!is.na(yearinfluence3), unemp[govyearinfnr == 3], NA),
    unemp_y4 = ifelse(!is.na(yearinfluence4), unemp[govyearinfnr == 4], NA),
    unemp_y5 = ifelse(!is.na(yearinfluence5), unemp[govyearinfnr == 5], NA),
    unemp_y6 = ifelse(!is.na(yearinfluence6), unemp[govyearinfnr == 6], NA),
    unemp_y7 = ifelse(!is.na(yearinfluence7), unemp[govyearinfnr == 7], NA),
    unemp_y8 = ifelse(!is.na(yearinfluence8), unemp[govyearinfnr == 8], NA)
  ) %>%
  rowwise() %>%
  # average over the available years of influence
  mutate(unemp_avg = mean(c(unemp_y1, unemp_y2, unemp_y3, unemp_y4, unemp_y5, unemp_y6, unemp_y7, unemp_y8), na.rm = TRUE))


# Drop Missings and duplicated governments (keep one row per government)
cpds_by_government <- cpds_by_government %>%
  group_by(country) %>%
  filter(!duplicated(govnr))


# Order and arrange dataset
cpds_by_government <- cpds_by_government[, c("country", "countryn", "govnr", "gov", "investiture", "enddate", "minyear", "maxyear", "days", "influenceyears",
                                             influence_cols,
                                             "unemp_y1", "unemp_y2", "unemp_y3", "unemp_y4", "unemp_y5", "unemp_y6", "unemp_y7", "unemp_y8", "unemp_avg",
                                             "totalmin", "govright1", "govcent1", "govleft1", "govtot1", "govright3", "govcent3", "govleft3", "govsup",
                                             "govtype", "elect",
                                             party_cols,
                                             "minyearinfluence", "maxyearinfluence")]


cpds_by_government <- arrange(cpds_by_government, countryn, govnr)

rm(influence_cols, party_cols)


write.csv(cpds_by_government, "cpds_by_government.csv")