# gonzo.R
#
# Competing risks analysis of civil war durations
#
# Patrick T. Brandt
# pbrandt@utdallas.edu
# Original version: 20051104
# Updates : 20051116 -- new intervention counter variable added to
#                       dataset.  Models adjusted to account for this
#                       and interpret new results.
#                    -- Also reorganized code so it is easier to modify.
#           20070305 -- update code to save results

# Load the R packages we need to use
library(foreign)
library(survival)
library(xtable)

# Load the event-history data from the Stata file and attach it, so the
# columns can be referred to by bare name in everything that follows.
data <- read.dta(file = "gonzo-eha-data.dta")
attach(data)

# Event indicator for the pooled model, in which all outcomes are
# treated the same: 0 while the outcome is "Ongoing", 1 otherwise.
endwar <- ifelse(outcome != "Ongoing", 1, 0)

# Event indicator for the negotiated settlement outcome.
# NOTE(review): the two labels are spelled differently ("NSPKOyes" vs.
# "NSPPKOno") -- confirm that both match the outcome labels in the data.
negsett <- ifelse(outcome == "NSPKOyes" | outcome == "NSPPKOno", 1, 0)

# Put every record on a common clock: it starts at 0 and stops at the
# elapsed time between its start and end values.
newstart <- start - start
newend   <- end - start

# (start, stop, event) survival objects: one per outcome, plus the
# pooled version that uses endwar as the event.
govt.Surv    <- Surv(time = newstart, time2 = newend, event = govtwin)
rebel.Surv   <- Surv(time = newstart, time2 = newend, event = rebelwin)
negsett.Surv <- Surv(time = newstart, time2 = newend, event = negsett)
pooled.Surv  <- Surv(time = newstart, time2 = newend, event = endwar)

# Recode / rescale some variables
# Express ef as a percentage.  The assignment creates a variable named
# ef in the workspace; ef is presumably a column of the attached data.
ef <- ef * 100

# Build the control parameters for the Cox regression estimation.
# NOTE(review): the value returned by coxph.control() is neither stored
# nor handed to a coxph() call on this line, so this statement on its
# own does not change how any model is fitted.
coxph.control(iter.max = 500, outer.max = 200)

# Collect the survival objects (the DVs) in one named list so the same
# covariate specification can be estimated for each of them in a loop.
DV.list <- list(
    govt    = govt.Surv,
    rebel   = rebel.Surv,
    negsett = negsett.Surv,
    pooled  = pooled.Surv
)

# Outcome names, in the order the models are estimated
nm <- names(DV.list)

# Baseline models and proportional-hazards diagnostics.
# For every survival object in DV.list: fit the Cox model without any
# time interactions, print the cox.zph() test, and draw one diagnostic
# panel per covariate.

# Estimation limits.  coxph() only uses them when they are passed
# through its control= argument.
fit.control <- coxph.control(iter.max=500, outer.max=200)

for (m in seq_along(DV.list))
{
    tmp  <- coxph(DV.list[[m]] ~ secede + gdpenl + Oil + mtnest +
                  milsize + deadper + ef + relper +
                  anocl + instab + deml +
                  intercount + cluster(conflictid),
                  control=fit.control)
    tmp.cox.zph <- cox.zph(tmp)
    print(tmp.cox.zph)

    # Plot the test results; log="x" asks for a log-scaled x axis.
    # dev.new() opens the default graphics device for the session, so
    # the script does not depend on an X11 display being available.
    dev.new()
    par(mfrow=c(3,4))       # 3 x 4 grid: one panel per covariate
    for (i in seq_len(12))  # the model above has 12 covariates
    {
        plot(tmp.cox.zph[i], log="x")
        abline(h=0)
    }

}

# Close every graphics device opened above
graphics.off()

# Now estimate a set of models that correct for the non-proportional
# hazards by adding interactions between selected covariates and
# logtime.

# Estimation limits.  coxph() only uses them when they are passed
# through its control= argument.
fit.control <- coxph.control(iter.max=500, outer.max=200)

# Object to store the results: an empty list with one slot per outcome.
# (list(mode="vector", length=4) would instead create a two-element list
# holding the values "vector" and 4.)
final.models <- vector(mode="list", length=length(DV.list))
names(final.models) <- names(DV.list)

for (m in seq_along(DV.list))
{
    final.models[[m]]  <- coxph(DV.list[[m]] ~ secede + gdpenl + Oil + mtnest +
                                milsize + deadper + ef + relper +
                                anocl + instab + deml +
                                intercount + secede:logtime +
                                gdpenl:logtime + Oil:logtime +
                                mtnest:logtime + ef:logtime +
                                anocl:logtime + instab:logtime +
                                cluster(conflictid),
                                control=fit.control)
}


# Plot the cumulative hazard curves
#pdf(file="cumhazards.pdf", width=6, height=6)
png(file="cumhazards.png", width=480, height=480, res=72)

# Compute each fitted curve once.
# NOTE: keep the loop index named m.  The models were specified with
# DV.list[[m]] as the response, and survfit() may re-evaluate that
# formula, so m has to equal the index of the model being processed.
hazard.fits <- vector(mode="list", length=4)
for (m in 1:4)
{
    hazard.fits[[m]] <- survfit(final.models[[m]])
}

# Largest finite cumulative hazard (-log of the survival curve) over all
# four models.  Without a shared y range the axis would be sized for the
# first curve only and larger curves would run off the plot.
cumhaz.top <- max(vapply(hazard.fits, function(f) {
    h <- -log(f$surv)
    max(c(0, h[is.finite(h)]))
}, numeric(1)))

# The first model sets up the axes ...
plot(hazard.fits[[1]], conf.int=FALSE, fun="cumhaz", lty=1, lwd=2,
     ylim=c(0, cumhaz.top),
     ylab="Cumulative Hazard", xlab="War years")

# ... and the remaining models are added to the same plot
for (m in 2:4)
{
    lines(hazard.fits[[m]], fun="cumhaz", lty=m, lwd=2)
}

# Add a legend; placed by keyword so it stays inside the plot region
# whatever the y range turns out to be
legend("topleft", c("Government", "Rebels", "Settlement", "Pooled"),
       lty=c(1,2,3,4), lwd=c(2,2,2,2),
       col=c("black","black","black","black"))
dev.off()

# Gather the estimates from the four models into one table.
# Each model contributes one column.  Within a column every
# exponentiated coefficient is followed by a parenthesised value: the
# raw coefficient divided by the square root of the matching diagonal
# element of the model's $var matrix.
llf <- c()
ns <- c()
for (i in seq_len(4))
{
    model.i <- final.models[[i]]
    beta <- model.i$coefficients

    # exp(coefficient), and coefficient / sqrt(diag(var)) in parentheses
    coef <- round(exp(beta), 3)
    se <- paste0("(", round(beta / sqrt(diag(model.i$var)), 3), ")")

    # interleave the two vectors into a single column
    stacked <- matrix(as.vector(rbind(coef, se)), ncol=1)
    results <- if (i == 1) stacked else cbind(results, stacked)

    # second element of $loglik, and $n, for this model
    llf <- c(llf, model.i$loglik[2])
    ns <- c(ns, model.i$n)
}

# Append the log-likelihood and N rows below the estimates
results <-  rbind(results, round(llf, 2), ns)

colnames(results) <- c("Government", "Rebels",
                       "Negotiated Settlement", "Pooled")

# Write the table out twice: as HTML and as tab-separated text
results.table <- xtable(as.data.frame(results))
print(results.table, type="html", file="cmprsk.html")
write.table(results, file="cmprsk.txt", sep="\t")


# Set up some plots for the time interaction variables

# Coefficient matrix: one column per model, one row per coefficient
coefs <- unlist(lapply(1:4, function(k) final.models[[k]]$coefficients))
coefs <- matrix(coefs, nrow=length(coefs) / 4, ncol=4)

# Number of time points over which the effects are traced
t <- 20

# Effect path for a covariate that is interacted with log time:
#   exp(main coefficient + interaction coefficient * log(s)), s = 1..horizon
# main.row and tv.row are the rows of coefs holding the main and the
# interaction coefficient.  The result is a horizon x 4 ts object with
# one column per model.
tv.effect <- function(main.row, tv.row, horizon)
{
    steps <- seq_len(horizon)
    level <- matrix(coefs[main.row, ], nrow=horizon, ncol=4, byrow=TRUE)
    slope <- matrix(log(steps), nrow=horizon, ncol=1) %*% coefs[tv.row, ]
    ts(exp(level + slope))
}

# Rows 1-12 of coefs are the main effects, rows 13-19 the interactions
# with logtime, in the order the terms appear in the model formula.
secede.effects <- tv.effect(1, 13, t)
gdp.effects    <- tv.effect(2, 14, t)
oil.effects    <- tv.effect(3, 15, t)
mtn.effects    <- tv.effect(4, 16, t)
ef.effects     <- tv.effect(7, 17, t)
anoc.effects   <- tv.effect(9, 18, t)
instab.effects <- tv.effect(10, 19, t)

# Row 12 has no time interaction; its effect is traced over the
# values 1..t of the variable itself: exp(coefficient * s)
inter.effects <- ts(exp(matrix(seq_len(t), nrow=t, ncol=1) %*%
                        matrix(coefs[12, ], nrow=1, ncol=4)))

effects <- list(secede.effects, gdp.effects, oil.effects, mtn.effects,
                ef.effects, anoc.effects, instab.effects, inter.effects)

# Axis labels, in the same order as the effects list
effect.names <- c("Secession", "GDP", "Oil", "Mountains",
                  "Ethnic fractionalization", "Anocracy",
                  "Instability", "Intervention")

#pdf(file="marginalcmprsk.pdf", width=6, height=7.5)
png(file="marginalcmprsk.png", width=400, height=600, res=72)

# One panel per effect, laid out in a 4 x 2 grid.  Within a panel the
# four series share a single set of axes: the first three are drawn in
# black and the fourth in red, each with its own line type.  The
# horizontal line marks a value of 1.
par(mfrow=c(4,2), mai=c(0.35,0.6,0.25,0.25))
for (i in seq_along(effects))
{
    plot(effects[[i]], plot.type="single", lwd=2, lty=1:4,
         col=c("black", "black", "black", "red"),
         ylab=effect.names[i])
    abline(h=1)
}

dev.off()

# Do some likelihood ratio tests for the effects of the variables that
# are interacted with time, so we can say definitively that these are
# non-zero effects.  For each of these, we fit a restricted model
# (dropping the relevant variable and its interaction with logtime) and
# then compare the LLFs of the unrestricted and restricted models to
# compute a likelihood ratio test with 2 degrees of freedom.
#
# NOTE(review): the models use cluster(conflictid).  As far as I know
# the likelihood ratio test assumes independent observations within a
# cluster -- confirm that it is the test wanted here.

# Estimation limits.  coxph() only uses them when they are passed
# through its control= argument.
fit.control <- coxph.control(iter.max=500, outer.max=200)

# Terms of the unrestricted model, in the order used in its formula
main.vars <- c("secede", "gdpenl", "Oil", "mtnest",
               "milsize", "deadper", "ef", "relper",
               "anocl", "instab", "deml", "intercount")
# Variables that are interacted with logtime; each is tested in turn
time.vars <- c("secede", "gdpenl", "Oil", "mtnest",
               "ef", "anocl", "instab")

# One row per tested variable, one column per model
tests <- matrix(0, length(time.vars), length(DV.list))

for (i in seq_along(DV.list))
{
    for (j in seq_along(time.vars))
    {
        dropped <- time.vars[j]

        # Right-hand side of the restricted model: every term of the
        # unrestricted model except the dropped variable and its
        # interaction, kept in the original order.
        rhs <- c(setdiff(main.vars, dropped),
                 paste0(setdiff(time.vars, dropped), ":logtime"),
                 "cluster(conflictid)")
        restricted <- as.formula(paste("DV.list[[i]] ~",
                                       paste(rhs, collapse=" + ")))

        fit <- coxph(restricted, control=fit.control)

        # LR statistic: -2 * (restricted LLF - unrestricted LLF)
        tests[j, i] <- -2*(fit$loglik[2] - llf[i])
    }
}

colnames(tests) <- c("Govt", "Rebels", "NS", "Pooled")
rownames(tests) <- c("secede", "gdp", "oil", "mtns", "ef", "anoc", "instab")

# Upper-tail chi-square probabilities with 2 degrees of freedom
p.tests <- round(pchisq(tests, df=2, lower.tail=FALSE), 3)
cat("Likelihood ratio tests for the variables that interact with time\n")
print(tests)
cat("P-values for Likelihood ratio tests\n")
print(p.tests)


# Write every object in the workspace to gonzo.RData, then end the R
# session without saving the workspace a second time on exit.
save.image(file = "gonzo.RData")

quit(save = "no")
