dat <- read.csv("EmCAB_Biomarker_Data.csv") library(survival) #################### dat$CVorMI = NA for (i in 1:length(dat$CVDeath5yr)){ dat$CVorMI[i] <- 0 if (dat$CVDeath5yr[i] == 1){ dat$CVorMI[i] <- 1 } else if (dat$MI5yr[i] == 1){ dat$CVorMI[i] <- 1 } } dat$DeathorMI <- 0 for (i in 1:length(dat$DeathorMI)){ dat$DeathorMI[i] <- 0 if (dat$death5yr[i] == 1){ dat$DeathorMI[i] <- 1 } else if (dat$MI5yr[i] == 1){ dat$DeathorMI[i] <- 1 } } # CREATING OUTCOME VARIABLE dat$SurvObj3 <- with(dat, Surv(dat$timetoMI5yr, DeathorMI==1)) # MEAN EVENTS mean(dat$DeathorMI) ################################### # Correcting zeros ################################### min(dat$hsTroponin_pgmL[which(dat$hsTroponin_pgmL > 0)]) for (i in 1:nrow(dat)){ if (dat$hsTroponin_pgmL[i] == 0){ dat$hsTroponin_pgmL[i] <- 0.3*(1/sqrt(2)) } } min(dat$HSP70_ngmL[which(dat$HSP70_ngmL > 0)]) for (i in 1:nrow(dat)){ if (dat$HSP70_ngmL[i] == 0){ dat$HSP70_ngmL[i] <- 1*(1/sqrt(2)) } } min(dat$FDP_ugmL[which(dat$FDP_ugmL > 0)]) for (i in 1:nrow(dat)){ if (dat$FDP_ugmL[i] == 0){ dat$FDP_ugmL[i] <- 0.005*(1/sqrt(2)) } } min(dat$suPAR_ngmL[which(dat$suPAR_ngmL > 0)]) for (i in 1:nrow(dat)){ if (dat$suPAR_ngmL[i] == 0){ dat$suPAR_ngmL[i] <- 0.78*(1/sqrt(2)) } } min(dat$CRP_mgL[which(dat$CRP_mgL > 0)]) for (i in 1:nrow(dat)){ if (dat$CRP_mgL[i] == 0){ dat$CRP_mgL[i] <- 0.05*(1/sqrt(2)) } } ############################################## ############################################## dat$hsTroponin_pgmL.log <- log(dat$hsTroponin_pgmL) dat$HSP70_ngmL.log <- log(dat$HSP70_ngmL) dat$suPAR_ngmL.log <-log(dat$suPAR_ngmL) dat$FDP_ugmL.log <- log(dat$FDP_ugmL) dat$CRP_mgL.log <-log(dat$CRP_mgL) ################ smp_size <- floor(0.5*(nrow(dat))) set.seed(1836) train_ind <- sample(seq_len(nrow(dat)),size=smp_size) training <- dat[train_ind,] testing <- dat[-train_ind,] dat<- training #################### #################### #################### #################### #################### ################## ################## library(survival) library(KMsurv) library(xtable) library(splines) library("pROC") library("aod") head(dat) ######################### dat.f <- dat[which(testing$male==0),] dat.m <- dat[which(testing$male==1),] ########################## dat.f <- dat[which(dat$male==0),] dat.m <- dat[which(dat$male==1),] ########################## dat$A3 <- NA for (i in 1:length(dat$uniqueid)){ if (dat$hsTroponin_pgmL[i] <= 1.6){ dat$A3[i] = 1 } else if (dat$hsTroponin_pgmL[i] <= 2.1){ dat$A3[i] = 2 } else if (dat$hsTroponin_pgmL[i] <= 2.6){ dat$A3[i] = 3 } else if(dat$hsTroponin_pgmL[i] <= 6.9){ dat$A3[i] = 4 } else if(dat$hsTroponin_pgmL[i] > 6.9){ dat$A3[i] = 5 } } length(dat$uniqueid[which(dat$B3==2)]) dat$B3 <- NA for (i in 1:length(dat$uniqueid)){ if (dat$HSP70_ngmL.log[i] <= 0.04116454){ dat$B3[i] = 1 } else if (dat$HSP70_ngmL.log[i] <= 5.017524){ dat$B3[i] = 2 } else if(dat$HSP70_ngmL.log[i] > 5.017524){ dat$B3[i] = 3 } } table(dat$B3) dat$D3 <- NA for (i in 1:length(dat$uniqueid)){ if (dat$FDP_ugmL[i] <= 0.27){ dat$D3[i] = 1 } else if (dat$FDP_ugmL[i] <= 0.48){ dat$D3[i] = 2 } else if (dat$FDP_ugmL[i] <= 0.88){ dat$D3[i] = 3 } else if(dat$FDP_ugmL[i] > 0.88){ dat$D3[i] = 4 } } dat$E3 <- NA for (i in 1:length(dat$uniqueid)){ if (dat$CRP_mgL[i] <= 1.6){ dat$E3[i] = 1 } else if (dat$CRP_mgL[i] <= 5.8){ dat$E3[i] = 2 } else if (dat$CRP_mgL[i] <= 16.9){ dat$E3[i] = 3 } else if(dat$CRP_mgL[i] > 16.9){ dat$E3[i] = 4 } } dat.f$C3 <- NA for (i in 1:length(dat.f$uniqueid)){ if (dat.f$suPAR_ngmL[i] <= 3.6){ dat.f$C3[i] = 1 } else if(dat.f$suPAR_ngmL[i] > 3.6){ dat.f$C3[i] = 2 } } table(dat.f$C3) dat.m$C3 <- NA for (i in 1:length(dat.m$uniqueid)){ if (dat.m$suPAR_ngmL[i] <= 3.2){ dat.m$C3[i] = 1 } else if(dat.m$suPAR_ngmL[i] > 3.2){ dat.m$C3[i] = 2 } } dat$C3 = NA dat$C3[which(testing$male==0 & dat$suPAR_ngmL <= 3.6)] <- 1 dat$C3[which(testing$male==0 & dat$suPAR_ngmL > 3.6)] <- 2 dat$C3[which(testing$male !=0 & dat$suPAR_ngmL <= 3.2)] <- 1 dat$C3[which(testing$male !=0 & dat$suPAR_ngmL > 3.2)] <- 2 table(dat$C3) dat.m$SurvObj3 <- with(dat.m, Surv(dat.m$timetoMI5yr, DeathorMI==1)) dat.f$SurvObj3 <- with(dat.f, Surv(dat.f$timetoMI5yr, DeathorMI==1)) table(dat$A3) table(dat$B3) table(dat$D3) table(dat$E3) table(dat.f$C3) table(dat.m$C3) ################################### ################################### testing$SurvObj3 <- with(testing, Surv(testing$timetoMI5yr, DeathorMI==1)) testing.f <- dat[which(testing$male==0),] testing.m <- dat[which(testing$male==1),] testing.f$SurvObj3 <- with(testing.f, Surv(testing.f$timetoMI5yr, DeathorMI==1)) testing.m$SurvObj3 <- with(testing.m, Surv(testing.m$timetoMI5yr, DeathorMI==1)) testing$A3 <- NA for (i in 1:length(testing$uniqueid)){ if (testing$hsTroponin_pgmL[i] <= 1.6){ testing$A3[i] = 1 } else if (testing$hsTroponin_pgmL[i] <= 2.1){ testing$A3[i] = 2 } else if (testing$hsTroponin_pgmL[i] <= 2.6){ testing$A3[i] = 3 } else if(testing$hsTroponin_pgmL[i] <= 6.9){ testing$A3[i] = 4 } else if(testing$hsTroponin_pgmL[i] > 6.9){ testing$A3[i] = 5 } } length(testing$uniqueid[which(testing$B3==2)]) testing$B3 <- NA for (i in 1:length(testing$uniqueid)){ if (testing$HSP70_ngmL.log[i] <= 0.04116454){ testing$B3[i] = 1 } else if (testing$HSP70_ngmL.log[i] <= 5.017524){ testing$B3[i] = 2 } else if(testing$HSP70_ngmL.log[i] > 5.017524){ testing$B3[i] = 3 } } testing$D3 <- NA for (i in 1:length(testing$uniqueid)){ if (testing$FDP_ugmL[i] <= 0.27){ testing$D3[i] = 1 } else if (testing$FDP_ugmL[i] <= 0.48){ testing$D3[i] = 2 } else if (testing$FDP_ugmL[i] <= 0.88){ testing$D3[i] = 3 } else if(testing$FDP_ugmL[i] > 0.88){ testing$D3[i] = 4 } } testing$E3 <- NA for (i in 1:length(testing$uniqueid)){ if (testing$CRP_mgL[i] <= 1.6){ testing$E3[i] = 1 } else if (testing$CRP_mgL[i] <= 5.8){ testing$E3[i] = 2 } else if (testing$CRP_mgL[i] <= 16.9){ testing$E3[i] = 3 } else if(testing$CRP_mgL[i] > 16.9){ testing$E3[i] = 4 } } testing.f$C3 <- NA for (i in 1:length(testing.f$uniqueid)){ if (testing.f$suPAR_ngmL[i] <= 3.6){ testing.f$C3[i] = 3 } else if(testing.f$suPAR_ngmL[i] > 3.6){ testing.f$C3[i] = 4 } } testing.m$C3 <- NA for (i in 1:length(testing.m$uniqueid)){ if (testing.m$suPAR_ngmL[i] <= 3.2){ testing.m$C3[i] = 1 } else if(testing.m$suPAR_ngmL[i] > 3.2){ testing.m$C3[i] = 2 } } for (i in 1:length(testing$uniqueid)){ if (is.na(testing$male[i])) { testing$male[i] <- 0 } } testing$C3 = NA for (i in 1:length(dat$uniqueid)){ if (testing$male[i] == 1 & testing$suPAR_ngmL[i] <= 3.2){ testing$C3[i] <- 1 } else if (testing$male[i] == 1 & testing$suPAR_ngmL[i] > 3.2){ testing$C3[i] <- 2 } else if (testing$male[i] == 0 & testing$suPAR_ngmL[i] <= 3.6){ testing$C3[i] <- 1 } else if (testing$male[i] == 0 & testing$suPAR_ngmL[i] > 3.6){ testing$C3[i] <- 2 } } testing$C3[which(testing$male==0 & testing$suPAR_ngmL <= 3.6)] <- 1 testing$C3[which(testing$male==0 & testing$suPAR_ngmL > 3.6)] <- 2 testing$C3[which(testing$male !=0 & testing$suPAR_ngmL <= 3.2)] <- 1 testing$C3[which(testing$male !=0 & testing$suPAR_ngmL > 3.2)] <- 2 table(testing$A3) table(testing$B3) table(testing$D3) table(testing$E3) table(testing.m$C3) table(testing.f$C3) KM_A3 <- survfit(SurvObj3~A3, data=testing, conf.type="log") plot(KM_A3, col=c(1:5), main=("Death or MI by HS-Trop Levels"), ylim=c(0.5, 1)) legend("bottomleft", c("Low","Medium","High","Very High","Extremely High"), col=c(1:5), lty=1, bty="n") KM_B3 <- survfit(SurvObj3~B3, data=testing, conf.type="log") plot(KM_B3, col=c(1:4), main=("Death or MI by HSP-70"), ylim=c(0.5, 1)) legend("bottomleft", c("Low","Medium","High"), col=c(1:3), lty=1, bty="n") KM_D3 <- survfit(SurvObj3~D3, data=testing, conf.type="log") plot(KM_D3, col=c(1:4), main=("Death or MI by FDP"), ylim=c(0.5, 1)) legend("bottomleft", c("Low","Medium","High","Very High"), col=c(1:4), lty=1, bty="n") KM_E3 <- survfit(SurvObj3~E3, data=testing, conf.type="log") plot(KM_E3, col=c(1:4), main=("Death or MI by CRP"), ylim=c(0.5, 1)) legend("bottomleft", c("Low","Medium","High","Very High"), col=c(1:4), lty=1, bty="n") KM_C3 <- survfit(SurvObj3~C3, data=testing, conf.type="log") plot(KM_C3, col=c(1:2), main=("Death or MI by SuPAR"), ylim=c(0.5, 1)) legend("bottomleft", c("Low","High"), col=c(1:2), lty=1, bty="n") table(testing$A3) table(testing$B3) table(testing$D3) table(testing$E3) table(testing.f$C3) table(testing.m$C3) C3t = c(testing.m$C3, testing.f$C3) #************************* REGRESSION #Age can try categories (e.g., <60, 60-70, >70, etc.) #BMI can try to create categories (e.g., normal, overweight, obese, etc.) #eGFR eGFR<60 is defined as chronic kidney disease #Gensini_OM0 ?an define severe CAD (binary) using Gensini score > 20 #male #race (black vs. nonblack) #Smoking (current, past, never) ?check if past can be combined with never smokers #HTN #DM #HC #MIHx #HFhx #everstat #everARBACE dat$age.Cat <- NA for (i in 1:length(dat$uniqueid)){ if (dat$age[i] < 60){ dat$age.Cat[i] = 1 } else if(dat$age[i] <= 70){ dat$age.Cat[i] = 2 } else if(dat$age[i] > 70){ dat$age.Cat[i] = 3 } } dat$BMI.Cat <- NA for (i in 1:length(dat$uniqueid)){ if (dat$BMI_Impute[i] < 18.5){ dat$BMI.Cat[i] = 3 } else if(dat$BMI_Impute[i] < 25){ dat$BMI.Cat[i] = 0 } else if(dat$BMI_Impute[i] < 30){ dat$BMI.Cat[i] = 1 } else if(dat$BMI_Impute[i] >= 30){ dat$BMI.Cat[i] = 2 } } dat$eGFR.Cat <- NA for (i in 1:length(dat$uniqueid)){ if (is.na(dat$eGFR_max120[i])) { dat$eGFR.Cat[i] = NA } else if (dat$eGFR_max120[i] < 60){ dat$eGFR.Cat[i] = 1 } else if(dat$eGFR_max120[i] >= 60){ dat$eGFR.Cat[i] = 2 } } dat$Gensini.Cat <- NA for (i in 1:length(dat$uniqueid)){ if (is.na(dat$Gensini_OM0[i])) { dat$Gensini.Cat[i] = NA } else if (dat$Gensini_OM0[i] < 20){ dat$Gensini.Cat[i] = 1 } else if(dat$Gensini_OM0[i] >= 20){ dat$Gensini.Cat[i] = 2 } } dat$Smoke.Cat <- NA for (i in 1:length(dat$uniqueid)){ if (dat$PastSmoking[i] == 1){ dat$Smoke.Cat[i] = 1 } else if(dat$CurrentSmoking[i] == 1){ dat$Smoke.Cat[i] = 2 } else {dat$Smoke.Cat[i] = 0} } #******************************# # Making the testing stuff too testing$age.Cat <- NA for (i in 1:length(testing$uniqueid)){ if (testing$age[i] < 60){ testing$age.Cat[i] = 1 } else if(testing$age[i] <= 70){ testing$age.Cat[i] = 2 } else if(testing$age[i] > 70){ testing$age.Cat[i] = 3 } } testing$BMI.Cat <- NA for (i in 1:length(testing$uniqueid)){ if (testing$BMI_Impute[i] < 18.5){ testing$BMI.Cat[i] = 3 } else if(testing$BMI_Impute[i] < 25){ testing$BMI.Cat[i] = 0 } else if(testing$BMI_Impute[i] < 30){ testing$BMI.Cat[i] = 1 } else if(testing$BMI_Impute[i] >= 30){ testing$BMI.Cat[i] = 2 } } testing$eGFR.Cat <- NA for (i in 1:length(testing$uniqueid)){ if (is.na(testing$eGFR_max120[i])) { testing$eGFR.Cat[i] = NA } else if (testing$eGFR_max120[i] < 60){ testing$eGFR.Cat[i] = 1 } else if(testing$eGFR_max120[i] >= 60){ testing$eGFR.Cat[i] = 2 } } testing$Gensini.Cat <- NA for (i in 1:length(testing$uniqueid)){ if (is.na(testing$Gensini_OM0[i])) { testing$Gensini.Cat[i] = NA } else if (testing$Gensini_OM0[i] < 20){ testing$Gensini.Cat[i] = 1 } else if(testing$Gensini_OM0[i] >= 20){ testing$Gensini.Cat[i] = 2 } } testing$Smoke.Cat <- NA for (i in 1:length(testing$uniqueid)){ if (testing$PastSmoking[i] == 1){ testing$Smoke.Cat[i] = 1 } else if(testing$CurrentSmoking[i] == 1){ testing$Smoke.Cat[i] = 2 } else {testing$Smoke.Cat[i] = 0} } KM <- coxph(SurvObj3 ~ age + BMI_Impute + eGFR.Cat + Gensini.Cat + male + black + factor(Smoke.Cat) + HTN + DM + HC + MIhx + HFhx + EverStat+ EverARBACE + hsTroponin_pgmL.log + HSP70_ngmL.log + FDP_ugmL.log + CRP_mgL.log + suPAR_ngmL.log , data=testing) sumKM <- summary(KM) sumKM RISK.NoBRS <- exp(KM$linear.predictors) #**************# KMf <- coxph(SurvObj3 ~ factor(A3) + factor(B3) + factor(C3) + factor(D3) + factor(E3) , data=testing) coefs <- KMf$coefficients coefs BRS <- 0 for (i in 1:length(testing$uniqueid)){ BRS[i] <- 0 if (testing$A3[i] == 2) { BRS[i] <- BRS[i] + KMf$coefficients[1] } if (testing$A3[i] == 3) { BRS[i] <- BRS[i] + KMf$coefficients[2] } if (testing$A3[i] == 4) { BRS[i] <- BRS[i] + KMf$coefficients[3] } if (testing$A3[i] == 5) { BRS[i] <- BRS[i] + KMf$coefficients[4] } ########################################## if (testing$B3[i] == 2) { BRS[i] <- BRS[i] + KMf$coefficients[5] } if (testing$B3[i] == 3) { BRS[i] <- BRS[i] + KMf$coefficients[6] } ########################################## if (testing$D3[i] == 2) { BRS[i] <- BRS[i] + KMf$coefficients[7] } if (testing$D3[i] == 3) { BRS[i] <- BRS[i] + KMf$coefficients[8] } if (testing$D3[i] == 4) { BRS[i] <- BRS[i] + KMf$coefficients[9] } ######################################### if (testing$E3[i] == 2) { BRS[i] <- BRS[i] + KMf$coefficients[10] } if (testing$E3[i] == 3) { BRS[i] <- BRS[i] + KMf$coefficients[11] } if (testing$E3[i] == 4) { BRS[i] <- BRS[i] + KMf$coefficients[12] } } hist(BRS) KMscore <- coxph(testing$SurvObj3 ~ BRS) sum <- summary(KMscore) sum BRS.CENTERED <- BRS-mean(BRS) testing$BRS.CENTERED = BRS.CENTERED ###################################################### testing$age.Cat list.of.packages <- c("foreign", "survC1", "survIDINRI") new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] if(length(new.packages)) install.packages(new.packages) library(foreign) library(survC1) library(survIDINRI) model1 <- c(BRS) mydata = cbind(testing$timetoMI5yr, testing$DeathorMI, model1) mod2dat <- cbind(testing$age.Cat , testing$BMI.Cat, testing$eGFR.Cat, testing$Gensini.Cat, testing$male, testing$black, testing$PastSmoking, testing$CurrentSmoking, testing$HTN, testing$DM, testing$HC, testing$MIhx, testing$HFhx, testing$EverStat, testing$EverARBACE, testing$BRS.CENTERED) head(mod2dat) mydata = cbind(mydata, mod2dat) head(mydata) mydata[,1:2] head(mydata) Cfun = Est.Cval(mydata[,1:3], 1826, nofit=FALSE) Cfun$Dhat mydatacomp = mydata[complete.cases(mydata),] ################################################ mydatacomp <- as.data.frame(mydatacomp) head(mydatacomp) colnames(mydatacomp) <- c("timetoMI5yr", "DeathorMI", "BRS", "age.Cat" , "BMI.Cat", "eGFR.Cat", "Gensini.Cat", "male", "black", "PastSmoking", "CurrentSmoking", "HTN", "DM", "HC", "MIhx", "HFhx", "EverStat", "EverARBACE", "BRS.CENTERED") head(mydatacomp) mydatacomp$SurvObj3 <- with(mydatacomp, Surv(mydatacomp$timetoMI5yr, mydatacomp$DeathorMI==1)) mydatacomp$Smoke.Cat <- NA for (i in 1:length(mydatacomp$BRS)){ if (mydatacomp$PastSmoking[i] == 1){ mydatacomp$Smoke.Cat[i] = 1 } else if(mydatacomp$CurrentSmoking[i] == 1){ mydatacomp$Smoke.Cat[i] = 2 } else {mydatacomp$Smoke.Cat[i] = 0} } head(mydatacomp) mydatacomp$age.Cat <- mydatacomp$age.Cat - 1 mydatacomp$Gensini.Cat <- mydatacomp$Gensini.Cat - 1 mydatacomp$eGFR.Cat <- mydatacomp$eGFR.Cat - 1 KM0 <- coxph(SurvObj3 ~ factor(age.Cat) + factor(BMI.Cat) + factor(eGFR.Cat) + factor(Gensini.Cat) + male + black + factor(Smoke.Cat) + HTN + DM + HC + MIhx + HFhx + EverStat+ EverARBACE , data=mydatacomp) sumKM0 <- summary(KM0) KM0_coeffs <- sumKM0$coefficients RISK.NoBRS <- exp(KM$linear.predictors) head(mydatacomp) head(testing$SurvObj3) KM1 <- coxph(SurvObj3 ~ factor(age.Cat) + factor(BMI.Cat) + factor(eGFR.Cat) + factor(Gensini.Cat) + male + black + factor(Smoke.Cat) + HTN + DM + HC + MIhx + HFhx + EverStat+ EverARBACE + BRS.CENTERED , data=mydatacomp) KM1 sumKM1 <- summary(KM1) KM1_coeffs <- sumKM1$coefficients sumKM1 <- summary(KM1) cumhaz <- basehaz(KM1, centered = FALSE) cumhaz <- cumhaz$hazard BASELINESURVIVAL <- exp(-1*cumhaz) BS5yr1 <- BASELINESURVIVAL[length(BASELINESURVIVAL)] sumKM0 <- summary(KM0) cumhaz <- basehaz(KM0) cumhaz <- cumhaz$hazard BASELINESURVIVAL <- exp(-1*cumhaz) BS5yr0 <- BASELINESURVIVAL[length(BASELINESURVIVAL)] ###################################### # BASELINE SURVIVAL FINDINGS ################### head(mydatacomp) KM1 BetaKM1 = rep(0,1345) for (i in 1:1345){ if (mydatacomp$age.Cat[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[1] } if (mydatacomp$age.Cat[i] == 2){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[2] } if (mydatacomp$BMI.Cat[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[3] } if (mydatacomp$BMI.Cat[i] == 2){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[4] } if (mydatacomp$age.Cat[i] == 3){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[5] } if (mydatacomp$eGFR.Cat[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[6] } if (mydatacomp$Gensini.Cat[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[7] } if (mydatacomp$male[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[8] } if (mydatacomp$black[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[9] } if (mydatacomp$Smoke.Cat[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[10] } if (mydatacomp$Smoke.Cat[i] == 2){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[11] } if (mydatacomp$HTN[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[12] } if (mydatacomp$DM[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[13] } if (mydatacomp$HC[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[14] } if (mydatacomp$MIhx[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[15] } if (mydatacomp$HFhx[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[16] } if (mydatacomp$EverStat[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[17] } if (mydatacomp$EverARBACE[i] == 1){ BetaKM1[i] = BetaKM1[i]+KM1_coeffs[18] } BetaKM1[i] = BetaKM1[i] + (mydatacomp$BRS.CENTERED[i]*KM1_coeffs[19]) } Est.Risk1 <- rep(0,1345) for (i in 1:1345){ Est.Risk1[i] = 1-BS5yr1^(exp(BetaKM1[i])) } mean(Est.Risk1) ############################ # MAKE THE 10 GROUPS mydatacomp <- cbind(mydatacomp,Est.Risk1) ################################################# # GOOD Model ####### ###### mean(Est.Risk1) decilegroups = findInterval(Est.Risk1, quantile(Est.Risk1, probs=(0:9)/10)) ESTIMATED_RISK1 <- as.vector(tapply(Est.Risk1, decilegroups, mean)) mydatacomp <- cbind(mydatacomp, decilegroups) KM_EST_RISK = rep(0,10) for (i in 1:10){ KMEST <- survfit(mydatacomp$SurvObj3[which(mydatacomp$decilegroups == i)]~1) sumEST <- summary(KMEST) KM_EST_RISK[i] <- 1-min(sumEST$surv) } KM_EST_RISK mean(ESTIMATED_RISK1) mean(KM_EST_RISK) BOTH <- rbind(ESTIMATED_RISK1,KM_EST_RISK) par(mfrow=c(1,1)) barplot(BOTH,beside=T, xlab=("Risk Group"),legend=c("Mean Est Risk", "KM Observed Risk"),args.legend = list(x = "topleft"), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), ylab=("Proportion of Individuals with Event / Estimated Risk")) abline(h=0) mean(mydatacomp[,2]) KM1