Last updated: 2018-04-20
Code version: e96adb1
library(dplyr)
library(data.table)
library(magrittr)
library(purrr)
library(here) # for tracking working directory
library(ggplot2)
library(epistats)
library(broom)
library(survival)
lc <- read.csv(here("data", "lungca.csv"))
cont_vars <- c("age", "Karn.phys", "Karn.pt", "wtloss")
lc %<>%
mutate(status = status - 1,
sex = factor(sex, levels = c("1", "2"), labels = c("male", "female")),
wtloss = 0.453592 * wtloss,
ECOG.phys = factor(ifelse(ECOG.phys == "3", "2", ECOG.phys)),
ECOG.phys = factor(ECOG.phys, levels = c("0", "1", "2"),
labels = c("0", "1", "2-3")))
# age = age - mean(age))
str(lc)
'data.frame': 167 obs. of 10 variables:
$ center : int 3 5 12 7 11 1 7 6 12 22 ...
$ time : int 455 210 1022 310 361 218 166 170 567 613 ...
$ status : num 1 1 0 1 1 1 1 1 1 1 ...
$ age : int 68 57 74 68 71 53 61 57 57 70 ...
$ sex : Factor w/ 2 levels "male","female": 1 1 1 2 2 1 1 1 1 1 ...
$ ECOG.phys: Factor w/ 3 levels "0","1","2-3": 1 2 2 3 3 2 3 2 2 2 ...
$ Karn.phys: int 90 90 50 70 60 70 70 80 80 90 ...
$ Karn.pt : int 90 60 80 60 80 80 70 80 70 100 ...
$ calories : int 1225 1150 513 384 538 825 271 1025 2600 1150 ...
$ wtloss : num 6.804 4.99 0 4.536 0.454 ...
summary(lc)
center time status age
Min. : 1.00 Min. : 5.0 Min. :0.0000 Min. :39.00
1st Qu.: 3.00 1st Qu.: 174.5 1st Qu.:0.0000 1st Qu.:57.00
Median :11.00 Median : 268.0 Median :1.0000 Median :64.00
Mean :10.71 Mean : 309.9 Mean :0.7186 Mean :62.57
3rd Qu.:15.00 3rd Qu.: 419.5 3rd Qu.:1.0000 3rd Qu.:70.00
Max. :32.00 Max. :1022.0 Max. :1.0000 Max. :82.00
sex ECOG.phys Karn.phys Karn.pt calories
male :103 0 :47 Min. : 50.00 Min. : 30.00 Min. : 96.0
female: 64 1 :81 1st Qu.: 70.00 1st Qu.: 70.00 1st Qu.: 619.0
2-3:39 Median : 80.00 Median : 80.00 Median : 975.0
Mean : 82.04 Mean : 79.58 Mean : 929.1
3rd Qu.: 90.00 3rd Qu.: 90.00 3rd Qu.:1162.5
Max. :100.00 Max. :100.00 Max. :2600.0
wtloss
Min. :-10.886
1st Qu.: 0.000
Median : 3.175
Mean : 4.408
3rd Qu.: 6.804
Max. : 30.844
nna(lc)
center time status age sex ECOG.phys Karn.phys
0 0 0 0 0 0 0
Karn.pt calories wtloss
0 0 0
table(lc$ECOG.phys, lc$sex)
male female
0 28 19
1 52 29
2-3 23 16
lc %>%
data.table::melt(measure.vars = cont_vars) %>%
ggplot(aes(x = value)) +
geom_histogram() +
facet_grid(sex~variable, scales = "free_x") +
theme_minimal()
Recode Karnophsky as factors
factor_vars <- c("center", "sex", "ECOG.phys", "Karn.phys", "Karn.pt")
lc %<>%
mutate_at(vars(factor_vars), funs(as.factor))
lc %>%
ggplot(aes(x = calories, y = wtloss)) +
geom_point() + theme_minimal()
lc %>%
mutate(
wt_loss_ratio = wtloss / calories
) %>%
ggplot(aes(x = wt_loss_ratio)) +
geom_histogram()
require(ggfortify)
surv_plot <- lc %>%
survfit(Surv(time / 365, status) ~ 1, data = .) %>%
autoplot() + theme_minimal() +
labs(x = "Time (years)",
y = "Survival probability") +
ggtitle("Overview of survival (Kaplan-Meier estimate)")
surv_plot
survfit1 <- survfit(Surv(time, status) ~ 1, data = lc)
print(survfit1)
Call: survfit(formula = Surv(time, status) ~ 1, data = lc)
n events median 0.95LCL 0.95UCL
167 120 310 285 371
require(ggfortify)
lc %>%
mutate(dummy_variable = T,
status = factor(status, levels = c(0, 1), labels = c("censored", "deceased"))) %>%
survfit(Surv(time / 365, dummy_variable) ~ status, data = .) %>%
autoplot() + theme_minimal()
lc %>%
coxph(Surv(time, status) ~ age + sex + ECOG.phys + Karn.phys + Karn.pt + calories + wtloss,
data = .) %>%
summary()
Call:
coxph(formula = Surv(time, status) ~ age + sex + ECOG.phys +
Karn.phys + Karn.pt + calories + wtloss, data = .)
n= 167, number of events= 120
coef exp(coef) se(coef) z Pr(>|z|)
age 6.272e-03 1.006e+00 1.205e-02 0.520 0.60286
sexfemale -6.211e-01 5.373e-01 2.146e-01 -2.894 0.00381 **
ECOG.phys1 6.387e-01 1.894e+00 3.527e-01 1.811 0.07018 .
ECOG.phys2-3 1.357e+00 3.885e+00 5.451e-01 2.490 0.01278 *
Karn.phys60 1.101e+00 3.007e+00 6.825e-01 1.613 0.10670
Karn.phys70 1.025e+00 2.787e+00 6.556e-01 1.564 0.11792
Karn.phys80 1.200e+00 3.321e+00 6.681e-01 1.796 0.07244 .
Karn.phys90 1.341e+00 3.822e+00 6.859e-01 1.955 0.05058 .
Karn.phys100 1.484e+00 4.411e+00 7.743e-01 1.917 0.05528 .
Karn.pt40 -3.821e-01 6.824e-01 1.521e+00 -0.251 0.80158
Karn.pt50 7.358e-01 2.087e+00 1.216e+00 0.605 0.54524
Karn.pt60 1.340e-01 1.143e+00 1.042e+00 0.129 0.89773
Karn.pt70 -1.180e-01 8.887e-01 1.078e+00 -0.109 0.91283
Karn.pt80 -2.206e-01 8.020e-01 1.086e+00 -0.203 0.83906
Karn.pt90 -1.580e-02 9.843e-01 1.082e+00 -0.015 0.98835
Karn.pt100 -5.112e-01 5.998e-01 1.105e+00 -0.463 0.64366
calories -5.387e-05 9.999e-01 2.839e-04 -0.190 0.84952
wtloss -3.021e-02 9.702e-01 1.831e-02 -1.650 0.09903 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
age 1.0063 0.9937 0.98280 1.0303
sexfemale 0.5373 1.8610 0.35281 0.8184
ECOG.phys1 1.8940 0.5280 0.94873 3.7810
ECOG.phys2-3 3.8849 0.2574 1.33485 11.3067
Karn.phys60 3.0072 0.3325 0.78925 11.4580
Karn.phys70 2.7874 0.3588 0.77114 10.0752
Karn.phys80 3.3208 0.3011 0.89644 12.3015
Karn.phys90 3.8222 0.2616 0.99660 14.6594
Karn.phys100 4.4111 0.2267 0.96701 20.1219
Karn.pt40 0.6824 1.4654 0.03465 13.4403
Karn.pt50 2.0871 0.4791 0.19241 22.6384
Karn.pt60 1.1434 0.8746 0.14819 8.8218
Karn.pt70 0.8887 1.1252 0.10745 7.3502
Karn.pt80 0.8020 1.2468 0.09543 6.7411
Karn.pt90 0.9843 1.0159 0.11805 8.2077
Karn.pt100 0.5998 1.6673 0.06877 5.2312
calories 0.9999 1.0001 0.99939 1.0005
wtloss 0.9702 1.0307 0.93603 1.0057
Concordance= 0.662 (se = 0.031 )
Rsquare= 0.182 (max possible= 0.998 )
Likelihood ratio test= 33.53 on 18 df, p=0.01437
Wald test = 33.29 on 18 df, p=0.01539
Score (logrank) test = 35.97 on 18 df, p=0.007125
Model with random intercept per center
Random slopes make no sense -> why different effect of age in different centers?
fit1 <- coxph(Surv(time, status) ~ age + sex + ECOG.phys + Karn.phys + Karn.pt + calories * wtloss + frailty(center, dist = "gauss"),
data = lc)
# summary(fit)
fit2 <- coxph(Surv(time, status) ~ age + sex + ECOG.phys + Karn.phys + Karn.pt + calories * wtloss,
data = lc)
fits <- list(with_random_full = fit1, no_random_full = fit2)
map_df(fits, extractAIC)
# A tibble: 2 x 2
with_random_full no_random_full
<dbl> <dbl>
1 23.9 19.0
2 1014 1021
extractAIC(fit1)
[1] 23.91509 1014.17005
extractAIC(fit2)
[1] 19.000 1020.647
anova(fit1, fit2, test = "Chisq")
Analysis of Deviance Table
Cox model: response is Surv(time, status)
Model 1: ~ age + sex + ECOG.phys + Karn.phys + Karn.pt + calories * wtloss + frailty(center, dist = "gauss")
Model 2: ~ age + sex + ECOG.phys + Karn.phys + Karn.pt + calories * wtloss
loglik Chisq Df P(>|Chi|)
1 -483.17
2 -491.32 16.307 4.9151 0.00564 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
With manual likelihood-ratio test for chi-square distribution with 1 degree of freedom
pchisq(2*(logLik(fit1) - logLik(fit2)), df = 1, lower.tail = F)
'log Lik.' 5.38541e-05 (df=0.9761468)
Follow LRT the model with random intercept per center is better.
Use this random part, now reduce fixed part.
fit_full <- coxph(Surv(time, status) ~ age + sex + ECOG.phys + Karn.phys + Karn.pt + calories * wtloss + frailty(center, dist = "gauss"), data = lc)
fit <- fit_full
drop1(fit, test = "Chisq")
Single term deletions
Model:
Surv(time, status) ~ age + sex + ECOG.phys + Karn.phys + Karn.pt +
calories * wtloss + frailty(center, dist = "gauss")
Df AIC LRT Pr(>Chi)
<none> 1014.2
age 0.84864 1012.4 -0.1119 1.000000
sex 2.69586 1024.0 15.2547 0.001155 **
ECOG.phys 1.32255 1016.3 4.7459 0.045981 *
Karn.phys 5.23149 1010.7 7.0146 0.241562
Karn.pt 6.35830 1004.7 3.2178 0.814319
frailty(center, dist = "gauss") 4.91509 1020.6 16.3074 0.005640 **
calories:wtloss 1.00408 1012.4 0.2284 0.634434
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit <- coxph(Surv(time, status) ~ sex + ECOG.phys + Karn.phys + Karn.pt + calories * wtloss + frailty(center, dist = "gauss"),
data = lc)
drop1(fit, test = "Chisq")
Single term deletions
Model:
Surv(time, status) ~ sex + ECOG.phys + Karn.phys + Karn.pt +
calories * wtloss + frailty(center, dist = "gauss")
Df AIC LRT Pr(>Chi)
<none> 1012.4
sex 2.71446 1022.4 15.4992 0.001049 **
ECOG.phys 1.39257 1014.5 4.9108 0.045759 *
Karn.phys 5.34558 1008.8 7.1333 0.243242
Karn.pt 6.26163 1003.0 3.1719 0.811303
frailty(center, dist = "gauss") 5.06645 1018.9 16.6657 0.005450 **
calories:wtloss 0.97932 1010.6 0.2180 0.631965
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit <- coxph(Surv(time, status) ~ sex + ECOG.phys + Karn.phys + calories * wtloss + frailty(center, dist = "gauss"),
data = lc)
drop1(fit, test = "Chisq")
Single term deletions
Model:
Surv(time, status) ~ sex + ECOG.phys + Karn.phys + calories *
wtloss + frailty(center, dist = "gauss")
Df AIC LRT Pr(>Chi)
<none> 1003.0
sex 2.61157 1014.6 16.7801 0.0005019 ***
ECOG.phys 2.01604 1009.2 10.2417 0.0060855 **
Karn.phys 5.64572 1001.5 9.8126 0.1125923
frailty(center, dist = "gauss") 5.80483 1009.6 18.2374 0.0049296 **
calories:wtloss 0.94795 1001.3 0.1821 0.6477098
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit <- coxph(Surv(time, status) ~ sex + ECOG.phys + Karn.phys + calories + wtloss + frailty(center, dist = "gauss"),
data = lc)
drop1(fit, test = "Chisq")
Single term deletions
Model:
Surv(time, status) ~ sex + ECOG.phys + Karn.phys + calories +
wtloss + frailty(center, dist = "gauss")
Df AIC LRT Pr(>Chi)
<none> 1001.3
sex 2.6093 1012.9 16.7953 0.0004968 ***
ECOG.phys 2.0940 1007.5 10.4420 0.0060347 **
Karn.phys 5.7320 1000.2 10.3679 0.0966516 .
calories 1.2941 1000.1 1.3760 0.3201357
wtloss 1.5644 1004.2 6.0541 0.0303939 *
frailty(center, dist = "gauss") 5.8569 1007.8 18.1768 0.0052451 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit <- coxph(Surv(time, status) ~ sex + ECOG.phys + Karn.phys + wtloss + frailty(center, dist = "gauss"),
data = lc)
drop1(fit, test = "Chisq")
Single term deletions
Model:
Surv(time, status) ~ sex + ECOG.phys + Karn.phys + wtloss + frailty(center,
dist = "gauss")
Df AIC LRT Pr(>Chi)
<none> 1000.08
sex 2.3324 1010.94 15.5180 0.0006593 ***
ECOG.phys 2.3676 1006.90 11.5541 0.0047662 **
Karn.phys 5.5511 998.46 9.4831 0.1207032
wtloss 1.7026 1003.14 6.4606 0.0288310 *
frailty(center, dist = "gauss") 5.5628 1005.89 16.9309 0.0070480 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit <- coxph(Surv(time, status) ~ sex + ECOG.phys + wtloss + frailty(center, dist = "gauss"),
data = lc)
drop1(fit, test = "Chisq")
Single term deletions
Model:
Surv(time, status) ~ sex + ECOG.phys + wtloss + frailty(center,
dist = "gauss")
Df AIC LRT Pr(>Chi)
<none> 998.46
sex 1.8785 1005.85 11.1391 0.0032752 **
ECOG.phys 6.7385 1013.96 28.9683 0.0001179 ***
wtloss 1.2017 999.66 3.5979 0.0758057 .
frailty(center, dist = "gauss") 5.0117 1001.90 13.4614 0.0195731 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit <- coxph(Surv(time, status) ~ sex + ECOG.phys + frailty(center, dist = "gauss"),
data = lc)
drop1(fit, test = "Chisq")
Single term deletions
Model:
Surv(time, status) ~ sex + ECOG.phys + frailty(center, dist = "gauss")
Df AIC LRT Pr(>Chi)
<none> 999.66
sex 1.5970 1005.44 8.9712 0.0068833 **
ECOG.phys 6.6708 1011.98 25.6673 0.0004465 ***
frailty(center, dist = "gauss") 4.8100 1002.65 12.6151 0.0240372 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit_final <- coxph(Surv(time, status) ~ sex + ECOG.phys + frailty(center, dist = "gauss"), data = lc)
summary(fit_final)
Call:
coxph(formula = Surv(time, status) ~ sex + ECOG.phys + frailty(center,
dist = "gauss"), data = lc)
n= 167, number of events= 120
coef se(coef) se2 Chisq DF p
sexfemale -0.5269 0.1986 0.1973 7.04 1.00 8.0e-03
ECOG.phys1 0.3268 0.2391 0.2350 1.87 1.00 1.7e-01
ECOG.phys2-3 1.0598 0.2693 0.2628 15.49 1.00 8.3e-05
frailty(center, dist = "g 7.71 4.92 1.7e-01
exp(coef) exp(-coef) lower .95 upper .95
sexfemale 0.5904 1.6937 0.4001 0.8714
ECOG.phys1 1.3866 0.7212 0.8678 2.2156
ECOG.phys2-3 2.8859 0.3465 1.7023 4.8925
Iterations: 8 outer, 30 Newton-Raphson
Variance of random effect= 0.07332914
Degrees of freedom for terms= 1.0 1.9 4.9
Concordance= 0.674 (se = 0.031 )
Likelihood ratio test= 32.19 on 7.81 df, p=7.379e-05
knitr::kable(extract_RR(fit_final))
estimate | ci_low | ci_high | |
---|---|---|---|
sexfemale | 0.5904203 | 0.2011851 | 0.9796554 |
ECOG.phys1 | 1.3865878 | 0.9179148 | 1.8552608 |
ECOG.phys2-3 | 2.8858958 | 2.3580244 | 3.4137673 |
cox.zph(fit_final)
rho chisq p
sexfemale 0.1074 1.357 0.2441
ECOG.phys1 -0.0522 0.341 0.5594
ECOG.phys2-3 -0.1836 4.165 0.0413
GLOBAL NA 6.115 0.1061
lc %<>% mutate(lp = fit_final$linear.predictors)
lc %>%
mutate(lp_quantile = quant(lp, n.tiles = 3, label = "risk group ")) %>%
survfit(Surv(time / 365, status) ~ lp_quantile, data = .) %>%
autoplot() + theme_minimal() +
labs(x = "Time (years)",
y = "Survival probability") +
ggtitle("Overview of survival (Kaplan-Meier estimate)", "strata by terciles of linear predictor")
lc %>%
survfit(Surv(time / 365, status) ~ sex, data = .) %>%
autoplot() + theme_minimal() +
labs(x = "Time (years)",
y = "Survival probability") +
ggtitle("Overview of survival (Kaplan-Meier estimate)", "strata by sex")
lc %>%
survfit(Surv(time / 365, status) ~ ECOG.phys, data = .) %>%
autoplot() + theme_minimal() +
labs(x = "Time (years)",
y = "Survival probability") +
ggtitle("Overview of survival (Kaplan-Meier estimate)",
"strata by ECOG performance score")
fit_sex <- coxph(Surv(time, status)~ strata(sex) + ECOG.phys + frailty(center, "gauss"), data = lc)
plot(survfit(fit_sex), fun = "cloglog",
main = "c-loglog plot for final model stratified by sex",
col = c("blue", "red"))
legend("topleft", c("male", "female"), col = c("blue", "red"), lty = 1)
fit_ecog <- coxph(Surv(time, status)~ sex + strata(ECOG.phys) + frailty(center, "gauss"), data = lc)
plot(survfit(fit_ecog), fun = "cloglog", col = c("green", "blue", "red"),
main = "c-loglog plot for final model stratified by ECOG performance status")
legend("topleft", legend = c("0", "1", "2-3"), lty = 1, col = c("green", "blue", "red"))
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Sierra 10.12.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggfortify_0.4.2 bindrcpp_0.2 survival_2.41-3
[4] broom_0.4.3 epistats_0.1.0 ggplot2_2.2.1
[7] here_0.1 purrr_0.2.4 magrittr_1.5
[10] data.table_1.10.4-3 dplyr_0.7.4
loaded via a namespace (and not attached):
[1] Rcpp_0.12.15 highr_0.6 pillar_1.1.0 compiler_3.4.3
[5] git2r_0.21.0 plyr_1.8.4 bindr_0.1 tools_3.4.3
[9] digest_0.6.15 evaluate_0.10.1 tibble_1.4.2 gtable_0.2.0
[13] nlme_3.1-131 lattice_0.20-35 pkgconfig_2.0.1 rlang_0.1.6
[17] Matrix_1.2-12 psych_1.7.8 cli_1.0.0 parallel_3.4.3
[21] yaml_2.1.16 gridExtra_2.3 stringr_1.2.0 knitr_1.19
[25] rprojroot_1.3-2 grid_3.4.3 glue_1.2.0 R6_2.2.2
[29] foreign_0.8-69 rmarkdown_1.8 reshape2_1.4.3 tidyr_0.8.0
[33] splines_3.4.3 backports_1.1.2 scales_0.5.0 htmltools_0.3.6
[37] mnormt_1.5-5 assertthat_0.2.0 colorspace_1.3-2 labeling_0.3
[41] utf8_1.1.3 stringi_1.1.6 lazyeval_0.2.1 munsell_0.4.3
[45] crayon_1.3.4
This R Markdown site was created with workflowr