Plotting survival curves after multiple imputation

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
3 messages Options
Reply | Threaded
Open this post in threaded view
|

Plotting survival curves after multiple imputation

Robert Long
I am working with some survival data with missing values.

I am using the mice package to do multiple imputation.

I have found code in this thread which handles pooling of the MI results:
https://stat.ethz.ch/pipermail/r-help/2007-May/132180.html

Now I would like to plot a survival curve using the pooled results.

Here is a reproducible example:

require(survival)
require(mice)

set.seed(2)

dt <- colon

fit <- coxph(Surv(time,etype)~rx + sex + age, data=colon)

dummy <- data.frame(sex=c(1,1,1),rx=c("Obs","Lev","Lev+5FU"),age=c(40,40,40))
plot(survfit(fit, newdata=dummy) )

# now create some missing values in the data
dt <- colon
dt$rx[sample(1:nrow(dt),50)] <- NA
dt$sex [sample(1:nrow(dt),50)] <- NA
dt$age[sample(1:nrow(dt),50)] <- NA

imp <-mice(dt)

fit.imp <- coxph.mids(Surv(time,etype)~rx + sex + age,imp)
# Note, this function is defined below...

imputed=summary.impute(pool.impute(fit.imp))
print(imputed)

# now, how to plot a survival curve with the pooled results ?




########## begin code from linked thread above

coxph.mids <- function (formula, data, ...) {

     call <- match.call()
     if (!is.mids(data)) stop("The data must have class mids")

     analyses <- as.list(1:data$m)

     for (i in 1:data$m) {
       data.i        <- complete(data, i)
       analyses[[i]] <- coxph(formula, data = data.i, ...)
     }

     object <- list(call = call, call1 = data$call,
                    nmis = data$nmis, analyses = analyses)

     return(object)
}

pool.impute <- function (object, method = "smallsample") {

     if ((m <- length(object$analyses)) < 2)
       stop("At least two imputations are needed for pooling.\n")

     analyses <- object$analyses

     k     <- length(coef(analyses[[1]]))
     names <- names(coef(analyses[[1]]))
     qhat  <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m,names))
     u     <- array(NA, dim = c(m, k, k),
                    dimnames = list(1:m, names, names))

     for (i in 1:m) {
       fit       <- analyses[[i]]
       qhat[i, ] <- coef(fit)
       u[i, , ]  <- vcov(fit)
     }

     qbar <- apply(qhat, 2, mean)
     ubar <- apply(u, c(2, 3), mean)
     e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
     b <- (t(e) %*% e)/(m - 1)
     t <- ubar + (1 + 1/m) * b
     r <- (1 + 1/m) * diag(b/ubar)
     f <- (1 + 1/m) * diag(b/t)
     df <- (m - 1) * (1 + 1/r)^2

     if (method == "smallsample") {

       if( any( class(fit) == "coxph" ) ){

         ### this loop is the hack for survival analysis ###

         status   <- fit$y[ , 2]
         n.events <- sum(status == max(status))
         p        <- length( coefficients( fit )  )
         dfc      <- n.events - p

       } else {

         dfc <- fit$df.residual
       }

       df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df)
     }

     names(r) <- names(df) <- names(f) <- names
     fit <- list(call = call, call1 = object$call, call2 = object$call1,
                 nmis = object$nmis, m = m, qhat = qhat, u = u,
                 qbar = qbar, ubar = ubar, b = b, t = t, r = r, df = df,
                 f = f)

     return(fit)
}

summary.impute <- function(object){

      if (!is.null(object$call1)){
        cat("Call: ")
        dput(object$call1)
      }

      est  <- object$qbar
      se   <- sqrt(diag(object$t))
      tval <- est/se
      df   <- object$df
      pval <- 2 * pt(abs(tval), df, lower.tail = FALSE)

      coefmat <- cbind(est, se, tval, pval)
      colnames(coefmat) <- c("Estimate", "Std. Error",
                                           "t value", "Pr(>|t|)")

      cat("\nCoefficients:\n")
      printCoefmat( coefmat, P.values=T, has.Pvalue=T, signif.legend=T )

      cat("\nFraction of information about the coefficients
                                      missing due to nonresponse:","\n")
      print(object$f)

      ans <- list( coefficients=coefmat, df=df,
                   call=object$call1, fracinfo.miss=object$f )
      invisible( ans )
  }

### end code from linked thread above

______________________________________________
[hidden email] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
Reply | Threaded
Open this post in threaded view
|

Re: Plotting survival curves after multiple imputation

Robert Long
Can anyone help with this ?

On 14/02/2013 14:07, Robert Long wrote:

> I am working with some survival data with missing values.
>
> I am using the mice package to do multiple imputation.
>
> I have found code in this thread which handles pooling of the MI results:
> https://stat.ethz.ch/pipermail/r-help/2007-May/132180.html
>
> Now I would like to plot a survival curve using the pooled results.
>
> Here is a reproducible example:
>
> require(survival)
> require(mice)
>
> set.seed(2)
>
> dt <- colon
>
> fit <- coxph(Surv(time,etype)~rx + sex + age, data=colon)
>
> dummy <- data.frame(sex=c(1,1,1),rx=c("Obs","Lev","Lev+5FU"),age=c(40,40,40))
> plot(survfit(fit, newdata=dummy) )
>
> # now create some missing values in the data
> dt <- colon
> dt$rx[sample(1:nrow(dt),50)] <- NA
> dt$sex [sample(1:nrow(dt),50)] <- NA
> dt$age[sample(1:nrow(dt),50)] <- NA
>
> imp <-mice(dt)
>
> fit.imp <- coxph.mids(Surv(time,etype)~rx + sex + age,imp)
> # Note, this function is defined below...
>
> imputed=summary.impute(pool.impute(fit.imp))
> print(imputed)
>
> # now, how to plot a survival curve with the pooled results ?
>
>
>
>
> ########## begin code from linked thread above
>
> coxph.mids <- function (formula, data, ...) {
>
>       call <- match.call()
>       if (!is.mids(data)) stop("The data must have class mids")
>
>       analyses <- as.list(1:data$m)
>
>       for (i in 1:data$m) {
>         data.i        <- complete(data, i)
>         analyses[[i]] <- coxph(formula, data = data.i, ...)
>       }
>
>       object <- list(call = call, call1 = data$call,
>                      nmis = data$nmis, analyses = analyses)
>
>       return(object)
> }
>
> pool.impute <- function (object, method = "smallsample") {
>
>       if ((m <- length(object$analyses)) < 2)
>         stop("At least two imputations are needed for pooling.\n")
>
>       analyses <- object$analyses
>
>       k     <- length(coef(analyses[[1]]))
>       names <- names(coef(analyses[[1]]))
>       qhat  <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m,names))
>       u     <- array(NA, dim = c(m, k, k),
>                      dimnames = list(1:m, names, names))
>
>       for (i in 1:m) {
>         fit       <- analyses[[i]]
>         qhat[i, ] <- coef(fit)
>         u[i, , ]  <- vcov(fit)
>       }
>
>       qbar <- apply(qhat, 2, mean)
>       ubar <- apply(u, c(2, 3), mean)
>       e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
>       b <- (t(e) %*% e)/(m - 1)
>       t <- ubar + (1 + 1/m) * b
>       r <- (1 + 1/m) * diag(b/ubar)
>       f <- (1 + 1/m) * diag(b/t)
>       df <- (m - 1) * (1 + 1/r)^2
>
>       if (method == "smallsample") {
>
>         if( any( class(fit) == "coxph" ) ){
>
>           ### this loop is the hack for survival analysis ###
>
>           status   <- fit$y[ , 2]
>           n.events <- sum(status == max(status))
>           p        <- length( coefficients( fit )  )
>           dfc      <- n.events - p
>
>         } else {
>
>           dfc <- fit$df.residual
>         }
>
>         df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df)
>       }
>
>       names(r) <- names(df) <- names(f) <- names
>       fit <- list(call = call, call1 = object$call, call2 = object$call1,
>                   nmis = object$nmis, m = m, qhat = qhat, u = u,
>                   qbar = qbar, ubar = ubar, b = b, t = t, r = r, df = df,
>                   f = f)
>
>       return(fit)
> }
>
> summary.impute <- function(object){
>
>        if (!is.null(object$call1)){
>          cat("Call: ")
>          dput(object$call1)
>        }
>
>        est  <- object$qbar
>        se   <- sqrt(diag(object$t))
>        tval <- est/se
>        df   <- object$df
>        pval <- 2 * pt(abs(tval), df, lower.tail = FALSE)
>
>        coefmat <- cbind(est, se, tval, pval)
>        colnames(coefmat) <- c("Estimate", "Std. Error",
>                                             "t value", "Pr(>|t|)")
>
>        cat("\nCoefficients:\n")
>        printCoefmat( coefmat, P.values=T, has.Pvalue=T, signif.legend=T )
>
>        cat("\nFraction of information about the coefficients
>                                        missing due to nonresponse:","\n")
>        print(object$f)
>
>        ans <- list( coefficients=coefmat, df=df,
>                     call=object$call1, fracinfo.miss=object$f )
>        invisible( ans )
>    }
>
> ### end code from linked thread above
>
> ______________________________________________
> [hidden email] mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.

______________________________________________
[hidden email] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
Reply | Threaded
Open this post in threaded view
|

Re: Plotting survival curves after multiple imputation

Frank Harrell
In reply to this post by Robert Long
I haven't seen anyone solve this.  I think it would be reasonable to do a time point by time point averaging (over multiple imputations) of the underlying survival curve, although there is some question about whether to "freeze" the centering constant (sum of beta times covariate mean).  What will be harder to get is confidence intervals for predicted probabilities after multiple imputation.  I hope someone will take up the challenge.
Frank
Robert Long wrote
I am working with some survival data with missing values.

I am using the mice package to do multiple imputation.

I have found code in this thread which handles pooling of the MI results:
https://stat.ethz.ch/pipermail/r-help/2007-May/132180.html

Now I would like to plot a survival curve using the pooled results.

Here is a reproducible example:

require(survival)
require(mice)

set.seed(2)

dt <- colon

fit <- coxph(Surv(time,etype)~rx + sex + age, data=colon)

dummy <- data.frame(sex=c(1,1,1),rx=c("Obs","Lev","Lev+5FU"),age=c(40,40,40))
plot(survfit(fit, newdata=dummy) )

# now create some missing values in the data
dt <- colon
dt$rx[sample(1:nrow(dt),50)] <- NA
dt$sex [sample(1:nrow(dt),50)] <- NA
dt$age[sample(1:nrow(dt),50)] <- NA

imp <-mice(dt)

fit.imp <- coxph.mids(Surv(time,etype)~rx + sex + age,imp)
# Note, this function is defined below...

imputed=summary.impute(pool.impute(fit.imp))
print(imputed)

# now, how to plot a survival curve with the pooled results ?




########## begin code from linked thread above

coxph.mids <- function (formula, data, ...) {

     call <- match.call()
     if (!is.mids(data)) stop("The data must have class mids")

     analyses <- as.list(1:data$m)

     for (i in 1:data$m) {
       data.i        <- complete(data, i)
       analyses[[i]] <- coxph(formula, data = data.i, ...)
     }

     object <- list(call = call, call1 = data$call,
                    nmis = data$nmis, analyses = analyses)

     return(object)
}

pool.impute <- function (object, method = "smallsample") {

     if ((m <- length(object$analyses)) < 2)
       stop("At least two imputations are needed for pooling.\n")

     analyses <- object$analyses

     k     <- length(coef(analyses[[1]]))
     names <- names(coef(analyses[[1]]))
     qhat  <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m,names))
     u     <- array(NA, dim = c(m, k, k),
                    dimnames = list(1:m, names, names))

     for (i in 1:m) {
       fit       <- analyses[[i]]
       qhat[i, ] <- coef(fit)
       u[i, , ]  <- vcov(fit)
     }

     qbar <- apply(qhat, 2, mean)
     ubar <- apply(u, c(2, 3), mean)
     e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
     b <- (t(e) %*% e)/(m - 1)
     t <- ubar + (1 + 1/m) * b
     r <- (1 + 1/m) * diag(b/ubar)
     f <- (1 + 1/m) * diag(b/t)
     df <- (m - 1) * (1 + 1/r)^2

     if (method == "smallsample") {

       if( any( class(fit) == "coxph" ) ){

         ### this loop is the hack for survival analysis ###

         status   <- fit$y[ , 2]
         n.events <- sum(status == max(status))
         p        <- length( coefficients( fit )  )
         dfc      <- n.events - p

       } else {

         dfc <- fit$df.residual
       }

       df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df)
     }

     names(r) <- names(df) <- names(f) <- names
     fit <- list(call = call, call1 = object$call, call2 = object$call1,
                 nmis = object$nmis, m = m, qhat = qhat, u = u,
                 qbar = qbar, ubar = ubar, b = b, t = t, r = r, df = df,
                 f = f)

     return(fit)
}

summary.impute <- function(object){

      if (!is.null(object$call1)){
        cat("Call: ")
        dput(object$call1)
      }

      est  <- object$qbar
      se   <- sqrt(diag(object$t))
      tval <- est/se
      df   <- object$df
      pval <- 2 * pt(abs(tval), df, lower.tail = FALSE)

      coefmat <- cbind(est, se, tval, pval)
      colnames(coefmat) <- c("Estimate", "Std. Error",
                                           "t value", "Pr(>|t|)")

      cat("\nCoefficients:\n")
      printCoefmat( coefmat, P.values=T, has.Pvalue=T, signif.legend=T )

      cat("\nFraction of information about the coefficients
                                      missing due to nonresponse:","\n")
      print(object$f)

      ans <- list( coefficients=coefmat, df=df,
                   call=object$call1, fracinfo.miss=object$f )
      invisible( ans )
  }

### end code from linked thread above

______________________________________________
[hidden email] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
Frank Harrell
Department of Biostatistics, Vanderbilt University