# This example shows how to create a survival curve from data that have dates.
# Example data: survivaldates.rda
# The DataFrame is called survivaldates
# The first variable is Number (Integer)
# The second variable is the date of diagnosis: DODiag (Date)
# The third variable is the date of failure: DODeath (Date), if NA not failed
# Function by Paul Cool 2015
# First convert the data to a format, so that survival analysis can be performed
# Required is a Follow Up variable (FU) and a Censor (Yes/No)
# If DODeath is NA, the Censor is 0 (FALSE, not failed), otherwise 1 (TRUE, failed / died):
# Censor is the event indicator: TRUE when a date of death is recorded
# (failed / died), FALSE when DODeath is NA (not failed).
survivaldates$Censor <- !is.na(survivaldates$DODeath)
# Last is the date last seen: the date of death for patients who have died,
# otherwise today's date. Starting from DODeath keeps the Date class and also
# works when the data frame has no rows.
survivaldates$Last <- survivaldates$DODeath
survivaldates$Last[!survivaldates$Censor] <- Sys.Date()
# The Follow Up (FU) is the time from diagnosis to the date last seen, in years.
# It is stored as a plain number: dividing a difftime by 365.25 would keep the
# "days" unit label on values that are really years.
survivaldates$FU <- as.numeric(
  difftime(survivaldates$Last, survivaldates$DODiag, units = "days")
) / 365.25
# Now the survival curve can be created with the prodlim package:
# prodlim supplies prodlim(), Hist() and the plot method used below.
library("prodlim")
# Fit the product-limit estimate without strata (~ 1): FU is the follow-up
# time and Censor the event indicator (TRUE = failed / died).
kmsurvival <- prodlim(
  Hist(as.numeric(FU), Censor) ~ 1,
  data = survivaldates
)
# Draw the curve. The x-axis is ticked and labelled at every whole year from
# 0 up to one year past the model's maximum time.
plot(
  kmsurvival,
  percent = FALSE,
  axes = TRUE,
  axis1.at = seq(0, kmsurvival$maxtime + 1, by = 1),
  axis1.lab = seq(0, kmsurvival$maxtime + 1, by = 1),
  marktime = TRUE,
  legend = TRUE,
  legend.x = 0,
  legend.y = 0.75,
  legend.cex = 1,
  xlab = "years",
  atrisk = TRUE,
  confint = TRUE,
  confint.citype = "shadow",
  col = c(2, 3, 4, 5)
)
# Add the main heading to the finished plot.
title(main = "Kaplan-Meier Survival")