# This example shows how to create a survival curve from data that have dates.
# Example data: survivaldates.rda
# The data frame is called survivaldates:
#   - the first variable is Number (Integer)
#   - the second variable is the date of diagnosis: DODiag (Date)
#   - the third variable is the date of failure: DODeath (Date); NA if not failed
# Function by Paul Cool 2015

library("prodlim")

# First convert the data to a format on which survival analysis can be
# performed. Required are a follow-up variable (FU) and a censor indicator
# (Yes/No).

# Censor is TRUE when a date of death is recorded (failed / died) and FALSE
# when DODeath is NA (not failed). Despite its name, TRUE therefore marks an
# observed event; it is passed as the second argument of Hist() below.
died <- !is.na(survivaldates$DODeath)
survivaldates$Censor <- died

# Date last seen is DODeath if the patient has died, otherwise today's date.
last_seen <- rep(Sys.Date(), nrow(survivaldates))
last_seen[died] <- survivaldates$DODeath[died]
survivaldates$Last <- last_seen

# The follow-up is (date last seen - date of diagnosis), expressed in years.
# The result is a difftime whose units attribute still reads "days" although
# the values have been divided by 365.25; it is converted with as.numeric()
# in the model formula below.
survivaldates$FU <- difftime(
  survivaldates$Last,
  survivaldates$DODiag,
  units = "days"
) / 365.25

# Now the survival curve can be created with the prodlim package.
kmsurvival <- prodlim(Hist(as.numeric(FU), Censor) ~ 1, data = survivaldates)

# One x-axis tick per year, running one year past the longest follow-up.
year_ticks <- seq(0, kmsurvival$maxtime + 1, 1)

plot(
  kmsurvival,
  percent = FALSE,
  axes = TRUE,
  axis1.at = year_ticks,
  axis1.lab = year_ticks,
  marktime = TRUE,
  legend = TRUE,
  legend.x = 0,
  legend.y = 0.75,
  legend.cex = 1,
  xlab = "years",
  atrisk = TRUE,
  confint = TRUE,
  confint.citype = "shadow",
  col = c(2, 3, 4, 5)
)
title(main = "Kaplan-Meier Survival")