# Stacked bar plot of TNM staging data.
#
# Expects the data frame `TNMstage` (from TNMStage.rda) to be loaded already.
# Columns read below: TNM (staging string), BS (facet variable; bone or soft
# tissue) and Number (count that is summed per group).
# Curative patients (N0M0) are grouped together and stacked against all other
# stagings, with one bar per T stage.

library(plyr)     # ddply() for the grouped summaries
library(ggplot2)  # ggplot() and layers; harmless if it is already attached

# Drop rows without a usable TNM value ----------------------------------------

# Keep only rows whose TNM is not NA ...
TNM1 <- TNMstage[!is.na(TNMstage$TNM), ]
# ... and, of those, only rows whose TNM is not an empty string.
TNM2 <- TNM1[TNM1$TNM != "", ]

# Reclassify TNM into a T stage and a curative / non-curative group -----------

# One rule per T stage, applied top to bottom (same patterns and same order as
# the hand-written assignments this table replaces, so a later match still
# overrides an earlier one):
#   stage            - value written to Stage
#   stage_pattern    - substring of TNM that identifies the T stage
#   curative_pattern - substring of TNM that identifies the N0M0 variant
stage_rules <- data.frame(
  stage = c("T1", "T2", "T1a", "T1b", "T2a", "T2b", "Tx"),
  stage_pattern = c("T1N", "T2N", "T1aN", "T1bN", "T2aN", "T2bN", "Tx"),
  curative_pattern = c(
    "T1N0M0", "T2N0M0", "T1aN0M0", "T1bN0M0", "T2aN0M0", "T2bN0M0", "TxN0M0"
  ),
  stringsAsFactors = FALSE
)

# Make sure both target columns exist even when no pattern matches any row;
# columns that are already present are left untouched.
for (column in c("Stage", "Staging")) {
  if (!column %in% names(TNM2)) {
    TNM2[[column]] <- rep(NA_character_, nrow(TNM2))
  }
}

tnm_values <- as.character(TNM2$TNM)
for (i in seq_len(nrow(stage_rules))) {
  # The patterns contain no regex metacharacters, so a fixed match is
  # equivalent to the regex match and states the intent more clearly.
  has_stage <- grepl(stage_rules$stage_pattern[i], tnm_values, fixed = TRUE)
  is_curative <- grepl(stage_rules$curative_pattern[i], tnm_values,
                       fixed = TRUE)

  TNM2$Stage[has_stage] <- stage_rules$stage[i]
  # Every row of this T stage starts as "Other" ...
  TNM2$Staging[has_stage] <- "Other"
  # ... and is then promoted to "N0M0" if it is the curative variant.
  TNM2$Staging[is_curative] <- "N0M0"
}
# Rows whose TNM matches none of the patterns keep whatever Stage / Staging
# they had (NA when the columns were created above).

# Summarise for the plot ------------------------------------------------------

# Total Number per facet (BS), T stage and curative group.
TNM3 <- ddply(TNM2, c("BS", "Stage", "Staging"), summarise,
              Total = sum(Number))

# Running total within each bar. The plot below no longer uses ypos (ggplot2
# positions the labels itself); the column is kept in case other code reads
# TNM4$ypos.
TNM4 <- ddply(TNM3, c("BS", "Stage"), transform, ypos = cumsum(Total))

# Stacked bar plot, faceted on bone / soft tissue -----------------------------

dev.new()  # open a new plot window

tnm_plot <- ggplot(TNM4, aes(x = Stage, y = Total, fill = Staging)) +
  # Stacked bars of the pre-computed totals.
  geom_bar(stat = "identity", position = "stack") +
  # One facet per BS value; free scales / space drop unused categories.
  facet_grid(~BS, scales = "free", space = "free") +
  # No grey background, larger base font.
  theme_bw(base_size = 16) +
  # Labels are stacked with the same position adjustment as the bars, so each
  # number sits just below the top of its own segment regardless of the order
  # in which ggplot2 stacks the groups (a hand-computed cumsum breaks when
  # that order differs from the row order).
  # Only totals greater than 1 are labelled.
  # NOTE(review): the original comment said "<1, no label" but the code has
  # always tested Total > 1 -- confirm which threshold is intended.
  geom_text(
    aes(label = ifelse(Total > 1, Total, "")),
    position = position_stack(),
    vjust = 1.1, color = "Black", size = 5
  ) +
  ggtitle(label = "Classification 2016") +
  # Named values pin each colour to its group: curative (N0M0) is green and
  # non-curative (Other) is red, even if one group is absent from the data.
  scale_fill_manual(values = c(N0M0 = "green", Other = "red")) +
  xlab(label = "TNM Classification") +
  ylab(label = "Number")

# Explicit print so the plot is also drawn when the script is run via source().
print(tnm_plot)