# Group the 2016 diagnoses into broad tumour categories and plot the counts.
#
# Prerequisite: the data frame `Diag` (from Diag.rda) must already be loaded.
# The code below reads its columns `Diagnosis` (free-text diagnosis), `BS`
# (bone or soft tissue tumour, used for faceting) and `Number` (summed per
# group).

library(plyr)    # ddply() / summarise() for the aggregation below
library(ggplot2) # ggplot() and the plot layers used at the end

# Remove the rows without a diagnosis. Missing values are tested explicitly:
# `Diag$Diagnosis == ""` is NA for an NA entry, and an NA in a row index
# inserts an all-NA row instead of dropping the row.
has_diagnosis <- !is.na(Diag$Diagnosis) & Diag$Diagnosis != ""
Diagnoses3 <- Diag[has_diagnosis, ]

# Grouping rules: if the Diagnosis field matches the regular expression on the
# left (`|` means OR), the Diag variable is set to the label on the right.
# The rules are applied in order, so when several match, the LAST one wins.
diagnosis_rules <- c(
  # first the bone tumours
  # (pleomorphic bone tumours are classified below under "Undifferentiated")
  "Osteosarcoma|osteosarcoma" = "Osteosarcoma",
  "Ewing|ewing" = "Ewing",
  "Chondrosarcoma|chondrosarcoma" = "Chondrosarcoma",
  "Giant Cell Tumour|GCT" = "GCT",
  "Chordoma|chordoma" = "Chordoma",
  # and the soft tissue tumours
  "Gastrointestinal" = "GIST",
  "DFSP|Dermatofibrosarcoma" = "DFSP",
  "Desmoid" = "Desmoid",
  "Kaposi" = "Kaposi Sarcoma",
  "Undifferentiated" = "Pleomorphic Sarcoma",
  "Leiomyosarcoma" = "Leiomyosarcoma",
  "Angiosarcoma" = "Angiosarcoma",
  "Myxofibro" = "Myxofibrosarcoma",
  "Myxoid liposarcoma|Myxoid Liposarcoma" = "Myxoid Liposarcoma",
  "Synovial|synovial" = "Synovial Sarcoma",
  "Epithelioid|epithelioid" = "Epithelioid Sarcoma",
  "MPNST" = "MPNST",
  # this is AFTER the undifferentiated rule, so undifferentiated dermal
  # tumours are classed as dermal
  "Dermal|dermal|scalp" = "Dermal Sarcoma",
  "Atypical Lipomatous|ALT|WDLLL" = "ALT/WDLLL"
)

# Classify on a plain character vector rather than by sub-assigning into the
# data frame: assigning a single value to an empty row selection of a data
# frame stops with an error, so a rule without any match would abort the
# script. Vector assignment with an all-FALSE mask is simply a no-op.
diagnosis_text <- as.character(Diagnoses3$Diagnosis)
diag_group <- rep(NA_character_, length(diagnosis_text))
for (pattern in names(diagnosis_rules)) {
  diag_group[grepl(pattern, diagnosis_text)] <- diagnosis_rules[[pattern]]
}

# All others should be "Other Sarcoma".
diag_group[is.na(diag_group)] <- "Other Sarcoma"
Diagnoses3[["Diag"]] <- diag_group

# Group the different diagnoses together in a new data frame: one row per
# Diag / BS combination, with the summed Number.
Diagnoses <- ddply(
  Diagnoses3, c("Diag", "BS"), summarise,
  Number = sum(Number)
)

# Order of the categories on the categorical axis.
diagnosis_order <- c(
  "Osteosarcoma", "Chondrosarcoma", "Ewing", "GCT", "Chordoma",
  "Pleomorphic Sarcoma", "Leiomyosarcoma", "Myxofibrosarcoma",
  "Myxoid Liposarcoma", "Synovial Sarcoma", "Epithelioid Sarcoma",
  "Angiosarcoma", "MPNST", "Desmoid", "ALT/WDLLL", "Other Sarcoma",
  "Dermal Sarcoma", "DFSP", "GIST", "Kaposi Sarcoma"
)

# Now create the bar plot from the Diagnoses data frame (variables Diag and
# Number).
diagnosis_plot <- ggplot(Diagnoses, aes(x = Diag, y = Number)) +
  # bar plot with the specified colours (outline and fill)
  geom_bar(stat = "identity", colour = "darkgreen", fill = "green") +
  # facet the plot by the variable BS (bone or soft tissue tumour)
  facet_grid(~BS) +
  # remove the grey background and set the font size
  theme_bw(base_size = 14) +
  # add the number in the group to the plot
  geom_text(aes(label = Number), hjust = 1.6, color = "black", size = 3.5) +
  # give a title to the plot
  ggtitle(label = "Diagnoses 2016") +
  # set the order on the categorical axis
  scale_x_discrete(limits = diagnosis_order) +
  xlab(label = "Diagnosis") + # an x axis label
  ylab(label = "Number") + # a y axis label
  # flip the plot to make it more readable
  coord_flip()

dev.new() # a new plot window
# Print explicitly: a ggplot object is only drawn automatically when it is
# evaluated at the interactive top level, not when the script is source()d.
print(diagnosis_plot)