# Load all packages
library(ggplot2)
library(reshape2)
library(dplyr)
library(ggthemes)

# Script generalized into a function

#' Draw a mosaic plot shaded by chi-squared residuals
#'
#' Cross-tabulates two columns of `data` and draws one rectangle per
#' X/FILL cell: the width of a band is the number of observations in that
#' X group, and the height of a cell is the share of the FILL level within
#' the group. Each rectangle is filled by the corresponding residual
#' returned by `chisq.test()`.
#'
#' Side effects: prints the plot, the FILL-by-X contingency table, the
#' chi-squared test result and the residuals in long format.
#'
#' @param data A data frame (or list) holding the two variables.
#' @param X Name (or index) of the column laid out along the x axis.
#' @param FILL Name (or index) of the column stacked within each X band.
#' @return Invisibly, a data frame of residuals with columns `FILL`, `X`
#'   and `residual`.
mosaicGG <- function(data, X, FILL) {
  # `[[` returns NULL for an unknown name; fail here with a clear message
  # instead of letting table() fail later with an obscure one.
  if (is.null(data[[X]]) || is.null(data[[FILL]])) {
    stop("`X` and `FILL` must both identify columns of `data`.", call. = FALSE)
  }

  # Counts per X group (rows) and FILL level (columns).
  cell_counts <- as.data.frame.matrix(table(data[[X]], data[[FILL]]))
  group_totals <- rowSums(cell_counts)

  # Each X group occupies a horizontal band as wide as its total count.
  cell_counts$xmax <- cumsum(group_totals)
  cell_counts$xmin <- cell_counts$xmax - group_totals
  cell_counts$X <- row.names(cell_counts)

  # Long format: one row per X/FILL cell, then stack the FILL shares
  # vertically within each X band.
  long_counts <- melt(
    cell_counts,
    id.vars = c("X", "xmin", "xmax"),
    variable.name = "FILL"
  )
  long_counts <- long_counts %>%
    group_by(X) %>%
    mutate(
      ymax = cumsum(value / sum(value)),
      ymin = ymax - value / sum(value)
    ) %>%
    ungroup() %>%
    as.data.frame()

  # Chi-sq test. The table is FILL by X, so melting the residual matrix
  # yields the FILL dimension first and the X dimension second.
  results <- chisq.test(table(data[[FILL]], data[[X]]))
  resid <- melt(results$residuals)
  names(resid) <- c("FILL", "X", "residual")

  # Merge data: joins on the shared X and FILL columns.
  cells <- merge(long_counts, resid)

  # Positions for labels.
  # One row per X group, centred horizontally over its band.
  x_labels <- unique(cells[, c("X", "xmin", "xmax")])
  x_labels$xtext <- x_labels$xmin + (x_labels$xmax - x_labels$xmin) / 2

  # One row per FILL level, centred vertically on its cell in the
  # right-most band. Zero-width bands are skipped: an empty trailing X
  # level ties on xmax with the band before it and has undefined heights.
  right_edge <- max(cells$xmax)
  in_last_band <- cells$xmax == right_edge & cells$xmax > cells$xmin
  fill_labels <- cells[in_last_band, c("FILL", "ymin", "ymax")]
  fill_labels$ytext <- fill_labels$ymin +
    (fill_labels$ymax - fill_labels$ymin) / 2
  fill_labels$xtext <- right_edge

  # Plot. The text layers get their own one-row-per-label data so each
  # label is drawn exactly once rather than once per cell.
  g <- ggplot(cells) +
    geom_rect(
      aes(
        xmin = xmin, xmax = xmax,
        ymin = ymin, ymax = ymax,
        fill = residual
      ),
      colour = "white"
    ) +
    geom_text(
      data = x_labels,
      aes(x = xtext, label = X, colour = X),
      y = 1, size = 5, hjust = 0, vjust = -0.25, family = "serif"
    ) +
    geom_text(
      data = fill_labels,
      aes(x = xtext, y = ytext, label = FILL, colour = FILL),
      size = 5, hjust = -0.25, vjust = 0, family = "serif"
    ) +
    scale_fill_gradient2("Chi Sq \nResiduals", low = "red", high = "blue") +
    scale_x_continuous("Patients") +
    scale_y_continuous("Proportion") +
    theme_tufte() +
    theme(legend.position = "right")

  print(g)
  print(table(data[[FILL]], data[[X]]))
  print(results)
  print(resid)

  # Same value the final print() call hands back, made explicit.
  invisible(resid)
}