#---------------------------------------------------- # ----- SHIFTING BOXPLOT'S CODE ------------------ #--------------------------------------------------- # sbplot function library for shifting boxplot. # # Author: T. Siva Tian sbplot <- function(x, ...) UseMethod("sbplot") sbplot.default <- function (x, ..., names, box.ratio = 1, plot.median = TRUE, plot.mean = TRUE, bca.iter=2000, pch = 15, at = NULL, pos.offset = 0, ci.method = 'independent', add = FALSE, cex = 1, col = 'black', frac = 1) { # Default plotting function # # Args: # x: The input data. User could input a vector or multiple vectors. # names: The names of the input data. # box.ratio: The parameter that controls the width of the boxes. # plot.median: A boolean variable that determines whether the median dot # and the CI should be plotted. # plot.mean: A boolean variable that determines whether the mean line # and the CI should be plotted. # bca.iter: The iteration used in calculating the bootstraped CI. # pch: The parameter that controls the shape of the median dot. # at: Controls where to plot the ticks on the x-axis. # pos.offset: Offset parameter that controls the offset to the position # specified by the "at" parameter. # ci.method: Whether to treat groups of data as independent or dependent. # Not currently used. # add: A boolean variable that determines the plot should be added on an # existing plot. # cex: Controls the size of the median dot. # col: The color of boxes. # frac: The parameter that controls the width of the lines that represent # outliers. # # Returns: # shifting boxplot. args <- list(x, ...) namedargs <- if (!is.null(attributes(args)$names)) attributes(args)$names != "" else rep(FALSE, length.out = length(args)) # data by groups groups <- if (is.list(x)) x else args[!namedargs] # Error handling if (0L == (n <- length(groups))) stop("invalid first argument") if (length(class(groups))) groups <- unclass(groups) # Specify the names of the groups if (!missing(names)) attr(groups, "names") <- names else { if (is.null(attr(groups, "names"))) attr(groups, "names") <- 1L:n names <- attr(groups, "names") } # Specify the color of the boxes for different groups if (length(groups) > 1) { color <- rep(col, length.out = length(groups)) } else { color <- col[1] } # load the required library require(bootstrap, quietly=T) # get rid of NA values for (i in 1:length(groups)) { ok <- !is.na(groups[[i]]) groups[[i]] <- groups[[i]][ok] } y.unique <- 1:length(groups) # Specify the plotting options plotopts <- list(type='n', bty='n', xaxt='n') if (!'xlim' %in% attr(args[namedargs], 'name')) plotopts <- c(plotopts, list(xlim=c(0, length(y.unique) + 1))) if (!'ylim' %in% attr(args[namedargs], 'name')) plotopts <- c(plotopts, list(ylim=range(groups))) if (!'xlab' %in% attr(args[namedargs], 'name')) plotopts <- c(plotopts, list(xlab='')) if (!'ylab' %in% attr(args[namedargs], 'name')) plotopts <- c(plotopts, list(ylab='')) if (!add) do.call("plot", c(list(0, 0), args[namedargs], plotopts)) # Plot the ticks on the x-axis if (!missing(at)) y.unique <- at if (length(y.unique) > 1) { axis(1, at = y.unique, labels = names) } width <- box.ratio/(1 + box.ratio) w <- width/2 box.size <- c(1/2, 1/2, 2/2, 2/2, 3/2, 3/2) # Plot the boxplot for each group of data for (jj in 1:length(y.unique)) { # sort the data for preparing calculating the CI's X <- sort(groups[[jj]]) Y <- y.unique[jj] + pos.offset if (!length(X)) next theta <- function(d) {mean(d)} theta2 <- function(d) {median(d)} #non-parametric BCa results <- bcanon(X,bca.iter,theta) results2 <- bcanon(X,bca.iter,theta2) # limits for halves first_half <- mean(X[X < mean(X)]) third_half <- mean(X[X > mean(X)]) # limits for the 2 SD. The function "scale" does the same job as "scores" # in the "oultiers" library lim_inf <- min(which((scale(X))>= (-2))) lim_sup <- max(which((scale(X))<= (+2))) upper_SD <- ((scale(X)[lim_sup])*sd(X))+mean(X) lower_SD <- ((scale(X)[lim_inf])*sd(X))+mean(X) bc1 <- results$confpoints[1, 2] bc2 <- results$confpoints[8, 2] bc21 <- results2$confpoints[1, 2] bc22 <- results2$confpoints[8, 2] q <- c(upper_SD, lower_SD, first_half, third_half, bc1, bc2) # Plot the boxes for the (upper_SD, lower_SD), (first_half, third_half), and # (bc1, bc2) pairs for (ii in 1:3) { ind <- (ii-1)*2+1 #((ii-1)*2+1):(ii*2) lineopts <- c(list(lty=1, lwd=1.5, border=color[jj])) do.call("rect", c(list(Y - w * box.size[ind], q[ind], Y + w * box.size[ind], q[ind+1]), lineopts)) } # Plot the median dot if plot.median = TRUE if (plot.median) { lineopts <- c(list(lty=1, lwd=1, col=color[jj])) do.call("segments", c(list(Y, bc21, Y, bc22), lineopts)) median.value <- list(x = Y, y = median(X)) box.dot.par <- c(list(pch = pch, cex = cex, col = color[jj])) do.call("points", c(median.value, box.dot.par)) } # Plot the mean line if plot.mean = TRUE if (plot.mean) { mean.value <- mean(X) lineopts <- c(list(lty=1, lwd=2.5, col=color[jj])) do.call("segments", c(list(Y - w * box.size[5], mean.value, Y + w * box.size[5], mean.value), lineopts)) } # Plot the outliers, if any ii <- which(X < lower_SD | X > upper_SD) if (sum(ii)) { lineopts <- c(list(lty=1, lwd=1, col=color[jj])) do.call("segments", c(list(Y - frac * w * box.size[1], X[ii], Y + frac * w * box.size[1], X[ii]), lineopts)) } } } sbplot.formula <- function (formula, data = NULL, ..., na.action = NULL) { # Plot shifting boxplot. Handles formula input. # # Args: # formula: The formula to be plotted. # data: A data frame containing the data. # na.action: How to handle NA values in the data. # # Returns: # A shifting boxplot. # Error handling if (missing(formula) || (length(formula) != 3L)) stop("'formula' missing or incorrect") m <- match.call(expand.dots = FALSE) if (is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m$... <- NULL m$na.action <- na.action m[[1L]] <- as.name("model.frame") mf <- eval(m, parent.frame()) response <- attr(attr(mf, "terms"), "response") sbplot(split(mf[[response]], mf[-response]), ...) } # -------------------------------------------------------------------------- # Below are modified functions for traditional boxplot. Removed the lines at # the upper and right position. boxplot1 <- function (x, ..., range = 1.5, width = NULL, varwidth = FALSE, notch = FALSE, outline = TRUE, names, plot = TRUE, border = par("fg"), col = NULL, log = "", pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5), horizontal = FALSE, add = FALSE, at = NULL) { args <- list(x, ...) namedargs <- if (!is.null(attributes(args)$names)) attributes(args)$names != "" else rep(FALSE, length.out = length(args)) groups <- if (is.list(x)) x else args[!namedargs] if (0L == (n <- length(groups))) stop("invalid first argument") if (length(class(groups))) groups <- unclass(groups) if (!missing(names)) attr(groups, "names") <- names else { if (is.null(attr(groups, "names"))) attr(groups, "names") <- 1L:n names <- attr(groups, "names") } cls <- sapply(groups, function(x) class(x)[1L]) cl <- if (all(cls == cls[1L])) cls[1L] else NULL for (i in 1L:n) groups[i] <- list(boxplot.stats(unclass(groups[[i]]), range)) stats <- matrix(0, nrow = 5L, ncol = n) conf <- matrix(0, nrow = 2L, ncol = n) ng <- out <- group <- numeric(0L) ct <- 1 for (i in groups) { stats[, ct] <- i$stats conf[, ct] <- i$conf ng <- c(ng, i$n) if ((lo <- length(i$out))) { out <- c(out, i$out) group <- c(group, rep.int(ct, lo)) } ct <- ct + 1 } if (length(cl) && cl != "numeric") oldClass(stats) <- cl z <- list(stats = stats, n = ng, conf = conf, out = out, group = group, names = names) if (plot) { if (is.null(pars$boxfill) && is.null(args$boxfill)) pars$boxfill <- col do.call("bxp1", c(list(z, notch = notch, width = width, varwidth = varwidth, log = log, border = border, pars = pars, outline = outline, horizontal = horizontal, add = add, at = at), args[namedargs])) print(args[namedargs]) invisible(z) } else z } boxplot.formula1 <- function (formula, data = NULL, ..., subset, na.action = NULL) { if (missing(formula) || (length(formula) != 3L)) stop("'formula' missing or incorrect") m <- match.call(expand.dots = FALSE) if (is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m$... <- NULL m$na.action <- na.action require(stats, quietly = TRUE) m[[1L]] <- as.name("model.frame") mf <- eval(m, parent.frame()) response <- attr(attr(mf, "terms"), "response") boxplot1(split(mf[[response]], mf[-response]), ...) } bxp1 <- function (z, notch = FALSE, width = NULL, varwidth = FALSE, outline = TRUE, notch.frac = 0.5, log = "", border = par("fg"), pars = NULL, frame.plot = axes, horizontal = FALSE, add = FALSE, at = NULL, show.names = NULL, ...) { pars <- c(list(...), pars) pars <- pars[unique(names(pars))] bplt <- function(x, wid, stats, out, conf, notch, xlog, i) { ok <- TRUE if (!any(is.na(stats))) { xP <- if (xlog) function(x, w) x * exp(w) else function(x, w) x + w wid <- wid/2 if (notch) { ok <- stats[2L] <= conf[1L] && conf[2L] <= stats[4L] xx <- xP(x, wid * c(-1, 1, 1, notch.frac, 1, 1, -1, -1, -notch.frac, -1)) yy <- c(stats[c(2, 2)], conf[1L], stats[3L], conf[2L], stats[c(4, 4)], conf[2L], stats[3L], conf[1L]) } else { xx <- xP(x, wid * c(-1, 1, 1, -1)) yy <- stats[c(2, 2, 4, 4)] } if (!notch) notch.frac <- 1 wntch <- notch.frac * wid xypolygon(xx, yy, lty = "blank", col = boxfill[i]) xysegments(xP(x, -wntch), stats[3L], xP(x, +wntch), stats[3L], lty = medlty[i], lwd = medlwd[i], col = medcol[i], lend = 1) xypoints(x, stats[3L], pch = medpch[i], cex = medcex[i], col = medcol[i], bg = medbg[i]) xysegments(rep.int(x, 2), stats[c(1, 5)], rep.int(x, 2), stats[c(2, 4)], lty = whisklty[i], lwd = whisklwd[i], col = whiskcol[i]) xysegments(rep.int(xP(x, -wid * staplewex[i]), 2), stats[c(1, 5)], rep.int(xP(x, +wid * staplewex[i]), 2), stats[c(1, 5)], lty = staplelty[i], lwd = staplelwd[i], col = staplecol[i]) xypolygon(xx, yy, lty = boxlty[i], lwd = boxlwd[i], border = boxcol[i]) if ((nout <- length(out))) { xysegments(rep(x - wid * outwex, nout), out, rep(x + wid * outwex, nout), out, lty = outlty[i], lwd = outlwd[i], col = outcol[i]) xypoints(rep.int(x, nout), out, pch = outpch[i], cex = outcex[i], col = outcol[i], bg = outbg[i]) } if (any(inf <- !is.finite(out))) { warning(sprintf(ngettext(length(unique(out[inf])), "Outlier (%s) in boxplot %d is not drawn", "Outliers (%s) in boxplot %d are not drawn"), paste(unique(out[inf]), collapse = ", "), x), domain = NA) } } return(ok) } if (!is.list(z) || 0L == (n <- length(z$n))) stop("invalid first argument") if (is.null(at)) at <- 1L:n else if (length(at) != n) stop("'at' must have same length as 'z$n', i.e. ", n) if (is.null(z$out)) z$out <- numeric() if (is.null(z$group) || !outline) z$group <- integer() if (is.null(pars$ylim)) ylim <- range(z$stats[is.finite(z$stats)], if (outline) z$out[is.finite(z$out)], if (notch) z$conf[is.finite(z$conf)]) else { ylim <- pars$ylim pars$ylim <- NULL } if (is.null(pars$xlim)) xlim <- c(0.5, n + 0.5) else { xlim <- pars$xlim pars$xlim <- NULL } if (length(border) == 0L) border <- par("fg") if (!add) { plot.new() if (horizontal) plot.window(ylim = xlim, xlim = ylim, log = log, bty = 'l', xaxs = pars$yaxs) else plot.window(xlim = xlim, ylim = ylim, log = log, bty = 'l', yaxs = pars$yaxs) } xlog <- (par("ylog") && horizontal) || (par("xlog") && !horizontal) pcycle <- function(p, def1, def2 = NULL) rep(if (length(p)) p else if (length(def1)) def1 else def2, length.out = n) p <- function(sym) pars[[sym, exact = TRUE]] boxlty <- pcycle(pars$boxlty, p("lty"), par("lty")) boxlwd <- pcycle(pars$boxlwd, p("lwd"), par("lwd")) boxcol <- pcycle(pars$boxcol, border) boxfill <- pcycle(pars$boxfill, par("bg")) boxwex <- pcycle(pars$boxwex, 0.8 * { if (n <= 1) 1 else stats::quantile(diff(sort(if (xlog) log(at) else at)), 0.1) }) medlty <- pcycle(pars$medlty, p("lty"), par("lty")) medlwd <- pcycle(pars$medlwd, 3 * p("lwd"), 3 * par("lwd")) medpch <- pcycle(pars$medpch, NA_integer_) medcex <- pcycle(pars$medcex, p("cex"), par("cex")) medcol <- pcycle(pars$medcol, border) medbg <- pcycle(pars$medbg, p("bg"), par("bg")) whisklty <- pcycle(pars$whisklty, p("lty"), "dashed") whisklwd <- pcycle(pars$whisklwd, p("lwd"), par("lwd")) whiskcol <- pcycle(pars$whiskcol, border) staplelty <- pcycle(pars$staplelty, p("lty"), par("lty")) staplelwd <- pcycle(pars$staplelwd, p("lwd"), par("lwd")) staplecol <- pcycle(pars$staplecol, border) staplewex <- pcycle(pars$staplewex, 0.5) outlty <- pcycle(pars$outlty, "blank") outlwd <- pcycle(pars$outlwd, p("lwd"), par("lwd")) outpch <- pcycle(pars$outpch, p("pch"), par("pch")) outcex <- pcycle(pars$outcex, p("cex"), par("cex")) outcol <- pcycle(pars$outcol, border) outbg <- pcycle(pars$outbg, p("bg"), par("bg")) outwex <- pcycle(pars$outwex, 0.5) width <- if (!is.null(width)) { if (length(width) != n | any(is.na(width)) | any(width <= 0)) stop("invalid boxplot widths") boxwex * width/max(width) } else if (varwidth) boxwex * sqrt(z$n/max(z$n)) else if (n == 1) 0.5 * boxwex else rep.int(boxwex, n) if (horizontal) { xypoints <- function(x, y, ...) points(y, x, ...) xypolygon <- function(x, y, ...) polygon(y, x, ...) xysegments <- function(x0, y0, x1, y1, ...) segments(y0, x0, y1, x1, ...) } else { xypoints <- points xypolygon <- polygon xysegments <- segments } ok <- TRUE for (i in 1L:n) ok <- ok & bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z$group == i], conf = z$conf[, i], notch = notch, xlog = xlog, i = i) if (!ok) warning("some notches went outside hinges ('box'): maybe set notch=FALSE") axes <- is.null(pars$axes) if (!axes) { axes <- pars$axes pars$axes <- NULL } if (axes) { ax.pars <- pars[names(pars) %in% c("xaxt", "yaxt", "xaxp", "yaxp", "las", "cex.axis", "col.axis", "format")] if (is.null(show.names)) show.names <- n > 1 if (show.names) do.call("axis", c(list(side = 1 + horizontal, at = at, labels = z$names), ax.pars)) do.call("Axis", c(list(x = z$stats, side = 2 - horizontal), ax.pars)) } do.call("title", pars[names(pars) %in% c("main", "cex.main", "col.main", "sub", "cex.sub", "col.sub", "xlab", "ylab", "cex.lab", "col.lab")]) if (frame.plot) box(bty='l') invisible(at) } #---------------------------------------------------- # ----- FIGURE 1 IN THE PAPER ------------------ #--------------------------------------------------- set.seed(2) require(gamlss.dist) D = rexGAUS(500, 300, 50, 300) C = runif(1000, 1000, 4000) F = c(rnorm(250, 700, 100), rnorm(250, 1300, 100)) # 2 plots in 2 rows and 3 columns par(mfrow=c(2,3)) # typeface set to Serif (or Times) par(family="serif") boxplot(D, main="Traditional boxplot"); rug(D, side=2) boxplot(D, notch=T, main="Boxplot with notches"); rug(D, side=2) require(robustbase) adjbox(D, main="Adjusted boxplot"); rug(D, side=2) require(Hmisc) bpplot(D, name=c('')); rug(D, side=2) require(vioplot) vioplot(D, col="white");rug(D, side=2) require(lvplot) LVboxplot(D, horizontal=FALSE, ylab="", main="Letter Value Boxplot"); rug(D, side=2) #---------------------------------------------------- # ----- FIGURE 2 IN THE PAPER ------------------ #--------------------------------------------------- set.seed(100) require(gamlss.dist) D = rnorm(50, 300, 50) C = runif(50, 200, 400) F = c(rnorm(25, 200, 30), rnorm(25, 400, 30)) # 2 plots in 2 rows and 2 columns par(mfrow=c(3,2)) # typeface set to Serif (or Times) par(family="serif") # ----- pair 1 require(graphics) boxplot(D, main=""); stripchart(D, add=TRUE, method="jitter", vertical=T, col="grey") require(vioplot) vioplot(D, col="white"); stripchart(D, add=TRUE, method="jitter", vertical=T, col="grey") #------ pair 2 boxplot(C, main=""); stripchart(C, add=TRUE, method="jitter", vertical=T, col="grey") vioplot(C, col="white"); stripchart(C, add=TRUE, method="jitter", vertical=T, col="grey") #----- pair 3 boxplot(F, main=""); stripchart(F, add=TRUE, method="jitter", vertical=T, col="grey") vioplot(F, col="white"); stripchart(F, add=TRUE, method="jitter", vertical=T, col="grey") #---------------------------------------------------- # ----- FIGURE 3 IN THE PAPER ------------------ #--------------------------------------------------- set.seed(4) a = rnorm(15, 300, 50) b = rnorm(30, 300, 50) c = rnorm(100, 300, 50) require(gamlss.dist) d = rexGAUS(15, 300, 50, 300) e = rexGAUS(30, 300, 50, 300) f = rexGAUS(100, 300, 50, 300) # 2 plots in 2 rows and 3 columns par(mfrow=c(2,3)) # typeface set to Serif (or Times) par(family="serif") sbplot(a, main="n=15"); rug(a, side = 2) sbplot(b, main="n=30"); rug(b, side=2) sbplot(c, main="n=100"); rug(c, side=2) sbplot(d, main="n=15", plot.median=F); rug(d, side=2) sbplot(e, main="n=30", plot.median=F); rug(e, side=2) sbplot(f, main="n=100", plot.median=F); rug(f, side=2) #---------------------------------------------------- # ----- AN EXAMPLE OF THE SB PRESENTING DATA -------- #--------------------------------------------------- require(gplots) #for smartlegend data(ToothGrowth) attach(ToothGrowth) names(ToothGrowth) # typeface set to Serif (or Times) par(family="serif") sbplot(len ~ dose, data = ToothGrowth, main="Guinea Pigs' Tooth Growth", xlab="Vitamin C dose mg", at = 1:3, box.ratio = .2, pos.offset = -0.2, ylab="tooth length", ylim=c(0,35), subset= supp == "VC", col = 'black') # this one produces the second subset sbplot(len ~ dose, data = ToothGrowth, add = TRUE, pos.offset = 0.2, at = 1:3, box.ratio = .2, subset= supp == "OJ", col = 'red') # finally a legend smartlegend(x="left",y="top", inset = 0, c("Ascorbic acid", "Orange juice"), fill = c("black", "red"))