######################################################################## # # John W. Emerson # Department of Statistics # Yale University # # Walton Green # Department of Geology and Geophysics # Yale University # ######################################################################## ######################################################################## ## Copyright (C) 2006 John W. Emerson ## ## This document is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2, or (at your option) ## any later version. ## ## This program is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## A copy of the GNU General Public License is available via WWW at ## http://www.gnu.org/copyleft/gpl.html. You can also obtain it by ## writing to the Free Software Foundation, Inc., 675 Mass Ave, ## Cambridge, MA 02139, USA. ## ## Bug reports to john.emerson@yale.edu ######################################################################## ######################################################################## gpairs <- function(x, outer.margins=list(bottom=unit(2, "lines"), left=unit(2, "lines"), top=unit(2, "lines"), right=unit(2, "lines")), outer.labels=NULL, outer.rot=c(0,0), gap=0.05, buffer=0.02, stat.pars=NULL, scatter.pars=NULL, bwplot.pars=NULL, stripplot.pars=NULL, mosaic.pars=NULL, axis.pars=NULL, diag.pars=NULL, upper.pars=NULL, lower.pars=NULL, whatis=FALSE ) { ######################################## # Sanity checks and initialization here: if (!require(grid)) stop("library(grid) is required and unavailable.\n\n") if (!require(lattice)) stop("library(lattice) is required and unavailable.\n\n") if (!require(vcd)) stop("library(vcd) is required and unavailable.\n\n") if (!is.data.frame(x)) { if (!is.matrix(x)) { stop("You might want to use Excel: what did you give me??") } else { x <- as.data.frame(x) } } if (is.null(outer.labels)) { outer.labels$top <- rep(FALSE, ncol(x)) outer.labels$top[seq(2, ncol(x), by=2)] <- TRUE outer.labels$left <- rep(FALSE, ncol(x)) outer.labels$left[seq(2, ncol(x), by=2)] <- TRUE outer.labels$right <- !outer.labels$left outer.labels$bottom <- !outer.labels$top } else { if (pmatch(as.character(outer.labels), "all", nomatch = FALSE)) { all.labeling <- TRUE } else if (pmatch(as.character(outer.labels), "none", nomatch = FALSE)){ all.labeling <- FALSE } else { stop('argument to outer.labels not understood\n') } outer.labels <- NULL outer.labels$top <- rep(all.labeling, ncol(x)) outer.labels$left <- rep(all.labeling, ncol(x)) outer.labels$bottom <- rep(all.labeling, ncol(x)) outer.labels$right <- rep(all.labeling, ncol(x)) } if (is.null(stat.pars$fontsize)) { stat.pars$fontsize <- 7 } if (is.null(stat.pars$signif)) { stat.pars$signif <- 0.05 } if (is.null(scatter.pars$pch)) { scatter.pars$pch <- 1 } if (is.null(scatter.pars$size)) { scatter.pars$size <- unit(0.25, "char") } if (is.null(scatter.pars$col)) { scatter.pars$col <- "black" } if (is.null(axis.pars$n.ticks)) { axis.pars$n.ticks <- 5 } if (is.null(axis.pars$fontsize)) { axis.pars$fontsize <- 9 } if (is.null(diag.pars$fontsize)) { diag.pars$fontsize <- 9 } if (is.null(diag.pars$show.hist)) { diag.pars$show.hist <- TRUE } if (is.null(diag.pars$hist.color)) { diag.pars$hist.color <- "black" } if (is.null(stripplot.pars$pch)) { stripplot.pars$pch <- 1 } if (is.null(stripplot.pars$size)) { stripplot.pars$size <- unit(0.5, "char") } if (is.null(stripplot.pars$col)) { stripplot.pars$col <- "black" } if (is.null(stripplot.pars$jitter)) { stripplot.pars$jitter <- FALSE } if (is.null(upper.pars$lm.plot)) { upper.pars$lm.plot <- FALSE } if (is.null(upper.pars$ci.plot)) { upper.pars$ci.plot <- FALSE } if (is.null(upper.pars$loess.plot)) { upper.pars$loess.plot <- FALSE } if (is.null(upper.pars$symlm.plot)) { upper.pars$symlm.plot <- FALSE } if (is.null(upper.pars$lm.stats)) { upper.pars$lm.stats <- FALSE } if (is.null(upper.pars$corrgram)) { upper.pars$corrgram <- FALSE } if (is.null(upper.pars$barcode)) { upper.pars$barcode <- TRUE } if (is.null(upper.pars$stripplot)) { upper.pars$stripplot <- FALSE } if (is.null(lower.pars$lm.plot)) { lower.pars$lm.plot <- FALSE } if (is.null(lower.pars$ci.plot)) { lower.pars$ci.plot <- FALSE } if (is.null(lower.pars$loess.plot)) { lower.pars$loess.plot <- FALSE } if (is.null(lower.pars$symlm.plot)) { lower.pars$symlm.plot <- FALSE } if (is.null(lower.pars$lm.stats)) { lower.pars$lm.stats <- FALSE } if (is.null(lower.pars$corrgram)) { lower.pars$corrgram <- FALSE } if (is.null(lower.pars$barcode)) { lower.pars$barcode <- FALSE } if (is.null(lower.pars$stripplot)) { lower.pars$stripplot <- FALSE } lower.pars$qqplot <- 1 - (lower.pars$lm.plot + lower.pars$ci.plot + lower.pars$loess.plot + lower.pars$symlm.plot + lower.pars$lm.stats + lower.pars$corrgram + lower.pars$barcode + lower.pars$stripplot) # Similar options for boxplot-style tiles # Similar options for mosaic-style tiles if (is.null(mosaic.pars$gp_labels)) { mosaic.pars$gp_labels <- gpar(fontsize=9) } #if (is.null(mosaic.pars$rot_labels)) { mosaic.pars$rot_labels <- c(0,90,0,90) } if (axis.pars$n.ticks<3) { axis.pars$n.ticks <- 3 warning("Fewer than 3 axis ticks might cause problems.") } ##################################################################### # Panel functions ################################################### draw.axis <- function(x, y, axis.pars, xpos, ypos, cat.labels=NULL, horiz=NULL) { px <- pretty(x, axis.pars$n.ticks) px <- px[px>min(x, na.rm=TRUE) & pxmin(y, na.rm=TRUE) & py j))){ complete.obs <- nrow(na.omit(cbind(x, y))) missing <- length(x) - complete.obs # This should be smarter to save time... pear.test <- cor.test (x, y, method = 'pearson', alternative ='two.sided') corr <- format(pear.test$estimate, digits = 2) rho.test <- cor.test(x, y, method = 'spearman', alternative = 'two.sided') tau.test <- cor.test(x, y, method = 'kendall', alternative = 'two.sided') rho <- format(rho.test$estimate, digits = 2) tau <- format(tau.test$estimate, digits = 2) xy.lm <- lm(y ~ x) r2 <- format (summary(xy.lm)$r.squared, digits = 2) p <- format (pf(q= as.numeric(summary(xy.lm)$fstatistic)[1], df1 = as.numeric(summary(lm (xy.lm))$fstatistic)[2], df2 = as.numeric(summary(lm (xy.lm))$fstatistic)[3], lower.tail = FALSE), digits = 2) bonfp = stat.pars$signif / (N * (N - 1)) / 2 sig <- 1 sigrho <- NULL sigtau <- NULL sigcor <- NULL sigp <- NULL if(pear.test$p.value < bonfp){ sig <- sig + 1 sigcor <- '*' } if(rho.test$p.value < bonfp){ sig <- sig + 1 sigrho <- '*' } if(tau.test$p.value < bonfp){ sig <- sig + 1 sigtau <- '*' } if(as.numeric(p) < bonfp){ sig <- sig + 1 sigp <- '*' } if(upper.pars$corrgram != TRUE && lower.pars$corrgram != TRUE){ if(mean(as.numeric(rho), as.numeric(tau), as.numeric(corr)) > 0){ text.color <- 'black' if(sig == 1){ box.color <- 0.5 }else if(sig > 1 && sig < 5){ box.color <- 0.75 }else if(sig == 5){ box.color <- 1 } }else if(mean(as.numeric(rho), as.numeric(tau), as.numeric(corr)) < 0){ text.color <- 'white' if(sig == 1){ box.color <- 0.5 }else if(sig > 1 && sig < 5){ box.color <- 0.25 }else if(sig == 5){ box.color <- 0 } } panel.fill(col = grey(box.color), border = grey(box.color)) }else{ zebra.col = 0.5 panel.fill(col = hsv(h = zebra.col, s = abs(as.numeric(corr)), v = 1), border = hsv(h = zebra.col, s = abs(as.numeric(corr)), v = 1)) if(as.numeric(corr) > 0){ grid.lines(x = unit(c(0,1), 'npc'), y = unit(c(0,1), 'npc'), gp = gpar(col = 'white', lwd = 2)) }else{ grid.lines(x = unit(c(0,1), 'npc'), y = unit(c(1,0), 'npc'), gp = gpar(col = 'white', lwd = 2)) } text.color = 'black' popViewport(1) return() } if(upper.pars$lm.stats == 'verbose' || lower.pars$lm.stats == 'verbose'){ grid.text(bquote(rho == .(rho) * .(sigrho)), x = 0.5, y = 0.9, gp=gpar(fontsize=stat.pars$fontsize, col = text.color)) grid.text(bquote(tau == .(tau) * .(sigtau)), x = 0.5, y = 0.7, gp=gpar(fontsize=stat.pars$fontsize, col = text.color)) grid.text(paste ('r=', corr, sigcor, sep = ''), x = 0.5, y = 0.5, gp=gpar(fontsize=stat.pars$fontsize, col = text.color)) #panel.text (x = 0.5, y = 0.3, labels = bquote(r^2 == .(r2)), col = text.color) grid.text(paste ('p=', p, sigp, sep = ''), x = 0.5, y = 0.3, gp=gpar(fontsize=stat.pars$fontsize, col = text.color)) grid.text(paste(missing, 'missing'), x = 0.5, y = 0.1, gp=gpar(fontsize=stat.pars$fontsize, col = 'red')) } else { grid.text(paste ('r=', corr, sigcor, sep = ''), x = 0.5, y = 0.7, gp=gpar(fontsize=stat.pars$fontsize, col = text.color)) grid.text(paste(missing, 'missing'), x = 0.5, y = 0.3, gp=gpar(fontsize=stat.pars$fontsize, col = text.color)) } popViewport(1) return() } if(((upper.pars$ci.plot == TRUE) && (i < j)) || ((lower.pars$ci.plot == TRUE) && (i > j))){ xy.lm <- lm(y ~ x) # the next three lines modified from Maindonald & Braun p117 xy <- data.frame(x = seq(min(x, na.rm=TRUE), max(x, na.rm=TRUE), length.out = 20)) yhat <- predict(xy.lm, newdata = xy, interval = 'confidence') ci <- data.frame(lower = yhat[,'lwr'], upper = yhat[,'upr']) grid.lines(x = c(xy$x), y = c(ci$lower), default.units = 'native') grid.lines(x = c(xy$x), y = c(ci$upper), default.units = 'native') grid.polygon(x = c(xy$x, xy$x[length(xy$x):1]), y = c(ci$lower, ci$upper[length(ci$upper):1]), gp = gpar(fill = 'grey'), default.units = 'native') } if(((upper.pars$lm.plot == TRUE) && (i < j)) || ((lower.pars$lm.plot == TRUE) && (i > j))){ xy.lm <- lm(y ~ x) panel.abline(xy.lm$coef[1], xy.lm$coef[2], col = 'red', lwd=2) } if(((upper.pars$loess.plot == TRUE) && (i < j)) || ((lower.pars$loess.plot == TRUE) && (i > j))){ panel.loess(x, y, col = 'red') } if(((upper.pars$symlm.plot == TRUE) && (i < j)) || ((lower.pars$symlm.plot == TRUE) && (i > j))){ symlm <- function(x, y, driver = prcomp, ...){ pcs <- prcomp(cbind(x,y)) slope <- -(pcs$rotation[1,2] / pcs$rotation[1,1]) coefficients <- c(pcs$center[2] - slope * pcs$center[1], slope) fitted.values <- pcs$x[,1] residuals <- pcs$x[,2] return(list(coefficients = coefficients, residuals = residuals, fitted.values = fitted.values)) } # End of symlm function xy.symlm <- symlm(x,y) panel.abline(xy.symlm$coef[1], xy.symlm$coef[2], col = 'blue') } grid.points(x, y, pch=scatter.pars$pch, size=scatter.pars$size, gp=gpar(col=scatter.pars$col)) popViewport(1) } mosaic.panel <- function(x, y, mosaic.pars, axis.pars, xpos, ypos) { if (!is.null(xpos) & !is.null(ypos)) { strucplot(table(y,x), margins=c(0,0,0,0), newpage=FALSE, pop=FALSE, keep_aspect_ratio=FALSE, shade=mosaic.pars$shade, legend=FALSE, labeling_args=list(tl_labels=c(xpos, !ypos), gp_labels=mosaic.pars$gp_labels, varnames=c(FALSE,FALSE), rot_labels=c(outer.rot, outer.rot))) #mosaic.pars$rot_labels)) } else { if (is.null(xpos) & is.null(ypos)) { strucplot(table(y,x), margins=c(0,0,0,0), shade=mosaic.pars$shade, legend=FALSE, newpage=FALSE, pop=FALSE, keep_aspect_ratio=FALSE, labeling=NULL) } else { if (is.null(xpos)) { strucplot(table(y,x), margins=c(0,0,0,0), newpage=FALSE, pop=FALSE, keep_aspect_ratio=FALSE, shade=mosaic.pars$shade, legend=FALSE, labeling_args=list(labels=c(TRUE,FALSE), tl_labels=c(ypos, FALSE), gp_labels=mosaic.pars$gp_labels, varnames=c(FALSE,FALSE), rot_labels=c(outer.rot, outer.rot))) #rot_labels=mosaic.pars$rot_labels)) } else { strucplot(table(y,x), margins=c(0,0,0,0), newpage=FALSE, pop=FALSE, keep_aspect_ratio=FALSE, shade=mosaic.pars$shade, legend=FALSE, labeling_args=list(labels=c(FALSE,TRUE), tl_labels=c(FALSE, !xpos), gp_labels=mosaic.pars$gp_labels, varnames=c(FALSE,FALSE), rot_labels=c(outer.rot, outer.rot))) #rot_labels=mosaic.pars$rot_labels)) } } } } boxplot.panel <- function(x, y, catcont.pars, axis.pars, xpos, ypos, stripplot, stripplot.pars) { old.color <- trellis.par.get("box.rectangle")$col trellis.par.set(name="box.rectangle", value=list(col="black")) trellis.par.set(name="box.umbrella", value=list(col="black")) trellis.par.set(name="box.dot", value=list(col="black")) trellis.par.set(name="plot.symbol", value=list(col="black")) if (is.factor(x)) { cat.labels <- levels(x) k <- length(levels(x)) cat.var <- as.numeric(x) cont.var <- y horiz <- FALSE } else { cat.labels <- levels(y) k <- length(levels(y)) cat.labels <- cat.labels[k:1] cat.var <- k + 1 - as.numeric(y) cont.var <- x horiz <- TRUE } if (horiz) { xlim <- range(cont.var, na.rm=TRUE) + c(-buffer*(max(cont.var, na.rm=TRUE)-min(cont.var, na.rm=TRUE)), buffer*(max(cont.var, na.rm=TRUE)-min(cont.var, na.rm=TRUE))) pushViewport(viewport(xscale=xlim, yscale=c(0.5,max(cat.var, na.rm=TRUE)+0.5))) if (!stripplot) panel.bwplot(cont.var, cat.var, horizontal=horiz, col="black", pch="|", gp=gpar(box.umbrella=list(col="black"))) else panel.stripplot(cont.var, cat.var, horizontal=horiz, jitter=stripplot.pars$jitter, col=stripplot.pars$col, cex=stripplot.pars$size, pch=stripplot.pars$pch) if (is.null(ypos)) cat.labels <- NULL draw.axis(cont.var, cat.var, axis.pars, xpos, ypos, cat.labels, horiz) } else { ylim <- range(cont.var, na.rm=TRUE) + c(-buffer*(max(cont.var, na.rm=TRUE)-min(cont.var, na.rm=TRUE)), buffer*(max(cont.var, na.rm=TRUE)-min(cont.var, na.rm=TRUE))) pushViewport(viewport(yscale=ylim, xscale=c(0.5,max(cat.var, na.rm=TRUE)+0.5))) if (!stripplot) panel.bwplot(cat.var, cont.var, horizontal=horiz, col="black", pch="|", gp=gpar(box.umbrella=list(col="black"))) else panel.stripplot(cat.var, cont.var, horizontal=horiz, jitter=stripplot.pars$jitter, col=stripplot.pars$col, cex=stripplot.pars$size, pch=stripplot.pars$pch) if (is.null(xpos)) cat.labels <- NULL draw.axis(cat.var, cont.var, axis.pars, xpos, ypos, cat.labels, horiz) } grid.rect(gp=gpar(fill=NULL)) popViewport(1) trellis.par.set(name="box.rectangle", value=list(col=old.color)) trellis.par.set(name="box.umbrella", value=list(col=old.color)) trellis.par.set(name="box.dot", value=list(col=old.color)) trellis.par.set(name="plot.symbol", value=list(col=old.color)) } diag.panel <- function(x, varname, diag.pars, axis.pars, xpos, ypos) { #print(x) x <- x[!is.na(x)] pushViewport(dataViewport(as.numeric(x), as.numeric(x))) if (!diag.pars$show.hist) { grid.rect() grid.text(varname, 0.5, 0.5, gp=gpar(fontsize=diag.pars$fontsize, fontface=2)) } draw.axis(as.numeric(x), as.numeric(x), axis.pars, xpos, ypos, NULL, NULL) popViewport(1) if (diag.pars$show.hist) { if (!is.factor(x)) { pushViewport(viewport(xscale=c(min(as.numeric(x), na.rm=TRUE), max(as.numeric(x), na.rm=TRUE)), yscale=c(0, 100) )) } else { pushViewport(viewport(xscale=c(min(as.numeric(x), na.rm=TRUE)-1, max(as.numeric(x), na.rm=TRUE)+1), yscale=c(0, 100) )) } panel.histogram(as.numeric(x), breaks=NULL, type="percent", col=diag.pars$hist.color) grid.text(varname, 0.5, 0.85, gp=gpar(fontsize=diag.pars$fontsize)) popViewport(1) } } stats.panel <- function(){ } ################################################################### # MAIN ############################################################ grid.newpage() N <- ncol(x) vp.main <- viewport(x=outer.margins$bottom, y=outer.margins$left, w=unit(1, "npc")-outer.margins$right-outer.margins$left, h=unit(1, "npc")-outer.margins$top-outer.margins$bottom, just=c("left", "bottom"), name="main", clip="off") pushViewport(vp.main) tiles <- NULL for (i in 1:N) { for (j in 1:N) { x[is.infinite(x[,i]),i] <- NA x[is.infinite(x[,j]),j] <- NA vp <- viewport(x=(j-1)/N, y=1-i/N, w=1/N, h=1/N, just=c("left", "bottom"), name=as.character(i*N+j)) pushViewport(vp) vp.in <- viewport(x=0.5, y=0.5, w=1-gap, h=1-gap, just=c("center", "center"), name=paste("IN", as.character(i*N+j))) pushViewport(vp.in) xpos <- NULL if (i==1 && outer.labels$top[j] ) { xpos <- FALSE } if (i==N && outer.labels$bottom[j]) { xpos <- TRUE } ypos <- NULL if (j==N && outer.labels$right[i]) { ypos <- FALSE } if (j==1 && outer.labels$left[i]) { ypos <- TRUE } if (i==j) { diag.panel(x[,i], names(x)[i], diag.pars, axis.pars, xpos, ypos) } else { if (is.factor(x[,i]) + is.factor(x[,j]) == 1) { if (ij & !lower.pars$barcode) boxplot.panel(x[,j], x[,i], catcont.pars, axis.pars, xpos, ypos, lower.pars$stripplot, stripplot.pars) if (ij & lower.pars$barcode) { if (is.factor(x[,i])) barcode(split(x[,j], x[,i])[length(levels(x[,i])):1], horizontal=TRUE, labelloc=ypos, axisloc=xpos, labelouter=TRUE, newpage=FALSE, fontsize=axis.pars$fontsize) else { if (!is.null(ypos)) ypos <- !ypos barcode(split(x[,i], x[,j])[length(levels(x[,j])):1], horizontal=FALSE, labelloc=xpos, axisloc=ypos, labelouter=TRUE, newpage=FALSE, fontsize=axis.pars$fontsize) } } } if (is.factor(x[,i]) + is.factor(x[,j]) == 0) { if ( (i