SCM

SCM Repository

[directlabels] View of /tex/2012-semin-r/2012-03-29-HOCKING-directlabels-semin-r.Rnw
ViewVC logotype

View of /tex/2012-semin-r/2012-03-29-HOCKING-directlabels-semin-r.Rnw

Parent Directory Parent Directory | Revision Log Revision Log


Revision 636 - (download) (annotate)
Thu Oct 18 07:17:22 2012 UTC (4 years, 8 months ago) by tdhock
File size: 10696 byte(s)
new methods todo
\documentclass[10pt]{beamer}

\newcommand{\RR}{\mathbb R}

\begin{document}

\title{Recent developments in direct labeled graphics\\
  {\small\url{http://directlabels.r-forge.r-project.org}}}
\author{Toby Dylan Hocking \\ toby.hocking@inria.fr}
\date{29 March 2012}

\frame{\titlepage}

\SweaveOpts{dev='tikz',fig.width=9,fig.height=4.5,out.width='\\linewidth',cache=TRUE,
  tidy=FALSE,
fig.path='figure/graphics-',cache.path='cache/graphics-',fig.align='center'}

\frame{\tableofcontents}
\section{Motivation: confusing legends}

<<echo=FALSE,results='hide',cache=FALSE>>=
## Hidden setup chunk: attach the packages used by the slides, define
## the big.last Positioning Method, and build the data frame `path`
## that the LASSO slides plot.
options(width=60)
suppressPackageStartupMessages({
  ## Same attach order as listing each library() call one after another.
  for(pkg in c("grid","directlabels","lattice","ggplot2","proto",
               "quadprog","lars","reshape2")){
    library(pkg,character.only=TRUE)
  }
})
## A Positioning Method given as a list: cex=1.5 followed by "last.qp".
big.last <- list(cex=1.5,"last.qp")
## Keep the rows with train==TRUE and drop the train column itself.
data(prostate,package="ElemStatLearn")
pros <- subset(prostate,train==TRUE,select=-train)
## Split into the response (lpsa) and the matrix of remaining columns.
ycol <- which(names(pros)=="lpsa")
x <- as.matrix(pros[-ycol])
y <- pros[[ycol]]
fit <- lars(x,y,type="lasso")
## No centering; each coefficient column is divided by 1/fit$normx.
beta <- scale(coef(fit),center=FALSE,scale=1/fit$normx)
## Sum of absolute coefficients per row of beta (x-axis of the path plot).
arclength <- rowSums(abs(beta))
## Long format; arclength is recycled across the melted columns.
path <- data.frame(melt(beta),arclength)
names(path)[1:3] <- c("step","variable","standardized.coef")
@ 


<<echo=FALSE,results='hide'>>=
## Simulated scores: three batches of beta draws (800, 100 and 100
## values), each batch labelled with its selection type.
set.seed(1)
type.counts <- c(Neutral=800,Positive=100,Balancing=100)
score <- c(rbeta(800,10,10),rbeta(100,0.15,1),rbeta(100,1,0.25))
loci <- data.frame(score,
  type=factor(rep(names(type.counts),type.counts)))
head(loci)
@ 
  

% Slide: lattice density plot of the loci scores, grouped by type, with
% the legend (auto.key) placed above the panel in 3 columns. The plot
% object is stored in `dens` and printed here.
\begin{frame}[fragile]
  \frametitle{Problem 1: legend inconsistent with data}
<<dens-confusing>>=
library(lattice)
dens <- densityplot(~score,loci,groups=type,
  auto.key=list(space="top",columns=3),n=500,
  main="Distribution of scores by selection type")
print(dens)
@   
\end{frame}


% Slide: ggplot2 line plot of the BodyWeight data (weight against Time,
% one colour per Rat, one facet per Diet) with the default legend. The
% plot object is stored in `ratplot` and printed here.
\begin{frame}[fragile]
  \frametitle{Problem 2: too many classes render legend unreadable}
<<rat-unreadable>>=
data(BodyWeight,package="nlme")
library(ggplot2)
ratplot <- ggplot(BodyWeight,aes(Time,weight,colour=Rat))+
  facet_grid(.~Diet)+
  geom_line()
print(ratplot)
@   
\end{frame}


\frame{\tableofcontents}
\section{How to add direct labels to some common plots}
\begin{frame}\frametitle{The protocol I use for everyday plots in practice:}
  Do as many steps as needed until the plot is readable:
  \begin{enumerate}
  \item Make a lattice or ggplot2 plot \texttt{p} using colors and
    default legends.
  \item Try the default direct labels: \texttt{direct.label(p)}.
  \item Check to see if another Positioning Method
    exists on\\{\small \url{http://directlabels.r-forge.r-project.org/docs/index.html}}
    then use \texttt{direct.label(p,"method")}.
  \item If no Positioning Methods exist you can always write your own.
  \end{enumerate}
\end{frame}
% 1---- examples: works well by default

% http://directlabels.r-forge.r-project.org/docs/lineplot/plots/sexdeaths.html

% Slide: call direct.label() on the density plot `dens` without naming
% a Positioning Method, so the package default is used.
\begin{frame}[fragile]
  \frametitle{Add default direct labels at the mode of each density}
<<dens>>=
library(directlabels)
direct.label(dens)
@   
\end{frame}

% Slide: default direct labels on a two-group lattice line plot. The
% first chunk is hidden and only assembles the data frame uk.lung; the
% second chunk is the code shown on the slide.
\begin{frame}[fragile]
  \frametitle{With 2 groups, we label the min and max points}
<<echo=FALSE,results='hide'>>=
## Turn the fractional-year time index of mdeaths into dates that fall
## on day 1 of the corresponding month.
tx <- time(mdeaths)
Time <- ISOdate(year=floor(tx),month=round(tx%%1 * 12)+1,
                day=1,hour=0,min=0,sec=0)
## Stack the male and female series into one long data frame.
uk.lung <- rbind(
  data.frame(Time,sex="male",deaths=as.integer(mdeaths)),
  data.frame(Time,sex="female",deaths=as.integer(fdeaths)))
@ 

<<lines2>>=
p <- xyplot(deaths~Time,uk.lung,
            groups=sex,type=c("l","g"))
direct.label(p)
@   
\end{frame}

% Slide: lattice scatterplot of jittered iris sepal/petal lengths,
% grouped by Species, with default direct labels. set.seed(1) is called
% before the jitter() calls; the plot object is stored in `irisp`.
\begin{frame}[fragile]
  \frametitle{Label a scatterplot of the iris data by species}
<<irisp>>=
set.seed(1)
irisp <- xyplot(jitter(Sepal.Length)~jitter(Petal.Length),
                iris,groups=Species)
direct.label(irisp)
@   
\end{frame}






% Slide: call direct.label() on the ggplot2 object `ratplot` without
% naming a Positioning Method, so the package default is used.
\begin{frame}[fragile]
  \frametitle{Default direct labels for lineplots}
<<ratplot>>=
direct.label(ratplot)
@   
\end{frame}

% Slide: same plot as the default-label slide, but the Positioning
% Method is chosen explicitly by name ("last.qp").
\begin{frame}[fragile]
  \frametitle{Look up the Positioning Method on the directlabels website}
<<ratplot-last-qp>>=
direct.label(ratplot,"last.qp")
@   
\end{frame}

% Slide: set explicit axis limits on `ratplot` (result stored in `rp2`),
% define the Positioning Method `big.last` as a list (cex=1.5 followed
% by "last.qp"), and pass it to direct.label() by name.
\begin{frame}[fragile]
  \frametitle{Construct your own custom Positioning Method}
<<big-last>>=
rp2 <- ratplot+
  xlim(0,70)+ylim(150,650)
big.last <- list(cex=1.5,"last.qp")
direct.label(rp2,"big.last") 
@   
\end{frame}



% Slide: LASSO coefficient paths (one line per variable, built from the
% data frame `path`) with default direct labels. The plot object is
% stored in `lasso.plot`.
% The frame must be [fragile]: knitr typesets the echoed chunk in
% verbatim-like environments, which beamer cannot process inside an
% ordinary frame.
\begin{frame}[fragile]
  \frametitle{Direct label the LASSO path to visualize variables}
<<lasso>>=
lasso.plot <- ggplot(path,aes(arclength,standardized.coef,colour=variable))+
  geom_line(aes(group=variable))+
  ggtitle("LASSO path for prostate cancer data calculated using the LARS")+
  xlim(0,20)
direct.label(lasso.plot)  

@ 
\end{frame}

% Slide: label `lasso.plot` with the "lasso.labels" Positioning Method.
% The frame must be [fragile]: knitr typesets the echoed chunk in
% verbatim-like environments, which beamer cannot process inside an
% ordinary frame.
\begin{frame}[fragile]
  \frametitle{Label the zero point to emphasize variable
    importance}
<<lasso-labels>>=
direct.label(lasso.plot,"lasso.labels")

@ 
\end{frame}


% Slide: same as the "lasso.labels" slide, with cex=2 placed before the
% method name in the Positioning Method list.
% The frame must be [fragile]: knitr typesets the echoed chunk in
% verbatim-like environments, which beamer cannot process inside an
% ordinary frame.
\begin{frame}[fragile]
  \frametitle{Increase text size to make reading easier}
<<lasso-labels-big>>=
direct.label(lasso.plot,list(cex=2,"lasso.labels"))

@ 
\end{frame}




 


% 3---- examples: ... writing custom methods?

% scatterplot: make numbers bigger
% http://directlabels.r-forge.r-project.org/docs/scatterplot/plots/cylinders.html

% ridge regression lineplot: space out using dl.trans
% http://directlabels.r-forge.r-project.org/docs/lineplot/plots/ridge.html

% lasso plot: add lasso.labels as well!
% http://directlabels.r-forge.r-project.org/docs/lineplot/plots/lars.html

\frame{\tableofcontents}
\section{Recent developments in direct labeling}

\begin{frame}
   \frametitle{Changes in recent versions of directlabels}
   \begin{tabular}{rcccc}
directlabels version  & $<2.0$ &$<2.0$& $\geq 2.0$ & $\geq 2.0$\\
  plotting package & lattice & ggplot2 & lattice & ggplot2\\
     \hline
     basic
     Positioning Methods & \checkmark & \checkmark & \checkmark & \checkmark \\
     \hline
     smart Positioning Methods\\
    that avoid label collisions & \checkmark &  & \checkmark & \checkmark \\
    \hline
    redraw labels\\
     after window resize  &  &  & \checkmark & \checkmark \\
     \hline
      \texttt{fontface} and \texttt{fontfamily}\\
  text parameters   & \checkmark &  & \checkmark & \checkmark \\
     \hline
label black and white plots   & \checkmark &  & \checkmark & \checkmark \\
      \hline
      label aesthetics\\
  other than color   &  &  &  & \checkmark \\
   \end{tabular}
\end{frame}


% Slide: show the directly labelled iris scatterplot `irisp` again, as
% the starting point for the debug view on the following slide.
\begin{frame}[fragile]
  \frametitle{Label a scatterplot of the iris data by species}
<<irisp2,results='hide'>>=
direct.label(irisp) 
@   
\end{frame}


% Slide: same call as the previous iris slide, with debug=TRUE passed to
% direct.label(). The TODO line below is typeset on the slide as-is.
\begin{frame}[fragile]
  \frametitle{Show the grid for the search}
  TODO: animation with grey and black boxes, as in thesis.
<<irisp-debug,results='hide'>>=
direct.label(irisp,debug=TRUE) 
@   
\end{frame}





 
% Slide: show the code and plot of the big-last chunk a second time.
% A chunk reference written without the trailing "=" is not a chunk
% header and would be typeset as literal text, so the code is pulled in
% through an empty chunk that names the original via ref.label. The new
% label keeps figure and cache file names distinct from the original.
\begin{frame}[fragile]
  \frametitle{Increase text size \alert{before} calculating label positions}
<<big-last-again,ref.label='big-last'>>=
@
\end{frame}

% Slide: extend the big.last method with two further steps,
% "calc.boxes" and then "draw.rects", applied in list order to `rp2`.
\begin{frame}[fragile]
  \frametitle{Show the label borders used in the position calculation}
<<big-last-boxes>>=
direct.label(rp2,
  list("big.last",
       "calc.boxes",
       "draw.rects"))
@   
\end{frame}

\begin{frame}[fragile]
  \frametitle{Label positions for lineplots are the solutions of a QP}
  \begin{minipage}{1.1in}
<<qp-fig,background='white',echo=FALSE,fig.width=2,fig.height=2,out.width='1in',out.height='1in'>>=
## Schematic for the QP slide: four grey boxes centred at y with
## heights h, annotated with t_i on the right axis and h_i beside each box.
par(mar=c(0,0,0,3))
y <- c(1,2,5,6)
x <- rep(0,length(y))
h <- c(1,0.5,1,0.8)
## Vertical and horizontal extents of the boxes.
box.bottom <- y-h/2
box.top <- y+h/2
box.left <- 0.15
box.right <- 0.35
## Subscripts shown for the four boxes; dots.y is where the vertical
## dots between the second and third box are drawn.
subscript <- c(1,2,"k-1","k")
dots.y <- 3.5
plot(c(box.left,box.right),c(0,7),type="n")
rect(box.left,box.bottom,box.right,box.top,col="grey90")
## Right-hand axis: one tick per box centre, labelled t_i.
axis(4,y,sprintf("$t_{%s}$",subscript),las=2)
axis(4,dots.y,"$\\vdots$",las=2,tick=FALSE)
## Double-headed arrows spanning each box, labelled h_i.
arrows(0.2,box.bottom,0.2,box.top,0.1,code=3)
text(0.25,y,sprintf("$h_{%s}$",subscript),adj=c(0,0.5))
text(0.25,dots.y,"$\\vdots$",adj=c(0,0.5))
@   
\end{minipage}
\begin{minipage}{3in}
    Assume that for each text label
    $i=1,\dots,k$ 
    we have its 
    position $t_i$ and 
    height $h_i$.

    Then optimal direct labels do not overlap, and are as close as
  possible to the target locations:
\begin{equation*}
\begin{aligned}
&\min_{b\in\RR^k} && \sum_{i=1}^k (b_i-t_i)^2
=\lVert b-t\rVert^2
\\
&\text{subject to} && b_{i+1}- h_{i+1}/2\geq b_i+h_i/2,
\ \forall\ i=1,\dots,k-1\\
&&& b_1 - h_1/2 \geq \underline L\\
&&& b_k +h_k/2 \leq \overline L
\end{aligned}
\end{equation*}
This is a quadratic program (QP). Its objective is strictly convex and
its constraints are linear, so there is a unique global minimum which
corresponds to the best labels. 

We can solve this using \texttt{quadprog::solve.QP()} and use the
optimal $b$ for the direct label positions.
\end{minipage}
\end{frame}

% Slide: first step of a three-slide build-up. The Positioning Method
% list is "last.points", cex=1.5, "calc.boxes", then "draw.rects".
\begin{frame}[fragile]
  \frametitle{Start with boxes at the end of each line}
<<tall-boxes1>>=
direct.label(rp2,list("last.points",cex=1.5,
                      "calc.boxes", 
                      "draw.rects"))
@   
\end{frame}

% Slide: second step. Same list as the previous slide, with
% dl.trans(h=h+h/3) and "calc.borders" inserted after "calc.boxes" and
% before "draw.rects".
\begin{frame}[fragile]
  \frametitle{Adjust box height if desired}
<<tall-boxes2>>=
direct.label(rp2,list("last.points",cex=1.5,"calc.boxes",
                      dl.trans(h=h+h/3),"calc.borders",
                      "draw.rects"))
@   
\end{frame}

% Slide: final step. The closing "draw.rects" of the previous slide is
% replaced by qp.labels("y","h").
\begin{frame}[fragile]
  \frametitle{Apply QP solver to get optimal labels}
<<tall-boxes3>>=
direct.label(rp2,list("last.points",cex=1.5,"calc.boxes",
                      dl.trans(h=h+h/3),"calc.borders",
                      qp.labels("y","h")))
@   
\end{frame}





\frame{\tableofcontents}
\section{Conclusions}

\begin{frame}[fragile]
  \frametitle{What directlabels is NOT}

  \begin{itemize}
  \item Automatic publication-quality direct labels.
    \\ (some manual tweaking will always be necessary)
  \item Optimal labels for individual points on scatterplots.
    \\ (it is a bit more complicated)
  \end{itemize}
<<pointLabel,background='white',echo=FALSE,results='hide'>>=
## Hidden demo for the "what directlabels is NOT" slide: a base-graphics
## scatterplot of 50 random points, each labelled with its rounded x
## value by pointLabel() from the sourced script.
source("pointLabel.R")
set.seed(100)
n <- 50
x <- rnorm(n)*10
y <- rnorm(n)*10
point.text <- as.character(round(x,5))
plot(x,y,col="red",pch=20)
pointLabel(x,y,point.text,offset=0,
           allowSmallOverlap=FALSE,cex=0.7,trace=TRUE)
@       
\end{frame}

\begin{frame}[fragile]
  \frametitle{Use directlabels instead of confusing legends!}
  \begin{itemize}
    \item Works with lattice and ggplot2.
    \item Sensible defaults.
    \item Useful in everyday plots in practice.
    \item Smart Positioning Methods that avoid label collisions.
    \item Customizable: you can write your own Positioning Methods.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Future work}
    \begin{itemize}
    \item Automatically adjust xlim/ylim so labels stay on plot region?
    \item Contourplot labels as in \texttt{contour()}?
    \item Label using images instead of textual factor names?\\
      Possible Google Summer of Code 2012 project:\\
      {\scriptsize \url{http://rwiki.sciviews.org/doku.php?id=developers:projects:gsoc2012}}

    \end{itemize}
    \includegraphics[width=2in]{iris-images}
\end{frame}

\end{document}

R-Forge@R-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business University of Wisconsin - Madison Powered By FusionForge