# Stop Thinking, Just Do!

Sung-Soo Kim's Blog

# R Graphics

R is a high-level programming language used primarily for statistical computing and graphics.

## Base Graphics Package

library(ggplot2) # Loaded here for the diamonds dataset used below

# Base Graphics package
help(graphics)
demo(graphics, package = "graphics")

# Distribution of a single categorical column, three ways:
# calling plot() on the column directly, then a pie chart and a dot chart
# built from the table of counts.
plot(diamonds$color)
colors <- table(diamonds$color)
pie(colors, labels = names(colors))
dotchart(colors)

# NOTE(review): `which` is not a documented boxplot() argument; it is
# forwarded through `...` as written in the original -- confirm it is needed.
boxplot(which = "plot", diamonds$carat)
plot(diamonds$color, diamonds$carat)
plot(diamonds$carat, diamonds$color)
hist(diamonds$carat)
stripchart(diamonds$carat)
spineplot(diamonds$carat, diamonds$color)
pairs(data = diamonds, carat ~ price)

# Three examples to compare with other packages
plot(x = diamonds$cut) # bar
plot(x = diamonds$color, y = diamonds$price) # box
plot(x = diamonds$x, y = diamonds$y, cex = .3) # scatter


## Lattice Graphics Package

library(ggplot2) # Using this for the diamonds dataset

library(lattice)
demo("lattice", package = "lattice")

barchart(diamonds$color)
xyplot(price ~ carat, data = diamonds)

# Comparing ggplot2 and lattice
# http://learnr.wordpress.com/2009/06/28/ggplot2-version-of-figures-in-lattice-multivariate-data-visualization-with-r-part-1/

# Compare lattice and plot from base graphics
plot(iris$Sepal.Length, iris$Sepal.Width, cex = 0.5)

# Base call shape:    plot(x, y)
# Lattice call shape: xyplot(y ~ x, data = dataset)

# Lattice uses the formula form y ~ x.
# The data frame is passed separately, so only the columns need to be
# identified in the formula.
xyplot(Sepal.Width ~ Sepal.Length, iris)

# Lattice is better at dealing with additional variables.
# At times, analyzing 2 variables is insufficient.
# Interaction with a third variable might affect the relationship.
# This is possible but gets a bit cumbersome in base graphics.

# par() returns the previous settings; keep them so the 2x2 layout can be
# undone once the per-species panels have been drawn.
old_par <- par(mfrow = c(2, 2))

s <- subset(iris, Species == "virginica")
plot(s$Sepal.Length, s$Sepal.Width, cex = 0.5)
title(main = "virginica")

# skip a cell.
plot.new()

s <- subset(iris, Species == "setosa")
plot(s$Sepal.Length, s$Sepal.Width, cex = 0.5)
title(main = "setosa")

s <- subset(iris, Species == "versicolor")
plot(s$Sepal.Length, s$Sepal.Width, cex = 0.5)
title(main = "versicolor")

# Restore the previous layout so the base plots further down are not drawn
# into the leftover 2x2 grid.
par(old_par)

# Base graphics often requires a number of short function calls;
# lattice charts can often be created with a single long call.
xyplot(Sepal.Width ~ Sepal.Length | Species, iris)
# y ~ x | A means display the relationship between variables y and x
# separately for every level of factor A.

## ggplot2 Package

demo(package = "ggplot2") # None

# Comparing base R plots and ggplot2

# Bar Chart
plot(x = diamonds$color)
ggplot(diamonds, aes(x = color)) + geom_bar()

# Box Chart
plot(x = diamonds$color, y = diamonds$price)
ggplot(diamonds, aes(x = color, y = price)) + geom_boxplot()

# scatter plot
plot(x = diamonds$x, y = diamonds$y, cex = .3)
ggplot(diamonds, aes(x = x, y = y)) + geom_point()

# ggvis
library(ggvis)
library(ggplot2)

# The same scatter plot of the x and y columns, first with ggplot2 and then
# with ggvis; the last call binds the point size to an interactive slider.
ggplot(diamonds, aes(x = x, y = y)) +
  geom_point()
ggvis(diamonds, ~x, ~y) %>%
  layer_points()
ggvis(diamonds, ~x, ~y) %>%
  layer_points(size := input_slider(10, 300, value = 10))

# We start with a bar chart that shows how many times each color appears in
# the diamonds data set. You might want to make a mental note: color G has
# the most and color J the least.
ggplot(diamonds, aes(x = color)) +
  geom_bar()

# The data in this example is the diamonds dataset, specifically a count of
# occurrences of each value in the color field. Data is one aspect of the
# grammar of graphics. The count of each value in color is represented by the
# height of the bar in the bar chart. This is called an "aesthetic mapping"
# in ggplot2. We can map this data in many different ways. For instance, in
# addition to making the size of the bar depend on the count for each value,
# we could indicate that - rather than dark grey - each bar should be its own
# color. We can accomplish this by mapping diamond color to the fill
# aesthetic.
ggplot(diamonds, aes(x = color, fill = color)) +
  geom_bar()

# Now each bar is its own color, and a legend is included on the side.
# One of the less obvious grammatical elements here is the coordinate system.
# We are using the traditional cartesian coordinate system, where a
# two-dimensional plane has a horizontal axis and a vertical axis. But other
# coordinate systems exist, and they become evident when considering charts
# represented by the grammar of graphics. So let's say we wanted to simply
# stack all of these bars into one single bar. We can do this by assigning a
# single value to the x-axis.
ggplot(diamonds, aes(x = "", fill = color)) +
  geom_bar(width = 1)

# This type of chart might not seem very familiar in a cartesian coordinate
# system. But if we represent the same graph with polar coordinates:
ggplot(diamonds, aes(x = "", fill = color)) +
  geom_bar(width = 1) +
  coord_polar()

# Now, rather than bars, the count of each color is represented by the width
# of a ring. If we modify the call so that the polar coordinates divide the
# circle instead - making the count the width of an angle emanating from the
# center of the circle...
ggplot(diamonds, aes(x = "", fill = color)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# We see a familiar pie chart.