Stop Thinking, Just Do!

Sungsoo Kim's Blog

R Graphics

Tags

26 September 2015


R Graphics

R is a high-level programming language used primarily for statistical computing and graphics.

Base Graphics Package

library(ggplot2)

# Base graphics package: a quick tour of the built-in plot types, using
# the diamonds data set that ships with ggplot2.
help(graphics)
demo(graphics, package = "graphics")
plot(diamonds$color)

# Frequency of each diamond color, reused by the pie and dot charts.
colors <- table(diamonds$color)
pie(colors, labels = names(colors))
# dotchart() is documented to take a numeric vector or matrix, so hand it
# the plain counts and supply the color names as labels explicitly.
dotchart(as.vector(colors), labels = names(colors))

# `which` is not an argument of boxplot(); only the data is needed here.
boxplot(diamonds$carat)
plot(diamonds$color, diamonds$carat)
plot(diamonds$carat, diamonds$color)

hist(diamonds$carat)
stripchart(diamonds$carat)

spineplot(diamonds$carat, diamonds$color)
# The formula method of pairs() expects a one-sided formula in which each
# term is one variable of the scatter plot matrix.
pairs(~ carat + price, data = diamonds)

# Three reference plots that are recreated later with other packages.
plot(diamonds$cut) # bar chart
plot(diamonds$color, diamonds$price) # box plot
plot(diamonds$x, diamonds$y, cex = 0.3) # scatter plot

Lattice Graphics Package

library(ggplot2) # Using this for the diamonds dataset

library(lattice)
demo("lattice", package = "lattice")

barchart(diamonds$color)
xyplot(price ~ carat, diamonds)

# Comparing ggplot2 and lattice:
# http://learnr.wordpress.com/2009/06/28/ggplot2-version-of-figures-in-lattice-multivariate-data-visualization-with-r-part-1/

# Compare lattice with plot() from base graphics.

plot(iris$Sepal.Length, iris$Sepal.Width, cex = 0.5)

# Base:    plot(x, y)
# Lattice: xyplot(y ~ x, data = dataset)

# Lattice's xyplot() takes a formula of the form y ~ x.
# The data frame is passed separately, so only the column names need to
# appear in the formula.

xyplot(Sepal.Width ~ Sepal.Length, data = iris)

# Lattice is better at dealing with additional variables.

# At times, analyzing 2 variables is insufficient.
# Interaction with a third variable might affect the relationship.

# This is possible but gets a bit cumbersome in base graphics.


# Draw one scatter plot per iris species on a 2 x 2 grid of panels.
# par() returns the previous settings, so keep them and restore them at
# the end; otherwise every later base plot in the session would still be
# drawn into the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))

s <- subset(iris, Species == "virginica")
plot(s$Sepal.Length, s$Sepal.Width, cex = 0.5)
title(main = "virginica")

# Skip a cell: plot.new() advances to the next panel and leaves this one
# empty.
plot.new()

s <- subset(iris, Species == "setosa")
plot(s$Sepal.Length, s$Sepal.Width, cex = 0.5)
title(main = "setosa")

s <- subset(iris, Species == "versicolor")
plot(s$Sepal.Length, s$Sepal.Width, cex = 0.5)
title(main = "versicolor")

# Back to the layout that was active before the grid was set up.
par(old_par)

# Base graphics often needs a series of short function calls, whereas a
# lattice chart can often be created with a single, longer call.

xyplot(Sepal.Width ~ Sepal.Length | Species, data = iris)

# The formula y ~ x | A means: display the relationship between the
# variables y and x separately for every combination of factor A.

ggplot2 Package

demo(package = "ggplot2") # None

# Base R plots next to their ggplot2 counterparts.

# Bar chart
plot(diamonds$color)
ggplot(data = diamonds, mapping = aes(x = color)) +
  geom_bar()

# Box plot
plot(diamonds$color, diamonds$price)
ggplot(data = diamonds, mapping = aes(x = color, y = price)) +
  geom_boxplot()

# Scatter plot
plot(diamonds$x, diamonds$y, cex = 0.3)
ggplot(data = diamonds, mapping = aes(x = x, y = y)) +
  geom_point()


# ggvis: the same scatter plot, first with ggplot2 and then with ggvis.
library(ggvis)
library(ggplot2)
ggplot(data = diamonds, mapping = aes(x = x, y = y)) +
  geom_point()
ggvis(data = diamonds, ~x, ~y) %>%
  layer_points()
# Same points, with the point size driven by an interactive slider.
ggvis(data = diamonds, ~x, ~y) %>%
  layer_points(size := input_slider(10, 300, value = 10))


# Start with a bar chart that counts how many times each color appears
# in the diamonds data set.  You might want to make a mental note: color
# G has the most and color J the least.
ggplot(data = diamonds, mapping = aes(x = color)) +
  geom_bar()

# The data in this example is the diamonds dataset, specifically a count
# of occurrences of each value in the color field.  Data is one aspect
# of the grammar of graphics.  The count of each value in color is
# represented by the height of the bar in the bar chart; ggplot2 calls
# this an "aesthetic mapping".  The same data can be mapped in many
# different ways.  For instance, in addition to making the size of the
# bar depend on the count for each value, we could ask that each bar get
# its own color rather than dark grey.  We can accomplish this by
# mapping diamond color to the fill aesthetic.

ggplot(data = diamonds, mapping = aes(x = color, fill = color)) +
  geom_bar()

# Now each bar is its own color, and a legend is included on the side.
# One of the less obvious elements of this grammar is the coordinate
# system.  We are using the traditional Cartesian coordinate system,
# where a two-dimensional plane has a horizontal axis and a vertical
# axis.  But other coordinate systems do exist, and they become evident
# when charts are described with the grammar of graphics.  So let's say
# we wanted to simply stack all of these bars into one single bar.  We
# can do this by assigning a single value to the x-axis.

ggplot(data = diamonds, mapping = aes(x = "", fill = color)) +
  geom_bar(width = 1)

# This type of chart might not seem very familiar in a Cartesian
# coordinate system.  But if we represent the same graph with polar
# coordinates:
ggplot(data = diamonds, mapping = aes(x = "", fill = color)) +
  geom_bar(width = 1) +
  coord_polar()

# Now, rather than bars, the count of each color is represented by the
# width of a ring.  If we modify the polar-coordinates call so that the
# count instead sets the width of an angle emanating from the center of
# the circle...
ggplot(data = diamonds, mapping = aes(x = "", fill = color)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# We see a familiar pie chart.

comments powered by Disqus