# Descriptive statistics and basic plots in base R, demonstrated on the
# built-in Old Faithful ("faithful") and Michelson-Morley ("morley") data sets.
# Intended to be stepped through interactively, one statement at a time.

# Raw data and help ----

# We'll use the Old Faithful data that comes with R
# Type just "faithful" and hit enter to see the raw data
faithful

# To get information on this data, see the help file
?faithful

# Notice this is a "data frame" in R, which just means a table of data with
# named columns. To retrieve a single column, use the "$" operator:
faithful$eruptions
faithful$waiting

# Summary statistics ----

# The "summary" command in R will give you some basic statistics
summary(faithful)

# The usual sample statistics are also commands:
mean(faithful$eruptions)    # mean
median(faithful$eruptions)  # median
var(faithful$eruptions)     # variance
sd(faithful$eruptions)      # standard deviation

# We can look at joint statistics such as sample covariance and correlation
# also
cov(faithful$waiting, faithful$eruptions)
cor(faithful$waiting, faithful$eruptions)

# Plots of a single variable ----

# As a first visualization, we could look at the histogram
hist(faithful$eruptions, main = "Histogram of Old Faithful Eruption Time")

# The empirical cdf is plotted using the "ecdf" command
plot(
  ecdf(faithful$eruptions),
  cex.points = 0.5,
  main = "Empirical CDF for Old Faithful Eruption Time"
)

# Here's how to do box plots:
boxplot(faithful$eruptions, main = "Box Plot of Old Faithful Eruption Time")

# Plots of two variables ----

# Scatter plots are useful for looking at two random variables, X, Y, and
# their relationships
plot(
  faithful$eruptions,
  faithful$waiting,
  main = "Scatter Plot of Old Faithful Waiting vs Eruption Times"
)

# Grouped box plot ----

# This example is taken from Wikipedia. It is a box plot of the
# Michelson-Morley speed of light experiments
# The assignment below creates a modified copy of "morley" in the workspace,
# with the experiment number stored as a factor.
morley$Expt <- factor(morley$Expt)

# par() returns the previous values of the settings it changes; keep them so
# the graphics state can be put back once this plot is finished.
old_par <- par(las = 1, mar = c(5.1, 5.1, 2.1, 2.1))
boxplot(
  Speed ~ Expt,
  morley,
  xlab = "Experiment No.",
  ylab = "Speed of light (km/s minus 299,000)"
)
# Horizontal reference line and its label; 792.458 is on the same
# "km/s minus 299,000" scale as the y axis.
abline(h = 792.458, col = "red")
text(3, 792.458, "true\nspeed")
# Restore the label orientation and margins so later plots are unaffected.
par(old_par)

# Another nice example of a box plot is at the bottom of the help page for
# boxplot (type "?boxplot")