Differences

This shows you the differences between two versions of the page.

--- r-tutorial [2011/10/12 23:54]
liuyipei created
+++ r-tutorial [2011/10/12 23:55]
liuyipei
@@ Line 1: / Line 1: @@
-test
+<code>
+# R is best used as an interactive environment for statistical analysis.
+# Don't think of it primarily as a compiler/interpreter for scripts!
+# You should be spending most of your time in the REPL (Read-Eval-Print Loop).
+# R as a calculator
+log2(32)
+sqrt(2)
+# R as a graphics tool
+# Define a vector named cars with five values
+cars <- c(1,3,6,4,9)
+# Plot the values in cars with default options
+plot(cars)
+# Draw 1000 log-normal samples and bin them into intervals of width 0.1.
+# Spell out FALSE rather than F: T and F are ordinary variables that can be
+# reassigned, whereas TRUE and FALSE are reserved words.
+r <- rlnorm(1000)
+h <- hist(r, plot=FALSE, breaks=seq(0, max(r)+1, by=0.1))
+# Plot the bin counts against the bin midpoints so that the x axis really
+# shows sample values (not bin numbers), and drop empty bins, which cannot
+# be drawn on a logarithmic axis.
+nonempty <- h$counts > 0
+plot(h$mids[nonempty], h$counts[nonempty],
+     log="xy",
+     pch=20,
+     col="blue",
+     main="log-normal distribution",
+     xlab="Value",
+     ylab="Frequency")
+# Primitive data types
+# Numeric - floating point
+# Integers
+# Boolean values - TRUE, FALSE
+# Special values - NA, Inf, -Inf
+x <- 3.14159
+x <- 1 / 0
+y <- TRUE
+!y
+z <- NA
+u <- 2.71828
+v <- "The quick brown fox jumped over the lazy dog"
+# You can compare values using the usual binary infix operators, which return TRUE, FALSE or NA
+x > u
+x == u
+x == z
+# There are also some handy tests you can use to detect special values.
+is.na(x)
+is.na(y)
+is.na(z)
+is.infinite(x)
+is.infinite(y)
+# Compound data - vectors, matrices, lists, data frames
+# The most basic type of compound data in R is a vector.
+# Vectors of numeric values
+x <- c(1,2,3,4,5,6)
+# Can also have vectors of boolean or string values.
+x >= 3
+y <- c("the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog")
+# You can specify a range using the ':' operator
+x <- 1:6
+# c(...) is a handy function for building vectors from other vectors
+y <- c(1:6)
+z <- c(1:3,c(4:6))
+s <- c(x,y)
+c(x, y, s)
+# Referencing elements of a vector using [...]
+x[1]
+x[2]
+x[3]
+# You can use variables as indices.
+i <- 4
+x[i]
+# You can reference elements of a vector using
+# a vector of indices:
+x[1:3]
+selection <- c(4:6)
+x[selection]
+# The elements of a vector can have symbolic names
+names(x) <- c('a', 'b', 'c', 'd', 'e', 'f')
+# Now you can reference elements of the vector using
+# the symbolic names.  This can be very handy when you
+# have a big vector and you don't want to remember, e.g.
+# the index that corresponds to your gene of interest.
+x['c']
+x[c('a', 'c')]
+# Some special ways to build vectors
+numeric(10)
+character(10)
+rep(NA, 10)
+rep(1, 10)
+rep(1:2, 10)
+seq(from=1,to=10, by=2)
+seq(from=0, to=10, by=0.1)
+# MODIFYING VECTORS
+# Modifying elements of a vector
+x[3] <- NA
+x[5] <- 1/0
+# You can modify multiple elements of a vector using
+# a vector of the indices of the elements you want to modify.
+x[selection] <- 10
+x[selection] <- c(10:12)
+# DOING STUFF TO VECTORS
+# Sometimes, you want to know the indices of the elements of a vector
+# which are NA, or Infinite, or whatever.  You can get a vector of the
+# indices of the elements which are NA like this:
+x['e'] <- NA
+is.na(x)
+which(is.na(x))
+x[is.na(x)] <- 0
+# Sorting
+sort(x)
+# Order
+order(x)
+x[order(x)]
+# Matching the elements of two vectors to each other
+a <- c('a', 'b', 'c', 'd', 'e', 'f')
+b <- c('d', 'e', 'a')
+match(b, a)
+a[match(b,a)]
+# Some convenient functions for operating on strings and vectors of strings
+# (y was reassigned to the numbers 1:6 above, so build a fresh vector of
+# words to demonstrate joining and splitting strings.)
+words <- c("the", "quick", "brown", "fox")
+# paste(..., collapse=) joins the elements of a vector into a single string
+z <- paste(words, collapse="_")
+# strsplit() returns a list with one character vector per input string
+strsplit(z, split="_")
+# Vectorized operations - many operations operate on vectors
+# in an element-wise fashion, returning vectors.
+x <- c(1:6)
+z <- rev(x)
+x[c(3,6)] <- NA
+x + 1
+x + z
+x - z
+x * z
+x > 3
+!(x > 3)
+sum(x)
+sum(x[!is.na(x)])
+sum(x, na.rm=TRUE)
+mean(x)
+mean(x, na.rm=TRUE)
+var(x, na.rm=TRUE)
+# MATRICES - n x m tables
+m <- matrix(0, nrow=2, ncol=2)
+m <- matrix(x, nrow=2)
+m <- matrix(x, nrow=2, byrow=TRUE)
+t(m)
+dim(m)
+m <- rbind(m, c(10:12))
+m <- cbind(m, c(13:15))
+# Referencing elements of a matrix
+m[1,1]
+m[1:3,1:3]
+m <- m[,1:3]
+m[1,]
+m[,1]
+m[ is.na(m) ] <- 0
+my.data <- as.matrix(read.table("http://www.stanford.edu/~kjung/my.data.txt", sep="\t"))
+# The rows and columns of a matrix can have symbolic names
+rownames(my.data)
+colnames(my.data)
+my.data['Shaggy', 'B_4']
+my.data['Shaggy',]
+# LISTS - Generally used like a hash table / associative map,
+# though it is also an ordered list
+# Making a new list
+x <- list(a=c(1,2,3), b=c('d', 'e', 'f'), c="foo")
+names(x)
+# Referencing elements of a list
+x[[1]]
+x[['a']]
+x$a
+# Modifying elements of a list
+x$a <- 3.14159
+# unlist()
+# Some R functions return lists, and we want to get vectors - we can
+# convert a list into a vector using unlist(...)
+strsplit("a_b_c", split="_")
+unlist(strsplit("a_b_c", split="_"))
+# FACTORS - R's representation of categorical values.
+f <- factor(rep(c('a', 'b'), 10))
+levels(f)
+# DATA FRAMES
+# Tables with heterogeneous columns.
+pheno.data <- read.table("http://www.stanford.edu/~kjung/pheno.data.txt", sep="\t")
+# Reference elements of a data frame like it was a matrix, or using
+# column names
+pheno.data[1:3,1:3]
+pheno.data$plays.tuba
+# ITERATING OVER DATA
+# for loops
+# Preallocate the result vector, then fill in one row mean per iteration.
+# seq_len(n) is used instead of 1:n because 1:0 counts down (1, 0), so the
+# loop body would still run on a matrix with no rows.
+means <- numeric(nrow(my.data))
+names(means) <- rownames(my.data)
+for (i in seq_len(nrow(my.data))) {
+  means[i] <- mean(my.data[i,], na.rm=TRUE)
+}
+# apply
+# apply() builds its own result, so no preallocation is needed here.
+# MARGIN=1 calls FUN once per row; extra arguments (na.rm) are passed to FUN.
+means <- apply(my.data, MARGIN=1, FUN=mean, na.rm=TRUE)
+# rowMeans, colMeans
+means <- rowMeans(my.data, na.rm=TRUE)
+# lapply
+# Printing stuff out
+# cat - print something to STDOUT
+cat('a', 'b', '\n')
+cat('a', 'b', '\n', sep="_")
+cat(c(1:4), "\n")
+# sprintf
+u <- 3
+v <- 3.14
+s <- "Hello"
+sprintf("%g %f %s", u, v, s)
+sprintf("%g %.3f %s", u, v, s)
+cat(sprintf("%g %.3f %s\n", u, v, s))
+# Getting stuff into and out of R
+# read.table, write.table
+# We saw these above - read through the help page for these functions to get
+# a feel for the available options.
+# read.table
+# write.table
+# save, load - use these to store and restore large data structures
+save(my.data, file="my.data.RData")
+rm(my.data)
+my.data
+load(file="my.data.RData")
+my.data
+# If you want to save everything in your session, use save.image
+save.image(file="my.image.RData")
+ls()  # Lists all bindings in your session
+rm(list=ls()) # Remove all bindings
+load(file="my.image.RData")
+ls()
+# source - read and execute from a text file containing an R script
+# Debugging - Two main options
+# Sprinkle cat(...) liberally throughout your code to monitor what is going on.
+# debug(), undebug() - Turn on/off step through debugging for a function
+# SIMPLE STATISTICS
+# Some simple statistics - note that many functions that perform statistical tests
+# etc, in R return list objects.
+help(t.test)
+ttest.result <- t.test(my.data['Hairless',] ~ pheno.data$plays.tuba)
+ttest.result
+names(ttest.result)
+ttest.result$statistic
+ttest.result$p.value
+help(cor.test)
+cor.results <- cor.test(my.data['Hedgehog',], my.data['Shaggy',], method="spearman")
+help(chisq.test)
+chisq.results <- chisq.test(pheno.data$rides.bike, pheno.data$plays.tuba)
+chisq.results <- chisq.test(table(pheno.data$rides.bike, pheno.data$plays.tuba))
+help(prcomp)
+prcomp.results <- prcomp(my.data)  # presumably fails while my.data still contains NA values - confirm against the data
+my.data[is.na(my.data)] <- 0  # replace missing values with 0 before retrying the PCA
+prcomp.results <- prcomp(my.data)
+</code>

Shah Lab

User Tools

Site Tools

Differences

Page Tools