This shows you the differences between two versions of the page.
Next revision Both sides next revision | |||
r-tutorial [2011/10/12 23:54] liuyipei created |
r-tutorial [2011/10/12 23:55] liuyipei |
||
---|---|---|---|
Line 1: | Line 1: | ||
- | test | + | < |
+ | |||
+ | # R is best used as an interactive environment for statistical analysis. | ||
+ | # Don't think of it primarily as a compiler/interpreter. | ||
+ | # You should be spending most of your time in the REPL (Read-Eval-Print Loop). | ||
+ | |||
+ | # R as a calculator | ||
+ | log2(32) | ||
+ | sqrt(2) | ||
+ | |||
+ | # R as a graphics tool | ||
+ | # Define a vector named cars with five values | ||
+ | cars <- c(1, | ||
+ | |||
+ | # Plot the values in cars with default options | ||
+ | plot(cars) | ||
+ | |||
+ | r <- rlnorm(1000) | ||
+ | h <- hist(r, plot=F, breaks=c(seq(0, | ||
+ | plot(h$counts, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | |||
+ | # Primitive data types | ||
+ | # Numeric - floating point | ||
+ | # Integers | ||
+ | # Boolean values - TRUE, FALSE | ||
+ | # Special values - NA, Inf, -Inf | ||
+ | x <- 3.14159 | ||
+ | x <- 1 / 0 | ||
+ | y <- TRUE | ||
+ | !y | ||
+ | z <- NA | ||
+ | u <- 2.71828 | ||
+ | v <- "The quick brown fox jumped over the lazy dog" | ||
+ | |||
+ | # You can compare values using the usual binary infix operators, which return TRUE, FALSE or NA | ||
+ | x > u | ||
+ | x == u | ||
+ | x == z | ||
+ | |||
+ | # There are also some handy tests you can use to detect special values. | ||
+ | is.na(x) | ||
+ | is.na(y) | ||
+ | is.na(z) | ||
+ | is.infinite(x) | ||
+ | is.infinite(y) | ||
+ | |||
+ | # Compound data - vectors, matrices, lists, factors, data frames | ||
+ | |||
+ | # The most basic type of compound data in R is a vector. | ||
+ | # Vectors of numeric values | ||
+ | x <- c(1, | ||
+ | |||
+ | # Can also have vectors of boolean or string values. | ||
+ | x >= 3 | ||
+ | y <- c(" | ||
+ | |||
+ | # You can specify a range using the ':' operator. | ||
+ | x <- 1:6 | ||
+ | |||
+ | # c(...) is a handy function for building vectors from other vectors | ||
+ | y <- c(1:6) | ||
+ | z <- c(1: | ||
+ | s <- c(x,y) | ||
+ | c(x, y, s) | ||
+ | |||
+ | # Referencing elements of a vector using [...] | ||
+ | x[1] | ||
+ | x[2] | ||
+ | x[3] | ||
+ | |||
+ | # You can use variables as indices. | ||
+ | i <- 4 | ||
+ | x[i] | ||
+ | |||
+ | # You can reference elements of a vector using | ||
+ | # a vector of indices: | ||
+ | x[1:3] | ||
+ | selection <- c(4:6) | ||
+ | x[selection] | ||
+ | |||
+ | # The elements of a vector can have symbolic names | ||
+ | names(x) <- c(' | ||
+ | |||
+ | # Now you can reference elements of the vector using | ||
+ | # the symbolic names. This is handy when you | ||
+ | # have a big vector and you don't want to remember, e.g., | ||
+ | # the index that corresponds to your gene of interest. | ||
+ | x[' | ||
+ | x[c(' | ||
+ | |||
+ | # Some special ways to build vectors | ||
+ | numeric(10) | ||
+ | character(10) | ||
+ | rep(NA, 10) | ||
+ | rep(1, 10) | ||
+ | rep(1:2, 10) | ||
+ | seq(from=1, | ||
+ | seq(from=0, to=10, by=0.1) | ||
+ | |||
+ | # MODIFYING VECTORS | ||
+ | # Modifying elements of a vector | ||
+ | x[3] <- NA | ||
+ | x[5] <- 1/0 | ||
+ | |||
+ | # You can modify multiple elements of a vector using | ||
+ | # a vector of the indices of the elements you want to modify. | ||
+ | x[selection] <- 10 | ||
+ | x[selection] <- c(10:12) | ||
+ | |||
+ | |||
+ | # DOING STUFF TO VECTORS | ||
+ | |||
+ | # Sometimes, you want to know the indices of the elements of a vector | ||
+ | # which are NA, or infinite, or whatever. You can find the | ||
+ | # elements which are NA like this: | ||
+ | x[' | ||
+ | is.na(x) | ||
+ | which(is.na(x)) | ||
+ | x[is.na(x)] <- 0 | ||
+ | |||
+ | # Sorting | ||
+ | sort(x) | ||
+ | |||
+ | # Order | ||
+ | order(x) | ||
+ | x[order(x)] | ||
+ | |||
+ | # Matching the elements of two vectors to each other | ||
+ | a <- c(' | ||
+ | b <- c(' | ||
+ | match(b, a) | ||
+ | a[match(b, | ||
+ | |||
+ | # Some convenient functions for operating on strings and vectors of strings | ||
+ | z <- paste(y, collapse=" | ||
+ | strsplit(z, split=" | ||
+ | |||
+ | |||
+ | # Vectorized operations - many operations operate on vectors | ||
+ | # in an element-wise fashion, returning vectors. | ||
+ | x <- c(1:6) | ||
+ | z <- rev(x) | ||
+ | x[c(3,6)] <- NA | ||
+ | x + 1 | ||
+ | x + z | ||
+ | x - z | ||
+ | x * z | ||
+ | x > 3 | ||
+ | !(x > 3) | ||
+ | sum(x) | ||
+ | sum(x[!is.na(x)]) | ||
+ | sum(x, na.rm=TRUE) | ||
+ | mean(x) | ||
+ | mean(x, na.rm=TRUE) | ||
+ | var(x, na.rm=TRUE) | ||
+ | |||
+ | |||
+ | # MATRICES - n x m tables | ||
+ | m <- matrix(0, nrow=2, ncol=2) | ||
+ | m <- matrix(x, nrow=2) | ||
+ | m <- matrix(x, nrow=2, byrow=TRUE) | ||
+ | t(m) | ||
+ | dim(m) | ||
+ | m <- rbind(m, c(10:12)) | ||
+ | m <- cbind(m, c(13:15)) | ||
+ | |||
+ | # Referencing elements of a matrix | ||
+ | m[1,1] | ||
+ | m[1: | ||
+ | m <- m[,1:3] | ||
+ | m[1,] | ||
+ | m[,1] | ||
+ | m[ is.na(m) ] <- 0 | ||
+ | |||
+ | my.data <- as.matrix(read.table(" | ||
+ | |||
+ | |||
+ | # The rows and columns of a matrix can have symbolic names | ||
+ | rownames(my.data) | ||
+ | colnames(my.data) | ||
+ | my.data[' | ||
+ | my.data[' | ||
+ | |||
+ | |||
+ | # LISTS - Generally used like a hash table / associative map, | ||
+ | # though it is also an ordered list. | ||
+ | # Making a new list | ||
+ | x <- list(a=c(1, | ||
+ | names(x) | ||
+ | |||
+ | # Referencing elements of a list | ||
+ | x[[1]] | ||
+ | x[[' | ||
+ | x$a | ||
+ | |||
+ | # Modifying elements of a list | ||
+ | x$a <- 3.14159 | ||
+ | |||
+ | # unlist() | ||
+ | # Some R functions return lists, and we want to get vectors - we can | ||
+ | # convert a list into a vector using unlist(...) | ||
+ | strsplit(" | ||
+ | unlist(strsplit(" | ||
+ | |||
+ | |||
+ | # FACTORS - R's representation of categorical values. | ||
+ | f <- factor(rep(c(' | ||
+ | levels(f) | ||
+ | |||
+ | # DATA FRAMES | ||
+ | # Tables with heterogeneous columns. | ||
+ | pheno.data <- read.table(" | ||
+ | |||
+ | # Reference elements of a data frame like it was a matrix, or using | ||
+ | # column names | ||
+ | pheno.data[1: | ||
+ | pheno.data$plays.tuba | ||
+ | |||
+ | # ITERATING OVER DATA | ||
+ | |||
+ | # for loops | ||
+ | means <- numeric(nrow(my.data)) | ||
+ | names(means) <- rownames(my.data) | ||
+ | for (i in 1: | ||
+ | means[i] <- mean(my.data[i, | ||
+ | } | ||
+ | |||
+ | # apply | ||
+ | means <- numeric(nrow(my.data)) | ||
+ | means <- apply(my.data, | ||
+ | |||
+ | # rowMeans, colMeans | ||
+ | means <- rowMeans(my.data, | ||
+ | |||
+ | # lapply | ||
+ | |||
+ | # Printing stuff out | ||
+ | |||
+ | # cat - print something to STDOUT | ||
+ | cat(' | ||
+ | cat(' | ||
+ | cat(c(1:4), " | ||
+ | |||
+ | |||
+ | # sprintf | ||
+ | u <- 3 | ||
+ | v <- 3.14 | ||
+ | s <- " | ||
+ | sprintf(" | ||
+ | sprintf(" | ||
+ | cat(sprintf(" | ||
+ | |||
+ | # Getting stuff into and out of R | ||
+ | |||
+ | # read.table, write.table | ||
+ | # We saw these above - read through the help page for these functions to get | ||
+ | # a feel for the available options. | ||
+ | # read.table | ||
+ | # write.table | ||
+ | |||
+ | # save, load - use these with large data structures, as they store R objects directly in a file | ||
+ | save(my.data, | ||
+ | rm(my.data) | ||
+ | my.data | ||
+ | load(file=" | ||
+ | my.data | ||
+ | |||
+ | |||
+ | # If you want to save everything in your session, use save.image | ||
+ | save.image(file=" | ||
+ | ls() # Lists all bindings in your session | ||
+ | rm(list=ls()) # Remove all bindings | ||
+ | load(file=" | ||
+ | ls() | ||
+ | |||
+ | |||
+ | # source - read and execute from a text file containing an R script | ||
+ | |||
+ | # Debugging - Two main options | ||
+ | # Sprinkle cat(...) liberally throughout your code to monitor what is going on. | ||
+ | # debug(), undebug() - Turn on/off step through debugging for a function | ||
+ | |||
+ | # SIMPLE STATISTICS | ||
+ | # Some simple statistics - note that many R functions that perform statistical tests, | ||
+ | # etc., return list objects. | ||
+ | help(t.test) | ||
+ | ttest.result <- t.test(my.data[' | ||
+ | ttest.result | ||
+ | names(ttest.result) | ||
+ | ttest.result$statistic | ||
+ | ttest.result$p.value | ||
+ | |||
+ | help(cor.test) | ||
+ | cor.results <- cor.test(my.data[' | ||
+ | |||
+ | help(chisq.test) | ||
+ | chisq.results <- chisq.test(pheno.data$rides.bike, | ||
+ | chisq.results <- chisq.test(table(pheno.data$rides.bike, | ||
+ | |||
+ | help(prcomp) | ||
+ | prcomp.results <- prcomp(my.data) | ||
+ | my.data[is.na(my.data)] <- 0 | ||
+ | prcomp.results <- prcomp(my.data) | ||
+ | |||
+ | </ |