This shows you the differences between two versions of the page.
Next revision | Previous revision Next revision Both sides next revision | ||
r-tutorial [2011/10/12 23:54] liuyipei created |
r-tutorial [2011/10/13 00:58] liuyipei |
||
---|---|---|---|
Line 1: | Line 1: | ||
- | test | + | ====Basics==== |
+ | * Basics borrowed from Pablo Cordero | ||
+ | < | ||
+ | # R is best used as an interactive environment for statistical analysis. | ||
+ | # Don't think of it primarily as a compiler/ | ||
+ | # You should be spending most of your time in the REPL (Read-Eval-Print Loop). | ||
+ | |||
+ | # R as a calculator | ||
+ | log2(32) | ||
+ | sqrt(2) | ||
+ | |||
+ | # R as a graphics tool | ||
+ | # Define a vector named my.vector with five values | ||
+ | my.vector <- c(1, | ||
+ | |||
+ | # Some basic variables that come with R to play with | ||
+ | head(cars) | ||
+ | dim(cars) | ||
+ | |||
+ | plot(x=cars$speed, | ||
+ | | ||
+ | | ||
+ | | ||
+ | hist(cars$speed) | ||
+ | |||
+ | # Primitive data types | ||
+ | # Numeric - floating point | ||
+ | # Integers | ||
+ | # Boolean values - TRUE, FALSE | ||
+ | # Special values - NA, Inf, -Inf | ||
+ | x <- 3.14159 | ||
+ | x <- 1 / 0 | ||
+ | y <- TRUE | ||
+ | !y | ||
+ | z <- NA | ||
+ | u <- 2.71828 | ||
+ | v <- "The quick brown fox jumped over the lazy dog" | ||
+ | |||
+ | # You can compare values using the usual binary infix operators, which return TRUE, FALSE or NA | ||
+ | x > u | ||
+ | x == u | ||
+ | x == z | ||
+ | |||
+ | # There are also some handy tests you can use to detect special values. | ||
+ | is.na(x) | ||
+ | is.na(y) | ||
+ | is.na(z) | ||
+ | is.infinite(x) | ||
+ | is.infinite(y) | ||
+ | |||
+ | # Compound data - vectors, matrices, lists, data frames, | ||
+ | |||
+ | # The most basic type of compound data in R is a vector. | ||
+ | # Vectors of numeric values | ||
+ | x <- c(1, | ||
+ | |||
+ | # Can also have vectors of boolean or string values. | ||
+ | x >= 3 | ||
+ | y <- c(" | ||
+ | |||
+ | # You can specify a range using the ':' operator | ||
+ | x <- 1:6 | ||
+ | |||
+ | # c(...) is a handy function for building vectors from other vectors | ||
+ | y <- c(1:6) | ||
+ | z <- c(1: | ||
+ | s <- c(x,y) | ||
+ | c(x, y, s) | ||
+ | |||
+ | # Referencing elements of a vector using [...] | ||
+ | x[1] | ||
+ | x[2] | ||
+ | x[3] | ||
+ | |||
+ | # You can use variables as indices. | ||
+ | i <- 4 | ||
+ | x[i] | ||
+ | |||
+ | # You can reference elements of a vector using | ||
+ | # a vector of indices: | ||
+ | x[1:3] | ||
+ | selection <- c(4:6) | ||
+ | x[selection] | ||
+ | |||
+ | # The elements of a vector can have symbolic names | ||
+ | names(x) <- c(' | ||
+ | |||
+ | # Now you can reference elements of the vector using | ||
+ | # the symbolic names. | ||
+ | # This is handy when you have a big vector and you don't want to remember, e.g. | ||
+ | # the index that corresponds to your gene of interest. | ||
+ | x[' | ||
+ | x[c(' | ||
+ | |||
+ | # Some special ways to build vectors | ||
+ | numeric(10) | ||
+ | character(10) | ||
+ | rep(NA, 10) | ||
+ | rep(1, 10) | ||
+ | rep(1:2, 10) | ||
+ | seq(from=1, | ||
+ | seq(from=0, to=10, by=0.1) | ||
+ | |||
+ | # MODIFYING VECTORS | ||
+ | # Modifying elements of a vector | ||
+ | x[3] <- NA | ||
+ | x[5] <- 1/0 | ||
+ | |||
+ | # You can modify multiple elements of a vector using | ||
+ | # a vector of the indices of the elements you want to modify. | ||
+ | x[selection] <- 10 | ||
+ | x[selection] <- c(10:12) | ||
+ | |||
+ | |||
+ | # DOING STUFF TO VECTORS | ||
+ | |||
+ | # Sometimes, you want to know the indices of the elements of a vector | ||
+ | # which are NA, or Infinite, or whatever. | ||
+ | # You can find the elements which are NA like this: | ||
+ | x[' | ||
+ | is.na(x) | ||
+ | which(is.na(x)) | ||
+ | x[is.na(x)] <- 0 | ||
+ | |||
+ | # Sorting | ||
+ | sort(x) | ||
+ | |||
+ | # Order | ||
+ | order(x) | ||
+ | x[order(x)] | ||
+ | |||
+ | # Matching the elements of two vectors to each other | ||
+ | a <- c(' | ||
+ | b <- c(' | ||
+ | match(b, a) | ||
+ | a[match(b, | ||
+ | |||
+ | # Some convenient functions for operating on strings and vectors of strings | ||
+ | z <- paste(y, collapse=" | ||
+ | strsplit(z, split=" | ||
+ | |||
+ | |||
+ | # Vectorized operations - many operations operate on vectors | ||
+ | # in an element-wise fashion, returning vectors. | ||
+ | x <- c(1:6) | ||
+ | z <- rev(x) | ||
+ | x[c(3,6)] <- NA | ||
+ | x + 1 | ||
+ | x + z | ||
+ | x - z | ||
+ | x * z | ||
+ | x > 3 | ||
+ | !(x > 3) | ||
+ | sum(x) | ||
+ | sum(x[!is.na(x)]) | ||
+ | sum(x, na.rm=TRUE) | ||
+ | mean(x) | ||
+ | mean(x, na.rm=TRUE) | ||
+ | var(x, na.rm=TRUE) | ||
+ | |||
+ | |||
+ | # MATRICES - n x m tables | ||
+ | m <- matrix(0, nrow=2, ncol=2) | ||
+ | m <- matrix(x, nrow=2) | ||
+ | m <- matrix(x, nrow=2, byrow=TRUE) | ||
+ | t(m) | ||
+ | dim(m) | ||
+ | m <- rbind(m, c(10:12)) | ||
+ | m <- cbind(m, c(13:15)) | ||
+ | |||
+ | # Referencing elements of a matrix | ||
+ | m[1,1] | ||
+ | m[1: | ||
+ | m <- m[,1:3] | ||
+ | m[1,] | ||
+ | m[,1] | ||
+ | m[ is.na(m) ] <- 0 | ||
+ | |||
+ | my.data <- as.matrix(read.table(" | ||
+ | |||
+ | # The rows and columns of a matrix can have symbolic names | ||
+ | rownames(my.data) | ||
+ | colnames(my.data) | ||
+ | my.data[' | ||
+ | my.data[' | ||
+ | |||
+ | |||
+ | # LISTS - Generally used like a hash table / associative map, | ||
+ | # though it is also an ordered list | ||
+ | # Making a new list | ||
+ | x <- list(a=c(1, | ||
+ | names(x) | ||
+ | |||
+ | # Referencing elements of a list | ||
+ | x[[1]] | ||
+ | x[[' | ||
+ | x$a | ||
+ | |||
+ | # Modifying elements of a list | ||
+ | x$a <- 3.14159 | ||
+ | |||
+ | # unlist() | ||
+ | # Some R functions return lists, and we want to get vectors - we can | ||
+ | # convert a list into a vector using unlist(...) | ||
+ | strsplit(" | ||
+ | unlist(strsplit(" | ||
+ | |||
+ | |||
+ | # FACTORS - R's representation of categorical values. | ||
+ | f <- factor(rep(c(' | ||
+ | levels(f) | ||
+ | |||
+ | # DATA FRAMES | ||
+ | # Tables with heterogeneous columns. | ||
+ | pheno.data <- read.table(" | ||
+ | |||
+ | # Reference elements of a data frame like it was a matrix, or using | ||
+ | # column names | ||
+ | pheno.data[1: | ||
+ | pheno.data$plays.tuba | ||
+ | |||
+ | # ITERATING OVER DATA | ||
+ | |||
+ | # for loops | ||
+ | means <- numeric(nrow(my.data)) | ||
+ | names(means) <- rownames(my.data) | ||
+ | for (i in 1: | ||
+ | means[i] <- mean(my.data[i, | ||
+ | } | ||
+ | |||
+ | # apply | ||
+ | means <- numeric(nrow(my.data)) | ||
+ | means <- apply(my.data, | ||
+ | |||
+ | # rowMeans, colMeans | ||
+ | means <- rowMeans(my.data, | ||
+ | |||
+ | # lapply | ||
+ | |||
+ | # Printing stuff out | ||
+ | |||
+ | # cat - print something to STDOUT | ||
+ | cat(' | ||
+ | cat(' | ||
+ | cat(c(1:4), " | ||
+ | |||
+ | |||
+ | # sprintf | ||
+ | u <- 3 | ||
+ | v <- 3.14 | ||
+ | s <- " | ||
+ | sprintf(" | ||
+ | sprintf(" | ||
+ | cat(sprintf(" | ||
+ | |||
+ | # Getting stuff into and out of R | ||
+ | |||
+ | # read.table, write.table | ||
+ | # We saw these above - read through the help page for these functions to get | ||
+ | # a feel for the available options. | ||
+ | # read.table | ||
+ | # write.table | ||
+ | |||
+ | # save, load - use these with large data structures | ||
+ | save(my.data, | ||
+ | rm(my.data) | ||
+ | my.data | ||
+ | load(file=" | ||
+ | my.data | ||
+ | |||
+ | |||
+ | # If you want to save everything in your session, use save.image | ||
+ | save.image(file=" | ||
+ | ls() # Lists all bindings in your session | ||
+ | rm(list=ls()) # Remove all bindings | ||
+ | load(file=" | ||
+ | ls() | ||
+ | |||
+ | </ | ||
+ | |||
+ | |||
+ | ====What I used to do assignment 1==== | ||
+ | < | ||
+ | setwd('/ | ||
+ | dz< | ||
+ | head(dz) | ||
+ | |||
+ | cd1< | ||
+ | colnames(cd1)< | ||
+ | cd1$chole< | ||
+ | head(cd0) | ||
+ | |||
+ | cd0< | ||
+ | cd0$chole< | ||
+ | cd.table< | ||
+ | head(cd1) | ||
+ | head(cd0) | ||
+ | head(cd.table) | ||
+ | |||
+ | q1.table <- merge(cd.table, | ||
+ | q1.table$hifreq <- ifelse(q1.table$freq > 0.1, ' | ||
+ | head(q1.table) | ||
+ | nrow(q1.table) | ||
+ | ncol(q1.table) | ||
+ | |||
+ | sum(q1.table$chole == ' | ||
+ | sum(q1.table$chole == ' | ||
+ | table(q1.table$chole, | ||
+ | |||
+ | library(plyr) | ||
+ | hard.work< | ||
+ | c(nrow(x), ncol(x), x$chole[1]==' | ||
+ | } | ||
+ | ddply(q1.table, | ||
+ | head(lets.talk.about.what.happened) | ||
+ | dim(lets.talk.about.what.happened) | ||
+ | summary(lets.talk.about.what.happened) | ||
+ | colnames(lets.talk.about.what.happened)< | ||
+ | |||
+ | library(caTools) | ||
+ | my.x< | ||
+ | my.y< | ||
+ | plot(my.x, my.y) | ||
+ | trapz(my.x, my.y) | ||
+ | |||
+ | data.frame(a=c(8, | ||
+ | t | ||
+ | fisher.test(t)-> | ||
+ | ls(f.t) | ||
+ | f.t$p.value | ||
+ | f.t$conf.int | ||
+ | |||
+ | </ |