Differences

This shows you the differences between two versions of the page.

--- r-tutorial [2011/10/12 23:54]
liuyipei created
+++ r-tutorial [2011/10/15 12:05] (current)
liuyipei [Problem Set 1 Hints]
@@ Line 1: / Line 1: @@
-test
+====Basics====
+  * Basics borrowed from Pablo Cordero
+<code>
+# R is best used as an interactive environment for statistical analysis.
+# Don't think of it primarily as a compiler/interpreter for scripts!
+# You should be spending most of your time in the REPL (Read-Eval-Print Loop).
+# R as a calculator
+log2(32)
+sqrt(2)
+# R as a graphics tool
+# Define a vector named my.vector with five values
+my.vector <- c(1,3,6,4,9)
+# R comes with some built-in datasets to play with, such as the 'cars' data frame
+head(cars)
+dim(cars)
+plot(x=cars$speed, y=cars$dist,
+     main="cars",
+     xlab="speed",
+     ylab="dist")
+hist(cars$speed)
+# Primitive data types
+# Numeric - floating point
+# Integers
+# Boolean values - TRUE, FALSE
+# Special values - NA, Inf, -Inf
+x <- 3.14159
+x <- 1 / 0
+y <- TRUE
+!y
+z <- NA
+u <- 2.71828
+v <- "The quick brown fox jumped over the lazy dog"
+# You can compare values using the usual binary infix operators, which return TRUE, FALSE or NA
+x > u
+x == u
+x == z
+# There are also some handy tests you can use to detect special values.
+is.na(x)
+is.na(y)
+is.na(z)
+is.infinite(x)
+is.infinite(y)
+# Compound data - vectors, matrices, lists, data frames
+# The most basic type of compound data in R is a vector.
+# Vectors of numeric values
+x <- c(1,2,3,4,5,6)
+# Can also have vectors of boolean or string values.
+x >= 3
+y <- c("the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog")
+# You can specify a range using the ':' operator
+x <- 1:6
+# c(...) is a handy function for building vectors from other vectors
+y <- c(1:6)
+z <- c(1:3,c(4:6))
+s <- c(x,y)
+c(x, y, s)
+# Referencing elements of a vector using [...]
+x[1]
+x[2]
+x[3]
+# You can use variables as indices.
+i <- 4
+x[i]
+# You can reference elements of a vector using
+# a vector of indices:
+x[1:3]
+selection <- c(4:6)
+x[selection]
+# The elements of a vector can have symbolic names
+names(x) <- c('a', 'b', 'c', 'd', 'e', 'f')
+# Now you can reference elements of the vector using
+# the symbolic names.  This can be very handy when you
+# have a big vector and you don't want to remember, e.g.
+# the index that corresponds to your gene of interest.
+x['c']
+x[c('a', 'c')]
+# Some special ways to build vectors
+numeric(10)
+character(10)
+rep(NA, 10)
+rep(1, 10)
+rep(1:2, 10)
+seq(from=1,to=10, by=2)
+seq(from=0, to=10, by=0.1)
+# MODIFYING VECTORS
+# Modifying elements of a vector
+x[3] <- NA
+x[5] <- 1/0
+# You can modify multiple elements of a vector using
+# a vector of the indices of elements you want to modify.
+x[selection] <- 10
+x[selection] <- c(10:12)
+# DOING STUFF TO VECTORS
+# Sometimes, you want to know the indices of the elements of a vector
+# which are NA, or Infinite, or whatever.  You can get a vector of the
+# indices of the elements which are NA like this:
+x['e'] <- NA
+is.na(x)
+which(is.na(x))
+x[is.na(x)] <- 0
+# Sorting
+sort(x)
+# Order
+order(x)
+x[order(x)]
+# Matching the elements of two vectors to each other
+a <- c('a', 'b', 'c', 'd', 'e', 'f')
+b <- c('d', 'e', 'a')
+match(b, a)
+a[match(b,a)]
+# Some convenient functions for operating on strings and vectors of strings
+z <- paste(y, collapse="_")
+strsplit(z, split="_")
+# Vectorized operations - many operations operate on vectors
+# in an element-wise fashion, returning vectors.
+x <- c(1:6)
+z <- rev(x)
+x[c(3,6)] <- NA
+x + 1
+x + z
+x - z
+x * z
+x > 3
+!(x > 3)
+sum(x)
+sum(x[!is.na(x)])
+sum(x, na.rm=TRUE)
+mean(x)
+mean(x, na.rm=TRUE)
+var(x, na.rm=TRUE)
+# MATRICES - n x m tables
+m <- matrix(0, nrow=2, ncol=2)
+m <- matrix(x, nrow=2)
+m <- matrix(x, nrow=2, byrow=TRUE)
+t(m)
+dim(m)
+m <- rbind(m, c(10:12))
+m <- cbind(m, c(13:15))
+# Referencing elements of a matrix
+m[1,1]
+m[1:3,1:3]
+m <- m[,1:3]
+m[1,]
+m[,1]
+m[ is.na(m) ] <- 0
+my.data <- as.matrix(read.table("http://www.stanford.edu/~kjung/my.data.txt", sep="\t"))
+# The rows and columns of a matrix can have symbolic names
+rownames(my.data)
+colnames(my.data)
+my.data['Shaggy', 'B_4']
+my.data['Shaggy',]
+# LISTS - Generally used like a hash table / associative map,
+# though it is also an ordered list
+# Making a new list
+x <- list(a=c(1,2,3), b=c('d', 'e', 'f'), c="foo")
+names(x)
+# Referencing elements of a list
+x[[1]]
+x[['a']]
+x$a
+# Modifying elements of a list
+x$a <- 3.14159
+# unlist()
+# Some R functions return lists, and we want to get vectors - we can
+# convert a list into a vector using unlist(...)
+strsplit("a_b_c", split="_")
+unlist(strsplit("a_b_c", split="_"))
+# FACTORS - R's representation of categorical values.
+f <- factor(rep(c('a', 'b'), 10))
+levels(f)
+# DATA FRAMES
+# Tables with heterogeneous columns.
+pheno.data <- read.table("http://www.stanford.edu/~kjung/pheno.data.txt", sep="\t")
+# Reference elements of a data frame as if it were a matrix, or using
+# column names
+pheno.data[1:3,1:3]
+pheno.data$plays.tuba
+# ITERATING OVER DATA
+# for loops
+# Preallocate one slot per row of my.data and label each slot with its row name.
+means <- numeric(nrow(my.data))
+names(means) <- rownames(my.data)
+# seq_len(n) is empty when n is 0, whereas 1:n would count down over c(1, 0)
+# and index rows that do not exist.
+for (i in seq_len(nrow(my.data))) {
+  means[i] <- mean(my.data[i,], na.rm=TRUE)
+}
+# apply
+# apply() with MARGIN=1 calls FUN once per row and returns the collected
+# results itself, so there is nothing to preallocate; na.rm=TRUE is passed
+# through to mean().
+means <- apply(my.data, MARGIN=1, FUN=mean, na.rm=TRUE)
+# rowMeans, colMeans
+means <- rowMeans(my.data, na.rm=TRUE)
+# lapply
+# Printing stuff out
+# cat - print something to STDOUT
+cat('a', 'b', '\n')
+cat('a', 'b', '\n', sep="_")
+cat(c(1:4), "\n")
+# sprintf
+u <- 3
+v <- 3.14
+s <- "Hello"
+sprintf("%g %f %s", u, v, s)
+sprintf("%g %.3f %s", u, v, s)
+cat(sprintf("%g %.3f %s\n", u, v, s))
+# Getting stuff into and out of R
+# read.table, write.table
+# We saw these above - read through the help page for these functions to get
+# a feel for the available options.
+# read.table
+# write.table
+# save, load - use these with large data structures
+save(my.data, file="my.data.RData")
+rm(my.data)
+my.data
+load(file="my.data.RData")
+my.data
+# If you want to save everything in your session, use save.image
+save.image(file="my.image.RData")
+ls()  # Lists all bindings in your session
+rm(list=ls()) # Remove all bindings
+load(file="my.image.RData")
+ls()
+</code>
+====Problem Set 1 Hints and Other Tricks====
+<code>
+# Point R at the homework directory so the relative file names below resolve.
+# (This path is specific to the author's machine; substitute your own.)
+setwd('/Users/liuyipei/BMI215/Module 1 - Nick/DrugSafety-Homework')
+# Drug/event frequency table; the code below reads its 'singlet' column.
+dz<-read.csv('single_drug_event_frequencies.csv')
+head(dz)
+# List of cholesterol drugs. header=FALSE keeps the first line as data;
+# col.names expects a vector of names, so col.names=F did not switch the
+# header off. NOTE(review): assumes the file has no header row - confirm.
+cd1<-read.csv('cholesterol_drugs.txt', header=FALSE)
+colnames(cd1)<-c('singlet')
+cd1$chole<-'choles'
+head(cd1)
+# Every drug seen in dz that is not in the cholesterol list gets the label 'noncholes'.
+cd0<-data.frame(singlet=setdiff(as.character(unique(dz$singlet)), cd1$singlet))
+cd0$chole<-'noncholes'
+head(cd0)
+# Stack the two labelled sets into one lookup table.
+cd.table<-rbind(cd1, cd0)
+head(cd.table)
+q1.table <- merge(cd.table, dz)  # no 'by' given: joins on the column names both frames share (presumably 'singlet' - confirm)
+q1.table$hifreq <- ifelse(q1.table$freq > 0.1, 'hifr', 'lofr')  # 'hifr' when freq > 0.1, else 'lofr'
+head(q1.table)
+nrow(q1.table)
+ncol(q1.table)
+sum(q1.table$chole == 'choles')  # number of rows labelled 'choles'
+sum(q1.table$chole == 'noncholes')  # number of rows labelled 'noncholes'
+table(q1.table$chole, q1.table$hifreq)  # cross-tabulates chole label (rows) against hifreq label (columns)
+library(plyr)
+# Summarise one piece of a data frame: its row count, its column count, and
+# whether the first 'chole' value equals 'choles'. c() coerces the logical
+# flag to a number so that all three fit in one vector.
+hard.work <- function(x) {
+  n.rows <- nrow(x)
+  n.cols <- ncol(x)
+  is.choles <- x$chole[1] == 'choles'
+  c(n.rows, n.cols, is.choles)
+}
+# Split q1.table by singlet, run hard.work on each piece, and collect the
+# per-piece results into one data frame.
+lets.talk.about.what.happened <- ddply(q1.table, .(singlet), .fun=hard.work)
+head(lets.talk.about.what.happened)
+dim(lets.talk.about.what.happened)
+summary(lets.talk.about.what.happened)
+# Give the result columns descriptive names.
+colnames(lets.talk.about.what.happened) <- c('singlet', 'row.count', 'col.count', 'ch.drug')
+library(caTools)
+# Eleven x positions 0, 0.1, ..., 1 and one y value for each of them.
+my.x <- (0:10) * 0.1
+my.y <- 0.1 * c(0, 1, 3, 5, 6, 7, 7, 9, 9, 10, 10)
+plot(x=my.x, y=my.y)
+# trapz() comes from caTools: trapezoid-rule integral of my.y over my.x.
+trapz(my.x, my.y)
+# Build a 2x2 table of counts (columns a and b) and run Fisher's exact test on it.
+# NOTE: the table is bound to the name 't', which is also the name of base R's
+# transpose function t().
+t <- data.frame(a=c(8, 12), b=c(31, 41))
+t
+f.t <- fisher.test(t)
+# ls() on the test result lists the names of its components.
+ls(f.t)
+f.t$p.value
+f.t$conf.int
+</code>

Shah Lab

User Tools

Site Tools

Differences

Page Tools