User Tools

Site Tools


r-tutorial

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
r-tutorial [2011/10/12 23:54]
liuyipei created
r-tutorial [2011/10/15 12:05] (current)
liuyipei [Problem Set 1 Hints]
Line 1: Line 1:
-test+====Basics==== 
 +  * Basics borrowed from Pablo Cordero 
 +<code> 
 +# R is best used as an interactive environment for statistical analysis. 
 +# Don't think of it primarily as a compiler/interpreter for scripts! 
 +# You should be spending most of your time in the REPL (Read-Eval-Print Loop). 
 + 
 +# R as a calculator 
 +log2(32) 
 +sqrt(2) 
 + 
 +# R as a graphics tool 
 +# Define a vector named my.vector with five values
 +my.vector <- c(1,3,6,4,9) 
 + 
 +# Some basic variables that come with R to play with 
 +head(cars) 
 +dim(cars) 
 + 
 +plot(x=cars$speed, y=cars$dist, 
 +     main="cars", 
 +     xlab="speed", 
 +     ylab="dist")
 +hist(cars$speed) 
 + 
 +# Primitive data types 
 +# Numeric - floating point 
 +# Integers 
 +# Boolean values - TRUE, FALSE 
 +# Special values - NA, Inf, -Inf 
 +x <- 3.14159 
 +x <- 1 / 0 
 +y <- TRUE 
 +!y 
 +z <- NA 
 +u <- 2.71828 
 +v <- "The quick brown fox jumped over the lazy dog" 
 + 
 +# You can compare values using the usual binary infix operators, which return TRUE, FALSE or NA 
 +x > u 
 +x == u 
 +x == z 
 + 
 +# There are also some handy tests you can use to detect special values. 
 +is.na(x) 
 +is.na(y) 
 +is.na(z) 
 +is.infinite(x) 
 +is.infinite(y) 
 + 
 +# Compound data - vectors, matrices, lists, data frames,  
 + 
 +# The most basic type of compound data in R is a vector. 
 +# Vectors of numeric values 
 +x <- c(1,2,3,4,5,6) 
 + 
 +# Can also have vectors of boolean or string values. 
 +x >= 3 
 +y <- c("the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog")
 + 
 +# You can specify a range using the ':' operator 
 +x <- 1:6 
 + 
 +# c(...) is a handy function for building vectors from other vectors 
 +y <- c(1:6) 
 +z <- c(1:3,c(4:6)) 
 +s <- c(x,y) 
 +c(x, y, s) 
 + 
 +# Referencing elements of a vector using [...] 
 +x[1] 
 +x[2] 
 +x[3] 
 + 
 +# You can use variables as indices. 
 +i <- 4 
 +x[i] 
 + 
 +# You can reference elements of a vector using 
 +# a vector of indices: 
 +x[1:3] 
 +selection <- c(4:6) 
 +x[selection] 
 + 
 +# The elements of a vector can have symbolic names 
 +names(x) <- c('a', 'b', 'c', 'd', 'e', 'f')
 + 
 +# Now you can reference elements of the vector using 
 +# the symbolic names.  This can be very handy when you 
 +# have a big vector and you don't want to remember, e.g. 
 +# the index that corresponds to your gene of interest. 
 +x['c']
 +x[c('a', 'c')] 
 + 
 +# Some special ways to build vectors 
 +numeric(10) 
 +character(10) 
 +rep(NA, 10) 
 +rep(1, 10) 
 +rep(1:2, 10) 
 +seq(from=1,to=10, by=2) 
 +seq(from=0, to=10, by=0.1) 
 + 
 +# MODIFYING VECTORS 
 +# Modifying elements of a vector 
 +x[3] <- NA 
 +x[5] <- 1/0 
 + 
 +# You can modify multiple elements of a vector using 
 +# a vector of the indices of elements you want to modify.
 +x[selection] <- 10 
 +x[selection] <- c(10:12) 
 + 
 + 
 +# DOING STUFF TO VECTORS 
 + 
 +# Sometimes, you want to know the indices of the elements of a vector
 +# which are NA, or Infinite, or whatever.  You can get a vector of the
 +# indices of the elements which are NA like this:
 +x['e'] <- NA 
 +is.na(x) 
 +which(is.na(x)) 
 +x[is.na(x)] <- 0 
 + 
 +# Sorting 
 +sort(x) 
 + 
 +# Order 
 +order(x) 
 +x[order(x)] 
 + 
 +# Matching the elements of two vectors to each other 
 +a <- c('a', 'b', 'c', 'd', 'e', 'f')
 +b <- c('d', 'e', 'a')
 +match(b, a) 
 +a[match(b,a)] 
 + 
 +# Some convenient functions for operating on strings and vectors of strings 
 +z <- paste(y, collapse="_")
 +strsplit(z, split="_")
 + 
 + 
 +# Vectorized operations - many operations operate on vectors 
 +# in an element-wise fashion, returning vectors. 
 +x <- c(1:6) 
 +z <- rev(x) 
 +x[c(3,6)] <- NA 
 +x + 1 
 +x + z 
 +x - z 
 +x * z 
 +x > 3 
 +!(x > 3) 
 +sum(x) 
 +sum(x[!is.na(x)]) 
 +sum(x, na.rm=TRUE) 
 +mean(x) 
 +mean(x, na.rm=TRUE) 
 +var(x, na.rm=TRUE) 
 + 
 + 
 +# MATRICES - n x m tables 
 +m <- matrix(0, nrow=2, ncol=2) 
 +m <- matrix(x, nrow=2) 
 +m <- matrix(x, nrow=2, byrow=TRUE) 
 +t(m) 
 +dim(m) 
 +m <- rbind(m, c(10:12)) 
 +m <- cbind(m, c(13:15)) 
 + 
 +# Referencing elements of a matrix 
 +m[1,1] 
 +m[1:3,1:3] 
 +m <- m[,1:3] 
 +m[1,] 
 +m[,1] 
 +m[ is.na(m) ] <- 0 
 + 
 +my.data <- as.matrix(read.table("http://www.stanford.edu/~kjung/my.data.txt", sep="\t")) 
 + 
 +# The rows and columns of a matrix can have symbolic names
 +rownames(my.data) 
 +colnames(my.data) 
 +my.data['Shaggy', 'B_4']
 +my.data['Shaggy',]
 + 
 + 
 +# LISTS - Generally used like a hash table / associative map, 
 +# though it is also an ordered list
 +# Making a new list
 +x <- list(a=c(1,2,3), b=c('d', 'e', 'f'), c="foo")
 +names(x) 
 + 
 +# Referencing elements of a list 
 +x[[1]] 
 +x[['a']] 
 +x$a 
 + 
 +# Modifying elements of a list 
 +x$a <- 3.14159 
 + 
 +# unlist() 
 +# Some R functions return lists, and we want to get vectors - we can 
 +# convert a list into a vector using unlist(...) 
 +strsplit("a_b_c", split="_")
 +unlist(strsplit("a_b_c", split="_")) 
 + 
 + 
 +# FACTORS - R's representation of categorical values. 
 +f <- factor(rep(c('a', 'b'), 10)) 
 +levels(f) 
 + 
 +# DATA FRAMES 
 +# Tables with heterogeneous columns. 
 +pheno.data <- read.table("http://www.stanford.edu/~kjung/pheno.data.txt", sep="\t")
 + 
 +# Reference elements of a data frame like it was a matrix, or using 
 +# column names 
 +pheno.data[1:3,1:3] 
 +pheno.data$plays.tuba 
 + 
 +# ITERATING OVER DATA 
 + 
 +# for loops 
 +means <- numeric(nrow(my.data)) 
 +names(means) <- rownames(my.data) 
 +for (i in 1:nrow(my.data)) { 
 +  means[i] <- mean(my.data[i,], na.rm=TRUE) 
 +}
 + 
 +# apply 
 +means <- numeric(nrow(my.data)) 
 +means <- apply(my.data, MARGIN=1, FUN=mean, na.rm=TRUE) 
 + 
 +# rowMeans, colMeans 
 +means <- rowMeans(my.data, na.rm=TRUE) 
 + 
 +# lapply 
 + 
 +# Printing stuff out 
 + 
 +# cat - print something to STDOUT 
 +cat('a', 'b', '\n')
 +cat('a', 'b', '\n', sep="_")
 +cat(c(1:4), "\n")
 + 
 + 
 +# sprintf 
 +u <- 3 
 +v <- 3.14 
 +s <- "Hello" 
 +sprintf("%g %f %s", u, v, s) 
 +sprintf("%g %.3f %s", u, v, s) 
 +cat(sprintf("%g %.3f %s\n", u, v, s)) 
 + 
 +# Getting stuff into and out of R 
 + 
 +# read.table, write.table 
 +# We saw these above - read through the help page for these functions to get 
 +# a feel for the available options. 
 +# read.table 
 +# write.table 
 + 
 +# save, load - use these with large data structures as 
 +save(my.data, file="my.data.RData")
 +rm(my.data) 
 +my.data 
 +load(file="my.data.RData")
 +my.data 
 +     
 + 
 +# If you want to save everything in your session, use save.image 
 +save.image(file="my.image.RData")
 +ls()  # Lists all bindings in your session 
 +rm(list=ls()) # Remove all bindings 
 +load(file="my.image.RData")  
 +ls() 
 + 
 +</code> 
 + 
 + 
 +====Problem Set 1 Hints and Other Tricks==== 
 +<code> 
 +setwd('/Users/liuyipei/BMI215/Module 1 - Nick/DrugSafety-Homework')
 +dz<-read.csv('single_drug_event_frequencies.csv')
 +head(dz) 
 + 
 +cd1<-read.csv('cholesterol_drugs.txt', col.names=F) 
 +colnames(cd1)<-c('singlet')
 +cd1$chole<-'choles' 
 +head(cd0) 
 + 
 +cd0<-data.frame(singlet=setdiff(as.character(unique(dz$singlet)), cd1$singlet),chole=0) 
 +cd0$chole<-'noncholes' 
 +cd.table<-rbind(cd1, cd0) 
 +head(cd1) 
 +head(cd0) 
 +head(cd.table) 
 + 
 +q1.table <- merge(cd.table, dz) 
 +q1.table$hifreq <- ifelse(q1.table$freq > 0.1, 'hifr', 'lofr')
 +head(q1.table) 
 +nrow(q1.table) 
 +ncol(q1.table) 
 + 
 +sum(q1.table$chole == 'choles')
 +sum(q1.table$chole == 'noncholes')
 +table(q1.table$chole, q1.table$hifreq) 
 + 
 +library(plyr) 
 +hard.work<-function(x){ 
 +  c(nrow(x), ncol(x), x$chole[1]=='choles')
 +}
 +ddply(q1.table, .(singlet), .fun=hard.work)->lets.talk.about.what.happened 
 +head(lets.talk.about.what.happened) 
 +dim(lets.talk.about.what.happened) 
 +summary(lets.talk.about.what.happened) 
 +colnames(lets.talk.about.what.happened)<-c('singlet', 'row.count', 'col.count', 'ch.drug')
 + 
 +library(caTools) 
 +my.x<-c(0:10)*0.1 
 +my.y<-c(0,1,3,5,6,7,7,9,9,10,10)*0.1 
 +plot(my.x, my.y) 
 +trapz(my.x, my.y) 
 + 
 +data.frame(a=c(8,12),b=c(31,41))->t
 +
 +fisher.test(t)->f.t 
 +ls(f.t) 
 +f.t$p.value 
 +f.t$conf.int 
 + 
 +</code>
r-tutorial.1318488868.txt.gz · Last modified: 2011/10/12 23:54 by liuyipei