###--------------------------------------------------
### Step 1
###--------------------------------------------------
library('biomaRt')
library('affy')
library('gplots')
library('lattice')


###--------------------------------------------------
### Step 2
###--------------------------------------------------
sampleAnnot = read.AnnotatedDataFrame('E-TABM-157.txt',row.names = 'Array.Data.File')
mRNAraw = ReadAffy(phenoData = sampleAnnot, 
  sampleNames = sampleAnnot$Source.Name)


###--------------------------------------------------
### Step 3
###--------------------------------------------------
mRNA = rma(mRNAraw)


###--------------------------------------------------
### Step 4
###--------------------------------------------------
probesetvar = apply(exprs(mRNA), 1, var)
ord = order(probesetvar, decreasing=TRUE)[1:200]
pca = prcomp(t(exprs(mRNA)[ord,]), scale=TRUE)
typeColors = c('Lu'='firebrick1','BaA'='dodgerblue2','BaB'='darkblue')
plot(pca$x[, 1:2], pch=16, col=typeColors[as.character(mRNA$CancerType)], 
     xlab='PC1', ylab='PC2', asp=1)
legend('topleft', c('luminal', 'basal A', 'basal B'), fill=typeColors)


###--------------------------------------------------
### Step 5
###--------------------------------------------------
cghData = read.csv('aCGH.csv', header=TRUE, row.names=1)
cgh = new('ExpressionSet', exprs = as.matrix(cghData[,4:56]))
featureData(cgh) = as(cghData[,1:3], 'AnnotatedDataFrame')


###--------------------------------------------------
### Step 6
###--------------------------------------------------
chr1 = which(featureData(cgh)$Chrom == 1)
clColors = c('MCF10A' = 'dodgerblue3',  'BT483' = 'orange', 'BT549' =
  'olivedrab')
logRatio = exprs(cgh)[chr1, names(clColors)]
kb = rep(featureData(cgh)$kb[chr1], times=ncol(logRatio))
clName = rep(names(clColors), each=nrow(logRatio))

print(xyplot(
             logRatio ~ kb | clName, 
             pch=16, layout=c(1,3), ylim=c(-0.5, 1.1), 
             panel=function(...){
               panel.xyplot(...,col=clColors[panel.number()])
               panel.abline(h=0, col='firebrick2')
             }))


###--------------------------------------------------
### Step 7
###--------------------------------------------------
ensembl = useMart('ensembl', dataset='hsapiens_gene_ensembl')
probes = getBM(attributes = c('affy_hg_u133a', 'start_position'),
  filters = c('chromosome_name', 'with_affy_hg_u133a'),
  values = list(1, TRUE), mart = ensembl)


###--------------------------------------------------
### Step 8
###--------------------------------------------------
xpr = exprs(mRNA)[probes[,'affy_hg_u133a'], c('BT549','BT483')]
pos = probes[,'start_position']
plot(xpr, pch=16, cex=0.5,  col=ifelse(pos>140e6, 'red', 'darkgrey'))


###--------------------------------------------------
### Step 9
###--------------------------------------------------
logRatios = xpr[,2] - xpr[,1]
t.test(logRatios ~ (pos>140e6))


###--------------------------------------------------
### Step 10
###--------------------------------------------------
proteinData = read.csv('mmc6.csv', header=TRUE, row.names=1)
protein = new('ExpressionSet', exprs = as.matrix(proteinData))


###--------------------------------------------------
### Step 11
###--------------------------------------------------
prmax = apply(exprs(protein), 1, max)  
barplot(prmax, las=2)


###--------------------------------------------------
### Step 12
###--------------------------------------------------
hmColors = colorRampPalette(c('white', 'darkblue'))(256)
sideColors = typeColors[as.character(pData(mRNA)[ sampleNames(protein),
  'CancerType'])]
sideColors[is.na(sideColors)] = 'grey'
heatmap.2(exprs(protein)/prmax, col=hmColors, trace='none', 
          ColSideColors = sideColors)


###--------------------------------------------------
### Step 13
###--------------------------------------------------
samples = intersect(sampleNames(protein), sampleNames(mRNA))
samples = intersect(samples, sampleNames(cgh))
mRNA = mRNA[,samples]
protein = protein[,samples]
cgh = cgh[,samples]



###--------------------------------------------------
### Step 14
###--------------------------------------------------
map = getBM(attributes = c('ensembl_gene_id', 'affy_hg_u133a', 
              'hgnc_symbol'), 
  filters = c('hgnc_symbol', 'with_hgnc', 'with_affy_hg_u133a'),
  values = list(featureNames(protein), TRUE, TRUE), 
  mart = ensembl)
head(map)


###--------------------------------------------------
### Step 15
###--------------------------------------------------
geneProbesetsAll = split(map$affy_hg_u133a, map$hgnc_symbol)
table(listLen(geneProbesetsAll))


###--------------------------------------------------
### Step 16
###--------------------------------------------------
geneProbesets = lapply(geneProbesetsAll, 
  function(x) { 
    good = grep('[0-9]._at', x)
    if (length(good)>0) x[good] else x
  })
summaries = lapply(geneProbesets, 
  function(i) {
    colMeans(exprs(mRNA)[i,,drop=FALSE])
  })
summarized_mRNA = do.call(rbind, summaries)


###--------------------------------------------------
### Step 17
###--------------------------------------------------
colors = c('orange','olivedrab')
correlation = cor(summarized_mRNA['AURKA',],
  exprs(protein)['AURKA',],
  method='spearman')
matplot(cbind(
              summarized_mRNA['AURKA', ], 
              log2(exprs(protein)['AURKA', ])),
        type='l', col=colors, lwd=2, lty=1, 
        ylab='mRNA and protein expression levels',
        xlab='cell lines',
        main=bquote(rho==.(round(correlation, 3))))
legend('bottomright', c('mRNA','protein'), fill=colors)


###--------------------------------------------------
### Step 18
###--------------------------------------------------
samples = sampleNames(protein)[c(5,11,16,24)]
v_mRNA  = as.vector(summarized_mRNA[,samples]) 
v_protein = as.vector(exprs(protein)[ rownames(summarized_mRNA),
  samples]) 
print(xyplot(
       v_protein ~ v_mRNA | rep(samples, each = nrow(summarized_mRNA)),
       pch=16, xlab = 'mRNA level', scales=list(y=list(log=TRUE)),
       ylab='protein level'))
