## ---- eval=FALSE-----------------------------------------------------------
#  source("https://bioconductor.org/biocLite.R")
#  biocLite("zinbwave")

## ----options, include=FALSE, echo=FALSE------------------------------------
knitr::opts_chunk$set(warning=FALSE, error=FALSE, message=FALSE)

## ----load_packs------------------------------------------------------------
library(zinbwave)
library(scRNAseq)
library(matrixStats)
library(magrittr)
library(ggplot2)
library(biomaRt)

# Register BiocParallel Serial Execution
BiocParallel::register(BiocParallel::SerialParam())

## ----pollen----------------------------------------------------------------
data("fluidigm")
fluidigm

table(colData(fluidigm)$Coverage_Type)

## ----filter----------------------------------------------------------------
filter <- rowSums(assay(fluidigm)>5)>5
table(filter)

fluidigm <- fluidigm[filter,]

## ----variance--------------------------------------------------------------
assay(fluidigm) %>% log1p %>% rowVars -> vars
names(vars) <- rownames(fluidigm)
vars <- sort(vars, decreasing = TRUE)
head(vars)

fluidigm <- fluidigm[names(vars)[1:100],]

## ----zinb------------------------------------------------------------------
zinb <- zinbFit(fluidigm, K=2, epsilon=1000)

## ----zinb_plot-------------------------------------------------------------
W <- getW(zinb)
colnames(W) <- paste0("W", 1:2)

data.frame(W, bio=colData(fluidigm)$Biological_Condition,
           coverage=colData(fluidigm)$Coverage_Type) %>%
    ggplot(aes(W1, W2, colour=bio, shape=coverage)) + geom_point() + 
    scale_color_brewer(type = "qual", palette = "Set1") + theme_classic()

## ----zinb_coverage---------------------------------------------------------
zinb_cov <- zinbFit(fluidigm, K=2, X="~Coverage_Type", epsilon=1000)

## ----zinb_plot2------------------------------------------------------------
W <- getW(zinb_cov)
colnames(W) <- paste0("W", 1:2)

data.frame(W, bio=colData(fluidigm)$Biological_Condition,
           coverage=colData(fluidigm)$Coverage_Type) %>%
    ggplot(aes(W1, W2, colour=bio, shape=coverage)) + geom_point() + 
    scale_color_brewer(type = "qual", palette = "Set1") + theme_classic()

## ----gcc-------------------------------------------------------------------
mart <- useMart("ensembl")
mart <- useDataset("hsapiens_gene_ensembl", mart = mart)
bm <- getBM(attributes=c('hgnc_symbol', 'start_position',
                         'end_position', 'percentage_gene_gc_content'),
            filters = 'hgnc_symbol',
            values = rownames(fluidigm),
            mart = mart)

bm$length <- bm$end_position - bm$start_position
len <- tapply(bm$length, bm$hgnc_symbol, mean)
len <- len[rownames(fluidigm)]
gcc <- tapply(bm$percentage_gene_gc_content, bm$hgnc_symbol, mean)
gcc <- gcc[rownames(fluidigm)]

## ----rowdata---------------------------------------------------------------
rowData(fluidigm) <- data.frame(gccontent = gcc, length = len)

## ----zinb_gcc--------------------------------------------------------------
zinb_gcc <- zinbFit(fluidigm, K=2, V="~gccontent + log(length)", epsilon=1000)

## ----zinb_plot3------------------------------------------------------------
W <- getW(zinb_gcc)
colnames(W) <- paste0("W", 1:2)

data.frame(W, bio=colData(fluidigm)$Biological_Condition,
           coverage=colData(fluidigm)$Coverage_Type) %>%
    ggplot(aes(W1, W2, colour=bio, shape=coverage)) + geom_point() + 
    scale_color_brewer(type = "qual", palette = "Set1") + theme_classic()

## ----tsne------------------------------------------------------------------
set.seed(93024)

library(Rtsne)
d <- dist(getW(zinb_gcc))
tsne_data <- Rtsne(d, is_distance = TRUE, pca = FALSE, 
                   perplexity=10, max_iter=5000)

data.frame(Dim1=tsne_data$Y[,1], Dim2=tsne_data$Y[,2], 
           bio=colData(fluidigm)$Biological_Condition,
           coverage=colData(fluidigm)$Coverage_Type) %>%
    ggplot(aes(Dim1, Dim2, colour=bio, shape=coverage)) + geom_point() + 
    scale_color_brewer(type = "qual", palette = "Set1") + theme_classic()

## ----zinbwave, eval=FALSE--------------------------------------------------
#  se_norm <- zinbwave(fluidigm, K=2, epsilon=1000, normalizedValues=TRUE,
#                      residuals = TRUE)

## ----zinbwave2-------------------------------------------------------------
se_norm <- zinbwave(fluidigm, fitted_model=zinb, normalizedValues=TRUE,
                    residuals = TRUE)

## ----assays----------------------------------------------------------------
se_norm

## ----dimReduce-------------------------------------------------------------
head(reducedDim(se_norm))

## ---- eval=FALSE-----------------------------------------------------------
#  library(BiocParallel)
#  zinb_res <- zinbFit(fluidigm, K=2, BPPARAM=MulticoreParam(2))

## --------------------------------------------------------------------------
sessionInfo()

