## ----include=FALSE, eval=TRUE--------------------------------------------
library(Biobase)
library(knitr)
library(reshape2)
library(ggplot2)

## ----init_monocle, include=FALSE, cache=FALSE, eval=TRUE-----------------
# Attach monocle and load the HSMM object that every later chunk in this
# script reads and updates (presumably the example dataset shipped with
# monocle -- confirm against the package's data index).
library(monocle)
data(HSMM)

## ----load_data_tables, eval=FALSE----------------------------------------
#  #not run
#  fpkm_matrix <- read.table("fpkm_matrix.txt")
#  sample_sheet <- read.delim("cell_sample_sheet.txt")
#  gene_ann <- read.delim("gene_annotations.txt")
#  pd <- new("AnnotatedDataFrame", data = sample_sheet)
#  fd <- new("AnnotatedDataFrame", data = gene_ann)
#  HSMM <- newCellDataSet(as.matrix(fpkm_matrix), phenoData = pd, featureData = fd)

## ----detect_genes, eval=TRUE---------------------------------------------
# Run monocle's gene detection with an expression threshold of 0.1 and keep
# the updated CellDataSet. The filter below relies on the num_cells_expressed
# feature column being available afterwards.
HSMM <- detectGenes(HSMM, min_expr = 0.1)

gene_info <- fData(HSMM)
print(head(gene_info))

# Row names of the genes whose num_cells_expressed is at least 50.
# which() drops NA comparisons, matching what subset() would do.
expressed_genes <- row.names(gene_info)[
  which(with(gene_info, num_cells_expressed >= 50))
]

## ----show_pData, eval = TRUE---------------------------------------------
# Show the first rows of the per-cell annotation table.
print(head(pData(HSMM)))

## ----select_cells, eval = FALSE------------------------------------------
#  valid_cells <- row.names(subset(pData(HSMM), Cells.in.Well == 1 & Control == FALSE & Clump == FALSE & Debris == FALSE & Mapped.Fragments > 1000000))
#  HSMM <- HSMM[,valid_cells]

## ----lognormal_plot, eval=TRUE, fig.width = 3, fig.height = 2, fig.align="center"----
# Log-transform each value in the expression matrix.
L <- log(exprs(HSMM[expressed_genes,]))

# An expression value of 0 becomes -Inf under log(). A single -Inf makes that
# gene's mean -Inf, so scale() would turn the gene's entire row into NaN and
# the gene would silently vanish from the density plot. Mark non-finite values
# as missing instead: scale() ignores NAs when computing each gene's center
# and spread, so every gene is standardized over the cells where it has a
# finite log expression.
L[!is.finite(L)] <- NA

# Standardize each gene (row) so that they are all on the same scale; scale()
# works column-wise, hence the transposes. Then melt the matrix into long
# format with reshape2 so we can plot it easily.
melted_dens_df <- melt(t(scale(t(L))))

# Plot the distribution of the standardized gene expression values, with the
# standard normal density overlaid in red for comparison.
qplot(value, geom="density", data=melted_dens_df) +
  stat_function(fun = dnorm, size=0.5, color='red') +
  xlab("Standardized log(FPKM)") +
  ylab("Density")

## ----select_genes, eval=TRUE---------------------------------------------
# Short names of the marker genes used in the first differential test.
marker_symbols <- c(
  "MEF2C", "MEF2D", "MYF5", "ANPEP", "PDGFRA",
  "MYOG", "TPM1", "TPM2", "MYH2", "MYH3", "NCAM1", "TNNT1", "TNNT2", "TNNC1",
  "CDK1", "CDK2", "CCNB1", "CCNB2", "CCND1", "CCNA1", "ID1"
)

# Translate the short names into feature row names, keeping the row order of
# the feature table.
gene_info <- fData(HSMM)
marker_genes <- row.names(gene_info)[
  with(gene_info, gene_short_name %in% marker_symbols)
]

## ----basic_diff, eval=TRUE-----------------------------------------------
# Run the differential test on the marker genes only, using Media as the
# explanatory variable in the full model.
marker_cds <- HSMM[marker_genes, ]
diff_test_res <- differentialGeneTest(
  marker_cds,
  fullModelFormulaStr = "expression~Media"
)

# Keep the genes that are significant at an FDR < 10%.
# which() drops NA q-values, matching what subset() would do.
passes_fdr <- which(with(diff_test_res, qval < 0.1))
sig_genes <- diff_test_res[passes_fdr, , drop = FALSE]

# Join on row names to attach the HUGO symbols and other featureData.
sig_genes <- merge(fData(HSMM), sig_genes, by = "row.names")
sig_genes[, c("gene_short_name", "pval", "qval")]

## ----plot_myog_jitter, eval=TRUE, fig.width = 4, fig.height = 2, fig.align="center"----
# Subset the data to the MYOG and ID1 features and plot their expression
# grouped by Media, two panels per row.
gene_info <- fData(HSMM)
myog_id1_rows <- row.names(gene_info)[
  with(gene_info, gene_short_name %in% c("MYOG", "ID1"))
]
MYOG_ID1 <- HSMM[myog_id1_rows, ]
plot_genes_jitter(MYOG_ID1, grouping = "Media", ncol = 2)

## ----ordering_not_run, eval=FALSE----------------------------------------
#  #not run
#  diff_test_res <- differentialGeneTest(HSMM[expressed_genes,], fullModelFormulaStr="expression~Media")
#  ordering_genes <- row.names (subset(diff_test_res, qval < 0.01))

## ----select_ordering_cells, eval=TRUE------------------------------------
# Row names of the genes from the most recent differential test with a
# q-value below 0.1 (NA q-values are excluded).
significant_ids <- row.names(diff_test_res)[
  which(with(diff_test_res, qval < 0.1))
]

# Only use genes that are detectably expressed in a sufficient number of cells.
ordering_genes <- intersect(significant_ids, expressed_genes)

## ----set_ordering_filter, eval=TRUE--------------------------------------
# Hand the selected ordering_genes to monocle. Every call in this sequence
# takes HSMM and reassigns its result to HSMM, so the steps build on one
# another and must run in this order.
HSMM <- setOrderingFilter(HSMM, ordering_genes)

## ----reduce_dimension, eval=TRUE-----------------------------------------
# Dimensionality-reduction step. use_irlba=FALSE is passed straight through to
# monocle (presumably it disables the irlba-based decomposition -- confirm in
# the reduceDimension documentation).
HSMM <- reduceDimension(HSMM, use_irlba=FALSE)

## ----order_cells, eval=TRUE----------------------------------------------
# Order the cells, requesting 2 paths and a reversed ordering.
# NOTE(review): the State and Pseudotime values used further down are
# presumably assigned by this call -- confirm against the orderCells docs.
HSMM <- orderCells(HSMM, num_paths=2, reverse=TRUE)

## ----plot_ordering_mst, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center", warning=FALSE----
# Draw the spanning tree for the ordered cells.
plot_spanning_tree(HSMM)

## ----plot_markers, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center"----
# Restrict the data to the expressed genes and to the cells whose State is
# not 3. HSMM_filtered is reused by later chunks.
outside_state_3 <- pData(HSMM)$State != 3
HSMM_filtered <- HSMM[expressed_genes, outside_state_3]

# Look up the feature row names of three genes by short name.
filtered_gene_info <- fData(HSMM_filtered)
my_genes <- row.names(filtered_gene_info)[
  with(filtered_gene_info, gene_short_name %in% c("CDK1", "MEF2C", "MYH3"))
]

# Plot those genes along pseudotime, colouring the cells by Hours.
cds_subset <- HSMM_filtered[my_genes, ]
plot_genes_in_pseudotime(cds_subset, color_by = "Hours")

## ----setup_test_genes, eval=TRUE-----------------------------------------
# Pick the genes to test by their short names.
state_test_symbols <- c("TBP", "MYH3", "NCAM1", "PDGFRA", "ANPEP")
gene_info <- fData(HSMM)
to_be_tested <- row.names(gene_info)[
  with(gene_info, gene_short_name %in% state_test_symbols)
]

# Keep only the cells whose State is not 1.
outside_state_1 <- pData(HSMM)$State != 1
cds_subset <- HSMM[to_be_tested, outside_state_1]

## ----all_in_one_test, eval = TRUE----------------------------------------
# Test the selected genes with State as the explanatory variable, then join
# the results to the feature annotations by row name and show the key columns.
state_test_res <- differentialGeneTest(
  cds_subset,
  fullModelFormulaStr = "expression~State"
)
diff_test_res <- merge(fData(HSMM), state_test_res, by = "row.names")
diff_test_res[, c("gene_short_name", "pval", "qval")]

## ----jitter_plot_diff_res, eval=TRUE, fig.width = 8, fig.height = 2.5, fig.align="center"----
# Jitter plot of the tested genes in a single row of panels, coloured by
# Media, with the trend overlay enabled.
plot_genes_jitter(
  cds_subset,
  color_by = "Media",
  nrow = 1,
  ncol = NULL,
  plot_trend = TRUE
)

## ----piecewise_test, eval=FALSE------------------------------------------
#  full_model_fits <- fitModel(cds_subset,  modelFormulaStr="expression~State")
#  reduced_model_fits <- fitModel(cds_subset, modelFormulaStr="expression~1")
#  diff_test_res <- compareModels(full_model_fits, reduced_model_fits)
#  diff_test_res

## ----setup_test_genes_pt, eval=TRUE--------------------------------------
# Pick the genes to test against pseudotime by their short names.
pseudotime_symbols <- c("MYH3", "MEF2C", "CCNB2", "TNNT1")
gene_info <- fData(HSMM)
to_be_tested <- row.names(gene_info)[
  with(gene_info, gene_short_name %in% pseudotime_symbols)
]

# Keep only the cells whose State is not 3.
outside_state_3 <- pData(HSMM)$State != 3
cds_subset <- HSMM[to_be_tested, outside_state_3]

## ----piecewise_test_pt, eval=TRUE----------------------------------------
# Full model: expression as a function of sm.ns(Pseudotime).
diff_test_res <- differentialGeneTest(
  cds_subset,
  fullModelFormulaStr = "expression~sm.ns(Pseudotime)"
)

## ----all_in_one_test_pt, eval=TRUE---------------------------------------
# Join the test results to the feature annotations by row name and show the
# key columns.
diff_test_res <- merge(fData(HSMM), diff_test_res, by = "row.names")
diff_test_res[, c("gene_short_name", "pval", "qval")]

## ----plot_diff_res_pt, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center"----
# Plot the tested genes along pseudotime, colouring the cells by Hours.
plot_genes_in_pseudotime(cds_subset, color_by = "Hours")

## ----plot_diff_res_multi, eval=TRUE, fig.width = 8, fig.height = 4, fig.align="center"----
# Pick the genes for the multi-factor test by their short names.
multi_factor_symbols <- c("MT2A", "REXO2", "HDAC4")
gene_info <- fData(HSMM)
to_be_tested <- row.names(gene_info)[
  with(gene_info, gene_short_name %in% multi_factor_symbols)
]

# Keep the cells whose Media is "DM" and whose State is not 1.
cell_info <- pData(HSMM)
dm_outside_state_1 <- cell_info$Media == "DM" & cell_info$State != 1
cds_subset <- HSMM[to_be_tested, dm_outside_state_1]

# Compare a full model with State, Hours and their interaction against a
# reduced model that contains Hours only.
diff_test_res <- differentialGeneTest(
  cds_subset,
  fullModelFormulaStr = "expression~State * Hours",
  reducedModelFormulaStr = "expression~Hours"
)

# Join the results to the feature annotations by row name and show the key
# columns.
diff_test_res <- merge(fData(cds_subset), diff_test_res, by = "row.names")
diff_test_res[, c("gene_short_name", "pval", "qval")]

# Jitter plot grouped by Hours and coloured by State, one panel per
# feature_label with independent y axes.
plot_genes_jitter(
  cds_subset,
  grouping = "Hours",
  color_by = "State",
  plot_trend = TRUE
) +
  facet_wrap(~ feature_label, scales = "free_y")

## ----fit_full_model_for_cluster, eval=TRUE-------------------------------
# Draw 100 feature rows at random from the filtered data set and fit the
# pseudotime model to each of them.
# NOTE(review): no seed is set before sample(), so the selected genes (and
# everything derived from them below) differ from run to run.
n_filtered_genes <- nrow(fData(HSMM_filtered))
sampled_rows <- sample(n_filtered_genes, 100)
sampled_gene_cds <- HSMM_filtered[sampled_rows, ]
full_model_fits <- fitModel(
  sampled_gene_cds,
  modelFormulaStr = "expression~sm.ns(Pseudotime, df=3)"
)

## ----extract_response_curve, eval=TRUE-----------------------------------
# Build the response matrix from the fitted models and report its dimensions.
expression_curve_matrix <- responseMatrix(full_model_fits)
dim(expression_curve_matrix)

## ----cluster_and_plot, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center"----
# Cluster the response matrix into k = 4 groups and plot the clusters.
# NOTE(review): the clusters come from the randomly sampled genes, while the
# CellDataSet passed to plot_clusters() is subset to ordering_genes -- confirm
# that plot_clusters() only needs the cell-level annotations from it.
clusters <- clusterGenes(expression_curve_matrix, k = 4)
ordering_gene_cds <- HSMM_filtered[ordering_genes, ]
plot_clusters(ordering_gene_cds, clusters)

## ----citation, eval=TRUE-------------------------------------------------
# Print the citation entry for the monocle package.
citation("monocle")

## ----sessi---------------------------------------------------------------
# Print the R session details for this run.
sessionInfo()

