# Install BiocManager first when it is not already available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# orthogene is only available on Bioconductor >= 3.14, so update the
# installed packages (without prompting) when an older release is detected.
# NOTE(review): confirm that updating packages alone is enough to reach
# Bioconductor >= 3.14 on an older installation.
if (BiocManager::version() < "3.14") {
  BiocManager::install(update = TRUE, ask = FALSE)
}
BiocManager::install("orthogene")

# Attach the package and load the exp_mouse dataset.
library(orthogene)
data("exp_mouse")

# "homologene" is used here to keep the demonstration quick;
# method = "gprofiler" (the default) is generally recommended.
method <- "homologene"
It’s not always clear whether a dataset is using the original species gene names, human gene names, or some other species’ gene names.
infer_species
takes a list/matrix/data.frame with genes and
infers the species that they best match to!
For the sake of speed, the genes extracted from gene_df
are tested against genomes from only the following 6 test_species
by default:
- human
- monkey
- rat
- mouse
- zebrafish
- fly
However, you can supply your own list of test_species,
which will automatically be mapped and standardised using map_species.
# Infer which of the default test species the genes in exp_mouse
# best match.
matches <- orthogene::infer_species(
  gene_df = exp_mouse,
  method = method
)
## Preparing gene_df.
## sparseMatrix format detected.
## Extracting genes from rownames.
## 15,259 genes extracted.
## Testing for gene overlap with: human
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: human
## Common name mapping found for human
## 1 organism identified from search: 9606
## Gene table with 19,129 rows retrieved.
## Returning all 19,129 genes from human.
## Testing for gene overlap with: monkey
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: monkey
## Common name mapping found for monkey
## 1 organism identified from search: 9544
## Gene table with 16,843 rows retrieved.
## Returning all 16,843 genes from monkey.
## Testing for gene overlap with: rat
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: rat
## Common name mapping found for rat
## 1 organism identified from search: 10116
## Gene table with 20,616 rows retrieved.
## Returning all 20,616 genes from rat.
## Testing for gene overlap with: mouse
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: mouse
## Common name mapping found for mouse
## 1 organism identified from search: 10090
## Gene table with 21,207 rows retrieved.
## Returning all 21,207 genes from mouse.
## Testing for gene overlap with: zebrafish
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: zebrafish
## Common name mapping found for zebrafish
## 1 organism identified from search: 7955
## Gene table with 20,897 rows retrieved.
## Returning all 20,897 genes from zebrafish.
## Testing for gene overlap with: fly
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: fly
## Common name mapping found for fly
## 1 organism identified from search: 7227
## Gene table with 8,438 rows retrieved.
## Returning all 8,438 genes from fly.
## Top match:
## - species: mouse
## - percent_match: 92%
To create an example dataset, turn the gene names into rat genes.
# Build a rat example by mapping the mouse genes to their rat orthologs.
exp_rat <- orthogene::convert_orthologs(
  gene_df = exp_mouse,
  input_species = "mouse",
  output_species = "rat",
  method = method
)

# Run species inference on the converted dataset.
matches <- orthogene::infer_species(
  gene_df = exp_rat,
  method = method
)
To create an example dataset, turn the gene names into human genes.
# Build a human example by mapping the mouse genes to their human orthologs.
exp_human <- orthogene::convert_orthologs(
  gene_df = exp_mouse,
  input_species = "mouse",
  output_species = "human",
  method = method
)

# Run species inference on the converted dataset.
matches <- orthogene::infer_species(
  gene_df = exp_human,
  method = method
)
test_species
You can even supply test_species
with the name of one of the R packages that
orthogene
gets orthologs from. This will test against all species available
in that particular R package.
For example, by setting test_species="homologene"
we automatically test for the percentage of matching genes
in each of the 20+ species available in homologene.
# Pass the backend name held in `method` ("homologene") as test_species so
# that every species available in that package is tested.
matches <- orthogene::infer_species(
  gene_df = exp_human,
  test_species = method,
  method = method
)
# Print the R version, platform and package versions used for this run.
utils::sessionInfo()
R version 4.2.1 (2022-06-23)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Mojave 10.14.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
locale:
[1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] orthogene_1.2.1 BiocStyle_2.24.0
loaded via a namespace (and not attached):
[1] Rcpp_1.0.9 ape_5.6-2
[3] lattice_0.20-45 tidyr_1.2.1
[5] assertthat_0.2.1 digest_0.6.29
[7] utf8_1.2.2 R6_2.5.1
[9] backports_1.4.1 evaluate_0.16
[11] highr_0.9 httr_1.4.4
[13] ggplot2_3.3.6 pillar_1.8.1
[15] ggfun_0.0.7 yulab.utils_0.0.5
[17] rlang_1.0.6 lazyeval_0.2.2
[19] data.table_1.14.2 car_3.1-0
[21] jquerylib_0.1.4 magick_2.7.3
[23] Matrix_1.5-1 rmarkdown_2.16
[25] labeling_0.4.2 stringr_1.4.1
[27] htmlwidgets_1.5.4 munsell_0.5.0
[29] broom_1.0.1 gprofiler2_0.2.1
[31] compiler_4.2.1 xfun_0.33
[33] pkgconfig_2.0.3 gridGraphics_0.5-1
[35] htmltools_0.5.3 tidyselect_1.1.2
[37] tibble_3.1.8 bookdown_0.29
[39] viridisLite_0.4.1 fansi_1.0.3
[41] dplyr_1.0.10 ggpubr_0.4.0
[43] grid_4.2.1 nlme_3.1-159
[45] jsonlite_1.8.2 gtable_0.3.1
[47] lifecycle_1.0.2 DBI_1.1.3
[49] magrittr_2.0.3 scales_1.2.1
[51] tidytree_0.4.1 cli_3.4.1
[53] stringi_1.7.8 cachem_1.0.6
[55] carData_3.0-5 farver_2.1.1
[57] ggsignif_0.6.3 ggtree_3.4.4
[59] bslib_0.4.0 ellipsis_0.3.2
[61] generics_0.1.3 vctrs_0.4.2
[63] treeio_1.20.2 tools_4.2.1
[65] homologene_1.4.68.19.3.27 ggplotify_0.1.0
[67] glue_1.6.2 purrr_0.3.4
[69] abind_1.4-5 parallel_4.2.1
[71] fastmap_1.1.0 yaml_2.3.5
[73] babelgene_22.9 colorspace_2.0-3
[75] BiocManager_1.30.18 rstatix_0.7.0
[77] aplot_0.1.7 plotly_4.10.0
[79] knitr_1.40 patchwork_1.1.2
[81] sass_0.4.2