# Load the previously saved Seurat object from disk.
seurat.obj.big <- readRDS("my_seurat_object")

### Construct the cds object
# Extract expression data, phenotype data, and feature data from the
# SeuratObject.
# The counts are coerced directly to a sparse matrix: routing them through
# as.matrix() first would build a dense copy of the whole matrix only to
# sparsify it again.
data <- as(seurat.obj.big@assays$RNA@counts, "sparseMatrix")
pData <- seurat.obj.big@meta.data
# stringsAsFactors = FALSE keeps gene_short_name a character column on
# R < 4.0, where data.frame() converts strings to factors by default.
fData <- data.frame(
  gene_short_name = row.names(data),
  row.names = row.names(data),
  stringsAsFactors = FALSE
)

# Construct monocle cds
monocle_cds <- monocle3::new_cell_data_set(
  expression_data = data,
  cell_metadata = pData,
  gene_metadata = fData
)

# Provide cluster ids to garnett.
# Fail loudly when the Seurat clustering column is absent; otherwise the
# assignment below would receive NULL and no garnett_cluster column would be
# created.
if (!"seurat_clusters" %in% colnames(pData(monocle_cds))) {
  stop(
    "Cell metadata has no 'seurat_clusters' column to copy into ",
    "'garnett_cluster'.",
    call. = FALSE
  )
}
pData(monocle_cds)$garnett_cluster <- pData(monocle_cds)$seurat_clusters
>PMCs
CellType: PMCs
references: https://www.cell.com/cell-reports/pdf/S2211-1247(19)30525-X.pdf
>GMCs
CellType: GMCs
>MSCs
CellType: MSCs
>Enterocytes
CellType: Enterocytes
>Enteroendocrine
CellType: Enteroendocrine
>PCs
CellType: PCs
>Cancer Cells
CellType: Cancer Cells
>Goblet Cells
CellType: Goblet Cells
>Neck-like Cells
CellType: Neck-like Cells
>Chief Cells
CellType: Chief Cells
>Endocrine Cells
CellType: Endocrine Cells
>Parietal Cells
CellType: Parietal Cells
# train classifier
# Fit the Garnett classifier on the monocle cds using the marker file.
# Both the cds row names and the marker file are declared as gene symbols,
# and org.Hs.eg.db is the annotation database handed to Garnett for ID
# conversion.
gastric_classifier <- train_cell_classifier(
  cds = monocle_cds,
  marker_file = garnett.marker.file,
  db = org.Hs.eg.db,
  # gene ID conventions of the two inputs
  cds_gene_id_type = "SYMBOL",
  marker_file_gene_id_type = "SYMBOL",
  num_unknown = 50
)
There are 12 cell type definitions
Error in value[[3L]](cond) : Garnett cannot convert the gene IDs using the db and types provided. Please check that your db, cds_gene_id_type and marker_file_gene_id_type parameters are correct. Please note that the
cds_gene_id_type refers to the type of the row.names of the feature (gene) table in your cds. Conversion error: Error in .testForValidKeys(x, keys, keytype, fks): None of the keys entered are valid keys for 'SYMBOL'. Please use the keys method to see a listing of valid arguments.
> head(fData(monocle_cds))
DataFrame with 6 rows and 1 column
gene_short_name
<factor>
STPG1 STPG1
NIPAL3 NIPAL3
AK2 AK2
KDM1A KDM1A
DNAJC11 DNAJC11
E2F2 E2F2
> head(exprs(monocle_cds)[,1:10])
6 x 10 sparse Matrix of class "dgCMatrix"
[[ suppressing 10 column names ‘CAG1_AAACCTGCACAGGTTT-1’, ‘CAG1_AAACCTGCATCCTTGC-1’, ‘CAG1_AAACGGGGTCTTCGTC-1’ ... ]]
STPG1 . . . . . . . . . .
NIPAL3 . . . . . . . . . .
AK2 1 . . . . . . . . .
KDM1A 1 . . . . . 1 . . .
DNAJC11 . . . . . . . . . .
E2F2 . . . . . . . . . .
> sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)
Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.3.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats4 grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] garnett_0.2.11 monocle3_0.2.1.9 SingleCellExperiment_1.8.0 org.Hs.eg.db_3.10.0 AnnotationDbi_1.48.0 DESeq2_1.26.0 SummarizedExperiment_1.16.1 DelayedArray_0.12.3
[9] BiocParallel_1.20.1 matrixStats_0.56.0 Biobase_2.46.0 GenomicRanges_1.38.0 GenomeInfoDb_1.22.1 IRanges_2.20.2 S4Vectors_0.24.4 BiocGenerics_0.32.0
[17] cowplot_1.0.0 patchwork_1.0.0 forcats_0.5.0 stringr_1.4.0 purrr_0.3.4 readr_1.3.1 tidyr_1.0.2 tibble_3.0.1
[25] tidyverse_1.3.0 umap_0.2.5.0 reticulate_1.15 Rtsne_0.15 gridGraphics_0.5-0 beanplot_1.2 RColorBrewer_1.1-2 scales_1.1.0
[33] viridis_0.5.1 viridisLite_0.3.0 ggridges_0.5.2 ggplot2_3.3.0 svd_0.5 qlcMatrix_0.9.7 sparsesvd_0.2 slam_0.1-47
[41] Matrix_1.2-18 dplyr_0.8.5 Seurat_3.1.5 devtools_2.3.0 usethis_1.6.1 robustbase_0.93-6
loaded via a namespace (and not attached):
[1] tidyselect_1.0.0 RSQLite_2.2.0 htmlwidgets_1.5.1 docopt_0.6.1 munsell_0.5.0 codetools_0.2-16 ica_1.0-2 future_1.17.0 withr_2.2.0 colorspace_1.4-1
[11] knitr_1.28 rstudioapi_0.11 ROCR_1.0-7 listenv_0.8.0 GenomeInfoDbData_1.2.2 bit64_0.9-7 rhdf5_2.30.1 rprojroot_1.3-2 vctrs_0.3.0 generics_0.0.2
[21] lambda.r_1.2.4 xfun_0.13 R6_2.4.1 clue_0.3-57 rsvd_1.0.3 locfit_1.5-9.4 bitops_1.0-6 assertthat_0.2.1 nnet_7.3-14 gtable_0.3.0
[31] npsurv_0.4-0 globals_0.12.5 processx_3.4.2 rlang_0.4.6 genefilter_1.68.0 GlobalOptions_0.1.1 splines_3.6.0 lazyeval_0.2.2 acepack_1.4.1 checkmate_2.0.0
[41] broom_0.5.6 BiocManager_1.30.10 reshape2_1.4.4 modelr_0.1.7 backports_1.1.6 Hmisc_4.4-0 tools_3.6.0 ellipsis_0.3.0 gplots_3.0.3 sessioninfo_1.1.1
[51] Rcpp_1.0.4.6 plyr_1.8.6 base64enc_0.1-3 zlibbioc_1.32.0 RCurl_1.98-1.2 ps_1.3.2 prettyunits_1.1.1 rpart_4.1-15 openssl_1.4.1 pbapply_1.4-2
[61] GetoptLong_0.1.8 zoo_1.8-8 haven_2.2.0 ggrepel_0.8.2 cluster_2.1.0 fs_1.4.1 magrittr_1.5 futile.options_1.0.1 data.table_1.12.8 RSpectra_0.16-0
[71] circlize_0.4.9 reprex_0.3.0 lmtest_0.9-37 RANN_2.6.1 packrat_0.5.0 fitdistrplus_1.0-14 pkgload_1.0.2 xtable_1.8-4 hms_0.5.3 lsei_1.2-0
[81] XML_3.99-0.3 jpeg_0.1-8.1 readxl_1.3.1 gridExtra_2.3 shape_1.4.4 testthat_2.3.2 compiler_3.6.0 KernSmooth_2.23-17 crayon_1.3.4 htmltools_0.4.0
[91] Formula_1.2-3 geneplotter_1.64.0 lubridate_1.7.8 DBI_1.1.0 formatR_1.7 dbplyr_1.4.3 ComplexHeatmap_2.2.0 MASS_7.3-51.6 rappdirs_0.3.1 cli_2.0.2
[101] gdata_2.18.0 igraph_1.2.5 pkgconfig_2.0.3 foreign_0.8-76 plotly_4.9.2.1 xml2_1.3.2 annotate_1.64.0 ArchR_0.9.3 XVector_0.26.0 rvest_0.3.5
[111] callr_3.4.3 digest_0.6.25 sctransform_0.2.1 RcppAnnoy_0.0.16 tsne_0.1-3 cellranger_1.1.0 leiden_0.3.3 htmlTable_1.13.3 uwot_0.1.8 curl_4.3
[121] gtools_3.8.2 rjson_0.2.20 lifecycle_0.2.0 nlme_3.1-147 jsonlite_1.6.1 Rhdf5lib_1.8.0 futile.logger_1.4.3 desc_1.2.0 askpass_1.1 fansi_0.4.1
[131] pillar_1.4.3 lattice_0.20-41 ggrastr_0.1.7 httr_1.4.1 DEoptimR_1.0-8 pkgbuild_1.0.7 survival_3.1-12 glue_1.4.0 remotes_2.1.1 rly_1.6.2
[141] png_0.1-7 bit_1.1-15.2 stringi_1.4.6 blob_1.2.1 latticeExtra_0.6-29 caTools_1.18.0 memoise_1.1.0 irlba_2.3.3 future.apply_1.5.0 ape_5.3