The goal of ‘readgmt’ is to easily import a GMT file from MSigDB as a list of gene sets.
See the full documentation here: jhrcook.github.io/readgmt
This package is not available on CRAN (it is much too simple for that), so it must be installed from GitHub with
devtools::install_github("jhrcook/readgmt")
Here is an example of reading in the KEGG gene set (downloaded on 2019/04/01).
library(readgmt)
kegg_path <- system.file("extdata", "c2.cp.kegg.v6.2.symbols.gmt.txt",
package = "readgmt")
kegg <- read_gmt(kegg_path)
head(names(kegg))
#> [1] "KEGG_GLYCOLYSIS_GLUCONEOGENESIS"
#> [2] "KEGG_CITRATE_CYCLE_TCA_CYCLE"
#> [3] "KEGG_PENTOSE_PHOSPHATE_PATHWAY"
#> [4] "KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS"
#> [5] "KEGG_FRUCTOSE_AND_MANNOSE_METABOLISM"
#> [6] "KEGG_GALACTOSE_METABOLISM"
head(kegg$KEGG_COLORECTAL_CANCER)
#> [1] "JUN" "CASP9" "RAF1" "BIRC5" "APC2" "RHOA"
This can also be read in as a “tidy” tibble, or converted after the reading it in as a list.
# read in as a tibble
read_gmt(kegg_path, tidy = TRUE)
#> # A tibble: 12,875 x 2
#> gene_set gene
#> <chr> <chr>
#> 1 KEGG_GLYCOLYSIS_GLUCONEOGENESIS ACSS2
#> 2 KEGG_GLYCOLYSIS_GLUCONEOGENESIS GCK
#> 3 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PGK2
#> 4 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PGK1
#> 5 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PDHB
#> 6 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PDHA1
#> 7 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PDHA2
#> 8 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PGM2
#> 9 KEGG_GLYCOLYSIS_GLUCONEOGENESIS TPI1
#> 10 KEGG_GLYCOLYSIS_GLUCONEOGENESIS ACSS1
#> # … with 12,865 more rows
# convert from list format to tibble
tidy_gmt(kegg)
#> # A tibble: 12,875 x 2
#> gene_set gene
#> <chr> <chr>
#> 1 KEGG_GLYCOLYSIS_GLUCONEOGENESIS ACSS2
#> 2 KEGG_GLYCOLYSIS_GLUCONEOGENESIS GCK
#> 3 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PGK2
#> 4 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PGK1
#> 5 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PDHB
#> 6 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PDHA1
#> 7 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PDHA2
#> 8 KEGG_GLYCOLYSIS_GLUCONEOGENESIS PGM2
#> 9 KEGG_GLYCOLYSIS_GLUCONEOGENESIS TPI1
#> 10 KEGG_GLYCOLYSIS_GLUCONEOGENESIS ACSS1
#> # … with 12,865 more rows