GO/KEGG富集分析enrichment analysis

楼主  收藏   举报   帖子创建时间:  2018-01-26 00:00 回复:0 关注量:161

本教程使用GOstats来对给定的基因symbols做富集分析。


> library("org.Hs.eg.db")
> library("GSEABase")
> library("GOstats")
> genes <- c("AREG", "FKBP5", "CXCL13", "KLF9", "ZC3H12A", "P4HA1", "TLE1", "CREB3L2", "TXNIP", "PBX1", "GJA1", "ITGB8", "CCL3", "CCND2", "KCNJ15", "CFLAR", "CXCL10", "CYSLTR1", "IGFBP7", "RHOB", "MAP3K5", "CAV2", "CAPN2", "AKAP13", "RND3", "IL6ST", "RGS1", "IRF4", "G3BP1", "SEL1L", "VEGFA", "SMAD1", "CCND1", "CLEC3B", "NEB", "AMD1", "PDCD4", "SCD", "TM2D3", "BACH2", "LDLR", "BMPR1B", "RFXAP", "ASPH", "PTK2B", "SLC1A5", "ENO2", "TRPM8", "SATB1", "MIER1", "SRSF1", "ATF3", "CCL5", "MCM6", "GCH1", "CAV1", "SLC20A1")
> ##GO BP enrichment analysis
> goAnn <- get("org.Hs.egGO")
> universe <- Lkeys(goAnn)
> head(universe)
[1] "1" "10" "100" "1000" "10000" "100008586"
> entrezIDs <- mget(genes, org.Hs.egSYMBOL2EG, ifnotfound=NA)
> head(entrezIDs)
$AREG
[1] "374"

$FKBP5
[1] "2289"

$CXCL13
[1] "10563"

$KLF9
[1] "687"

$ZC3H12A
[1] "80149"

$P4HA1
[1] "5033"

> entrezIDs <- as.character(entrezIDs)
> head(entrezIDs)
[1] "374" "2289" "10563" "687" "80149" "5033"
> params <- new("GOHyperGParams",
+ geneIds=entrezIDs,
+ universeGeneIds=universe,
+ annotation="org.Hs.eg.db",
+ ontology="BP",
+ pvalueCutoff=0.01,
+ conditional=FALSE,
+ testDirection="over")
> over <- hyperGTest(params)
> library(Category)
> glist <- geneIdsByCategory(over)
> glist <- sapply(glist, function(.ids) {
+ .sym <- mget(.ids, envir=org.Hs.egSYMBOL, ifnotfound=NA)
+ .sym[is.na(.sym)] <- .ids[is.na(.sym)]
+ paste(.sym, collapse=";")
+ })
> head(glist)
GO:0002690
"PTK2B;CXCL10;CCL3;CCL5;VEGFA;CXCL13"
GO:0002688
"PTK2B;CXCL10;CCL3;CCL5;VEGFA;CXCL13"
GO:0048518
"RHOB;ASPH;ATF3;CCND1;BMPR1B;CAV1;CAV2;CCND2;PTK2B;GJA1;IL6ST;CXCL10;IRF4;LDLR;SMAD1;MAP3K5;PBX1;RFXAP;CCL3;CCL5;SLC20A1;TLE1;CLEC3B;VEGFA;CFLAR;CXCL13;TXNIP;CYSLTR1;AKAP13;MIER1;CREB3L2;ZC3H12A"
GO:1901623
"PTK2B;CCL3;CCL5;CXCL13"
GO:0002687
"PTK2B;CXCL10;CCL3;CCL5;VEGFA;CXCL13"
GO:2000403
"PTK2B;CCL3;CCL5;CXCL13"
> bp <- summary(over)
> bp$Symbols <- glist[as.character(bp$GOBPID)]
> head(bp)
GOBPID Pvalue OddsRatio ExpCount Count Size Term
1 GO:0002690 3.490023e-08 39.24490 0.19267044 6 52 positive regulation of leukocyte chemotaxis
2 GO:0002688 9.278865e-08 32.80297 0.22601725 6 61 regulation of leukocyte chemotaxis
3 GO:0048518 1.170957e-07 4.28014 13.56103476 32 3660 positive regulation of biological process
4 GO:1901623 1.176400e-07 128.80174 0.04816761 4 13 regulation of lymphocyte chemotaxis
5 GO:0002687 1.496978e-07 30.05918 0.24454325 6 66 positive regulation of leukocyte migration
6 GO:2000403 2.969857e-07 96.58170 0.05928321 4 16 positive regulation of lymphocyte migration
Symbols
1 PTK2B;CXCL10;CCL3;CCL5;VEGFA;CXCL13
2 PTK2B;CXCL10;CCL3;CCL5;VEGFA;CXCL13
3 RHOB;ASPH;ATF3;CCND1;BMPR1B;CAV1;CAV2;CCND2;PTK2B;GJA1;IL6ST;CXCL10;IRF4;LDLR;SMAD1;MAP3K5;PBX1;RFXAP;CCL3;CCL5;SLC20A1;TLE1;CLEC3B;VEGFA;CFLAR;CXCL13;TXNIP;CYSLTR1;AKAP13;MIER1;CREB3L2;ZC3H12A
4 PTK2B;CCL3;CCL5;CXCL13
5 PTK2B;CXCL10;CCL3;CCL5;VEGFA;CXCL13
6 PTK2B;CCL3;CCL5;CXCL13
> dim(bp)
[1] 525 8
> ##KEGG enrichment analysis
> keggAnn <- get("org.Hs.egPATH")
> universe <- Lkeys(keggAnn)
> params <- new("KEGGHyperGParams",
+ geneIds=entrezIDs,
+ universeGeneIds=universe,
+ annotation="org.Hs.eg.db",
+ categoryName="KEGG",
+ pvalueCutoff=0.01,
+ testDirection="over")
> over <- hyperGTest(params)
> kegg <- summary(over)
> library(Category)
> glist <- geneIdsByCategory(over)
> glist <- sapply(glist, function(.ids) {
+ .sym <- mget(.ids, envir=org.Hs.egSYMBOL, ifnotfound=NA)
+ .sym[is.na(.sym)] <- .ids[is.na(.sym)]
+ paste(.sym, collapse=";")
+ })
> kegg$Symbols <- glist[as.character(kegg$KEGGID)]
> kegg
KEGGID Pvalue OddsRatio ExpCount Count Size Term Symbols
1 04510 9.643801e-05 7.873256 1.124361 7 200 Focal adhesion CCND1;CAPN2;CAV1;CAV2;CCND2;ITGB8;VEGFA
2 04060 5.487123e-04 5.821855 1.489779 7 265 Cytokine-cytokine receptor interaction BMPR1B;IL6ST;CXCL10;CCL3;CCL5;VEGFA;CXCL13
3 04062 3.739699e-03 5.486219 1.062521 5 189 Chemokine signaling pathway PTK2B;CXCL10;CCL3;CCL5;CXCL13
> library("pathview")
> gIds <- mget(genes, org.Hs.egSYMBOL2EG, ifnotfound=NA)
> gEns <- unlist(gIds)
> gene.data <- rep(1, length(gEns))
> names(gene.data) <- gEns
> for(i in 1:3){pv.out <- pathview(gene.data, pathway.id=as.character(kegg$KEGGID)[i], species="hsa", out.suffix="pathview", kegg.native=T)}

原文来自:http://pgfe.umassmed.edu/ou/archives/3665