
谢大飞
V1
2023/05/03阅读:36主题:默认主题
GO富集分析和KEGG分析_ClusterProfiler
GO 富集分析
GO富集分析用到的包是ClusterProfiler,因为研究的物种不是特异性的物种,所以需要利用基因组的注释信息自己构建OrgDB
构建OrgDB的流程是参照基因课的15天入门生物信息学

安装加载需要的R包
BiocManager::install('clusterProfiler')
library(clusterProfiler)
BiocManager::install("tidyverse")
library(tidyverse)

需要的数据
需要的数据主要是基因组的GO注释信息和差异表达基因

导入基因组的GO注释信息表
emapper <- read_delim(
file = 'pecan_geneModels3_annot2_GO_noNovel.txt',"\t",
escape_double = FALSE, col_names = FALSE,
comment = "#", trim_ws = TRUE
)%>%
dplyr::select(GID = X1,
Gene_Name = X5,
GO = X8)
emapper <- emapper[-1,] #去除掉第一行的备注信息

提取基因信息
gene_info <- dplyr::select(emapper, GID, Gene_Name) %>%
dplyr::filter(!is.na(Gene_Name))
gene_info <- gene_info[-1,] #去掉第一行的名称

提取GO信息
gene2go <- dplyr::select(emapper, GID, GO) %>%
separate_rows(GO,sep = ';', convert = F) %>%
filter(!is.na(GO)) %>%
mutate(EVIDENCE = "IEA")
gene2go <-gene2go[-1,] #去掉第一行的备注信息

去除掉里面有的重复值
library(dplyr)
gene_info <- dplyr::distinct(gene_info)
gene2go <- dplyr::distinct(gene2go)
gene2go <- gene2go[!(gene2go$GO=='-'),] #取出不是-的所有数据
构建OrgDB
# 构建OrgDB
AnnotationForge::makeOrgPackage(gene_info=gene_info,
go=gene2go,
maintainer = 'Xieyf<3327492855@qq.com>',
author = 'xieyf',
outputDir = './org.My.db', #指定存放的文件夹的位置
tax_id = 0000,
genus = 'M',
species = 'y',
goTable = "go",
version = '1.0'
)
## 将生成的orgDB打包
pkgbuild::build('GO_annotation/org.My.eg.db',dest_path = 'GO_annotation/')


导入构建的R包
创建一个文件夹,将R包装在我们当前文件夹下面
dir.create('R_library',recursive = T)
install.packages('org.My.eg.db_1.0.tar.gz',
repos = NULL, #从本地安装
lib = 'R_library' #安装到指定的文件夹下
)
setwd('GO_annotation/org.My.eg.db')
library(org.My.eg.db,lib = "R_library")


GO富集分析
导入差异基因的数据,然后进行GO富集分析
### 导入FMD差异基因的数据
de_result_FMD1 <- read.table(
file = 'FMD.lrt_St1.DElist.txt',
header = TRUE
)
gene_FMD1 <- filter(de_result_FMD1,
abs(logFC)>1 & FDR < 0.05) %>%
pull(ID)
### 基因差异向量
#### 有名字的向量,用来记录差异大小,名字是基因的ID,值是logFC降序排列
geneList <- de_result$logFC #取出logFC值
names(geneList) <- de_result$ID #使用names函数给值命名
geneList <- sort(geneList,decreasing = T) #按降序排列
### 富集分析
de_ego_FMD1 <- enrichGO(gene=gene_FMD1,
OrgDb = org.My.eg.db,
keyType = 'GID',
ont = 'ALL',
qvalueCutoff = 0.05,
pvalueCutoff = 0.05)
de_ego_FMD1@result <- arrange(de_ego_FMD1@result,de_ego_FMD1@result$pvalue) #按照类别升序排列,这样就可以确保位置在一起
de_ego_FMD1_df <- as.data.frame(de_ego_FMD1) #将结果转换成可读的表格形式

图像可视化
将结果可视化的时候,可以选择不同的展示方式,我选择的是
ggplot(data = de_ego_FMD1_df[1:30,], mapping = aes(x=Description, y= -log10(pvalue),size = Count,colour = pvalue))+
geom_point(shape = 20,)+
theme(axis.text.x = element_text(angle = 90,vjust = 1,hjust = 1))+
coord_flip() +
theme_bw()+
scale_colour_gradient(low="red",high="blue")+
theme(axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 16),
plot.title = element_text(size = 12))+ theme(plot.title = element_text(hjust = 0.5)) +
labs(title = "FD1vsMD1")
write.csv(de_ego_FMD1_df,'de_ego_FMD1.csv',quote = F)

以圈图的模式展示
pdf('FMD.pdf',height =8 ,width =10 )
cnetplot(de_ego,
foldChange = geneList,
showCategory = 10,
node_label = "all",
circular = TRUE,
colorEdge = TRUE)
dev.off()
柱状图
barplot(de_ego,showCategory = 50)
点图
enrichplot::dotplot(de_ego,showCategory = 20)
KEGG
KEGG分析的话是师妹教的简便快捷好方法,只需用到KEGG的注释信息和差异分析结果即可
(手动感谢我的乖巧可爱活泼的师妹们以及内敛且靠谱的师弟,呜呜呜太爱我们课题组的人了)
## 加载需要的R包
library(clusterProfiler)
##导入差异基因数据
FMD1_ID <- read.table("FMD1_id.txt")
##导入KEGG信息
peacn_geneModel3_annot2_KEGG_noNovel <- read.csv("peacn_geneModel3_annot2_KEGG_noNovel.csv")
#KEGG分析
kegg_FMD1 <- compareCluster(FMD1_ID,fun = "enricher",
TERM2GENE=peacn_geneModel3_annot2_KEGG_noNovel[c(4,2)],
TERM2NAME=peacn_geneModel3_annot2_KEGG_noNovel[4:5])
kegg_FMD1@compareClusterResult <- arrange(kegg_FMD1@compareClusterResult,kegg_FMD1@compareClusterResult$pvalue)
#图片绘制
ggplot(data = kegg_FMD1[1:20,], mapping = aes(x=Description, y= -log10(pvalue),size = Count,colour = pvalue))+
geom_point(shape = 20,)+
theme(axis.text.x = element_text(angle = 90,vjust = 1,hjust = 1))+
coord_flip() +
theme_bw()+
scale_colour_gradient(low="red",high="blue")+
theme(axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 12))+ theme(plot.title = element_text(hjust = 0.5)) +
labs(title = "FMD1")

作者介绍

谢大飞
V1