Expression to Pathways Workflow

Convert differential expression results into biological insights through functional enrichment analysis.

Workflow Overview

Input Preparation

From DESeq2 Results

library(DESeq2) library(clusterProfiler) library(org.Hs.eg.db)

Load DE results

res <- read.csv('deseq2_results.csv', row.names = 1)

Significant genes for ORA

sig_genes <- rownames(subset(res, padj < 0.05 & abs(log2FoldChange) > 1))

All genes for background

all_genes <- rownames(res)

Ranked list for GSEA (by stat or log2FC)

ranked_genes <- res$log2FoldChange names(ranked_genes) <- rownames(res) ranked_genes <- sort(ranked_genes, decreasing = TRUE) ranked_genes <- ranked_genes[!is.na(ranked_genes)]

Gene ID Conversion

Convert gene symbols to Entrez IDs

sig_entrez <- bitr(sig_genes, fromType = 'SYMBOL', toType = 'ENTREZID', OrgDb = org.Hs.eg.db)

For ranked list

ranked_entrez <- bitr(names(ranked_genes), fromType = 'SYMBOL', toType = 'ENTREZID', OrgDb = org.Hs.eg.db) ranked_list <- ranked_genes[ranked_entrez$SYMBOL] names(ranked_list) <- ranked_entrez$ENTREZID

Step 1: GO Over-representation Analysis

Biological Process

go_bp <- enrichGO(gene = sig_entrez$ENTREZID, OrgDb = org.Hs.eg.db, ont = 'BP', pAdjustMethod = 'BH', pvalueCutoff = 0.05, qvalueCutoff = 0.1, readable = TRUE)

Molecular Function

go_mf <- enrichGO(gene = sig_entrez$ENTREZID, OrgDb = org.Hs.eg.db, ont = 'MF', pAdjustMethod = 'BH', pvalueCutoff = 0.05, readable = TRUE)

Cellular Component

go_cc <- enrichGO(gene = sig_entrez$ENTREZID, OrgDb = org.Hs.eg.db, ont = 'CC', pAdjustMethod = 'BH', pvalueCutoff = 0.05, readable = TRUE)

Simplify redundant terms

go_bp_simple <- simplify(go_bp, cutoff = 0.7, by = 'p.adjust')

Step 2: KEGG Pathway Enrichment

kegg <- enrichKEGG(gene = sig_entrez$ENTREZID, organism = 'hsa', pvalueCutoff = 0.05, qvalueCutoff = 0.1)

Convert KEGG IDs to readable names

kegg <- setReadable(kegg, OrgDb = org.Hs.eg.db, keyType = 'ENTREZID')

Step 3: Reactome Pathway Enrichment

library(ReactomePA)

reactome <- enrichPathway(gene = sig_entrez$ENTREZID, organism = 'human', pvalueCutoff = 0.05, readable = TRUE)

Step 4: Gene Set Enrichment Analysis (GSEA)

GO GSEA

gsea_go <- gseGO(geneList = ranked_list, OrgDb = org.Hs.eg.db, ont = 'BP', minGSSize = 10, maxGSSize = 500, pvalueCutoff = 0.05, verbose = FALSE)

KEGG GSEA

gsea_kegg <- gseKEGG(geneList = ranked_list, organism = 'hsa', minGSSize = 10, maxGSSize = 500, pvalueCutoff = 0.05, verbose = FALSE)

Step 5: Visualization

library(enrichplot) library(ggplot2)

Dot plot

dotplot(go_bp_simple, showCategory = 20) + ggtitle('GO Biological Process Enrichment') ggsave('go_bp_dotplot.pdf', width = 10, height = 8)

Bar plot

barplot(kegg, showCategory = 15) + ggtitle('KEGG Pathway Enrichment') ggsave('kegg_barplot.pdf', width = 9, height = 6)

Enrichment map (network of related terms)

go_bp_simple <- pairwise_termsim(go_bp_simple) emapplot(go_bp_simple, showCategory = 30) + ggtitle('GO Term Similarity Network') ggsave('go_network.pdf', width = 10, height = 10)

Concept network (gene-term connections)

cnetplot(go_bp, showCategory = 5, categorySize = 'pvalue') + ggtitle('Gene-Concept Network') ggsave('cnet_plot.pdf', width = 12, height = 10)

GSEA plot for specific pathway

gseaplot2(gsea_kegg, geneSetID = 1:3, pvalue_table = TRUE) ggsave('gsea_plot.pdf', width = 10, height = 8)

Ridge plot for GSEA

ridgeplot(gsea_go, showCategory = 15) ggsave('gsea_ridge.pdf', width = 8, height = 10)

Step 6: Export Results

Export enrichment results

write.csv(as.data.frame(go_bp), 'go_bp_enrichment.csv', row.names = FALSE) write.csv(as.data.frame(kegg), 'kegg_enrichment.csv', row.names = FALSE) write.csv(as.data.frame(reactome), 'reactome_enrichment.csv', row.names = FALSE) write.csv(as.data.frame(gsea_go), 'gsea_go_results.csv', row.names = FALSE)

Combine key results

combined <- rbind( data.frame(Database = 'GO_BP', as.data.frame(go_bp_simple)[1:10,]), data.frame(Database = 'KEGG', as.data.frame(kegg)[1:10,]), data.frame(Database = 'Reactome', as.data.frame(reactome)[1:10,]) ) write.csv(combined, 'top_enriched_pathways.csv', row.names = FALSE)

Parameter Recommendations

Analysis Parameter Value

enrichGO pvalueCutoff 0.05

enrichGO qvalueCutoff 0.1

simplify cutoff 0.7

gseGO minGSSize 10

gseGO maxGSSize 500

GSEA perm 1000 (default)

Troubleshooting

Issue Likely Cause Solution

No enriched terms Too few genes, wrong IDs Check gene IDs, relax thresholds

All terms significant Too many genes Be more stringent with DE cutoffs

Gene ID conversion fails Wrong organism, format Check OrgDb package, gene format

GSEA no results Poor ranking, small gene sets Check ranked list, adjust minGSSize

Complete Workflow Script

library(clusterProfiler) library(org.Hs.eg.db) library(ReactomePA) library(enrichplot) library(ggplot2)

Configuration

de_file <- 'deseq2_results.csv' output_dir <- 'pathway_analysis' dir.create(output_dir, showWarnings = FALSE)

Load and prepare data

res <- read.csv(de_file, row.names = 1) sig_genes <- rownames(subset(res, padj < 0.05 & abs(log2FoldChange) > 1)) cat('Significant genes:', length(sig_genes), '\n')

Convert IDs

sig_entrez <- bitr(sig_genes, fromType = 'SYMBOL', toType = 'ENTREZID', OrgDb = org.Hs.eg.db) cat('Converted to Entrez:', nrow(sig_entrez), '\n')

Ranked list for GSEA

ranked <- res$log2FoldChange names(ranked) <- rownames(res) ranked <- sort(ranked[!is.na(ranked)], decreasing = TRUE) ranked_entrez <- bitr(names(ranked), fromType = 'SYMBOL', toType = 'ENTREZID', OrgDb = org.Hs.eg.db) ranked_list <- ranked[ranked_entrez$SYMBOL] names(ranked_list) <- ranked_entrez$ENTREZID

GO enrichment

go_bp <- enrichGO(sig_entrez$ENTREZID, OrgDb = org.Hs.eg.db, ont = 'BP', readable = TRUE) go_bp_simple <- simplify(go_bp, cutoff = 0.7)

KEGG

kegg <- enrichKEGG(sig_entrez$ENTREZID, organism = 'hsa') kegg <- setReadable(kegg, OrgDb = org.Hs.eg.db, keyType = 'ENTREZID')

Reactome

reactome <- enrichPathway(sig_entrez$ENTREZID, organism = 'human', readable = TRUE)

GSEA

gsea_go <- gseGO(ranked_list, OrgDb = org.Hs.eg.db, ont = 'BP', verbose = FALSE)

Plots

pdf(file.path(output_dir, 'enrichment_plots.pdf'), width = 10, height = 8) print(dotplot(go_bp_simple, showCategory = 20) + ggtitle('GO Biological Process')) print(barplot(kegg, showCategory = 15) + ggtitle('KEGG Pathways')) if (nrow(as.data.frame(reactome)) > 0) { print(dotplot(reactome, showCategory = 15) + ggtitle('Reactome Pathways')) } dev.off()

Export

write.csv(as.data.frame(go_bp_simple), file.path(output_dir, 'go_bp.csv'), row.names = FALSE) write.csv(as.data.frame(kegg), file.path(output_dir, 'kegg.csv'), row.names = FALSE) write.csv(as.data.frame(reactome), file.path(output_dir, 'reactome.csv'), row.names = FALSE)

cat('\nResults saved to:', output_dir, '\n') cat('GO BP terms:', nrow(as.data.frame(go_bp_simple)), '\n') cat('KEGG pathways:', nrow(as.data.frame(kegg)), '\n') cat('Reactome pathways:', nrow(as.data.frame(reactome)), '\n')

Related Skills

pathway-analysis/go-enrichment - GO enrichment details
pathway-analysis/kegg-pathways - KEGG analysis
pathway-analysis/reactome-pathways - Reactome analysis
pathway-analysis/gsea - GSEA methods
pathway-analysis/enrichment-visualization - Visualization options

bio-workflows-expression-to-pathways

Safety Notice

Copy this and send it to your AI assistant to learn