Heatmaps and Clustering

pheatmap (R) - Quick Heatmaps

library(pheatmap) library(RColorBrewer)

Basic heatmap with clustering

# Heatmap of `mat` with values scaled per row and hierarchical clustering
# applied to both rows and columns.
pheatmap(
  mat,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  scale = "row"
)

With annotations

# Per-sample annotation table: one row per column of `mat`.
annotation_col <- data.frame(
  row.names = colnames(mat),
  Condition = metadata$condition,
  Batch = metadata$batch
)

# Per-gene annotation table: one row per row of `mat`.
annotation_row <- data.frame(
  row.names = rownames(mat),
  Pathway = gene_info$pathway
)

# Annotated heatmap using a reversed RdBu palette interpolated to 100 colours;
# row labels are hidden.
pheatmap(
  mat,
  scale = "row",
  color = colorRampPalette(rev(brewer.pal(9, "RdBu")))(100),
  annotation_row = annotation_row,
  annotation_col = annotation_col,
  fontsize = 8,
  show_rownames = FALSE
)

pheatmap Customization

Custom annotation colors

# Colours for each annotation track. List names correspond to annotation
# columns; the names inside each vector correspond to the annotation levels.
ann_colors <- list(
  Condition = c(Control = "#4DBBD5", Treatment = "#E64B35"),
  Batch = c(A = "#00A087", B = "#3C5488", C = "#F39B7F"),
  Pathway = c(Metabolism = "#8491B4", Signaling = "#91D1C2")
)

pheatmap(
  mat,
  scale = "row",
  annotation_col = annotation_col,
  # Pass the row annotation so the Pathway colours defined above are used.
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  clustering_distance_rows = "correlation",
  clustering_distance_cols = "euclidean",
  clustering_method = "ward.D2",
  # cutree_* cuts the clustered dendrograms into groups. gaps_row / gaps_col
  # are only used when that axis is NOT clustered, so the former
  # gaps_col = c(5, 10) had no effect here and was removed. To place gaps at
  # fixed positions instead, set cluster_cols = FALSE and swap cutree_cols
  # for gaps_col.
  cutree_rows = 4,
  cutree_cols = 2,
  border_color = NA,
  main = "Gene Expression Heatmap"
)

ComplexHeatmap (R) - Advanced

library(ComplexHeatmap) library(circlize)

Color function

# Diverging colour mapping: blue at -2, white at 0, red at 2.
col_fun <- colorRamp2(
  breaks = c(-2, 0, 2),
  colors = c("blue", "white", "red")
)

Basic heatmap

# Clustered heatmap coloured with `col_fun`; column names are shown and row
# names are hidden.
# NOTE(review): the legend is titled "Z-score", but Heatmap() has no scaling
# option — presumably `mat` is already row-scaled upstream; confirm.
Heatmap(
  mat,
  col = col_fun,
  name = "Z-score",
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_column_names = TRUE,
  show_row_names = FALSE
)

ComplexHeatmap with Annotations

Column annotation

# Column (sample) annotation: two categorical tracks plus a bar plot of age.
# Every level present in a categorical track needs an entry in `col`; the
# Batch palette now includes level C, matching the three-level batch palettes
# used elsewhere in this document.
ha_col <- HeatmapAnnotation(
  Condition = metadata$condition,
  Batch = metadata$batch,
  Age = anno_barplot(metadata$age),
  col = list(
    Condition = c(Control = "#4DBBD5", Treatment = "#E64B35"),
    Batch = c(A = "#00A087", B = "#3C5488", C = "#F39B7F")
  )
)

Row annotation

# Row (gene) annotation: a categorical Pathway track and a log2 fold-change
# bar plot drawn from a zero baseline, with bars filled red when log2FC > 0
# and blue otherwise.
ha_row <- rowAnnotation(
  Pathway = gene_info$pathway,
  LogFC = anno_barplot(
    gene_info$log2FC,
    baseline = 0,
    gp = gpar(fill = ifelse(gene_info$log2FC > 0, "red", "blue"))
  ),
  col = list(
    Pathway = c(Metabolism = "#8491B4", Signaling = "#91D1C2")
  )
)

# Heatmap with the column annotation on top and the row annotation on the
# left; rows are split by pathway and columns by condition.
Heatmap(
  mat,
  name = "Z-score",
  col = col_fun,
  left_annotation = ha_row,
  top_annotation = ha_col,
  row_split = gene_info$pathway,
  column_split = metadata$condition
)

Multiple Heatmaps

Combine heatmaps horizontally

# First heatmap: expression values coloured with the shared `col_fun`.
ht1 <- Heatmap(
  mat1,
  name = "Expression",
  col = col_fun
)

# Second heatmap: methylation values on a 0-1 scale with its own colour map.
ht2 <- Heatmap(
  mat2,
  name = "Methylation",
  col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red"))
)

# `+` joins the two heatmaps into one list, which is then drawn with shared
# titles.
# NOTE(review): presumably mat1 and mat2 have the same rows — confirm.
ht_list <- ht1 + ht2
draw(
  ht_list,
  row_title = "Genes",
  column_title = "Samples"
)

seaborn (Python)

import seaborn as sns import matplotlib.pyplot as plt import pandas as pd

Basic clustermap

# Clustered heatmap of row-wise z-scores. z-scores are centred on 0, so the
# diverging palette with center=0 puts its midpoint at each row's mean.
# (standard_scale rescales each row to [0, 1], which left half of the
# diverging palette unused when combined with center=0.)
g = sns.clustermap(
    df,
    cmap='RdBu_r',
    center=0,
    figsize=(10, 12),
    row_cluster=True,
    col_cluster=True,
    z_score=0,  # 0 = rows, 1 = columns
)

# Write the figure to disk as a separate statement, after the plot is built.
plt.savefig('heatmap.png', dpi=150, bbox_inches='tight')

seaborn with Annotations

Create color mappings

# Level -> hex colour lookups for the sample annotations.
condition_colors = {
    'Control': '#4DBBD5',
    'Treatment': '#E64B35',
}
batch_colors = {
    'A': '#00A087',
    'B': '#3C5488',
    'C': '#F39B7F',
}

# One colour column per sample annotation track.
col_colors = pd.DataFrame(
    {
        'Condition': metadata['condition'].map(condition_colors),
        'Batch': metadata['batch'].map(batch_colors),
    }
)

# Single colour track for the rows, keyed by pathway.
row_colors = gene_info['pathway'].map(
    {
        'Metabolism': '#8491B4',
        'Signaling': '#91D1C2',
    }
)

# Clustered heatmap with both colour tracks, a custom dendrogram ratio and a
# custom colour-bar position.
g = sns.clustermap(
    df,
    cmap='RdBu_r',
    center=0,
    row_colors=row_colors,
    col_colors=col_colors,
    figsize=(12, 14),
    dendrogram_ratio=0.15,
    cbar_pos=(0.02, 0.8, 0.03, 0.15),
)

# Label the axes of the heatmap panel of the cluster grid.
g.ax_heatmap.set_xlabel('Samples')
g.ax_heatmap.set_ylabel('Genes')

Clustering Methods

Distance metrics

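'euclidean', 'correlation', 'manhattan', 'maximum', 'canberra', 'binary', 'minkowski' (in pheatmap, 'correlation' means Pearson correlation distance; every other value is passed to dist())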

Linkage methods

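'complete', 'single', 'average', 'ward.D', 'ward.D2', 'mcquitty', 'median', 'centroid' (any method accepted by hclust(); 'ward.D2' implements Ward's criterion directly, whereas 'ward.D' only does so when given squared distances)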

# Heatmap with explicit clustering choices: correlation distance for rows,
# Euclidean distance for columns, and ward.D2 linkage.
pheatmap(
  mat,
  clustering_method = "ward.D2",
  clustering_distance_rows = "correlation",
  clustering_distance_cols = "euclidean"
)

Extract Cluster Assignments

pheatmap

# Build the heatmap object without drawing it (silent = TRUE) so the row
# dendrogram can be reused.
p <- pheatmap(
  mat,
  scale = "row",
  cutree_rows = 4,
  silent = TRUE
)

# Cut the row dendrogram into the same 4 groups used by cutree_rows.
row_clusters <- cutree(p$tree_row, k = 4)

ComplexHeatmap

# Split the rows into 4 groups; draw() returns the heatmap object that
# row_order() is queried on below.
ht <- Heatmap(mat, row_split = 4)
ht <- draw(ht)

# With a row split, row_order() gives one vector of row indices per slice.
row_order <- row_order(ht)

# Convert the per-slice indices into one cluster label per row of `mat`,
# in the original row order (names are taken from rownames(mat) if present).
row_clusters <- integer(nrow(mat))
row_clusters[unlist(row_order)] <- rep(
  seq_along(row_order),
  lengths(row_order)
)
names(row_clusters) <- rownames(mat)

seaborn

from scipy.cluster.hierarchy import fcluster

# Build the cluster grid, then reuse the linkage computed for the rows.
g = sns.clustermap(df, cmap='RdBu_r')
row_linkage = g.dendrogram_row.linkage

# Cut the row linkage into at most 4 flat clusters.
clusters = fcluster(
    row_linkage,
    t=4,
    criterion='maxclust',
)

Save Heatmaps

pheatmap to file

# Write the heatmap straight to a file by passing `filename`, with the
# given width and height.
pheatmap(
  mat,
  filename = "heatmap.pdf",
  height = 10,
  width = 8
)

ComplexHeatmap to file

# Open a PDF device, draw the existing heatmap object `ht` into it, then
# close the device so the file is written.
pdf("heatmap.pdf", width = 8, height = 10)
draw(ht)
dev.off()

Related Skills

data-visualization/ggplot2-fundamentals - General plotting
data-visualization/color-palettes - Color selection
differential-expression/de-visualization - Expression heatmaps

bio-data-visualization-heatmaps-clustering

Safety Notice

Copy this and send it to your AI assistant to learn