bio-data-visualization-heatmaps-clustering

Heatmaps and Clustering

Safety Notice

This listing is imported from skills.sh public index metadata. Review upstream SKILL.md and repository scripts before running.

Copy this and send it to your AI assistant to learn

Install skill "bio-data-visualization-heatmaps-clustering" with this command: npx skills add gptomics/bioskills/gptomics-bioskills-bio-data-visualization-heatmaps-clustering

Heatmaps and Clustering

pheatmap (R) - Quick Heatmaps

library(pheatmap)
library(RColorBrewer)

# Basic heatmap with clustering.
# scale = 'row' rescales each row before plotting.
pheatmap(mat, scale = 'row', cluster_rows = TRUE, cluster_cols = TRUE)

# With annotations.
# The row names of each annotation data frame are set to the matrix dimnames
# so pheatmap can align annotations with the heatmap.
# NOTE: assumes metadata rows are in the same order as colnames(mat) and
# gene_info rows are in the same order as rownames(mat) - confirm upstream.
annotation_col <- data.frame(
  Condition = metadata$condition,
  Batch = metadata$batch,
  row.names = colnames(mat)
)

annotation_row <- data.frame(
  Pathway = gene_info$pathway,
  row.names = rownames(mat)
)

pheatmap(
  mat,
  scale = 'row',
  annotation_col = annotation_col,
  annotation_row = annotation_row,
  # Reversed RdBu palette: low values blue, high values red.
  color = colorRampPalette(rev(brewer.pal(9, 'RdBu')))(100),
  show_rownames = FALSE,
  fontsize = 8
)

pheatmap Customization

# Custom annotation colors.
# Each list element is named after an annotation column; each vector is named
# by that column's levels.
ann_colors <- list(
  Condition = c(Control = '#4DBBD5', Treatment = '#E64B35'),
  Batch = c(A = '#00A087', B = '#3C5488', C = '#F39B7F'),
  Pathway = c(Metabolism = '#8491B4', Signaling = '#91D1C2')
)

# Clustered heatmap: split the dendrograms into 4 row and 2 column clusters.
pheatmap(
  mat,
  scale = 'row',
  annotation_col = annotation_col,
  annotation_colors = ann_colors,
  clustering_distance_rows = 'correlation',
  clustering_distance_cols = 'euclidean',
  clustering_method = 'ward.D2',
  cutree_rows = 4,
  cutree_cols = 2,
  border_color = NA,
  main = 'Gene Expression Heatmap'
)

# Manual column gaps: gaps_col is only used when columns are NOT clustered,
# so it is shown here with cluster_cols = FALSE instead of being combined
# with cutree_cols (where it would be silently ignored).
pheatmap(
  mat,
  scale = 'row',
  annotation_col = annotation_col,
  annotation_colors = ann_colors,
  clustering_distance_rows = 'correlation',
  clustering_method = 'ward.D2',
  cutree_rows = 4,
  cluster_cols = FALSE,
  gaps_col = c(5, 10),
  border_color = NA,
  main = 'Gene Expression Heatmap'
)

ComplexHeatmap (R) - Advanced

library(ComplexHeatmap)
library(circlize)

# Color function: maps -2 / 0 / 2 to blue / white / red.
col_fun <- colorRamp2(c(-2, 0, 2), c('blue', 'white', 'red'))

# Basic heatmap.
# NOTE: the legend is titled 'Z-score'; Heatmap() is not asked to scale here,
# so mat is presumably already row-scaled - confirm before use.
Heatmap(
  mat,
  name = 'Z-score',
  col = col_fun,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_row_names = FALSE,
  show_column_names = TRUE
)

ComplexHeatmap with Annotations

# Column annotation.
# Every level of a discrete annotation needs a color; Batch therefore lists
# A, B and C, matching the Batch palette used in the pheatmap and seaborn
# examples.
ha_col <- HeatmapAnnotation(
  Condition = metadata$condition,
  Batch = metadata$batch,
  Age = anno_barplot(metadata$age),
  col = list(
    Condition = c(Control = '#4DBBD5', Treatment = '#E64B35'),
    Batch = c(A = '#00A087', B = '#3C5488', C = '#F39B7F')
  )
)

# Row annotation.
# Bars start at 0 and are filled red for positive log2FC, blue otherwise.
ha_row <- rowAnnotation(
  Pathway = gene_info$pathway,
  LogFC = anno_barplot(
    gene_info$log2FC,
    baseline = 0,
    gp = gpar(fill = ifelse(gene_info$log2FC > 0, 'red', 'blue'))
  ),
  col = list(Pathway = c(Metabolism = '#8491B4', Signaling = '#91D1C2'))
)

# Split rows by pathway and columns by condition.
Heatmap(
  mat,
  name = 'Z-score',
  col = col_fun,
  top_annotation = ha_col,
  left_annotation = ha_row,
  row_split = gene_info$pathway,
  column_split = metadata$condition
)

Multiple Heatmaps

# Combine heatmaps horizontally.
# NOTE: assumes mat1 and mat2 have the same rows (genes) - confirm.
ht1 <- Heatmap(mat1, name = 'Expression', col = col_fun)
ht2 <- Heatmap(
  mat2,
  name = 'Methylation',
  col = colorRamp2(c(0, 0.5, 1), c('blue', 'white', 'red'))
)

# `+` concatenates the heatmaps into a list that is drawn as one figure.
ht_list <- ht1 + ht2
draw(ht_list, row_title = 'Genes', column_title = 'Samples')

seaborn (Python)

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Basic clustermap.
# z_score=0 standardises each row (0 = rows, 1 = columns), giving values
# centred on 0 so that center=0 with the diverging 'RdBu_r' colormap is
# meaningful. (standard_scale would rescale to [0, 1] and leave the blue half
# of the colormap unused.)
g = sns.clustermap(
    df,
    cmap='RdBu_r',
    center=0,
    figsize=(10, 12),
    row_cluster=True,
    col_cluster=True,
    z_score=0,
)
plt.savefig('heatmap.png', dpi=150, bbox_inches='tight')

seaborn with Annotations

# Create color mappings (annotation value -> hex color).
condition_colors = {'Control': '#4DBBD5', 'Treatment': '#E64B35'}
batch_colors = {'A': '#00A087', 'B': '#3C5488', 'C': '#F39B7F'}
pathway_colors = {'Metabolism': '#8491B4', 'Signaling': '#91D1C2'}

# One color column per annotation track.
# NOTE: assumes metadata is indexed by the column labels of df and gene_info
# by its row labels - confirm.
col_colors = pd.DataFrame({
    'Condition': metadata['condition'].map(condition_colors),
    'Batch': metadata['batch'].map(batch_colors),
})

row_colors = gene_info['pathway'].map(pathway_colors)

g = sns.clustermap(
    df,
    cmap='RdBu_r',
    center=0,
    row_colors=row_colors,
    col_colors=col_colors,
    figsize=(12, 14),
    dendrogram_ratio=0.15,
    cbar_pos=(0.02, 0.8, 0.03, 0.15),
)

g.ax_heatmap.set_xlabel('Samples')
g.ax_heatmap.set_ylabel('Genes')

Clustering Methods

Distance metrics

'euclidean', 'correlation', 'manhattan', 'maximum', 'canberra', 'binary'

Linkage methods

'complete', 'single', 'average', 'ward.D', 'ward.D2', 'mcquitty', 'median', 'centroid'

# Rows are clustered on correlation distance, columns on Euclidean distance,
# both with Ward (ward.D2) linkage.
pheatmap(
  mat,
  clustering_method = 'ward.D2',
  clustering_distance_rows = 'correlation',
  clustering_distance_cols = 'euclidean'
)

Extract Cluster Assignments

# pheatmap
# Use one variable for the cluster count so the displayed cut (cutree_rows)
# and the extracted assignments (cutree k) cannot drift apart.
n_clusters <- 4
p <- pheatmap(mat, scale = 'row', cutree_rows = n_clusters, silent = TRUE)
# Cut the row dendrogram stored on the returned object into n_clusters groups.
row_clusters <- cutree(p$tree_row, k = n_clusters)

# ComplexHeatmap
ht <- Heatmap(mat, row_split = 4)
# Draw first and keep the returned object; row_order() is read from it.
ht <- draw(ht)
# NOTE: with a split heatmap this is expected to be a list with one vector of
# row indices per slice - confirm against the ComplexHeatmap documentation.
row_order <- row_order(ht)

# seaborn
from scipy.cluster.hierarchy import fcluster

g = sns.clustermap(df, cmap='RdBu_r')
# Linkage matrix computed by clustermap for the rows.
row_linkage = g.dendrogram_row.linkage
# Cut the row tree into at most 4 flat clusters.
clusters = fcluster(row_linkage, t=4, criterion='maxclust')

Save Heatmaps

# pheatmap to file
pheatmap(mat, filename = 'heatmap.pdf', width = 8, height = 10)

# ComplexHeatmap to file
# Open a PDF device, draw into it, then close it so the file is written.
# Both examples write to 'heatmap.pdf'; use different names if running both.
pdf('heatmap.pdf', width = 8, height = 10)
draw(ht)
dev.off()

Related Skills

  • data-visualization/ggplot2-fundamentals - General plotting

  • data-visualization/color-palettes - Color selection

  • differential-expression/de-visualization - Expression heatmaps

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

General

bioskills

No summary provided by upstream source.

Repository Source - Needs Review
General

bio-data-visualization-genome-tracks

No summary provided by upstream source.

Repository Source - Needs Review
General

bio-epitranscriptomics-merip-preprocessing

No summary provided by upstream source.

Repository Source - Needs Review
General

bio-data-visualization-multipanel-figures

No summary provided by upstream source.

Repository Source - Needs Review