Geometric Analysis
Measure distances, angles, and dihedrals. Superimpose structures and calculate RMSD. Find neighbor atoms and contacts.
Required Imports
from Bio.PDB import PDBParser, NeighborSearch, Superimposer from Bio.PDB import calc_angle, calc_dihedral import numpy as np
Distance Between Atoms
from Bio.PDB import PDBParser
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
chain = structure[0]['A'] atom1 = chain[100]['CA'] atom2 = chain[200]['CA']
Direct subtraction gives distance
distance = atom1 - atom2 print(f'Distance: {distance:.2f} Angstroms')
Or use numpy
import numpy as np distance = np.linalg.norm(atom1.coord - atom2.coord)
Distance Matrix
import numpy as np from Bio.PDB import PDBParser
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
ca_atoms = [r['CA'] for r in structure.get_residues() if r.has_id('CA') and r.id[0] == ' '] n = len(ca_atoms)
dist_matrix = np.zeros((n, n)) for i in range(n): for j in range(i + 1, n): dist = ca_atoms[i] - ca_atoms[j] dist_matrix[i, j] = dist dist_matrix[j, i] = dist
print(f'Distance matrix shape: {dist_matrix.shape}')
Angle Between Three Atoms
from Bio.PDB import PDBParser, calc_angle
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
residue = structure[0]['A'][100] n = residue['N'] ca = residue['CA'] c = residue['C']
calc_angle takes Vector objects (atom.coord returns array, use atom.get_vector())
angle_rad = calc_angle(n.get_vector(), ca.get_vector(), c.get_vector()) angle_deg = np.degrees(angle_rad) print(f'N-CA-C angle: {angle_deg:.1f} degrees')
Dihedral Angles
from Bio.PDB import PDBParser, calc_dihedral import numpy as np
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
chain = structure[0]['A']
Calculate phi angle (C-N-CA-C)
res_prev = chain[99] res_curr = chain[100]
phi = calc_dihedral( res_prev['C'].get_vector(), res_curr['N'].get_vector(), res_curr['CA'].get_vector(), res_curr['C'].get_vector() ) print(f'Phi: {np.degrees(phi):.1f} degrees')
Calculate psi angle (N-CA-C-N)
res_next = chain[101] psi = calc_dihedral( res_curr['N'].get_vector(), res_curr['CA'].get_vector(), res_curr['C'].get_vector(), res_next['N'].get_vector() ) print(f'Psi: {np.degrees(psi):.1f} degrees')
Ramachandran Angles for All Residues
from Bio.PDB import PDBParser, PPBuilder, calc_dihedral import numpy as np
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
ppb = PPBuilder() phi_psi = []
for pp in ppb.build_peptides(structure): angles = pp.get_phi_psi_list() for residue, (phi, psi) in zip(pp, angles): if phi is not None and psi is not None: phi_psi.append((residue.resname, np.degrees(phi), np.degrees(psi)))
for name, phi, psi in phi_psi[:10]: print(f'{name}: phi={phi:.1f}, psi={psi:.1f}')
Finding Neighbor Atoms
from Bio.PDB import PDBParser, NeighborSearch
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
Build search tree from all atoms
all_atoms = list(structure.get_atoms()) ns = NeighborSearch(all_atoms)
Find atoms within radius of a point
center = structure[0]['A'][100]['CA'].coord radius = 5.0 # Angstroms neighbors = ns.search(center, radius) print(f'Found {len(neighbors)} atoms within {radius}A')
Find atoms within radius of another atom
ca_atom = structure[0]['A'][100]['CA'] neighbors = ns.search(ca_atom.coord, 5.0)
Finding Residue Contacts
from Bio.PDB import PDBParser, NeighborSearch
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
all_atoms = list(structure.get_atoms()) ns = NeighborSearch(all_atoms)
Find all atom pairs within distance
contact_distance = 4.0 contacts = ns.search_all(contact_distance, level='R') # R = residue level
print(f'Found {len(contacts)} residue contacts within {contact_distance}A') for res1, res2 in contacts[:10]: print(f' {res1.resname}{res1.id[1]} - {res2.resname}{res2.id[1]}')
Contact Levels
from Bio.PDB import NeighborSearch
search_all returns pairs at specified level
Level codes: A=atom, R=residue, C=chain, M=model, S=structure
atom_contacts = ns.search_all(4.0, level='A') # Atom pairs residue_contacts = ns.search_all(4.0, level='R') # Residue pairs chain_contacts = ns.search_all(10.0, level='C') # Chain pairs
Superimposing Structures
from Bio.PDB import PDBParser, Superimposer
parser = PDBParser(QUIET=True) ref_structure = parser.get_structure('ref', 'reference.pdb') mobile_structure = parser.get_structure('mobile', 'mobile.pdb')
Get CA atoms from both structures
ref_atoms = [r['CA'] for r in ref_structure.get_residues() if r.has_id('CA') and r.id[0] == ' '] mobile_atoms = [r['CA'] for r in mobile_structure.get_residues() if r.has_id('CA') and r.id[0] == ' ']
Ensure same number of atoms
n = min(len(ref_atoms), len(mobile_atoms)) ref_atoms = ref_atoms[:n] mobile_atoms = mobile_atoms[:n]
Superimpose
sup = Superimposer() sup.set_atoms(ref_atoms, mobile_atoms) print(f'RMSD before: {sup.rms:.2f} Angstroms')
Apply transformation to all atoms in mobile structure
sup.apply(mobile_structure.get_atoms())
Calculating RMSD
from Bio.PDB import PDBParser, Superimposer import numpy as np
parser = PDBParser(QUIET=True) struct1 = parser.get_structure('s1', 'structure1.pdb') struct2 = parser.get_structure('s2', 'structure2.pdb')
atoms1 = [r['CA'] for r in struct1.get_residues() if r.has_id('CA') and r.id[0] == ' '] atoms2 = [r['CA'] for r in struct2.get_residues() if r.has_id('CA') and r.id[0] == ' ']
Using Superimposer (with alignment)
sup = Superimposer() sup.set_atoms(atoms1, atoms2) rmsd_aligned = sup.rms print(f'RMSD (aligned): {rmsd_aligned:.2f} A')
Raw RMSD (no alignment)
coords1 = np.array([a.coord for a in atoms1]) coords2 = np.array([a.coord for a in atoms2]) rmsd_raw = np.sqrt(np.mean(np.sum((coords1 - coords2) ** 2, axis=1))) print(f'RMSD (raw): {rmsd_raw:.2f} A')
CEAligner for Dissimilar Structures
from Bio.PDB import PDBParser, CEAligner
parser = PDBParser(QUIET=True) ref = parser.get_structure('ref', 'reference.pdb') mobile = parser.get_structure('mobile', 'query.pdb')
CE alignment works for structures with different sequences
aligner = CEAligner() aligner.set_reference(ref) aligner.align(mobile)
print(f'RMSD: {aligner.rms:.2f} A')
CEAligner automatically modifies mobile structure coordinates
Center of Mass
from Bio.PDB import PDBParser import numpy as np
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
Unweighted center (geometric center)
coords = np.array([a.coord for a in structure.get_atoms()]) center = coords.mean(axis=0) print(f'Geometric center: {center}')
Mass-weighted center (approximate - uses atom count)
For accurate mass-weighted, use element masses
atoms = list(structure.get_atoms()) masses = {'C': 12.0, 'N': 14.0, 'O': 16.0, 'S': 32.0, 'H': 1.0} total_mass = 0 weighted_sum = np.zeros(3) for atom in atoms: mass = masses.get(atom.element, 12.0) weighted_sum += mass * atom.coord total_mass += mass center_of_mass = weighted_sum / total_mass print(f'Center of mass: {center_of_mass}')
Radius of Gyration
import numpy as np from Bio.PDB import PDBParser
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
coords = np.array([a.coord for a in structure.get_atoms()]) center = coords.mean(axis=0)
Radius of gyration
rg = np.sqrt(np.mean(np.sum((coords - center) ** 2, axis=1))) print(f'Radius of gyration: {rg:.2f} A')
Finding Surface Residues
from Bio.PDB import PDBParser, NeighborSearch
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
Simple approach: residues with few neighbors
all_atoms = list(structure.get_atoms()) ns = NeighborSearch(all_atoms)
surface_residues = [] for residue in structure.get_residues(): if residue.id[0] != ' ': continue if not residue.has_id('CA'): continue
ca = residue['CA']
neighbors = ns.search(ca.coord, 10.0, level='R')
if len(neighbors) < 15: # Threshold for surface
surface_residues.append(residue)
print(f'Surface residues: {len(surface_residues)}')
Vector Operations
from Bio.PDB import PDBParser from Bio.PDB.vectors import Vector, rotaxis
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
Get vectors from atoms
atom1 = structure[0]['A'][100]['CA'] atom2 = structure[0]['A'][101]['CA']
v1 = atom1.get_vector() v2 = atom2.get_vector()
Vector operations
diff = v2 - v1 print(f'Distance vector: {diff}') print(f'Length: {diff.norm():.2f}')
Normalize
unit = diff.normalized()
Cross product
cross = v1 ** v2
Dot product
dot = v1 * v2
Chi Angles (Sidechain Dihedrals)
from Bio.PDB import PDBParser, calc_dihedral import numpy as np
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
Chi1 angle for a residue (N-CA-CB-CG)
residue = structure[0]['A'][100]
if residue.has_id('CB') and residue.has_id('CG'): chi1 = calc_dihedral( residue['N'].get_vector(), residue['CA'].get_vector(), residue['CB'].get_vector(), residue['CG'].get_vector() ) print(f'Chi1: {np.degrees(chi1):.1f} degrees')
Solvent Accessible Surface Area (SASA)
from Bio.PDB import PDBParser from Bio.PDB.SASA import ShrakeRupley
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
Calculate SASA using Shrake-Rupley algorithm
sr = ShrakeRupley() sr.compute(structure, level='S') # S=structure, M=model, C=chain, R=residue, A=atom
Access total SASA
print(f'Total SASA: {structure.sasa:.2f} A^2')
Access per-residue SASA
for residue in structure.get_residues(): if hasattr(residue, 'sasa'): print(f'{residue.resname}{residue.id[1]}: {residue.sasa:.2f} A^2')
SASA with Custom Parameters
from Bio.PDB import PDBParser from Bio.PDB.SASA import ShrakeRupley
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
Higher precision (more points = slower but more accurate)
sr = ShrakeRupley(probe_radius=1.4, n_points=960) sr.compute(structure, level='R')
Per-atom SASA
for atom in structure.get_atoms(): print(f'{atom.name}: {atom.sasa:.2f} A^2')
Identifying Buried vs Exposed Residues
from Bio.PDB import PDBParser from Bio.PDB.SASA import ShrakeRupley
parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb')
sr = ShrakeRupley() sr.compute(structure, level='R')
buried = [] exposed = [] for residue in structure.get_residues(): if residue.id[0] != ' ': continue if hasattr(residue, 'sasa'): if residue.sasa < 10.0: # Threshold in A^2 buried.append(residue) else: exposed.append(residue)
print(f'Buried: {len(buried)}, Exposed: {len(exposed)}')
Related Skills
-
structure-io - Parse and write structure files
-
structure-navigation - Access chains, residues, atoms
-
structure-modification - Transform coordinates, modify structures
-
alignment - Sequence alignment for structure comparison