bio-structural-biology-alphafold-predictions

AlphaFold Predictions

Download and analyze AlphaFold predicted protein structures from the AlphaFold Protein Structure Database.

Download Structures

Single Structure by UniProt ID

import requests

def download_alphafold(uniprot_id, output_dir='.', version=4, timeout=30):
    '''Download the AlphaFold PDB model for a UniProt accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        output_dir: Directory the file is written to; created if missing.
        version: AlphaFold DB release number used in the file name.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Path of the saved PDB file, or None when the server does not
        answer with HTTP 200.

    Raises:
        requests.RequestException: On connection failure or timeout.
    '''
    import os

    filename = f'AF-{uniprot_id}-F1-model_v{version}.pdb'
    pdb_url = f'https://alphafold.ebi.ac.uk/files/{filename}'

    # requests never times out by default, so an unresponsive server
    # would otherwise block the caller forever.
    response = requests.get(pdb_url, timeout=timeout)
    if response.status_code != 200:
        return None

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = f'{output_dir}/{filename}'
    # Write the raw bytes so the file is exactly what the server sent,
    # independent of the local default encoding and newline translation.
    with open(output_path, 'wb') as f:
        f.write(response.content)
    return output_path

# Example: pdb_file is the saved file path, or None if the download
# did not return HTTP 200.
pdb_file = download_alphafold('P04637') # Human p53

Check Availability

def check_alphafold_exists(uniprot_id, timeout=30):
    '''Check whether the AlphaFold DB API knows a prediction for an accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the prediction endpoint answers with HTTP 200, else False.

    Raises:
        requests.RequestException: On connection failure or timeout.
    '''
    url = f'https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}'
    # An explicit timeout keeps a stalled connection from hanging the check.
    response = requests.get(url, timeout=timeout)
    return response.status_code == 200

# Example: report whether a prediction exists for human p53.
if check_alphafold_exists('P04637'):
    print('AlphaFold structure available')

Get Metadata

def get_alphafold_info(uniprot_id, timeout=30):
    '''Get AlphaFold prediction metadata for a UniProt accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The first record of the JSON response, or None when the server
        does not answer with HTTP 200 or the response holds no records.

    Raises:
        requests.RequestException: On connection failure or timeout.
    '''
    url = f'https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}'
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    records = response.json()
    # Guard against an empty result so callers get None instead of an
    # IndexError from indexing the first record.
    return records[0] if records else None

# Example: print selected metadata fields for human p53.
info = get_alphafold_info('P04637')
if info is None:
    # get_alphafold_info returns None when no metadata could be fetched;
    # subscripting None would raise a TypeError.
    print('No AlphaFold metadata found for P04637')
else:
    print(f"Gene: {info['gene']}")
    print(f"Organism: {info['organismScientificName']}")
    print(f"Model version: {info['latestVersion']}")

File Types Available

Database version v4 (current as of 2025). The version number refers to the database release, not the AlphaFold model version.

| File | URL Pattern | Description |
|------|-------------|-------------|
| PDB | `AF-{id}-F1-model_v4.pdb` | Structure coordinates |
| mmCIF | `AF-{id}-F1-model_v4.cif` | Structure with metadata |
| PAE JSON | `AF-{id}-F1-predicted_aligned_error_v4.json` | Predicted aligned error |

def download_pae(uniprot_id, output_dir='.', version=4, timeout=30):
    '''Download the PAE (predicted aligned error) JSON for a UniProt accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        output_dir: Directory the file is written to; created if missing.
        version: AlphaFold DB release number used in the remote file name.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Path of the saved JSON file ('AF-<id>-F1-pae.json' inside
        output_dir), or None when the server does not answer with HTTP 200.

    Raises:
        requests.RequestException: On connection failure or timeout.
    '''
    import os

    url = (
        'https://alphafold.ebi.ac.uk/files/'
        f'AF-{uniprot_id}-F1-predicted_aligned_error_v{version}.json'
    )
    # requests never times out by default; bound the wait explicitly.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = f'{output_dir}/AF-{uniprot_id}-F1-pae.json'
    # Save the raw bytes so the JSON is stored exactly as served.
    with open(output_path, 'wb') as f:
        f.write(response.content)
    return output_path

Analyze pLDDT Confidence Scores

Extract from PDB B-factors

AlphaFold stores pLDDT scores in the B-factor column.

from Bio.PDB import PDBParser

def extract_plddt(pdb_file):
    '''Read per-residue pLDDT values from an AlphaFold PDB file.

    The value is taken from the B-factor of each standard residue's CA
    atom, or of the residue's first atom when it has no CA.

    Returns:
        Dict mapping residue sequence number to that B-factor value.
    '''
    structure = PDBParser(QUIET=True).get_structure('protein', pdb_file)

    def representative_atom(res):
        # Prefer the alpha carbon; otherwise use the first atom listed.
        if 'CA' in res:
            return res['CA']
        return list(res.get_atoms())[0]

    return {
        res.id[1]: representative_atom(res).get_bfactor()
        for res in structure.get_residues()
        if res.id[0] == ' '  # blank hetero flag marks a standard residue
    }

# Example: average pLDDT over all residues of the downloaded p53 model.
plddt = extract_plddt('AF-P04637-F1-model_v4.pdb')
if plddt:
    avg_plddt = sum(plddt.values()) / len(plddt)
    print(f'Average pLDDT: {avg_plddt:.1f}')
else:
    # An empty result would make the division above raise ZeroDivisionError.
    print('No residues found; cannot compute average pLDDT')

Confidence Interpretation

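| pLDDT | Confidence | Interpretation |
|-------|------------|----------------|
| >90 | Very high | High accuracy; can be used much like an experimental structure |
| 70-90 | Confident | Good backbone; may have side-chain errors |
| 50-70 | Low | Use with caution; may be disordered |
| <50 | Very low | Likely disordered or wrong |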

Plot pLDDT per Residue

import matplotlib.pyplot as plt

def plot_plddt(plddt_dict, output='plddt_plot.png'):
    '''Plot pLDDT against residue number and save the figure to *output*.

    Args:
        plddt_dict: Mapping of residue number to pLDDT score.
        output: Path of the image file to write.
    '''
    positions = sorted(plddt_dict)
    values = [plddt_dict[pos] for pos in positions]

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.fill_between(positions, values, alpha=0.3)
    ax.plot(positions, values)

    # Dashed guide lines at the two confidence cut-offs.
    guides = (
        (70, 'orange', 'Confident threshold'),
        (90, 'green', 'Very high threshold'),
    )
    for level, colour, caption in guides:
        ax.axhline(y=level, color=colour, linestyle='--', label=caption)

    ax.set_xlabel('Residue')
    ax.set_ylabel('pLDDT')
    ax.set_ylim(0, 100)
    ax.legend()
    fig.savefig(output)
    plt.close(fig)

# Example: writes the plot to the default output file, 'plddt_plot.png'.
plot_plddt(plddt)

Analyze PAE (Predicted Aligned Error)

import json import numpy as np import matplotlib.pyplot as plt

def load_pae(pae_file):
    '''Load the PAE matrix from an AlphaFold PAE JSON file.

    Accepts both layouts: a list whose first element is the record
    (AlphaFold DB v4 files) and a bare record dict.

    Args:
        pae_file: Path to the PAE JSON file.

    Returns:
        numpy array built from the record's 'predicted_aligned_error' entry.

    Raises:
        KeyError: If the record has no 'predicted_aligned_error' entry.
    '''
    with open(pae_file) as f:
        data = json.load(f)

    # Decide on the container type first: indexing a dict with [0] raises
    # KeyError, which previously made the dict layout impossible to load.
    record = data[0] if isinstance(data, list) else data
    return np.array(record['predicted_aligned_error'])

def plot_pae(pae_matrix, output='pae_plot.png'):
    '''Draw a PAE matrix as a heat map and save the figure to *output*.

    Args:
        pae_matrix: 2-D array of predicted aligned errors.
        output: Path of the image file to write.
    '''
    fig, ax = plt.subplots(figsize=(8, 8))
    # Reversed green map with a fixed 0-30 colour range: low error is dark.
    heatmap = ax.imshow(pae_matrix, cmap='Greens_r', vmin=0, vmax=30)
    fig.colorbar(heatmap, ax=ax, label='Expected position error (A)')
    ax.set_xlabel('Scored residue')
    ax.set_ylabel('Aligned residue')
    ax.set_title('Predicted Aligned Error')
    fig.savefig(output)
    plt.close(fig)

# Example: load the PAE file saved by download_pae and plot it.
pae = load_pae('AF-P04637-F1-pae.json')
plot_pae(pae)

PAE Interpretation

- Low PAE (dark green): the relative positions of the two residues are well defined
- High PAE (white): the relative positions are uncertain (e.g. flexible linkers, or the orientation between domains)
- Low-PAE blocks along the diagonal: distinct structural domains

Batch Download

def batch_download_alphafold(uniprot_ids, output_dir='.'):
    '''Download AlphaFold structures for several UniProt accessions.

    Args:
        uniprot_ids: Iterable of UniProt accessions.
        output_dir: Directory for the downloaded files; created if missing.

    Returns:
        Dict mapping each accession to the value returned by
        download_alphafold for it (a file path, or None on failure).
    '''
    import os

    os.makedirs(output_dir, exist_ok=True)

    outcome = {}
    for accession in uniprot_ids:
        saved_path = download_alphafold(accession, output_dir)
        outcome[accession] = saved_path
        # One progress line per accession.
        print(f'Downloaded: {accession}' if saved_path else f'Not found: {accession}')
    return outcome

# Example batch run.
# NOTE(review): 'P53_HUMAN' looks like a UniProt entry name rather than an
# accession, so it is presumably here to show the 'Not found' path; confirm.
ids = ['P04637', 'P53_HUMAN', 'Q9Y6K9']
files = batch_download_alphafold(ids, 'alphafold_structures')

Compare with Experimental Structure

from Bio.PDB import PDBParser, Superimposer

def compare_structures(alphafold_pdb, experimental_pdb, exp_chain=None):
    '''Calculate the CA RMSD between an AlphaFold model and an experimental structure.

    CA atoms of standard residues are taken from one chain of model 0 of
    each file and paired by residue number, so an experimental structure
    that covers only part of the sequence is compared over the residues
    both files share. If the two chains share no residue numbers, atoms
    are paired by position up to the shorter length instead.

    Args:
        alphafold_pdb: Path to the AlphaFold PDB file (its first chain is used).
        experimental_pdb: Path to the experimental PDB file.
        exp_chain: Chain ID to use from the experimental file; the first
            chain is used when omitted.

    Returns:
        RMSD after optimal superposition of the paired CA atoms.

    Raises:
        ValueError: If no CA atoms can be paired.
        KeyError: If exp_chain is given but not present in the file.
    '''
    parser = PDBParser(QUIET=True)
    af_model = parser.get_structure('af', alphafold_pdb)[0]
    exp_model = parser.get_structure('exp', experimental_pdb)[0]

    def ca_by_number(chain):
        # Keep standard residues only (blank hetero flag): a calcium ion's
        # atom is also named 'CA' and must not be taken for an alpha carbon.
        # Key is (sequence number, insertion code).
        return {
            res.id[1:]: res['CA']
            for res in chain
            if res.id[0] == ' ' and 'CA' in res
        }

    af_ca = ca_by_number(next(iter(af_model)))
    if exp_chain is None:
        exp_ca = ca_by_number(next(iter(exp_model)))
    else:
        exp_ca = ca_by_number(exp_model[exp_chain])

    shared = sorted(af_ca.keys() & exp_ca.keys())
    if shared:
        af_atoms = [af_ca[key] for key in shared]
        exp_atoms = [exp_ca[key] for key in shared]
    else:
        # No residue numbers in common: fall back to pairing by position.
        n_pairs = min(len(af_ca), len(exp_ca))
        af_atoms = list(af_ca.values())[:n_pairs]
        exp_atoms = list(exp_ca.values())[:n_pairs]

    if not af_atoms:
        raise ValueError('No CA atoms available to superimpose')

    super_imposer = Superimposer()
    # Experimental atoms are the fixed set; AlphaFold atoms are fitted to them.
    super_imposer.set_atoms(exp_atoms, af_atoms)
    return super_imposer.rms

Related Skills

structural-biology/structure-io - Load and parse PDB/mmCIF files
structural-biology/geometric-analysis - RMSD, superimposition
database-access/uniprot-access - Get UniProt IDs for proteins
structural-biology/structure-navigation - Navigate structure hierarchy

bio-structural-biology-alphafold-predictions

Safety Notice

Copy this and send it to your AI assistant to learn

Source Transparency

Related Skills

bioskills

bio-data-visualization-genome-tracks

bio-epitranscriptomics-merip-preprocessing

bio-data-visualization-multipanel-figures