"""Simplified AlphaFold pipeline.

Basic implementation for analysing protein structures with AlphaFold. This
code shows how to access, analyse, and visualise structural predictions.
"""
import requests
import numpy as np
from Bio.PDB import PDBParser, DSSP
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
class AlphaFoldAnalyzer:
    """Download, parse, and visualise AlphaFold-predicted protein structures.

    The analysis methods take the path of a PDB file and parse it with
    Bio.PDB. Confidence scores and contact maps are computed from the first
    model in the file only, which matches the secondary-structure analysis
    (it also reads model 0) and avoids mixing coordinates of different
    models in one distance matrix.
    """

    def __init__(self):
        # Root of the AlphaFold DB file server; model file names are appended.
        self.base_url = "https://alphafold.ebi.ac.uk/files/"
        self.parser = PDBParser(QUIET=True)

    def download_structure(self, uniprot_id, *, version=4, timeout=30.0):
        """Download the AlphaFold model for a UniProt accession.

        Args:
            uniprot_id: UniProt accession, e.g. ``"P04637"``.
            version: AlphaFold DB model version used in the file name
                (``..._v{version}.pdb``). Defaults to 4.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout ``requests`` can block indefinitely.

        Returns:
            Name of the PDB file written to the current working directory.

        Raises:
            RuntimeError: If the server does not answer with HTTP 200.
            requests.exceptions.RequestException: On network failures or
                when the timeout expires.
        """
        url = f"{self.base_url}AF-{uniprot_id}-F1-model_v{version}.pdb"
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            raise RuntimeError(f"Structure not found for {uniprot_id}")

        filename = f"AF-{uniprot_id}.pdb"
        with open(filename, "wb") as handle:
            handle.write(response.content)
        return filename

    def _first_model_ca_atoms(self, pdb_file):
        """Return ``(residue_number, CA atom)`` pairs of the first model."""
        structure = self.parser.get_structure("protein", pdb_file)
        model = next(iter(structure), None)
        if model is None:
            return []

        ca_atoms = []
        for chain in model:
            for residue in chain:
                # Residues without an alpha carbon are skipped.
                if residue.has_id("CA"):
                    ca_atoms.append((residue.id[1], residue["CA"]))
        return ca_atoms

    def parse_confidence_scores(self, pdb_file):
        """Extract per-residue pLDDT scores from the B-factor column.

        Args:
            pdb_file: Path of the PDB file to read.

        Returns:
            Tuple ``(residue_numbers, confidences)`` of equally long NumPy
            arrays, one entry per residue that has a CA atom. Both arrays
            are empty when the file contains no such residue.
        """
        ca_atoms = self._first_model_ca_atoms(pdb_file)
        residue_numbers = np.array([number for number, _ in ca_atoms])
        confidences = np.array([atom.bfactor for _, atom in ca_atoms])
        return residue_numbers, confidences

    def calculate_contact_map(self, pdb_file, cutoff=8.0):
        """Calculate the CA-CA distance matrix and residue contact map.

        Args:
            pdb_file: Path of the PDB file to read.
            cutoff: Two residues are in contact when their CA atoms are
                strictly closer than this distance.

        Returns:
            Tuple ``(distances, contact_map)``: a square float matrix of
            pairwise CA distances and the boolean matrix
            ``distances < cutoff``. The diagonal (distance 0) is therefore
            True for any positive cutoff. Both matrices have shape
            ``(0, 0)`` when no CA atom is found.
        """
        ca_atoms = self._first_model_ca_atoms(pdb_file)
        if not ca_atoms:
            # pdist rejects an empty (non 2-D) array, so short-circuit here.
            return np.zeros((0, 0)), np.zeros((0, 0), dtype=bool)

        ca_coords = np.array([atom.coord for _, atom in ca_atoms])
        distances = squareform(pdist(ca_coords))
        contact_map = distances < cutoff
        return distances, contact_map

    @staticmethod
    def _reduce_ss_code(code):
        """Map a DSSP code to helix ("H"), sheet ("E"), or coil ("C")."""
        if code in ("H", "G", "I"):
            return "H"
        if code in ("B", "E"):
            return "E"
        return "C"

    def analyze_secondary_structure(self, pdb_file):
        """Analyze secondary structure of the first model using DSSP.

        Args:
            pdb_file: Path of the PDB file to read.

        Returns:
            Tuple ``(ss_sequence, ss_counts)``: a string with one of
            "H"/"E"/"C" per DSSP record and a dict counting each state.
            ``(None, None)`` is returned when the analysis fails.
        """
        try:
            structure = self.parser.get_structure("protein", pdb_file)
            model = structure[0]
            dssp = DSSP(model, pdb_file)
            # Index 2 of each DSSP record holds the secondary-structure code.
            states = [self._reduce_ss_code(record[2]) for record in dssp]
        except Exception as e:
            # Deliberately best-effort: any failure while running DSSP is
            # reported and signalled to the caller instead of raised.
            print(f"DSSP analysis failed: {e}")
            return None, None

        ss_counts = {state: states.count(state) for state in ("H", "E", "C")}
        return "".join(states), ss_counts

    @staticmethod
    def _confidence_color(score):
        """Return the scatter colour for one pLDDT score."""
        if score < 50:
            return 'red'
        if score < 70:
            return 'orange'
        if score < 90:
            return 'yellow'
        return 'blue'

    def plot_confidence_profile(self, residue_numbers, confidences):
        """Plot the confidence score profile and show it.

        Args:
            residue_numbers: Residue numbers for the x axis.
            confidences: pLDDT scores, one per residue number.

        Returns:
            The figure that was drawn. The handle is taken before
            ``plt.show()`` is called, because asking for the current figure
            afterwards may yield a new, empty one.
        """
        fig = plt.figure(figsize=(12, 6))

        # Colour each point by its confidence band.
        colors = [self._confidence_color(c) for c in confidences]
        plt.scatter(residue_numbers, confidences, c=colors, alpha=0.7)
        plt.xlabel('Residue Number')
        plt.ylabel('pLDDT Confidence Score')
        plt.title('AlphaFold Confidence Profile')

        # Dashed guide lines at the band boundaries.
        plt.axhline(y=90, color='blue', linestyle='--', alpha=0.5, label='Very High (>90)')
        plt.axhline(y=70, color='yellow', linestyle='--', alpha=0.5, label='Confident (70-90)')
        plt.axhline(y=50, color='orange', linestyle='--', alpha=0.5, label='Low (50-70)')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()
        return fig
# Usage example
def main(uniprot_id="P04637"):
    """Run the full analysis pipeline for one protein and print a summary.

    Downloads the structure, reports the mean confidence, the contact-map
    shape and (when available) the secondary-structure counts, then shows
    the confidence profile plot.

    Args:
        uniprot_id: UniProt accession to analyse. Defaults to "P04637"
            (p53 tumor suppressor).
    """
    analyzer = AlphaFoldAnalyzer()

    # Top-level boundary: any failure in the pipeline is reported on stdout
    # instead of propagating as a traceback.
    try:
        # Download structure
        pdb_file = analyzer.download_structure(uniprot_id)
        print(f"Downloaded: {pdb_file}")

        # Analyze confidence
        residues, confidences = analyzer.parse_confidence_scores(pdb_file)
        print(f"Average confidence: {np.mean(confidences):.1f}")

        # Calculate contacts
        distances, contact_map = analyzer.calculate_contact_map(pdb_file)
        print(f"Contact map shape: {contact_map.shape}")

        # Secondary structure; the analyzer returns (None, None) on failure,
        # in which case nothing is printed.
        ss_seq, ss_counts = analyzer.analyze_secondary_structure(pdb_file)
        if ss_counts:
            print(f"Secondary structure: {ss_counts}")

        # Plot confidence profile
        analyzer.plot_confidence_profile(residues, confidences)
    except Exception as e:
        print(f"Error: {e}")
# Run the example pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()