Source code for proteinworkshop.tasks.torsional_denoising
"""Implementation of the Torsional Noise Transform."""
import copy
from typing import Union
import torch
from graphein.protein.tensor import pnerf
from graphein.protein.tensor.angles import angle_to_unit_circle, dihedrals
from graphein.protein.tensor.data import Protein
from torch_geometric import transforms as T
from torch_geometric.data import Data
[docs]
class TorsionalNoiseTransform(T.BaseTransform):
"""
Adds noise to the torsional angles of a protein.
Cartesian coordinates are re-computed from the noisy dihedral angles
using the pNeRF algorithm.
The true dihedral angles are stored as an attribute on the protein object:
``batch.true_dihedrals``.
.. warning::
This will subset the data to only include the backbone atoms
(N, Ca, C). The backbone oxygen can be placed with:
``graphein.protein.tensor.reconstruction.place_fourth_coord``.
This will break, for example, sidechain torsion angle computation for
the first few chi angles that are partially defined by backbone atoms.
"""
def __init__(
self,
corruption_strategy: str = "gaussian",
corruption_rate: float = 0.1,
):
"""Adds noise to the torsional angles of a protein.
:param corruption_strategy: Type of noise distribution to use (gaussian
or uniform), defaults to ``"gaussian"``.
:type corruption_strategy: str, optional
:param corruption_rate: Amount to scale noise by, defaults to 0.1
:type corruption_rate: float, optional
"""
self.corruption_strategy = corruption_strategy
self.corruption_rate = corruption_rate
def __call__(self, batch: Union[Data, Protein]) -> Union[Data, Protein]:
# Compute uncorrupted dihedrals and store
dihedral_angles = dihedrals(
batch.coords, None, rad=True, embed=False
) # N_nodes x 3 (phi, psi, omega)
batch.true_dihedrals = angle_to_unit_circle(
copy.deepcopy(dihedral_angles)
) # N_nodes x 6
# Compute noise (sample lengths)
# Given a geodesic length s, the angle theta (in radians) can be
# calculated as theta = s / r, where r is the radius of the circle.
# Since the circle is a unit circle, r = 1, so theta = s.
if self.corruption_strategy == "gaussian":
noise = (
torch.randn((dihedral_angles.shape[0], 3))
* self.corruption_rate
)
elif self.corruption_strategy == "uniform":
# Uniform noise in the range [-noise_std, noise_std]
noise = (
(torch.rand((dihedral_angles.shape[0], 3)) - 0.5)
* 2
* self.corruption_rate
)
else:
raise ValueError(
f"Unknown noise scheme: {self.corruption_strategy}. Must be 'gaussian' or 'uniform'"
)
# Store Cos/Sin transformed noised
batch.torsional_noise = angle_to_unit_circle(noise) # N_nodes x 6
# The noised angle nu is the sum of the initial angle and the angle
# corresponding to the geodesic length,
# i.e., nu = phi/psi/omega + theta.
noised_angles = dihedral_angles + noise # N_nodes x 3
# Embed noised angles onto the unit circle.
# I.e. theta = [cos(theta), sin(theta)]
noised_angles = angle_to_unit_circle(noised_angles) # N_nodes x 6
# Recompute coordinates from noisy dihedral angles using pNeRF
# Uses uncorrupted bond lengths and angles
noised_coords = pnerf.reconstruct_dihedrals(
noised_angles, batch.coords
)
batch.coords = noised_coords.reshape(-1, 3, 3)
return batch
def __repr__(self) -> str:
return f"{self.__class__.__name__}(Corruption Strategy: {self.corruption_strategy}, Corruption Rate: {self.corruption_rate})"
if __name__ == "__main__":
import hydra
import omegaconf
from graphein.protein.tensor.data import get_random_protein
config = omegaconf.OmegaConf.load(
"../proteinworkshop/config/transforms/torsional_denoising.yaml"
)
a = get_random_protein()
b = copy.deepcopy(a)
t = hydra.utils.instantiate(config.torsional_denoising)
print(t)
out = t(a)
def rmsd(a, b):
return torch.sqrt(torch.mean((a - b) ** 2))
print(rmsd(out.coords[:, 1, :], b.coords[:, 1, :]))
out.plot_structure(["N", "CA", "C"])