Skip to content

Atom featurizers

AromaticityFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom based on its aromaticity.

Source code in bionemo/geometric/atom_featurizers.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
class AromaticityFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom based on its aromaticity."""

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 1

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes the aromaticity flag for all atoms or for a selected subset of atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
                are featurized. An empty iterable yields an empty tensor.

        Returns:
            A 1-D torch.Tensor of dtype torch.int holding 1 for aromatic atoms and 0 otherwise, in index order.
        """
        # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
        # and raises for array-like index containers whose truth value is ambiguous.
        if atom_indices is None:
            atom_indices = range(mol.GetNumAtoms())
        return torch.tensor([int(mol.GetAtomWithIdx(a).GetIsAromatic()) for a in atom_indices], dtype=torch.int)

n_dim property

Returns dimensionality of the computed features.

get_atom_features(mol, atom_indices=None)

Computes features for all atoms or for a selected subset of atoms.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.Tensor of integers indicating whether each atom is aromatic (1) or not (0).

Source code in bionemo/geometric/atom_featurizers.py
197
198
199
200
201
202
203
204
205
206
207
208
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes the aromaticity flag for all atoms or for a selected subset of atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
            are featurized. An empty iterable yields an empty tensor.

    Returns:
        A 1-D torch.Tensor of dtype torch.int holding 1 for aromatic atoms and 0 otherwise, in index order.
    """
    # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
    # and raises for array-like index containers whose truth value is ambiguous.
    if atom_indices is None:
        atom_indices = range(mol.GetNumAtoms())
    return torch.tensor([int(mol.GetAtomWithIdx(a).GetIsAromatic()) for a in atom_indices], dtype=torch.int)

AtomicNumberFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by its atomic number.

Source code in bionemo/geometric/atom_featurizers.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class AtomicNumberFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its atomic number."""

    def __init__(self, dim_atomic_num: Optional[int] = None) -> None:
        """Initializes AtomicNumberFeaturizer class.

        Args:
            dim_atomic_num: Value reported by `n_dim`. Falsy values (None or 0) fall back to 118.
        """
        DIM_ATOMIC_NUM = 118
        self.dim_atomic_num = dim_atomic_num if dim_atomic_num else DIM_ATOMIC_NUM

    # NOTE(review): unlike the `n_dim` of the other featurizers in this module, this one is not decorated
    # with @property, so it must be called (`featurizer.n_dim()`). Left as a method to stay
    # backward-compatible with existing callers — confirm whether it should become a property.
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return self.dim_atomic_num

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes atomic numbers for all atoms or for a selected subset of atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
                are featurized. An empty iterable yields an empty tensor.

        Returns:
            A 1-D torch.Tensor of dtype torch.int holding the atomic number of each requested atom.
        """
        # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
        # and raises for array-like index containers whose truth value is ambiguous.
        if atom_indices is None:
            atom_indices = range(mol.GetNumAtoms())
        return torch.tensor([mol.GetAtomWithIdx(a).GetAtomicNum() for a in atom_indices], dtype=torch.int)

__init__(dim_atomic_num=None)

Initializes AtomicNumberFeaturizer class.

Source code in bionemo/geometric/atom_featurizers.py
44
45
46
47
def __init__(self, dim_atomic_num: Optional[int] = None) -> None:
    """Set up the featurizer and record the feature dimensionality.

    Args:
        dim_atomic_num: Dimensionality to report. Falsy values (None or 0) fall back to 118.
    """
    fallback_dim = 118
    self.dim_atomic_num = dim_atomic_num or fallback_dim

get_atom_features(mol, atom_indices=None)

Computes features for all atoms or for a selected subset of atoms.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.tensor of integers representing atomic numbers of atoms.

Source code in bionemo/geometric/atom_featurizers.py
53
54
55
56
57
58
59
60
61
62
63
64
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes atomic numbers for all atoms or for a selected subset of atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
            are featurized. An empty iterable yields an empty tensor.

    Returns:
        A 1-D torch.Tensor of dtype torch.int holding the atomic number of each requested atom.
    """
    # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
    # and raises for array-like index containers whose truth value is ambiguous.
    if atom_indices is None:
        atom_indices = range(mol.GetNumAtoms())
    return torch.tensor([mol.GetAtomWithIdx(a).GetAtomicNum() for a in atom_indices], dtype=torch.int)

n_dim()

Returns dimensionality of the computed features.

Source code in bionemo/geometric/atom_featurizers.py
49
50
51
def n_dim(self) -> int:
    """Returns dimensionality of the computed features.

    The value is the `dim_atomic_num` attribute stored on the instance.
    """
    # NOTE(review): unlike the `n_dim` of the other featurizers in this module, this one is not decorated
    # with @property, so it must be called (`featurizer.n_dim()`) — confirm whether that is intended.
    return self.dim_atomic_num

AtomicRadiusFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by its bond, covalent, and vdW radii.

Source code in bionemo/geometric/atom_featurizers.py
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
class AtomicRadiusFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its bond, covalent, and vdW radii."""

    def __init__(self) -> None:
        """Initializes AtomicRadiusFeaturizer class."""
        self.pt = Chem.GetPeriodicTable()
        self._min_val = torch.Tensor(
            [
                0.0,  # Bond radius
                0.28,  # Covalent radius
                1.2,  # van der Waals radius
            ]
        )

        self._max_val = torch.Tensor(
            [
                2.4,  # Bond radius
                2.6,  # Covalent radius
                3.0,  # van der Waals radius
            ]
        )

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 3

    @property
    def min_val(self) -> torch.Tensor:
        """Returns minimum values for features: bond, covalent, and vdW radius."""
        return self._min_val

    @property
    def max_val(self) -> torch.Tensor:
        """Returns maximum values for features: bond, covalent, and vdW radius."""
        return self._max_val

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes bond radius, covalent radius, and van der Waals radius without normalization.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
                are featurized. An empty iterable yields a tensor of shape (0, 3).

        Returns:
            A torch.Tensor with one row per requested atom and three columns: bond radius, covalent radius,
            and van der Waals radius, as looked up in the periodic table by atomic number.
        """
        # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
        # and raises for array-like index containers whose truth value is ambiguous.
        if atom_indices is None:
            atom_indices = range(mol.GetNumAtoms())

        feats = []
        for aidx in atom_indices:
            atomic_num = mol.GetAtomWithIdx(aidx).GetAtomicNum()
            feats.append([self.pt.GetRb0(atomic_num), self.pt.GetRcovalent(atomic_num), self.pt.GetRvdw(atomic_num)])

        # An explicit row count keeps the result two-dimensional even when no atoms were requested.
        return torch.Tensor(feats).reshape(len(feats), 3)

max_val property

Returns maximum values for features: bond, covalent, and vdW radius.

min_val property

Returns minimum values for features: bond, covalent, and vdW radius.

n_dim property

Returns dimensionality of the computed features.

__init__()

Initializes AtomicRadiusFeaturizer class.

Source code in bionemo/geometric/atom_featurizers.py
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
def __init__(self) -> None:
    """Create the periodic-table handle and the per-feature value bounds."""
    # Bounds are ordered as (bond radius, covalent radius, van der Waals radius).
    lower_bounds = [0.0, 0.28, 1.2]
    upper_bounds = [2.4, 2.6, 3.0]

    self.pt = Chem.GetPeriodicTable()
    self._min_val = torch.Tensor(lower_bounds)
    self._max_val = torch.Tensor(upper_bounds)

get_atom_features(mol, atom_indices=None)

Computes bond radius, covalent radius, and van der Waals radius without normalization.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
Tensor

A torch.Tensor of different atomic radii. Each atom is featurized by its bond radius, covalent radius, and van der Waals radius.

Source code in bionemo/geometric/atom_featurizers.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes bond radius, covalent radius, and van der Waals radius without normalization.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
            are featurized. An empty iterable yields a tensor of shape (0, 3).

    Returns:
        A torch.Tensor with one row per requested atom and three columns: bond radius, covalent radius,
        and van der Waals radius, as looked up in the periodic table by atomic number.
    """
    # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
    # and raises for array-like index containers whose truth value is ambiguous.
    if atom_indices is None:
        atom_indices = range(mol.GetNumAtoms())

    feats = []
    for aidx in atom_indices:
        atomic_num = mol.GetAtomWithIdx(aidx).GetAtomicNum()
        feats.append([self.pt.GetRb0(atomic_num), self.pt.GetRcovalent(atomic_num), self.pt.GetRvdw(atomic_num)])

    # An explicit row count keeps the result two-dimensional even when no atoms were requested.
    return torch.Tensor(feats).reshape(len(feats), 3)

ChiralTypeFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by its chirality type.

Source code in bionemo/geometric/atom_featurizers.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
class ChiralTypeFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its chirality type."""

    def __init__(self) -> None:
        """Initializes ChiralTypeFeaturizer class."""
        # Feature dimensionality equals the number of entries in `ChiralType.values`.
        self.dim_chiral_types = len(ChiralType.values)

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return self.dim_chiral_types

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes the chiral tag for all atoms or for a selected subset of atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
                are featurized. An empty iterable yields an empty tensor.

        Returns:
            A 1-D torch.Tensor of dtype torch.int holding the integer value of each atom's chiral tag.
        """
        # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
        # and raises for array-like index containers whose truth value is ambiguous.
        if atom_indices is None:
            atom_indices = range(mol.GetNumAtoms())
        return torch.tensor([int(mol.GetAtomWithIdx(a).GetChiralTag()) for a in atom_indices], dtype=torch.int)

n_dim property

Returns dimensionality of the computed features.

__init__()

Initializes ChiralTypeFeaturizer class.

Source code in bionemo/geometric/atom_featurizers.py
114
115
116
def __init__(self) -> None:
    """Initializes ChiralTypeFeaturizer class."""
    # Feature dimensionality equals the number of entries in `ChiralType.values`.
    self.dim_chiral_types = len(ChiralType.values)

get_atom_features(mol, atom_indices=None)

Computes features for all atoms or for a selected subset of atoms.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.tensor representing chirality type of atoms as integers.

Source code in bionemo/geometric/atom_featurizers.py
123
124
125
126
127
128
129
130
131
132
133
134
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes the chiral tag for all atoms or for a selected subset of atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
            are featurized. An empty iterable yields an empty tensor.

    Returns:
        A 1-D torch.Tensor of dtype torch.int holding the integer value of each atom's chiral tag.
    """
    # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
    # and raises for array-like index containers whose truth value is ambiguous.
    if atom_indices is None:
        atom_indices = range(mol.GetNumAtoms())
    return torch.tensor([int(mol.GetAtomWithIdx(a).GetChiralTag()) for a in atom_indices], dtype=torch.int)

CrippenFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by Crippen logP and molar refractivity.

Source code in bionemo/geometric/atom_featurizers.py
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
class CrippenFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by Crippen logP and molar refractivity."""

    def __init__(self) -> None:
        """Initializes CrippenFeaturizer class."""
        self._min_val = torch.Tensor(
            [
                -2.996,  # logP
                0.0,  # MR
            ]
        )

        self._max_val = torch.Tensor(
            [
                0.8857,  # logP
                6.0,  # MR
            ]
        )

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 2

    @property
    def min_val(self) -> torch.Tensor:
        """Returns minimum values for features: logP and molar refractivity."""
        return self._min_val

    @property
    def max_val(self) -> torch.Tensor:
        """Returns maximum values for features: logP and molar refractivity."""
        return self._max_val

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Compute atomic contributions to Crippen logP and molar refractivity.

        The contributions are computed for the whole molecule, clamped element-wise to
        [`min_val`, `max_val`], and then restricted to the requested atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
                are featurized. An empty iterable selects no rows.

        Returns:
            A torch.Tensor with one row per requested atom and two columns: logP and molar refractivity
            contribution.
        """
        logp_mr_list = torch.Tensor(rdMolDescriptors._CalcCrippenContribs(mol))
        logp_mr_list = torch.clamp(logp_mr_list, min=self.min_val, max=self.max_val)
        # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
        # and raises for array-like index containers whose truth value is ambiguous.
        if atom_indices is None:
            return logp_mr_list
        # Materialize the indices as a long tensor so any iterable (including an empty one) selects rows.
        rows = torch.as_tensor(list(atom_indices), dtype=torch.long)
        return logp_mr_list[rows, :]

max_val property

Returns maximum values for features: logP and molar refractivity.

min_val property

Returns minimum values for features: logP and molar refractivity.

n_dim property

Returns dimensionality of the computed features.

__init__()

Initializes CrippenFeaturizer class.

Source code in bionemo/geometric/atom_featurizers.py
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
def __init__(self):
    """Create the per-feature value bounds used for clamping."""
    # Bounds are ordered as (logP, MR).
    lower_bounds = [-2.996, 0.0]
    upper_bounds = [0.8857, 6.0]

    self._min_val = torch.Tensor(lower_bounds)
    self._max_val = torch.Tensor(upper_bounds)

get_atom_features(mol, atom_indices=None)

Compute atomic contributions to Crippen logP and molar refractivity.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
Tensor

A torch.Tensor featurizing atoms by its atomic contribution to logP and molar refractivity.

Source code in bionemo/geometric/atom_featurizers.py
497
498
499
500
501
502
503
504
505
506
507
508
509
510
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Compute atomic contributions to Crippen logP and molar refractivity.

    The contributions are computed for the whole molecule, clamped element-wise to
    [`min_val`, `max_val`], and then restricted to the requested atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
            are featurized. An empty iterable selects no rows.

    Returns:
        A torch.Tensor with one row per requested atom and two columns: logP and molar refractivity
        contribution.
    """
    logp_mr_list = torch.Tensor(rdMolDescriptors._CalcCrippenContribs(mol))
    logp_mr_list = torch.clamp(logp_mr_list, min=self.min_val, max=self.max_val)
    # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
    # and raises for array-like index containers whose truth value is ambiguous.
    if atom_indices is None:
        return logp_mr_list
    # Materialize the indices as a long tensor so any iterable (including an empty one) selects rows.
    rows = torch.as_tensor(list(atom_indices), dtype=torch.long)
    return logp_mr_list[rows, :]

DegreeFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by its degree (excluding hydrogens) of connectivity.

Source code in bionemo/geometric/atom_featurizers.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class DegreeFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its degree (excluding hydrogens) of connectivity."""

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 6

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes the degree for all atoms or for a selected subset of atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
                are featurized. An empty iterable yields an empty tensor.

        Returns:
            A 1-D torch.Tensor of dtype torch.int holding the value of `GetDegree()` for each requested atom.
        """
        # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
        # and raises for array-like index containers whose truth value is ambiguous.
        if atom_indices is None:
            atom_indices = range(mol.GetNumAtoms())
        return torch.tensor([mol.GetAtomWithIdx(a).GetDegree() for a in atom_indices], dtype=torch.int)

n_dim property

Returns dimensionality of the computed features.

get_atom_features(mol, atom_indices=None)

Computes features for all atoms or for a selected subset of atoms.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.tensor of integers representing degree of connectivity of atoms.

Source code in bionemo/geometric/atom_featurizers.py
75
76
77
78
79
80
81
82
83
84
85
86
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes the degree for all atoms or for a selected subset of atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
            are featurized. An empty iterable yields an empty tensor.

    Returns:
        A 1-D torch.Tensor of dtype torch.int holding the value of `GetDegree()` for each requested atom.
    """
    # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
    # and raises for array-like index containers whose truth value is ambiguous.
    if atom_indices is None:
        atom_indices = range(mol.GetNumAtoms())
    return torch.tensor([mol.GetAtomWithIdx(a).GetDegree() for a in atom_indices], dtype=torch.int)

ElectronicPropertyFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by its electronic properties.

This class computes electronic properties like electronegativity, ionization energy, and electron affinity.

Source code in bionemo/geometric/atom_featurizers.py
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
class ElectronicPropertyFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its electronic properties.

    This class computes electronic properties like electronegativity, ionization energy, and electron affinity.
    """

    def __init__(self, data_file=None) -> None:
        """Initializes ElectronicPropertyFeaturizer class.

        Args:
            data_file: Path to (or file-like object with) the CSV data. It must provide the columns
                "AtomicNumber", "Electronegativity", "IonizationEnergy" and "ElectronAffinity". When None,
                "data/electronic_data.csv" next to this module is used.
        """
        if data_file is None:
            # Use default
            root_path = Path(__file__).resolve().parent
            data_file = root_path / "data" / "electronic_data.csv"
        self.data_df = pd.read_csv(data_file).set_index("AtomicNumber")

        # Per-property lookup tables keyed by atomic number.
        self.pauling_en_dict = self.data_df["Electronegativity"].to_dict()
        self.ie_dict = self.data_df["IonizationEnergy"].to_dict()
        self.ea_dict = self.data_df["ElectronAffinity"].to_dict()

        self._min_val = torch.Tensor(
            [
                self.data_df["Electronegativity"].min(),
                self.data_df["IonizationEnergy"].min(),
                self.data_df["ElectronAffinity"].min(),
            ]
        )

        self._max_val = torch.Tensor(
            [
                self.data_df["Electronegativity"].max(),
                self.data_df["IonizationEnergy"].max(),
                self.data_df["ElectronAffinity"].max(),
            ]
        )

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 3

    @property
    def min_val(self) -> torch.Tensor:
        """Returns minimum values for features: electronegativity, ionization energy, electron affinity."""
        return self._min_val

    @property
    def max_val(self) -> torch.Tensor:
        """Returns maximum values for features: electronegativity, ionization energy, electron affinity."""
        return self._max_val

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Returns electronic features of the atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
                are featurized. An empty iterable yields a tensor of shape (0, 3).

        Returns:
            A torch.Tensor with one row per requested atom and three columns: electronegativity, ionization
            energy, and electron affinity, as read from the data file.

        Raises:
            KeyError: If an atom's atomic number is not present in the data file.
        """
        # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
        # and raises for array-like index containers whose truth value is ambiguous.
        if atom_indices is None:
            atom_indices = range(mol.GetNumAtoms())

        feats = []
        for aidx in atom_indices:
            atomic_num = mol.GetAtomWithIdx(aidx).GetAtomicNum()
            feats.append([self.pauling_en_dict[atomic_num], self.ie_dict[atomic_num], self.ea_dict[atomic_num]])
        # An explicit row count keeps the result two-dimensional even when no atoms were requested.
        return torch.Tensor(feats).reshape(len(feats), 3)

max_val property

Returns maximum values for features: electronegativity, ionization energy, electron affinity.

min_val property

Returns minimum values for features: electronegativity, ionization energy, electron affinity.

n_dim property

Returns dimensionality of the computed features.

__init__(data_file=None)

Initializes ElectronicPropertyFeaturizer class.

Parameters:

Name Type Description Default
data_file

Path to the data file.

None
Source code in bionemo/geometric/atom_featurizers.py
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
def __init__(self, data_file=None) -> None:
    """Load the electronic-property table and derive lookup dicts and value bounds.

    Args:
        data_file: Path to the data file. When None, "data/electronic_data.csv" next to this module is used.
    """
    if data_file is None:
        # Fall back to the CSV located relative to this module.
        data_file = Path(__file__).resolve().parent / "data" / "electronic_data.csv"

    table = pd.read_csv(data_file).set_index("AtomicNumber")
    self.data_df = table

    # Per-property lookup tables keyed by atomic number.
    self.pauling_en_dict = table["Electronegativity"].to_dict()
    self.ie_dict = table["IonizationEnergy"].to_dict()
    self.ea_dict = table["ElectronAffinity"].to_dict()

    # Column-wise extrema, in the same order as the features.
    columns = ("Electronegativity", "IonizationEnergy", "ElectronAffinity")
    self._min_val = torch.Tensor([table[name].min() for name in columns])
    self._max_val = torch.Tensor([table[name].max() for name in columns])

get_atom_features(mol, atom_indices=None)

Returns electronic features of the atom.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
Tensor

A torch.Tensor consisting of Pauling scale electronegativity, ionization energy, and electron affinity for each atom.

Source code in bionemo/geometric/atom_featurizers.py
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Returns electronic features of the atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
            are featurized. An empty iterable yields a tensor of shape (0, 3).

    Returns:
        A torch.Tensor with one row per requested atom and three columns: electronegativity, ionization
        energy, and electron affinity, taken from the instance's lookup dicts.

    Raises:
        KeyError: If an atom's atomic number is missing from the lookup dicts.
    """
    # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
    # and raises for array-like index containers whose truth value is ambiguous.
    if atom_indices is None:
        atom_indices = range(mol.GetNumAtoms())

    feats = []
    for aidx in atom_indices:
        atomic_num = mol.GetAtomWithIdx(aidx).GetAtomicNum()
        feats.append([self.pauling_en_dict[atomic_num], self.ie_dict[atomic_num], self.ea_dict[atomic_num]])
    # An explicit row count keeps the result two-dimensional even when no atoms were requested.
    return torch.Tensor(feats).reshape(len(feats), 3)

HybridizationFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by its hybridization type.

Source code in bionemo/geometric/atom_featurizers.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
class HybridizationFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its hybridization type."""

    def __init__(self) -> None:
        """Initializes HybridizationFeaturizer class."""
        # Feature dimensionality equals the number of entries in `HybridizationType.values`.
        self.dim_hybridization_types = len(HybridizationType.values)

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return self.dim_hybridization_types

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes the hybridization type for all atoms or for a selected subset of atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
                are featurized. An empty iterable yields an empty tensor.

        Returns:
            A 1-D torch.Tensor of dtype torch.int holding the integer value of each atom's hybridization type.
        """
        # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
        # and raises for array-like index containers whose truth value is ambiguous.
        if atom_indices is None:
            atom_indices = range(mol.GetNumAtoms())
        return torch.tensor([int(mol.GetAtomWithIdx(a).GetHybridization()) for a in atom_indices], dtype=torch.int)

n_dim property

Returns dimensionality of the computed features.

__init__()

Initializes HybridizationFeaturizer class.

Source code in bionemo/geometric/atom_featurizers.py
166
167
168
def __init__(self) -> None:
    """Initializes HybridizationFeaturizer class."""
    # Feature dimensionality equals the number of entries in `HybridizationType.values`.
    self.dim_hybridization_types = len(HybridizationType.values)

get_atom_features(mol, atom_indices=None)

Computes features for all atoms or for a selected subset of atoms.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.tensor representing hybridization type of atoms as integers.

Source code in bionemo/geometric/atom_featurizers.py
175
176
177
178
179
180
181
182
183
184
185
186
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes the hybridization type for all atoms or for a selected subset of atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
            are featurized. An empty iterable yields an empty tensor.

    Returns:
        A 1-D torch.Tensor of dtype torch.int holding the integer value of each atom's hybridization type.
    """
    # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
    # and raises for array-like index containers whose truth value is ambiguous.
    if atom_indices is None:
        atom_indices = range(mol.GetNumAtoms())
    return torch.tensor([int(mol.GetAtomWithIdx(a).GetHybridization()) for a in atom_indices], dtype=torch.int)

PeriodicTableFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by its position (period and group) in the periodic table.

Source code in bionemo/geometric/atom_featurizers.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
class PeriodicTableFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its position (period and group) in the periodic table."""

    def __init__(self) -> None:
        """Initializes PeriodicTableFeaturizer class."""
        self.pt = Chem.GetPeriodicTable()
        # Highest atomic number belonging to each period (periods 1-7), i.e. cumulative element counts.
        self.period_limits = [2, 10, 18, 36, 54, 86, 118]

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 25

    def get_period(self, atom: Chem.Atom) -> Optional[int]:
        """Returns periodic table period of atom.

        Returns:
            The 1-based position of the first entry in `period_limits` that is >= the atomic number, or
            None when the atomic number exceeds every limit.
        """
        atomic_number = atom.GetAtomicNum()

        # Determine the period based on atomic number.
        for period, limit in enumerate(self.period_limits, start=1):
            if atomic_number <= limit:
                return period
        return None

    def get_group(self, atom: Chem.Atom) -> int:
        """Returns the value used as the periodic table group of atom."""
        # NOTE(review): GetNOuterElecs presumably gives the outer-shell electron count rather than the
        # IUPAC group number — confirm this is the intended "group" feature.
        return self.pt.GetNOuterElecs(atom.GetAtomicNum())

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes periodic table position for all atoms or for the atoms selected by `atom_indices`.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of the atoms to featurize. When None (the default), all atoms of `mol`
                are featurized. An empty iterable yields a tensor of shape (0, 2).

        Returns:
            A torch.Tensor of dtype torch.int with one row per requested atom; column 0 is the result of
            `get_period` and column 1 the result of `get_group`.
        """
        # Test against None explicitly: a truthiness check would turn an empty selection into "all atoms"
        # and raises for array-like index containers whose truth value is ambiguous.
        if atom_indices is None:
            atom_indices = range(mol.GetNumAtoms())

        positions = []
        for aidx in atom_indices:
            atom = mol.GetAtomWithIdx(aidx)  # fetched once and reused for both features
            positions.append((self.get_period(atom), self.get_group(atom)))
        # An explicit row count keeps the result two-dimensional even when no atoms were requested.
        return torch.tensor(positions, dtype=torch.int).reshape(len(positions), 2)

n_dim property

Returns dimensionality of the computed features.

__init__()

Initializes PeriodicTableFeaturizer class.

Source code in bionemo/geometric/atom_featurizers.py
214
215
216
217
218
def __init__(self) -> None:
    """Initializes PeriodicTableFeaturizer class."""
    self.pt = Chem.GetPeriodicTable()
    # Highest atomic number belonging to each period (periods 1-7), i.e. cumulative element counts.
    self.period_limits = [2, 10, 18, 36, 54, 86, 118]

get_atom_features(mol, atom_indices=None)

Computes the periodic table position of all atoms, or of the atoms specified in atom_indices.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.Tensor representing the positions of atoms in the periodic table. For each atom, the first entry is the period and the second entry is the group.

Source code in bionemo/geometric/atom_featurizers.py
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes the periodic table position of all atoms, or of the atoms selected by `atom_indices`.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features are
            computed for every atom in `mol`. An explicitly empty collection selects no atoms.

    Returns:
        An integer torch.Tensor of shape (number of selected atoms, 2). Column 0 holds the value of
        `get_period` and column 1 the value of `get_group` for each atom.
    """
    # Test against None instead of truthiness: an empty selection must stay empty, and array-like
    # indices cannot be evaluated in a boolean context.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    # Look every atom up once and reuse it for both features.
    atoms = (mol.GetAtomWithIdx(a) for a in _atom_indices)
    feats = [(self.get_period(atom), self.get_group(atom)) for atom in atoms]
    # The reshape is a no-op for non-empty input and keeps the two-column layout for an empty selection.
    return torch.tensor(feats, dtype=torch.int).reshape(-1, 2)

get_group(atom)

Returns periodic table group of atom.

Source code in bionemo/geometric/atom_featurizers.py
235
236
237
238
def get_group(self, atom: Chem.Atom) -> int:
    """Returns the outer-shell electron count of the atom's element, used as its periodic table group feature.

    Args:
        atom: An RDKit Chem.Atom object.

    Returns:
        The value `self.pt.GetNOuterElecs` reports for the atom's atomic number.
    """
    atomic_number = atom.GetAtomicNum()
    return self.pt.GetNOuterElecs(atomic_number)

get_period(atom)

Returns periodic table period of atom.

Source code in bionemo/geometric/atom_featurizers.py
225
226
227
228
229
230
231
232
233
def get_period(self, atom: Chem.Atom) -> int:
    """Returns periodic table period of atom.

    Args:
        atom: An RDKit Chem.Atom object.

    Returns:
        The 1-based position of the first entry in `self.period_limits` that is greater than or equal to
        the atom's atomic number, or None when the atomic number exceeds every entry.
    """
    z = atom.GetAtomicNum()
    # Limits are scanned in order, so the first boundary that is not exceeded fixes the period.
    return next(
        (number for number, upper in enumerate(self.period_limits, start=1) if z <= upper),
        None,
    )

ScaffoldFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom based on whether it is present in Bemis-Murcko scaffold.

Source code in bionemo/geometric/atom_featurizers.py
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
class ScaffoldFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom based on whether it is present in Bemis-Murcko scaffold."""

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 1

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Flags which atoms belong to the Bemis-Murcko scaffold of the molecule.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features are
                computed for every atom in `mol`. An explicitly empty collection selects no atoms.

        Returns:
            An integer torch.Tensor with one entry per selected atom: 1 if the atom index is part of the
            scaffold match found in `mol`, 0 otherwise.
        """
        # Test against None instead of truthiness: an empty selection must stay empty, and array-like
        # indices cannot be evaluated in a boolean context.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

        scaffold = MurckoScaffold.GetScaffoldForMol(mol)
        # A set makes the per-atom membership test below constant time.
        scaffold_atom_idx = set(mol.GetSubstructMatch(scaffold))

        feats = [int(aidx in scaffold_atom_idx) for aidx in _atom_indices]
        return torch.tensor(feats, dtype=torch.int)

n_dim property

Returns dimensionality of the computed features.

get_atom_features(mol, atom_indices=None)

Returns position of the atoms with respect to Bemis-Murcko scaffold.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.tensor indicating if atoms are present in the Bemis-Murcko scaffold of the molecule.

Source code in bionemo/geometric/atom_featurizers.py
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Flags which atoms belong to the Bemis-Murcko scaffold of the molecule.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features are
            computed for every atom in `mol`. An explicitly empty collection selects no atoms.

    Returns:
        An integer torch.Tensor with one entry per selected atom: 1 if the atom index is part of the
        scaffold match found in `mol`, 0 otherwise.
    """
    # Test against None instead of truthiness: an empty selection must stay empty, and array-like
    # indices cannot be evaluated in a boolean context.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    # A set makes the per-atom membership test below constant time.
    scaffold_atom_idx = set(mol.GetSubstructMatch(scaffold))

    feats = [int(aidx in scaffold_atom_idx) for aidx in _atom_indices]
    return torch.tensor(feats, dtype=torch.int)

SmartsFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by hydrogen donor/acceptor and acidity/basicity.

Source code in bionemo/geometric/atom_featurizers.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
class SmartsFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by hydrogen donor/acceptor and acidity/basicity."""

    def __init__(self):
        """Initializes SmartsFeaturizer class by compiling the four SMARTS queries used for matching."""
        self.hydrogen_donor = Chem.MolFromSmarts("[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0]),n&H1&+0]")
        self.hydrogen_acceptor = Chem.MolFromSmarts(
            "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),"
            "n&H0&+0,$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]"
        )
        self.acidic = Chem.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")
        self.basic = Chem.MolFromSmarts(
            "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))]),$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);"
            "!$([C,a](=O))]),$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]"
        )

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 4

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes matches against the predefined SMARTS patterns.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features are
                computed for every atom in `mol`. An explicitly empty collection selects no atoms.

        Returns:
            An integer torch.Tensor of shape (number of selected atoms, 4). The columns flag, in order,
            whether the atom matches the hydrogen bond donor, hydrogen bond acceptor, acidic, and basic
            patterns.
        """
        # Column order of the output; keep in sync with the docstring above.
        patterns = (self.hydrogen_donor, self.hydrogen_acceptor, self.acidic, self.basic)
        # Flatten each pattern's matches into a set of atom indices so membership tests are constant
        # time and no quadratic tuple concatenation is needed.
        matched = [{idx for match in mol.GetSubstructMatches(patt) for idx in match} for patt in patterns]

        # Test against None instead of truthiness: an empty selection must stay empty, and array-like
        # indices cannot be evaluated in a boolean context.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        feats = [[aidx in hits for hits in matched] for aidx in _atom_indices]

        # The reshape is a no-op for non-empty input and keeps the column layout for an empty selection.
        return torch.tensor(feats, dtype=torch.int).reshape(-1, len(patterns))

n_dim property

Returns dimensionality of the computed features.

__init__()

Initializes SmartsFeaturizer class.

Source code in bionemo/geometric/atom_featurizers.py
416
417
418
419
420
421
422
423
424
425
426
427
def __init__(self):
    """Initializes SmartsFeaturizer class.

    Compiles the hydrogen bond donor, hydrogen bond acceptor, acidic, and basic SMARTS queries
    with `Chem.MolFromSmarts` and stores each compiled query on the instance.
    """
    donor_smarts = "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0]),n&H1&+0]"
    acceptor_smarts = (
        "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),"
        "n&H0&+0,$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]"
    )
    acidic_smarts = "[$([C,S](=[O,S,P])-[O;H1,-1])]"
    basic_smarts = (
        "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))]),$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);"
        "!$([C,a](=O))]),$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]"
    )

    self.hydrogen_donor = Chem.MolFromSmarts(donor_smarts)
    self.hydrogen_acceptor = Chem.MolFromSmarts(acceptor_smarts)
    self.acidic = Chem.MolFromSmarts(acidic_smarts)
    self.basic = Chem.MolFromSmarts(basic_smarts)

get_atom_features(mol, atom_indices=None)

Computes matches by prefixed SMARTS patterns.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.tensor indicating if atoms are hydrogen bond donors, hydrogen bond acceptors, acidic, or basic.

Source code in bionemo/geometric/atom_featurizers.py
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes matches against the predefined SMARTS patterns.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features are
            computed for every atom in `mol`. An explicitly empty collection selects no atoms.

    Returns:
        An integer torch.Tensor of shape (number of selected atoms, 4). The columns flag, in order,
        whether the atom matches the hydrogen bond donor, hydrogen bond acceptor, acidic, and basic
        patterns.
    """
    # Column order of the output; keep in sync with the docstring above.
    patterns = (self.hydrogen_donor, self.hydrogen_acceptor, self.acidic, self.basic)
    # Flatten each pattern's matches into a set of atom indices so membership tests are constant
    # time and no quadratic tuple concatenation is needed.
    matched = [{idx for match in mol.GetSubstructMatches(patt) for idx in match} for patt in patterns]

    # Test against None instead of truthiness: an empty selection must stay empty, and array-like
    # indices cannot be evaluated in a boolean context.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    feats = [[aidx in hits for hits in matched] for aidx in _atom_indices]

    # The reshape is a no-op for non-empty input and keeps the column layout for an empty selection.
    return torch.tensor(feats, dtype=torch.int).reshape(-1, len(patterns))

TotalDegreeFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by its total degree (including hydrogens) of connectivity.

Source code in bionemo/geometric/atom_featurizers.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
class TotalDegreeFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its total degree (including hydrogens) of connectivity."""

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 6

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes the total degree of all atoms, or of the atoms selected by `atom_indices`.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features are
                computed for every atom in `mol`. An explicitly empty collection selects no atoms.

        Returns:
            A torch.Tensor of integers representing total connectivity (including hydrogens) of atoms,
            one entry per selected atom.
        """
        # Test against None instead of truthiness: an empty selection must stay empty, and array-like
        # indices cannot be evaluated in a boolean context.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        return torch.tensor([mol.GetAtomWithIdx(a).GetTotalDegree() for a in _atom_indices], dtype=torch.int)

n_dim property

Returns dimensionality of the computed features.

get_atom_features(mol, atom_indices=None)

Computes features for all atoms or for the selected atoms.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.tensor of integers representing total connectivity (including hydrogens) of atoms.

Source code in bionemo/geometric/atom_featurizers.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes the total degree of all atoms, or of the atoms selected by `atom_indices`.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features are
            computed for every atom in `mol`. An explicitly empty collection selects no atoms.

    Returns:
        A torch.Tensor of integers representing total connectivity (including hydrogens) of atoms,
        one entry per selected atom.
    """
    # Test against None instead of truthiness: an empty selection must stay empty, and array-like
    # indices cannot be evaluated in a boolean context.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    return torch.tensor([mol.GetAtomWithIdx(a).GetTotalDegree() for a in _atom_indices], dtype=torch.int)

TotalNumHFeaturizer

Bases: BaseAtomFeaturizer

Class for featurizing atom by total number of hydrogens.

Source code in bionemo/geometric/atom_featurizers.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
class TotalNumHFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by total number of hydrogens."""

    def __init__(self) -> None:
        """Initializes TotalNumHFeaturizer class."""
        self.dim_total_num_hydrogen = 5  # 4 + 1 (no hydrogens)

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return self.dim_total_num_hydrogen

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes the total hydrogen count of all atoms, or of the atoms selected by `atom_indices`.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features are
                computed for every atom in `mol`. An explicitly empty collection selects no atoms.

        Returns:
            A torch.Tensor of integers representing total number of hydrogens on atoms, one entry per
            selected atom.
        """
        # Test against None instead of truthiness: an empty selection must stay empty, and array-like
        # indices cannot be evaluated in a boolean context.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        return torch.tensor([mol.GetAtomWithIdx(a).GetTotalNumHs() for a in _atom_indices], dtype=torch.int)

n_dim property

Returns dimensionality of the computed features.

__init__()

Initializes TotalNumHFeaturizer class.

Source code in bionemo/geometric/atom_featurizers.py
140
141
142
def __init__(self) -> None:
    """Initializes TotalNumHFeaturizer class.

    Sets `dim_total_num_hydrogen`, the feature dimensionality stored on the instance.
    """
    max_hydrogens = 4
    # One additional slot accounts for atoms without any hydrogens.
    self.dim_total_num_hydrogen = max_hydrogens + 1

get_atom_features(mol, atom_indices=None)

Computes features for all atoms or for the selected atoms.

Parameters:

Name Type Description Default
mol Mol

An RDkit Chem.Mol object

required
atom_indices Optional[Iterable]

Indices of atoms for feature computation. By default, features for all atoms are computed.

None

Returns:

Type Description
tensor

A torch.tensor of integers representing total number of hydrogens on atoms.

Source code in bionemo/geometric/atom_featurizers.py
149
150
151
152
153
154
155
156
157
158
159
160
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes the total hydrogen count of all atoms, or of the atoms selected by `atom_indices`.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features are
            computed for every atom in `mol`. An explicitly empty collection selects no atoms.

    Returns:
        A torch.Tensor of integers representing total number of hydrogens on atoms, one entry per
        selected atom.
    """
    # Test against None instead of truthiness: an empty selection must stay empty, and array-like
    # indices cannot be evaluated in a boolean context.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    return torch.tensor([mol.GetAtomWithIdx(a).GetTotalNumHs() for a in _atom_indices], dtype=torch.int)