Using featurizers
In [1]:
Copied!
import logging
from typing import List, Optional
import pandas as pd
from polymetrix.featurizers.polymer import Polymer
from polymetrix.featurizers.chemical_featurizer import (
NumHBondDonors,
NumHBondAcceptors,
NumRotatableBonds,
NumRings,
NumNonAromaticRings,
NumAromaticRings,
NumAtoms,
TopologicalSurfaceArea,
FractionBicyclicRings,
NumAliphaticHeterocycles,
SlogPVSA1,
BalabanJIndex,
MolecularWeight,
Sp3CarbonCountFeaturizer,
Sp2CarbonCountFeaturizer,
MaxEStateIndex,
SmrVSA5,
FpDensityMorgan1,
HalogenCounts,
BondCounts,
BridgingRingsCount,
MaxRingSize,
HeteroatomCount,
HeteroatomDensity,
)
from polymetrix.featurizers.sidechain_backbone_featurizer import (
SideChainFeaturizer,
NumSideChainFeaturizer,
BackBoneFeaturizer,
NumBackBoneFeaturizer,
FullPolymerFeaturizer,
SidechainLengthToStarAttachmentDistanceRatioFeaturizer,
StarToSidechainMinDistanceFeaturizer,
SidechainDiversityFeaturizer,
)
from polymetrix.featurizers.multiple_featurizer import MultipleFeaturizer
import logging
from typing import List, Optional
import pandas as pd
from polymetrix.featurizers.polymer import Polymer
from polymetrix.featurizers.chemical_featurizer import (
NumHBondDonors,
NumHBondAcceptors,
NumRotatableBonds,
NumRings,
NumNonAromaticRings,
NumAromaticRings,
NumAtoms,
TopologicalSurfaceArea,
FractionBicyclicRings,
NumAliphaticHeterocycles,
SlogPVSA1,
BalabanJIndex,
MolecularWeight,
Sp3CarbonCountFeaturizer,
Sp2CarbonCountFeaturizer,
MaxEStateIndex,
SmrVSA5,
FpDensityMorgan1,
HalogenCounts,
BondCounts,
BridgingRingsCount,
MaxRingSize,
HeteroatomCount,
HeteroatomDensity,
)
from polymetrix.featurizers.sidechain_backbone_featurizer import (
SideChainFeaturizer,
NumSideChainFeaturizer,
BackBoneFeaturizer,
NumBackBoneFeaturizer,
FullPolymerFeaturizer,
SidechainLengthToStarAttachmentDistanceRatioFeaturizer,
StarToSidechainMinDistanceFeaturizer,
SidechainDiversityFeaturizer,
)
from polymetrix.featurizers.multiple_featurizer import MultipleFeaturizer
Full Polymer Featurization
In [2]:
Copied!
# Example polymers given as pSMILES strings; each one is parsed with
# Polymer.from_psmiles below. Later cells reuse this list.
psmiles_list = [
    "c1ccccc1[*]CCO[*]",
    "CC[*]CCCC[*]",
]

# Each descriptor is wrapped in FullPolymerFeaturizer and the wrapped
# featurizers are combined into a single MultipleFeaturizer.
full_featurizers = [
    FullPolymerFeaturizer(NumRings()),
    FullPolymerFeaturizer(MolecularWeight()),
    FullPolymerFeaturizer(TopologicalSurfaceArea()),
]
full_multi_featurizer = MultipleFeaturizer(full_featurizers)

# feature_labels() takes no polymer argument, so it is fetched once up front
# instead of on every loop iteration.
full_labels = full_multi_featurizer.feature_labels()

for psmiles in psmiles_list:
    polymer = Polymer.from_psmiles(psmiles)
    features = full_multi_featurizer.featurize(polymer)
    # Print one "label: value" line per feature, values rounded to 2 decimals.
    for label, value in zip(full_labels, features):
        print(f"{label}: {value:.2f}")
num_rings_sum_fullpolymerfeaturizer: 1.00
molecular_weight_sum_fullpolymerfeaturizer: 121.07
topological_surface_area_sum_fullpolymerfeaturizer: 9.23
num_rings_sum_fullpolymerfeaturizer: 0.00
molecular_weight_sum_fullpolymerfeaturizer: 85.10
topological_surface_area_sum_fullpolymerfeaturizer: 0.00
Side Chain Featurization
In [3]:
Copied!
# NumSideChainFeaturizer is used on its own; the other descriptors are wrapped
# in SideChainFeaturizer and configured with agg=["sum"].
sidechain_featurizers = [
    NumSideChainFeaturizer(),
    SideChainFeaturizer(NumAtoms(agg=["sum"])),
    SideChainFeaturizer(NumHBondDonors(agg=["sum"])),
    SideChainFeaturizer(NumRotatableBonds(agg=["sum"])),
]
sidechain_multi_featurizer = MultipleFeaturizer(sidechain_featurizers)

# feature_labels() takes no polymer argument, so it is fetched once up front
# instead of on every loop iteration.
sidechain_labels = sidechain_multi_featurizer.feature_labels()

# psmiles_list is defined in the "Full Polymer Featurization" cell above.
for psmiles in psmiles_list:
    polymer = Polymer.from_psmiles(psmiles)
    features = sidechain_multi_featurizer.featurize(polymer)
    # Print one "label: value" line per feature, values rounded to 2 decimals.
    for label, value in zip(sidechain_labels, features):
        print(f"{label}: {value:.2f}")
numsidechainfeaturizer: 1.00
num_atoms_sidechainfeaturizer_sum: 6.00
num_hbond_donors_sidechainfeaturizer_sum: 0.00
num_rotatable_bonds_sidechainfeaturizer_sum: 0.00
numsidechainfeaturizer: 1.00
num_atoms_sidechainfeaturizer_sum: 2.00
num_hbond_donors_sidechainfeaturizer_sum: 0.00
num_rotatable_bonds_sidechainfeaturizer_sum: 0.00
Backbone Featurization
In [4]:
Copied!
# NumBackBoneFeaturizer is used on its own; the other descriptors are wrapped
# in BackBoneFeaturizer with their default arguments.
backbone_featurizers = [
    NumBackBoneFeaturizer(),
    BackBoneFeaturizer(NumRings()),
    BackBoneFeaturizer(NumAtoms()),
    BackBoneFeaturizer(TopologicalSurfaceArea()),
]
backbone_multi_featurizer = MultipleFeaturizer(backbone_featurizers)

# feature_labels() takes no polymer argument, so it is fetched once up front
# instead of on every loop iteration.
backbone_labels = backbone_multi_featurizer.feature_labels()

# psmiles_list is defined in the "Full Polymer Featurization" cell above.
for psmiles in psmiles_list:
    polymer = Polymer.from_psmiles(psmiles)
    features = backbone_multi_featurizer.featurize(polymer)
    # Print one "label: value" line per feature, values rounded to 2 decimals.
    for label, value in zip(backbone_labels, features):
        print(f"{label}: {value:.2f}")
numbackbonefeaturizer: 1.00
num_rings_sum_backbonefeaturizer: 0.00
num_atoms_sum_backbonefeaturizer: 5.00
topological_surface_area_sum_backbonefeaturizer: 9.23
numbackbonefeaturizer: 1.00
num_rings_sum_backbonefeaturizer: 0.00
num_atoms_sum_backbonefeaturizer: 6.00
topological_surface_area_sum_backbonefeaturizer: 0.00