Skip to content

Commit 5202ef4

Browse files
committed
return None for augmented nodes
1 parent 88ed36b commit 5202ef4

3 files changed

Lines changed: 58 additions & 30 deletions

File tree

chebai_graph/preprocessing/properties/__init__.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,12 @@
2828
AugAtomType,
2929
AugNumAtomBonds,
3030
AugAtomCharge,
31-
AugAtomChirality,
3231
AugAtomHybridization,
3332
AugAtomNumHs,
3433
AugAtomAromaticity,
3534
AugBondAromaticity,
3635
AugBondType,
3736
AugBondInRing,
38-
AugMoleculeNumRings,
3937
AugRDKit2DNormalized,
4038
)
4139

@@ -65,13 +63,11 @@
6563
"AugAtomType",
6664
"AugNumAtomBonds",
6765
"AugAtomCharge",
68-
"AugAtomChirality",
6966
"AugAtomHybridization",
7067
"AugAtomNumHs",
7168
"AugAtomAromaticity",
7269
"AugBondAromaticity",
7370
"AugBondType",
7471
"AugBondInRing",
75-
"AugMoleculeNumRings",
7672
"AugRDKit2DNormalized",
7773
]

chebai_graph/preprocessing/properties/augmented_properties.py

Lines changed: 58 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,6 @@ def get_property_value(self, augmented_mol: Dict):
5252

5353
return prop_list
5454

55-
@abstractmethod
56-
def get_atom_value(self, atom: Chem.rdchem.Atom | Dict):
57-
pass
58-
5955
def _check_modify_atom_prop_value(self, atom: Chem.rdchem.Atom | Dict, prop: str):
6056
value = self._get_atom_prop_value(atom, prop)
6157
if not value:
@@ -113,32 +109,63 @@ def get_atom_value(self, atom: Chem.rdchem.Atom | Dict):
113109
# Delegate to superclass method for atom
114110
return super().get_atom_value(atom)
115111
elif isinstance(atom, dict):
116-
return 0
112+
return None
117113
else:
118114
raise TypeError(
119115
f"Expected Chem.rdchem.Atom or dict, got {type(atom).__name__}"
120116
)
121117

122118

123-
class AugAtomType(AugNodeValueDefaulter, pr.AtomType): ...
119+
class AugAtomType(AugNodeValueDefaulter, pr.AtomType):
120+
# This property uses OneHotEncoder as default encoder
121+
# TODO: Can we return 0 for augmented Nodes for this property? which will lead to use of one hot tensor for augmented nodes
122+
# Currently, we return None which leads to zero-tensor for augmented nodes
124123

124+
# RDKit uses 0 as the atomic number for a "dummy atom", which usually means:
125+
# A placeholder atom (e.g. [*], R#, or attachment points in SMARTS/SMILES).
126+
# An undefined or wildcard atom.
127+
# A pseudoatom (e.g., for certain fragments or placeholders in reaction centers).
128+
...
125129

126-
class AugNumAtomBonds(AugNodeValueDefaulter, pr.NumAtomBonds): ...
127130

131+
class AugNumAtomBonds(AugNodeValueDefaulter, pr.NumAtomBonds):
132+
# This property uses OneHotEncoder as default encoder
133+
# Default return value for this property can't be zero, 0 is used for isolated atoms in molecule. It has to be None or actual node degree.
134+
# TODO: Can return actual node degree/num of connections for augmented Nodes for this property? which will lead to use of one hot tensor for augmented nodes
135+
# Currently, we return None which leads to zero-tensor for augmented nodes
136+
# But then the question aries shall we count only the atoms connected to a fg node, or all nodes including atoms. Consider graph node too.
137+
...
128138

129-
class AugAtomCharge(AugNodeValueDefaulter, pr.AtomCharge): ...
130139

140+
class AugAtomCharge(AugNodeValueDefaulter, pr.AtomCharge):
141+
# This property uses OneHotEncoder as default encoder
142+
# Default return value for this property can't be zero, as atoms can have 0 charge.
143+
# TODO: Can return some `unk` value for augmented Nodes for this property? which will lead to use of one hot tensor for augmented nodes
144+
# Currently, we return None which leads to zero-tensor for augmented nodes
145+
...
131146

132-
class AugAtomChirality(AugNodeValueDefaulter, pr.AtomChirality): ...
133147

148+
class AugAtomHybridization(AugNodeValueDefaulter, pr.AtomHybridization):
149+
# This property uses OneHotEncoder as default encoder
150+
# TODO: Can return some `HybridizationType.UNSPECIFIED` value which is 0 for augmented Nodes for this property? which will lead to use of one hot tensor for augmented nodes
151+
# Check: https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.HybridizationType
152+
# Currently, we return None which leads to zero-tensor for augmented nodes
153+
...
134154

135-
class AugAtomHybridization(AugNodeValueDefaulter, pr.AtomHybridization): ...
136155

156+
class AugAtomNumHs(AugNodeValueDefaulter, pr.AtomNumHs):
157+
# This property uses OneHotEncoder as default encoder
158+
# Default return value for this property can't be zero, as atoms can have 0 Hydrogen atoms attached which mean atoms is full balanced by bonding with other non-hydrogen atoms.
159+
# TODO: Can return some `unk` value for augmented Nodes for this property? which will lead to use of one hot tensor for augmented nodes
160+
# Currently, we return None which leads to zero-tensor for augmented nodes
161+
...
137162

138-
class AugAtomNumHs(AugNodeValueDefaulter, pr.AtomNumHs): ...
139163

140-
141-
class AugAtomAromaticity(AugNodeValueDefaulter, pr.AtomAromaticity): ...
164+
class AugAtomAromaticity(AugNodeValueDefaulter, pr.AtomAromaticity):
165+
# This property uses BoolEncoder as default encoder
166+
# Currently, we return None for augmented nodes which leads to BoolEncoder setting 0 internally.
167+
# This is None is right value for augmented nodes its not part of any kind of aromatic ring.
168+
...
142169

143170

144171
# --------------------- Bond Properties ------------------------------
@@ -193,10 +220,6 @@ def get_property_value(self, augmented_mol: Dict) -> List:
193220

194221
return prop_list
195222

196-
@abstractmethod
197-
def get_bond_value(self, bond: Chem.rdchem.Bond | Dict):
198-
pass
199-
200223
def _check_modify_bond_prop_value(self, bond: Chem.rdchem.Bond | Dict, prop: str):
201224
value = self._get_bond_prop_value(bond, prop)
202225
if not value:
@@ -228,29 +251,39 @@ def get_bond_value(self, bond: Chem.rdchem.Bond | Dict):
228251
# Delegate to superclass method for bond
229252
return super().get_bond_value(bond)
230253
elif isinstance(bond, dict):
231-
return 0
254+
return None
232255
else:
233256
raise TypeError("Bond/Edge should be of type `Chem.rdchem.Bond` or `dict`.")
234257

235258

236-
class AugBondAromaticity(AugBondValueDefaulter, pr.BondAromaticity): ...
259+
class AugBondAromaticity(AugBondValueDefaulter, pr.BondAromaticity):
260+
# This property uses BoolEncoder as default encoder
261+
# Currently, we return None for augmented nodes which leads to BoolEncoder setting 0 internally.
262+
# This is None is right value for augmented nodes its not part of any kind of aromatic ring.
263+
...
237264

238265

239-
class AugBondType(AugBondValueDefaulter, pr.BondType): ...
266+
class AugBondType(AugBondValueDefaulter, pr.BondType):
267+
# This property uses OneHotEncoder as default encoder
268+
# TODO: Can return some `BondType.UNSPECIFIED` value which is 0 for augmented Nodes for this property? which will lead to use of one hot tensor for augmented nodes
269+
# Check: https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.BondType
270+
# Currently, we return None which leads to zero-tensor for augmented nodes
271+
...
240272

241273

242-
class AugBondInRing(AugBondValueDefaulter, pr.BondInRing): ...
274+
class AugBondInRing(AugBondValueDefaulter, pr.BondInRing):
275+
# This property uses BoolEncoder as default encoder
276+
# Currently, we return None for augmented nodes which leads to BoolEncoder setting 0 internally.
277+
# This is None is right value for augmented nodes its not part of any kind of aromatic ring.
278+
...
243279

244280

245281
# --------------------- Molecular Properties ------------------------------
246282
class AugmentedMolecularProperty(pr.MolecularProperty, ABC):
247283
def get_property_value(self, augmented_mol: Dict) -> list:
248-
mol: Chem.Mol = augmented_mol[self.MAIN_KEY]["atom_nodes"]
284+
mol: Chem.Mol = augmented_mol[AugmentedAtomProperty.MAIN_KEY]["atom_nodes"]
249285
assert isinstance(mol, Chem.Mol), "Molecule should be instance of `Chem.Mol`"
250286
return super().get_property_value(mol)
251287

252288

253-
class AugMoleculeNumRings(AugmentedMolecularProperty, pr.MoleculeNumRings): ...
254-
255-
256289
class AugRDKit2DNormalized(AugmentedMolecularProperty, pr.RDKit2DNormalized): ...

configs/data/chebi50_augmented_baseline.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,4 @@ init_args:
1010
- chebai_graph.preprocessing.properties.AugBondType
1111
- chebai_graph.preprocessing.properties.AugBondInRing
1212
- chebai_graph.preprocessing.properties.AugBondAromaticity
13-
#- chebai_graph.preprocessing.properties.AugMoleculeNumRings
1413
- chebai_graph.preprocessing.properties.AugRDKit2DNormalized

0 commit comments

Comments
 (0)