Skip to content

Commit fb03f26

Browse files
committed
Use NO_FG instead of the atom symbol to reduce the number of tokens
1 parent 62e61d5 commit fb03f26

1 file changed

Lines changed: 5 additions & 3 deletions

File tree

chebai_graph/preprocessing/reader/augmented_reader.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -477,18 +477,20 @@ def _set_fg_prop(
477477
)
478478

479479
if "" in fg_set and len(fg_set) == 1:
480+
NO_FG = "NO_FG"
480481
if len(connected_atoms) == 1:
481482
# If there is only one atom and one edge connecting this atom to its fg_atom,
482483
# the functional group will be the symbol of this atom
483484
# This special case is to handle wildcard SMILES Eg. CHEBI:33429
484485
atom = connected_atoms[0]
485-
# TODO: needed or can we set to default fg prop `NO_FG`?
486-
atom.SetProp("FG", atom.GetSymbol())
486+
# needed or can we set to default fg prop `NO_FG`?
487+
# default to NO_FG, as many distinct atom symbols would increase the number of tokens
488+
atom.SetProp("FG", NO_FG)
487489
else:
488490
# If there are multiple atoms connected to the functional group, and no atoms have a functional group property/name
489491
# assigned, e.g. CHEBI:55388, atom idx 2 and 3 ([C-]#[C-]) have no functional group name, so the default FG prop is used
490492
for atom in connected_atoms:
491-
atom.SetProp("FG", "NO_FG")
493+
atom.SetProp("FG", NO_FG)
492494
# atom.SetProp("FG", fg_smiles)
493495

494496
if len(fg_set - {""}) > 1:

0 commit comments

Comments
 (0)