Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions openad/smols/smol_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,14 @@
"cid": "cid",
"formula": "formula",
}
# Sample SMILES entries as they appear in a PubChem record's "props" list (note the 'Absolute' and 'Connectivity' names):
# [{'urn': {'label': 'SMILES', 'name': 'Absolute', 'datatype': 1, 'version': '2.3.0', 'software': 'OEChem', 'source': 'OpenEye Scientific Software', 'release': '2025.04.14'}, 'value': {'sval': 'CCCCCCO'}}, {'urn': {'label': 'SMILES', 'name': 'Connectivity', 'datatype': 1, 'version': '2.3.0', 'software': 'OEChem', 'source': 'OpenEye Scientific Software', 'release': '2025.06.30'}, 'value': {'sval': 'CCCCCCO'}}]
MOL_PROPERTY_SOURCES = {
"Log P-XLogP3-AA": "xlogp",
"Log P-XLogP3": "xlogp",
"SMILES-Isomeric": "isomeric_smiles",
"SMILES-Canonical": "canonical_smiles",
# "SMILES-Isomeric": "isomeric_smiles",
# "SMILES-Canonical": "canonical_smiles",
"SMILES-Absolute": "isomeric_smiles",
"SMILES-Connectivity": "canonical_smiles",
"Molecular Weight": "molecular_weight",
"Compound Complexity": "complexity",
"Count-Rotatable Bond": "rotatable_bond_count",
Expand Down Expand Up @@ -432,7 +435,6 @@ def _add_pcy_data(smol, smol_pcy, identifier, identifier_type):
synonyms = pcy.get_synonyms(smol_pcy["iupac_name"], "name")
smol["synonyms"] = synonyms[0].get("Synonym") if synonyms and len(synonyms) > 0 else []

# Add name
if identifier_type == PCY_IDFR["name"]:
smol["identifiers"]["name"] = identifier
elif len(smol.get("synonyms", [])) > 1:
Expand All @@ -456,13 +458,17 @@ def _add_pcy_data(smol, smol_pcy, identifier, identifier_type):
# - Before: {"source": "pubchem"}
# - After: { 'label': 'IUPAC Name', 'name': 'Preferred', 'datatype': 1, 'version': '2.7.0',
# 'software': 'Lexichem TK', 'source': 'OpenEye Scientific Software', 'release': '2021.10.14'}

for x in SMOL_PROPERTIES:
smol["property_sources"][x] = {"source": "PubChem"}
for prop_name, prop_name_key in MOL_PROPERTY_SOURCES.items():
if prop_name_key == x:
if len(prop_name.split("-")) > 0:

for y in smol_pcy["record"]["props"]:

if "label" not in y["urn"]:

pass
elif y["urn"]["label"] == prop_name.split("-", maxsplit=1)[0] and "name" not in y["urn"]:
smol["property_sources"][x] = y["urn"]
Expand Down Expand Up @@ -500,6 +506,14 @@ def _sep_identifiers_from_properties(smol: dict) -> dict:
if prop.lower() in molIdfrs:
del smol["properties"][prop]

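# Workaround for PubChemPy and PubChem being out of sync: copy the record's
# "Absolute" and "Connectivity" SMILES values into the isomeric_smiles and
# canonical_smiles identifiers directly.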
for src in smol["properties"]["record"]["props"]:
if "urn" in src:
if src["urn"]["label"] == "SMILES" and src["urn"]["name"] == "Absolute":
smol["identifiers"]["isomeric_smiles"] = src["value"]["sval"]
elif src["urn"]["label"] == "SMILES" and src["urn"]["name"] == "Connectivity":
smol["identifiers"]["canonical_smiles"] = src["value"]["sval"]

return smol


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "openad"
version = "0.7.5"
version = "0.7.5.2"

description = "Open Accelerated Discovery"
authors = ["Phil Downey <phil.downey1@ibm.com>"]
Expand Down
Loading