Skip to content

Commit

Permalink
Merge pull request #63 from molssi-seamm/dev
Browse files Browse the repository at this point in the history
Bugfix: error in QCSchema bonds; enhancement: RDKit
  • Loading branch information
seamm authored Jul 26, 2023
2 parents de56caa + 1803e5b commit afaa651
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 45 deletions.
6 changes: 5 additions & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
History
=======

2023.7.26 -- Bugfix: error in QCSchema bonds; enhancement: RDKit
* Fixed bug in the bond indices in QCSchema
* Added ability to use RDKit for SMILES and InChI

2023.7.18.1 -- Added support for creating structures from InChIKeys
* Uses PubChem to translate the InChiKey to InChI.

2023.7.18 -- Added support for InChI and InChIKeys

2023.7.9 -- Added JSON properties
Expand Down
56 changes: 38 additions & 18 deletions molsystem/inchi.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
" conda install -c conda-forge openbabel"
)
raise
try:
from rdkit import Chem
except ModuleNotFoundError:
print("Please install RDKit using conda:\n conda install -c conda-forge rdkit")
raise

logger = logging.getLogger(__name__)

Expand All @@ -30,13 +35,15 @@ def inchikey(self):
"""Return the InChIKey string for this object."""
return self.to_inchi(key=True)

def to_inchi(self, key=False):
def to_inchi(self, key=False, rdkit=False):
"""Create the InChI string from the system.
Parameters
----------
key : bool = False
Whether to create the InChIKey
rdkit : bool = False
Whether to use RDKit rather than default of OpenBabel
Returns
-------
Expand All @@ -45,21 +52,28 @@ def to_inchi(self, key=False):
"""
logger.info("to_inchi")

obConversion = openbabel.OBConversion()
if key:
obConversion.SetOutFormat("inchikey")
if rdkit:
mol = self.to_RDKMol()
if key:
inchi = Chem.inchi.MolToInchiKey(mol)
else:
inchi = Chem.inchi.MolToInchi(mol)
else:
obConversion.SetOutFormat("inchi")
obConversion = openbabel.OBConversion()
if key:
obConversion.SetOutFormat("inchikey")
else:
obConversion.SetOutFormat("inchi")

mol = self.to_OBMol()
mol = self.to_OBMol()

inchi = obConversion.WriteString(mol)
inchi = obConversion.WriteString(mol)

logger.info(f"inchi = '{inchi}'")

return inchi.strip()

def from_inchi(self, inchi, name=None):
def from_inchi(self, inchi, name=None, rdkit=False):
"""Create the system from a InChI string.
Parameters
Expand All @@ -68,6 +82,8 @@ def from_inchi(self, inchi, name=None):
The InChI string
name : str = None
The name of the molecule
rdkit : bool = False
Whether to use RDKit rather than default of OpenBabel
Returns
-------
Expand All @@ -76,19 +92,23 @@ def from_inchi(self, inchi, name=None):

save = self.name

obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats("inchi", "mdl")
mol = openbabel.OBMol()
obConversion.ReadString(mol, inchi)
if rdkit:
mol = Chem.inchi.MolFromInchi(inchi, sanitize=True, removeHs=False)
self.from_RDKMol(mol)
else:
obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats("inchi", "mdl")
mol = openbabel.OBMol()
obConversion.ReadString(mol, inchi)

# Add hydrogens
mol.AddHydrogens()
# Add hydrogens
mol.AddHydrogens()

# Get coordinates for a 3-D structure
builder = openbabel.OBBuilder()
builder.Build(mol)
# Get coordinates for a 3-D structure
builder = openbabel.OBBuilder()
builder.Build(mol)

self.from_OBMol(mol)
self.from_OBMol(mol)

if name is not None:
self.name = name
Expand Down
6 changes: 4 additions & 2 deletions molsystem/qcschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ def to_qcschema_dict(self, properties=None):
result["symbols"] = [*self.atoms.symbols]
factor = Q_(1.0, "Å").m_as("a_0")
xyz = []
# round() below helps tests work across platforms. 9 digits are enough!
for row in self.atoms.get_coordinates(fractionals=False):
for val in row:
xyz.append(val * factor)
xyz.append(round(val * factor, 9))
result["geometry"] = xyz

# Charge and multiplicity
Expand All @@ -35,8 +36,9 @@ def to_qcschema_dict(self, properties=None):

# Bonds, if any
bonds = []
index = {j: i for i, j in zip(range(self.n_atoms), self.atoms.ids)}
for row in self.bonds.bonds():
bonds.append((row["i"], row["j"], row["bondorder"]))
bonds.append((index[row["i"]], index[row["j"]], row["bondorder"]))
if len(bonds) > 0:
result["connectivity"] = bonds

Expand Down
80 changes: 56 additions & 24 deletions molsystem/smiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
" conda install -c conda-forge openbabel"
)
raise
try:
from rdkit import Chem
from rdkit.Chem import AllChem
except ModuleNotFoundError:
print("Please install RDKit using conda:\n conda install -c conda-forge rdkit")
raise

logger = logging.getLogger(__name__)

Expand All @@ -34,7 +40,7 @@ def smiles(self):
"""Return the SMILES string for this object."""
return self.to_smiles()

def to_smiles(self, canonical=False, hydrogens=False):
def to_smiles(self, canonical=False, hydrogens=False, isomeric=True, rdkit=False):
"""Create the SMILES string from the system.
Parameters
Expand All @@ -43,6 +49,10 @@ def to_smiles(self, canonical=False, hydrogens=False):
Whether to create canonical SMILES
hydrogens : bool = False
Whether to keep H's in the SMILES string.
isomeric : bool = True
Whether to use isomeric SMILES
rdkit : bool = False
Whether to use RDKit rather than default of OpenBabel
Returns
-------
Expand All @@ -51,23 +61,40 @@ def to_smiles(self, canonical=False, hydrogens=False):
"""
logger.info("to_smiles")

obConversion = openbabel.OBConversion()
if canonical:
obConversion.SetOutFormat("can")
if rdkit:
mol = self.to_RDKMol()
if isomeric:
Chem.FindPotentialStereo(mol)
if hydrogens:
mol2 = mol
else:
mol2 = AllChem.RemoveHs(mol)
if canonical:
smiles = Chem.MolToSmiles(mol2, isomericSmiles=isomeric)
else:
smiles = Chem.MolToSmiles(
mol2, isomericSmiles=isomeric, canonical=False
)
else:
obConversion.SetOutFormat("smi")
obConversion = openbabel.OBConversion()
if canonical:
obConversion.SetOutFormat("can")
else:
obConversion.SetOutFormat("smi")

mol = self.to_OBMol()
mol = self.to_OBMol()

if hydrogens:
obConversion.AddOption("h")
smiles = obConversion.WriteString(mol)
if hydrogens:
obConversion.AddOption("h")
if not isomeric:
obConversion.AddOption("i")
smiles = obConversion.WriteString(mol)

logger.info(f"smiles = '{smiles}'")

return smiles.strip()

def from_smiles(self, smiles, name=None):
def from_smiles(self, smiles, name=None, rdkit=False):
"""Create the system from a SMILES string.
Parameters
Expand All @@ -76,6 +103,8 @@ def from_smiles(self, smiles, name=None):
The SMILES string
name : str = None
The name of the molecule
rdkit : bool = False
Whether to use RDKit rather than default of OpenBabel
Returns
-------
Expand All @@ -84,23 +113,26 @@ def from_smiles(self, smiles, name=None):

save = self.name

obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats("smi", "mdl")
obConversion.AddOption("3")
mol = openbabel.OBMol()
obConversion.ReadString(mol, smiles)

# Add hydrogens
mol.AddHydrogens()
if rdkit:
mol = Chem.rdmolfiles.MolFromSmiles(smiles)
mol = Chem.AddHs(mol)
AllChem.EmbedMolecule(mol)
self.from_RDKMol(mol)
else:
obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats("smi", "mdl")
obConversion.AddOption("3")
mol = openbabel.OBMol()
obConversion.ReadString(mol, smiles)

# Get coordinates for a 3-D structure
builder = openbabel.OBBuilder()
builder.Build(mol)
# Add hydrogens
mol.AddHydrogens()

# molfile = obConversion.WriteString(mol)
# self.from_molfile_text(molfile)
# Get coordinates for a 3-D structure
builder = openbabel.OBBuilder()
builder.Build(mol)

self.from_OBMol(mol)
self.from_OBMol(mol)

if name is not None:
self.name = name
Expand Down
44 changes: 44 additions & 0 deletions tests/test_qcschema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Tests for the QCSchema mixin of the class."""

import pprint # noqa: F401
import pytest # noqa: F401


def test_water(H2O):
"""Test creating QCSchema for water."""
correct = '{"schema_name": "qcschema_molecule", "schema_version": 2, "symbols": ["O", "H", "H"], "geometry": [0.0, 0.0, 0.0, 1.430428808, 0.0, 1.107157044, -1.430428808, 0.0, 1.107157044], "molecular_charge": 0, "molecular_multiplicity": 1, "connectivity": [[0, 1, 1], [0, 2, 1]], "fragments": [[0, 1, 2]], "name": "water / TIP3P", "atom_labels": ["O", "H1", "H2"]}' # noqa: E501

data = H2O.to_qcschema_json()

if correct != data:
print("----")
print(data)
print("----")

assert data == correct


def test_acetic_acid(AceticAcid):
"""Test creating QCSchema for actic acid."""
correct = '{"schema_name": "qcschema_molecule", "schema_version": 2, "symbols": ["C", "H", "H", "H", "C", "O", "O", "H"], "geometry": [2.040337297, 0.034204043, -0.034770961, 1.092639645, 5.929204689, 0.531579959, 1.362303563, -1.272919518, -1.485135761, 1.332634863, -0.593940921, 1.800720024, 1.079600535, 2.626530341, -0.597342428, -0.250010766, 3.239368523, -2.375007793, 1.84380578, 4.340700908, 1.118528893, 4.105241033, 0.030424591, -0.057825619], "molecular_charge": 0, "molecular_multiplicity": 1, "connectivity": [[0, 1, 1], [0, 2, 1], [0, 3, 1], [0, 4, 1], [4, 5, 2], [4, 6, 1], [6, 7, 1]], "fragments": [[0, 1, 2, 3, 4, 5, 6, 7]], "name": "acetic acid / acetic acid"}' # noqa: E501

data = AceticAcid.to_qcschema_json()

if correct != data:
print("----")
print(data)
print("----")

assert data == correct


def test_from_schema(configuration, AceticAcid):
"""Test creating QCSchema for actic acid."""
correct = '{"schema_name": "qcschema_molecule", "schema_version": 2, "symbols": ["C", "H", "H", "H", "C", "O", "O", "H"], "geometry": [2.040337296754675, 0.03420404285566326, -0.03477096069304994, 1.092639645256602, 5.929204688614864, 0.5315799588562472, 1.3623035632402012, -1.2729195175455674, -1.4851357613406495, 1.3326348630836315, -0.5939409209687825, 1.80072002415257, 1.0796005349967084, 2.626530340612506, -0.5973424279931026, -0.25001076628752755, 3.2393685228275113, -2.3750077934252807, 1.843805779793958, 4.340700908257376, 1.1185288931639272, 4.10524103312944, 0.030424590606418698, -0.05782561941344175], "molecular_charge": 0, "molecular_multiplicity": 1, "connectivity": [[0, 1, 1], [0, 2, 1], [0, 3, 1], [0, 4, 1], [4, 5, 2], [4, 6, 1], [6, 7, 1]], "fragments": [[0, 1, 2, 3, 4, 5, 6, 7]], "name": "acetic acid / acetic acid"}' # noqa: E501

configuration.from_qcschema_json(correct)

assert configuration == AceticAcid

0 comments on commit afaa651

Please sign in to comment.