Skip to content

Commit

Permalink
Merge pull request #50 from molssi-seamm/dev
Browse files Browse the repository at this point in the history
Added writers for CIF and mmCIF.
  • Loading branch information
seamm authored Nov 5, 2023
2 parents 5951119 + 74a3f2e commit 53fce0a
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 1 deletion.
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
=======
History
=======
2023.11.5 -- Added writers for CIF and mmCIF.

2023.11.2 -- Initial changes to structure handling
* Moving towards the standard structure handling that has developed across SEAMM.

2023.8.30 -- Support for spacegroup symmetry

2023.7.28 -- Implemented ranges for reading XYZ and SDF files.
Expand Down
90 changes: 89 additions & 1 deletion read_structure_step/formats/cif/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from ..registries import register_format_checker
from ..registries import register_reader
from ..registries import register_writer
from ..registries import set_format_metadata
from seamm_util.printing import FormattedText as __
from ...utils import parse_indices
Expand Down Expand Up @@ -326,4 +327,91 @@ def load_cif(
if n_errors > 0:
printer(f" {n_errors} structures could not be read due to errors.")

return configurations
return configurations


@register_writer(".cif -- Crystallographic Information File")
def write_cif(
    path,
    configurations,
    extension=None,
    remove_hydrogens="no",
    printer=None,
    references=None,
    bibliography=None,
):
    """Write configurations to a CIF file, optionally compressed.

    The text returned by each configuration's ``to_cif_text()`` method is
    written to the file, in order. If the file name ends in ``.gz`` or
    ``.bz2`` the output is compressed with gzip or bzip2, respectively, and
    the text is encoded as UTF-8.

    Parameters
    ----------
    path : str or pathlib.Path
        Name of the file. A leading ``~`` is expanded and the path is made
        absolute before the file is opened.
    configurations : [Configuration]
        The SEAMM configurations to write. Must support ``len()``.
    extension : str, optional, default: None
        The extension, including initial dot, defining the format.
        Not used by this writer.
    remove_hydrogens : str = "no"
        Whether to remove hydrogen atoms before writing the structure to file.
        Not used by this writer.
    printer : Logger or Printer
        A function that prints to the appropriate place, used for progress.
        If None (or otherwise falsy), no progress is reported.
    references : ReferenceHandler = None
        The reference handler object or None. Not used by this writer.
    bibliography : dict
        The bibliography as a dictionary. Not used by this writer.

    Returns
    -------
    [Configuration]
        The ``configurations`` argument, as passed in.
    """
    # Path objects are immutable, so the expanded/resolved path must be
    # assigned back; otherwise "~" is never expanded.
    path = Path(path).expanduser().resolve()

    n_structures = len(configurations)
    last_percent = 0
    last_t = t0 = time.time()
    structure_no = 0

    # Compressed files are opened in binary mode and so need bytes. The suffix
    # test here must agree with the choice of opener below (".bz2", not ".bz").
    suffix = path.suffix
    compress = suffix in (".gz", ".bz2")
    if suffix == ".gz":
        fd_context = gzip.open(path, mode="wb")
    elif suffix == ".bz2":
        fd_context = bz2.open(path, mode="wb")
    else:
        fd_context = open(path, "w")

    with fd_context as fd:
        for configuration in configurations:
            text = configuration.to_cif_text()

            structure_no += 1

            if compress:
                fd.write(text.encode("utf-8"))
            else:
                fd.write(text)

            if printer:
                percent = int(100 * structure_no / n_structures)
                if percent > last_percent:
                    t1 = time.time()
                    # Report at most once a minute, and only when the integer
                    # percentage has advanced.
                    if t1 - last_t >= 60:
                        t = int(t1 - t0)
                        rate = structure_no / (t1 - t0)
                        t_left = int((n_structures - structure_no) / rate)
                        printer(
                            f"\t{structure_no:6} ({percent}%) structures wrote in {t} "
                            f"seconds. About {t_left} seconds remaining."
                        )
                        last_t = t1
                        last_percent = percent

    if printer:
        t1 = time.time()
        elapsed = t1 - t0
        # Guard against a zero elapsed time (e.g. nothing to write, or a
        # coarse clock), which would otherwise raise ZeroDivisionError.
        rate = structure_no / elapsed if elapsed > 0 else 0.0
        printer(
            f" Wrote {structure_no} structures in {t1 - t0:.1f} seconds = "
            f"{rate:.2f} per second"
        )
    return configurations
91 changes: 91 additions & 0 deletions read_structure_step/formats/cif/mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
The mmcif reader/writer
"""

import bz2
import gzip
import logging
from pathlib import Path
import time

from ..registries import register_format_checker
from ..registries import register_reader
from ..registries import register_writer
from ..registries import set_format_metadata

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -241,3 +245,90 @@ def load_mmcif(
configuration.name = str(configuration_name)

return configurations


@register_writer(".mmcif -- Macromolecular Crystallographic Information File")
def write_mmcif(
    path,
    configurations,
    extension=None,
    remove_hydrogens="no",
    printer=None,
    references=None,
    bibliography=None,
):
    """Write configurations to an mmCIF file, optionally compressed.

    The text returned by each configuration's ``to_mmcif_text()`` method is
    written to the file, in order. If the file name ends in ``.gz`` or
    ``.bz2`` the output is compressed with gzip or bzip2, respectively, and
    the text is encoded as UTF-8.

    Parameters
    ----------
    path : str or pathlib.Path
        Name of the file. A leading ``~`` is expanded and the path is made
        absolute before the file is opened.
    configurations : [Configuration]
        The SEAMM configurations to write. Must support ``len()``.
    extension : str, optional, default: None
        The extension, including initial dot, defining the format.
        Not used by this writer.
    remove_hydrogens : str = "no"
        Whether to remove hydrogen atoms before writing the structure to file.
        Not used by this writer.
    printer : Logger or Printer
        A function that prints to the appropriate place, used for progress.
        If None (or otherwise falsy), no progress is reported.
    references : ReferenceHandler = None
        The reference handler object or None. Not used by this writer.
    bibliography : dict
        The bibliography as a dictionary. Not used by this writer.

    Returns
    -------
    [Configuration]
        The ``configurations`` argument, as passed in.
    """
    # Path objects are immutable, so the expanded/resolved path must be
    # assigned back; otherwise "~" is never expanded.
    path = Path(path).expanduser().resolve()

    n_structures = len(configurations)
    last_percent = 0
    last_t = t0 = time.time()
    structure_no = 0

    # Compressed files are opened in binary mode and so need bytes. The suffix
    # test here must agree with the choice of opener below (".bz2", not ".bz").
    suffix = path.suffix
    compress = suffix in (".gz", ".bz2")
    if suffix == ".gz":
        fd_context = gzip.open(path, mode="wb")
    elif suffix == ".bz2":
        fd_context = bz2.open(path, mode="wb")
    else:
        fd_context = open(path, "w")

    with fd_context as fd:
        for configuration in configurations:
            text = configuration.to_mmcif_text()

            structure_no += 1

            if compress:
                fd.write(text.encode("utf-8"))
            else:
                fd.write(text)

            if printer:
                percent = int(100 * structure_no / n_structures)
                if percent > last_percent:
                    t1 = time.time()
                    # Report at most once a minute, and only when the integer
                    # percentage has advanced.
                    if t1 - last_t >= 60:
                        t = int(t1 - t0)
                        rate = structure_no / (t1 - t0)
                        t_left = int((n_structures - structure_no) / rate)
                        printer(
                            f"\t{structure_no:6} ({percent}%) structures wrote in {t} "
                            f"seconds. About {t_left} seconds remaining."
                        )
                        last_t = t1
                        last_percent = percent

    if printer:
        t1 = time.time()
        elapsed = t1 - t0
        # Guard against a zero elapsed time (e.g. nothing to write, or a
        # coarse clock), which would otherwise raise ZeroDivisionError.
        rate = structure_no / elapsed if elapsed > 0 else 0.0
        printer(
            f" Wrote {structure_no} structures in {t1 - t0:.1f} seconds = "
            f"{rate:.2f} per second"
        )
    return configurations

0 comments on commit 53fce0a

Please sign in to comment.