diff --git a/package/CHANGELOG b/package/CHANGELOG index 64fcb63fe0..85a7208627 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -23,6 +23,8 @@ The rules for this file: * 2.8.0 Fixes + * Adds guessed attributes documentation back to each parser page + and updates overall guesser docs (Issue #4696) * Fix Bohrium (Bh) atomic mass in tables.py (PR #3753) * set `n_parts` to the total number of frames being analyzed if `n_parts` is bigger. (Issue #4685) diff --git a/package/MDAnalysis/guesser/default_guesser.py b/package/MDAnalysis/guesser/default_guesser.py index a64b023309..f49e75b24c 100644 --- a/package/MDAnalysis/guesser/default_guesser.py +++ b/package/MDAnalysis/guesser/default_guesser.py @@ -27,9 +27,70 @@ DefaultGuesser is a generic guesser class that has basic guessing methods. This class is a general purpose guesser that can be used with most topologies, -but being generic makes it the less accurate among all guessers. +but being generic makes it the least accurate among all guessers. +Guessing behavior +----------------- + +This section describes how each attribute is guessed by the DefaultGuesser. + +Masses +~~~~~~ + +We first attempt to look up the mass of an atom based on its element if the +element TopologyAttr is available. If not, we attempt to look up the mass based +on the atom type (``type``) TopologyAttr. If neither of these is available, we +attempt to guess the atom type based on the atom name (``name``) and then +look up the mass based on the guessed atom type. + + +Types +~~~~~ + +We attempt to guess the atom type based on the atom name (``name``). +The name is first stripped of any numbers and symbols, and then looked up in +the :data:`MDAnalysis.guesser.tables.atomelements` table. If the name is not +found, we continue checking variations of the name following the logic in +:meth:`DefaultGuesser.guess_atom_element`. Ultimately, if no match is found, +the first character of the stripped name is returned.
+ +Elements +~~~~~~~~ + +This follows the same method as guessing atom types. + + +Bonds +~~~~~ + +Bonds are guessed based on the distance between atoms. +See :meth:`DefaultGuesser.guess_bonds` for more details. + +Angles +~~~~~~ + +Angles are guessed based on the bonds between atoms. +See :meth:`DefaultGuesser.guess_angles` for more details. + +Dihedrals +~~~~~~~~~ + +Dihedrals are guessed based on the angles between atoms. +See :meth:`DefaultGuesser.guess_dihedrals` for more details. + +Improper Dihedrals +~~~~~~~~~~~~~~~~~~ + +Improper dihedrals are guessed based on the angles between atoms. +See :meth:`DefaultGuesser.guess_improper_dihedrals` for more details. + +Aromaticities +~~~~~~~~~~~~~ + +Aromaticity is guessed using RDKit's GetIsAromatic method. +See :meth:`DefaultGuesser.guess_aromaticities` for more details. + @@ -70,6 +131,23 @@ class DefaultGuesser(GuesserBase): You can use this guesser either directly through an instance, or through the :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` method. + Parameters + ---------- + universe : Universe + The Universe to apply the guesser on + box : np.ndarray, optional + The box of the Universe. This is used for bond guessing. + vdwradii : dict, optional + Dict relating atom types: vdw radii. This is used for bond guessing + fudge_factor : float, optional + The factor by which atoms must overlap each other to be considered + a bond. Larger values will increase the number of bonds found. [0.55] + lower_bound : float, optional + The minimum bond length. All bonds found shorter than this length + will be ignored. This is useful for parsing PDB with altloc records + where atoms with altloc A and B may be very close together and + there should be no chemical bond between them. 
[0.1] + Examples -------- to guess bonds for a universe:: @@ -84,8 +162,23 @@ class DefaultGuesser(GuesserBase): """ context = 'default' - def __init__(self, universe, **kwargs): - super().__init__(universe, **kwargs) + def __init__( + self, + universe, + box=None, + vdwradii=None, + fudge_factor=0.55, + lower_bound=0.1, + **kwargs + ): + super().__init__( + universe, + box=box, + vdwradii=vdwradii, + fudge_factor=fudge_factor, + lower_bound=lower_bound, + **kwargs + ) self._guesser_methods = { 'masses': self.guess_masses, 'types': self.guess_types, @@ -212,8 +305,19 @@ def guess_types(self, atom_types=None, indices_to_guess=None): def guess_atom_element(self, atomname): """Guess the element of the atom from the name. - Looks in dict to see if element is found, otherwise it uses the first - character in the atomname. The table comes from CHARMM and AMBER atom + First all numbers and symbols are stripped from the name. + Then the name is looked up in the + :data:`MDAnalysis.guesser.tables.atomelements` table. + If the name is not found, we remove the last character or + first character from the name and check the table for both, + with a preference for removing the last character. If the name is + still not found, we iteratively continue to remove the last character + or first character until we find a match. If ultimately no match + is found, the first character of the stripped name is returned. + + If the input name is an empty string, an empty string is returned. + + The table comes from CHARMM and AMBER atom types, where the first character is not sufficient to determine the atom type. Some GROMOS ions have also been added. @@ -270,26 +374,11 @@ def guess_bonds(self, atoms=None, coords=None): Parameters ---------- - atoms : AtomGroup - atoms for which bonds should be guessed - fudge_factor : float, optional - The factor by which atoms must overlap eachother to be considered a - bond. Larger values will increase the number of bonds found. 
[0.55] - vdwradii : dict, optional - To supply custom vdwradii for atoms in the algorithm. Must be a - dict of format {type:radii}. The default table of van der Waals - radii is hard-coded as :data:`MDAnalysis.guesser.tables.vdwradii`. - Any user defined vdwradii passed as an argument will supercede the - table values. [``None``] - lower_bound : float, optional - The minimum bond length. All bonds found shorter than this length - will be ignored. This is useful for parsing PDB with altloc records - where atoms with altloc A and B maybe very close together and - there should be no chemical bond between them. [0.1] - box : array_like, optional - Bonds are found using a distance search, if unit cell information - is given, periodic boundary conditions will be considered in the - distance search. [``None``] + atoms : AtomGroup + atoms for which bonds should be guessed + coords : np.ndarray, optional + coordinates of the atoms. If not provided, the coordinates + of the ``atoms`` in the universe are used. Returns ------- diff --git a/package/MDAnalysis/topology/CRDParser.py b/package/MDAnalysis/topology/CRDParser.py index 5e4406732f..9a1fc72ec0 100644 --- a/package/MDAnalysis/topology/CRDParser.py +++ b/package/MDAnalysis/topology/CRDParser.py @@ -33,6 +33,12 @@ Residues are detected through a change is either resid or resname while segments are detected according to changes in segid. +.. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. _CRD: https://www.charmmtutorial.org/index.php/CHARMM:The_Basics @@ -72,6 +78,13 @@ class CRDParser(TopologyReaderBase): - Resnums - Segids + + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 2.8.0 Type and mass are not longer guessed here.
Until 3.0 these will still be set by default through through universe.guess_TopologyAttrs() API. diff --git a/package/MDAnalysis/topology/DLPolyParser.py b/package/MDAnalysis/topology/DLPolyParser.py index b85a0d188c..4148a38c06 100644 --- a/package/MDAnalysis/topology/DLPolyParser.py +++ b/package/MDAnalysis/topology/DLPolyParser.py @@ -30,6 +30,12 @@ - Atomnames - Atomids +.. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. _Poly: http://www.stfc.ac.uk/SCD/research/app/ccg/software/DL_POLY/44516.aspx Classes diff --git a/package/MDAnalysis/topology/DMSParser.py b/package/MDAnalysis/topology/DMSParser.py index f165272fc2..f37a854c72 100644 --- a/package/MDAnalysis/topology/DMSParser.py +++ b/package/MDAnalysis/topology/DMSParser.py @@ -86,11 +86,17 @@ class DMSParser(TopologyReaderBase): Segment: - Segids + .. note:: + + By default, atomtypes will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. _DESRES: http://www.deshawresearch.com .. _Desmond: http://www.deshawresearch.com/resources_desmond.html .. _DMS: http://www.deshawresearch.com/Desmond_Users_Guide-0.7.pdf .. versionchanged:: 2.8.0 - Removed type and mass guessing (attributes guessing takes place now + Removed type guessing (attributes guessing takes place now through universe.guess_TopologyAttrs() API). """ diff --git a/package/MDAnalysis/topology/ExtendedPDBParser.py b/package/MDAnalysis/topology/ExtendedPDBParser.py index ec6e1e527d..b41463403e 100644 --- a/package/MDAnalysis/topology/ExtendedPDBParser.py +++ b/package/MDAnalysis/topology/ExtendedPDBParser.py @@ -78,6 +78,12 @@ class ExtendedPDBParser(PDBParser.PDBParser): - bonds - formalcharges + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. 
+ See Also -------- diff --git a/package/MDAnalysis/topology/FHIAIMSParser.py b/package/MDAnalysis/topology/FHIAIMSParser.py index fcf95691f3..8738d5e3ce 100644 --- a/package/MDAnalysis/topology/FHIAIMSParser.py +++ b/package/MDAnalysis/topology/FHIAIMSParser.py @@ -66,6 +66,13 @@ class FHIAIMSParser(TopologyReaderBase): Creates the following attributes: - Atomnames + + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 2.8.0 Removed type and mass guessing (attributes guessing takes place now through universe.guess_TopologyAttrs() API). diff --git a/package/MDAnalysis/topology/GMSParser.py b/package/MDAnalysis/topology/GMSParser.py index 812207ed67..2223cc4275 100644 --- a/package/MDAnalysis/topology/GMSParser.py +++ b/package/MDAnalysis/topology/GMSParser.py @@ -73,6 +73,13 @@ class GMSParser(TopologyReaderBase): - names - atomic charges + + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionadded:: 0.9.1 .. versionchanged:: 2.8.0 Removed type and mass guessing (attributes guessing takes place now diff --git a/package/MDAnalysis/topology/GROParser.py b/package/MDAnalysis/topology/GROParser.py index 6bcaec24cb..ebb51e7cd0 100644 --- a/package/MDAnalysis/topology/GROParser.py +++ b/package/MDAnalysis/topology/GROParser.py @@ -67,6 +67,12 @@ class GROParser(TopologyReaderBase): - atomids - atomnames + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 2.8.0 Removed type and mass guessing (attributes guessing takes place now through universe.guess_TopologyAttrs() API). 
diff --git a/package/MDAnalysis/topology/ITPParser.py b/package/MDAnalysis/topology/ITPParser.py index d855216027..9c9dd37976 100644 --- a/package/MDAnalysis/topology/ITPParser.py +++ b/package/MDAnalysis/topology/ITPParser.py @@ -473,6 +473,13 @@ class ITPParser(TopologyReaderBase): .. _ITP: http://manual.gromacs.org/current/reference-manual/topologies/topology-file-formats.html#molecule-itp-file .. _TOP: http://manual.gromacs.org/current/reference-manual/file-formats.html#top + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation + if they are not read from the input file. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 2.2.0 no longer adds angles for water molecules with SETTLE constraint .. versionchanged:: 2.8.0 diff --git a/package/MDAnalysis/topology/LAMMPSParser.py b/package/MDAnalysis/topology/LAMMPSParser.py index 62664b568b..52a58f7729 100644 --- a/package/MDAnalysis/topology/LAMMPSParser.py +++ b/package/MDAnalysis/topology/LAMMPSParser.py @@ -27,6 +27,12 @@ Parses data_ or dump_ files produced by LAMMPS_. +.. note:: + + By default, masses will be guessed on Universe creation if they are not + read from the input file. This may change in release 3.0. + See :ref:`Guessers` for more information. + .. _LAMMPS: http://lammps.sandia.gov/ .. _data: DATA file format: :http://lammps.sandia.gov/doc/2001/data_format.html .. _dump: http://lammps.sandia.gov/doc/dump.html diff --git a/package/MDAnalysis/topology/MMTFParser.py b/package/MDAnalysis/topology/MMTFParser.py index e9332e9d68..5a58f1b245 100644 --- a/package/MDAnalysis/topology/MMTFParser.py +++ b/package/MDAnalysis/topology/MMTFParser.py @@ -64,6 +64,12 @@ - segid - model + .. note:: + + By default, masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. 
+ Classes ------- diff --git a/package/MDAnalysis/topology/MOL2Parser.py b/package/MDAnalysis/topology/MOL2Parser.py index 4345ca0efe..f554985875 100644 --- a/package/MDAnalysis/topology/MOL2Parser.py +++ b/package/MDAnalysis/topology/MOL2Parser.py @@ -78,6 +78,13 @@ class MOL2Parser(TopologyReaderBase): - Elements + .. note:: + + By default, masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + + Notes ----- Elements are obtained directly from the SYBYL atom types. If some atoms have diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index bad6d2bc6d..8349be9133 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -35,6 +35,13 @@ :mod:`~MDAnalysis.topology.ExtendedPDBParser`) that can handle residue numbers up to 99,999. +.. note:: + + Atomtypes will be created from elements if they are present and valid. + Otherwise, they will be guessed on Universe creation. + By default, masses will also be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. .. Note:: diff --git a/package/MDAnalysis/topology/PDBQTParser.py b/package/MDAnalysis/topology/PDBQTParser.py index 9764082021..435ec5678c 100644 --- a/package/MDAnalysis/topology/PDBQTParser.py +++ b/package/MDAnalysis/topology/PDBQTParser.py @@ -32,6 +32,12 @@ * Reads a PDBQT file line by line and does not require sequential atom numbering. * Multi-model PDBQT files are not supported. +.. note:: + + By default, masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. 
+ Notes ----- Only reads atoms and their names; connectivity is not diff --git a/package/MDAnalysis/topology/PQRParser.py b/package/MDAnalysis/topology/PQRParser.py index 1adcd7fba2..9ef6d3e6f9 100644 --- a/package/MDAnalysis/topology/PQRParser.py +++ b/package/MDAnalysis/topology/PQRParser.py @@ -80,6 +80,14 @@ class PQRParser(TopologyReaderBase): - Resnames - Segids + .. note:: + + Atomtypes will be read from the input file if they are present + (e.g. GROMACS PQR files). Otherwise, they will be guessed on Universe + creation. By default, masses will also be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 0.9.0 Read chainID from a PQR file and use it as segid (before we always used diff --git a/package/MDAnalysis/topology/TXYZParser.py b/package/MDAnalysis/topology/TXYZParser.py index 0781488c9d..206f381e9e 100644 --- a/package/MDAnalysis/topology/TXYZParser.py +++ b/package/MDAnalysis/topology/TXYZParser.py @@ -73,6 +73,12 @@ class TXYZParser(TopologyReaderBase): - Atomtypes - Elements (if all atom names are element symbols) + .. note:: + + By default, masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionadded:: 0.17.0 .. versionchanged:: 2.4.0 Adding the `Element` attribute if all names are valid element symbols. diff --git a/package/MDAnalysis/topology/XYZParser.py b/package/MDAnalysis/topology/XYZParser.py index cb0df129e0..5fe736fec6 100644 --- a/package/MDAnalysis/topology/XYZParser.py +++ b/package/MDAnalysis/topology/XYZParser.py @@ -59,6 +59,12 @@ class XYZParser(TopologyReaderBase): Creates the following attributes: - Atomnames + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. 
versionadded:: 0.9.1 diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst index 7747fdc380..96cb324270 100644 --- a/package/doc/sphinx/source/documentation_pages/guesser_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst @@ -1,19 +1,30 @@ .. Contains the formatted docstrings from the guesser modules located in 'mdanalysis/package/MDAnalysis/guesser' +.. _Guessers: + ************************** Guesser modules ************************** -This module contains the context-aware guessers, which are used by the :meth:`~MDAnalysis.core.Universe.Universe.guess_TopologyAttrs` API. Context-aware guessers' main purpose +This module contains the context-aware guessers, which are used by the :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` API. Context-aware guessers' main purpose is to be tailored guesser classes that target specific file format or force field (eg. PDB file format, or Martini forcefield). -Having such guessers makes attribute guessing more accurate and reliable than having generic guessing methods that doesn't fit all topologies. +Having such guessers makes attribute guessing more accurate and reliable than having generic guessing methods that don't fit all scenarios. Example uses of guessers ------------------------ +Default behavior +~~~~~~~~~~~~~~~~ + +By default, MDAnalysis will guess the "mass" and "type" (atom type) attributes for all particles in the Universe +using the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` at the time of Universe creation, +if they are not read from the input file. +Please see the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` for more information. 
+ + + Guessing using :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Guessing can be done through the Universe's :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` as following:: import MDAnalysis as mda @@ -24,12 +35,12 @@ Guessing can be done through the Universe's :meth:`~MDAnalysis.core.universe.Uni u.guess_TopologyAttrs(to_guess=['elements']) print(u.atoms.elements) # print ['N' 'H' 'H' ... 'NA' 'NA' 'NA'] -In the above example, we passed ``elements`` as the attribute we want to guess, and +In the above example, we passed ``elements`` as the attribute we want to guess, and :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` guess then add it as a topology attribute to the ``AtomGroup`` of the universe. -If the attribute already exist in the universe, passing the attribute of interest to the ``to_guess`` parameter will only fill the empty values of the attribute if any exists. -To override all the attribute values, you can pass the attribute to the ``force_guess`` parameter instead of the to_guess one as following:: +If the attribute already exists in the universe, passing the attribute of interest to the ``to_guess`` parameter will only fill the empty values of the attribute if any exist. +To override all the attribute values, you can pass the attribute to the ``force_guess`` parameter instead of ``to_guess`` as follows:: import MDAnalysis as mda from MDAnalysisTests.datafiles import PRM12 @@ -38,9 +49,14 @@ To override all the attribute values, you can pass the attribute to the ``force_ u.guess_TopologyAttrs(force_guess=['types']) # types ['H', 'O', ..] -N.B.: If you didn't pass any ``context`` to the API, it will use the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` -.. rubric:: available guessers +.. note:: + The default ``context`` will use the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` + + + + +..
rubric:: Available guessers .. toctree:: :maxdepth: 1 @@ -48,7 +64,7 @@ N.B.: If you didn't pass any ``context`` to the API, it will use the :class:`~MD guesser_modules/default_guesser -.. rubric:: guesser core modules +.. rubric:: Guesser core modules The remaining pages are primarily of interest to developers as they contain functions and classes that are used in the implementation of