Skip to content

Commit

Permalink
Merge pull request #3 from molssi-seamm/dev
Browse files Browse the repository at this point in the history
Improved fitting of curves
  • Loading branch information
seamm authored Jul 4, 2024
2 parents 056f39e + 56a77c4 commit dcdd84c
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 59 deletions.
5 changes: 5 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
=======
History
=======
2024.7.4 -- Improved fitting of curves
* Removed weighting of the fit by the stdev since it is too biased to the beginning
* Added control over the portion of the data to fit in order to avoid the initial
curvature and poor data towards the end.

2024.6.3 -- Bugfix: handling of options for subflowchart
* Fixed a bug where the options for the subflowchart were not being parsed
correctly.
Expand Down
1 change: 1 addition & 0 deletions devtools/conda-envs/test_env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies:
# SEAMM
- molsystem
- numpy
- rdkit
- scipy
- seamm
- seamm-util
Expand Down
96 changes: 60 additions & 36 deletions diffusivity_step/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def create_helfand_moments(v, species, m=None):
return Ms, M_errs


def fit_helfand_moment(y, xs, sigma=None, start=1):
def fit_helfand_moment(y, xs, sigma=None, start=0.1, end=0.9):
"""Find the best linear fit.
Parameters
Expand All @@ -143,6 +143,12 @@ def fit_helfand_moment(y, xs, sigma=None, start=1):
sigma : [float] or numpy.ndarray()
Optional standard error of y
start : float
Fraction of vector to ignore at the beginning
end : float
The fraction of vector to end at
Returns
-------
slope : float
Expand All @@ -154,33 +160,39 @@ def fit_helfand_moment(y, xs, sigma=None, start=1):
ys : [float]
The y values for the fit curve.
"""
dx = xs[1] - xs[0]

# We know the curves curve near the origin, so ignore the first part
i = int(start / dx)
if i > len(y):
i = int(1 / dx)

popt, pcov, infodict, msg, ierr = curve_fit(
axb,
xs[i:],
y[i:],
full_output=True,
sigma=sigma[i:],
absolute_sigma=True,
)
# We know the curves curve near the origin, so ignore the first part and last
n = len(y)
i = int(n * start)
j = int(n * end)

if sigma is None:
popt, pcov, infodict, msg, ierr = curve_fit(
axb,
xs[i:j],
y[i:j],
full_output=True,
)
else:
popt, pcov, infodict, msg, ierr = curve_fit(
axb,
xs[i:j],
y[i:j],
full_output=True,
sigma=sigma[i:j],
absolute_sigma=True,
)
slope = float(popt[0])
b = float(popt[1])
err = float(np.sqrt(np.diag(pcov)[0]))

ys = []
for x in xs[i:]:
for x in xs[i:j]:
ys.append(axb(x, slope, b))

return slope, err, xs[i:], ys
return slope, err, xs[i:j], ys


def fit_msd(y, xs, sigma=None, start=1):
def fit_msd(y, xs, sigma=None, start=0.1, end=0.9):
"""Find the best linear fit to longest possible segment.
Parameters
Expand All @@ -194,6 +206,12 @@ def fit_msd(y, xs, sigma=None, start=1):
sigma : [float] or numpy.ndarray()
Optional standard error of y
start : float
Fraction of vector to ignore at the beginning
end : float
The fraction of vector to end at
Returns
-------
slope : float
Expand All @@ -205,30 +223,36 @@ def fit_msd(y, xs, sigma=None, start=1):
ys : [float]
The y values for the fit curve.
"""
dx = xs[1] - xs[0]

# We know the curves curve near the origin, so ignore the first part
i = int(start / dx)
if i > len(y):
i = int(1 / dx)

popt, pcov, infodict, msg, ierr = curve_fit(
axb,
xs[i:],
y[i:],
full_output=True,
sigma=sigma[i:],
absolute_sigma=True,
)
# We know the curves curve near the origin, so ignore the first part and last
n = len(y)
i = int(n * start)
j = int(n * end)

if sigma is None:
popt, pcov, infodict, msg, ierr = curve_fit(
axb,
xs[i:j],
y[i:j],
full_output=True,
)
else:
popt, pcov, infodict, msg, ierr = curve_fit(
axb,
xs[i:j],
y[i:j],
full_output=True,
sigma=sigma[i:],
absolute_sigma=True,
)
slope = float(popt[0])
b = float(popt[1])
err = float(np.sqrt(np.diag(pcov)[0]))

ys = []
for x in xs[i:]:
for x in xs[i:j]:
ys.append(axb(x, slope, b))

return slope, err, xs[i:], ys
return slope, err, xs[i:j], ys


def add_helfand_trace(
Expand Down
75 changes: 55 additions & 20 deletions diffusivity_step/diffusivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,15 @@ def species(self):
self._species = self.parse_molecules()
return self._species

def analyze(self, indent="", P=None, style="full", run=None, **kwargs):
def analyze(
self,
indent="",
P=None,
style="full",
run=None,
weighted=False,
**kwargs,
):
"""Do any analysis of the output from this step.
Also print important results to the local step.out file using
Expand All @@ -211,6 +219,8 @@ def analyze(self, indent="", P=None, style="full", run=None, **kwargs):
----------
indent: str
An extra indentation for the output
weighted : bool
Whether to use the stderr to wieght the fit, default False
"""
data = {}

Expand Down Expand Up @@ -263,13 +273,21 @@ def analyze(self, indent="", P=None, style="full", run=None, **kwargs):
# Convert units and remember the factor of 1/6 in the Einstein equation
factor = Q_("Å^2/ps").m_as("m^2/s") / 6
for i in range(nalpha):
start = 1
slope, err, xs, ys = fit_msd(
self._msd[spec][:, i],
ts,
sigma=self._msd_err[spec][:, i],
start=start,
)
if weighted:
slope, err, xs, ys = fit_msd(
self._msd[spec][:, i],
ts,
sigma=self._msd_err[spec][:, i],
start=P["msd_fit_start"],
end=P["msd_fit_end"],
)
else:
slope, err, xs, ys = fit_msd(
self._msd[spec][:, i],
ts,
start=P["msd_fit_start"],
end=P["msd_fit_end"],
)
d_coeff = slope * factor
err = err * factor
if self._scale is None:
Expand Down Expand Up @@ -370,13 +388,21 @@ def analyze(self, indent="", P=None, style="full", run=None, **kwargs):
# Fit the slopes
fit = []
for i in range(nalpha):
start = 1
slope, err, xs, ys = fit_msd(
self._M[spec][:, i],
ts,
sigma=self._M_err[spec][:, i],
start=start,
)
if weighted:
slope, err, xs, ys = fit_msd(
self._M[spec][:, i],
ts,
sigma=self._M_err[spec][:, i],
start=P["helfand_fit_start"],
end=P["helfand_fit_end"],
)
else:
slope, err, xs, ys = fit_msd(
self._M[spec][:, i],
ts,
start=P["helfand_fit_start"],
end=P["helfand_fit_end"],
)
d_coeff = slope
err = err
if self._scale is None:
Expand Down Expand Up @@ -585,15 +611,16 @@ def description_text(self, P=None, short=False):
f"Calculate the diffusivity using the {P['approach']} approach, "
f"averaging over {P['nruns']} runs.\n\n"
)
if not short:
# The subflowchart
self.subflowchart.root_directory = self.flowchart.root_directory

# Make sure that the subflowchart has the executor
self.subflowchart.executor = self.flowchart.executor
# Make sure the subflowchart has the data from the parent flowchart
self.subflowchart.root_directory = self.flowchart.root_directory
self.subflowchart.executor = self.flowchart.executor
self.subflowchart.in_jobserver = self.subflowchart.in_jobserver

if not short:
# Get the first real node
node = self.subflowchart.get_node("1").next()
node.all_options = self.all_options

while node is not None:
try:
Expand Down Expand Up @@ -640,6 +667,9 @@ def run(self):

# Remember the configuration
_, self._configuration = self.get_system_configuration()
# Reset parameters if called in e.g. loop
self._n_molecules = None
self._species = None

# Decide what to do
approach = P["approach"]
Expand Down Expand Up @@ -692,6 +722,11 @@ def run(self):
out_level = out_handler.level
out_handler.setLevel(printing.JOB)

# Make sure the subflowchart has the data from the parent flowchart
self.subflowchart.root_directory = self.flowchart.root_directory
self.subflowchart.executor = self.flowchart.executor
self.subflowchart.in_jobserver = self.subflowchart.in_jobserver

# Get the first real node
first_node = self.subflowchart.get_node("1").next()

Expand Down
40 changes: 38 additions & 2 deletions diffusivity_step/diffusivity_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class DiffusivityParameters(seamm.Parameters):

parameters = {
"approach": {
"default": "Mean Square Displacement (MSD)",
"default": "both",
"kind": "enum",
"default_units": "",
"enumeration": (
Expand All @@ -87,14 +87,50 @@ class DiffusivityParameters(seamm.Parameters):
"help_text": "The approach or method for determining the diffusivity.",
},
"nruns": {
"default": "20",
"default": "1",
"kind": "integer",
"default_units": "",
"enumeration": tuple(),
"format_string": "",
"description": "Number of runs to average:",
"help_text": "The number for separate runs to average.",
},
"msd_fit_start": {
"default": "0.1",
"kind": "float",
"default_units": "",
"enumeration": tuple(),
"format_string": "",
"description": "Start point of MSD fit (0.0..1.0):",
"help_text": "Starting point of fit the MSD this far into the data.",
},
"msd_fit_end": {
"default": "0.8",
"kind": "float",
"default_units": "",
"enumeration": tuple(),
"format_string": "",
"description": "End point of MSD fit (0.0..1.0):",
"help_text": "End point of fit the MSD this far into the data.",
},
"helfand_fit_start": {
"default": "0.2",
"kind": "float",
"default_units": "",
"enumeration": tuple(),
"format_string": "",
"description": "Start point of Helfand fit (0.0..1.0):",
"help_text": "Starting point of Helfand fit this far into the data.",
},
"helfand_fit_end": {
"default": "0.95",
"kind": "float",
"default_units": "",
"enumeration": tuple(),
"format_string": "",
"description": "End point of Helfand fit (0.0..1.0):",
"help_text": "End point of Helfnad fit this far into the data.",
},
"errors": {
"default": "continue to next run",
"kind": "string",
Expand Down
12 changes: 11 additions & 1 deletion diffusivity_step/tk_diffusivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def reset_diffusivity_frame(self, widget=None):
"""Layout the widgets in the diffusivity frame
as needed for the current state"""

# approach = self["approach"].get()
approach = self["approach"].get()

frame = self["diffusivity frame"]
for slave in frame.grid_slaves():
Expand All @@ -224,6 +224,16 @@ def reset_diffusivity_frame(self, widget=None):
self[key].grid(row=row, column=0, columnspan=2, sticky=tk.W)
widgets.append(self[key])
row += 1
if approach == "both" or "MSD" in approach:
for key in ("msd_fit_start", "msd_fit_end"):
self[key].grid(row=row, column=0, columnspan=2, sticky=tk.W)
widgets.append(self[key])
row += 1
if approach == "both" or "Helfand" in approach:
for key in ("helfand_fit_start", "helfand_fit_end"):
self[key].grid(row=row, column=0, columnspan=2, sticky=tk.W)
widgets.append(self[key])
row += 1

sw.align_labels(widgets, sticky=tk.E)
frame.columnconfigure(0, minsize=50)
Expand Down

0 comments on commit dcdd84c

Please sign in to comment.