diff --git a/HISTORY.rst b/HISTORY.rst index 52a58a5..84aa607 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,11 @@ ======= History ======= +2024.7.4 -- Improved fitting of curves + * Removed weighting of the fit by the stdev since it is too biased to the beginning + * Added control over the portion of the data to fit in order to avoid the initial + curvature and poor data towards the end. + 2024.6.3 -- Bugfix: handling of options for subflowchart * Fixed a bug where the options for the subflowchart were not being parsed correctly. diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 5620912..acf1700 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -11,6 +11,7 @@ dependencies: # SEAMM - molsystem - numpy + - rdkit - scipy - seamm - seamm-util diff --git a/diffusivity_step/analysis.py b/diffusivity_step/analysis.py index 01eeaf1..4a68ddf 100644 --- a/diffusivity_step/analysis.py +++ b/diffusivity_step/analysis.py @@ -129,7 +129,7 @@ def create_helfand_moments(v, species, m=None): return Ms, M_errs -def fit_helfand_moment(y, xs, sigma=None, start=1): +def fit_helfand_moment(y, xs, sigma=None, start=0.1, end=0.9): """Find the best linear fit. Parameters @@ -143,6 +143,12 @@ def fit_helfand_moment(y, xs, sigma=None, start=1): sigma : [float] or numpy.ndarray() Optional standard error of y + start : float + Fraction of vector to ignore at the beginning + + end : float + The fraction of vector to end at + Returns ------- slope : float @@ -154,33 +160,39 @@ def fit_helfand_moment(y, xs, sigma=None, start=1): ys : [float] The y values for the fit curve. """ - dx = xs[1] - xs[0] - - # We know the curves curve near the origin, so ignore the first part - i = int(start / dx) - if i > len(y): - i = int(1 / dx) - - popt, pcov, infodict, msg, ierr = curve_fit( - axb, - xs[i:], - y[i:], - full_output=True, - sigma=sigma[i:], - absolute_sigma=True, - ) + # We know the curves curve near the origin, so ignore the first part and last + n = len(y) + i = int(n * start) + j = int(n * end) + + if sigma is None: + popt, pcov, infodict, msg, ierr = curve_fit( + axb, + xs[i:j], + y[i:j], + full_output=True, + ) + else: + popt, pcov, infodict, msg, ierr = curve_fit( + axb, + xs[i:j], + y[i:j], + full_output=True, + sigma=sigma[i:j], + absolute_sigma=True, + ) slope = float(popt[0]) b = float(popt[1]) err = float(np.sqrt(np.diag(pcov)[0])) ys = [] - for x in xs[i:]: + for x in xs[i:j]: ys.append(axb(x, slope, b)) - return slope, err, xs[i:], ys + return slope, err, xs[i:j], ys -def fit_msd(y, xs, sigma=None, start=1): +def fit_msd(y, xs, sigma=None, start=0.1, end=0.9): """Find the best linear fit to longest possible segment. Parameters @@ -194,6 +206,12 @@ def fit_msd(y, xs, sigma=None, start=1): sigma : [float] or numpy.ndarray() Optional standard error of y + start : float + Fraction of vector to ignore at the beginning + + end : float + The fraction of vector to end at + Returns ------- slope : float @@ -205,30 +223,36 @@ def fit_msd(y, xs, sigma=None, start=1): ys : [float] The y values for the fit curve. """ - dx = xs[1] - xs[0] - - # We know the curves curve near the origin, so ignore the first part - i = int(start / dx) - if i > len(y): - i = int(1 / dx) - - popt, pcov, infodict, msg, ierr = curve_fit( - axb, - xs[i:], - y[i:], - full_output=True, - sigma=sigma[i:], - absolute_sigma=True, - ) + # We know the curves curve near the origin, so ignore the first part and last + n = len(y) + i = int(n * start) + j = int(n * end) + + if sigma is None: + popt, pcov, infodict, msg, ierr = curve_fit( + axb, + xs[i:j], + y[i:j], + full_output=True, + ) + else: + popt, pcov, infodict, msg, ierr = curve_fit( + axb, + xs[i:j], + y[i:j], + full_output=True, + sigma=sigma[i:], + absolute_sigma=True, + ) slope = float(popt[0]) b = float(popt[1]) err = float(np.sqrt(np.diag(pcov)[0])) ys = [] - for x in xs[i:]: + for x in xs[i:j]: ys.append(axb(x, slope, b)) - return slope, err, xs[i:], ys + return slope, err, xs[i:j], ys def add_helfand_trace( diff --git a/diffusivity_step/diffusivity.py b/diffusivity_step/diffusivity.py index a084d69..0b077b6 100644 --- a/diffusivity_step/diffusivity.py +++ b/diffusivity_step/diffusivity.py @@ -201,7 +201,15 @@ def species(self): self._species = self.parse_molecules() return self._species - def analyze(self, indent="", P=None, style="full", run=None, **kwargs): + def analyze( + self, + indent="", + P=None, + style="full", + run=None, + weighted=False, + **kwargs, + ): """Do any analysis of the output from this step. Also print important results to the local step.out file using @@ -211,6 +219,8 @@ def analyze(self, indent="", P=None, style="full", run=None, **kwargs): ---------- indent: str An extra indentation for the output + weighted : bool + Whether to use the stderr to wieght the fit, default False """ data = {} @@ -263,13 +273,21 @@ def analyze(self, indent="", P=None, style="full", run=None, **kwargs): # Convert units and remember the factor of 1/6 in the Einstein equation factor = Q_("Å^2/ps").m_as("m^2/s") / 6 for i in range(nalpha): - start = 1 - slope, err, xs, ys = fit_msd( - self._msd[spec][:, i], - ts, - sigma=self._msd_err[spec][:, i], - start=start, - ) + if weighted: + slope, err, xs, ys = fit_msd( + self._msd[spec][:, i], + ts, + sigma=self._msd_err[spec][:, i], + start=P["msd_fit_start"], + end=P["msd_fit_end"], + ) + else: + slope, err, xs, ys = fit_msd( + self._msd[spec][:, i], + ts, + start=P["msd_fit_start"], + end=P["msd_fit_end"], + ) d_coeff = slope * factor err = err * factor if self._scale is None: @@ -370,13 +388,21 @@ def analyze(self, indent="", P=None, style="full", run=None, **kwargs): # Fit the slopes fit = [] for i in range(nalpha): - start = 1 - slope, err, xs, ys = fit_msd( - self._M[spec][:, i], - ts, - sigma=self._M_err[spec][:, i], - start=start, - ) + if weighted: + slope, err, xs, ys = fit_msd( + self._M[spec][:, i], + ts, + sigma=self._M_err[spec][:, i], + start=P["helfand_fit_start"], + end=P["helfand_fit_end"], + ) + else: + slope, err, xs, ys = fit_msd( + self._M[spec][:, i], + ts, + start=P["helfand_fit_start"], + end=P["helfand_fit_end"], + ) d_coeff = slope err = err if self._scale is None: @@ -585,15 +611,16 @@ def description_text(self, P=None, short=False): f"Calculate the diffusivity using the {P['approach']} approach, " f"averaging over {P['nruns']} runs.\n\n" ) - if not short: - # The subflowchart - self.subflowchart.root_directory = self.flowchart.root_directory - # Make sure that the subflowchart has the executor - self.subflowchart.executor = self.flowchart.executor + # Make sure the subflowchart has the data from the parent flowchart + self.subflowchart.root_directory = self.flowchart.root_directory + self.subflowchart.executor = self.flowchart.executor + self.subflowchart.in_jobserver = self.subflowchart.in_jobserver + if not short: # Get the first real node node = self.subflowchart.get_node("1").next() + node.all_options = self.all_options while node is not None: try: @@ -640,6 +667,9 @@ def run(self): # Remember the configuration _, self._configuration = self.get_system_configuration() + # Reset parameters if called in e.g. loop + self._n_molecules = None + self._species = None # Decide what to do approach = P["approach"] @@ -692,6 +722,11 @@ def run(self): out_level = out_handler.level out_handler.setLevel(printing.JOB) + # Make sure the subflowchart has the data from the parent flowchart + self.subflowchart.root_directory = self.flowchart.root_directory + self.subflowchart.executor = self.flowchart.executor + self.subflowchart.in_jobserver = self.subflowchart.in_jobserver + # Get the first real node first_node = self.subflowchart.get_node("1").next() diff --git a/diffusivity_step/diffusivity_parameters.py b/diffusivity_step/diffusivity_parameters.py index bb91e01..0b493c5 100644 --- a/diffusivity_step/diffusivity_parameters.py +++ b/diffusivity_step/diffusivity_parameters.py @@ -74,7 +74,7 @@ class DiffusivityParameters(seamm.Parameters): parameters = { "approach": { - "default": "Mean Square Displacement (MSD)", + "default": "both", "kind": "enum", "default_units": "", "enumeration": ( @@ -87,7 +87,7 @@ class DiffusivityParameters(seamm.Parameters): "help_text": "The approach or method for determining the diffusivity.", }, "nruns": { - "default": "20", + "default": "1", "kind": "integer", "default_units": "", "enumeration": tuple(), @@ -95,6 +95,42 @@ class DiffusivityParameters(seamm.Parameters): "description": "Number of runs to average:", "help_text": "The number for separate runs to average.", }, + "msd_fit_start": { + "default": "0.1", + "kind": "float", + "default_units": "", + "enumeration": tuple(), + "format_string": "", + "description": "Start point of MSD fit (0.0..1.0):", + "help_text": "Starting point of fit the MSD this far into the data.", + }, + "msd_fit_end": { + "default": "0.8", + "kind": "float", + "default_units": "", + "enumeration": tuple(), + "format_string": "", + "description": "End point of MSD fit (0.0..1.0):", + "help_text": "End point of fit the MSD this far into the data.", + }, + "helfand_fit_start": { + "default": "0.2", + "kind": "float", + "default_units": "", + "enumeration": tuple(), + "format_string": "", + "description": "Start point of Helfand fit (0.0..1.0):", + "help_text": "Starting point of Helfand fit this far into the data.", + }, + "helfand_fit_end": { + "default": "0.95", + "kind": "float", + "default_units": "", + "enumeration": tuple(), + "format_string": "", + "description": "End point of Helfand fit (0.0..1.0):", + "help_text": "End point of Helfnad fit this far into the data.", + }, "errors": { "default": "continue to next run", "kind": "string", diff --git a/diffusivity_step/tk_diffusivity.py b/diffusivity_step/tk_diffusivity.py index f5f85a5..ffeb9f5 100644 --- a/diffusivity_step/tk_diffusivity.py +++ b/diffusivity_step/tk_diffusivity.py @@ -210,7 +210,7 @@ def reset_diffusivity_frame(self, widget=None): """Layout the widgets in the diffusivity frame as needed for the current state""" - # approach = self["approach"].get() + approach = self["approach"].get() frame = self["diffusivity frame"] for slave in frame.grid_slaves(): @@ -224,6 +224,16 @@ def reset_diffusivity_frame(self, widget=None): self[key].grid(row=row, column=0, columnspan=2, sticky=tk.W) widgets.append(self[key]) row += 1 + if approach == "both" or "MSD" in approach: + for key in ("msd_fit_start", "msd_fit_end"): + self[key].grid(row=row, column=0, columnspan=2, sticky=tk.W) + widgets.append(self[key]) + row += 1 + if approach == "both" or "Helfand" in approach: + for key in ("helfand_fit_start", "helfand_fit_end"): + self[key].grid(row=row, column=0, columnspan=2, sticky=tk.W) + widgets.append(self[key]) + row += 1 sw.align_labels(widgets, sticky=tk.E) frame.columnconfigure(0, minsize=50)