From e8976be6584ef48b2474a20464bb448afeb90862 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 29 Dec 2023 20:31:23 +0000 Subject: [PATCH] fix citation --- joss_paper/paper.bib | 34 ++++++++++++++++++++++++++++++++++ joss_paper/paper.md | 16 ++++++++-------- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib index 27914214..e15f37f0 100644 --- a/joss_paper/paper.bib +++ b/joss_paper/paper.bib @@ -154,3 +154,37 @@ @article{pronk2013gromacs year={2013}, doi={10.1093/bioinformatics/btt055}, } + +@article{shirts2008statistically, + title={Statistically optimal analysis of samples from multiple equilibrium states}, + author={Shirts, M. R. and Chodera, J. D.}, + journal={The Journal of Chemical Physics}, + volume={129}, + number={12}, + pages={124105}, + year={2008}, + doi={10.1063/1.2978177}, +} + +@article{yang2004free, + title={Free energy simulations: use of reverse cumulative averaging to determine the equilibrated region and the time required for convergence}, + author={Yang, W. and Bitetti-Putzer, R. and Karplus, M.}, + journal={The Journal of Chemical Physics}, + volume={120}, + number={6}, + pages={2618--2628}, + year={2004}, + doi={10.1063/1.1638996}, +} + +@article{zwanzig1954high, + title={High-temperature equation of state by a perturbation method. I. Nonpolar gases}, + author={Zwanzig, R. W.}, + journal={The Journal of Chemical Physics}, + volume={22}, + number={8}, + pages={1420--1426}, + year={1954}, + doi={10.1063/1.1740409}, +} + diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 7c2be3dc..981d4b16 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -35,16 +35,16 @@ A distinctive attribute of alchemlyb is its streamlined, end-to-end analysis pro # Statement of need -In the pharmaceutical sector, the utilization of computational chemistry techniques is integral for evaluating potential drug compounds based on their protein-binding affinity [@deng2009computations]. 
Notably, relative/absolute binding free energy calculations are routinely employed for this purpose (Merz Jr, Ringe, & Reynolds, 2010). The resultant free energy data is essential for understanding binding affinity throughout various stages of drug discovery, such as hit identification and lead optimization (Merz Jr et al., 2010). The alchemlyb software adeptly processes this data, providing crucial insights and establishing itself as an indispensable asset in computational chemistry. +In the pharmaceutical sector, the utilization of computational chemistry techniques is integral for evaluating potential drug compounds based on their protein-binding affinity [@deng2009computations]. Notably, relative/absolute binding free energy calculations are routinely employed for this purpose [@merz2010drug]. The resultant free energy data is essential for understanding binding affinity throughout various stages of drug discovery, such as hit identification and lead optimization [@merz2010drug]. The alchemlyb software adeptly processes this data, providing crucial insights and establishing itself as an indispensable asset in computational chemistry. -In the realm of computational research, various molecular dynamics (MD) engines, including GROMACS (Pronk et al., 2013), AMBER (Case et al., 2014), GOMC (Cummings et al., 2021), and NAMD (Phillips et al., 2020), offer distinct tools for conducting free energy calculations. However, the diversity in output formats and analysis tools among different MD engines complicates the research process. Data generated by each engine requires unique processing and analysis methods, hindering seamless collaboration and comparison of results. +In the realm of computational research, various molecular dynamics (MD) engines, including GROMACS [@pronk2013gromacs], AMBER [@case2014ff14sb], GOMC [@cummings2021open], and NAMD [@phillips2020scalable], offer distinct tools for conducting free energy calculations. 
However, the diversity in output formats and analysis tools among different MD engines complicates the research process. Data generated by each engine requires unique processing and analysis methods, hindering seamless collaboration and comparison of results. -Addressing this complexity is the "alchemical–analysis.py" tool (Klimovich, Shirts, & Mobley, 2015), which precedes alchemlyb. Although "alchemical–analysis.py" has been deprecated, alchemlyb continues to provide a unified, engine-agnostic analysis workflow. Unlike its predecessor, alchemlyb breaks down components into individual tools, allowing users to customize their analysis. This innovation enables consistent processing of free energy data from diverse MD engines, facilitating streamlined comparison and combination of results. +Addressing this complexity is the "alchemical-analysis.py" tool [@klimovich2015guidelines], which precedes alchemlyb. Although "alchemical-analysis.py" has been deprecated, alchemlyb continues to provide a unified, engine-agnostic analysis workflow. Unlike its predecessor, alchemlyb breaks down components into individual tools, allowing users to customize their analysis. This innovation enables consistent processing of free energy data from diverse MD engines, facilitating streamlined comparison and combination of results. -Notably, alchemlyb's robust and user-friendly nature has led to its integration into other automated workflow libraries such as Biosimspace (Hedges et al., 2023). This further enhances its accessibility and usability within broader scientific workflows, reinforcing its position as a versatile and essential tool in the field of computational chemistry. +Notably, alchemlyb's robust and user-friendly nature has led to its integration into other automated workflow libraries such as BioSimSpace [@hedges2023suite]. 
This further enhances its accessibility and usability within broader scientific workflows, reinforcing its position as a versatile and essential tool in the field of computational chemistry. # Implementation @@ -52,14 +52,14 @@ The binding free energy of a drug within a protein is defined as the disparity i To determine the free energy difference associated with decoupling a drug from its environment, it is essential to ensure sufficient overlap in phase space between the coupled and decoupled states, a condition often challenging to achieve. Overlapping is facilitated by introducing a parameter lambda that connects the two end-states, leading to the creation of a series of intermediate states. Molecular dynamics (MD) engines are employed to simulate the system at these states, generating and accumulating free energy data. -Alchemlyb offers specific parsers designed to load raw free energy data from various MD engines, converting them into standard pandas dataframes. Two types of free energy data are considered: potential energy differences between adjacent lambda states, suitable for free energy perturbation (FEP) methods (Zwanzig, 1954), and dU/dlambda at all lambda states, suitable for thermodynamic integration (TI) methods (Kirkwood, 1935). +Alchemlyb offers specific parsers designed to load raw free energy data from various MD engines, converting them into standard pandas dataframes. Two types of free energy data are considered: potential energy differences between adjacent lambda states, suitable for free energy perturbation (FEP) methods [@zwanzig1954high], and dU/dlambda at all lambda states, suitable for thermodynamic integration (TI) methods [@kirkwood1935statistical]. -In alchemlyb, TI (Paliwal & Shirts, 2011) and TI with Gaussian quadrature (Gusev, Gutkin, Kurnikova, & Isayev, 2023) methods are implemented in the TI category. 
Perturbation category methods include Bennett Acceptance Ratio (BAR) (Bennett, 1976) and Multistate BAR (MBAR) (Shirts & Chodera, 2008). These methods necessitate uncorrelated samples, and alchemlyb provides tools for data resampling based on autocorrelation times (Chodera, Swope, Pitera, Seok, & Dill, 2007). +In alchemlyb, TI [@paliwal2011benchmark] and TI with Gaussian quadrature [@gusev2023active] methods are implemented in the TI category. Perturbation category methods include Bennett Acceptance Ratio (BAR) [@bennett1976efficient] and Multistate BAR (MBAR) [@shirts2008statistically]. These methods necessitate uncorrelated samples, and alchemlyb provides tools for data resampling based on autocorrelation times [@chodera2007use]. -To evaluate the accuracy of the free energy estimate, alchemlyb offers specific assessment tools. The error of the TI method is correlated with the average curvature (Pham & Shirts, 2011), while the error of perturbation methods depends on the overlap in sampled energy distributions (Pohorille, Jarzynski, & Chipot, 2010). Alchemlyb visualizes the smoothness of the integrand for TI methods and the overlap matrix for perturbation methods. Additionally, the accumulated samples should be at an equilibrated state, and alchemlyb allows for plotting the convergence of the free energy estimate as a function of simulation time (Yang, Bitetti-Putzer, & Karplus, 2004) to detect potentially un-equilibrated data. +To evaluate the accuracy of the free energy estimate, alchemlyb offers specific assessment tools. The error of the TI method is correlated with the average curvature [@pham2011identifying], while the error of perturbation methods depends on the overlap in sampled energy distributions [@pohorille2010good]. Alchemlyb visualizes the smoothness of the integrand for TI methods and the overlap matrix for perturbation methods. 
Additionally, the accumulated samples should be at an equilibrated state, and alchemlyb allows for plotting the convergence of the free energy estimate as a function of simulation time [@yang2004free] to detect potentially un-equilibrated data. -Alchemlyb offers all these tools as a library for users to customize each stage of the analysis (Figure 2). Additionally, alchemlyb provides an automated end-to-end tool that reads in the raw input data and performs the decorelation, estimation, and quality plotting of the estimate. This automated workflow allows users to experience a similar process as “alchemical–analysis.py" (Klimovich et al., 2015), which is the predecessor of alchemlyb. +Alchemlyb offers all these tools as a library for users to customize each stage of the analysis (Figure 2). Additionally, alchemlyb provides an automated end-to-end tool that reads in the raw input data and performs the decorrelation, estimation, and quality plotting of the estimate. This automated workflow allows users to experience a similar process as "alchemical-analysis.py" [@klimovich2015guidelines], which is the predecessor of alchemlyb. # Acknowledgements