diff --git a/README.md b/README.md index 12b85be..884c833 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,8 @@ We also welcome contributions to the material which is already here to extend it ## Changes not yet in a release - Added host-guest structures and input files on 12/19/16 (see [PR 22](https://github.com/MobleyLab/benchmarksets/pull/22)). +- Minor updates to figure captions on 1/20/17 (see [PR 26](https://github.com/MobleyLab/benchmarksets/pull/26)). +- Various edits suggested by Annual Reviews (see [PR 27](https://github.com/MobleyLab/benchmarksets/pull/27)). ## Manifest diff --git a/paper/benchmarkset.bib b/paper/benchmarkset.bib index e701cb7..3383e2e 100644 --- a/paper/benchmarkset.bib +++ b/paper/benchmarkset.bib @@ -3028,6 +3028,22 @@ @article{yin_overview_2016 file = {Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/22989I64/Yin et al. - 2016 - Overview of the SAMPL5 host–guest challenge Are w.pdf:application/pdf;Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/5NTTWBRW/s10822-016-9974-4.html:text/html} } +@article{bannan_blind_2016, + title = {Blind Prediction of Cyclohexane\textendash{}water Distribution Coefficients from the {{SAMPL5}} Challenge}, + issn = {0920-654X, 1573-4951}, + doi = {10.1007/s10822-016-9954-8}, + abstract = {In the recent SAMPL5 challenge, participants submitted predictions for cyclohexane/water distribution coefficients for a set of 53 small molecules. Distribution coefficients (log D) replace the hydration free energies that were a central part of the past five SAMPL challenges. A wide variety of computational methods were represented by the 76 submissions from 18 participating groups. Here, we analyze submissions by a variety of error metrics and provide details for a number of reference calculations we performed. 
As in the SAMPL4 challenge, we assessed the ability of participants to evaluate not just their statistical uncertainty, but their model uncertainty\textemdash{}how well they can predict the magnitude of their model or force field error for specific predictions. Unfortunately, this remains an area where prediction and analysis need improvement. In SAMPL4 the top performing submissions achieved a root-mean-squared error (RMSE) around 1.5 kcal/mol. If we anticipate accuracy in log D predictions to be similar to the hydration free energy predictions in SAMPL4, the expected error here would be around 1.54 log units. Only a few submissions had an RMSE below 2.5 log units in their predicted log D values. However, distribution coefficients introduced complexities not present in past SAMPL challenges, including tautomer enumeration, that are likely to be important in predicting biomolecular properties of interest to drug discovery, therefore some decrease in accuracy would be expected. Overall, the SAMPL5 distribution coefficient challenge provided great insight into the importance of modeling a variety of physical effects. We believe these types of measurements will be a promising source of data for future blind challenges, especially in view of the relatively straightforward nature of the experiments and the level of insight provided.}, + language = {en}, + timestamp = {2016-09-27T16:18:52Z}, + urldate = {2016-09-27}, + journal = {J Comput Aided Mol Des}, + author = {Bannan, Caitlin C. and Burley, Kalistyn H. and Chiu, Michael and Shirts, Michael R. and Gilson, Michael K. and Mobley, David L.}, + month = sep, + year = {2016}, + pages = {1--18}, + file = {Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/IERC79FA/Bannan et al. 
- 2016 - Blind prediction of cyclohexane–water distribution.pdf:application/pdf;Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/JQWSNG2X/s10822-016-9954-8.html:text/html} +} + @article{Liu:2007:Nucl.AcidsRes., title = {{{BindingDB}}: A Web-Accessible Database of Experimentally Determined Protein\textendash{}ligand Binding Affinities}, volume = {35}, @@ -3895,6 +3911,171 @@ @article{Gilson:2016:Nucl.AcidsRes. pmid = {26481362} } +@article{Nucci:2014:PNAS, + title = {Role of Cavities and Hydration in the Pressure Unfolding of {{T4}} Lysozyme}, + volume = {111}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.1410655111}, + abstract = {It is well known that high hydrostatic pressures can induce the unfolding of proteins. The physical underpinnings of this phenomenon have been investigated extensively but remain controversial. Changes in solvation energetics have been commonly proposed as a driving force for pressure-induced unfolding. Recently, the elimination of void volumes in the native folded state has been argued to be the principal determinant. Here we use the cavity-containing L99A mutant of T4 lysozyme to examine the pressure-induced destabilization of this multidomain protein by using solution NMR spectroscopy. The cavity-containing C-terminal domain completely unfolds at moderate pressures, whereas the N-terminal domain remains largely structured to pressures as high as 2.5 kbar. The sensitivity to pressure is suppressed by the binding of benzene to the hydrophobic cavity. These results contrast to the pseudo-WT protein, which has a residual cavity volume very similar to that of the L99A\textendash{}benzene complex but shows extensive subglobal reorganizations with pressure. Encapsulation of the L99A mutant in the aqueous nanoscale core of a reverse micelle is used to examine the hydration of the hydrophobic cavity. 
The confined space effect of encapsulation suppresses the pressure-induced unfolding transition and allows observation of the filling of the cavity with water at elevated pressures. This indicates that hydration of the hydrophobic cavity is more energetically unfavorable than global unfolding. Overall, these observations point to a range of cooperativity and energetics within the T4 lysozyme molecule and illuminate the fact that small changes in physical parameters can significantly alter the pressure sensitivity of proteins.}, + language = {en}, + timestamp = {2017-01-17T18:21:14Z}, + number = {38}, + urldate = {2017-01-17}, + journal = {PNAS}, + author = {Nucci, Nathaniel V. and Fuglestad, Brian and Athanasoula, Evangelia A. and Wand, A. Joshua}, + month = sep, + year = {2014}, + keywords = {high-pressure NMR,protein folding and cooperativity,protein hydration,protein stability,reverse micelle encapsulation}, + pages = {13846--13851}, + file = {Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/QXA5DR8H/Nucci et al. - 2014 - Role of cavities and hydration in the pressure unf.pdf:application/pdf;Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/MP3I9CDX/13846.html:text/html}, + pmid = {25201963} +} + +@article{Kitahara:2015:PNAS, + title = {Is Pressure-Induced Signal Loss in {{NMR}} Spectra for the {{Leu99Ala}} Cavity Mutant of {{T4}} Lysozyme due to Unfolding?}, + volume = {112}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.1423279112}, + abstract = {National Academy of Sciences}, + language = {en}, + timestamp = {2017-01-17T18:23:33Z}, + number = {9}, + urldate = {2017-01-17}, + journal = {PNAS}, + author = {Kitahara, Ryo and Mulder, Frans A. 
A.}, + month = mar, + year = {2015}, + pages = {E923--E923}, + file = {Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/V99CIRWQ/Kitahara and Mulder - 2015 - Is pressure-induced signal loss in NMR spectra for.pdf:application/pdf;Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/R6EMJKZW/E923.html:text/html}, + pmid = {25630507} +} + +@article{Wand:2015:PNAS, + title = {Reply to {{Kitahara}} and {{Mulder}}: {{An}} Ensemble View of Protein Stability Best Explains Pressure Effects in a {{T4}} Lysozyme Cavity Mutant}, + volume = {112}, + issn = {0027-8424, 1091-6490}, + shorttitle = {Reply to {{Kitahara}} and {{Mulder}}}, + doi = {10.1073/pnas.1424002112}, + abstract = {National Academy of Sciences}, + language = {en}, + timestamp = {2017-01-17T18:23:53Z}, + number = {9}, + urldate = {2017-01-17}, + journal = {PNAS}, + author = {Wand, A. Joshua and Nucci, Nathaniel V.}, + month = mar, + year = {2015}, + pages = {E924--E924}, + file = {Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/2PS7GRTU/Wand and Nucci - 2015 - Reply to Kitahara and Mulder An ensemble view of .pdf:application/pdf;Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/I4W58GNF/E924.html:text/html}, + pmid = {25630509} +} + +@article{Lerch:2015:PNAS, + title = {Structure-Relaxation Mechanism for the Response of {{T4}} Lysozyme Cavity Mutants to Hydrostatic Pressure}, + volume = {112}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.1506505112}, + abstract = {Application of hydrostatic pressure shifts protein conformational equilibria in a direction to reduce the volume of the system. 
A current view is that the volume reduction is dominated by elimination of voids or cavities in the protein interior via cavity hydration, although an alternative mechanism wherein cavities are filled with protein side chains resulting from a structure relaxation has been suggested [L{\'o}pez CJ, Yang Z, Altenbach C, Hubbell WL (2013) Proc Natl Acad Sci USA 110(46):E4306\textendash{}E4315]. In the present study, mechanisms for elimination of cavities under high pressure are investigated in the L99A cavity mutant of T4 lysozyme and derivatives thereof using site-directed spin labeling, pressure-resolved double electron\textendash{}electron resonance, and high-pressure circular dichroism spectroscopy. In the L99A mutant, the ground state is in equilibrium with an excited state of only $\sim$3\% of the population in which the cavity is filled by a protein side chain [Bouvignies et al. (2011) Nature 477(7362):111\textendash{}114]. The results of the present study show that in L99A the native ground state is the dominant conformation to pressures of 3 kbar, with cavity hydration apparently taking place in the range of 2\textendash{}3 kbar. However, in the presence of additional mutations that lower the free energy of the excited state, pressure strongly populates the excited state, thereby eliminating the cavity with a native side chain rather than solvent. Thus, both cavity hydration and structure relaxation are mechanisms for cavity elimination under pressure, and which is dominant is determined by details of the energy landscape.}, + language = {en}, + timestamp = {2017-01-17T18:24:23Z}, + number = {19}, + urldate = {2017-01-17}, + journal = {PNAS}, + author = {Lerch, Michael T. and L{\'o}pez, Carlos J. and Yang, Zhongyu and Kreitman, Margaux J. 
and Horwitz, Joseph and Hubbell, Wayne L.}, + month = may, + year = {2015}, + keywords = {conformational exchange,DEER,EPR,protein structural dynamics}, + pages = {E2437--E2446}, + file = {Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/CGCRGI3E/Lerch et al. - 2015 - Structure-relaxation mechanism for the response of.pdf:application/pdf;Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/F5EW9I4N/E2437.html:text/html}, + pmid = {25918400} +} + +@article{Maeno:2015:BiophysicalJournal, + title = {Cavity as a {{Source}} of {{Conformational Fluctuation}} and {{High}}-{{Energy State}}: {{High}}-{{Pressure NMR Study}} of a {{Cavity}}-{{Enlarged Mutant}} of {{T4 Lysozyme}}}, + volume = {108}, + issn = {0006-3495}, + shorttitle = {Cavity as a {{Source}} of {{Conformational Fluctuation}} and {{High}}-{{Energy State}}}, + doi = {10.1016/j.bpj.2014.11.012}, + language = {English}, + timestamp = {2017-01-17T18:25:59Z}, + number = {1}, + urldate = {2017-01-17}, + journal = {Biophysical Journal}, + author = {Maeno, Akihiro and Sindhikara, Daniel and Hirata, Fumio and Otten, Renee and Dahlquist, Frederick W. and Yokoyama, Shigeyuki and Akasaka, Kazuyuki and Mulder, Frans A. A. and Kitahara, Ryo}, + month = jan, + year = {2015}, + pages = {133--145}, + file = {Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/MCBSKB4I/Maeno et al.
- 2015 - Cavity as a Source of Conformational Fluctuation a.pdf:application/pdf;Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/UJKVJJ4Q/S0006-3495(14)01196-5.html:text/html}, + pmid = {25564860} +} + +@article{Parker:2000:JournalofMolecularBiology, + title = {A Statistical Appraisal of Native State Hydrogen Exchange Data: Evidence for a Burst Phase Continuum?}, + volume = {300}, + issn = {0022-2836}, + shorttitle = {A Statistical Appraisal of Native State Hydrogen Exchange Data}, + doi = {10.1006/jmbi.2000.3922}, + abstract = {For a number of proteins, folding occurs via the rapid accumulation of secondary and tertiary structural features in a so-called burst phase, preceding the relatively slow, highly activated transition leading to the native state. A fundamental question is: do these burst phase reactions comprise two phase-separated thermodynamic states or a continuum of states? Ribonuclease HI (RNase H) from Escherichia coli and phage T4 lysozyme (T4L) both exhibit such a phenomenon. Native-state hydrogen exchange (NHX) data have been collected for these proteins, providing residue-specific free energies and m-values (a measure of hydrocarbon solvation) for the manifold of partially unfolded, exchange-competent forms that are accessible from the native state ($\Delta$Gsg and msg, where the sg subscript denotes sub-global). There is good evidence that these parameters pertain to exchange-competent species comprising the burst phase observed in the global folding kinetics. We combine the results from the global folding kinetics of these proteins with a statistical analysis of their NHX parameters to determine if the distribution of experimental (msg, $\Delta$Gsg) values derive from a mechanism where the burst phase is two-state.
For RNase H, this analysis demonstrates that the burst phase of this protein is not two-state; the results imply a distribution of states, m and $\Delta$G exhibiting a linear functional relationship consistent with the global folding parameters. For T4L, it is difficult to distinguish the observed distribution of msg, $\Delta$Gsg values from that expected for a mechanism where the burst phase is two-state. The results for RNase H${_\ast}$ lend support for the idea that the burst phase reaction of this protein comprises a continuum of states. This has important implications for how we model the process of structural acquisition in protein folding reactions.}, + timestamp = {2017-01-17T18:27:05Z}, + number = {5}, + urldate = {2017-01-17}, + journal = {Journal of Molecular Biology}, + author = {Parker, Martin J. and Marqusee, Susan}, + month = jul, + year = {2000}, + keywords = {burst phases,folding intermediates,folding kinetics,hydrogen exchange,protein folding}, + pages = {1361--1375}, + file = {ScienceDirect Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/3XWDDT8I/Parker and Marqusee - 2000 - A statistical appraisal of native state hydrogen e.pdf:application/pdf;ScienceDirect Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/IDAH96HE/S0022283600939226.html:text/html} +} + +@article{Bouvignies:2011:Nature, + title = {Solution Structure of a Minor and Transiently Formed State of a {{T4}} Lysozyme Mutant}, + volume = {477}, + copyright = {\textcopyright{} 2011 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, + issn = {0028-0836}, + doi = {10.1038/nature10349}, + abstract = {Proteins are inherently plastic molecules, whose function often critically depends on excursions between different molecular conformations (conformers). 
However, a rigorous understanding of the relation between a protein's structure, dynamics and function remains elusive. This is because many of the conformers on its energy landscape are only transiently formed and marginally populated (less than a few per cent of the total number of molecules), so that they cannot be individually characterized by most biophysical tools. Here we study a lysozyme mutant from phage T4 that binds hydrophobic molecules and populates an excited state transiently (about 1\,ms) to about 3\% at 25\,$^\circ$C (ref. 5). We show that such binding occurs only via the ground state, and present the atomic-level model of the `invisible', excited state obtained using a combined strategy of relaxation-dispersion NMR (ref. 6) and CS-Rosetta model building that rationalizes this observation. The model was tested using structure-based design calculations identifying point mutants predicted to stabilize the excited state relative to the ground state. In this way a pair of mutations were introduced, inverting the relative populations of the ground and excited states and altering function. Our results suggest a mechanism for the evolution of a protein's function by changing the delicate balance between the states on its energy landscape. More generally, they show that our approach can generate and validate models of excited protein states.}, + language = {en}, + timestamp = {2017-01-17T18:28:48Z}, + number = {7362}, + urldate = {2017-01-17}, + journal = {Nature}, + author = {Bouvignies, Guillaume and Vallurupalli, Pramodh and Hansen, D. Flemming and Correia, Bruno E. and Lange, Oliver and Bah, Alaji and Vernon, Robert M. and Dahlquist, Frederick W. and Baker, David and Kay, Lewis E.}, + month = sep, + year = {2011}, + keywords = {Biophysics,Chemistry,Structural biology}, + pages = {111--114}, + file = {Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/EE228QG7/Bouvignies et al.
- 2011 - Solution structure of a minor and transiently form.pdf:application/pdf;Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/FQ82WBSI/nature10349.html:text/html} +} + +@article{Collins:2007:JournalofMolecularBiology, + title = {Structural {{Rigidity}} of a {{Large Cavity}}-Containing {{Protein Revealed}} by {{High}}-Pressure {{Crystallography}}}, + volume = {367}, + issn = {0022-2836}, + doi = {10.1016/j.jmb.2006.12.021}, + abstract = {Steric constraints, charged interactions and many other forces important to protein structure and function can be explored by mutagenic experiments. Research of this kind has led to a wealth of knowledge about what stabilizes proteins in their folded states. To gain a more complete picture requires that we perturb these structures in a continuous manner, something mutagenesis cannot achieve. With high pressure crystallographic methods it is now possible to explore the detailed properties of proteins while continuously varying thermodynamic parameters. Here, we detail the structural response of the cavity-containing mutant L99A of T4 lysozyme, as well as its pseudo wild-type (WT*) counterpart, to hydrostatic pressure. Surprisingly, the cavity has almost no effect on the pressure response: virtually the same changes are observed in WT* as in L99A under pressure. The cavity is most rigid, while other regions deform substantially. This implies that while some residues may increase the thermodynamic stability of a protein, they may also be structurally irrelevant. As recently shown, the cavity fills with water at pressures above 100~MPa while retaining its overall size. 
The resultant picture of the protein is one in which conformationally fluctuating side groups provide a liquid-like environment, but which also contribute to the rigidity of the peptide backbone.}, + timestamp = {2017-01-17T19:18:02Z}, + number = {3}, + urldate = {2017-01-17}, + journal = {Journal of Molecular Biology}, + author = {Collins, Marcus D. and Quillin, Michael L. and Hummer, Gerhard and Matthews, Brian W. and Gruner, Sol M.}, + month = mar, + year = {2007}, + keywords = {high pressure crystallography,protein,structural rigidity}, + pages = {752--763}, + file = {ScienceDirect Full Text PDF:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/3D5GIIF4/Collins et al. - 2007 - Structural Rigidity of a Large Cavity-containing P.pdf:application/pdf;ScienceDirect Snapshot:/Users/dmobley/Library/Application Support/Zotero/Profiles/i2jd8b87.default/zotero/storage/BTPNWRAZ/S0022283606016962.html:text/html} +} + +@misc{Mobley:2016:, + title = {Advancing Predictive Modeling through Focused Development of New Systems to Drive New Modeling Innovations}, + timestamp = {2017-01-20T22:59:48Z}, + urldate = {2016-10-05}, + howpublished = {\url{http://doi.org/10.5281/zenodo.163963}}, + author = {Mobley, David L. and Chodera, John D. 
and Isaacs, Lyle and Gibb, Bruce C.}, + month = oct, + year = {2016} +} + @comment{jabref-meta: groupsversion:3;} @comment{jabref-meta: groupstree: 0 AllEntriesGroup:; @@ -3946,7 +4127,10 @@ @comment{jabref-meta: cificity_1995\;morton_energetic_1995\;wang_identifying_2013\;boyce_pre dicting_2009\;mobley_use_2006\;mobley_confine_2007\;mobley_predicting_ 2007\;Shirts:2007:JPhysChemB\;su_docking_2001\;wei_testing_2004\;eriks -son_cavity-containing_1992\;; +son_cavity-containing_1992\;Nucci:2014:PNAS\;Kitahara:2015:PNAS\;Wand: +2015:PNAS\;Lerch:2015:PNAS\;Maeno:2015:BiophysicalJournal\;Parker:2000 +:JournalofMolecularBiology\;Bouvignies:2011:Nature\;Collins:2007:Journ +alofMolecularBiology\;; 1 ExplicitGroup:MKG\;0\;henriksen_computational_2015\;muddana_sampl4_2 014\;Gibb:2004:J.Am.Chem.Soc.\;Cong:2016:Org.Biomol.Chem.\;vinciguerra _synthesis_2015\;assaf_cucurbiturils:_2015\;jorgensen_quantum_1981\;du diff --git a/paper/benchmarkset.pdf b/paper/benchmarkset.pdf index de1227d..c1750b2 100644 Binary files a/paper/benchmarkset.pdf and b/paper/benchmarkset.pdf differ diff --git a/paper/benchmarkset.tex b/paper/benchmarkset.tex index b684c95..a120e2a 100644 --- a/paper/benchmarkset.tex +++ b/paper/benchmarkset.tex @@ -59,22 +59,25 @@ %%% DATE: % For ongoing development use -\date{\today} +%\date{\today} % For release versions use -%\date{ December 8, 2016 } +\date{ January 20, 2017 } \begin{abstract} Binding free energy calculations based on molecular simulations provide predicted affinities for biomolecular complexes. These calculations begin with a detailed description of a system, including its chemical composition and the interactions between its components. Simulations of the system are then used to compute thermodynamic information, such as binding affinities. -Because of their growing promise for guiding molecular design, these calculations have recently begun to see widespread applications in early stage drug discovery. 
However, many challenges remain to make them a robust and reliable tool. -Here, we provide an overview of these methods, discuss determinants of accuracy and precision, highlight key challenges, and argue for the development of accepted benchmark test systems that will help the research community generate and evaluate progress. +Because of their growing promise for guiding molecular design, these calculations have recently begun to see widespread applications in early stage drug discovery. +However, many challenges remain to make them a robust and reliable tool. +Here, we highlight key challenges facing these calculations, describe known examples of these challenges, and call for the designation of standard community benchmark test systems that will help the research community generate and evaluate progress. +In our view, progress will require careful assessment and evaluation of new methods, force fields, and modeling innovations on well-characterized benchmark systems, and we lay out our vision for how this can be achieved. \\ \\ %PUT MANUSCRIPT VERSION HERE % Use this style for ongoing development: "Version 1.0.5 pre-release" % Use this style for releases: "Version 2.0" -{\bf Manuscript version 1.1.1 pre-release} See \url{https://github.com/mobleylab/benchmarksets} for all versions. +%{\bf Manuscript version 1.1.1 pre-release.} See \url{https://github.com/mobleylab/benchmarksets} for all versions. +{\bf Manuscript version 1.1.1.} See \url{https://github.com/mobleylab/benchmarksets} for all versions. \end{abstract} @@ -89,6 +92,7 @@ \section{INTRODUCTION} However, simulations also can be used to make quantitative predictions of thermodynamic and kinetic properties, with applications in fields including drug discovery, chemical engineering, and nanoengineering. A thermodynamic property of particular interest is the binding affinity between biomolecules and ligands such as inhibitors, modulators, or activators. 
With accurate and rapid affinity predictions, we could use simulations in varied health-related applications, such as the prediction of biomolecular interaction networks in support of systems biology, or rapid design of new medications with reduced side-effects and drug resistance. +In this work, we give a view of how these simulations could impact drug discovery, briefly discuss where they stand now, and then argue for benchmark systems chosen to drive and assess the advancement of these methods, helping to make them practical for drug discovery. \subsection{Imagining a tool for drug discovery} A major aim in the development of molecular simulations is to create quantitative, accurate tools which will guide early stage drug discovery. @@ -177,6 +181,17 @@ \subsection{Challenges and the domain of applicability} Would the results improve with more sampling? Were protonation states misassigned---or do they perhaps even change on binding? There might even be a software bug \cite{eklund_cluster_2016} or a human error in the use of the software. As a consequence, it is not clear what steps are most urgently needed to advance the field as a whole. +In this work, we argue that many of these problems can be alleviated, and that the field will advance more rapidly, if we select a set of well-chosen benchmark systems on which free energy methods are regularly tested. + +\subsection{Improving modeling by cycles of testing, prediction, and improvement} +Modeling can in some cases improve rapidly, but, in our experience, rapid advances are most common when computational models undergo regular cycles of improvement, predictive testing, learning, and then further improvement. 
+This can be particularly difficult for academic groups which may not have the resources for predictive tests; however, these are essential, since it is only in predictive tests that we can be sure we are assessing the performance of a method as it works in real life, rather than relying on knowledge of the expected outcome to inform setup of the calculations. +With this in mind, the Statistical Assessment of the Modeling of Proteins and Ligands (SAMPL) blind challenges, as well as the Community Structure Activity Resource (CSAR) challenge, later replaced by the Drug Design Data Resource (D3R) grand challenges, have arisen to meet part of this need. +Currently, D3R focuses on running blind challenges on protein-ligand binding with datasets from the pharmaceutical industry, allowing testing and evaluation of computational methods on systems of direct pharmaceutical relevance. +SAMPL, in contrast, focuses on predictions in simpler physical settings~\cite{Mobley:2016:}, such as small molecules in aqueous and organic phases, and small molecules binding to supramolecular hosts. +Together, the SAMPL and D3R challenges roughly span the spectrum from properties we can predict now (though they may be challenging in some cases~\cite{bannan_blind_2016, yin_overview_2016, Mobley:2016:}) to the drug binding we want to be able to reliably predict. +These challenges are vital as they provide our only opportunity, at present, to routinely see how different methods compare when attempting to compute the same properties, and they provide the beginnings of a model for how we can best advance free energy techniques: routinely testing our methods on the same, well-understood systems to learn what does and doesn't work to improve performance. +Thus, we need not just blind tests, but retrospective testing on well-understood, ``benchmark'' systems, detailed below. 
\section{THE NEED FOR WELL-CHOSEN BENCHMARK SYSTEMS} @@ -187,6 +202,7 @@ \section{THE NEED FOR WELL-CHOSEN BENCHMARK SYSTEMS} These selections result in part from two series of blinded prediction challenges (SAMPL~\cite{muddana_sampl4_2014}, and CSAR~\cite{dunbar_csar_2011} followed by D3R~\cite{Gathiaka:2016:JComputAidedMolDes}), which have helped focus the computational chemistry community on a succession of test cases and highlighted the need for methodological improvements. However, broader adoption of a larger and more persistent set of test cases is needed. By coalescing around a compact set of benchmarks, well chosen to challenge and probe free energy calculations, practitioners and developers will be able to better assess and drive progress in binding free energy calculations. +Our primary goals in this work are to explain how benchmark systems can be used to advance the field, to encourage adoption of a standard set of benchmark systems, and to propose some candidates for this set. \subsection{Benchmark types and applications} @@ -290,7 +306,8 @@ \subsection{Host-guest benchmarks} Furthermore, experiments can often be run under conditions that make the protonation states of the host and guest unambiguous. Under these conditions, the level of agreement of correctly executed calculations with experiment effectively reports on the validity of the force field (Section~\ref{pgph:accuracy}). For a number of host-guest systems, the use of isothermal titration calorimetry (ITC) to characterize binding provides both binding free energies and binding enthalpies. -Binding enthalpies can often also be computed to good numerical precision~\cite{henriksen_computational_2015}, so they provide an additional check of the validity of simulations. A variety of curated host-guest binding data is available on BindingDB at \url{http://bindingdb.org/bind/HostGuest.jsp}. 
+Binding enthalpies can often also be computed to good numerical precision~\cite{henriksen_computational_2015}, so they provide an additional check of the validity of simulations. +A variety of curated host-guest binding data is available on BindingDB at \url{http://bindingdb.org/bind/HostGuest.jsp}. Hosts fall into chemical families, such that all members of each family share a major chemical motif, but individuals vary in terms of localized chemical substitutions and, in some families, the number of characteristic monomers they comprise. For example, all members of the cyclodextrin family are chiral rings of glucose monomers; family members then differ in the number of monomers and in the presence or absence of various chemical substituents. @@ -374,7 +391,9 @@ \subsubsection{Cucurbiturils} For example, in SAMPL4, free energy methods yielded $R^2$ values from 0.1 to 0.8 and RMS errors of about 1.9 to 4.9 kcal/mol for the same set of CB7 cases~\cite{muddana_sampl4_2014}. This spread of results across rather similar methods highlights the need for shared benchmarks. Potential explanations include convergence difficulties, subtle methodological differences, and details of how the methods were applied. -Until the origin of such discrepancies is clear, it is difficult to know how accurate our methods truly are. +Until the origin of such discrepancies is clear, it is difficult to know how accurate our methods truly are. + +To aid the adoption of these systems as benchmarks, input files for the CB7 systems proposed here are available in our GitHub repository. \begingroup \squeezetable @@ -562,6 +581,8 @@ \subsubsection{Gibb Deep Cavity Cavitands (GDCC)} It is worth noting that several groups using different computational approaches but the same force field and water model in SAMPL5 did not obtain identical binding free energies~\cite{yin_overview_2016, bosisio_blinded_2016, bhakat_resolving_2016}. 
Some of these issues were resolved in follow-up work~\cite{bhakat_resolving_2016}, bringing the methods into fairly good agreement for the majority of cases~\cite{yin_sampl5_2016, bosisio_blinded_2016}. +To aid the adoption of these systems as benchmarks, input files for the GDCC systems proposed here are available in our GitHub repository. + \subsection{Protein-ligand benchmarks: the T4 lysozyme model binding sites} \label{sec:t4} \begin{figure*} @@ -581,12 +602,17 @@ \subsection{Protein-ligand benchmarks: the T4 lysozyme model binding sites} These two binding sites, called L99A~\cite{morton_energetic_1995, morton_specificity_1995} and L99A/M102Q~\cite{wei_model_2002, graves_decoys_2005} for point mutations which create the cavities of interest, have been studied extensively experimentally and via modeling. As protein-ligand systems, they introduce additional complexities beyond those observed in host-guest systems, yet they share some of the same simplicity. The ligands are generally small, neutral, and relatively rigid, with clear protonation states. -In many cases, substantial protein motions do not occur on binding, helping calculated binding free energies to reach apparent convergence relatively easily. +For most ligands, substantial protein motions do not occur on binding at room temperature and ambient pressure, helping calculated binding free energies to reach apparent convergence relatively easily. However, like host-guest systems, these binding sites are still surprisingly challenging~\cite{mobley_use_2006, mobley_confine_2007, mobley_predicting_2007, boyce_predicting_2009, jiang_free_2010, gallicchio_binding_2010, lim_sensitivity_2016}. Thus, precise convergence is sometimes difficult to achieve, and it is in all cases essentially impossible to fully verify. As a consequence, these are ``soft benchmarks" as defined above (Section~\ref{subsec:benchmarktypes}). 
The utility of the lysozyme model sites is also driven by the large body of available experimental data. It has been relatively easy to identify new ligands and obtain high quality crystal structures and affinity measurements, and this has allowed two different rounds of blinded free energy prediction exercises~\cite{mobley_predicting_2007, boyce_predicting_2009}. +These binding sites do exhibit some surprising experimental complexities which make them interesting ongoing topics of study, such as the fact that the L99A site is empty of water when ligands are not bound~\cite{Nucci:2014:PNAS, Lerch:2015:PNAS, Collins:2007:JournalofMolecularBiology} yet the protein can undergo pressure-induced filling~\cite{Collins:2007:JournalofMolecularBiology,Lerch:2015:PNAS} or denaturation~\cite{Nucci:2014:PNAS} which can be inhibited by binding of ligand~\cite{Nucci:2014:PNAS, Lerch:2015:PNAS}. +Pressure may also cause the protein to populate an excited state~\cite{Maeno:2015:BiophysicalJournal, Kitahara:2015:PNAS} (but see~\cite{Wand:2015:PNAS}) which is already present to a very limited extent at equilibrium~\cite{Bouvignies:2011:Nature}. +Still, as noted below, these issues do not seem to dramatically impact our ability to calculate binding free energies at standard temperature and pressure, probably in large part because these are effects which come into play only at high pressures~\cite{Nucci:2014:PNAS, Lerch:2015:PNAS, Maeno:2015:BiophysicalJournal}, though as we discuss below, some ligands do induce a protein conformational change which affects the same helix as the proposed excited state~\cite{merski_homologous_2015}. +It seems likely that the conformational heterogeneity observed experimentally will make lysozyme even more of a valuable benchmark system as test cases here can range from simple to challenging depending on the ligand and pressure being considered.
+ \subsubsection{The apolar and polar cavities and their ligands} The L99A site is also called the ``apolar'' cavity. It is relatively flat and elongated, and binds mostly nonpolar molecules such as benzene, toluene, p-xylene, and n-butylbenzene: basically, a fairly broad range of nonpolar planar five- and six-membered rings and ring systems (such as indole). @@ -613,7 +639,7 @@ \subsubsection{The apolar and polar cavities and their ligands} \item{Backbone rearrangement: Larger ligands induce shifts of the F helix (residues 107 or 108 to 115), which is adjacent to the binding site, allowing the site to enlarge. This occurs in both binding sites~\cite{wei_testing_2004, boyce_predicting_2009, merski_homologous_2015}, but is best characterized for L99A~\cite{merski_homologous_2015}. -There, addition of a series of methyl groups from benzene up to n-hexylbenzene causes a conformational transition in the protein from closed to intermediate to open conformations. +There, addition of a series of methyl groups from benzene up to n-hexylbenzene causes a conformational transition in the protein from closed to intermediate to open conformations; this affects the same region of helix F that undergoes a conformational change in the proposed excited state which is partially populated at equilibrium~\cite{Bouvignies:2011:Nature}. } \end{enumerate} @@ -815,7 +841,7 @@ \subsubsection{Other protein-ligand systems} A number of other proteins also have strong potential to generate useful benchmark sets. For example, free energy calculations have been carried out for influenza neuraminidase inhibitors ~\cite{smith_dihydropyrancarboxamides_1998} with some success~\cite{michel_protein_2006}, but other series include more complicated structure-activity relationships and are associated with protein loop motions that may be difficult to model~\cite{kerry_structural_2013}. 
Periplasmic oligopeptide binding protein A (OppA) binds a series of two to five-residue peptides, for which there exists a large amount of calorimetric and crystallographic binding data~\cite{tame_crystal_1995, davies_relating_1999, sleigh_crystallographic_1999}, and the system appears challenging but potentially tractable for free energy calculations~\cite{maurer_calculation_2016}. The JNK kinase may pose an interesting conformational sampling challenge, due to its slow interconversion between binding modes for some ligands~\cite{kaus_how_2015}. -And the ongoing series of Drug Design Data Resource (D3R) \cite{} blinded challenges may also be source of informative protein-ligand systems (drugdesigndata.org/about/datasets) that are familiar to the computational chemistry community. +And the ongoing series of Drug Design Data Resource (D3R) \cite{Gathiaka:2016:JComputAidedMolDes} blinded challenges may also be a source of informative protein-ligand systems (drugdesigndata.org/about/datasets) that are familiar to the computational chemistry community. As noted above, however, many other protein-ligand binding systems have been characterized experimentally, and a systematic filtering would undoubtedly yield more benchmark candidates. \section{HOW TO USE BENCHMARK SYSTEMS} @@ -891,6 +917,7 @@ \section{CONCLUSIONS AND OUTLOOK} Identifying and addressing failure cases and problems is critically important to advancing this technology, but failures can be harder to publish, and may even go unpublished, even though they serve a unique role in advancing the field. We therefore strongly encourage that such results be shared and welcomed by the research community. Potentially, the GitHub repository connected with this perpetual review paper could serve as a place to deposit input files and summary results of tests on these benchmark systems, with summary information perhaps being included in this work itself or topical sub-reviews within the same repository.
+Host-guest input files are now available there. Here, we have proposed several benchmark systems for binding free energy calculations. These embody a subset of the key challenges facing the field, and we plan to expand the set as consensus emerges. @@ -899,13 +926,14 @@ \section{CONCLUSIONS AND OUTLOOK} \section*{DISCLOSURE STATEMENT} -D.L.M. is a member of the Scientific Advisory Board for Schr\"{o}dinger, LLC. M.K.G. is a cofounder and has equity interest in the company VeraChem LLC. +D.L.M. is a member of the Scientific Advisory Board for OpenEye Scientific Software. M.K.G. is a cofounder and has equity interest in the company VeraChem LLC. \section*{ACKNOWLEDGMENTS} DLM appreciates financial support from the National Institutes of Health (NIH; 1R01GM108889-01) and the National Science Foundation (NSF; CHE 1352608). MKG thanks the NIH for partial support of this work through grants R01GM061300, R01GM070064 and U01GM111528. The contents of this publication are solely the responsibility of the authors and do not necessarily represent the official views of the NIH or the NSF. We also appreciate helpful discussions with a huge number of people in the field, including a wide variety of participants at recent meetings such as the 2016 Workshop on Free Energy Methods in Drug Discovery. Conversations with John Chodera (MSKCC), Chris Oostenbrink (BOKU), Julien Michel (Edinburgh), Robert Abel (Schr\"{o}dinger), Bruce Gibb (Tulane), Matt Sullivan (Tulane), and Lyle Isaacs (Maryland) were particularly helpful. +We thank David Slochower (UCSD) for a critical reading of the manuscript, and others listed on our GitHub repository for their contributions. \bibliographystyle{abbrv}