diff --git a/roofit/roofitcore/inc/RooFormulaVar.h b/roofit/roofitcore/inc/RooFormulaVar.h index 8b8058a9394f7..f874b74250fd6 100644 --- a/roofit/roofitcore/inc/RooFormulaVar.h +++ b/roofit/roofitcore/inc/RooFormulaVar.h @@ -77,6 +77,9 @@ class RooFormulaVar : public RooAbsReal { std::string getUniqueFuncName() const; + std::unique_ptr + compileForNormSet(RooArgSet const &normSet, RooFit::Detail::CompileContext &ctx) const override; + protected: // Post-processing of server redirection bool redirectServersHook(const RooAbsCollection& newServerList, bool mustReplaceAll, bool nameChange, bool isRecursive) override ; diff --git a/roofit/roofitcore/src/RooAbsPdf.cxx b/roofit/roofitcore/src/RooAbsPdf.cxx index 2ec8d927c8be9..a4ecd017abae8 100644 --- a/roofit/roofitcore/src/RooAbsPdf.cxx +++ b/roofit/roofitcore/src/RooAbsPdf.cxx @@ -487,103 +487,107 @@ const RooAbsReal* RooAbsPdf::getNormObj(const RooArgSet* nset, const RooArgSet* /// For functions that declare to be self-normalized by overloading the /// selfNormalized() function, a unit normalization is always constructed. -bool RooAbsPdf::syncNormalization(const RooArgSet* nset, bool adjustProxies) const -{ - setActiveNormSet(nset); - - // Check if data sets are identical - CacheElem* cache = static_cast(_normMgr.getObj(nset)) ; - if (cache) { - - bool nintChanged = (_norm!=cache->_norm.get()) ; - _norm = cache->_norm.get(); - - // In the past, this condition read `if (nintChanged && adjustProxies)`. - // However, the cache checks if the nset was already cached **by content**, - // and not by RooArgSet instance! So it can happen that the normalization - // set object is different, but the integral object is the same, in which - // case it would be wrong to not adjust the proxies. They always have to be - // adjusted when the nset changed, which is always the case when - // `syncNormalization()` is called. - if (adjustProxies) { - // Update dataset pointers of proxies - const_cast(this)->setProxyNormSet(nset) ; - } +bool RooAbsPdf::syncNormalization(const RooArgSet *nset, bool adjustProxies) const +{ + setActiveNormSet(nset); + + // Check if data sets are identical + CacheElem *cache = static_cast(_normMgr.getObj(nset)); + if (cache) { + + bool nintChanged = (_norm != cache->_norm.get()); + _norm = cache->_norm.get(); + + // In the past, this condition read `if (nintChanged && adjustProxies)`. + // However, the cache checks if the nset was already cached **by content**, + // and not by RooArgSet instance! So it can happen that the normalization + // set object is different, but the integral object is the same, in which + // case it would be wrong to not adjust the proxies. They always have to be + // adjusted when the nset changed, which is always the case when + // `syncNormalization()` is called. + if (adjustProxies) { + // Update dataset pointers of proxies + const_cast(this)->setProxyNormSet(nset); + } - return nintChanged ; - } + return nintChanged; + } - // Update dataset pointers of proxies - if (adjustProxies) { - const_cast(this)->setProxyNormSet(nset) ; - } + // Update dataset pointers of proxies + if (adjustProxies) { + const_cast(this)->setProxyNormSet(nset); + } - RooArgSet depList; - getObservables(nset, depList); + RooArgSet depList; + getObservables(nset, depList); - if (_verboseEval>0) { - if (!selfNormalized()) { - cxcoutD(Tracing) << ClassName() << "::syncNormalization(" << GetName() - << ") recreating normalization integral " << std::endl ; - depList.printStream(ccoutD(Tracing),kName|kValue|kArgs,kSingleLine) ; - } else { - cxcoutD(Tracing) << ClassName() << "::syncNormalization(" << GetName() << ") selfNormalized, creating unit norm" << std::endl; - } - } - - // Destroy old normalization & create new - if (selfNormalized() || !dependsOn(depList)) { - auto ntitle = std::string(GetTitle()) + " Unit Normalization"; - auto nname = std::string(GetName()) + "_UnitNorm"; - _norm = new RooRealVar(nname.c_str(),ntitle.c_str(),1) ; - } else { - const char* nr = (_normRangeOverride.Length()>0 ? _normRangeOverride.Data() : (_normRange.Length()>0 ? _normRange.Data() : nullptr)) ; - -// std::cout << "RooAbsPdf::syncNormalization(" << GetName() << ") rangeName for normalization is " << (nr?nr:"") << std::endl ; - RooAbsReal* normInt; - { - // Normalization is always over all pdf components. Overriding the global - // component selection temporarily makes all RooRealIntegrals created during - // that time always include all components. - GlobalSelectComponentRAII selCompRAII(true); - normInt = std::unique_ptr{createIntegral(depList,*getIntegratorConfig(),nr)}.release(); - } - static_cast(normInt)->setAllowComponentSelection(false); - normInt->getVal() ; -// std::cout << "resulting normInt = " << normInt->GetName() << std::endl ; - - const char* cacheParamsStr = getStringAttribute("CACHEPARAMINT") ; - if (cacheParamsStr && strlen(cacheParamsStr)) { - - std::unique_ptr intParams{normInt->getVariables()} ; - - RooArgSet cacheParams = RooHelpers::selectFromArgSet(*intParams, cacheParamsStr); - - if (!cacheParams.empty()) { - cxcoutD(Caching) << "RooAbsReal::createIntObj(" << GetName() << ") INFO: constructing " << cacheParams.size() - << "-dim value cache for integral over " << depList << " as a function of " << cacheParams << " in range " << (nr?nr:"") << std::endl ; - std::string name = normInt->GetName() + ("_CACHE_[" + cacheParams.contentsString()) + "]"; - RooCachedReal* cachedIntegral = new RooCachedReal(name.c_str(),name.c_str(),*normInt,cacheParams) ; - cachedIntegral->setInterpolationOrder(2) ; - cachedIntegral->addOwnedComponents(*normInt) ; - cachedIntegral->setCacheSource(true) ; - if (normInt->operMode()==ADirty) { - cachedIntegral->setOperMode(ADirty) ; - } - normInt= cachedIntegral ; + if (_verboseEval > 0) { + if (!selfNormalized()) { + cxcoutD(Tracing) << ClassName() << "::syncNormalization(" << GetName() + << ") recreating normalization integral " << std::endl; + depList.printStream(ccoutD(Tracing), kName | kValue | kArgs, kSingleLine); + } else { + cxcoutD(Tracing) << ClassName() << "::syncNormalization(" << GetName() + << ") selfNormalized, creating unit norm" << std::endl; } + } - } - _norm = normInt ; - } + // Destroy old normalization & create new + if (selfNormalized() || depList.empty()) { + auto ntitle = std::string(GetTitle()) + " Unit Normalization"; + auto nname = std::string(GetName()) + "_UnitNorm"; + _norm = new RooRealVar(nname.c_str(), ntitle.c_str(), 1); + } else { + const char *nr = (_normRangeOverride.Length() > 0 ? _normRangeOverride.Data() + : (_normRange.Length() > 0 ? _normRange.Data() : nullptr)); + + // std::cout << "RooAbsPdf::syncNormalization(" << GetName() << ") rangeName for normalization is " << + // (nr?nr:"") << std::endl ; + RooAbsReal *normInt; + { + // Normalization is always over all pdf components. Overriding the global + // component selection temporarily makes all RooRealIntegrals created during + // that time always include all components. + GlobalSelectComponentRAII selCompRAII(true); + normInt = std::unique_ptr{createIntegral(depList, *getIntegratorConfig(), nr)}.release(); + } + static_cast(normInt)->setAllowComponentSelection(false); + normInt->getVal(); + // std::cout << "resulting normInt = " << normInt->GetName() << std::endl ; + + const char *cacheParamsStr = getStringAttribute("CACHEPARAMINT"); + if (cacheParamsStr && strlen(cacheParamsStr)) { + + std::unique_ptr intParams{normInt->getVariables()}; + + RooArgSet cacheParams = RooHelpers::selectFromArgSet(*intParams, cacheParamsStr); + + if (!cacheParams.empty()) { + cxcoutD(Caching) << "RooAbsReal::createIntObj(" << GetName() << ") INFO: constructing " + << cacheParams.size() << "-dim value cache for integral over " << depList + << " as a function of " << cacheParams << " in range " << (nr ? nr : "") + << std::endl; + std::string name = normInt->GetName() + ("_CACHE_[" + cacheParams.contentsString()) + "]"; + RooCachedReal *cachedIntegral = new RooCachedReal(name.c_str(), name.c_str(), *normInt, cacheParams); + cachedIntegral->setInterpolationOrder(2); + cachedIntegral->addOwnedComponents(*normInt); + cachedIntegral->setCacheSource(true); + if (normInt->operMode() == ADirty) { + cachedIntegral->setOperMode(ADirty); + } + normInt = cachedIntegral; + } + } + _norm = normInt; + } - // Register new normalization with manager (takes ownership) - cache = new CacheElem(*_norm) ; - _normMgr.setObj(nset,cache) ; + // Register new normalization with manager (takes ownership) + cache = new CacheElem(*_norm); + _normMgr.setObj(nset, cache); -// std::cout << "making new object " << _norm->GetName() << std::endl ; + // std::cout << "making new object " << _norm->GetName() << std::endl ; - return true ; + return true; } diff --git a/roofit/roofitcore/src/RooFormulaVar.cxx b/roofit/roofitcore/src/RooFormulaVar.cxx index bea5c7110d1b6..c6fd11cbb7b4b 100644 --- a/roofit/roofitcore/src/RooFormulaVar.cxx +++ b/roofit/roofitcore/src/RooFormulaVar.cxx @@ -316,3 +316,22 @@ std::string RooFormulaVar::getUniqueFuncName() const { return getFormula().getTFormula()->GetUniqueFuncName().Data(); } + +std::unique_ptr +RooFormulaVar::compileForNormSet(RooArgSet const &normSet, RooFit::Detail::CompileContext &ctx) const +{ + // Some users exploit unnormalized RooAbsPdfs as inputs for RooFormulaVars, + // relying on what the pdf returns from RooAbsPdf::evaluate(). This is in + // principle not allowed because every pdf needs to be evaluated with a + // normalization set, but it's so common in user code that we need to + // support it. To make this work, we need to make sure that the no + // normalization over non-dependents is happening at this point, reducing + // the normalization set to the subset of actual dependents. + // See also the "PdfAsFunctionInFormulaVar" test in testRooAbsPdf. + RooArgSet depList; + getObservables(&normSet, depList); + auto newArg = std::unique_ptr{static_cast(Clone())}; + ctx.markAsCompiled(*newArg); + ctx.compileServers(*newArg, depList); + return newArg; +} diff --git a/roofit/roofitcore/test/CMakeLists.txt b/roofit/roofitcore/test/CMakeLists.txt index 029213cde087c..099bfc5686e66 100644 --- a/roofit/roofitcore/test/CMakeLists.txt +++ b/roofit/roofitcore/test/CMakeLists.txt @@ -18,7 +18,7 @@ ROOT_ADD_GTEST(testRooDataHist testRooDataHist.cxx LIBRARIES RooFitCore ROOT_ADD_GTEST(testRooBinSamplingPdf testRooBinSamplingPdf.cxx LIBRARIES RooFitCore) ROOT_ADD_GTEST(testRooWrapperPdf testRooWrapperPdf.cxx LIBRARIES Gpad RooFitCore) ROOT_ADD_GTEST(testGenericPdf testGenericPdf.cxx LIBRARIES RooFitCore) -ROOT_ADD_GTEST(testRooAbsPdf testRooAbsPdf.cxx LIBRARIES RooFitCore) +ROOT_ADD_GTEST(testRooAbsPdf testRooAbsPdf.cxx LIBRARIES RooFitCore RooFit) ROOT_ADD_GTEST(testRooAbsCollection testRooAbsCollection.cxx LIBRARIES RooFitCore) ROOT_ADD_GTEST(testRooDataSet testRooDataSet.cxx LIBRARIES Tree RooFitCore diff --git a/roofit/roofitcore/test/testRooAbsPdf.cxx b/roofit/roofitcore/test/testRooAbsPdf.cxx index 814a6c0eb1898..b69d5b7a3485f 100644 --- a/roofit/roofitcore/test/testRooAbsPdf.cxx +++ b/roofit/roofitcore/test/testRooAbsPdf.cxx @@ -2,23 +2,26 @@ // Authors: Stephan Hageboeck, CERN 04/2020 // Jonas Rembser, CERN 04/2021 -#include #include +#include #include #include #include #include #include #include +#include #include #include +#include #include #include +#include #include #include #include -#include +#include #include #include @@ -375,6 +378,42 @@ TEST_P(FitTest, ProblemsWith2DSimultaneousFit) simPdf.fitTo(*data, PrintLevel(-1), _evalBackend); } +// This test covers a usecase by an ATLAS collaborator. The unnormalized shape +// of a RooFit pdf is used as a function inside a RooFormulaVar, which is used +// for the bins of a RooParametricStep function. This case is potentially +// fragile, because it requires that the top-level normalization set is ignored +// for the inner pdfs that don't depend on the observable, as normalizing over +// a non-dependent is a corner case where the variable should be dropped. +TEST_P(FitTest, PdfAsFunctionInFormulaVar) +{ + using namespace RooFit; + + RooRealVar x{"x", "x", 0., 1.}; + + const double arg = std::sqrt(-8 * std::log(0.5)); + RooGaussian gauss1{"gauss1", "", 10 - arg, 10, 2}; + + RooFormulaVar func1{"func1", "x[0]", {gauss1}}; + + // The parametric step function doesn't make any attempt at self + // normalization, so the pdf value in the first bin is just the value of the + // unnormalized Gaussian. And the first bin is the almost the whole domain. + TArrayD limits(3); + limits[0] = 0.; + limits[1] = 1 - 1e-9; + limits[2] = 1.; + RooParametricStepFunction pdf("pdf", "pdf", x, {func1}, limits, limits.size() - 1); + + int nEvents = 10000; + std::unique_ptr data{pdf.generateBinned(x, nEvents)}; + + std::unique_ptr nll{pdf.createNLL(*data, _evalBackend)}; + + // The test is designed to have an analytical reference value + double ref = nEvents * -std::log(0.5); + EXPECT_FLOAT_EQ(nll->getVal(), ref); +} + // Verifies that a server pdf gets correctly reevaluated when the normalization // set is changed. TEST(RooAbsPdf, NormSetChange)