diff --git a/n3fit/runcards/examples/developing.yml b/n3fit/runcards/examples/developing.yml
index 79ee9dadc4..a554a851b7 100644
--- a/n3fit/runcards/examples/developing.yml
+++ b/n3fit/runcards/examples/developing.yml
@@ -81,7 +81,7 @@ parameters: # This defines the parameter dictionary that is passed to the Model
   stopping_patience: 0.1 # percentage of the number of epochs
   layer_type: 'dense'
   dropout: 0.01
-  interpolation_points: 7
+  feature_scaling_points: 7
 
 fitting:
   fitbasis: CCBAR_ASYMM
diff --git a/validphys2/src/validphys/comparefittemplates/comparecard.yaml b/validphys2/src/validphys/comparefittemplates/comparecard.yaml
index 593c9b687e..b59c59a882 100644
--- a/validphys2/src/validphys/comparefittemplates/comparecard.yaml
+++ b/validphys2/src/validphys/comparefittemplates/comparecard.yaml
@@ -99,6 +99,26 @@ lumi_report:
   meta: Null
   template: lumi.md
 
+mismatched_information:
+  meta: Null
+  actions_:
+    - report
+
+  # Datasets will go to their own page
+  mismatched_report:
+    meta: Null
+    template: mismatched.md
+
+  template_text: |
+    Mismatched datasets
+    ---------------------
+    The following plots correspond to datasets which are not available in one of the fits.
+
+    {@with mismatched_datasets_by_name@}
+    [Plots for {@dataset_name@}]({@mismatched_report report@})
+    {@endwith@}
+
+
 template: report.md
 
 positivity:
@@ -119,6 +139,8 @@ dataspecs:
       from_: current
     speclabel:
       from_: current
+    dataset_inputs:
+      from_: fit
 
   - theoryid:
       from_: reference
@@ -128,6 +150,8 @@ dataspecs:
       from_: reference
     speclabel:
       from_: reference
+    dataset_inputs:
+      from_: fit
 
 t0_info:
   - use_t0: True
diff --git a/validphys2/src/validphys/comparefittemplates/mismatched.md b/validphys2/src/validphys/comparefittemplates/mismatched.md
new file mode 100644
index 0000000000..20c76d0848
--- /dev/null
+++ b/validphys2/src/validphys/comparefittemplates/mismatched.md
@@ -0,0 +1,5 @@
+% Data-theory comparison for the mismatched dataset {@dataset_name@}
+# Absolute
+{@plot_fancy@}
+# Normalized
+{@Datanorm plot_fancy@}
diff --git a/validphys2/src/validphys/comparefittemplates/report.md b/validphys2/src/validphys/comparefittemplates/report.md
index 5dfca92200..3f19d074a5 100644
--- a/validphys2/src/validphys/comparefittemplates/report.md
+++ b/validphys2/src/validphys/comparefittemplates/report.md
@@ -99,12 +99,17 @@ $\phi$ by {@processed_metadata_group@}
 {@plot_fits_groups_data_phi@}
 {@endwith@}
 
+
+
 Dataset plots
 -------------
 {@with matched_datasets_from_dataspecs@}
 [Plots for {@dataset_name@}]({@dataset_report report@})
 {@endwith@}
 
+[Mismatched datasets]({@mismatched_information report@})
+--------------------
+
 Positivity
 ----------
 {@with matched_positivity_from_dataspecs@}
diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py
index addbb5f718..f7d8079808 100644
--- a/validphys2/src/validphys/config.py
+++ b/validphys2/src/validphys/config.py
@@ -36,7 +36,7 @@
     default_filter_rules_input,
     default_filter_settings_input,
 )
-from validphys.fitdata import fitted_replica_indexes, num_fitted_replicas
+from validphys.fitdata import fitted_replica_indexes, match_datasets_by_name, num_fitted_replicas
 from validphys.gridvalues import LUMI_CHANNELS
 from validphys.loader import (
     DataNotFoundError,
@@ -981,14 +981,12 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
         for spec in dataspecs:
             with self.set_context(ns=self._curr_ns.new_child(spec)):
                 _, data_input = self.parse_from_(None, "data_input", write=False)
-
                 names = {}
                 for dsin in data_input:
                     cd = self.produce_commondata(dataset_input=dsin)
                     proc = get_info(cd).nnpdf31_process
                     ds = dsin.name
                     names[(proc, ds)] = dsin
-
                 all_names.append(names)
         used_set = set.intersection(*(set(d) for d in all_names))
         res = []
@@ -997,13 +995,62 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
             # TODO: Should this have the same name?
             inner_spec_list = inres["dataspecs"] = []
             for ispec, spec in enumerate(dataspecs):
-                # Passing spec by referene
+                # Passing spec by reference
                 d = ChainMap({"dataset_input": all_names[ispec][k]}, spec)
                 inner_spec_list.append(d)
             res.append(inres)
         res.sort(key=lambda x: (x["process"], x["dataset_name"]))
         return res
 
+    def produce_mismatched_datasets_by_name(self, dataspecs):
+        """
+        Like produce_matched_datasets_from_dataspecs, but for the mismatched datasets of a fit comparison.
+        Returns the mismatched datasets, each tagged with information (theoryid, pdfs, fit) from the dataspec they came from. Set up to work with plot_fancy.
+
+        Datasets are considered mismatched if their (name, variant) pair is not present in every dataspec.
+        """
+
+        self._check_dataspecs_type(dataspecs)
+
+        # Parse the data for the comparison so that only the dataset name and variant are actually tested
+        parsed_data = []
+        for spec in dataspecs:
+            tmp = [(i.name, i.variant) for i in spec["dataset_inputs"]]
+            parsed_data.append((spec, tmp))
+
+        # TODO:
+        # This is a convoluted way of checking whether there are mismatches
+        # between the lists of dataset inputs of a list of specs.
+        # This is not going to win any codegolf tournaments
+        already_mismatched = []
+        mismatched_dinputs = []
+        for spec, parsed_dinputs in parsed_data:
+            for spec_to_check, parsed_dinputs_to_check in parsed_data:
+                if spec == spec_to_check:
+                    continue
+                for i, parsed_dinput in enumerate(parsed_dinputs):
+                    # Use a list of already mismatched data to avoid duplicates
+                    if parsed_dinput in already_mismatched:
+                        continue
+                    if parsed_dinput not in parsed_dinputs_to_check:
+                        dinput = spec["dataset_inputs"][i]
+                        mismatched_dinputs.append((dinput, spec))
+                        already_mismatched.append(parsed_dinput)
+
+        res = []
+        # Prepare the output expected by plot_fancy
+        for dsin, spec in mismatched_dinputs:
+            res.append(
+                {
+                    "dataset_input": dsin,
+                    "dataset_name": dsin.name,
+                    "theoryid": spec["theoryid"],
+                    "pdfs": [i["pdf"] for i in dataspecs],
+                    "fit": spec["fit"],
+                }
+            )
+        return res
+
     def produce_matched_positivity_from_dataspecs(self, dataspecs):
         """Like produce_matched_datasets_from_dataspecs but for positivity datasets."""
         self._check_dataspecs_type(dataspecs)
@@ -1014,7 +1061,6 @@ def produce_matched_positivity_from_dataspecs(self, dataspecs):
                 names = {(p.name): (p) for p in pos}
             all_names.append(names)
         used_set = set.intersection(*(set(d) for d in all_names))
-
         res = []
         for k in used_set:
             inres = {"posdataset_name": k}
diff --git a/validphys2/src/validphys/scripts/vp_comparefits.py b/validphys2/src/validphys/scripts/vp_comparefits.py
old mode 100644
new mode 100755
index 2f13f9a84d..a0955351b8
--- a/validphys2/src/validphys/scripts/vp_comparefits.py
+++ b/validphys2/src/validphys/scripts/vp_comparefits.py
@@ -68,7 +68,6 @@ def add_positional_arguments(self, parser):
            help="Use LUX basis (which include the photon) for the report",
            action='store_true',
        )
-
        parser.set_defaults()
 
    def try_complete_args(self):
@@ -178,6 +177,7 @@ def get_commandline_arguments(self, cmdline=None):
            args['config_yml'] = comparefittemplates.template_pol_path
        else:
            args['config_yml'] = comparefittemplates.template_path
+
        return args
 
    def complete_mapping(self):
@@ -223,8 +223,16 @@ def complete_mapping(self):
                'unpolarized_bc': {'from_': 'positivity_bound'},
            }
        )
+        are_the_same = self.check_identical_theory_cuts_covmat()
+        if are_the_same:
+            log.info("Adding mismatched datasets page: identical theory, data cuts and covmat detected")
+        else:
+            autosettings["mismatched_information"] = {
+                "template_text": "Mismatched datasets cannot be shown because the theory, data cuts and/or covmat are not identical between the two fits"
+            }
        return autosettings
 
+
    def get_config(self):
        self.try_complete_args()
        # No error handling here because this is our internal file
@@ -234,6 +242,23 @@ def get_config(self):
            c = yaml_safe.load(f)
        c.update(self.complete_mapping())
        return self.config_class(c, environment=self.environment)
+
+    def check_identical_theory_cuts_covmat(self):
+        """
+        Checks whether the theory ID, data cuts, and theory covmat configuration are the same between the two fits.
+        In the affirmative case, a mismatched datasets page will be added to the report.
+        """
+        args = self.args
+        l = self.environment.loader
+        current_runcard = l.check_fit(args['current_fit']).as_input()
+        reference_runcard = l.check_fit(args['reference_fit']).as_input()
+
+        current_thcovmat = current_runcard.get("theorycovmatconfig")
+        reference_thcovmat = reference_runcard.get("theorycovmatconfig")
+        same_theoryid = current_runcard.get("theory", {}).get("theoryid") == reference_runcard.get("theory", {}).get("theoryid")
+        same_datacuts = current_runcard.get("datacuts") == reference_runcard.get("datacuts")
+        same_thcovmat = (current_thcovmat == reference_thcovmat)
+        return same_theoryid and same_datacuts and same_thcovmat
 
 
 def main():
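
Note (not part of the patch above): the mismatch detection in `produce_mismatched_datasets_by_name` flags a `dataset_input` whenever its `(name, variant)` pair is absent from at least one of the other dataspecs. A minimal standalone sketch of the same idea, using hypothetical helper and dataset names purely for illustration, is:

```python
# Minimal sketch (hypothetical names) of the mismatch detection in the patch:
# a dataset counts as mismatched when its (name, variant) pair is not present
# in every dataspec.
def find_mismatched(dataspec_inputs):
    """dataspec_inputs: one list of (name, variant) pairs per dataspec."""
    common = set.intersection(*(set(inputs) for inputs in dataspec_inputs))
    mismatched = []
    for inputs in dataspec_inputs:
        for pair in inputs:
            # Keep first-seen order and avoid duplicates, as the patch does
            if pair not in common and pair not in mismatched:
                mismatched.append(pair)
    return mismatched


if __name__ == "__main__":
    current = [("ATLAS_DY_7TEV", None), ("CMS_TTBAR_13TEV", "legacy")]
    reference = [("ATLAS_DY_7TEV", None), ("CMS_TTBAR_13TEV", "legacy_10")]
    # The two CMS entries differ only by variant, so both are reported
    print(find_mismatched([current, reference]))
```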
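Similarly, the gate added in `complete_mapping` only keeps the mismatched-datasets page when `check_identical_theory_cuts_covmat` returns True. A self-contained sketch of that comparison, with made-up runcard snippets, is:

```python
# Standalone sketch of the check in check_identical_theory_cuts_covmat: the two
# fit runcards are compared only on the theory id, the datacuts block and the
# theorycovmatconfig block. The runcard dictionaries below are made up.
def same_theory_cuts_covmat(current_runcard: dict, reference_runcard: dict) -> bool:
    same_theoryid = (
        current_runcard.get("theory", {}).get("theoryid")
        == reference_runcard.get("theory", {}).get("theoryid")
    )
    same_datacuts = current_runcard.get("datacuts") == reference_runcard.get("datacuts")
    same_thcovmat = current_runcard.get("theorycovmatconfig") == reference_runcard.get(
        "theorycovmatconfig"
    )
    return same_theoryid and same_datacuts and same_thcovmat


if __name__ == "__main__":
    current = {"theory": {"theoryid": 700}, "datacuts": {"q2min": 3.49, "w2min": 12.5}}
    reference = {"theory": {"theoryid": 700}, "datacuts": {"q2min": 2.00, "w2min": 12.5}}
    # Different datacuts, so the report would carry the "cannot be shown" notice instead
    print(same_theory_cuts_covmat(current, reference))  # False
```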