Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f82be15
extra_dataset flag added to autosettings
jekoorn Oct 23, 2025
487c90a
Update postfit.py (i changed something silly)
jekoorn Oct 23, 2025
3532b08
removed print and debug statements
jekoorn Oct 23, 2025
a6624bd
add report_excluded.md
jekoorn Nov 13, 2025
2539204
plot_fancy logic in config
jekoorn Nov 26, 2025
42f293d
added dataset name
jekoorn Nov 26, 2025
d4efc44
improved dataspec handling logic
jekoorn Nov 27, 2025
0a26fc9
clean up code, add comment
jekoorn Nov 27, 2025
71ea8ea
added full report.md to report_with_excluded.md
jekoorn Nov 27, 2025
6f73050
improved report_with_excluded.md formatting
jekoorn Nov 27, 2025
99637b7
faster function by Juan
jekoorn Nov 27, 2025
55c3b95
remove tmp
jekoorn Nov 27, 2025
e172202
rebuilt produce logic and ylim selection for plot fancy
jekoorn Dec 1, 2025
dfa2b88
renamed to mismatched, added report page
jekoorn Dec 3, 2025
6021e51
clean up commented functions
jekoorn Dec 3, 2025
66ae2a7
implement juan suggestions about config
jekoorn Dec 3, 2025
69d064b
clean up comments
jekoorn Dec 3, 2025
39a27d2
removed obsolete comparecard
jekoorn Dec 3, 2025
431d304
removed debug timing
jekoorn Dec 3, 2025
c3b211d
removed obsolete extra comparecard
jekoorn Dec 4, 2025
a437a71
removed comments, included check_fit..as_input() in vp_comparefits
jekoorn Dec 8, 2025
b4751c1
included mismatched functionality in standard report.md
jekoorn Dec 9, 2025
4e78e97
another attempt to rebase
jekoorn Dec 19, 2025
a2df4a1
increased test_vpinterface.py deadline
jekoorn Dec 19, 2025
d01279d
revert deadline, test now passes locally with original deadline (?)
jekoorn Dec 19, 2025
a32dc03
pre-parse the datasets to check only name and variant
scarlehoff Dec 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion n3fit/runcards/examples/developing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ parameters: # This defines the parameter dictionary that is passed to the Model
stopping_patience: 0.1 # percentage of the number of epochs
layer_type: 'dense'
dropout: 0.01
interpolation_points: 7
feature_scaling_points: 7

fitting:
fitbasis: CCBAR_ASYMM
Expand Down
24 changes: 24 additions & 0 deletions validphys2/src/validphys/comparefittemplates/comparecard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,26 @@ lumi_report:
meta: Null
template: lumi.md

mismatched_information:
meta: Null
actions_:
- report

# Datasets will go to their own page
mismatched_report:
meta: Null
template: mismatched.md

template_text: |
Mismatched datasets
---------------------
The following plots correspond to datasets which are not available in one of the fits.

{@with mismatched_datasets_by_name@}
[Plots for {@dataset_name@}]({@mismatched_report report@})
{@endwith@}


template: report.md

positivity:
Expand All @@ -119,6 +139,8 @@ dataspecs:
from_: current
speclabel:
from_: current
dataset_inputs:
from_: fit

- theoryid:
from_: reference
Expand All @@ -128,6 +150,8 @@ dataspecs:
from_: reference
speclabel:
from_: reference
dataset_inputs:
from_: fit

t0_info:
- use_t0: True
Expand Down
5 changes: 5 additions & 0 deletions validphys2/src/validphys/comparefittemplates/mismatched.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
% Data-theory comparison for the mismatched dataset {@dataset_name@}
# Absolute
{@plot_fancy@}
# Normalized
{@Datanorm plot_fancy@}
5 changes: 5 additions & 0 deletions validphys2/src/validphys/comparefittemplates/report.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,17 @@ $\phi$ by {@processed_metadata_group@}
{@plot_fits_groups_data_phi@}
{@endwith@}



Dataset plots
-------------
{@with matched_datasets_from_dataspecs@}
[Plots for {@dataset_name@}]({@dataset_report report@})
{@endwith@}

[Mismatched datasets]({@mismatched_information report@})
--------------------

Positivity
----------
{@with matched_positivity_from_dataspecs@}
Expand Down
56 changes: 51 additions & 5 deletions validphys2/src/validphys/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
default_filter_rules_input,
default_filter_settings_input,
)
from validphys.fitdata import fitted_replica_indexes, num_fitted_replicas
from validphys.fitdata import fitted_replica_indexes, match_datasets_by_name, num_fitted_replicas
from validphys.gridvalues import LUMI_CHANNELS
from validphys.loader import (
DataNotFoundError,
Expand Down Expand Up @@ -981,14 +981,12 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
for spec in dataspecs:
with self.set_context(ns=self._curr_ns.new_child(spec)):
_, data_input = self.parse_from_(None, "data_input", write=False)

names = {}
for dsin in data_input:
cd = self.produce_commondata(dataset_input=dsin)
proc = get_info(cd).nnpdf31_process
ds = dsin.name
names[(proc, ds)] = dsin

all_names.append(names)
used_set = set.intersection(*(set(d) for d in all_names))
res = []
Expand All @@ -997,13 +995,62 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
# TODO: Should this have the same name?
inner_spec_list = inres["dataspecs"] = []
for ispec, spec in enumerate(dataspecs):
# Passing spec by referene
# Passing spec by reference
d = ChainMap({"dataset_input": all_names[ispec][k]}, spec)
inner_spec_list.append(d)
res.append(inres)
res.sort(key=lambda x: (x["process"], x["dataset_name"]))
return res

def produce_mismatched_datasets_by_name(self, dataspecs):
    """
    Like produce_matched_datasets_from_dataspecs, but for mismatched datasets
    from a fit comparison.

    Only the name and the variant of each dataset input are compared. A dataset
    input is a mismatch when its ``(name, variant)`` pair is missing from the
    ``dataset_inputs`` of at least one of the other dataspecs, i.e. when either
    the name or the variant differs. Each pair is reported only once, for the
    first dataspec in which it is found to be mismatched.

    Parameters
    ----------
    dataspecs : list
        Mappings providing at least the keys ``dataset_inputs``, ``theoryid``,
        ``pdf`` and ``fit``.

    Returns
    -------
    list of dict
        One dictionary per mismatched dataset input with the keys
        ``dataset_input``, ``dataset_name``, ``theoryid``, ``pdfs`` and ``fit``.
        ``theoryid`` and ``fit`` are taken from the dataspec the dataset input
        belongs to, while ``pdfs`` collects the ``pdf`` of every dataspec.
        Set up to work with plot_fancy.
    """
    self._check_dataspecs_type(dataspecs)

    # Pre-parse the dataset inputs so that only name and variant are tested.
    keys_per_spec = [
        [(dsin.name, dsin.variant) for dsin in spec["dataset_inputs"]] for spec in dataspecs
    ]
    # Sets make every membership test O(1).
    # NOTE(review): assumes name and variant are hashable -- confirm.
    keysets = [set(keys) for keys in keys_per_spec]
    all_pdfs = [spec["pdf"] for spec in dataspecs]

    already_mismatched = set()
    res = []
    for ispec, (spec, keys) in enumerate(zip(dataspecs, keys_per_spec)):
        for iother, other_keys in enumerate(keysets):
            # A dataspec is never compared against itself
            if iother == ispec:
                continue
            for dsin, key in zip(spec["dataset_inputs"], keys):
                # Keep track of what has been reported to avoid duplicates
                if key in already_mismatched or key in other_keys:
                    continue
                already_mismatched.add(key)
                # prepare output for plot_fancy
                res.append(
                    {
                        "dataset_input": dsin,
                        "dataset_name": dsin.name,
                        "theoryid": spec["theoryid"],
                        # every entry gets its own list, so they can be modified independently
                        "pdfs": list(all_pdfs),
                        "fit": spec["fit"],
                    }
                )
    return res

def produce_matched_positivity_from_dataspecs(self, dataspecs):
"""Like produce_matched_datasets_from_dataspecs but for positivity datasets."""
self._check_dataspecs_type(dataspecs)
Expand All @@ -1014,7 +1061,6 @@ def produce_matched_positivity_from_dataspecs(self, dataspecs):
names = {(p.name): (p) for p in pos}
all_names.append(names)
used_set = set.intersection(*(set(d) for d in all_names))

res = []
for k in used_set:
inres = {"posdataset_name": k}
Expand Down
27 changes: 26 additions & 1 deletion validphys2/src/validphys/scripts/vp_comparefits.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def add_positional_arguments(self, parser):
help="Use LUX basis (which include the photon) for the report",
action='store_true',
)

parser.set_defaults()

def try_complete_args(self):
Expand Down Expand Up @@ -178,6 +177,7 @@ def get_commandline_arguments(self, cmdline=None):
args['config_yml'] = comparefittemplates.template_pol_path
else:
args['config_yml'] = comparefittemplates.template_path

return args

def complete_mapping(self):
Expand Down Expand Up @@ -223,8 +223,16 @@ def complete_mapping(self):
'unpolarized_bc': {'from_': 'positivity_bound'},
}
)
are_the_same = self.check_identical_theory_cuts_covmat()
if are_the_same:
log.info("Adding mismatched datasets page: identical theory, data cuts and covmat detected")
else:
autosettings["mismatched_information"] = {
"template_text": "Mismatched datasets cannot be shown due to cuts theory, data cuts and/or covmat not being identical"
}
return autosettings


def get_config(self):
self.try_complete_args()
# No error handling here because this is our internal file
Expand All @@ -234,6 +242,23 @@ def get_config(self):
c = yaml_safe.load(f)
c.update(self.complete_mapping())
return self.config_class(c, environment=self.environment)

def check_identical_theory_cuts_covmat(self):
    """
    Tell whether the current and the reference fit share theory, cuts and thcovmat.

    The runcards of ``args['current_fit']`` and ``args['reference_fit']`` are
    read through the loader of the environment, and their theory ID
    (``theory.theoryid``), ``datacuts`` and ``theorycovmatconfig`` entries are
    compared. Entries missing from a runcard are treated as ``None``.

    Returns True only if the three settings are equal in both fits, which is
    the case in which a mismatched datasets page is added to the report.
    """
    fit_args = self.args
    loader = self.environment.loader
    # Load both runcards before looking at any of their entries
    current, reference = (
        loader.check_fit(fit_args[key]).as_input() for key in ('current_fit', 'reference_fit')
    )

    # (value in current fit, value in reference fit) for every compared setting
    compared_settings = (
        (
            current.get("theory", {}).get("theoryid"),
            reference.get("theory", {}).get("theoryid"),
        ),
        (current.get("datacuts"), reference.get("datacuts")),
        (current.get("theorycovmatconfig"), reference.get("theorycovmatconfig")),
    )
    return all(ours == theirs for ours, theirs in compared_settings)


def main():
Expand Down
Loading