From c93b95817602ebd5109d5632723e534356887145 Mon Sep 17 00:00:00 2001 From: Andrew42 Date: Thu, 3 Nov 2022 18:22:28 -0500 Subject: [PATCH 1/5] Implement new clipping to avoid t2w crash --- topcoffea/modules/datacard_tools.py | 30 +++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/topcoffea/modules/datacard_tools.py b/topcoffea/modules/datacard_tools.py index 546f0dae4..c227096b5 100644 --- a/topcoffea/modules/datacard_tools.py +++ b/topcoffea/modules/datacard_tools.py @@ -15,7 +15,11 @@ from topcoffea.modules.paths import topcoffea_path import topcoffea.modules.eft_helper as efth -PRECISION = 6 # Decimal point precision in the text datacard output +PRECISION = 6 # Decimal point precision in the text datacard output +NOM_CLIP_SCALE = 1e-3 # When clipping negative yield bins, this is the ratio to the nominal yield used + +# np.set_printoptions(precision=8,sign=' ',floatmode='fixed') +np.set_printoptions(linewidth=100,formatter={'float': lambda x: f"{x:>+12.8f}"}) def prune_axis(h,axis,to_keep): """ Convenience method to remove all categories except for a selected subset.""" @@ -181,7 +185,8 @@ class DatacardMaker(): "o0pt": [0,100,200,400], "bl0pt": [0,100,200,400], "l0pt": [0,50,100,200], - "lj0pt": [0,150,250,500] + # "lj0pt": [0,150,250,500], + "lj0pt": [0,500], } YEARS = ["UL16","UL16APV","UL17","UL18"] @@ -807,22 +812,31 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins): for sp_key,arr in data_sm.items(): data_obs += arr for base,v in decomposed_templates.items(): + # There should be only 1 sparse axis at this point, the systematics axis proc_name = f"{p}_{base}" col_width = max(len(proc_name),col_width) text_card_info[proc_name] = { "shapes": set(), "rate": -1 } - # There should be only 1 sparse axis at this point, the systematics axis + # Construct a positive non-zero scaled down version of the nominal yields + if len(v): + nz_nom_arr = np.abs(v[('nominal',)][0]*NOM_CLIP_SCALE) for sp_key,arr in v.items(): if crop_negative_bins: - negative_bin_mask = np.where( arr[0] < 0) # see where bins are negative - arr[0][negative_bin_mask] = np.zeros_like( arr[0][negative_bin_mask] ) # set those to zero + bin_mask = np.where( arr[0] < 0) # see where bins are negative + if self.verbose and np.sum(nz_nom_arr[bin_mask] > 0): + print(f"{' '*2}{proc_name}_{sp_key[0]}: {arr[0][bin_mask]} -> {nz_nom_arr[bin_mask]}") + print(f"{' '*6}{'Before:':<7} {arr[0]}") + arr[0][bin_mask] = nz_nom_arr[bin_mask] # replace negative values with non-zero values if arr[1] is not None: - arr[1][negative_bin_mask] = np.zeros_like( arr[1][negative_bin_mask] ) # if there's a sumw2 defined, that one's set to zero as well. Otherwise we will get 0 +/- something, which is compatible with negative - + # If there's a sumw2 defined, that one's clipped as well. + # Otherwise we will get 0 +/- something, which is compatible with + # negative + arr[1][bin_mask] = nz_nom_arr[bin_mask]**2 + if self.verbose and np.sum(nz_nom_arr[bin_mask] > 0): + print(f"{' '*6}{'After:':<7} {arr[0]}") syst = sp_key[0] - sum_arr = sum(arr[0]) if syst == "nominal" and base == "sm": if self.verbose: From 88cd668169d417f6633945627cb57d874edc72ff Mon Sep 17 00:00:00 2001 From: Andrew42 Date: Thu, 3 Nov 2022 18:24:35 -0500 Subject: [PATCH 2/5] restore proper lj0pt binning --- topcoffea/modules/datacard_tools.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/topcoffea/modules/datacard_tools.py b/topcoffea/modules/datacard_tools.py index c227096b5..f0734af4c 100644 --- a/topcoffea/modules/datacard_tools.py +++ b/topcoffea/modules/datacard_tools.py @@ -185,8 +185,7 @@ class DatacardMaker(): "o0pt": [0,100,200,400], "bl0pt": [0,100,200,400], "l0pt": [0,50,100,200], - # "lj0pt": [0,150,250,500], - "lj0pt": [0,500], + "lj0pt": [0,150,250,500], } YEARS = ["UL16","UL16APV","UL17","UL18"] From 0e1069bbe39a39c12e54864f52548e8a385c272e Mon Sep 17 00:00:00 2001 From: Andrew42 Date: Fri, 4 Nov 2022 12:01:38 -0500 Subject: [PATCH 3/5] Minor change to variable naming --- topcoffea/modules/datacard_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/topcoffea/modules/datacard_tools.py b/topcoffea/modules/datacard_tools.py index f0734af4c..72f50f760 100644 --- a/topcoffea/modules/datacard_tools.py +++ b/topcoffea/modules/datacard_tools.py @@ -810,9 +810,9 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins): raise RuntimeError("filling obs data more than once!") for sp_key,arr in data_sm.items(): data_obs += arr - for base,v in decomposed_templates.items(): + for eft_term,v in decomposed_templates.items(): # There should be only 1 sparse axis at this point, the systematics axis - proc_name = f"{p}_{base}" + proc_name = f"{p}_{eft_term}" col_width = max(len(proc_name),col_width) text_card_info[proc_name] = { "shapes": set(), @@ -837,7 +837,7 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins): print(f"{' '*6}{'After:':<7} {arr[0]}") syst = sp_key[0] sum_arr = sum(arr[0]) - if syst == "nominal" and base == "sm": + if syst == "nominal" and eft_term == "sm": if self.verbose: print(f"\t{proc_name:<12}: {sum_arr:.4f} {arr[0]}") if not self.use_real_data: From 30bec25e61cdd5d475b4fe64a620f0cd6fd11dae Mon Sep 17 00:00:00 2001 From: Andrew42 Date: Thu, 8 Dec 2022 13:45:40 -0600 Subject: [PATCH 4/5] Add handling of appl axis and modify clipping procedure --- topcoffea/modules/datacard_tools.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/topcoffea/modules/datacard_tools.py b/topcoffea/modules/datacard_tools.py index 72f50f760..8ffa68aef 100644 --- a/topcoffea/modules/datacard_tools.py +++ b/topcoffea/modules/datacard_tools.py @@ -431,6 +431,10 @@ def read(self,fpath): continue h = h.remove(to_remove,"sample") + # Integrate out the application region axis if its present + if "appl" in [x.name for x in h.sparse_axes()]: + h = h.integrate("appl",["isSR_2lSS","isSR_3l","isSR_4l"]) # This is pretty hardcoded right now, might want to fix + if not self.do_nuisance: # Remove all shape systematics h = prune_axis(h,"systematic",["nominal"]) @@ -794,6 +798,10 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins): text_card_info = {} outf_root_name = os.path.join(self.out_dir,outf_root_name) with uproot.recreate(outf_root_name) as f: + # Get a reference for how many total events (ignoring signal processes) are in a given bin + ch_hist.set_sm() + ref_bins,ref_stats = ch_hist.remove(["data"]+list(self.SIGNALS),"sample").integrate("sample").integrate("systematic",["nominal"]).values(sumw2=True,overflow='all')[()] + np.sqrt(ref_stats,out=ref_stats) for p,wcs in selected_wcs.items(): proc_hist = ch_hist.integrate("sample",[p]) if self.verbose: @@ -818,9 +826,26 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins): "shapes": set(), "rate": -1 } - # Construct a positive non-zero scaled down version of the nominal yields + # Construct a positive non-zero scaled down version of the nominal yields and + # check if any negative yield bins are 'large' if len(v): - nz_nom_arr = np.abs(v[('nominal',)][0]*NOM_CLIP_SCALE) + nom_arr = v[('nominal',)][0] + bin_mask = np.where( nom_arr < 0) + chk_arr = np.zeros_like(nom_arr) + np.divide(nom_arr,ref_bins,out=chk_arr,where=ref_bins != 0) + + # if np.sum(np.where(np.abs(chk_arr[bin_mask]) > 0.01)): + if np.sum(np.where(np.abs(nom_arr[bin_mask]) > ref_stats[bin_mask])): + diff_arr = ref_stats - np.abs(nom_arr) + print(f"ERROR: {proc_name} has bin with large negative contribution") + print(f"{' '*6}Reference: {ref_bins}") + print(f"{' '*6}Ref stats: {ref_stats}") + print(f"{' '*6}Nominal: {nom_arr}") + print(f"{' '*6}Diff: {diff_arr}") + # print(f"{' '*6}Ratio: {chk_arr}") + + nz_nom_arr = np.abs(nom_arr*NOM_CLIP_SCALE) + for sp_key,arr in v.items(): if crop_negative_bins: bin_mask = np.where( arr[0] < 0) # see where bins are negative From 69a7f29eee11debba37f9e678135a7dd93aa2504 Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Wed, 28 Dec 2022 23:17:22 -0500 Subject: [PATCH 5/5] It seems the name of loop variable changed --- topcoffea/modules/datacard_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topcoffea/modules/datacard_tools.py b/topcoffea/modules/datacard_tools.py index 87a616a84..a1cdc2d4f 100644 --- a/topcoffea/modules/datacard_tools.py +++ b/topcoffea/modules/datacard_tools.py @@ -957,7 +957,7 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins): hist_name = hist_name.replace(syst_base,split_syst) all_shapes.add(split_syst) text_card_info[proc_name]["shapes"].add(split_syst) - if base == "sm" and self.verbose: + if eft_term == "sm" and self.verbose: print(f"\tDecorrelate {p} for {syst_base} into {split_syst} ({syst.replace(syst_base,'')})") else: all_shapes.add(syst_base)