import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
plt.rcParams["figure.figsize"] = [5, 5]
def getMockData(x_range, y_range):
"""The additional exponentiation of the counts is to still have the nice
shapes even though we need to take the logarithm for the actual data
to display nicely.
"""
gauss = lambda x, mu, sigma: np.exp(-(x-mu)**2/2/sigma**2)
# In Z->nunu sample.
# sig1 = lambda x: gauss(x, 0.75, .1)
# bkg1 = lambda x: .04 - .02*x + .5*gauss(x, .4, .2) + gauss(x, 0, .07)
sig1 = lambda x: 1 / ((1.05 - x )**2) #+ 100 * np.exp(gauss(x, 1.0, .1))
tmp1 = lambda x: 1 / ((0.05 + x ) * (1.01 - x )) + np.exp(2*x)
bkg1 = lambda x: tmp1(x) + 1e-4 #- tmp1(0)*x/(0.05 + x )
# In Z->e+e- sample.
# sig_y = lambda y: gauss(y, .75, .07)
sig_y = lambda x: 1
sig_2d = lambda x, y: sig1(x) * sig_y(y)
gauss2d = lambda x, y, x0, y0, sx, sy, rho: np.exp(
-.5*((x-x0)**2/sx**2 + 2*(x-x0)*(y-y0)/sx/sy*rho + (y-y0)**2/sy**2))
bkg_2d = lambda x, y: (
+ .2/(x*y + .1)
+ np.maximum(0,
3*gauss2d(x, y, .6, .15, .5, .2, .7)
+ 2*gauss2d(x, y, .25, .75, .15, .2, -.3))
)
#s1d = np.exp(sig1(x_range))
s1d = sig1(x_range)
b1d = bkg1(x_range)
s1d = s1d / s1d.sum() * 1e4
b1d = b1d / b1d.sum() * 1.3e6
# Build the 2D hist.
X, Y = np.meshgrid(x_range, y_range)
s2d = sig_2d(X, Y)
b2d = bkg_2d(X, Y)
s2d = s2d / s2d.sum() * 1e4
b2d = b2d / b2d.sum() * 1.3e6
return s1d, b1d, s2d, b2d
mock_data = False
# Some tuning parameters.
default_cut_x = .6
x_range = np.linspace(0, 1, 20, endpoint=False)
y_range = np.linspace(0, 1, 20, endpoint=False)
X, Y = np.meshgrid(x_range, y_range)
if mock_data:
s1d, b1d, s2d, b2d = getMockData(x_range, y_range)
y_label = "arbirtary event counts"
else:
from higgsstrahlung_adapter import getData
s1d, b1d, s2d, b2d = getData(x_range, y_range)
y_label = "N$_\mathrm{ev}$ / 250 ifb"
Rather loose preselection. The signal purity after this preselection is O(1%).
When searching for the $ZH \rightarrow \nu\bar{\nu} H$ signal the assumption is that everything that is visible in the event stems from the Higgs decay.
e.g. # charged Hadrons, $M_\textrm{vis} \widehat{=} M_H$, $p_T$ of leading lepton, ...
Note that the probability density functions are shown - Normalization and shape do not correspond to what is expected to be found in the data.
def draw1D(ax, x, label_and_y, cut_x=0, xy_flipped=False, lim=None):
max_y = 0
for label, y in label_and_y:
y
max_y = max(max_y, max(y))
if xy_flipped:
ax.plot(y, x, label=label)
ax.fill_betweenx(x, y, where=x>=cut_x, alpha=.5)
else:
ax.plot(x, y, label=label)
ax.fill_between(x, y, where=x>=cut_x, alpha=.5)
if xy_flipped:
ax.hlines(cut_x, 0, max_y, color="black", ls="--", label="cut")
ax.set_xlabel(y_label)
ax.set_xscale("log")
if lim:
ax.set_xlim(lim)
else:
ax.vlines(cut_x, 0, max_y, color="black", ls="--", label="cut")
ax.set_ylabel(y_label)
ax.set_yscale("log")
if lim:
ax.set_ylim(lim)
def drawNu(ax, cut_x=0):
x = x_range
label_and_y = [("sig", s1d), ("bkg", b1d)]
draw1D(ax, x, label_and_y, cut_x)
ax.set_xlabel("BDT$_\mathrm{H}$")
ax.set_title("$Z \\rightarrow \\nu\\bar{\\nu}$")
plt.legend()
fig_nu, ax_nu = plt.subplots()
drawNu(ax_nu, cut_x=default_cut_x)
fig_nu.savefig("fig/BDT_H_nu.png", facecolor="white", dpi=300)
In the analyis, both $Z \rightarrow e^+e^-$ and $Z \rightarrow \mu^+\mu^-$ events are used in the reference sample. This sample is used to estimate the (signal) efficiency of the cut on the BDT output (see above).
After seperating the (presumed) Higgs and Z boson remnants (and a again a preselection), the distribution in the BDT output variable can be shown. Under the assumption that the seperation step worked well, the signal distribution has the same shape as in the $Z\rightarrow \nu\bar\nu$ Higgsstrahlung case. The relative normalization of the two signal curves is determined by the Z boson branching ratios.
fig_eH, ax_eH = plt.subplots()
label_and_y = [
("sig", np.sum(s2d, axis=1)),
("bkg", np.sum(b2d, axis=1)),
]
draw1D(ax_eH, x_range, label_and_y, cut_x=default_cut_x)
ax_eH.set_xlabel("BDT$_\mathrm{H}$")
ax_eH.set_title("$Z \\rightarrow e^+ e^-$")
plt.legend()
plt.savefig("fig/BDT_H_e1.png", facecolor="white", dpi=300)
In contrast to the $Z \rightarrow \nu \bar\nu$ sample we now have additional information from the Z decay available.
So far this information is used to perform a strong preselection. It is hard to establish a notion of a good preselection in this way. In a counting experiment we would want to optimize the selection such that it yields a high efficiency $\times$ purity and thus the smallest possible poissonian uncertainty.
In this analysis we are interested in minimizing the uncertainty on the Higgstrahlung cross section $\sigma_\mathrm{ZH}$(LCWS proceedings). With our approach, the dependency of $\sigma_\mathrm{ZH}$ on the BDT$_\mathrm{H}$ cut and the Z-based selection is more complicated. Optimizing with respect to efficiency $\times$ purity will not yield ideal results.
Employing a BDT for the Higgs-part selection means that we build a powerful one-dimensional distribution (as a representation of the space of the training variables). The idea is that the BDT output has a high seperating power between signal and background.
In the following we want to explore what we can do if we have a one-dimensional representation of the Z$(\rightarrow ee)$ part of the event, $f(\mathrm{Z})$. This is not necessarily a complex construct (e.g. BDT). A simpler proposal is:
$f(\mathrm{Z}) = \mathrm{Norm} \cdot (M_\textrm{ee} - 91)^2 + (M_\textrm{recoil} - 125)^2$.
An illustration of the 2-dimensional distribution of the signal in the $f(\mathrm{Z})-\mathrm{BDT}_\mathrm{H}$ space is shown below. The gaussian form is only chosen for simplicity and not expected. As the decays of the Higgs boson and the Z boson in the Higgsstrahlung event are independent, the uncorrelatedness of the two dimensions is a feature that should stay true with the signal distribution from data.
fig, ax_e_2d_signal = plt.subplots()
ax_e_2d_signal.contourf(x_range, y_range, s2d, ls="-")
ax_e_2d_signal.set_xlabel("BDT$_\mathrm{H}$")
ax_e_2d_signal.set_ylabel("$f(\mathrm{Z})$")
fig.savefig("fig/H_vs_Z_signal.png", facecolor="white", dpi=300)
The following figure is an artist's view of the 2-dimensional distributions of signal and background in the $Z \rightarrow e^+e^-$ sample.
The background distribution is shown as colored contour areas. The signal distribution is superimposed as contour lines.
def drawHZ(ax, cut_x=0, cut_y=0):
def logZ(z):
log_z = np.log(z)
log_z[log_z == -np.inf] = min(log_z[log_z != - np.inf])
return log_z
ax.set_xlabel("BDT$_\mathrm{H}$")
ax.set_ylabel("$f(\mathrm{Z})$")
ax.contourf(x_range, y_range, logZ(b2d))
z = logZ(s2d)
z[z < 1e-3*z.max()] = 0 # To avoid drawing artifacts from low statistics.
ax.contour(X, Y, z)
ax.fill_between(x_range, cut_y,
alpha=.75, facecolor="white", hatch="/", edgecolor="black")#"red")
ax.vlines(cut_x, cut_y, max(y_range), color="black", ls="--")
fig, ax_e_2d = plt.subplots()
drawHZ(ax_e_2d, 0, 0)
fig.savefig("fig/H_vs_Z_all.png", facecolor="white", dpi=300)
def allRegionsPlot(cut_x=0, cut_y=0):
fig = plt.figure(figsize=(7, 8))
grid = plt.GridSpec(5, 4, hspace=0.3, wspace=0.3)
ax_main = fig.add_subplot(grid[2:, :-1])
ax_x_pdf1 = fig.add_subplot(grid[0, :-1], sharex=ax_main)
ax_x_pdf2 = fig.add_subplot(grid[1, :-1], sharex=ax_main)
ax_y_pdf = fig.add_subplot(grid[2:, -1], sharey=ax_main)
plt.setp(ax_x_pdf1.get_xticklabels(), visible=False)
plt.setp(ax_x_pdf2.get_xticklabels(), visible=False)
plt.setp(ax_y_pdf.get_yticklabels(), visible=False)
drawHZ(ax_main, cut_x, cut_y)
nu_label_and_y = [("sig", s1d), ("bkg", b1d)]
idx_cut_x = np.argwhere(x_range >= cut_x)[0][0]
idx_cut_y = np.argwhere(y_range >= cut_y)[0][0]
eH_label_and_y = [
("sig", np.sum(s2d[idx_cut_y:, :], axis=0)),
("bkg", np.sum(b2d[idx_cut_y:, :], axis=0)),
]
eZ_label_and_y = [
("sig", np.sum(s2d[:, idx_cut_x:], axis=1)),
("bkg", np.sum(b2d[:, idx_cut_x:], axis=1)),
]
nu_lim = (.1, 1.5*b1d.max())
eH_lim = (.1, 1.5*np.sum(b2d, axis=0).max())
eZ_lim = (.1, 1.5*np.sum(b2d, axis=1).max())
draw1D(ax_x_pdf1, x_range, nu_label_and_y, cut_x, lim=nu_lim)
draw1D(ax_x_pdf2, x_range, eH_label_and_y, cut_x, lim=eH_lim)
draw1D(ax_y_pdf, y_range, eZ_label_and_y, cut_y, xy_flipped=True, lim=eZ_lim)
ax_x_pdf1.set_title("$Z \\rightarrow \\nu\\bar{\\nu}$", fontsize=8)
ax_x_pdf2.set_title("$Z \\rightarrow e^+e^-$", fontsize=8)
ax_y_pdf.set_title("$Z \\rightarrow e^+e^-$", fontsize=8)
ax_x_pdf1.legend(loc="upper left")
return fig
#for cut_x, cut_y in itertools.product(np.arange(0, 1, .1), np.arange(0, 1, .1)):
for cut_x, cut_y in [(.5, .5)]:
fig = allRegionsPlot(cut_x, cut_y)
fig.savefig(f"all_regions/x{cut_x:.2f}_x{cut_y:.2f}.png", facecolor="white", dpi=500)
#!convert -delay 100 -loop 0 -limit disk 4GiB all_regions/*_x0.2.png all_regions.gif
all_counts = []
for cut_x, cut_y in itertools.product(np.arange(0, 1, .1), np.arange(0, 1, .1)):
N = {}
N["H"] = cut_x
N["Z"] = cut_y
idx_cut_x = np.argwhere(x_range >= cut_x)[0][0]
idx_cut_y = np.argwhere(y_range >= cut_y)[0][0]
N["s_nu"] = np.sum(s1d[idx_cut_x:])
N["b_nu"] = np.sum(b1d[idx_cut_x:])
N["s_e1"] = np.sum(s2d[idx_cut_y:, idx_cut_x:])
N["b_e1"] = np.sum(b2d[idx_cut_y:, idx_cut_x:])
N["s_e1_not"] = np.sum(s2d[idx_cut_y:, :idx_cut_x])
N["b_e1_not"] = np.sum(b2d[idx_cut_y:, :idx_cut_x])
all_counts.append(N)
N = pd.DataFrame(all_counts)
N["counting_unc"] = ((N.s_nu + N.b_nu) / N.s_nu ** 2)**.5
N["efficiency_unc"] = (
(N.s_e1 + N.s_e1_not + N.b_e1 + N.b_e1_not) / (N.s_e1 + N.s_e1_not) ** 2
+ (N.s_e1 + N.b_e1) / N.s_e1 ** 2
- 2*(N.s_e1 + N.b_e1) / (N.s_e1 * (N.s_e1 + N.s_e1_not))
)**.5
N["cs_unc"] = (N["counting_unc"]**2 + N["efficiency_unc"]**2)**.5
print(N.counting_unc.min())
N
for unc in [
"counting_unc",
#"efficiency_unc",
#"cs_unc"
]:
(100*N[unc]).plot(label=unc)
plt.legend()
#plt.yscale("log")
plt.xticks(np.arange(0, len(N)+1, 10))
plt.grid()
#plt.ylim((0, 5))
unc = []
N_unc = []
eff_unc = []
for x, df in N.groupby("H"):
idx = df["cs_unc"].idxmin()
unc.append(N.iloc[idx]["cs_unc"])
eff_unc.append(N.iloc[idx]["efficiency_unc"])
N_unc.append(N.iloc[idx]["counting_unc"])
plt.plot(unc, label="unc")
plt.plot(N_unc, label="N_unc")
plt.plot(eff_unc, label="eff_unc")
plt.legend()