From 5abec7e8cbc09cea45f827e25391c722dbf34e92 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Tue, 6 Jul 2021 22:22:35 +0100 Subject: [PATCH 1/6] plotting for nonstandard dataset types --- fast_plotter/plotting.py | 134 +++++++++++++++++++++++++++++++-------- 1 file changed, 108 insertions(+), 26 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 5594caa..e8c1b8b 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -6,6 +6,7 @@ import matplotlib.pyplot as plt import matplotlib.colors as mc import logging +import re logger = logging.getLogger(__name__) @@ -26,7 +27,7 @@ def change_brightness(color, amount): def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dataset_col="dataset", yscale="log", lumi=None, annotations=[], dataset_order=None, continue_errors=True, bin_variable_replacements={}, colourmap="nipy_spectral", - figsize=None, **kwargs): + figsize=None, other_dset_types={}, **kwargs): figures = {} dimensions = utils.binning_vars(df) @@ -53,7 +54,7 @@ def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dat plot = plot_1d_many(projected, data=data, signal=signal, dataset_col=dataset_col, scale_sims=lumi, colourmap=colourmap, dataset_order=dataset_order, - figsize=figsize, **kwargs + figsize=figsize, other_dset_args=other_dset_types, **kwargs ) figures[(("project", dim), ("yscale", yscale))] = plot except Exception as e: @@ -107,7 +108,8 @@ def get_colour(self, index=None, name=None): class FillColl(object): def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=None, - colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, expected_xs=None): + colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, + expected_xs=None, other_dset_args={}): self.calls = -1 self.expected_xs = expected_xs self.colors = ColorDict(n_colors=n_colors, order=dataset_order, @@ -117,6 +119,8 @@ def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=N self.fill = fill self.line = line self.linewidth = linewidth + self.other_dset_args = other_dset_args + self.dataset_colours=dataset_colours def pre_call(self, column): ax = self.ax @@ -129,16 +133,30 @@ def pre_call(self, column): def __call__(self, col, **kwargs): ax, x, y, color = self.pre_call(col) - if self.fill: + if self.fill and not self.other_dset_args: draw(ax, "fill_between", x=x, ys=["y1"], y1=y, label=col.name, expected_xs=self.expected_xs, linewidth=0, color=color, **kwargs) if self.line: if self.fill: - label = None - color = "k" - width = self.linewidth - style = "-" + if self.other_dset_args: + style = self.other_dset_args['style'] + label = col.name if self.other_dset_args['add_label'] else None + color = self.other_dset_args['colour'] if self.other_dset_args['colour'] and type(self.other_dset_args['colour']) != dict\ + else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ + else color + self.color=color + if type(color) == dict: + logger.warn(f"You didn't specify a colour for dataset '{col.name}'," + + f" and dataset was not found in 'dataset_colours', with keys {color.keys()}." + + " Using black.") + color = "k" + width = self.linewidth + else: + style = "-" + label = None + color = "k" + width = self.linewidth else: color = None label = col.name @@ -162,7 +180,8 @@ def __call__(self, col, **kwargs): def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", - dataset_colours=None, colourmap="nipy_spectral", dataset_order=None): + dataset_colours=None, colourmap="nipy_spectral", + dataset_order=None, other_cfg_args={}): expected_xs = df.index.unique(x_axis).values if kind == "scatter": draw(ax, "errorbar", x=df.reset_index()[x_axis], ys=["y", "yerr"], y=df[y], yerr=df[yerr], @@ -202,6 +221,29 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", y_up = (summed[y] + summed[yerr]).values draw(ax, "fill_between", x, ys=["y1", "y2"], y2=y_down, y1=y_up, color="gray", alpha=0.7, expected_xs=expected_xs) + elif kind == "other_dset_types": + if 'regex' not in other_cfg_args: + raise RuntimeError("Must specify a regex for other plotting datatype to be applied to") + options = ["alpha", "style", "width", "add_label", "add_error", "regex"] + alpha, style, width, add_label, add_error, regex = [other_cfg_args[key] for key in options] + filler = FillColl(n_datasets, ax=ax, fill=True, colourmap=colourmap, dataset_colours=dataset_colours, + dataset_order=dataset_order, expected_xs=expected_xs, linewidth=width, + other_dset_args=other_cfg_args) + vals.apply(filler, axis=0, step="mid") + if add_error: + for dset in list(set(df.reset_index()[dataset_col])): + if not re.compile(regex).match(dset): + continue + color = filler.color + if type(color) == dict: + logger.warn(f"You didn't specify a colour for dataset '{dset}'," + + f" and dataset was not found in 'dataset_colours', with keys {color.keys()}." + + " Using black.") + color = "k" + dset_df = df.reset_index().loc[df.reset_index()[dataset_col] == dset].reset_index() + x = dset_df[x_axis] + draw(ax, "fill_between", x, ys=["y1", "y2"], y1=dset_df.eval("sumw+sqrt(sumw2)"), + y2=dset_df.eval("sumw-sqrt(sumw2)"), color=color, alpha=alpha, expected_xs=expected_xs) else: raise RuntimeError("Unknown value for 'kind', '{}'".format(kind)) @@ -330,7 +372,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", kind_data="scatter", kind_sims="fill-error-last", kind_signal="line", scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral", dataset_order=None, figsize=(5, 6), show_over_underflow=False, - dataset_colours=None, err_from_sumw2=False, data_legend="Data", **kwargs): + dataset_colours=None, err_from_sumw2=False, data_legend="Data", other_dset_args={}, **kwargs): y = "sumw" yvar = "sumw2" yerr = "err" @@ -352,13 +394,39 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", else: in_df_signal = None + config_extend = [] + if other_dset_args: + for dset_type in other_dset_args.keys(): + dset_type_labels = other_dset_args[dset_type]['regex'] + other_defaults = {"style": "-", "alpha": 0.2, "width": 1, + "colour": [], "dset_type": dset_type, "add_label": True, + "add_error": True, "plot_ratio": False} + default_specs = {key: val for key, val + in other_defaults.items() + if key not in other_dset_args[dset_type].keys()} + other_dset_args[dset_type].update(default_specs) + in_df_other, in_df_sims = utils.split_data_sims( + in_df_sims, data_labels=dset_type_labels, dataset_level=dataset_col) + config_extend.append((in_df_other, None, "other_dset_types", + dset_type_labels, "plot_other_dset", other_dset_args[dset_type])) + else: + in_df_other = None + + def_cfg_args = {"dset_type": ""} + config = [(in_df_sims, plot_sims, kind_sims, "Monte Carlo", "plot_sims", def_cfg_args), + (in_df_data, plot_data, kind_data, data_legend, "plot_data", def_cfg_args), + (in_df_signal, plot_signal, kind_signal, "Signal", "plot_signal", def_cfg_args), + ] + + config.extend(config_extend) + if in_df_data is None or in_df_sims is None: summary = None if not summary: - fig, main_ax = plt.subplots(1, 1, figsize=figsize) + fig, main_ax = plt.subplots(1, 1, figsize=[float(i) for i in figsize]) else: fig, ax = plt.subplots( - 2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True, figsize=figsize) + 2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True, figsize=[float(i) for i in figsize]) fig.subplots_adjust(hspace=.1) main_ax, summary_ax = ax @@ -370,18 +438,14 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", "Too few dimensions to multiple 1D graphs, use plot_1d instead") x_axis = x_axis[0] - config = [(in_df_sims, plot_sims, kind_sims, "Monte Carlo", "plot_sims"), - (in_df_data, plot_data, kind_data, data_legend, "plot_data"), - (in_df_signal, plot_signal, kind_signal, "Signal", "plot_signal"), - ] - for df, combine, style, label, var_name in config: + for df, combine, style, label, var_name, other_cfg_args in config: if df is None or len(df) == 0: continue merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2) actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style, label=label, ax=main_ax, dataset_col=dataset_col, dataset_colours=dataset_colours, - colourmap=colourmap, dataset_order=dataset_order) + colourmap=colourmap, dataset_order=dataset_order, other_cfg_args=other_cfg_args) main_ax.set_xlabel(x_axis) if not summary: @@ -406,6 +470,22 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", plot_ratio(summed_data, summed_sims, x=x_axis, y=y, yerr=yerr, ax=summary_ax, error=error, ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"]) + if other_dset_args: + for df, combine, style, label, var_name, other_dset_args in config: + if (style == "other_dset_types") and (other_dset_args['plot_ratio']): + error = "both" + dset = other_dset_args['dset_type'] + color = dataset_colours[dset] if dset in dataset_colours else other_dset_args['colour'] + if type(color) == dict: + raise ValueError(f"Please specify a color for dataset '{dset}'. Datasets specified are {dataset_colours.keys()}") + add_error = other_dset_args['add_error'] + summed_dset = _merge_datasets( + df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) + if summed_data is not None: + plot_ratio(summed_data, summed_dset, x=x_axis, + y=y, yerr=yerr, ax=summary_ax, error=error, zorder=21, + ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"], + color=color, add_error=add_error) else: raise RuntimeError(err_msg) return main_ax, summary_ax @@ -441,7 +521,8 @@ def plot_1d(df, kind="line", yscale="lin"): return fig -def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="Data / MC"): +def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="Data / MC", + color="k", zorder=22, add_error=True): # make sure both sides agree with the binning merged = data.join(sims, how="left", lsuffix="data", rsuffix="sims") data = merged.filter(like="data", axis="columns").fillna(0) @@ -460,9 +541,10 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="D mask = (central != 0) & (lower != 0) ax.errorbar(x=x_axis[mask], y=central[mask], yerr=(lower[mask], upper[mask]), fmt="o", markersize=4, color="k") - draw(ax, "errorbar", x_axis[mask], ys=["y", "yerr"], - y=central[mask], yerr=(lower[mask], upper[mask]), - fmt="o", markersize=4, color="k") + if add_error: + draw(ax, "errorbar", x_axis[mask], ys=["y", "yerr"], + y=central[mask], yerr=(lower[mask], upper[mask]), + fmt="o", markersize=4, color="gray", zorder=zorder-1) elif error == "both": ratio = d / s @@ -471,10 +553,10 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="D draw(ax, "errorbar", x_axis, ys=["y", "yerr"], y=ratio, yerr=rel_d_err, - fmt="o", markersize=4, color="k") - draw(ax, "fill_between", x_axis, ys=["y1", "y2"], - y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1, - color="gray", alpha=0.7) + fmt="o", markersize=4, color=color, zorder=zorder) + if add_error: + draw(ax, "fill_between", x_axis, ys=["y1", "y2"], color="gray", + y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1, alpha=0.7, zorder=zorder-1) ax.set_ylim(ylim) ax.grid(True) From 3fd31afb2f7b483afc6dfb4b3bb95bc2c6601eed Mon Sep 17 00:00:00 2001 From: David Anthony Date: Tue, 6 Jul 2021 22:49:46 +0100 Subject: [PATCH 2/6] clean up --- fast_plotter/plotting.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index e8c1b8b..a97fc67 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -120,7 +120,7 @@ def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=N self.line = line self.linewidth = linewidth self.other_dset_args = other_dset_args - self.dataset_colours=dataset_colours + self.dataset_colours = dataset_colours def pre_call(self, column): ax = self.ax @@ -142,15 +142,11 @@ def __call__(self, col, **kwargs): if self.other_dset_args: style = self.other_dset_args['style'] label = col.name if self.other_dset_args['add_label'] else None - color = self.other_dset_args['colour'] if self.other_dset_args['colour'] and type(self.other_dset_args['colour']) != dict\ + color = self.other_dset_args['colour'] if self.other_dset_args['colour']\ else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ else color - self.color=color - if type(color) == dict: - logger.warn(f"You didn't specify a colour for dataset '{col.name}'," + - f" and dataset was not found in 'dataset_colours', with keys {color.keys()}." - + " Using black.") - color = "k" + self.color = color + self.other_dset_args['colour'] = color width = self.linewidth else: style = "-" @@ -235,11 +231,6 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", if not re.compile(regex).match(dset): continue color = filler.color - if type(color) == dict: - logger.warn(f"You didn't specify a colour for dataset '{dset}'," + - f" and dataset was not found in 'dataset_colours', with keys {color.keys()}." - + " Using black.") - color = "k" dset_df = df.reset_index().loc[df.reset_index()[dataset_col] == dset].reset_index() x = dset_df[x_axis] draw(ax, "fill_between", x, ys=["y1", "y2"], y1=dset_df.eval("sumw+sqrt(sumw2)"), @@ -476,8 +467,6 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", error = "both" dset = other_dset_args['dset_type'] color = dataset_colours[dset] if dset in dataset_colours else other_dset_args['colour'] - if type(color) == dict: - raise ValueError(f"Please specify a color for dataset '{dset}'. Datasets specified are {dataset_colours.keys()}") add_error = other_dset_args['add_error'] summed_dset = _merge_datasets( df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) From a0976d3df23b12745824ea2c381914e10e147c52 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Wed, 7 Jul 2021 14:26:42 +0100 Subject: [PATCH 3/6] pep8 compliance --- fast_plotter/plotting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index a97fc67..1b5c879 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -143,8 +143,8 @@ def __call__(self, col, **kwargs): style = self.other_dset_args['style'] label = col.name if self.other_dset_args['add_label'] else None color = self.other_dset_args['colour'] if self.other_dset_args['colour']\ - else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ - else color + else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ + else color self.color = color self.other_dset_args['colour'] = color width = self.linewidth @@ -152,7 +152,7 @@ def __call__(self, col, **kwargs): style = "-" label = None color = "k" - width = self.linewidth + width = self.linewidth else: color = None label = col.name From 8f5663d77c9b78a88d3721ad7a7de8edc23fb209 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Thu, 15 Jul 2021 23:30:59 +0100 Subject: [PATCH 4/6] Oversight in parsing of colours --- fast_plotter/plotting.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 1b5c879..d047dad 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -146,7 +146,7 @@ def __call__(self, col, **kwargs): else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ else color self.color = color - self.other_dset_args['colour'] = color + self.other_dset_args['tmp_colour'] = color width = self.linewidth else: style = "-" @@ -466,7 +466,9 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", if (style == "other_dset_types") and (other_dset_args['plot_ratio']): error = "both" dset = other_dset_args['dset_type'] - color = dataset_colours[dset] if dset in dataset_colours else other_dset_args['colour'] + color = dataset_colours[dset] if dset in dataset_colours\ + else other_dset_args['colour'] if other_dset_args['colour'] + else other_dset_args['tmp_colour'] add_error = other_dset_args['add_error'] summed_dset = _merge_datasets( df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) From e1b313213ca4cf123ce41ecf8afaa03494b4901f Mon Sep 17 00:00:00 2001 From: David Anthony Date: Thu, 15 Jul 2021 23:34:47 +0100 Subject: [PATCH 5/6] syntax --- fast_plotter/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index d047dad..4a7009d 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -467,7 +467,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", error = "both" dset = other_dset_args['dset_type'] color = dataset_colours[dset] if dset in dataset_colours\ - else other_dset_args['colour'] if other_dset_args['colour'] + else other_dset_args['colour'] if other_dset_args['colour']\ else other_dset_args['tmp_colour'] add_error = other_dset_args['add_error'] summed_dset = _merge_datasets( From 8a0c0e28373671b304f6056521d568f7cf734d93 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Mon, 19 Jul 2021 19:36:36 +0100 Subject: [PATCH 6/6] pep8 --- fast_plotter/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 4a7009d..d9b08e9 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -467,8 +467,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", error = "both" dset = other_dset_args['dset_type'] color = dataset_colours[dset] if dset in dataset_colours\ - else other_dset_args['colour'] if other_dset_args['colour']\ - else other_dset_args['tmp_colour'] + else other_dset_args['colour'] if other_dset_args['colour']\ + else other_dset_args['tmp_colour'] add_error = other_dset_args['add_error'] summed_dset = _merge_datasets( df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2)