From 33db77cbd3cd85cb21fd7c9b5419a747eb3cc801 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Tue, 6 Jul 2021 16:01:54 +0100 Subject: [PATCH 01/31] Support for line annotations --- fast_plotter/__main__.py | 4 ++-- fast_plotter/plotting.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 54221e5..dc03c7a 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -140,12 +140,12 @@ def process_one_file(infile, args): def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, limits={}, xtickrotation=None, **kwargs): for main_ax, summary_ax in plots.values(): - add_annotations(annotations, main_ax) + add_annotations(annotations, main_ax, summary_ax) if yscale: main_ax.set_yscale(yscale) if ylabel: main_ax.set_ylabel(ylabel) - main_ax.legend(**legend) + main_ax.legend(**legend).set_zorder(20) main_ax.grid(True) main_ax.set_axisbelow(True) for axis, lims in limits.items(): diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 5594caa..7e4231a 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -6,6 +6,7 @@ import matplotlib.pyplot as plt import matplotlib.colors as mc import logging +import re logger = logging.getLogger(__name__) @@ -422,9 +423,41 @@ def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_fr utils.calculate_error(df, do_rel_err=not err_from_sumw2) return df +def annotate_lines(cfg, main_ax, summary_ax): + linename = list(cfg.keys())[0] + annotDict = cfg[linename] + if 'values' not in annotDict.keys(): + raise(RuntimeError("Must provide values for line placement.")) + annotDefaults = {"style": "-", "alpha": 1, "width": 1.5, + "colour": 'k', "label": None, "vmin": 0, + "vmax": 1, "zorder": 10, "axes": ["main"]} + annotDict.update({key: value for key, value in annotDefaults.items() + if key not in annotDict.keys()}) + lineKeys = ['values', 'style', 
'alpha', 'width', 'colour', 'label', 'vmin', 'vmax', 'zorder', 'axes'] + if set(annotDict.keys()).difference(set(lineKeys)): + logger.warn("Invalid parameter(s) given to line annotations. Options are {}".format(lineKeys)) + values, style, alpha, width, colour, label, vmin, vmax, zorder, axes = [annotDict[key] for key in lineKeys] + for axis in axes: + awidth = 0.6 * width if (axis == 'summary') else width + ax = main_ax if (str(axis) == 'main') else summary_ax if (str(axis) == 'summary') else None + if ax is None: + logger.warn("Axis must exist and either be 'main' or 'summary'. {} is None".format(axis)) + continue + for value in values: + value = float(value) + if 'hline' in linename: + ax.axhline(value, vmin, vmax, color=colour, label=label, + alpha=alpha, ls=style, lw=awidth, zorder=zorder) + if 'vline' in linename: + ax.axvline(value, vmin, vmax, color=colour, label=label, + alpha=alpha, ls=style, lw=awidth, zorder=zorder) + -def add_annotations(annotations, ax): +def add_annotations(annotations, ax, summary_ax=None): for cfg in annotations: + if list(filter(lambda key: re.match("(.*hline.*|.*vline.*)", key), cfg.keys())): + annotate_lines(cfg, ax, summary_ax) + continue cfg = cfg.copy() s = cfg.pop("text") xy = cfg.pop("position") From 5abec7e8cbc09cea45f827e25391c722dbf34e92 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Tue, 6 Jul 2021 22:22:35 +0100 Subject: [PATCH 02/31] plotting for nonstandard dataset types --- fast_plotter/plotting.py | 134 +++++++++++++++++++++++++++++++-------- 1 file changed, 108 insertions(+), 26 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 5594caa..e8c1b8b 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -6,6 +6,7 @@ import matplotlib.pyplot as plt import matplotlib.colors as mc import logging +import re logger = logging.getLogger(__name__) @@ -26,7 +27,7 @@ def change_brightness(color, amount): def plot_all(df, project_1d=True, project_2d=True, data="data", 
signal=None, dataset_col="dataset", yscale="log", lumi=None, annotations=[], dataset_order=None, continue_errors=True, bin_variable_replacements={}, colourmap="nipy_spectral", - figsize=None, **kwargs): + figsize=None, other_dset_types={}, **kwargs): figures = {} dimensions = utils.binning_vars(df) @@ -53,7 +54,7 @@ def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dat plot = plot_1d_many(projected, data=data, signal=signal, dataset_col=dataset_col, scale_sims=lumi, colourmap=colourmap, dataset_order=dataset_order, - figsize=figsize, **kwargs + figsize=figsize, other_dset_args=other_dset_types, **kwargs ) figures[(("project", dim), ("yscale", yscale))] = plot except Exception as e: @@ -107,7 +108,8 @@ def get_colour(self, index=None, name=None): class FillColl(object): def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=None, - colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, expected_xs=None): + colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, + expected_xs=None, other_dset_args={}): self.calls = -1 self.expected_xs = expected_xs self.colors = ColorDict(n_colors=n_colors, order=dataset_order, @@ -117,6 +119,8 @@ def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=N self.fill = fill self.line = line self.linewidth = linewidth + self.other_dset_args = other_dset_args + self.dataset_colours=dataset_colours def pre_call(self, column): ax = self.ax @@ -129,16 +133,30 @@ def pre_call(self, column): def __call__(self, col, **kwargs): ax, x, y, color = self.pre_call(col) - if self.fill: + if self.fill and not self.other_dset_args: draw(ax, "fill_between", x=x, ys=["y1"], y1=y, label=col.name, expected_xs=self.expected_xs, linewidth=0, color=color, **kwargs) if self.line: if self.fill: - label = None - color = "k" - width = self.linewidth - style = "-" + if self.other_dset_args: + style = self.other_dset_args['style'] + label = col.name if 
self.other_dset_args['add_label'] else None + color = self.other_dset_args['colour'] if self.other_dset_args['colour'] and type(self.other_dset_args['colour']) != dict\ + else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ + else color + self.color=color + if type(color) == dict: + logger.warn(f"You didn't specify a colour for dataset '{col.name}'," + + f" and dataset was not found in 'dataset_colours', with keys {color.keys()}." + + " Using black.") + color = "k" + width = self.linewidth + else: + style = "-" + label = None + color = "k" + width = self.linewidth else: color = None label = col.name @@ -162,7 +180,8 @@ def __call__(self, col, **kwargs): def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", - dataset_colours=None, colourmap="nipy_spectral", dataset_order=None): + dataset_colours=None, colourmap="nipy_spectral", + dataset_order=None, other_cfg_args={}): expected_xs = df.index.unique(x_axis).values if kind == "scatter": draw(ax, "errorbar", x=df.reset_index()[x_axis], ys=["y", "yerr"], y=df[y], yerr=df[yerr], @@ -202,6 +221,29 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", y_up = (summed[y] + summed[yerr]).values draw(ax, "fill_between", x, ys=["y1", "y2"], y2=y_down, y1=y_up, color="gray", alpha=0.7, expected_xs=expected_xs) + elif kind == "other_dset_types": + if 'regex' not in other_cfg_args: + raise RuntimeError("Must specify a regex for other plotting datatype to be applied to") + options = ["alpha", "style", "width", "add_label", "add_error", "regex"] + alpha, style, width, add_label, add_error, regex = [other_cfg_args[key] for key in options] + filler = FillColl(n_datasets, ax=ax, fill=True, colourmap=colourmap, dataset_colours=dataset_colours, + dataset_order=dataset_order, expected_xs=expected_xs, linewidth=width, + other_dset_args=other_cfg_args) + vals.apply(filler, axis=0, step="mid") + if add_error: + for dset in 
list(set(df.reset_index()[dataset_col])): + if not re.compile(regex).match(dset): + continue + color = filler.color + if type(color) == dict: + logger.warn(f"You didn't specify a colour for dataset '{dset}'," + + f" and dataset was not found in 'dataset_colours', with keys {color.keys()}." + + " Using black.") + color = "k" + dset_df = df.reset_index().loc[df.reset_index()[dataset_col] == dset].reset_index() + x = dset_df[x_axis] + draw(ax, "fill_between", x, ys=["y1", "y2"], y1=dset_df.eval("sumw+sqrt(sumw2)"), + y2=dset_df.eval("sumw-sqrt(sumw2)"), color=color, alpha=alpha, expected_xs=expected_xs) else: raise RuntimeError("Unknown value for 'kind', '{}'".format(kind)) @@ -330,7 +372,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", kind_data="scatter", kind_sims="fill-error-last", kind_signal="line", scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral", dataset_order=None, figsize=(5, 6), show_over_underflow=False, - dataset_colours=None, err_from_sumw2=False, data_legend="Data", **kwargs): + dataset_colours=None, err_from_sumw2=False, data_legend="Data", other_dset_args={}, **kwargs): y = "sumw" yvar = "sumw2" yerr = "err" @@ -352,13 +394,39 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", else: in_df_signal = None + config_extend = [] + if other_dset_args: + for dset_type in other_dset_args.keys(): + dset_type_labels = other_dset_args[dset_type]['regex'] + other_defaults = {"style": "-", "alpha": 0.2, "width": 1, + "colour": [], "dset_type": dset_type, "add_label": True, + "add_error": True, "plot_ratio": False} + default_specs = {key: val for key, val + in other_defaults.items() + if key not in other_dset_args[dset_type].keys()} + other_dset_args[dset_type].update(default_specs) + in_df_other, in_df_sims = utils.split_data_sims( + in_df_sims, data_labels=dset_type_labels, dataset_level=dataset_col) + config_extend.append((in_df_other, None, "other_dset_types", + 
dset_type_labels, "plot_other_dset", other_dset_args[dset_type])) + else: + in_df_other = None + + def_cfg_args = {"dset_type": ""} + config = [(in_df_sims, plot_sims, kind_sims, "Monte Carlo", "plot_sims", def_cfg_args), + (in_df_data, plot_data, kind_data, data_legend, "plot_data", def_cfg_args), + (in_df_signal, plot_signal, kind_signal, "Signal", "plot_signal", def_cfg_args), + ] + + config.extend(config_extend) + if in_df_data is None or in_df_sims is None: summary = None if not summary: - fig, main_ax = plt.subplots(1, 1, figsize=figsize) + fig, main_ax = plt.subplots(1, 1, figsize=[float(i) for i in figsize]) else: fig, ax = plt.subplots( - 2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True, figsize=figsize) + 2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True, figsize=[float(i) for i in figsize]) fig.subplots_adjust(hspace=.1) main_ax, summary_ax = ax @@ -370,18 +438,14 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", "Too few dimensions to multiple 1D graphs, use plot_1d instead") x_axis = x_axis[0] - config = [(in_df_sims, plot_sims, kind_sims, "Monte Carlo", "plot_sims"), - (in_df_data, plot_data, kind_data, data_legend, "plot_data"), - (in_df_signal, plot_signal, kind_signal, "Signal", "plot_signal"), - ] - for df, combine, style, label, var_name in config: + for df, combine, style, label, var_name, other_cfg_args in config: if df is None or len(df) == 0: continue merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2) actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style, label=label, ax=main_ax, dataset_col=dataset_col, dataset_colours=dataset_colours, - colourmap=colourmap, dataset_order=dataset_order) + colourmap=colourmap, dataset_order=dataset_order, other_cfg_args=other_cfg_args) main_ax.set_xlabel(x_axis) if not summary: @@ -406,6 +470,22 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", 
plot_ratio(summed_data, summed_sims, x=x_axis, y=y, yerr=yerr, ax=summary_ax, error=error, ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"]) + if other_dset_args: + for df, combine, style, label, var_name, other_dset_args in config: + if (style == "other_dset_types") and (other_dset_args['plot_ratio']): + error = "both" + dset = other_dset_args['dset_type'] + color = dataset_colours[dset] if dset in dataset_colours else other_dset_args['colour'] + if type(color) == dict: + raise ValueError(f"Please specify a color for dataset '{dset}'. Datasets specified are {dataset_colours.keys()}") + add_error = other_dset_args['add_error'] + summed_dset = _merge_datasets( + df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) + if summed_data is not None: + plot_ratio(summed_data, summed_dset, x=x_axis, + y=y, yerr=yerr, ax=summary_ax, error=error, zorder=21, + ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"], + color=color, add_error=add_error) else: raise RuntimeError(err_msg) return main_ax, summary_ax @@ -441,7 +521,8 @@ def plot_1d(df, kind="line", yscale="lin"): return fig -def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="Data / MC"): +def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="Data / MC", + color="k", zorder=22, add_error=True): # make sure both sides agree with the binning merged = data.join(sims, how="left", lsuffix="data", rsuffix="sims") data = merged.filter(like="data", axis="columns").fillna(0) @@ -460,9 +541,10 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="D mask = (central != 0) & (lower != 0) ax.errorbar(x=x_axis[mask], y=central[mask], yerr=(lower[mask], upper[mask]), fmt="o", markersize=4, color="k") - draw(ax, "errorbar", x_axis[mask], ys=["y", "yerr"], - y=central[mask], yerr=(lower[mask], upper[mask]), - fmt="o", markersize=4, color="k") + if add_error: + draw(ax, "errorbar", x_axis[mask], ys=["y", "yerr"], + y=central[mask], 
yerr=(lower[mask], upper[mask]), + fmt="o", markersize=4, color="gray", zorder=zorder-1) elif error == "both": ratio = d / s @@ -471,10 +553,10 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="D draw(ax, "errorbar", x_axis, ys=["y", "yerr"], y=ratio, yerr=rel_d_err, - fmt="o", markersize=4, color="k") - draw(ax, "fill_between", x_axis, ys=["y1", "y2"], - y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1, - color="gray", alpha=0.7) + fmt="o", markersize=4, color=color, zorder=zorder) + if add_error: + draw(ax, "fill_between", x_axis, ys=["y1", "y2"], color="gray", + y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1, alpha=0.7, zorder=zorder-1) ax.set_ylim(ylim) ax.grid(True) From 3fd31afb2f7b483afc6dfb4b3bb95bc2c6601eed Mon Sep 17 00:00:00 2001 From: David Anthony Date: Tue, 6 Jul 2021 22:49:46 +0100 Subject: [PATCH 03/31] clean up --- fast_plotter/plotting.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index e8c1b8b..a97fc67 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -120,7 +120,7 @@ def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=N self.line = line self.linewidth = linewidth self.other_dset_args = other_dset_args - self.dataset_colours=dataset_colours + self.dataset_colours = dataset_colours def pre_call(self, column): ax = self.ax @@ -142,15 +142,11 @@ def __call__(self, col, **kwargs): if self.other_dset_args: style = self.other_dset_args['style'] label = col.name if self.other_dset_args['add_label'] else None - color = self.other_dset_args['colour'] if self.other_dset_args['colour'] and type(self.other_dset_args['colour']) != dict\ + color = self.other_dset_args['colour'] if self.other_dset_args['colour']\ else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ else color - self.color=color - if type(color) == dict: - logger.warn(f"You didn't specify a colour 
for dataset '{col.name}'," + - f" and dataset was not found in 'dataset_colours', with keys {color.keys()}." - + " Using black.") - color = "k" + self.color = color + self.other_dset_args['colour'] = color width = self.linewidth else: style = "-" @@ -235,11 +231,6 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", if not re.compile(regex).match(dset): continue color = filler.color - if type(color) == dict: - logger.warn(f"You didn't specify a colour for dataset '{dset}'," + - f" and dataset was not found in 'dataset_colours', with keys {color.keys()}." - + " Using black.") - color = "k" dset_df = df.reset_index().loc[df.reset_index()[dataset_col] == dset].reset_index() x = dset_df[x_axis] draw(ax, "fill_between", x, ys=["y1", "y2"], y1=dset_df.eval("sumw+sqrt(sumw2)"), @@ -476,8 +467,6 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", error = "both" dset = other_dset_args['dset_type'] color = dataset_colours[dset] if dset in dataset_colours else other_dset_args['colour'] - if type(color) == dict: - raise ValueError(f"Please specify a color for dataset '{dset}'. 
Datasets specified are {dataset_colours.keys()}") add_error = other_dset_args['add_error'] summed_dset = _merge_datasets( df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) From a0976d3df23b12745824ea2c381914e10e147c52 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Wed, 7 Jul 2021 14:26:42 +0100 Subject: [PATCH 04/31] pep8 compliance --- fast_plotter/plotting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index a97fc67..1b5c879 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -143,8 +143,8 @@ def __call__(self, col, **kwargs): style = self.other_dset_args['style'] label = col.name if self.other_dset_args['add_label'] else None color = self.other_dset_args['colour'] if self.other_dset_args['colour']\ - else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ - else color + else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ + else color self.color = color self.other_dset_args['colour'] = color width = self.linewidth @@ -152,7 +152,7 @@ def __call__(self, col, **kwargs): style = "-" label = None color = "k" - width = self.linewidth + width = self.linewidth else: color = None label = col.name From cfd2c38fc1512a7b8c719caf21de8b83a9ee377e Mon Sep 17 00:00:00 2001 From: David Anthony Date: Wed, 7 Jul 2021 14:29:32 +0100 Subject: [PATCH 05/31] pep8 compliance --- fast_plotter/plotting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 7e4231a..d3c390b 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -423,6 +423,7 @@ def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_fr utils.calculate_error(df, do_rel_err=not err_from_sumw2) return df + def annotate_lines(cfg, main_ax, summary_ax): linename = list(cfg.keys())[0] annotDict = cfg[linename] From e4600ca0b12bbc572b74edf96bcff23cb9ba87e6 Mon Sep 
17 00:00:00 2001 From: David Anthony Date: Wed, 7 Jul 2021 19:18:21 +0100 Subject: [PATCH 06/31] Optional autoscaling of axes --- fast_plotter/__main__.py | 80 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 54221e5..33bcc39 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -5,12 +5,15 @@ import six import logging import matplotlib +import numpy as np +import numbers +import pandas as pd matplotlib.use('Agg') matplotlib.rcParams.update({'figure.autolayout': True}) from .version import __version__ # noqa -from .utils import read_binned_df, weighting_vars # noqa +from .utils import read_binned_df, weighting_vars, binning_vars # noqa from .utils import decipher_filename, mask_rows # noqa -from .plotting import plot_all, add_annotations # noqa +from .plotting import plot_all, add_annotations, is_intervals # noqa logger = logging.getLogger("fast_plotter") @@ -100,11 +103,69 @@ def recursive_replace(value, replacements): return args +def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2): + if hasattr(args, "autoscale"): + if len(df_filtered.index.names) > 2: + logger.warn("Autoscaling not supported for multi-index dataframes") + limits = args.limits + else: + xcol = df_filtered.index.get_level_values(1) + if 'y' in args.autoscale: + if weight == "n": + max_y = df_filtered['sumw'].max() + else: + max_mc = df_filtered.loc[mc_rows, 'sumw'].max()*args.lumi + max_data = df_filtered.loc[data_rows, 'n'].max() if 'n' in df_filtered.columns else 0.1 + max_y = max(max_mc, max_data) + max_y = max_y if max_y >= 1 else 1 + if args.yscale == 'log': + ylim_upper_floor = int(np.floor(np.log10(max_y))) + y_buffer = (legend_size + 1 if ylim_upper_floor > 3 + else legend_size if ylim_upper_floor > 2 + else legend_size) # Buffer for legend + ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer)) + ylim_lower = 
1e-1 + else: + buffer_factor = 1 + 0.5*legend_size + ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y))))) # Buffer for legend + ylim = [ylim_lower, ylim_upper] + df_aboveMin = df_filtered.loc[df_filtered['sumw'] > ylim_lower/args.lumi] + else: + if 'limits' in args: + ylim = args.limits['y'] if 'y' in args.limits else None + else: + ylim = None + df_aboveMin = df_filtered.copy() + if 'x' in args.autoscale: # Determine x-axis limits + if is_intervals(xcol): # If x-axis is interval, take right and leftmost intervals unless they are inf + max_x = xcol.right.max() if np.isfinite(xcol.right.max()) else xcol.left.max() + min_x = xcol.left.min() if np.isfinite(xcol.left.min()) else xcol.right.min() + if not np.isfinite(max_x) and hasattr(args, "show_over_underflow") and args.show_over_underflow: + logger.warn("Cannot autoscale overflow bin for x-axis. Removing.") + xlim = [min_x, max_x] + elif isinstance(xcol, numbers.Number): + xlim = [xcol.min, xcol.max] + else: + xlim = [-0.5, len(xcol.unique()) - 0.5] # For non-numeric x-axis (e.g. 
mtn range) + else: + if 'limits' in args: + xlim = args.limits['x'] if 'x' in args.limits else None + else: + xlim = None + + xlim = None if xlim is not None and np.NaN in xlim else xlim + ylim = None if ylim is not None and np.NaN in ylim else ylim + limits = {"x": xlim, "y": ylim} + else: + limits = args.limits if 'limits' in args else {} + return limits + def process_one_file(infile, args): logger.info("Processing: " + infile) df = read_binned_df(infile, dtype={args.dataset_col: str}) weights = weighting_vars(df) + legend_size = args.legend_size if hasattr(args, "legend_size") else 2 ran_ok = True for weight in weights: if args.weights and weight not in args.weights: @@ -115,13 +176,13 @@ def process_one_file(infile, args): df_filtered["sumw2"] = df_filtered.n else: if "n" in df.columns: - data_rows = mask_rows(df_filtered, - regex=args.data, - level=args.dataset_col) + data_rows_ungrouped = mask_rows(df_filtered, + regex=args.data, + level=args.dataset_col) for col in df_filtered.columns: if col == "n": continue - df_filtered.loc[data_rows, col] = df["n"][data_rows] + df_filtered.loc[data_rows_ungrouped, col] = df["n"][data_rows_ungrouped] df_filtered.columns = [ n.replace(weight + ":", "") for n in df_filtered.columns] if hasattr(args, "value_replacements"): @@ -130,8 +191,15 @@ def process_one_file(infile, args): continue df_filtered.rename(replacements, level=column, inplace=True, axis="index") df_filtered = df_filtered.groupby(level=df.index.names).sum() + data_rows = mask_rows(df_filtered, + regex=args.data, + level=args.dataset_col) + mc_rows = mask_rows(df_filtered, + regex="^((?!"+args.data+").)*$", + level=args.dataset_col) plots, ok = plot_all(df_filtered, **vars(args)) ran_ok &= ok + args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size = legend_size) dress_main_plots(plots, **vars(args)) save_plots(infile, weight, plots, args.outdir, args.extension) return ran_ok From a1043b48f3231056a6c3b2550c22629dbfa58671 Mon 
Sep 17 00:00:00 2001 From: David Anthony Date: Wed, 7 Jul 2021 19:23:23 +0100 Subject: [PATCH 07/31] pep8 compliance --- fast_plotter/__main__.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 33bcc39..edd52c0 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -103,6 +103,7 @@ def recursive_replace(value, replacements): return args + def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2): if hasattr(args, "autoscale"): if len(df_filtered.index.names) > 2: @@ -119,12 +120,12 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0 max_y = max(max_mc, max_data) max_y = max_y if max_y >= 1 else 1 if args.yscale == 'log': - ylim_upper_floor = int(np.floor(np.log10(max_y))) - y_buffer = (legend_size + 1 if ylim_upper_floor > 3 - else legend_size if ylim_upper_floor > 2 - else legend_size) # Buffer for legend - ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer)) - ylim_lower = 1e-1 + ylim_upper_floor = int(np.floor(np.log10(max_y))) + y_buffer = (legend_size + 1 if ylim_upper_floor > 3 + else legend_size if ylim_upper_floor > 2 + else legend_size) # Buffer for legend + ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer)) + ylim_lower = 1e-1 else: buffer_factor = 1 + 0.5*legend_size ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y))))) # Buffer for legend @@ -199,7 +200,7 @@ def process_one_file(infile, args): level=args.dataset_col) plots, ok = plot_all(df_filtered, **vars(args)) ran_ok &= ok - args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size = legend_size) + args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size=legend_size) dress_main_plots(plots, **vars(args)) save_plots(infile, weight, plots, args.outdir, args.extension) return ran_ok From 8cffd4e49f5d2c6ead03e1dd30ceb70e881ace9e 
Mon Sep 17 00:00:00 2001 From: David Anthony Date: Wed, 7 Jul 2021 19:36:45 +0100 Subject: [PATCH 08/31] pep8, fix xscaling --- fast_plotter/__main__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index edd52c0..cd13b18 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -7,7 +7,6 @@ import matplotlib import numpy as np import numbers -import pandas as pd matplotlib.use('Agg') matplotlib.rcParams.update({'figure.autolayout': True}) from .version import __version__ # noqa @@ -110,7 +109,6 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0 logger.warn("Autoscaling not supported for multi-index dataframes") limits = args.limits else: - xcol = df_filtered.index.get_level_values(1) if 'y' in args.autoscale: if weight == "n": max_y = df_filtered['sumw'].max() @@ -137,6 +135,7 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0 else: ylim = None df_aboveMin = df_filtered.copy() + xcol = df_aboveMin.index.get_level_values(1) if 'x' in args.autoscale: # Determine x-axis limits if is_intervals(xcol): # If x-axis is interval, take right and leftmost intervals unless they are inf max_x = xcol.right.max() if np.isfinite(xcol.right.max()) else xcol.left.max() From 0298d90805d1e652545abe6ce0e3db3414743e47 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 9 Jul 2021 11:19:49 +0100 Subject: [PATCH 09/31] Cleaning up --- fast_plotter/__main__.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index cd13b18..00c4c4a 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -103,11 +103,17 @@ def recursive_replace(value, replacements): return args -def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2): +def autoscale_values(args, df_filtered, weight, ylim_lower=0.1, 
legend_size=2): if hasattr(args, "autoscale"): + data_rows = mask_rows(df_filtered, + regex=args.data, + level=args.dataset_col) + mc_rows = mask_rows(df_filtered, + regex="^((?!"+args.data+").)*$", + level=args.dataset_col) if len(df_filtered.index.names) > 2: logger.warn("Autoscaling not supported for multi-index dataframes") - limits = args.limits + limits = args.limits if 'limits' in args else {} else: if 'y' in args.autoscale: if weight == "n": @@ -176,13 +182,13 @@ def process_one_file(infile, args): df_filtered["sumw2"] = df_filtered.n else: if "n" in df.columns: - data_rows_ungrouped = mask_rows(df_filtered, - regex=args.data, - level=args.dataset_col) + data_rows = mask_rows(df_filtered, + regex=args.data, + level=args.dataset_col) for col in df_filtered.columns: if col == "n": continue - df_filtered.loc[data_rows_ungrouped, col] = df["n"][data_rows_ungrouped] + df_filtered.loc[data_rows, col] = df["n"][data_rows] df_filtered.columns = [ n.replace(weight + ":", "") for n in df_filtered.columns] if hasattr(args, "value_replacements"): @@ -191,15 +197,9 @@ def process_one_file(infile, args): continue df_filtered.rename(replacements, level=column, inplace=True, axis="index") df_filtered = df_filtered.groupby(level=df.index.names).sum() - data_rows = mask_rows(df_filtered, - regex=args.data, - level=args.dataset_col) - mc_rows = mask_rows(df_filtered, - regex="^((?!"+args.data+").)*$", - level=args.dataset_col) plots, ok = plot_all(df_filtered, **vars(args)) ran_ok &= ok - args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size=legend_size) + args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size) dress_main_plots(plots, **vars(args)) save_plots(infile, weight, plots, args.outdir, args.extension) return ran_ok From 2d593d959931f3ca275d7929ca93fff16e446fa7 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 9 Jul 2021 11:59:20 +0100 Subject: [PATCH 10/31] poissonian error for 0 yield --- 
fast_plotter/plotting.py | 11 ++++++----- fast_plotter/utils.py | 6 +++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 5594caa..e37e7dd 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -374,10 +374,11 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", (in_df_data, plot_data, kind_data, data_legend, "plot_data"), (in_df_signal, plot_signal, kind_signal, "Signal", "plot_signal"), ] + kwargs.setdefault("is_null_poissonian", False) for df, combine, style, label, var_name in config: if df is None or len(df) == 0: continue - merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2) + merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian']) actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style, label=label, ax=main_ax, dataset_col=dataset_col, dataset_colours=dataset_colours, @@ -392,9 +393,9 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", if summary.startswith("ratio"): main_ax.set_xlabel("") summed_data = _merge_datasets( - in_df_data, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) + in_df_data, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian']) summed_sims = _merge_datasets( - in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) + in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian']) if summary == "ratio-error-both": error = "both" elif summary == "ratio-error-markers": @@ -411,7 +412,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", return main_ax, summary_ax -def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", 
err_from_sumw2=False): +def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_from_sumw2=False, is_null_poissonian=False): if style == "stack": df = utils.stack_datasets(df, dataset_level=dataset_col) elif style == "sum": @@ -419,7 +420,7 @@ def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_fr elif style: msg = "'{}' must be either 'sum', 'stack' or None. Got {}" raise RuntimeError(msg.format(param_name, style)) - utils.calculate_error(df, do_rel_err=not err_from_sumw2) + utils.calculate_error(df, do_rel_err=not err_from_sumw2, is_null_poissonian=is_null_poissonian) return df diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py index b765452..d845d96 100644 --- a/fast_plotter/utils.py +++ b/fast_plotter/utils.py @@ -91,7 +91,7 @@ def split_data_sims(df, data_labels=["data"], dataset_level="dataset"): return split_df(df, first_values=data_labels, level=dataset_level) -def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_rel_err=True): +def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_rel_err=True, is_null_poissonian=False): if not inplace: df = df.copy() if do_rel_err: @@ -105,6 +105,10 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r elif not do_rel_err and sumw2_label in column: err_name = column.replace(sumw2_label, err_label) df[err_name] = np.sqrt(df[column]) + if is_null_poissonian: + print(err_name) + print(df.loc[df[err_name]<=0]) + df[err_name] = df[err_name].apply(lambda x: x if x > 0 else 1.15) if not inplace: return df From 18064d190c233ca0502c498b064c59083b3d39d7 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 9 Jul 2021 12:23:10 +0100 Subject: [PATCH 11/31] Minimum error is 1.15 for n >=0 --- fast_plotter/utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py index d845d96..ee3c53b 100644 --- a/fast_plotter/utils.py +++ 
b/fast_plotter/utils.py @@ -102,13 +102,12 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r errs = np.true_divide(df[column], root_n) errs.loc[~np.isfinite(errs)] = np.nan df[err_name] = errs - elif not do_rel_err and sumw2_label in column: + else: + #elif not do_rel_err and sumw2_label in column: err_name = column.replace(sumw2_label, err_label) df[err_name] = np.sqrt(df[column]) - if is_null_poissonian: - print(err_name) - print(df.loc[df[err_name]<=0]) - df[err_name] = df[err_name].apply(lambda x: x if x > 0 else 1.15) + if is_null_poissonian: + df[err_name] = df[err_name].apply(lambda x: x if x > 1.15 else np.sqrt(1.15**2+x**2)) if not inplace: return df From 25d87bb481aff6400b78845d0ac81f0c04b1f04a Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 9 Jul 2021 12:51:10 +0100 Subject: [PATCH 12/31] pep8 --- fast_plotter/plotting.py | 12 ++++++++---- fast_plotter/utils.py | 6 ++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index e37e7dd..15a0ef4 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -378,7 +378,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", for df, combine, style, label, var_name in config: if df is None or len(df) == 0: continue - merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian']) + merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2, + is_null_poissonian=kwargs['is_null_poissonian']) actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style, label=label, ax=main_ax, dataset_col=dataset_col, dataset_colours=dataset_colours, @@ -393,9 +394,11 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", if summary.startswith("ratio"): main_ax.set_xlabel("") summed_data = _merge_datasets( - in_df_data, 
"sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian']) + in_df_data, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, + is_null_poissonian=kwargs['is_null_poissonian']) summed_sims = _merge_datasets( - in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian']) + in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, + is_null_poissonian=kwargs['is_null_poissonian']) if summary == "ratio-error-both": error = "both" elif summary == "ratio-error-markers": @@ -412,7 +415,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", return main_ax, summary_ax -def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_from_sumw2=False, is_null_poissonian=False): +def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_from_sumw2=False, + is_null_poissonian=False): if style == "stack": df = utils.stack_datasets(df, dataset_level=dataset_col) elif style == "sum": diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py index ee3c53b..3b30e2a 100644 --- a/fast_plotter/utils.py +++ b/fast_plotter/utils.py @@ -102,10 +102,12 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r errs = np.true_divide(df[column], root_n) errs.loc[~np.isfinite(errs)] = np.nan df[err_name] = errs - else: - #elif not do_rel_err and sumw2_label in column: + elif not do_rel_err and sumw2_label in column: err_name = column.replace(sumw2_label, err_label) df[err_name] = np.sqrt(df[column]) + else: + err_name = "" + continue if is_null_poissonian: df[err_name] = df[err_name].apply(lambda x: x if x > 1.15 else np.sqrt(1.15**2+x**2)) if not inplace: From 4e1dc4ad17ca73ce49939e5a7e17ddfd640b0c1f Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 9 Jul 2021 12:53:55 +0100 Subject: [PATCH 13/31] clean up --- fast_plotter/utils.py | 1 - 1 file changed, 1 
deletion(-) diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py index 3b30e2a..1cfbf57 100644 --- a/fast_plotter/utils.py +++ b/fast_plotter/utils.py @@ -106,7 +106,6 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r err_name = column.replace(sumw2_label, err_label) df[err_name] = np.sqrt(df[column]) else: - err_name = "" continue if is_null_poissonian: df[err_name] = df[err_name].apply(lambda x: x if x > 1.15 else np.sqrt(1.15**2+x**2)) From 8f5663d77c9b78a88d3721ad7a7de8edc23fb209 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Thu, 15 Jul 2021 23:30:59 +0100 Subject: [PATCH 14/31] Oversight in parsing of colours --- fast_plotter/plotting.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 1b5c879..d047dad 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -146,7 +146,7 @@ def __call__(self, col, **kwargs): else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\ else color self.color = color - self.other_dset_args['colour'] = color + self.other_dset_args['tmp_colour'] = color width = self.linewidth else: style = "-" @@ -466,7 +466,9 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", if (style == "other_dset_types") and (other_dset_args['plot_ratio']): error = "both" dset = other_dset_args['dset_type'] - color = dataset_colours[dset] if dset in dataset_colours else other_dset_args['colour'] + color = dataset_colours[dset] if dset in dataset_colours\ + else other_dset_args['colour'] if other_dset_args['colour'] + else other_dset_args['tmp_colour'] add_error = other_dset_args['add_error'] summed_dset = _merge_datasets( df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) From e1b313213ca4cf123ce41ecf8afaa03494b4901f Mon Sep 17 00:00:00 2001 From: David Anthony Date: Thu, 15 Jul 2021 23:34:47 +0100 Subject: [PATCH 15/31] syntax --- 
fast_plotter/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index d047dad..4a7009d 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -467,7 +467,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", error = "both" dset = other_dset_args['dset_type'] color = dataset_colours[dset] if dset in dataset_colours\ - else other_dset_args['colour'] if other_dset_args['colour'] + else other_dset_args['colour'] if other_dset_args['colour']\ else other_dset_args['tmp_colour'] add_error = other_dset_args['add_error'] summed_dset = _merge_datasets( From c88e7eddeb53a782874a409696697b5f8a90360b Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 23 Jul 2021 11:06:20 +0100 Subject: [PATCH 16/31] flake8 --- fast_plotter/__main__.py | 2 +- fast_plotter/plotting.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 63a202c..50f11b7 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -222,7 +222,7 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, if axis.lower() in "xy": getattr(main_ax, "set_%slim" % axis)(*lims) elif lims is None: - continue + continue elif lims.endswith("%"): main_ax.margins(**{axis: float(lims[:-1])}) if xtickrotation: diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 812b878..dfc8c27 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -472,8 +472,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", error = "both" dset = other_dset_args['dset_type'] color = dataset_colours[dset] if dset in dataset_colours\ - else other_dset_args['colour'] if other_dset_args['colour']\ - else other_dset_args['tmp_colour'] + else other_dset_args['colour'] if other_dset_args['colour']\ + else other_dset_args['tmp_colour'] add_error = 
other_dset_args['add_error'] summed_dset = _merge_datasets( df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) From d3172c6acad56119e0294092e2f382bb87139798 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 23 Jul 2021 11:10:31 +0100 Subject: [PATCH 17/31] flake8 --- fast_plotter/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 50f11b7..24a96e3 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -222,7 +222,7 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, if axis.lower() in "xy": getattr(main_ax, "set_%slim" % axis)(*lims) elif lims is None: - continue + continue elif lims.endswith("%"): main_ax.margins(**{axis: float(lims[:-1])}) if xtickrotation: From 540bac6222622cdc6d661c52c9bc1e0ff4992c31 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Thu, 29 Jul 2021 19:23:00 +0200 Subject: [PATCH 18/31] Add 'apply if' function to postproc stages --- fast_plotter/postproc/__main__.py | 10 +++++++++- fast_plotter/postproc/stages.py | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fast_plotter/postproc/__main__.py b/fast_plotter/postproc/__main__.py index 2a513f6..f0b519e 100644 --- a/fast_plotter/postproc/__main__.py +++ b/fast_plotter/postproc/__main__.py @@ -86,9 +86,17 @@ def main(args=None): sequence = read_processing_cfg(args.post_process, args.outdir) + apply_if = lambda df, stage: eval(str(stage.apply_if)) + for stage in sequence: logger.info("Working on %d dataframes", len(dfs)) - dfs = stage(dfs) + if stage.apply_if: + apply_to = [apply_if(df[0], stage) for df in dfs] + if not all(apply_to): + logger.info(f"Skipping stage '{stage.name}' for invalid dataframes") + dfs = [stage(df) if apply_to[idx] else df for idx, df in enumerate(dfs)] + else: + dfs = stage(dfs) if debug: dump_debug_df(dfs, args.debug_dfs_query, args.debug_rows) diff --git a/fast_plotter/postproc/stages.py 
b/fast_plotter/postproc/stages.py index 806ffcb..bc31f2f 100644 --- a/fast_plotter/postproc/stages.py +++ b/fast_plotter/postproc/stages.py @@ -22,6 +22,11 @@ def __init__(self, **kwargs): self.kwargs = kwargs self.func = getattr(functions, self.func) self.doc = self.func.__doc__ + if "apply_if" in kwargs: + self.apply_if = kwargs['apply_if'] + kwargs.pop("apply_if", None) + else: + self.apply_if = False def __call__(self, dfs): if self.cardinality == "many-to-one": From 7ef9551b40cfd110902cf16ab14dcf67ac10c23c Mon Sep 17 00:00:00 2001 From: David Anthony Date: Thu, 29 Jul 2021 19:38:21 +0200 Subject: [PATCH 19/31] Revert "Add 'apply if' function to postproc stages" This reverts commit 540bac6222622cdc6d661c52c9bc1e0ff4992c31. --- fast_plotter/postproc/__main__.py | 10 +--------- fast_plotter/postproc/stages.py | 5 ----- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/fast_plotter/postproc/__main__.py b/fast_plotter/postproc/__main__.py index f0b519e..2a513f6 100644 --- a/fast_plotter/postproc/__main__.py +++ b/fast_plotter/postproc/__main__.py @@ -86,17 +86,9 @@ def main(args=None): sequence = read_processing_cfg(args.post_process, args.outdir) - apply_if = lambda df, stage: eval(str(stage.apply_if)) - for stage in sequence: logger.info("Working on %d dataframes", len(dfs)) - if stage.apply_if: - apply_to = [apply_if(df[0], stage) for df in dfs] - if not all(apply_to): - logger.info(f"Skipping stage '{stage.name}' for invalid dataframes") - dfs = [stage(df) if apply_to[idx] else df for idx, df in enumerate(dfs)] - else: - dfs = stage(dfs) + dfs = stage(dfs) if debug: dump_debug_df(dfs, args.debug_dfs_query, args.debug_rows) diff --git a/fast_plotter/postproc/stages.py b/fast_plotter/postproc/stages.py index bc31f2f..806ffcb 100644 --- a/fast_plotter/postproc/stages.py +++ b/fast_plotter/postproc/stages.py @@ -22,11 +22,6 @@ def __init__(self, **kwargs): self.kwargs = kwargs self.func = getattr(functions, self.func) self.doc = self.func.__doc__ - if 
"apply_if" in kwargs: - self.apply_if = kwargs['apply_if'] - kwargs.pop("apply_if", None) - else: - self.apply_if = False def __call__(self, dfs): if self.cardinality == "many-to-one": From 5b814006e5ce38ea0428f630b7d4902f17860267 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 5 Nov 2021 11:51:01 +0000 Subject: [PATCH 20/31] Hack for MR plots --- fast_plotter/__main__.py | 17 +++++++--- fast_plotter/plotting.py | 70 ++++++++++++++++++++++++++++++++-------- 2 files changed, 69 insertions(+), 18 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 24a96e3..88a9fc6 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -12,7 +12,7 @@ from .version import __version__ # noqa from .utils import read_binned_df, weighting_vars, binning_vars # noqa from .utils import decipher_filename, mask_rows # noqa -from .plotting import plot_all, add_annotations, is_intervals # noqa +from .plotting import plot_all, add_annotations, is_intervals, annotate_xlabel_vals # noqa logger = logging.getLogger("fast_plotter") @@ -46,6 +46,7 @@ def arg_parser(args=None): help="Scale the MC yields by this lumi") parser.add_argument("-y", "--yscale", default="log", choices=["log", "linear"], help="Use this scale for the y-axis") + parser.add_argument("-a", "--annotate_xlabel", action="store_true", help="Split x-axis information onto plot") parser.add_argument('--version', action='version', version='%(prog)s ' + __version__) def split_equals(arg): @@ -173,6 +174,7 @@ def process_one_file(infile, args): weights = weighting_vars(df) legend_size = args.legend_size if hasattr(args, "legend_size") else 2 ran_ok = True + print(vars(args)) for weight in weights: if args.weights and weight not in args.weights: continue @@ -200,21 +202,23 @@ def process_one_file(infile, args): plots, ok = plot_all(df_filtered, **vars(args)) ran_ok &= ok args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size) - dress_main_plots(plots, 
**vars(args)) + dress_main_plots(plots, **vars(args), df=df_filtered) save_plots(infile, weight, plots, args.outdir, args.extension) return ran_ok def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, - limits={}, xtickrotation=None, **kwargs): + limits={}, xtickrotation=None, df=None, annotate_xlabel=False, grid='both', **kwargs): for main_ax, summary_ax in plots.values(): add_annotations(annotations, main_ax, summary_ax) + if annotate_xlabel: + met_cats=annotate_xlabel_vals(df, main_ax) if yscale: main_ax.set_yscale(yscale) if ylabel: main_ax.set_ylabel(ylabel) main_ax.legend(**legend).set_zorder(20) - main_ax.grid(True) + main_ax.grid(axis=grid) main_ax.set_axisbelow(True) for axis, lims in limits.items(): if isinstance(lims, (tuple, list)): @@ -225,6 +229,11 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, continue elif lims.endswith("%"): main_ax.margins(**{axis: float(lims[:-1])}) + if annotate_xlabel: + print(met_cats) + x_ticks = [i for i in range(len(met_cats))] + main_ax.set_xticks(x_ticks) + main_ax.set_xticklabels(met_cats) if xtickrotation: matplotlib.pyplot.xticks(rotation=xtickrotation) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index dfc8c27..e80b2c9 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -9,7 +9,6 @@ import re logger = logging.getLogger(__name__) - def change_brightness(color, amount): if amount is None: return @@ -23,20 +22,64 @@ def change_brightness(color, amount): c = colorsys.rgb_to_hls(*color) return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2]) - +def annotate_xlabel_vals(df, ax, regex="(?P.*?(?=\s))\s(?P\d.*?(?=\d))(?P.*?(?=,\s)),\s(?P.*)"): + df=df.reset_index() + met_cats=[re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))).groups()[3:][0] for category in df['category'].unique()] + 
cats=[re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))).groups()[:3] for category in df['category'].unique()] + n_cats = len(cats) + for i, cat in enumerate(cats): + if i==0: + a1,a2,a3=cat + old_cat = cat + labels = {i:{0:{val.replace(" ",""):0}} for i,val in enumerate(cat)} + else: + for j, val in enumerate(cat): + val = val.replace(" ", "") + if old_cat[j].replace(" ","") == val: + continue + else: + labels[j][i]={val:0} + if j == len(cat)-1: + old_cat=cat + for depth, label in labels.items(): + for i, split in enumerate(label): + label_str = list(label[split].keys())[0] + if i == len(label) - 1: + label_length = len(cats) - split + else: + label_length = dict(enumerate(label))[i+1] - split + labels[depth][split][label_str]=label_length + label_positions = {} + for depth, label in labels.items(): + label_positions[depth] = {} + for left_edge, len_dict in label.items(): + label_str = list(len_dict.keys())[0] + position = left_edge + (len_dict[label_str]/2) + if label_str in label_positions[depth]: + label_positions[depth][label_str].append(position-0.5) + else: + label_positions[depth][label_str] = [position-0.5] + + for depth, label_dict in label_positions.items(): + y = (0.80 - 0.05*(depth + 1)) + for label, xvals in label_dict.items(): + for x in xvals: + x = (x+0.5)/n_cats + ax.text(x, y, label, fontsize=12-depth, transform=ax.transAxes, ha='center', weight='medium') + return met_cats + def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dataset_col="dataset", yscale="log", lumi=None, annotations=[], dataset_order=None, continue_errors=True, bin_variable_replacements={}, colourmap="nipy_spectral", - figsize=None, other_dset_types={}, **kwargs): + figsize=None, other_dset_types={}, grid='both', **kwargs): figures = {} - dimensions = utils.binning_vars(df) ran_ok = True if len(dimensions) == 1: df = utils.rename_index(df, bin_variable_replacements) figures[(("yscale", yscale),)] = plot_1d( - df, 
yscale=yscale, annotations=annotations) + df, yscale=yscale, annotations=annotations, grid=grid) if dataset_col in dimensions: dimensions = tuple(dim for dim in dimensions if dim != dataset_col) @@ -54,7 +97,7 @@ def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dat plot = plot_1d_many(projected, data=data, signal=signal, dataset_col=dataset_col, scale_sims=lumi, colourmap=colourmap, dataset_order=dataset_order, - figsize=figsize, other_dset_args=other_dset_types, **kwargs + figsize=figsize, other_dset_args=other_dset_types, grid=grid, **kwargs ) figures[(("project", dim), ("yscale", yscale))] = plot except Exception as e: @@ -177,7 +220,7 @@ def __call__(self, col, **kwargs): def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", dataset_colours=None, colourmap="nipy_spectral", - dataset_order=None, other_cfg_args={}): + dataset_order=None, other_cfg_args={}, grid='both'): expected_xs = df.index.unique(x_axis).values if kind == "scatter": draw(ax, "errorbar", x=df.reset_index()[x_axis], ys=["y", "yerr"], y=df[y], yerr=df[yerr], @@ -363,7 +406,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", kind_data="scatter", kind_sims="fill-error-last", kind_signal="line", scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral", dataset_order=None, figsize=(5, 6), show_over_underflow=False, - dataset_colours=None, err_from_sumw2=False, data_legend="Data", other_dset_args={}, **kwargs): + dataset_colours=None, err_from_sumw2=False, data_legend="Data", other_dset_args={}, grid='both', **kwargs): y = "sumw" yvar = "sumw2" yerr = "err" @@ -439,7 +482,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style, label=label, ax=main_ax, dataset_col=dataset_col, dataset_colours=dataset_colours, - colourmap=colourmap, dataset_order=dataset_order, other_cfg_args=other_cfg_args) + 
colourmap=colourmap, dataset_order=dataset_order, other_cfg_args=other_cfg_args, grid=grid) main_ax.set_xlabel(x_axis) if not summary: @@ -541,18 +584,17 @@ def add_annotations(annotations, ax, summary_ax=None): cfg.setdefault("xycoords", "axes fraction") ax.annotate(s, xy=xy, **cfg) - -def plot_1d(df, kind="line", yscale="lin"): +def plot_1d(df, kind="line", yscale="lin", grid='both'): fig, ax = plt.subplots(1) df["sumw"].plot(kind=kind) ax.set_axisbelow(True) - plt.grid(True) + plt.grid(axis=grid) plt.yscale(yscale) return fig def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="Data / MC", - color="k", zorder=22, add_error=True): + color="k", zorder=22, add_error=True, grid='both'): # make sure both sides agree with the binning merged = data.join(sims, how="left", lsuffix="data", rsuffix="sims") data = merged.filter(like="data", axis="columns").fillna(0) @@ -589,7 +631,7 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="D y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1, alpha=0.7, zorder=zorder-1) ax.set_ylim(ylim) - ax.grid(True) + ax.grid(axis=grid) ax.set_axisbelow(True) ax.set_xlabel(x) ax.set_ylabel(ylabel) From 14935621bd4c4509be10ec18c2752abb9ea1f4b4 Mon Sep 17 00:00:00 2001 From: DBAnthony <43857191+DBAnthony@users.noreply.github.com> Date: Fri, 5 Nov 2021 13:04:34 +0000 Subject: [PATCH 21/31] Update __main__.py rm spurious print statements --- fast_plotter/__main__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 88a9fc6..fb945fd 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -174,7 +174,6 @@ def process_one_file(infile, args): weights = weighting_vars(df) legend_size = args.legend_size if hasattr(args, "legend_size") else 2 ran_ok = True - print(vars(args)) for weight in weights: if args.weights and weight not in args.weights: continue @@ -230,7 +229,6 @@ def dress_main_plots(plots, annotations=[], 
yscale=None, ylabel=None, legend={}, elif lims.endswith("%"): main_ax.margins(**{axis: float(lims[:-1])}) if annotate_xlabel: - print(met_cats) x_ticks = [i for i in range(len(met_cats))] main_ax.set_xticks(x_ticks) main_ax.set_xticklabels(met_cats) From 1f37ab94058370c0e6ace877d2e4bfa3e5458540 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 5 Nov 2021 17:55:25 +0000 Subject: [PATCH 22/31] rm print --- fast_plotter/__main__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 88a9fc6..e465b05 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -97,7 +97,6 @@ def recursive_replace(value, replacements): if isinstance(value, six.string_types): return Template(value).safe_substitute(replacements) return value - replacements = dict(args.variables) args = Namespace(**recursive_replace(vars(args), replacements)) @@ -174,7 +173,6 @@ def process_one_file(infile, args): weights = weighting_vars(df) legend_size = args.legend_size if hasattr(args, "legend_size") else 2 ran_ok = True - print(vars(args)) for weight in weights: if args.weights and weight not in args.weights: continue @@ -230,7 +228,6 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, elif lims.endswith("%"): main_ax.margins(**{axis: float(lims[:-1])}) if annotate_xlabel: - print(met_cats) x_ticks = [i for i in range(len(met_cats))] main_ax.set_xticks(x_ticks) main_ax.set_xticklabels(met_cats) From 81568d5282b68bb72db60b4a397c2c4bce8948d2 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 5 Nov 2021 17:59:06 +0000 Subject: [PATCH 23/31] ratio plot grid options --- fast_plotter/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index e80b2c9..64e4d4f 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -507,7 +507,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, 
dataset_col="dataset", kwargs.setdefault("ratio_ylim", [0., 2.]) kwargs.setdefault("ratio_ylabel", "Data / MC") plot_ratio(summed_data, summed_sims, x=x_axis, - y=y, yerr=yerr, ax=summary_ax, error=error, + y=y, yerr=yerr, ax=summary_ax, error=error, grid=grid, ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"]) if other_dset_args: for df, combine, style, label, var_name, other_dset_args in config: @@ -523,7 +523,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", if summed_data is not None: plot_ratio(summed_data, summed_dset, x=x_axis, y=y, yerr=yerr, ax=summary_ax, error=error, zorder=21, - ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"], + ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"], grid=grid, color=color, add_error=add_error) else: raise RuntimeError(err_msg) From 5574ba49b8d1cf830f4677b333506755412e311e Mon Sep 17 00:00:00 2001 From: David Anthony Date: Tue, 9 Nov 2021 18:42:46 +0000 Subject: [PATCH 24/31] Alternative regex for MR labels --- fast_plotter/plotting.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 64e4d4f..45427bb 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -22,20 +22,22 @@ def change_brightness(color, amount): c = colorsys.rgb_to_hls(*color) return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2]) -def annotate_xlabel_vals(df, ax, regex="(?P.*?(?=\s))\s(?P\d.*?(?=\d))(?P.*?(?=,\s)),\s(?P.*)"): +def annotate_xlabel_vals(df, ax, regex="(?P.*?(?=\s))\s(?P\d.*?(?=\d))(?P.*?(?=,\s)),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): df=df.reset_index() - met_cats=[re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))).groups()[3:][0] for category in df['category'].unique()] - cats=[re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))).groups()[:3] for category in df['category'].unique()] + 
re_compiler = lambda category,regex: re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))) + compile_correct_regex = lambda category: (re_compiler(category,regex) if re_compiler(category,regex) is not None else re_compiler(category,backup_regex)).groups() + met_cats=[compile_correct_regex(category)[3:][0] for category in df['category'].unique()] + cats=[compile_correct_regex(category)[:3] for category in df['category'].unique()] n_cats = len(cats) for i, cat in enumerate(cats): if i==0: a1,a2,a3=cat old_cat = cat - labels = {i:{0:{val.replace(" ",""):0}} for i,val in enumerate(cat)} + labels = {i:{0:{val.strip():0}} for i,val in enumerate(cat)} else: for j, val in enumerate(cat): - val = val.replace(" ", "") - if old_cat[j].replace(" ","") == val: + val = val.strip() + if old_cat[j].strip() == val: continue else: labels[j][i]={val:0} From 72deea9ac38b6b48103c8726ce00c9de4109479a Mon Sep 17 00:00:00 2001 From: DBAnthony <43857191+DBAnthony@users.noreply.github.com> Date: Tue, 12 Apr 2022 19:46:59 +0100 Subject: [PATCH 25/31] Update __main__.py austoscaling updates --- fast_plotter/__main__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index e465b05..63d1f16 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -103,8 +103,9 @@ def recursive_replace(value, replacements): return args -def autoscale_values(args, df_filtered, weight, ylim_lower=0.1, legend_size=2): +def autoscale_values(args, df_filtered, weight, ylim_lower=0.5, legend_size=2): if hasattr(args, "autoscale"): + legend_size = int(legend_size) data_rows = mask_rows(df_filtered, regex=args.data, level=args.dataset_col) @@ -129,7 +130,6 @@ def autoscale_values(args, df_filtered, weight, ylim_lower=0.1, legend_size=2): else legend_size if ylim_upper_floor > 2 else legend_size) # Buffer for legend ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer)) - ylim_lower = 1e-1 else: 
buffer_factor = 1 + 0.5*legend_size ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y))))) # Buffer for legend @@ -255,4 +255,4 @@ def save_plots(infile, weight, plots, outdir, extensions): if __name__ == "__main__": - main() + main() From e040b8a60012e501af2c659ab2de536f06ddb1c3 Mon Sep 17 00:00:00 2001 From: DBAnthony <43857191+DBAnthony@users.noreply.github.com> Date: Tue, 12 Apr 2022 19:55:47 +0100 Subject: [PATCH 26/31] Update plotting.py --- fast_plotter/plotting.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 45427bb..59bbe7d 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -38,6 +38,8 @@ def annotate_xlabel_vals(df, ax, regex="(?P.*?(?=\s))\s(?P\d.* for j, val in enumerate(cat): val = val.strip() if old_cat[j].strip() == val: + if j == len(cat)-1: + old_cat=cat continue else: labels[j][i]={val:0} @@ -58,7 +60,7 @@ def annotate_xlabel_vals(df, ax, regex="(?P.*?(?=\s))\s(?P\d.* label_str = list(len_dict.keys())[0] position = left_edge + (len_dict[label_str]/2) if label_str in label_positions[depth]: - label_positions[depth][label_str].append(position-0.5) + label_positions[depth][label_str].append(position-0.5) else: label_positions[depth][label_str] = [position-0.5] @@ -69,7 +71,7 @@ def annotate_xlabel_vals(df, ax, regex="(?P.*?(?=\s))\s(?P\d.* x = (x+0.5)/n_cats ax.text(x, y, label, fontsize=12-depth, transform=ax.transAxes, ha='center', weight='medium') return met_cats - + def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dataset_col="dataset", yscale="log", lumi=None, annotations=[], dataset_order=None, continue_errors=True, bin_variable_replacements={}, colourmap="nipy_spectral", @@ -681,4 +683,5 @@ def draw(ax, method, x, ys, **kwargs): if ticks is not None: ax.set_xticks(x) ax.set_xticklabels(ticks) - return x, ticks + return x, ticks +~ From 293f158e434d5e83fe04e8d2e8205ef20eb41f79 Mon Sep 17 
00:00:00 2001 From: DBAnthony <43857191+DBAnthony@users.noreply.github.com> Date: Wed, 13 Apr 2022 13:37:00 +0100 Subject: [PATCH 27/31] Update plotting.py Remove accidental addition --- fast_plotter/plotting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 59bbe7d..7ad175e 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -684,4 +684,3 @@ def draw(ax, method, x, ys, **kwargs): ax.set_xticks(x) ax.set_xticklabels(ticks) return x, ticks -~ From 19cc838362bccda27e55f15a63ae9ca8944ed959 Mon Sep 17 00:00:00 2001 From: DBAnthony <43857191+DBAnthony@users.noreply.github.com> Date: Tue, 16 Aug 2022 15:42:08 +0100 Subject: [PATCH 28/31] Customisation from config updates --- fast_plotter/__main__.py | 9 +++++++-- fast_plotter/plotting.py | 19 +++++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 63d1f16..36dfa51 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -140,7 +140,10 @@ def autoscale_values(args, df_filtered, weight, ylim_lower=0.5, legend_size=2): ylim = args.limits['y'] if 'y' in args.limits else None else: ylim = None - df_aboveMin = df_filtered.copy() + if 'x' in args.autoscale: + df_aboveMin = df_filtered.loc[df_filtered['sumw'] > ylim_lower/args.lumi] + else: + df_aboveMin = df_filtered.copy() xcol = df_aboveMin.index.get_level_values(1) if 'x' in args.autoscale: # Determine x-axis limits if is_intervals(xcol): # If x-axis is interval, take right and leftmost intervals unless they are inf @@ -200,6 +203,7 @@ def process_one_file(infile, args): plots, ok = plot_all(df_filtered, **vars(args)) ran_ok &= ok args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size) + print(args.limits) dress_main_plots(plots, **vars(args), df=df_filtered) save_plots(infile, weight, plots, args.outdir, args.extension) return ran_ok @@ -215,6 +219,7 @@ def 
dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, main_ax.set_yscale(yscale) if ylabel: main_ax.set_ylabel(ylabel) + legend['ncol'] = int(legend['ncol']) main_ax.legend(**legend).set_zorder(20) main_ax.grid(axis=grid) main_ax.set_axisbelow(True) @@ -255,4 +260,4 @@ def save_plots(infile, weight, plots, outdir, extensions): if __name__ == "__main__": - main() + main() diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 7ad175e..69688f5 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -22,12 +22,14 @@ def change_brightness(color, amount): c = colorsys.rgb_to_hls(*color) return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2]) -def annotate_xlabel_vals(df, ax, regex="(?P.*?(?=\s))\s(?P\d.*?(?=\d))(?P.*?(?=,\s)),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): +#def annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P.*?(?=\s))\s(?P\d.*?(?=\d))(?P.*?(?=,\s)),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): +#def annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P.*?(?=,\s)),\s(?P.*?(?=,\s))(?P()),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): +def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P.*?(?=,\s)),\s(?P.*?(?=,\s))(?P()),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): df=df.reset_index() re_compiler = lambda category,regex: re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))) compile_correct_regex = lambda category: (re_compiler(category,regex) if re_compiler(category,regex) is not None else re_compiler(category,backup_regex)).groups() - met_cats=[compile_correct_regex(category)[3:][0] for category in df['category'].unique()] - cats=[compile_correct_regex(category)[:3] for category in df['category'].unique()] + met_cats=[compile_correct_regex(category)[3:][-1] for category in df[binning_col].unique()] + cats=[compile_correct_regex(category)[:3] for category in df[binning_col].unique()] 
n_cats = len(cats) for i, cat in enumerate(cats): if i==0: @@ -60,18 +62,19 @@ def annotate_xlabel_vals(df, ax, regex="(?P.*?(?=\s))\s(?P\d.* label_str = list(len_dict.keys())[0] position = left_edge + (len_dict[label_str]/2) if label_str in label_positions[depth]: - label_positions[depth][label_str].append(position-0.5) + label_positions[depth][label_str].append(position-0.5) else: label_positions[depth][label_str] = [position-0.5] for depth, label_dict in label_positions.items(): - y = (0.80 - 0.05*(depth + 1)) + #y = (0.80 - 0.05*(depth + 1)) + y = (0.95 - 0.05*(depth + 1)) for label, xvals in label_dict.items(): for x in xvals: x = (x+0.5)/n_cats ax.text(x, y, label, fontsize=12-depth, transform=ax.transAxes, ha='center', weight='medium') return met_cats - + def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dataset_col="dataset", yscale="log", lumi=None, annotations=[], dataset_order=None, continue_errors=True, bin_variable_replacements={}, colourmap="nipy_spectral", @@ -201,7 +204,7 @@ def __call__(self, col, **kwargs): color = "k" width = self.linewidth else: - color = None + color = color label = col.name width = 2 style = "--" @@ -683,4 +686,4 @@ def draw(ax, method, x, ys, **kwargs): if ticks is not None: ax.set_xticks(x) ax.set_xticklabels(ticks) - return x, ticks + return x, ticks From a5383bc14614e147022a30c753e975ae1dca0186 Mon Sep 17 00:00:00 2001 From: DBAnthony <43857191+DBAnthony@users.noreply.github.com> Date: Tue, 16 Aug 2022 15:49:54 +0100 Subject: [PATCH 29/31] Update plotting.py Revert local regex changes --- fast_plotter/plotting.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 69688f5..0daeac1 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -22,9 +22,7 @@ def change_brightness(color, amount): c = colorsys.rgb_to_hls(*color) return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2]) -#def
annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P.*?(?=\s))\s(?P\d.*?(?=\d))(?P.*?(?=,\s)),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): -#def annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P.*?(?=,\s)),\s(?P.*?(?=,\s))(?P()),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): -def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P.*?(?=,\s)),\s(?P.*?(?=,\s))(?P()),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): +def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P.*?(?=\s))\s(?P\d.*?(?=\d))(?P.*?(?=,\s)),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): df=df.reset_index() re_compiler = lambda category,regex: re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))) compile_correct_regex = lambda category: (re_compiler(category,regex) if re_compiler(category,regex) is not None else re_compiler(category,backup_regex)).groups() @@ -62,13 +60,12 @@ def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P.*?(?=,\s label_str = list(len_dict.keys())[0] position = left_edge + (len_dict[label_str]/2) if label_str in label_positions[depth]: - label_positions[depth][label_str].append(position-0.5) + label_positions[depth][label_str].append(position-0.5) else: label_positions[depth][label_str] = [position-0.5] for depth, label_dict in label_positions.items(): - #y = (0.80 - 0.05*(depth + 1)) - y = (0.95 - 0.05*(depth + 1)) + y = (0.80 - 0.05*(depth + 1)) for label, xvals in label_dict.items(): for x in xvals: x = (x+0.5)/n_cats From 9004b36cad38f9e4a071eafad3b6586d7ba77645 Mon Sep 17 00:00:00 2001 From: DBAnthony <43857191+DBAnthony@users.noreply.github.com> Date: Tue, 16 Aug 2022 15:50:55 +0100 Subject: [PATCH 30/31] Update __main__.py --- fast_plotter/__main__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 36dfa51..47346c1 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ 
-203,7 +203,6 @@ def process_one_file(infile, args): plots, ok = plot_all(df_filtered, **vars(args)) ran_ok &= ok args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size) - print(args.limits) dress_main_plots(plots, **vars(args), df=df_filtered) save_plots(infile, weight, plots, args.outdir, args.extension) return ran_ok From 9c65f73e3c746bcae590f0ce5412166806e3be87 Mon Sep 17 00:00:00 2001 From: DBAnthony <43857191+DBAnthony@users.noreply.github.com> Date: Wed, 17 Aug 2022 12:14:06 +0100 Subject: [PATCH 31/31] Update plotting.py --- fast_plotter/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 0daeac1..43a8697 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -22,7 +22,7 @@ def change_brightness(color, amount): c = colorsys.rgb_to_hls(*color) return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2]) -def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P.*?(?=\s))\s(?P\d.*?(?=\d))(?P.*?(?=,\s)),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): +def annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P.*?(?=\s))\s(?P\d.*?(?=\d))(?P.*?(?=,\s)),\s(?P.*)", backup_regex="(?P.*?(?=\,))(?P()),\s(?P.*)"): df=df.reset_index() re_compiler = lambda category,regex: re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))) compile_correct_regex = lambda category: (re_compiler(category,regex) if re_compiler(category,regex) is not None else re_compiler(category,backup_regex)).groups()