From 33db77cbd3cd85cb21fd7c9b5419a747eb3cc801 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Tue, 6 Jul 2021 16:01:54 +0100
Subject: [PATCH 01/31] Support for line annotations

---
 fast_plotter/__main__.py |  4 ++--
 fast_plotter/plotting.py | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 54221e5..dc03c7a 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -140,12 +140,12 @@ def process_one_file(infile, args):
 def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
                      limits={}, xtickrotation=None, **kwargs):
     for main_ax, summary_ax in plots.values():
-        add_annotations(annotations, main_ax)
+        add_annotations(annotations, main_ax, summary_ax)
         if yscale:
             main_ax.set_yscale(yscale)
         if ylabel:
             main_ax.set_ylabel(ylabel)
-        main_ax.legend(**legend)
+        main_ax.legend(**legend).set_zorder(20)
         main_ax.grid(True)
         main_ax.set_axisbelow(True)
         for axis, lims in limits.items():
diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 5594caa..7e4231a 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -6,6 +6,7 @@
 import matplotlib.pyplot as plt
 import matplotlib.colors as mc
 import logging
+import re
 logger = logging.getLogger(__name__)
 
 
@@ -422,9 +423,41 @@ def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_fr
     utils.calculate_error(df, do_rel_err=not err_from_sumw2)
     return df
 
+def annotate_lines(cfg, main_ax, summary_ax):
+    linename = list(cfg.keys())[0]
+    annotDict = cfg[linename]
+    if 'values' not in annotDict.keys():
+        raise(RuntimeError("Must provide values for line placement."))
+    annotDefaults = {"style": "-", "alpha": 1, "width": 1.5,
+                     "colour": 'k', "label": None, "vmin": 0,
+                     "vmax": 1, "zorder": 10, "axes": ["main"]}
+    annotDict.update({key: value for key, value in annotDefaults.items()
+                      if key not in annotDict.keys()})
+    lineKeys = ['values', 'style', 'alpha', 'width', 'colour', 'label', 'vmin', 'vmax', 'zorder', 'axes']
+    if set(annotDict.keys()).difference(set(lineKeys)):
+        logger.warn("Invalid parameter(s) given to line annotations. Options are {}".format(lineKeys))
+    values, style, alpha, width, colour, label, vmin, vmax, zorder, axes = [annotDict[key] for key in lineKeys]
+    for axis in axes:
+        awidth = 0.6 * width if (axis == 'summary') else width
+        ax = main_ax if (str(axis) == 'main') else summary_ax if (str(axis) == 'summary') else None
+        if ax is None:
+            logger.warn("Axis must exist and either be 'main' or 'summary'. {} is None".format(axis))
+            continue
+        for value in values:
+            value = float(value)
+            if 'hline' in linename:
+                ax.axhline(value, vmin, vmax, color=colour, label=label,
+                           alpha=alpha, ls=style, lw=awidth, zorder=zorder)
+            if 'vline' in linename:
+                ax.axvline(value, vmin, vmax, color=colour, label=label,
+                           alpha=alpha, ls=style, lw=awidth, zorder=zorder)
+
 
-def add_annotations(annotations, ax):
+def add_annotations(annotations, ax, summary_ax=None):
     for cfg in annotations:
+        if list(filter(lambda key: re.match("(.*hline.*|.*vline.*)", key), cfg.keys())):
+            annotate_lines(cfg, ax, summary_ax)
+            continue
         cfg = cfg.copy()
         s = cfg.pop("text")
         xy = cfg.pop("position")

From 5abec7e8cbc09cea45f827e25391c722dbf34e92 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Tue, 6 Jul 2021 22:22:35 +0100
Subject: [PATCH 02/31] plotting for nonstandard dataset types

---
 fast_plotter/plotting.py | 134 +++++++++++++++++++++++++++++++--------
 1 file changed, 108 insertions(+), 26 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 5594caa..e8c1b8b 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -6,6 +6,7 @@
 import matplotlib.pyplot as plt
 import matplotlib.colors as mc
 import logging
+import re
 logger = logging.getLogger(__name__)
 
 
@@ -26,7 +27,7 @@ def change_brightness(color, amount):
 def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dataset_col="dataset",
              yscale="log", lumi=None, annotations=[], dataset_order=None,
              continue_errors=True, bin_variable_replacements={}, colourmap="nipy_spectral",
-             figsize=None, **kwargs):
+             figsize=None, other_dset_types={}, **kwargs):
     figures = {}
 
     dimensions = utils.binning_vars(df)
@@ -53,7 +54,7 @@ def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dat
                 plot = plot_1d_many(projected, data=data, signal=signal,
                                     dataset_col=dataset_col, scale_sims=lumi,
                                     colourmap=colourmap, dataset_order=dataset_order,
-                                    figsize=figsize, **kwargs
+                                    figsize=figsize, other_dset_args=other_dset_types, **kwargs
                                     )
                 figures[(("project", dim), ("yscale", yscale))] = plot
             except Exception as e:
@@ -107,7 +108,8 @@ def get_colour(self, index=None, name=None):
 
 class FillColl(object):
     def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=None,
-                 colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, expected_xs=None):
+                 colourmap="nipy_spectral", dataset_order=None, linewidth=0.5,
+                 expected_xs=None, other_dset_args={}):
         self.calls = -1
         self.expected_xs = expected_xs
         self.colors = ColorDict(n_colors=n_colors, order=dataset_order,
@@ -117,6 +119,8 @@ def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=N
         self.fill = fill
         self.line = line
         self.linewidth = linewidth
+        self.other_dset_args = other_dset_args
+        self.dataset_colours=dataset_colours
 
     def pre_call(self, column):
         ax = self.ax
@@ -129,16 +133,30 @@ def pre_call(self, column):
 
     def __call__(self, col, **kwargs):
         ax, x, y, color = self.pre_call(col)
-        if self.fill:
+        if self.fill and not self.other_dset_args:
             draw(ax, "fill_between", x=x, ys=["y1"],
                  y1=y, label=col.name, expected_xs=self.expected_xs,
                  linewidth=0, color=color, **kwargs)
         if self.line:
             if self.fill:
-                label = None
-                color = "k"
-                width = self.linewidth
-                style = "-"
+                if self.other_dset_args:
+                    style = self.other_dset_args['style']
+                    label = col.name if self.other_dset_args['add_label'] else None
+                    color = self.other_dset_args['colour'] if self.other_dset_args['colour'] and type(self.other_dset_args['colour']) != dict\
+                            else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\
+                            else color
+                    self.color=color
+                    if type(color) == dict:
+                        logger.warn(f"You didn't specify a colour for dataset '{col.name}'," +
+                                    f" and dataset was not found in 'dataset_colours', with keys {color.keys()}."
+                                    + " Using black.")
+                        color = "k"
+                    width = self.linewidth
+                else:
+                    style = "-"
+                    label = None
+                    color = "k"
+                    width = self.linewidth 
             else:
                 color = None
                 label = col.name
@@ -162,7 +180,8 @@ def __call__(self, col, **kwargs):
 
 
 def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
-                  dataset_colours=None, colourmap="nipy_spectral", dataset_order=None):
+                  dataset_colours=None, colourmap="nipy_spectral",
+                  dataset_order=None, other_cfg_args={}):
     expected_xs = df.index.unique(x_axis).values
     if kind == "scatter":
         draw(ax, "errorbar", x=df.reset_index()[x_axis], ys=["y", "yerr"], y=df[y], yerr=df[yerr],
@@ -202,6 +221,29 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
         y_up = (summed[y] + summed[yerr]).values
         draw(ax, "fill_between", x, ys=["y1", "y2"], y2=y_down, y1=y_up,
              color="gray", alpha=0.7, expected_xs=expected_xs)
+    elif kind == "other_dset_types":
+        if 'regex' not in other_cfg_args:
+            raise RuntimeError("Must specify a regex for other plotting datatype to be applied to")
+        options = ["alpha", "style", "width", "add_label", "add_error", "regex"]
+        alpha, style, width, add_label, add_error, regex = [other_cfg_args[key] for key in options]
+        filler = FillColl(n_datasets, ax=ax, fill=True, colourmap=colourmap, dataset_colours=dataset_colours,
+                          dataset_order=dataset_order, expected_xs=expected_xs, linewidth=width,
+                          other_dset_args=other_cfg_args)
+        vals.apply(filler, axis=0, step="mid")
+        if add_error:
+            for dset in list(set(df.reset_index()[dataset_col])):
+                if not re.compile(regex).match(dset):
+                    continue
+                color = filler.color
+                if type(color) == dict:
+                   logger.warn(f"You didn't specify a colour for dataset '{dset}'," +
+                               f" and dataset was not found in 'dataset_colours', with keys {color.keys()}."
+                               + " Using black.")
+                   color = "k"
+                dset_df = df.reset_index().loc[df.reset_index()[dataset_col] == dset].reset_index()
+                x = dset_df[x_axis]
+                draw(ax, "fill_between", x, ys=["y1", "y2"], y1=dset_df.eval("sumw+sqrt(sumw2)"),
+                     y2=dset_df.eval("sumw-sqrt(sumw2)"), color=color, alpha=alpha, expected_xs=expected_xs)
     else:
         raise RuntimeError("Unknown value for 'kind', '{}'".format(kind))
 
@@ -330,7 +372,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                  kind_data="scatter", kind_sims="fill-error-last", kind_signal="line",
                  scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral",
                  dataset_order=None, figsize=(5, 6), show_over_underflow=False,
-                 dataset_colours=None, err_from_sumw2=False, data_legend="Data", **kwargs):
+                 dataset_colours=None, err_from_sumw2=False, data_legend="Data", other_dset_args={}, **kwargs):
     y = "sumw"
     yvar = "sumw2"
     yerr = "err"
@@ -352,13 +394,39 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
     else:
         in_df_signal = None
 
+    config_extend = []
+    if other_dset_args:
+        for dset_type in other_dset_args.keys():
+            dset_type_labels = other_dset_args[dset_type]['regex']
+            other_defaults = {"style": "-", "alpha": 0.2, "width": 1,
+                              "colour": [], "dset_type": dset_type, "add_label": True,
+                              "add_error": True, "plot_ratio": False}
+            default_specs = {key: val for key, val
+                             in other_defaults.items()
+                             if key not in other_dset_args[dset_type].keys()}
+            other_dset_args[dset_type].update(default_specs)
+            in_df_other, in_df_sims = utils.split_data_sims(
+                 in_df_sims, data_labels=dset_type_labels, dataset_level=dataset_col)
+            config_extend.append((in_df_other, None, "other_dset_types",
+                                  dset_type_labels, "plot_other_dset", other_dset_args[dset_type]))
+    else:
+        in_df_other = None
+
+    def_cfg_args = {"dset_type": ""}
+    config = [(in_df_sims, plot_sims, kind_sims, "Monte Carlo", "plot_sims", def_cfg_args),
+              (in_df_data, plot_data, kind_data, data_legend, "plot_data", def_cfg_args),
+              (in_df_signal, plot_signal, kind_signal, "Signal", "plot_signal", def_cfg_args),
+              ]
+
+    config.extend(config_extend)
+
     if in_df_data is None or in_df_sims is None:
         summary = None
     if not summary:
-        fig, main_ax = plt.subplots(1, 1, figsize=figsize)
+        fig, main_ax = plt.subplots(1, 1, figsize=[float(i) for i in figsize])
     else:
         fig, ax = plt.subplots(
-            2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True, figsize=figsize)
+            2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True, figsize=[float(i) for i in figsize])
         fig.subplots_adjust(hspace=.1)
         main_ax, summary_ax = ax
 
@@ -370,18 +438,14 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
             "Too few dimensions to multiple 1D graphs, use plot_1d instead")
     x_axis = x_axis[0]
 
-    config = [(in_df_sims, plot_sims, kind_sims, "Monte Carlo", "plot_sims"),
-              (in_df_data, plot_data, kind_data, data_legend, "plot_data"),
-              (in_df_signal, plot_signal, kind_signal, "Signal", "plot_signal"),
-              ]
-    for df, combine, style, label, var_name in config:
+    for df, combine, style, label, var_name, other_cfg_args in config:
         if df is None or len(df) == 0:
             continue
         merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2)
         actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style,
                       label=label, ax=main_ax, dataset_col=dataset_col,
                       dataset_colours=dataset_colours,
-                      colourmap=colourmap, dataset_order=dataset_order)
+                      colourmap=colourmap, dataset_order=dataset_order, other_cfg_args=other_cfg_args)
     main_ax.set_xlabel(x_axis)
 
     if not summary:
@@ -406,6 +470,22 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
         plot_ratio(summed_data, summed_sims, x=x_axis,
                    y=y, yerr=yerr, ax=summary_ax, error=error,
                    ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"])
+        if other_dset_args:
+            for df, combine, style, label, var_name, other_dset_args in config:
+                if (style == "other_dset_types") and (other_dset_args['plot_ratio']):
+                    error = "both"
+                    dset = other_dset_args['dset_type']
+                    color = dataset_colours[dset] if dset in dataset_colours else other_dset_args['colour']
+                    if type(color) == dict:
+                       raise ValueError(f"Please specify a color for dataset '{dset}'. Datasets specified are {dataset_colours.keys()}")
+                    add_error = other_dset_args['add_error']
+                    summed_dset = _merge_datasets(
+                        df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2)
+                    if summed_data is not None:
+                        plot_ratio(summed_data, summed_dset, x=x_axis,
+                                   y=y, yerr=yerr, ax=summary_ax, error=error, zorder=21,
+                                   ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"],
+                                   color=color, add_error=add_error)
     else:
         raise RuntimeError(err_msg)
     return main_ax, summary_ax
@@ -441,7 +521,8 @@ def plot_1d(df, kind="line", yscale="lin"):
     return fig
 
 
-def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="Data / MC"):
+def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="Data / MC",
+               color="k", zorder=22, add_error=True):
     # make sure both sides agree with the binning
     merged = data.join(sims, how="left", lsuffix="data", rsuffix="sims")
     data = merged.filter(like="data", axis="columns").fillna(0)
@@ -460,9 +541,10 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="D
         mask = (central != 0) & (lower != 0)
         ax.errorbar(x=x_axis[mask], y=central[mask], yerr=(lower[mask], upper[mask]),
                     fmt="o", markersize=4, color="k")
-        draw(ax, "errorbar", x_axis[mask], ys=["y", "yerr"],
-             y=central[mask], yerr=(lower[mask], upper[mask]),
-             fmt="o", markersize=4, color="k")
+        if add_error:
+            draw(ax, "errorbar", x_axis[mask], ys=["y", "yerr"],
+                 y=central[mask], yerr=(lower[mask], upper[mask]),
+                 fmt="o", markersize=4, color="gray", zorder=zorder-1)
 
     elif error == "both":
         ratio = d / s
@@ -471,10 +553,10 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="D
 
         draw(ax, "errorbar", x_axis, ys=["y", "yerr"],
              y=ratio, yerr=rel_d_err,
-             fmt="o", markersize=4, color="k")
-        draw(ax, "fill_between", x_axis, ys=["y1", "y2"],
-             y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1,
-             color="gray", alpha=0.7)
+             fmt="o", markersize=4, color=color, zorder=zorder)
+        if add_error:
+            draw(ax, "fill_between", x_axis, ys=["y1", "y2"], color="gray",
+                 y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1, alpha=0.7, zorder=zorder-1)
 
     ax.set_ylim(ylim)
     ax.grid(True)

From 3fd31afb2f7b483afc6dfb4b3bb95bc2c6601eed Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Tue, 6 Jul 2021 22:49:46 +0100
Subject: [PATCH 03/31] clean up

---
 fast_plotter/plotting.py | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index e8c1b8b..a97fc67 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -120,7 +120,7 @@ def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=N
         self.line = line
         self.linewidth = linewidth
         self.other_dset_args = other_dset_args
-        self.dataset_colours=dataset_colours
+        self.dataset_colours = dataset_colours
 
     def pre_call(self, column):
         ax = self.ax
@@ -142,15 +142,11 @@ def __call__(self, col, **kwargs):
                 if self.other_dset_args:
                     style = self.other_dset_args['style']
                     label = col.name if self.other_dset_args['add_label'] else None
-                    color = self.other_dset_args['colour'] if self.other_dset_args['colour'] and type(self.other_dset_args['colour']) != dict\
+                    color = self.other_dset_args['colour'] if self.other_dset_args['colour']\
                             else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\
                             else color
-                    self.color=color
-                    if type(color) == dict:
-                        logger.warn(f"You didn't specify a colour for dataset '{col.name}'," +
-                                    f" and dataset was not found in 'dataset_colours', with keys {color.keys()}."
-                                    + " Using black.")
-                        color = "k"
+                    self.color = color
+                    self.other_dset_args['colour'] = color
                     width = self.linewidth
                 else:
                     style = "-"
@@ -235,11 +231,6 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
                 if not re.compile(regex).match(dset):
                     continue
                 color = filler.color
-                if type(color) == dict:
-                   logger.warn(f"You didn't specify a colour for dataset '{dset}'," +
-                               f" and dataset was not found in 'dataset_colours', with keys {color.keys()}."
-                               + " Using black.")
-                   color = "k"
                 dset_df = df.reset_index().loc[df.reset_index()[dataset_col] == dset].reset_index()
                 x = dset_df[x_axis]
                 draw(ax, "fill_between", x, ys=["y1", "y2"], y1=dset_df.eval("sumw+sqrt(sumw2)"),
@@ -476,8 +467,6 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                     error = "both"
                     dset = other_dset_args['dset_type']
                     color = dataset_colours[dset] if dset in dataset_colours else other_dset_args['colour']
-                    if type(color) == dict:
-                       raise ValueError(f"Please specify a color for dataset '{dset}'. Datasets specified are {dataset_colours.keys()}")
                     add_error = other_dset_args['add_error']
                     summed_dset = _merge_datasets(
                         df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2)

From a0976d3df23b12745824ea2c381914e10e147c52 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Wed, 7 Jul 2021 14:26:42 +0100
Subject: [PATCH 04/31] pep8 compliance

---
 fast_plotter/plotting.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index a97fc67..1b5c879 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -143,8 +143,8 @@ def __call__(self, col, **kwargs):
                     style = self.other_dset_args['style']
                     label = col.name if self.other_dset_args['add_label'] else None
                     color = self.other_dset_args['colour'] if self.other_dset_args['colour']\
-                            else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\
-                            else color
+                        else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\
+                        else color
                     self.color = color
                     self.other_dset_args['colour'] = color
                     width = self.linewidth
@@ -152,7 +152,7 @@ def __call__(self, col, **kwargs):
                     style = "-"
                     label = None
                     color = "k"
-                    width = self.linewidth 
+                    width = self.linewidth
             else:
                 color = None
                 label = col.name

From cfd2c38fc1512a7b8c719caf21de8b83a9ee377e Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Wed, 7 Jul 2021 14:29:32 +0100
Subject: [PATCH 05/31] pep8 compliance

---
 fast_plotter/plotting.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 7e4231a..d3c390b 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -423,6 +423,7 @@ def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_fr
     utils.calculate_error(df, do_rel_err=not err_from_sumw2)
     return df
 
+
 def annotate_lines(cfg, main_ax, summary_ax):
     linename = list(cfg.keys())[0]
     annotDict = cfg[linename]

From e4600ca0b12bbc572b74edf96bcff23cb9ba87e6 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Wed, 7 Jul 2021 19:18:21 +0100
Subject: [PATCH 06/31] Optional autoscaling of axes

---
 fast_plotter/__main__.py | 80 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 74 insertions(+), 6 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 54221e5..33bcc39 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -5,12 +5,15 @@
 import six
 import logging
 import matplotlib
+import numpy as np
+import numbers
+import pandas as pd
 matplotlib.use('Agg')
 matplotlib.rcParams.update({'figure.autolayout': True})
 from .version import __version__ # noqa
-from .utils import read_binned_df, weighting_vars # noqa
+from .utils import read_binned_df, weighting_vars, binning_vars # noqa
 from .utils import decipher_filename, mask_rows  # noqa
-from .plotting import plot_all, add_annotations # noqa
+from .plotting import plot_all, add_annotations, is_intervals # noqa
 
 
 logger = logging.getLogger("fast_plotter")
@@ -100,11 +103,69 @@ def recursive_replace(value, replacements):
 
     return args
 
+def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2):
+    if hasattr(args, "autoscale"):
+        if len(df_filtered.index.names) > 2:
+            logger.warn("Autoscaling not supported for multi-index dataframes")
+            limits = args.limits
+        else:
+            xcol = df_filtered.index.get_level_values(1)
+            if 'y' in args.autoscale:
+                if weight == "n":
+                    max_y = df_filtered['sumw'].max()
+                else:
+                    max_mc = df_filtered.loc[mc_rows, 'sumw'].max()*args.lumi
+                    max_data = df_filtered.loc[data_rows, 'n'].max() if 'n' in df_filtered.columns else 0.1
+                    max_y = max(max_mc, max_data)
+                max_y = max_y if max_y >= 1 else 1
+                if args.yscale == 'log':
+                   ylim_upper_floor = int(np.floor(np.log10(max_y)))
+                   y_buffer = (legend_size + 1 if ylim_upper_floor > 3
+                               else legend_size if ylim_upper_floor > 2
+                               else legend_size)  # Buffer for legend
+                   ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer))
+                   ylim_lower = 1e-1
+                else:
+                    buffer_factor = 1 + 0.5*legend_size
+                    ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y)))))  # Buffer for legend
+                ylim = [ylim_lower, ylim_upper]
+                df_aboveMin = df_filtered.loc[df_filtered['sumw'] > ylim_lower/args.lumi]
+            else:
+                if 'limits' in args:
+                    ylim = args.limits['y'] if 'y' in args.limits else None
+                else:
+                    ylim = None
+                df_aboveMin = df_filtered.copy()
+            if 'x' in args.autoscale:  # Determine x-axis limits
+                if is_intervals(xcol):  # If x-axis is interval, take right and leftmost intervals unless they are inf
+                    max_x = xcol.right.max() if np.isfinite(xcol.right.max()) else xcol.left.max()
+                    min_x = xcol.left.min() if np.isfinite(xcol.left.min()) else xcol.right.min()
+                    if not np.isfinite(max_x) and hasattr(args, "show_over_underflow") and args.show_over_underflow:
+                        logger.warn("Cannot autoscale overflow bin for x-axis. Removing.")
+                    xlim = [min_x, max_x]
+                elif isinstance(xcol, numbers.Number):
+                    xlim = [xcol.min, xcol.max]
+                else:
+                    xlim = [-0.5, len(xcol.unique()) - 0.5]  # For non-numeric x-axis (e.g. mtn range)
+            else:
+                if 'limits' in args:
+                    xlim = args.limits['x'] if 'x' in args.limits else None
+                else:
+                    xlim = None
+
+            xlim = None if xlim is not None and np.NaN in xlim else xlim
+            ylim = None if ylim is not None and np.NaN in ylim else ylim
+            limits = {"x": xlim, "y": ylim}
+    else:
+        limits = args.limits if 'limits' in args else {}
+    return limits
+
 
 def process_one_file(infile, args):
     logger.info("Processing: " + infile)
     df = read_binned_df(infile, dtype={args.dataset_col: str})
     weights = weighting_vars(df)
+    legend_size = args.legend_size if hasattr(args, "legend_size") else 2
     ran_ok = True
     for weight in weights:
         if args.weights and weight not in args.weights:
@@ -115,13 +176,13 @@ def process_one_file(infile, args):
             df_filtered["sumw2"] = df_filtered.n
         else:
             if "n" in df.columns:
-                data_rows = mask_rows(df_filtered,
-                                      regex=args.data,
-                                      level=args.dataset_col)
+                data_rows_ungrouped = mask_rows(df_filtered,
+                                                regex=args.data,
+                                                level=args.dataset_col)
                 for col in df_filtered.columns:
                     if col == "n":
                         continue
-                    df_filtered.loc[data_rows, col] = df["n"][data_rows]
+                    df_filtered.loc[data_rows_ungrouped, col] = df["n"][data_rows_ungrouped]
             df_filtered.columns = [
                 n.replace(weight + ":", "") for n in df_filtered.columns]
         if hasattr(args, "value_replacements"):
@@ -130,8 +191,15 @@ def process_one_file(infile, args):
                     continue
                 df_filtered.rename(replacements, level=column, inplace=True, axis="index")
                 df_filtered = df_filtered.groupby(level=df.index.names).sum()
+        data_rows = mask_rows(df_filtered,
+                              regex=args.data,
+                              level=args.dataset_col)
+        mc_rows = mask_rows(df_filtered,
+                            regex="^((?!"+args.data+").)*$",
+                            level=args.dataset_col)
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
+        args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size = legend_size)
         dress_main_plots(plots, **vars(args))
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok

From a1043b48f3231056a6c3b2550c22629dbfa58671 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Wed, 7 Jul 2021 19:23:23 +0100
Subject: [PATCH 07/31] pep8 compliance

---
 fast_plotter/__main__.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 33bcc39..edd52c0 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -103,6 +103,7 @@ def recursive_replace(value, replacements):
 
     return args
 
+
 def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2):
     if hasattr(args, "autoscale"):
         if len(df_filtered.index.names) > 2:
@@ -119,12 +120,12 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0
                     max_y = max(max_mc, max_data)
                 max_y = max_y if max_y >= 1 else 1
                 if args.yscale == 'log':
-                   ylim_upper_floor = int(np.floor(np.log10(max_y)))
-                   y_buffer = (legend_size + 1 if ylim_upper_floor > 3
-                               else legend_size if ylim_upper_floor > 2
-                               else legend_size)  # Buffer for legend
-                   ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer))
-                   ylim_lower = 1e-1
+                    ylim_upper_floor = int(np.floor(np.log10(max_y)))
+                    y_buffer = (legend_size + 1 if ylim_upper_floor > 3
+                                else legend_size if ylim_upper_floor > 2
+                                else legend_size)  # Buffer for legend
+                    ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer))
+                    ylim_lower = 1e-1
                 else:
                     buffer_factor = 1 + 0.5*legend_size
                     ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y)))))  # Buffer for legend
@@ -199,7 +200,7 @@ def process_one_file(infile, args):
                             level=args.dataset_col)
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
-        args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size = legend_size)
+        args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size=legend_size)
         dress_main_plots(plots, **vars(args))
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok

From 8cffd4e49f5d2c6ead03e1dd30ceb70e881ace9e Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Wed, 7 Jul 2021 19:36:45 +0100
Subject: [PATCH 08/31] pep8, fix xscaling

---
 fast_plotter/__main__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index edd52c0..cd13b18 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -7,7 +7,6 @@
 import matplotlib
 import numpy as np
 import numbers
-import pandas as pd
 matplotlib.use('Agg')
 matplotlib.rcParams.update({'figure.autolayout': True})
 from .version import __version__ # noqa
@@ -110,7 +109,6 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0
             logger.warn("Autoscaling not supported for multi-index dataframes")
             limits = args.limits
         else:
-            xcol = df_filtered.index.get_level_values(1)
             if 'y' in args.autoscale:
                 if weight == "n":
                     max_y = df_filtered['sumw'].max()
@@ -137,6 +135,7 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0
                 else:
                     ylim = None
                 df_aboveMin = df_filtered.copy()
+            xcol = df_aboveMin.index.get_level_values(1)
             if 'x' in args.autoscale:  # Determine x-axis limits
                 if is_intervals(xcol):  # If x-axis is interval, take right and leftmost intervals unless they are inf
                     max_x = xcol.right.max() if np.isfinite(xcol.right.max()) else xcol.left.max()

From 0298d90805d1e652545abe6ce0e3db3414743e47 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 9 Jul 2021 11:19:49 +0100
Subject: [PATCH 09/31] Cleaning up

---
 fast_plotter/__main__.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index cd13b18..00c4c4a 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -103,11 +103,17 @@ def recursive_replace(value, replacements):
     return args
 
 
-def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2):
+def autoscale_values(args, df_filtered, weight, ylim_lower=0.1, legend_size=2):
     if hasattr(args, "autoscale"):
+        data_rows = mask_rows(df_filtered,
+                              regex=args.data,
+                              level=args.dataset_col)
+        mc_rows = mask_rows(df_filtered,
+                            regex="^((?!"+args.data+").)*$",
+                            level=args.dataset_col)
         if len(df_filtered.index.names) > 2:
             logger.warn("Autoscaling not supported for multi-index dataframes")
-            limits = args.limits
+            limits = args.limits if 'limits' in args else {}
         else:
             if 'y' in args.autoscale:
                 if weight == "n":
@@ -176,13 +182,13 @@ def process_one_file(infile, args):
             df_filtered["sumw2"] = df_filtered.n
         else:
             if "n" in df.columns:
-                data_rows_ungrouped = mask_rows(df_filtered,
-                                                regex=args.data,
-                                                level=args.dataset_col)
+                data_rows = mask_rows(df_filtered,
+                                      regex=args.data,
+                                      level=args.dataset_col)
                 for col in df_filtered.columns:
                     if col == "n":
                         continue
-                    df_filtered.loc[data_rows_ungrouped, col] = df["n"][data_rows_ungrouped]
+                    df_filtered.loc[data_rows, col] = df["n"][data_rows]
             df_filtered.columns = [
                 n.replace(weight + ":", "") for n in df_filtered.columns]
         if hasattr(args, "value_replacements"):
@@ -191,15 +197,9 @@ def process_one_file(infile, args):
                     continue
                 df_filtered.rename(replacements, level=column, inplace=True, axis="index")
                 df_filtered = df_filtered.groupby(level=df.index.names).sum()
-        data_rows = mask_rows(df_filtered,
-                              regex=args.data,
-                              level=args.dataset_col)
-        mc_rows = mask_rows(df_filtered,
-                            regex="^((?!"+args.data+").)*$",
-                            level=args.dataset_col)
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
-        args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size=legend_size)
+        args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size)
         dress_main_plots(plots, **vars(args))
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok

From 2d593d959931f3ca275d7929ca93fff16e446fa7 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 9 Jul 2021 11:59:20 +0100
Subject: [PATCH 10/31] poissonian error for 0 yield

---
 fast_plotter/plotting.py | 11 ++++++-----
 fast_plotter/utils.py    |  6 +++++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 5594caa..e37e7dd 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -374,10 +374,11 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
               (in_df_data, plot_data, kind_data, data_legend, "plot_data"),
               (in_df_signal, plot_signal, kind_signal, "Signal", "plot_signal"),
               ]
+    kwargs.setdefault("is_null_poissonian", False)
     for df, combine, style, label, var_name in config:
         if df is None or len(df) == 0:
             continue
-        merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2)
+        merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian'])
         actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style,
                       label=label, ax=main_ax, dataset_col=dataset_col,
                       dataset_colours=dataset_colours,
@@ -392,9 +393,9 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
     if summary.startswith("ratio"):
         main_ax.set_xlabel("")
         summed_data = _merge_datasets(
-            in_df_data, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2)
+            in_df_data, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian'])
         summed_sims = _merge_datasets(
-            in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2)
+            in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian'])
         if summary == "ratio-error-both":
             error = "both"
         elif summary == "ratio-error-markers":
@@ -411,7 +412,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
     return main_ax, summary_ax
 
 
-def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_from_sumw2=False):
+def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_from_sumw2=False, is_null_poissonian=False):
     if style == "stack":
         df = utils.stack_datasets(df, dataset_level=dataset_col)
     elif style == "sum":
@@ -419,7 +420,7 @@ def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_fr
     elif style:
         msg = "'{}' must be either 'sum', 'stack' or None. Got {}"
         raise RuntimeError(msg.format(param_name, style))
-    utils.calculate_error(df, do_rel_err=not err_from_sumw2)
+    utils.calculate_error(df, do_rel_err=not err_from_sumw2, is_null_poissonian=is_null_poissonian)
     return df
 
 
diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py
index b765452..d845d96 100644
--- a/fast_plotter/utils.py
+++ b/fast_plotter/utils.py
@@ -91,7 +91,7 @@ def split_data_sims(df, data_labels=["data"], dataset_level="dataset"):
     return split_df(df, first_values=data_labels, level=dataset_level)
 
 
-def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_rel_err=True):
+def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_rel_err=True, is_null_poissonian=False):
     if not inplace:
         df = df.copy()
     if do_rel_err:
@@ -105,6 +105,10 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r
         elif not do_rel_err and sumw2_label in column:
             err_name = column.replace(sumw2_label, err_label)
             df[err_name] = np.sqrt(df[column])
+    if is_null_poissonian:
+        print(err_name)
+        print(df.loc[df[err_name]<=0])
+        df[err_name] = df[err_name].apply(lambda x: x if x > 0 else 1.15)
     if not inplace:
         return df
 

From 18064d190c233ca0502c498b064c59083b3d39d7 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 9 Jul 2021 12:23:10 +0100
Subject: [PATCH 11/31] Minimum error is 1.15 for n >= 0

---
 fast_plotter/utils.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py
index d845d96..ee3c53b 100644
--- a/fast_plotter/utils.py
+++ b/fast_plotter/utils.py
@@ -102,13 +102,12 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r
             errs = np.true_divide(df[column], root_n)
             errs.loc[~np.isfinite(errs)] = np.nan
             df[err_name] = errs
-        elif not do_rel_err and sumw2_label in column:
+        else:
+        #elif not do_rel_err and sumw2_label in column:
             err_name = column.replace(sumw2_label, err_label)
             df[err_name] = np.sqrt(df[column])
-    if is_null_poissonian:
-        print(err_name)
-        print(df.loc[df[err_name]<=0])
-        df[err_name] = df[err_name].apply(lambda x: x if x > 0 else 1.15)
+        if is_null_poissonian:
+            df[err_name] = df[err_name].apply(lambda x: x if x > 1.15 else np.sqrt(1.15**2+x**2))
     if not inplace:
         return df
 

From 25d87bb481aff6400b78845d0ac81f0c04b1f04a Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 9 Jul 2021 12:51:10 +0100
Subject: [PATCH 12/31] pep8

---
 fast_plotter/plotting.py | 12 ++++++++----
 fast_plotter/utils.py    |  6 ++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index e37e7dd..15a0ef4 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -378,7 +378,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
     for df, combine, style, label, var_name in config:
         if df is None or len(df) == 0:
             continue
-        merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian'])
+        merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2,
+                                 is_null_poissonian=kwargs['is_null_poissonian'])
         actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style,
                       label=label, ax=main_ax, dataset_col=dataset_col,
                       dataset_colours=dataset_colours,
@@ -393,9 +394,11 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
     if summary.startswith("ratio"):
         main_ax.set_xlabel("")
         summed_data = _merge_datasets(
-            in_df_data, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian'])
+            in_df_data, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2,
+            is_null_poissonian=kwargs['is_null_poissonian'])
         summed_sims = _merge_datasets(
-            in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2, is_null_poissonian=kwargs['is_null_poissonian'])
+            in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2,
+            is_null_poissonian=kwargs['is_null_poissonian'])
         if summary == "ratio-error-both":
             error = "both"
         elif summary == "ratio-error-markers":
@@ -412,7 +415,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
     return main_ax, summary_ax
 
 
-def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_from_sumw2=False, is_null_poissonian=False):
+def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_from_sumw2=False,
+                    is_null_poissonian=False):
     if style == "stack":
         df = utils.stack_datasets(df, dataset_level=dataset_col)
     elif style == "sum":
diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py
index ee3c53b..3b30e2a 100644
--- a/fast_plotter/utils.py
+++ b/fast_plotter/utils.py
@@ -102,10 +102,12 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r
             errs = np.true_divide(df[column], root_n)
             errs.loc[~np.isfinite(errs)] = np.nan
             df[err_name] = errs
-        else:
-        #elif not do_rel_err and sumw2_label in column:
+        elif not do_rel_err and sumw2_label in column:
             err_name = column.replace(sumw2_label, err_label)
             df[err_name] = np.sqrt(df[column])
+        else:
+            err_name = ""
+            continue
         if is_null_poissonian:
             df[err_name] = df[err_name].apply(lambda x: x if x > 1.15 else np.sqrt(1.15**2+x**2))
     if not inplace:

From 4e1dc4ad17ca73ce49939e5a7e17ddfd640b0c1f Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 9 Jul 2021 12:53:55 +0100
Subject: [PATCH 13/31] Clean up: drop unused err_name assignment

---
 fast_plotter/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py
index 3b30e2a..1cfbf57 100644
--- a/fast_plotter/utils.py
+++ b/fast_plotter/utils.py
@@ -106,7 +106,6 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r
             err_name = column.replace(sumw2_label, err_label)
             df[err_name] = np.sqrt(df[column])
         else:
-            err_name = ""
             continue
         if is_null_poissonian:
             df[err_name] = df[err_name].apply(lambda x: x if x > 1.15 else np.sqrt(1.15**2+x**2))

From 8f5663d77c9b78a88d3721ad7a7de8edc23fb209 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Thu, 15 Jul 2021 23:30:59 +0100
Subject: [PATCH 14/31] Oversight in parsing of colours

---
 fast_plotter/plotting.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 1b5c879..d047dad 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -146,7 +146,7 @@ def __call__(self, col, **kwargs):
                         else self.dataset_colours[col.name] if col.name in self.dataset_colours.keys()\
                         else color
                     self.color = color
-                    self.other_dset_args['colour'] = color
+                    self.other_dset_args['tmp_colour'] = color
                     width = self.linewidth
                 else:
                     style = "-"
@@ -466,7 +466,9 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                 if (style == "other_dset_types") and (other_dset_args['plot_ratio']):
                     error = "both"
                     dset = other_dset_args['dset_type']
-                    color = dataset_colours[dset] if dset in dataset_colours else other_dset_args['colour']
+                    color = dataset_colours[dset] if dset in dataset_colours\
+                            else other_dset_args['colour'] if other_dset_args['colour']
+                            else other_dset_args['tmp_colour']
                     add_error = other_dset_args['add_error']
                     summed_dset = _merge_datasets(
                         df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2)

From e1b313213ca4cf123ce41ecf8afaa03494b4901f Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Thu, 15 Jul 2021 23:34:47 +0100
Subject: [PATCH 15/31] Fix syntax: add missing line-continuation backslash

---
 fast_plotter/plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index d047dad..4a7009d 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -467,7 +467,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                     error = "both"
                     dset = other_dset_args['dset_type']
                     color = dataset_colours[dset] if dset in dataset_colours\
-                            else other_dset_args['colour'] if other_dset_args['colour']
+                            else other_dset_args['colour'] if other_dset_args['colour']\
                             else other_dset_args['tmp_colour']
                     add_error = other_dset_args['add_error']
                     summed_dset = _merge_datasets(

From c88e7eddeb53a782874a409696697b5f8a90360b Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 23 Jul 2021 11:06:20 +0100
Subject: [PATCH 16/31] flake8: fix continuation-line indentation

---
 fast_plotter/__main__.py | 2 +-
 fast_plotter/plotting.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 63a202c..50f11b7 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -222,7 +222,7 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
                 if axis.lower() in "xy":
                     getattr(main_ax, "set_%slim" % axis)(*lims)
             elif lims is None:
-                continue            
+                continue        
             elif lims.endswith("%"):
                 main_ax.margins(**{axis: float(lims[:-1])})
         if xtickrotation:
diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 812b878..dfc8c27 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -472,8 +472,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                     error = "both"
                     dset = other_dset_args['dset_type']
                     color = dataset_colours[dset] if dset in dataset_colours\
-                            else other_dset_args['colour'] if other_dset_args['colour']\
-                            else other_dset_args['tmp_colour']
+                        else other_dset_args['colour'] if other_dset_args['colour']\
+                        else other_dset_args['tmp_colour']
                     add_error = other_dset_args['add_error']
                     summed_dset = _merge_datasets(
                         df, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2)

From d3172c6acad56119e0294092e2f382bb87139798 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 23 Jul 2021 11:10:31 +0100
Subject: [PATCH 17/31] flake8: remove trailing whitespace

---
 fast_plotter/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 50f11b7..24a96e3 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -222,7 +222,7 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
                 if axis.lower() in "xy":
                     getattr(main_ax, "set_%slim" % axis)(*lims)
             elif lims is None:
-                continue        
+                continue
             elif lims.endswith("%"):
                 main_ax.margins(**{axis: float(lims[:-1])})
         if xtickrotation:

From 540bac6222622cdc6d661c52c9bc1e0ff4992c31 Mon Sep 17 00:00:00 2001
From: David Anthony <zw18769@bristol.ac.uk>
Date: Thu, 29 Jul 2021 19:23:00 +0200
Subject: [PATCH 18/31] Add 'apply if' function to postproc stages

---
 fast_plotter/postproc/__main__.py | 10 +++++++++-
 fast_plotter/postproc/stages.py   |  5 +++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/fast_plotter/postproc/__main__.py b/fast_plotter/postproc/__main__.py
index 2a513f6..f0b519e 100644
--- a/fast_plotter/postproc/__main__.py
+++ b/fast_plotter/postproc/__main__.py
@@ -86,9 +86,17 @@ def main(args=None):
 
     sequence = read_processing_cfg(args.post_process, args.outdir)
 
+    apply_if = lambda df, stage: eval(str(stage.apply_if))
+
     for stage in sequence:
         logger.info("Working on %d dataframes", len(dfs))
-        dfs = stage(dfs)
+        if stage.apply_if:
+            apply_to = [apply_if(df[0], stage) for df in dfs]
+            if not all(apply_to):
+                logger.info(f"Skipping stage '{stage.name}' for invalid dataframes")
+                dfs = [stage(df) if apply_to[idx] else df for idx, df in enumerate(dfs)]
+        else:
+            dfs = stage(dfs)
         if debug:
             dump_debug_df(dfs, args.debug_dfs_query, args.debug_rows)
 
diff --git a/fast_plotter/postproc/stages.py b/fast_plotter/postproc/stages.py
index 806ffcb..bc31f2f 100644
--- a/fast_plotter/postproc/stages.py
+++ b/fast_plotter/postproc/stages.py
@@ -22,6 +22,11 @@ def __init__(self, **kwargs):
         self.kwargs = kwargs
         self.func = getattr(functions, self.func)
         self.doc = self.func.__doc__
+        if "apply_if" in kwargs:
+            self.apply_if = kwargs['apply_if']
+            kwargs.pop("apply_if", None)
+        else:
+            self.apply_if = False
 
     def __call__(self, dfs):
         if self.cardinality == "many-to-one":

From 7ef9551b40cfd110902cf16ab14dcf67ac10c23c Mon Sep 17 00:00:00 2001
From: David Anthony <zw18769@bristol.ac.uk>
Date: Thu, 29 Jul 2021 19:38:21 +0200
Subject: [PATCH 19/31] Revert "Add 'apply if' function to postproc stages"

This reverts commit 540bac6222622cdc6d661c52c9bc1e0ff4992c31.
---
 fast_plotter/postproc/__main__.py | 10 +---------
 fast_plotter/postproc/stages.py   |  5 -----
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/fast_plotter/postproc/__main__.py b/fast_plotter/postproc/__main__.py
index f0b519e..2a513f6 100644
--- a/fast_plotter/postproc/__main__.py
+++ b/fast_plotter/postproc/__main__.py
@@ -86,17 +86,9 @@ def main(args=None):
 
     sequence = read_processing_cfg(args.post_process, args.outdir)
 
-    apply_if = lambda df, stage: eval(str(stage.apply_if))
-
     for stage in sequence:
         logger.info("Working on %d dataframes", len(dfs))
-        if stage.apply_if:
-            apply_to = [apply_if(df[0], stage) for df in dfs]
-            if not all(apply_to):
-                logger.info(f"Skipping stage '{stage.name}' for invalid dataframes")
-                dfs = [stage(df) if apply_to[idx] else df for idx, df in enumerate(dfs)]
-        else:
-            dfs = stage(dfs)
+        dfs = stage(dfs)
         if debug:
             dump_debug_df(dfs, args.debug_dfs_query, args.debug_rows)
 
diff --git a/fast_plotter/postproc/stages.py b/fast_plotter/postproc/stages.py
index bc31f2f..806ffcb 100644
--- a/fast_plotter/postproc/stages.py
+++ b/fast_plotter/postproc/stages.py
@@ -22,11 +22,6 @@ def __init__(self, **kwargs):
         self.kwargs = kwargs
         self.func = getattr(functions, self.func)
         self.doc = self.func.__doc__
-        if "apply_if" in kwargs:
-            self.apply_if = kwargs['apply_if']
-            kwargs.pop("apply_if", None)
-        else:
-            self.apply_if = False
 
     def __call__(self, dfs):
         if self.cardinality == "many-to-one":

From 5b814006e5ce38ea0428f630b7d4902f17860267 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 5 Nov 2021 11:51:01 +0000
Subject: [PATCH 20/31] Hack for MR plots

---
 fast_plotter/__main__.py | 17 +++++++---
 fast_plotter/plotting.py | 70 ++++++++++++++++++++++++++++++++--------
 2 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 24a96e3..88a9fc6 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -12,7 +12,7 @@
 from .version import __version__ # noqa
 from .utils import read_binned_df, weighting_vars, binning_vars # noqa
 from .utils import decipher_filename, mask_rows  # noqa
-from .plotting import plot_all, add_annotations, is_intervals # noqa
+from .plotting import plot_all, add_annotations, is_intervals, annotate_xlabel_vals # noqa
 
 
 logger = logging.getLogger("fast_plotter")
@@ -46,6 +46,7 @@ def arg_parser(args=None):
                         help="Scale the MC yields by this lumi")
     parser.add_argument("-y", "--yscale", default="log", choices=["log", "linear"],
                         help="Use this scale for the y-axis")
+    parser.add_argument("-a", "--annotate_xlabel", action="store_true", help="Split x-axis information onto plot")
     parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)
 
     def split_equals(arg):
@@ -173,6 +174,7 @@ def process_one_file(infile, args):
     weights = weighting_vars(df)
     legend_size = args.legend_size if hasattr(args, "legend_size") else 2
     ran_ok = True
+    print(vars(args))
     for weight in weights:
         if args.weights and weight not in args.weights:
             continue
@@ -200,21 +202,23 @@ def process_one_file(infile, args):
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
         args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size)
-        dress_main_plots(plots, **vars(args))
+        dress_main_plots(plots, **vars(args), df=df_filtered)
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok
 
 
 def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
-                     limits={}, xtickrotation=None, **kwargs):
+                     limits={}, xtickrotation=None, df=None, annotate_xlabel=False, grid='both', **kwargs):
     for main_ax, summary_ax in plots.values():
         add_annotations(annotations, main_ax, summary_ax)
+        if annotate_xlabel:
+            met_cats=annotate_xlabel_vals(df, main_ax)
         if yscale:
             main_ax.set_yscale(yscale)
         if ylabel:
             main_ax.set_ylabel(ylabel)
         main_ax.legend(**legend).set_zorder(20)
-        main_ax.grid(True)
+        main_ax.grid(axis=grid)
         main_ax.set_axisbelow(True)
         for axis, lims in limits.items():
             if isinstance(lims, (tuple, list)):
@@ -225,6 +229,11 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
                 continue
             elif lims.endswith("%"):
                 main_ax.margins(**{axis: float(lims[:-1])})
+        if annotate_xlabel:
+            print(met_cats)
+            x_ticks = [i for i in range(len(met_cats))]
+            main_ax.set_xticks(x_ticks)
+            main_ax.set_xticklabels(met_cats)
         if xtickrotation:
             matplotlib.pyplot.xticks(rotation=xtickrotation)
 
diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index dfc8c27..e80b2c9 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -9,7 +9,6 @@
 import re
 logger = logging.getLogger(__name__)
 
-
 def change_brightness(color, amount):
     if amount is None:
         return
@@ -23,20 +22,64 @@ def change_brightness(color, amount):
     c = colorsys.rgb_to_hls(*color)
     return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2])
 
-
+def annotate_xlabel_vals(df, ax, regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*?(?=\d))(?P<multi2>.*?(?=,\s)),\s(?P<MET>.*)"):
+    df=df.reset_index()
+    met_cats=[re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))).groups()[3:][0] for category in df['category'].unique()]
+    cats=[re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))).groups()[:3] for category in df['category'].unique()]
+    n_cats = len(cats)
+    for i, cat in enumerate(cats):
+        if i==0:
+            a1,a2,a3=cat
+            old_cat = cat
+            labels = {i:{0:{val.replace(" ",""):0}} for i,val in enumerate(cat)}
+        else:
+           for j, val in enumerate(cat):
+               val = val.replace(" ", "")
+               if old_cat[j].replace(" ","") == val:
+                   continue
+               else:
+                  labels[j][i]={val:0}
+               if j == len(cat)-1:
+                  old_cat=cat
+    for depth, label in labels.items():
+        for i, split in enumerate(label):
+            label_str = list(label[split].keys())[0]
+            if i == len(label) - 1:
+                label_length = len(cats) - split
+            else:
+                label_length = dict(enumerate(label))[i+1] - split
+            labels[depth][split][label_str]=label_length
+    label_positions = {}
+    for depth, label in labels.items():
+        label_positions[depth] = {}
+        for left_edge, len_dict in label.items():
+            label_str = list(len_dict.keys())[0]
+            position = left_edge + (len_dict[label_str]/2)
+            if label_str in label_positions[depth]:
+                label_positions[depth][label_str].append(position-0.5) 
+            else:
+                label_positions[depth][label_str] = [position-0.5]
+
+    for depth, label_dict in label_positions.items():
+        y = (0.80 - 0.05*(depth + 1))
+        for label, xvals in label_dict.items():
+            for x in xvals:
+                x = (x+0.5)/n_cats
+                ax.text(x, y, label, fontsize=12-depth, transform=ax.transAxes, ha='center', weight='medium')
+    return met_cats
+    
 def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dataset_col="dataset",
              yscale="log", lumi=None, annotations=[], dataset_order=None,
              continue_errors=True, bin_variable_replacements={}, colourmap="nipy_spectral",
-             figsize=None, other_dset_types={}, **kwargs):
+             figsize=None, other_dset_types={}, grid='both', **kwargs):
     figures = {}
-
     dimensions = utils.binning_vars(df)
     ran_ok = True
 
     if len(dimensions) == 1:
         df = utils.rename_index(df, bin_variable_replacements)
         figures[(("yscale", yscale),)] = plot_1d(
-            df, yscale=yscale, annotations=annotations)
+            df, yscale=yscale, annotations=annotations, grid=grid)
 
     if dataset_col in dimensions:
         dimensions = tuple(dim for dim in dimensions if dim != dataset_col)
@@ -54,7 +97,7 @@ def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dat
                 plot = plot_1d_many(projected, data=data, signal=signal,
                                     dataset_col=dataset_col, scale_sims=lumi,
                                     colourmap=colourmap, dataset_order=dataset_order,
-                                    figsize=figsize, other_dset_args=other_dset_types, **kwargs
+                                    figsize=figsize, other_dset_args=other_dset_types, grid=grid, **kwargs
                                     )
                 figures[(("project", dim), ("yscale", yscale))] = plot
             except Exception as e:
@@ -177,7 +220,7 @@ def __call__(self, col, **kwargs):
 
 def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
                   dataset_colours=None, colourmap="nipy_spectral",
-                  dataset_order=None, other_cfg_args={}):
+                  dataset_order=None, other_cfg_args={}, grid='both'):
     expected_xs = df.index.unique(x_axis).values
     if kind == "scatter":
         draw(ax, "errorbar", x=df.reset_index()[x_axis], ys=["y", "yerr"], y=df[y], yerr=df[yerr],
@@ -363,7 +406,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                  kind_data="scatter", kind_sims="fill-error-last", kind_signal="line",
                  scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral",
                  dataset_order=None, figsize=(5, 6), show_over_underflow=False,
-                 dataset_colours=None, err_from_sumw2=False, data_legend="Data", other_dset_args={}, **kwargs):
+                 dataset_colours=None, err_from_sumw2=False, data_legend="Data", other_dset_args={}, grid='both', **kwargs):
     y = "sumw"
     yvar = "sumw2"
     yerr = "err"
@@ -439,7 +482,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
         actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style,
                       label=label, ax=main_ax, dataset_col=dataset_col,
                       dataset_colours=dataset_colours,
-                      colourmap=colourmap, dataset_order=dataset_order, other_cfg_args=other_cfg_args)
+                      colourmap=colourmap, dataset_order=dataset_order, other_cfg_args=other_cfg_args, grid=grid)
     main_ax.set_xlabel(x_axis)
 
     if not summary:
@@ -541,18 +584,17 @@ def add_annotations(annotations, ax, summary_ax=None):
         cfg.setdefault("xycoords", "axes fraction")
         ax.annotate(s, xy=xy, **cfg)
 
-
-def plot_1d(df, kind="line", yscale="lin"):
+def plot_1d(df, kind="line", yscale="lin", grid='both'):
     fig, ax = plt.subplots(1)
     df["sumw"].plot(kind=kind)
     ax.set_axisbelow(True)
-    plt.grid(True)
+    plt.grid(axis=grid)
     plt.yscale(yscale)
     return fig
 
 
 def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="Data / MC",
-               color="k", zorder=22, add_error=True):
+               color="k", zorder=22, add_error=True, grid='both'):
     # make sure both sides agree with the binning
     merged = data.join(sims, how="left", lsuffix="data", rsuffix="sims")
     data = merged.filter(like="data", axis="columns").fillna(0)
@@ -589,7 +631,7 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2], ylabel="D
                  y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1, alpha=0.7, zorder=zorder-1)
 
     ax.set_ylim(ylim)
-    ax.grid(True)
+    ax.grid(axis=grid)
     ax.set_axisbelow(True)
     ax.set_xlabel(x)
     ax.set_ylabel(ylabel)

From 14935621bd4c4509be10ec18c2752abb9ea1f4b4 Mon Sep 17 00:00:00 2001
From: DBAnthony <43857191+DBAnthony@users.noreply.github.com>
Date: Fri, 5 Nov 2021 13:04:34 +0000
Subject: [PATCH 21/31] Update __main__.py

rm spurious print statements
---
 fast_plotter/__main__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 88a9fc6..fb945fd 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -174,7 +174,6 @@ def process_one_file(infile, args):
     weights = weighting_vars(df)
     legend_size = args.legend_size if hasattr(args, "legend_size") else 2
     ran_ok = True
-    print(vars(args))
     for weight in weights:
         if args.weights and weight not in args.weights:
             continue
@@ -230,7 +229,6 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
             elif lims.endswith("%"):
                 main_ax.margins(**{axis: float(lims[:-1])})
         if annotate_xlabel:
-            print(met_cats)
             x_ticks = [i for i in range(len(met_cats))]
             main_ax.set_xticks(x_ticks)
             main_ax.set_xticklabels(met_cats)

From 1f37ab94058370c0e6ace877d2e4bfa3e5458540 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 5 Nov 2021 17:55:25 +0000
Subject: [PATCH 22/31] rm print

---
 fast_plotter/__main__.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 88a9fc6..e465b05 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -97,7 +97,6 @@ def recursive_replace(value, replacements):
             if isinstance(value, six.string_types):
                 return Template(value).safe_substitute(replacements)
             return value
-
         replacements = dict(args.variables)
         args = Namespace(**recursive_replace(vars(args), replacements))
 
@@ -174,7 +173,6 @@ def process_one_file(infile, args):
     weights = weighting_vars(df)
     legend_size = args.legend_size if hasattr(args, "legend_size") else 2
     ran_ok = True
-    print(vars(args))
     for weight in weights:
         if args.weights and weight not in args.weights:
             continue
@@ -230,7 +228,6 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
             elif lims.endswith("%"):
                 main_ax.margins(**{axis: float(lims[:-1])})
         if annotate_xlabel:
-            print(met_cats)
             x_ticks = [i for i in range(len(met_cats))]
             main_ax.set_xticks(x_ticks)
             main_ax.set_xticklabels(met_cats)

From 81568d5282b68bb72db60b4a397c2c4bce8948d2 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 5 Nov 2021 17:59:06 +0000
Subject: [PATCH 23/31] ratio plot grid options

---
 fast_plotter/plotting.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index e80b2c9..64e4d4f 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -507,7 +507,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
         kwargs.setdefault("ratio_ylim", [0., 2.])
         kwargs.setdefault("ratio_ylabel", "Data / MC")
         plot_ratio(summed_data, summed_sims, x=x_axis,
-                   y=y, yerr=yerr, ax=summary_ax, error=error,
+                   y=y, yerr=yerr, ax=summary_ax, error=error, grid=grid,
                    ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"])
         if other_dset_args:
             for df, combine, style, label, var_name, other_dset_args in config:
@@ -523,7 +523,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                     if summed_data is not None:
                         plot_ratio(summed_data, summed_dset, x=x_axis,
                                    y=y, yerr=yerr, ax=summary_ax, error=error, zorder=21,
-                                   ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"],
+                                   ylim=kwargs["ratio_ylim"], ylabel=kwargs["ratio_ylabel"], grid=grid,
                                    color=color, add_error=add_error)
     else:
         raise RuntimeError(err_msg)

From 5574ba49b8d1cf830f4677b333506755412e311e Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Tue, 9 Nov 2021 18:42:46 +0000
Subject: [PATCH 24/31] Alternative regex for MR labels

---
 fast_plotter/plotting.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 64e4d4f..45427bb 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -22,20 +22,22 @@ def change_brightness(color, amount):
     c = colorsys.rgb_to_hls(*color)
     return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2])
 
-def annotate_xlabel_vals(df, ax, regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*?(?=\d))(?P<multi2>.*?(?=,\s)),\s(?P<MET>.*)"):
+def annotate_xlabel_vals(df, ax, regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*?(?=\d))(?P<multi2>.*?(?=,\s)),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
     df=df.reset_index()
-    met_cats=[re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))).groups()[3:][0] for category in df['category'].unique()]
-    cats=[re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$"))).groups()[:3] for category in df['category'].unique()]
+    re_compiler = lambda category,regex: re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$")))
+    compile_correct_regex = lambda category: (re_compiler(category,regex) if re_compiler(category,regex) is not None else re_compiler(category,backup_regex)).groups()
+    met_cats=[compile_correct_regex(category)[3:][0] for category in df['category'].unique()]
+    cats=[compile_correct_regex(category)[:3] for category in df['category'].unique()]
     n_cats = len(cats)
     for i, cat in enumerate(cats):
         if i==0:
             a1,a2,a3=cat
             old_cat = cat
-            labels = {i:{0:{val.replace(" ",""):0}} for i,val in enumerate(cat)}
+            labels = {i:{0:{val.strip():0}} for i,val in enumerate(cat)}
         else:
            for j, val in enumerate(cat):
-               val = val.replace(" ", "")
-               if old_cat[j].replace(" ","") == val:
+               val = val.strip()
+               if old_cat[j].strip() == val:
                    continue
                else:
                   labels[j][i]={val:0}

From 72deea9ac38b6b48103c8726ce00c9de4109479a Mon Sep 17 00:00:00 2001
From: DBAnthony <43857191+DBAnthony@users.noreply.github.com>
Date: Tue, 12 Apr 2022 19:46:59 +0100
Subject: [PATCH 25/31] Update __main__.py

autoscaling updates
---
 fast_plotter/__main__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index e465b05..63d1f16 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -103,8 +103,9 @@ def recursive_replace(value, replacements):
     return args
 
 
-def autoscale_values(args, df_filtered, weight, ylim_lower=0.1, legend_size=2):
+def autoscale_values(args, df_filtered, weight, ylim_lower=0.5, legend_size=2):
     if hasattr(args, "autoscale"):
+        legend_size = int(legend_size)
         data_rows = mask_rows(df_filtered,
                               regex=args.data,
                               level=args.dataset_col)
@@ -129,7 +130,6 @@ def autoscale_values(args, df_filtered, weight, ylim_lower=0.1, legend_size=2):
                                 else legend_size if ylim_upper_floor > 2
                                 else legend_size)  # Buffer for legend
                     ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer))
-                    ylim_lower = 1e-1
                 else:
                     buffer_factor = 1 + 0.5*legend_size
                     ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y)))))  # Buffer for legend
@@ -255,4 +255,4 @@ def save_plots(infile, weight, plots, outdir, extensions):
 
 
 if __name__ == "__main__":
-    main()
+    main()                                 

From e040b8a60012e501af2c659ab2de536f06ddb1c3 Mon Sep 17 00:00:00 2001
From: DBAnthony <43857191+DBAnthony@users.noreply.github.com>
Date: Tue, 12 Apr 2022 19:55:47 +0100
Subject: [PATCH 26/31] Update plotting.py

---
 fast_plotter/plotting.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 45427bb..59bbe7d 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -38,6 +38,8 @@ def annotate_xlabel_vals(df, ax, regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*
            for j, val in enumerate(cat):
                val = val.strip()
                if old_cat[j].strip() == val:
+                   if j == len(cat)-1:
+                      old_cat=cat
                    continue
                else:
                   labels[j][i]={val:0}
@@ -58,7 +60,7 @@ def annotate_xlabel_vals(df, ax, regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*
             label_str = list(len_dict.keys())[0]
             position = left_edge + (len_dict[label_str]/2)
             if label_str in label_positions[depth]:
-                label_positions[depth][label_str].append(position-0.5) 
+                label_positions[depth][label_str].append(position-0.5)
             else:
                 label_positions[depth][label_str] = [position-0.5]
 
@@ -69,7 +71,7 @@ def annotate_xlabel_vals(df, ax, regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*
                 x = (x+0.5)/n_cats
                 ax.text(x, y, label, fontsize=12-depth, transform=ax.transAxes, ha='center', weight='medium')
     return met_cats
-    
+
 def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dataset_col="dataset",
              yscale="log", lumi=None, annotations=[], dataset_order=None,
              continue_errors=True, bin_variable_replacements={}, colourmap="nipy_spectral",
@@ -681,4 +683,5 @@ def draw(ax, method, x, ys, **kwargs):
     if ticks is not None:
         ax.set_xticks(x)
         ax.set_xticklabels(ticks)
-    return x, ticks
+    return x, ticks                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
+~                                           

From 293f158e434d5e83fe04e8d2e8205ef20eb41f79 Mon Sep 17 00:00:00 2001
From: DBAnthony <43857191+DBAnthony@users.noreply.github.com>
Date: Wed, 13 Apr 2022 13:37:00 +0100
Subject: [PATCH 27/31] Update plotting.py

Remove accidental addition
---
 fast_plotter/plotting.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 59bbe7d..7ad175e 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -684,4 +684,3 @@ def draw(ax, method, x, ys, **kwargs):
         ax.set_xticks(x)
         ax.set_xticklabels(ticks)
     return x, ticks                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
-~                                           

From 19cc838362bccda27e55f15a63ae9ca8944ed959 Mon Sep 17 00:00:00 2001
From: DBAnthony <43857191+DBAnthony@users.noreply.github.com>
Date: Tue, 16 Aug 2022 15:42:08 +0100
Subject: [PATCH 28/31] Customisation from config updates

---
 fast_plotter/__main__.py |  9 +++++++--
 fast_plotter/plotting.py | 19 +++++++++++--------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 63d1f16..36dfa51 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -140,7 +140,10 @@ def autoscale_values(args, df_filtered, weight, ylim_lower=0.5, legend_size=2):
                     ylim = args.limits['y'] if 'y' in args.limits else None
                 else:
                     ylim = None
-                df_aboveMin = df_filtered.copy()
+                if 'x' in args.autoscale:
+                    df_aboveMin = df_filtered.loc[df_filtered['sumw'] > ylim_lower/args.lumi]
+                else:
+                    df_aboveMin = df_filtered.copy()
             xcol = df_aboveMin.index.get_level_values(1)
             if 'x' in args.autoscale:  # Determine x-axis limits
                 if is_intervals(xcol):  # If x-axis is interval, take right and leftmost intervals unless they are inf
@@ -200,6 +203,7 @@ def process_one_file(infile, args):
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
         args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size)
+        print(args.limits)
         dress_main_plots(plots, **vars(args), df=df_filtered)
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok
@@ -215,6 +219,7 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
             main_ax.set_yscale(yscale)
         if ylabel:
             main_ax.set_ylabel(ylabel)
+        legend['ncol'] = int(legend['ncol'])
         main_ax.legend(**legend).set_zorder(20)
         main_ax.grid(axis=grid)
         main_ax.set_axisbelow(True)
@@ -255,4 +260,4 @@ def save_plots(infile, weight, plots, outdir, extensions):
 
 
 if __name__ == "__main__":
-    main()                                 
+    main()
diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 7ad175e..69688f5 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -22,12 +22,14 @@ def change_brightness(color, amount):
     c = colorsys.rgb_to_hls(*color)
     return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2])
 
-def annotate_xlabel_vals(df, ax, regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*?(?=\d))(?P<multi2>.*?(?=,\s)),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
+#def annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*?(?=\d))(?P<multi2>.*?(?=,\s)),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
+#def annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P<year>.*?(?=,\s)),\s(?P<category>.*?(?=,\s))(?P<dummy>()),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
+def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P<year>.*?(?=,\s)),\s(?P<category>.*?(?=,\s))(?P<dummy>()),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
     df=df.reset_index()
     re_compiler = lambda category,regex: re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$")))
     compile_correct_regex = lambda category: (re_compiler(category,regex) if re_compiler(category,regex) is not None else re_compiler(category,backup_regex)).groups()
-    met_cats=[compile_correct_regex(category)[3:][0] for category in df['category'].unique()]
-    cats=[compile_correct_regex(category)[:3] for category in df['category'].unique()]
+    met_cats=[compile_correct_regex(category)[3:][-1] for category in df[binning_col].unique()]
+    cats=[compile_correct_regex(category)[:3] for category in df[binning_col].unique()]
     n_cats = len(cats)
     for i, cat in enumerate(cats):
         if i==0:
@@ -60,18 +62,19 @@ def annotate_xlabel_vals(df, ax, regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*
             label_str = list(len_dict.keys())[0]
             position = left_edge + (len_dict[label_str]/2)
             if label_str in label_positions[depth]:
-                label_positions[depth][label_str].append(position-0.5)
+                label_positions[depth][label_str].append(position-0.5) 
             else:
                 label_positions[depth][label_str] = [position-0.5]
 
     for depth, label_dict in label_positions.items():
-        y = (0.80 - 0.05*(depth + 1))
+        #y = (0.80 - 0.05*(depth + 1))
+        y = (0.95 - 0.05*(depth + 1))
         for label, xvals in label_dict.items():
             for x in xvals:
                 x = (x+0.5)/n_cats
                 ax.text(x, y, label, fontsize=12-depth, transform=ax.transAxes, ha='center', weight='medium')
     return met_cats
-
+    
 def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dataset_col="dataset",
              yscale="log", lumi=None, annotations=[], dataset_order=None,
              continue_errors=True, bin_variable_replacements={}, colourmap="nipy_spectral",
@@ -201,7 +204,7 @@ def __call__(self, col, **kwargs):
                     color = "k"
                     width = self.linewidth
             else:
-                color = None
+                color = color
                 label = col.name
                 width = 2
                 style = "--"
@@ -683,4 +686,4 @@ def draw(ax, method, x, ys, **kwargs):
     if ticks is not None:
         ax.set_xticks(x)
         ax.set_xticklabels(ticks)
-    return x, ticks                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
+    return x, ticks

From a5383bc14614e147022a30c753e975ae1dca0186 Mon Sep 17 00:00:00 2001
From: DBAnthony <43857191+DBAnthony@users.noreply.github.com>
Date: Tue, 16 Aug 2022 15:49:54 +0100
Subject: [PATCH 29/31] Update plotting.py

Revert local regex changes
---
 fast_plotter/plotting.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 69688f5..0daeac1 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -22,9 +22,7 @@ def change_brightness(color, amount):
     c = colorsys.rgb_to_hls(*color)
     return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2])
 
-#def annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*?(?=\d))(?P<multi2>.*?(?=,\s)),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
-#def annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P<year>.*?(?=,\s)),\s(?P<category>.*?(?=,\s))(?P<dummy>()),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
-def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P<year>.*?(?=,\s)),\s(?P<category>.*?(?=,\s))(?P<dummy>()),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
+def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*?(?=\d))(?P<multi2>.*?(?=,\s)),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
     df=df.reset_index()
     re_compiler = lambda category,regex: re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$")))
     compile_correct_regex = lambda category: (re_compiler(category,regex) if re_compiler(category,regex) is not None else re_compiler(category,backup_regex)).groups()
@@ -62,13 +60,12 @@ def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P<year>.*?(?=,\s
             label_str = list(len_dict.keys())[0]
             position = left_edge + (len_dict[label_str]/2)
             if label_str in label_positions[depth]:
-                label_positions[depth][label_str].append(position-0.5) 
+                label_positions[depth][label_str].append(position-0.5)
             else:
                 label_positions[depth][label_str] = [position-0.5]
 
     for depth, label_dict in label_positions.items():
-        #y = (0.80 - 0.05*(depth + 1))
-        y = (0.95 - 0.05*(depth + 1))
+        y = (0.80 - 0.05*(depth + 1))
         for label, xvals in label_dict.items():
             for x in xvals:
                 x = (x+0.5)/n_cats

From 9004b36cad38f9e4a071eafad3b6586d7ba77645 Mon Sep 17 00:00:00 2001
From: DBAnthony <43857191+DBAnthony@users.noreply.github.com>
Date: Tue, 16 Aug 2022 15:50:55 +0100
Subject: [PATCH 30/31] Update __main__.py

---
 fast_plotter/__main__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 36dfa51..47346c1 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -203,7 +203,6 @@ def process_one_file(infile, args):
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
         args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size)
-        print(args.limits)
         dress_main_plots(plots, **vars(args), df=df_filtered)
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok

From 9c65f73e3c746bcae590f0ce5412166806e3be87 Mon Sep 17 00:00:00 2001
From: DBAnthony <43857191+DBAnthony@users.noreply.github.com>
Date: Wed, 17 Aug 2022 12:14:06 +0100
Subject: [PATCH 31/31] Update plotting.py

---
 fast_plotter/plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 0daeac1..43a8697 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -22,7 +22,7 @@ def change_brightness(color, amount):
     c = colorsys.rgb_to_hls(*color)
     return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2])
 
-def annotate_xlabel_vals(df, ax, binning_col='region', regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*?(?=\d))(?P<multi2>.*?(?=,\s)),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
+def annotate_xlabel_vals(df, ax, binning_col='category', regex="(?P<category>.*?(?=\s))\s(?P<multi1>\d.*?(?=\d))(?P<multi2>.*?(?=,\s)),\s(?P<MET>.*)", backup_regex="(?P<category>.*?(?=\,))(?P<dummy>()),\s(?P<MET>.*)"):
     df=df.reset_index()
     re_compiler = lambda category,regex: re.compile(regex).match(str(category.replace("$","").replace("\infty","$\infty$")))
     compile_correct_regex = lambda category: (re_compiler(category,regex) if re_compiler(category,regex) is not None else re_compiler(category,backup_regex)).groups()