From e4600ca0b12bbc572b74edf96bcff23cb9ba87e6 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Wed, 7 Jul 2021 19:18:21 +0100
Subject: [PATCH 1/5] Optional autoscaling of axes

---
 fast_plotter/__main__.py | 80 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 74 insertions(+), 6 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 54221e5..33bcc39 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -5,12 +5,15 @@
 import six
 import logging
 import matplotlib
+import numpy as np
+import numbers
+import pandas as pd
 matplotlib.use('Agg')
 matplotlib.rcParams.update({'figure.autolayout': True})
 from .version import __version__ # noqa
-from .utils import read_binned_df, weighting_vars # noqa
+from .utils import read_binned_df, weighting_vars, binning_vars # noqa
 from .utils import decipher_filename, mask_rows  # noqa
-from .plotting import plot_all, add_annotations # noqa
+from .plotting import plot_all, add_annotations, is_intervals # noqa
 
 
 logger = logging.getLogger("fast_plotter")
@@ -100,11 +103,69 @@ def recursive_replace(value, replacements):
 
     return args
 
+def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2):
+    if hasattr(args, "autoscale"):
+        if len(df_filtered.index.names) > 2:
+            logger.warn("Autoscaling not supported for multi-index dataframes")
+            limits = args.limits
+        else:
+            xcol = df_filtered.index.get_level_values(1)
+            if 'y' in args.autoscale:
+                if weight == "n":
+                    max_y = df_filtered['sumw'].max()
+                else:
+                    max_mc = df_filtered.loc[mc_rows, 'sumw'].max()*args.lumi
+                    max_data = df_filtered.loc[data_rows, 'n'].max() if 'n' in df_filtered.columns else 0.1
+                    max_y = max(max_mc, max_data)
+                max_y = max_y if max_y >= 1 else 1
+                if args.yscale == 'log':
+                   ylim_upper_floor = int(np.floor(np.log10(max_y)))
+                   y_buffer = (legend_size + 1 if ylim_upper_floor > 3
+                               else legend_size if ylim_upper_floor > 2
+                               else legend_size)  # Buffer for legend
+                   ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer))
+                   ylim_lower = 1e-1
+                else:
+                    buffer_factor = 1 + 0.5*legend_size
+                    ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y)))))  # Buffer for legend
+                ylim = [ylim_lower, ylim_upper]
+                df_aboveMin = df_filtered.loc[df_filtered['sumw'] > ylim_lower/args.lumi]
+            else:
+                if 'limits' in args:
+                    ylim = args.limits['y'] if 'y' in args.limits else None
+                else:
+                    ylim = None
+                df_aboveMin = df_filtered.copy()
+            if 'x' in args.autoscale:  # Determine x-axis limits
+                if is_intervals(xcol):  # If x-axis is interval, take right and leftmost intervals unless they are inf
+                    max_x = xcol.right.max() if np.isfinite(xcol.right.max()) else xcol.left.max()
+                    min_x = xcol.left.min() if np.isfinite(xcol.left.min()) else xcol.right.min()
+                    if not np.isfinite(max_x) and hasattr(args, "show_over_underflow") and args.show_over_underflow:
+                        logger.warn("Cannot autoscale overflow bin for x-axis. Removing.")
+                    xlim = [min_x, max_x]
+                elif isinstance(xcol, numbers.Number):
+                    xlim = [xcol.min, xcol.max]
+                else:
+                    xlim = [-0.5, len(xcol.unique()) - 0.5]  # For non-numeric x-axis (e.g. mtn range)
+            else:
+                if 'limits' in args:
+                    xlim = args.limits['x'] if 'x' in args.limits else None
+                else:
+                    xlim = None
+
+            xlim = None if xlim is not None and np.NaN in xlim else xlim
+            ylim = None if ylim is not None and np.NaN in ylim else ylim
+            limits = {"x": xlim, "y": ylim}
+    else:
+        limits = args.limits if 'limits' in args else {}
+    return limits
+
 
 def process_one_file(infile, args):
     logger.info("Processing: " + infile)
     df = read_binned_df(infile, dtype={args.dataset_col: str})
     weights = weighting_vars(df)
+    legend_size = args.legend_size if hasattr(args, "legend_size") else 2
     ran_ok = True
     for weight in weights:
         if args.weights and weight not in args.weights:
@@ -115,13 +176,13 @@ def process_one_file(infile, args):
             df_filtered["sumw2"] = df_filtered.n
         else:
             if "n" in df.columns:
-                data_rows = mask_rows(df_filtered,
-                                      regex=args.data,
-                                      level=args.dataset_col)
+                data_rows_ungrouped = mask_rows(df_filtered,
+                                                regex=args.data,
+                                                level=args.dataset_col)
                 for col in df_filtered.columns:
                     if col == "n":
                         continue
-                    df_filtered.loc[data_rows, col] = df["n"][data_rows]
+                    df_filtered.loc[data_rows_ungrouped, col] = df["n"][data_rows_ungrouped]
             df_filtered.columns = [
                 n.replace(weight + ":", "") for n in df_filtered.columns]
         if hasattr(args, "value_replacements"):
@@ -130,8 +191,15 @@ def process_one_file(infile, args):
                     continue
                 df_filtered.rename(replacements, level=column, inplace=True, axis="index")
                 df_filtered = df_filtered.groupby(level=df.index.names).sum()
+        data_rows = mask_rows(df_filtered,
+                              regex=args.data,
+                              level=args.dataset_col)
+        mc_rows = mask_rows(df_filtered,
+                            regex="^((?!"+args.data+").)*$",
+                            level=args.dataset_col)
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
+        args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size = legend_size)
         dress_main_plots(plots, **vars(args))
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok

From a1043b48f3231056a6c3b2550c22629dbfa58671 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Wed, 7 Jul 2021 19:23:23 +0100
Subject: [PATCH 2/5] pep8 compliance

---
 fast_plotter/__main__.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 33bcc39..edd52c0 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -103,6 +103,7 @@ def recursive_replace(value, replacements):
 
     return args
 
+
 def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2):
     if hasattr(args, "autoscale"):
         if len(df_filtered.index.names) > 2:
@@ -119,12 +120,12 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0
                     max_y = max(max_mc, max_data)
                 max_y = max_y if max_y >= 1 else 1
                 if args.yscale == 'log':
-                   ylim_upper_floor = int(np.floor(np.log10(max_y)))
-                   y_buffer = (legend_size + 1 if ylim_upper_floor > 3
-                               else legend_size if ylim_upper_floor > 2
-                               else legend_size)  # Buffer for legend
-                   ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer))
-                   ylim_lower = 1e-1
+                    ylim_upper_floor = int(np.floor(np.log10(max_y)))
+                    y_buffer = (legend_size + 1 if ylim_upper_floor > 3
+                                else legend_size if ylim_upper_floor > 2
+                                else legend_size)  # Buffer for legend
+                    ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer))
+                    ylim_lower = 1e-1
                 else:
                     buffer_factor = 1 + 0.5*legend_size
                     ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y)))))  # Buffer for legend
@@ -199,7 +200,7 @@ def process_one_file(infile, args):
                             level=args.dataset_col)
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
-        args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size = legend_size)
+        args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size=legend_size)
         dress_main_plots(plots, **vars(args))
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok

From 8cffd4e49f5d2c6ead03e1dd30ceb70e881ace9e Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Wed, 7 Jul 2021 19:36:45 +0100
Subject: [PATCH 3/5] pep8, fix xscaling

---
 fast_plotter/__main__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index edd52c0..cd13b18 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -7,7 +7,6 @@
 import matplotlib
 import numpy as np
 import numbers
-import pandas as pd
 matplotlib.use('Agg')
 matplotlib.rcParams.update({'figure.autolayout': True})
 from .version import __version__ # noqa
@@ -110,7 +109,6 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0
             logger.warn("Autoscaling not supported for multi-index dataframes")
             limits = args.limits
         else:
-            xcol = df_filtered.index.get_level_values(1)
             if 'y' in args.autoscale:
                 if weight == "n":
                     max_y = df_filtered['sumw'].max()
@@ -137,6 +135,7 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0
                 else:
                     ylim = None
                 df_aboveMin = df_filtered.copy()
+            xcol = df_aboveMin.index.get_level_values(1)
             if 'x' in args.autoscale:  # Determine x-axis limits
                 if is_intervals(xcol):  # If x-axis is interval, take right and leftmost intervals unless they are inf
                     max_x = xcol.right.max() if np.isfinite(xcol.right.max()) else xcol.left.max()

From 0298d90805d1e652545abe6ce0e3db3414743e47 Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Fri, 9 Jul 2021 11:19:49 +0100
Subject: [PATCH 4/5] Cleaning up

---
 fast_plotter/__main__.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index cd13b18..00c4c4a 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -103,11 +103,17 @@ def recursive_replace(value, replacements):
     return args
 
 
-def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2):
+def autoscale_values(args, df_filtered, weight, ylim_lower=0.1, legend_size=2):
     if hasattr(args, "autoscale"):
+        data_rows = mask_rows(df_filtered,
+                              regex=args.data,
+                              level=args.dataset_col)
+        mc_rows = mask_rows(df_filtered,
+                            regex="^((?!"+args.data+").)*$",
+                            level=args.dataset_col)
         if len(df_filtered.index.names) > 2:
             logger.warn("Autoscaling not supported for multi-index dataframes")
-            limits = args.limits
+            limits = args.limits if 'limits' in args else {}
         else:
             if 'y' in args.autoscale:
                 if weight == "n":
@@ -176,13 +182,13 @@ def process_one_file(infile, args):
             df_filtered["sumw2"] = df_filtered.n
         else:
             if "n" in df.columns:
-                data_rows_ungrouped = mask_rows(df_filtered,
-                                                regex=args.data,
-                                                level=args.dataset_col)
+                data_rows = mask_rows(df_filtered,
+                                      regex=args.data,
+                                      level=args.dataset_col)
                 for col in df_filtered.columns:
                     if col == "n":
                         continue
-                    df_filtered.loc[data_rows_ungrouped, col] = df["n"][data_rows_ungrouped]
+                    df_filtered.loc[data_rows, col] = df["n"][data_rows]
             df_filtered.columns = [
                 n.replace(weight + ":", "") for n in df_filtered.columns]
         if hasattr(args, "value_replacements"):
@@ -191,15 +197,9 @@ def process_one_file(infile, args):
                     continue
                 df_filtered.rename(replacements, level=column, inplace=True, axis="index")
                 df_filtered = df_filtered.groupby(level=df.index.names).sum()
-        data_rows = mask_rows(df_filtered,
-                              regex=args.data,
-                              level=args.dataset_col)
-        mc_rows = mask_rows(df_filtered,
-                            regex="^((?!"+args.data+").)*$",
-                            level=args.dataset_col)
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
-        args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size=legend_size)
+        args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size)
         dress_main_plots(plots, **vars(args))
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok

From 547312e509798f070c37a384a2ea29244b20889e Mon Sep 17 00:00:00 2001
From: David Anthony <danthony@cern.ch>
Date: Mon, 19 Jul 2021 19:41:54 +0100
Subject: [PATCH 5/5] Fix oversight in limit application

---
 fast_plotter/__main__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 00c4c4a..5ff8eaa 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -221,6 +221,8 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
                 lims = map(float, lims)
                 if axis.lower() in "xy":
                     getattr(main_ax, "set_%slim" % axis)(*lims)
+            elif lims is None:
+                continue
             elif lims.endswith("%"):
                 main_ax.margins(**{axis: float(lims[:-1])})
         if xtickrotation: