FAST-HEP · DBAnthony · Jul 6, 2021 · Jul 6, 2021 · Jul 6, 2021 · Jul 7, 2021
diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
@@ -5,12 +5,14 @@
 import six
 import logging
 import matplotlib
+import numpy as np
+import numbers
 matplotlib.use('Agg')
 matplotlib.rcParams.update({'figure.autolayout': True})
 from .version import __version__ # noqa
-from .utils import read_binned_df, weighting_vars # noqa
+from .utils import read_binned_df, weighting_vars, binning_vars # noqa
 from .utils import decipher_filename, mask_rows  # noqa
-from .plotting import plot_all, add_annotations # noqa
+from .plotting import plot_all, add_annotations, is_intervals, annotate_xlabel_vals # noqa
 
 
 logger = logging.getLogger("fast_plotter")
@@ -44,6 +46,7 @@ def arg_parser(args=None):
                         help="Scale the MC yields by this lumi")
     parser.add_argument("-y", "--yscale", default="log", choices=["log", "linear"],
                         help="Use this scale for the y-axis")
+    parser.add_argument("-a", "--annotate_xlabel", action="store_true", help="Split x-axis information onto plot")
     parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)
 
     def split_equals(arg):
@@ -94,17 +97,84 @@ def recursive_replace(value, replacements):
             if isinstance(value, six.string_types):
                 return Template(value).safe_substitute(replacements)
             return value
-
         replacements = dict(args.variables)
         args = Namespace(**recursive_replace(vars(args), replacements))
 
     return args
 
 
+def autoscale_values(args, df_filtered, weight, ylim_lower=0.5, legend_size=2):
+    if hasattr(args, "autoscale"):
+        legend_size = int(legend_size)
+        data_rows = mask_rows(df_filtered,
+                              regex=args.data,
+                              level=args.dataset_col)
+        mc_rows = mask_rows(df_filtered,
+                            regex="^((?!"+args.data+").)*$",
+                            level=args.dataset_col)
+        if len(df_filtered.index.names) > 2:
+            logger.warn("Autoscaling not supported for multi-index dataframes")
+            limits = args.limits if 'limits' in args else {}
+        else:
+            if 'y' in args.autoscale:
+                if weight == "n":
+                    max_y = df_filtered['sumw'].max()
+                else:
+                    max_mc = df_filtered.loc[mc_rows, 'sumw'].max()*args.lumi
+                    max_data = df_filtered.loc[data_rows, 'n'].max() if 'n' in df_filtered.columns else 0.1
+                    max_y = max(max_mc, max_data)
+                max_y = max_y if max_y >= 1 else 1
+                if args.yscale == 'log':
+                    ylim_upper_floor = int(np.floor(np.log10(max_y)))
+                    y_buffer = (legend_size + 1 if ylim_upper_floor > 3
+                                else legend_size if ylim_upper_floor > 2
+                                else legend_size)  # Buffer for legend
+                    ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer))
+                else:
+                    buffer_factor = 1 + 0.5*legend_size
+                    ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y)))))  # Buffer for legend
+                ylim = [ylim_lower, ylim_upper]
+                df_aboveMin = df_filtered.loc[df_filtered['sumw'] > ylim_lower/args.lumi]
+            else:
+                if 'limits' in args:
+                    ylim = args.limits['y'] if 'y' in args.limits else None
+                else:
+                    ylim = None
+                if 'x' in args.autoscale:
+                    df_aboveMin = df_filtered.loc[df_filtered['sumw'] > ylim_lower/args.lumi]
+                else:
+                    df_aboveMin = df_filtered.copy()
+            xcol = df_aboveMin.index.get_level_values(1)
+            if 'x' in args.autoscale:  # Determine x-axis limits
+                if is_intervals(xcol):  # If x-axis is interval, take right and leftmost intervals unless they are inf
+                    max_x = xcol.right.max() if np.isfinite(xcol.right.max()) else xcol.left.max()
+                    min_x = xcol.left.min() if np.isfinite(xcol.left.min()) else xcol.right.min()
+                    if not np.isfinite(max_x) and hasattr(args, "show_over_underflow") and args.show_over_underflow:
+                        logger.warn("Cannot autoscale overflow bin for x-axis. Removing.")
+                    xlim = [min_x, max_x]
+                elif isinstance(xcol, numbers.Number):
+                    xlim = [xcol.min, xcol.max]
+                else:
+                    xlim = [-0.5, len(xcol.unique()) - 0.5]  # For non-numeric x-axis (e.g. mtn range)
+            else:
+                if 'limits' in args:
+                    xlim = args.limits['x'] if 'x' in args.limits else None
+                else:
+                    xlim = None
+
+            xlim = None if xlim is not None and np.NaN in xlim else xlim
+            ylim = None if ylim is not None and np.NaN in ylim else ylim
+            limits = {"x": xlim, "y": ylim}
+    else:
+        limits = args.limits if 'limits' in args else {}
+    return limits
+
+
 def process_one_file(infile, args):
     logger.info("Processing: " + infile)
     df = read_binned_df(infile, dtype={args.dataset_col: str})
     weights = weighting_vars(df)
+    legend_size = args.legend_size if hasattr(args, "legend_size") else 2
     ran_ok = True
     for weight in weights:
         if args.weights and weight not in args.weights:
@@ -132,29 +202,39 @@ def process_one_file(infile, args):
                 df_filtered = df_filtered.groupby(level=df.index.names).sum()
         plots, ok = plot_all(df_filtered, **vars(args))
         ran_ok &= ok
-        dress_main_plots(plots, **vars(args))
+        args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size)
+        dress_main_plots(plots, **vars(args), df=df_filtered)
         save_plots(infile, weight, plots, args.outdir, args.extension)
     return ran_ok
 
 
 def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
-                     limits={}, xtickrotation=None, **kwargs):
+                     limits={}, xtickrotation=None, df=None, annotate_xlabel=False, grid='both', **kwargs):
     for main_ax, summary_ax in plots.values():
-        add_annotations(annotations, main_ax)
+        add_annotations(annotations, main_ax, summary_ax)
+        if annotate_xlabel:
+            met_cats=annotate_xlabel_vals(df, main_ax)
         if yscale:
             main_ax.set_yscale(yscale)
         if ylabel:
             main_ax.set_ylabel(ylabel)
-        main_ax.legend(**legend)
-        main_ax.grid(True)
+        legend['ncol'] = int(legend['ncol'])
+        main_ax.legend(**legend).set_zorder(20)
+        main_ax.grid(axis=grid)
         main_ax.set_axisbelow(True)
         for axis, lims in limits.items():
             if isinstance(lims, (tuple, list)):
                 lims = map(float, lims)
                 if axis.lower() in "xy":
                     getattr(main_ax, "set_%slim" % axis)(*lims)
+            elif lims is None:
+                continue
             elif lims.endswith("%"):
                 main_ax.margins(**{axis: float(lims[:-1])})
+        if annotate_xlabel:
+            x_ticks = [i for i in range(len(met_cats))]
+            main_ax.set_xticks(x_ticks)
+            main_ax.set_xticklabels(met_cats)
         if xtickrotation:
             matplotlib.pyplot.xticks(rotation=xtickrotation)