From e4600ca0b12bbc572b74edf96bcff23cb9ba87e6 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Wed, 7 Jul 2021 19:18:21 +0100 Subject: [PATCH 1/5] Optional autoscaling of axes --- fast_plotter/__main__.py | 80 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 54221e5..33bcc39 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -5,12 +5,15 @@ import six import logging import matplotlib +import numpy as np +import numbers +import pandas as pd matplotlib.use('Agg') matplotlib.rcParams.update({'figure.autolayout': True}) from .version import __version__ # noqa -from .utils import read_binned_df, weighting_vars # noqa +from .utils import read_binned_df, weighting_vars, binning_vars # noqa from .utils import decipher_filename, mask_rows # noqa -from .plotting import plot_all, add_annotations # noqa +from .plotting import plot_all, add_annotations, is_intervals # noqa logger = logging.getLogger("fast_plotter") @@ -100,11 +103,69 @@ def recursive_replace(value, replacements): return args +def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2): + if hasattr(args, "autoscale"): + if len(df_filtered.index.names) > 2: + logger.warn("Autoscaling not supported for multi-index dataframes") + limits = args.limits + else: + xcol = df_filtered.index.get_level_values(1) + if 'y' in args.autoscale: + if weight == "n": + max_y = df_filtered['sumw'].max() + else: + max_mc = df_filtered.loc[mc_rows, 'sumw'].max()*args.lumi + max_data = df_filtered.loc[data_rows, 'n'].max() if 'n' in df_filtered.columns else 0.1 + max_y = max(max_mc, max_data) + max_y = max_y if max_y >= 1 else 1 + if args.yscale == 'log': + ylim_upper_floor = int(np.floor(np.log10(max_y))) + y_buffer = (legend_size + 1 if ylim_upper_floor > 3 + else legend_size if ylim_upper_floor > 2 + else legend_size) # Buffer for legend + ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer)) + ylim_lower = 1e-1 + else: + buffer_factor = 1 + 0.5*legend_size + ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y))))) # Buffer for legend + ylim = [ylim_lower, ylim_upper] + df_aboveMin = df_filtered.loc[df_filtered['sumw'] > ylim_lower/args.lumi] + else: + if 'limits' in args: + ylim = args.limits['y'] if 'y' in args.limits else None + else: + ylim = None + df_aboveMin = df_filtered.copy() + if 'x' in args.autoscale: # Determine x-axis limits + if is_intervals(xcol): # If x-axis is interval, take right and leftmost intervals unless they are inf + max_x = xcol.right.max() if np.isfinite(xcol.right.max()) else xcol.left.max() + min_x = xcol.left.min() if np.isfinite(xcol.left.min()) else xcol.right.min() + if not np.isfinite(max_x) and hasattr(args, "show_over_underflow") and args.show_over_underflow: + logger.warn("Cannot autoscale overflow bin for x-axis. Removing.") + xlim = [min_x, max_x] + elif isinstance(xcol, numbers.Number): + xlim = [xcol.min, xcol.max] + else: + xlim = [-0.5, len(xcol.unique()) - 0.5] # For non-numeric x-axis (e.g. mtn range) + else: + if 'limits' in args: + xlim = args.limits['x'] if 'x' in args.limits else None + else: + xlim = None + + xlim = None if xlim is not None and np.NaN in xlim else xlim + ylim = None if ylim is not None and np.NaN in ylim else ylim + limits = {"x": xlim, "y": ylim} + else: + limits = args.limits if 'limits' in args else {} + return limits + def process_one_file(infile, args): logger.info("Processing: " + infile) df = read_binned_df(infile, dtype={args.dataset_col: str}) weights = weighting_vars(df) + legend_size = args.legend_size if hasattr(args, "legend_size") else 2 ran_ok = True for weight in weights: if args.weights and weight not in args.weights: @@ -115,13 +176,13 @@ def process_one_file(infile, args): df_filtered["sumw2"] = df_filtered.n else: if "n" in df.columns: - data_rows = mask_rows(df_filtered, - regex=args.data, - level=args.dataset_col) + data_rows_ungrouped = mask_rows(df_filtered, + regex=args.data, + level=args.dataset_col) for col in df_filtered.columns: if col == "n": continue - df_filtered.loc[data_rows, col] = df["n"][data_rows] + df_filtered.loc[data_rows_ungrouped, col] = df["n"][data_rows_ungrouped] df_filtered.columns = [ n.replace(weight + ":", "") for n in df_filtered.columns] if hasattr(args, "value_replacements"): @@ -130,8 +191,15 @@ def process_one_file(infile, args): continue df_filtered.rename(replacements, level=column, inplace=True, axis="index") df_filtered = df_filtered.groupby(level=df.index.names).sum() + data_rows = mask_rows(df_filtered, + regex=args.data, + level=args.dataset_col) + mc_rows = mask_rows(df_filtered, + regex="^((?!"+args.data+").)*$", + level=args.dataset_col) plots, ok = plot_all(df_filtered, **vars(args)) ran_ok &= ok + args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size = legend_size) dress_main_plots(plots, **vars(args)) save_plots(infile, weight, plots, args.outdir, args.extension) return ran_ok From a1043b48f3231056a6c3b2550c22629dbfa58671 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Wed, 7 Jul 2021 19:23:23 +0100 Subject: [PATCH 2/5] pep8 compliance --- fast_plotter/__main__.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 33bcc39..edd52c0 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -103,6 +103,7 @@ def recursive_replace(value, replacements): return args + def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2): if hasattr(args, "autoscale"): if len(df_filtered.index.names) > 2: @@ -119,12 +120,12 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0 max_y = max(max_mc, max_data) max_y = max_y if max_y >= 1 else 1 if args.yscale == 'log': - ylim_upper_floor = int(np.floor(np.log10(max_y))) - y_buffer = (legend_size + 1 if ylim_upper_floor > 3 - else legend_size if ylim_upper_floor > 2 - else legend_size) # Buffer for legend - ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer)) - ylim_lower = 1e-1 + ylim_upper_floor = int(np.floor(np.log10(max_y))) + y_buffer = (legend_size + 1 if ylim_upper_floor > 3 + else legend_size if ylim_upper_floor > 2 + else legend_size) # Buffer for legend + ylim_upper = float('1e'+str(ylim_upper_floor+y_buffer)) + ylim_lower = 1e-1 else: buffer_factor = 1 + 0.5*legend_size ylim_upper = round(max_y*buffer_factor, -int(np.floor(np.log10(abs(max_y))))) # Buffer for legend @@ -199,7 +200,7 @@ def process_one_file(infile, args): level=args.dataset_col) plots, ok = plot_all(df_filtered, **vars(args)) ran_ok &= ok - args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size = legend_size) + args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size=legend_size) dress_main_plots(plots, **vars(args)) save_plots(infile, weight, plots, args.outdir, args.extension) return ran_ok From 8cffd4e49f5d2c6ead03e1dd30ceb70e881ace9e Mon Sep 17 00:00:00 2001 From: David Anthony Date: Wed, 7 Jul 2021 19:36:45 +0100 Subject: [PATCH 3/5] pep8, fix xscaling --- fast_plotter/__main__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index edd52c0..cd13b18 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -7,7 +7,6 @@ import matplotlib import numpy as np import numbers -import pandas as pd matplotlib.use('Agg') matplotlib.rcParams.update({'figure.autolayout': True}) from .version import __version__ # noqa @@ -110,7 +109,6 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0 logger.warn("Autoscaling not supported for multi-index dataframes") limits = args.limits else: - xcol = df_filtered.index.get_level_values(1) if 'y' in args.autoscale: if weight == "n": max_y = df_filtered['sumw'].max() @@ -137,6 +135,7 @@ def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0 else: ylim = None df_aboveMin = df_filtered.copy() + xcol = df_aboveMin.index.get_level_values(1) if 'x' in args.autoscale: # Determine x-axis limits if is_intervals(xcol): # If x-axis is interval, take right and leftmost intervals unless they are inf max_x = xcol.right.max() if np.isfinite(xcol.right.max()) else xcol.left.max() From 0298d90805d1e652545abe6ce0e3db3414743e47 Mon Sep 17 00:00:00 2001 From: David Anthony Date: Fri, 9 Jul 2021 11:19:49 +0100 Subject: [PATCH 4/5] Cleaning up --- fast_plotter/__main__.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index cd13b18..00c4c4a 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -103,11 +103,17 @@ def recursive_replace(value, replacements): return args -def autoscale_values(args, df_filtered, weight, data_rows, mc_rows, ylim_lower=0.1, legend_size=2): +def autoscale_values(args, df_filtered, weight, ylim_lower=0.1, legend_size=2): if hasattr(args, "autoscale"): + data_rows = mask_rows(df_filtered, + regex=args.data, + level=args.dataset_col) + mc_rows = mask_rows(df_filtered, + regex="^((?!"+args.data+").)*$", + level=args.dataset_col) if len(df_filtered.index.names) > 2: logger.warn("Autoscaling not supported for multi-index dataframes") - limits = args.limits + limits = args.limits if 'limits' in args else {} else: if 'y' in args.autoscale: if weight == "n": @@ -176,13 +182,13 @@ def process_one_file(infile, args): df_filtered["sumw2"] = df_filtered.n else: if "n" in df.columns: - data_rows_ungrouped = mask_rows(df_filtered, - regex=args.data, - level=args.dataset_col) + data_rows = mask_rows(df_filtered, + regex=args.data, + level=args.dataset_col) for col in df_filtered.columns: if col == "n": continue - df_filtered.loc[data_rows_ungrouped, col] = df["n"][data_rows_ungrouped] + df_filtered.loc[data_rows, col] = df["n"][data_rows] df_filtered.columns = [ n.replace(weight + ":", "") for n in df_filtered.columns] if hasattr(args, "value_replacements"): @@ -191,15 +197,9 @@ def process_one_file(infile, args): continue df_filtered.rename(replacements, level=column, inplace=True, axis="index") df_filtered = df_filtered.groupby(level=df.index.names).sum() - data_rows = mask_rows(df_filtered, - regex=args.data, - level=args.dataset_col) - mc_rows = mask_rows(df_filtered, - regex="^((?!"+args.data+").)*$", - level=args.dataset_col) plots, ok = plot_all(df_filtered, **vars(args)) ran_ok &= ok - args.limits = autoscale_values(args, df_filtered, weight, data_rows, mc_rows, legend_size=legend_size) + args.limits = autoscale_values(args, df_filtered, weight, legend_size=legend_size) dress_main_plots(plots, **vars(args)) save_plots(infile, weight, plots, args.outdir, args.extension) return ran_ok From 547312e509798f070c37a384a2ea29244b20889e Mon Sep 17 00:00:00 2001 From: David Anthony Date: Mon, 19 Jul 2021 19:41:54 +0100 Subject: [PATCH 5/5] Fix oversight in limit application --- fast_plotter/__main__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 00c4c4a..5ff8eaa 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -221,6 +221,8 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, lims = map(float, lims) if axis.lower() in "xy": getattr(main_ax, "set_%slim" % axis)(*lims) + elif lims is None: + continue elif lims.endswith("%"): main_ax.margins(**{axis: float(lims[:-1])}) if xtickrotation: