diff --git a/lib/plotting_functions.py b/lib/plotting_functions.py index c980f9ab5..fa3a7f78f 100644 --- a/lib/plotting_functions.py +++ b/lib/plotting_functions.py @@ -15,8 +15,12 @@ Determine central longitude for maps. global_average(fld, wgt, verbose=False) pure numpy global average. -spatial_average(indata, weights=None, spatial_dims=None) +spatial_average_or_sum(indata, weights=None, spatial_dims=None, model_component=None, method="mean") + Compute spatial average or sum, depending on method +spatial_average(indata, weights=None, spatial_dims=None, model_component=None) Compute spatial average +spatial_sum(indata, weights=None, spatial_dims=None, model_component=None) + Compute spatial sum wgt_rmse(fld1, fld2, wgt): Calculate the area-weighted RMSE. annual_mean(data, whole_years=False, time_name='time'): @@ -38,7 +42,8 @@ Plots a vector field on a map. plot_map_and_save(wks, case_nickname, base_nickname, case_climo_yrs, baseline_climo_yrs, - mdlfld, obsfld, diffld, **kwargs): + mdlfld, obsfld, diffld, pctld, + model_component, **kwargs): Map plots of `mdlfld`, `obsfld`, and their difference, `diffld`. pres_from_hybrid(psfc, hya, hyb, p0=100000.): Converts a hybrid level to a pressure @@ -369,18 +374,36 @@ def global_average(fld, wgt, verbose=False): return np.ma.average(avg1) +def spatial_average(*args, **kwargs): + """ + Wrapper function for spatial_average_or_sum(..., method='mean') + """ + return spatial_average_or_sum(*args, **kwargs, method="mean") + +def spatial_sum(*args, **kwargs): + """ + Wrapper function for spatial_average_or_sum(..., method='sum') + """ + return spatial_average_or_sum(*args, **kwargs, method="sum") -def spatial_average(indata, weights=None, spatial_dims=None): +# TODO, should there be some unit conversions for this defined in a variable dictionary? +def spatial_average_or_sum( + indata, weights=None, spatial_dims=None, model_component=None, method="mean" +): """Compute spatial average. 
Parameters ---------- indata : xr.DataArray input data - weights : np.ndarray or xr.DataArray, optional + model_component: str + the model component whose outputs are being plotted + weights : np.ndarray or xr.DataArray, optional (except required for model_component=="lnd") the weights to apply, see Notes for default behavior spatial_dims : list, optional list of dimensions to average, see Notes for default behavior + method : str, optional + "mean" (default) for weighted mean, "sum" for weighted sum Returns ------- @@ -389,18 +412,27 @@ def spatial_average(indata, weights=None, spatial_dims=None): Notes ----- - When `weights` is not provided, tries to find sensible values. + When `weights` is not provided, tries to find sensible values if `method=="mean"`, else errors. If there is a 'lat' dimension, use `cos(lat)`. If there is a 'ncol' dimension, looks for `area` in `indata`. Otherwise, set to equal weights. Makes an attempt to identify the spatial variables when `spatial_dims` is None. - Will average over `ncol` if present, and then will check for `lat` and `lon`. + Will operate over `ncol` if present, and then will check for `lat` and `lon`. When none of those three are found, raise an AdfError. 
    """
     import warnings
 
-    if weights is None:
+    # Convert synonym
+    if method == "average":
+        method = "mean"
+
+    if weights is None:
+        if method != "mean":
+            raise NotImplementedError(
+                f"Decide whether/how to set default weights for method '{method}'"
+            )
+
         #Calculate spatial weights:
         if 'lat' in indata.coords:
             weights = np.cos(np.deg2rad(indata.lat))
@@ -421,11 +453,13 @@ def spatial_average(indata, weights=None, spatial_dims=None):
         #End if
 
     #Apply weights to input data:
-    weighted = indata.weighted(weights)
+    weighted = indata.weighted(weights.fillna(0))
 
     # we want to average over all non-time dimensions
     if spatial_dims is None:
-        if 'ncol' in indata.dims:
+        if model_component == 'lnd' and 'lndgrid' in indata.dims:
+            spatial_dims = ['lndgrid']
+        elif model_component != 'lnd' and 'ncol' in indata.dims:
             spatial_dims = ['ncol']
         else:
             spatial_dims = [dimname for dimname in indata.dims if (('lat' in dimname.lower()) or ('lon' in dimname.lower()))]
@@ -435,63 +469,14 @@ def spatial_average(indata, weights=None, spatial_dims=None):
         #to be removed via the application of the mean. So in order to avoid
         #possibly unexpected behavior due to arrays being incorrectly dimensioned
         #(which could be difficult to debug) the ADF should die here:
-        emsg = "spatial_average: No spatial dimensions were identified,"
+        emsg = "spatial_average_or_sum: No spatial dimensions were identified,"
         emsg += " so can not perform average."
         raise AdfError(emsg)
 
+    if method == "sum":
+        return weighted.sum(dim=spatial_dims, keep_attrs=True)
     return weighted.mean(dim=spatial_dims, keep_attrs=True)
 
 
-# TODO, maybe just adapt the spatial average above?
-# TODO, should there be some unit conversions for this defined in a variable dictionary?
-def spatial_average_lnd(indata, weights, spatial_dims=None):
-    """Compute spatial average.
- - Parameters - ---------- - indata : xr.DataArray - input data - weights xr.DataArray - weights (area * landfrac) - spatial_dims : list, optional - list of dimensions to average, see Notes for default behavior - - Returns - ------- - xr.DataArray - weighted average of `indata` - - Notes - ----- - weights are required - - Makes an attempt to identify the spatial variables when `spatial_dims` is None. - Will average over `ncol` if present, and then will check for `lat` and `lon`. - When none of those three are found, raise an AdfError. - """ - import warnings - - #Apply weights to input data: - weighted = indata*weights - - # we want to average over all non-time dimensions - if spatial_dims is None: - if 'lndgrid' in indata.dims: - spatial_dims = ['lndgrid'] - else: - spatial_dims = [dimname for dimname in indata.dims if (('lat' in dimname.lower()) or - ('lon' in dimname.lower()))] - if not spatial_dims: - #Scripts using this function likely expect the horizontal dimensions - #to be removed via the application of the mean. So in order to avoid - #possibly unexpected behavior due to arrays being incorrectly dimensioned - #(which could be difficult to debug) the ADF should die here: - emsg = "spatial_average: No spatial dimensions were identified," - emsg += " so can not perform average." - raise AdfError(emsg) - - - return weighted.sum(dim=spatial_dims, keep_attrs=True) - def wgt_rmse(fld1, fld2, wgt): """Calculate the area-weighted RMSE. @@ -710,11 +695,11 @@ def domain_stats(data, domain, unstructured=False): Notes ----- Currently assumes 'lat' is a dimension and uses `cos(lat)` as weight. 
- Should use `spatial_average` + Should use `spatial_average_or_sum` See Also -------- - spatial_average + spatial_average_or_sum """ if not unstructured: @@ -1255,8 +1240,8 @@ def plot_map_vect_and_save(wks, case_nickname, base_nickname, def plot_map_and_save(wks, case_nickname, base_nickname, case_climo_yrs, baseline_climo_yrs, - mdlfld, obsfld, diffld, pctld, unstructured=False, - obs=False, **kwargs): + mdlfld, obsfld, diffld, pctld, model_component, + unstructured=False, obs=False, **kwargs): """This plots mdlfld, obsfld, diffld in a 3-row panel plot of maps. Parameters @@ -1279,6 +1264,8 @@ def plot_map_and_save(wks, case_nickname, base_nickname, input difference data pctld : xarray.DataArray input percent difference data + model_component : str + the model component whose outputs are being plotted kwargs : dict, optional variable-specific options, See Notes diff --git a/scripts/analysis/lmwg_table.py b/scripts/analysis/lmwg_table.py index 9d7d814b1..38e399ed9 100644 --- a/scripts/analysis/lmwg_table.py +++ b/scripts/analysis/lmwg_table.py @@ -272,7 +272,7 @@ def lmwg_table(adf): # Note: that could be 'lev' which should trigger different behavior # Note: we should be able to handle (lat, lon) or (ncol,) cases, at least # data = pf.spatial_average(data) # changes data "in place" - data = pf.spatial_average_lnd(data,weights) # hard code for land + data = pf.spatial_average(data, weights=weights, model_component="lnd") # TODO, make this optional for lmwg_tables of amwg_table # In order to get correct statistics, average to annual or seasonal data = pf.annual_mean(data, whole_years=True, time_name='time') diff --git a/scripts/plotting/global_latlon_map.py b/scripts/plotting/global_latlon_map.py index ebed009df..7ecbaba7c 100644 --- a/scripts/plotting/global_latlon_map.py +++ b/scripts/plotting/global_latlon_map.py @@ -347,6 +347,7 @@ def global_latlon_map(adfobj): [syear_cases[case_idx],eyear_cases[case_idx]], [syear_baseline,eyear_baseline], mseasons[s], 
oseasons[s], dseasons[s], pseasons[s], + model_component=adfobj.model_component, obs=adfobj.compare_obs, unstructured=unstructured, **vres) #Add plot to website (if enabled): @@ -391,6 +392,7 @@ def global_latlon_map(adfobj): [syear_baseline,eyear_baseline], mseasons[s].sel(lev=pres), oseasons[s].sel(lev=pres), dseasons[s].sel(lev=pres), pseasons[s].sel(lev=pres), + model_component=adfobj.model_component, obs=adfobj.compare_obs, unstructured=unstructured, **vres) #Add plot to website (if enabled): diff --git a/scripts/plotting/global_mean_timeseries.py b/scripts/plotting/global_mean_timeseries.py index 74bb3ee93..23c95bd4d 100644 --- a/scripts/plotting/global_mean_timeseries.py +++ b/scripts/plotting/global_mean_timeseries.py @@ -89,7 +89,7 @@ def global_mean_timeseries(adfobj): # End if # reference time series global average - ref_ts_da_ga = pf.spatial_average(ref_ts_da, weights=None, spatial_dims=None) + ref_ts_da_ga = pf.spatial_average(ref_ts_da, weights=None, spatial_dims=None, model_component=adfobj.model_component) # annually averaged ref_ts_da = pf.annual_mean(ref_ts_da_ga, whole_years=True, time_name="time") @@ -148,7 +148,7 @@ def global_mean_timeseries(adfobj): # End if # Gather spatial avg for test case - c_ts_da_ga = pf.spatial_average(c_ts_da) + c_ts_da_ga = pf.spatial_average(c_ts_da, model_component=adfobj.model_component) case_ts[labels[case_name]] = pf.annual_mean(c_ts_da_ga) # If this case is 3-d or missing variable, then break the loop and go to next variable diff --git a/scripts/plotting/global_mean_timeseries_lnd.py b/scripts/plotting/global_mean_timeseries_lnd.py index 7a1436df2..fbc6a9874 100644 --- a/scripts/plotting/global_mean_timeseries_lnd.py +++ b/scripts/plotting/global_mean_timeseries_lnd.py @@ -30,8 +30,11 @@ def global_mean_timeseries_lnd(adfobj): Include the CESM2 LENS result if it can be found. 
""" + model_component = "lnd" + #Notify user that script has started: - print("\n Generating global mean time series plots...") + msg = "\n Generating global mean time series plots..." + print(f"{msg}\n {'-' * (len(msg)-3)}") # Gather ADF configurations plot_loc = get_plot_loc(adfobj) @@ -41,6 +44,8 @@ def global_mean_timeseries_lnd(adfobj): # Loop over variables for field in adfobj.diag_var_list: + #Notify user of variable being plotted: + print(f"\t - time series plot for {field}") # Check res for any variable specific options that need to be used BEFORE going to the plot: if field in res: @@ -49,80 +54,78 @@ def global_mean_timeseries_lnd(adfobj): adfobj.debug_log(f"global_mean_timeseries: Found variable defaults for {field}") else: vres = {} - - # Extract variables: - # including a simpler way to get a dataset timeseries - baseline_name = adfobj.get_baseline_info("cam_case_name", required=True) - input_ts_baseline = Path(adfobj.get_baseline_info("cam_ts_loc", required=True)) - # TODO hard wired for single case name: - case_name = adfobj.get_cam_info("cam_case_name", required=True)[0] - input_ts_case = Path(adfobj.get_cam_info("cam_ts_loc", required=True)[0]) - - #Create list of time series files present for variable: - baseline_ts_filenames = f'{baseline_name}.*.{field}.*nc' - baseline_ts_files = sorted(input_ts_baseline.glob(baseline_ts_filenames)) - case_ts_filenames = f'{case_name}.*.{field}.*nc' - case_ts_files = sorted(input_ts_case.glob(case_ts_filenames)) - - ref_ts_ds = pf.load_dataset(baseline_ts_files) - weights = ref_ts_ds.landfrac * ref_ts_ds.area - ref_ts_da= ref_ts_ds[field] - - c_ts_ds = pf.load_dataset(case_ts_files) - c_weights = c_ts_ds.landfrac * c_ts_ds.area - c_ts_da= c_ts_ds[field] - - #Extract category (if available): - web_category = vres.get("category", None) - - # get variable defaults - scale_factor = vres.get('scale_factor', 1) - scale_factor_table = vres.get('scale_factor_table', 1) - add_offset = vres.get('add_offset', 0) - 
avg_method = vres.get('avg_method', 'mean') - if avg_method == 'mean': - weights = weights/weights.sum() - c_weights = c_weights/c_weights.sum() - # get units for variable - ref_ts_da.attrs['units'] = vres.get("new_unit", ref_ts_da.attrs.get('units', 'none')) - ref_ts_da.attrs['units'] = vres.get("table_unit", ref_ts_da.attrs.get('units', 'none')) - units = ref_ts_da.attrs['units'] - - # scale for plotting, if needed - ref_ts_da = ref_ts_da * scale_factor * scale_factor_table - ref_ts_da.attrs['units'] = units - c_ts_da = c_ts_da * scale_factor * scale_factor_table - c_ts_da.attrs['units'] = units + #End if + + if model_component == "lnd": + # Extract variables, get defaults, and scale for plotting + weights, ref_ts_da, c_weights, c_ts_da, units, avg_method = _extract_and_scale_vars(adfobj, field, vres) + else: + # reference time series (DataArray) + ref_ts_da = adfobj.data.load_reference_timeseries_da(field) + weights = None + units = vres.get("new_unit","[-]") + + base_name = adfobj.data.ref_case_label # Check to see if this field is available + validate_dims = False if ref_ts_da is None: - print( - f"\t Variable named {field} provides Nonetype. Skipping this variable" - ) - validate_dims = True - else: + if model_component == "lnd": + print( + f"\t WARNING: Variable {field} for case '{base_name}' provides Nonetype. Skipping this variable" + ) + validate_dims = True + else: + if not adfobj.compare_obs: + print( + f"\t WARNING: Variable {field} for case '{base_name}' provides Nonetype. Skipping this variable" + ) + continue + elif model_component == "lnd": validate_dims = False # reference time series global average # TODO, make this more general for land? 
-        ref_ts_da_ga = pf.spatial_average_lnd(ref_ts_da, weights=weights)
-        c_ts_da_ga = pf.spatial_average_lnd(c_ts_da, weights=c_weights)
+        ref_ts_da_ga = pf.spatial_average_or_sum(ref_ts_da, weights=weights, model_component=model_component, method=avg_method)
+        if model_component == "lnd":
+            c_ts_da_ga = pf.spatial_average_or_sum(c_ts_da, weights=c_weights, model_component=model_component, method=avg_method)
 
         # annually averaged
         ref_ts_da = pf.annual_mean(ref_ts_da_ga, whole_years=True, time_name="time")
-        c_ts_da = pf.annual_mean(c_ts_da_ga, whole_years=True, time_name="time")
+        if model_component == "lnd":
+            c_ts_da = pf.annual_mean(c_ts_da_ga, whole_years=True, time_name="time")
 
         # check if variable has a lev dimension
-        has_lev_ref = pf.zm_validate_dims(ref_ts_da)[1]
-        if has_lev_ref:
+        has_lev_case = pf.zm_validate_dims(ref_ts_da)[1]
+        if has_lev_case:
             print(
                 f"Variable named {field} has a lev dimension, which does not work with this script."
             )
             continue
+        else:
+            # check data dimensions:
+            has_lat_ref, has_lev_ref = pf.zm_validate_dims(ref_ts_da)
 
-        ## SPECIAL SECTION -- CESM2 LENS DATA:
-        lens2_data = Lens2Data(
-            field
-        )  # Provides access to LENS2 dataset when available (class defined below)
+            # check if this is a "2-d" variable:
+            if has_lev_ref:
+                print(
+                    f"\t WARNING: Variable {field} has a lev dimension for '{base_name}', which does not work with this script."
+                )
+                continue
+            # End if
+
+            # check if there is a lat dimension:
+            if not has_lat_ref:
+                print(
+                    f"\t WARNING: Variable {field} is missing a lat dimension for '{base_name}', cannot continue to plot."
+ ) + continue + # End if + + # reference time series global average + ref_ts_da_ga = pf.spatial_average(ref_ts_da, weights=None, spatial_dims=None, model_component=adfobj.model_component) + + # annually averaged + ref_ts_da = pf.annual_mean(ref_ts_da_ga, whole_years=True, time_name="time") # Loop over model cases: case_ts = {} # dictionary of annual mean, global mean time series @@ -136,42 +139,67 @@ def global_mean_timeseries_lnd(adfobj): ref_label = ( adfobj.data.ref_nickname if adfobj.data.ref_nickname - else adfobj.data.ref_case_label + else base_name ) skip_var = False for case_name in adfobj.data.case_names: - #c_ts_da = adfobj.data.load_timeseries_da(case_name, field) + if model_component != "lnd": + c_ts_da = adfobj.data.load_timeseries_da(case_name, field) if c_ts_da is None: print( - f"\t Variable named {field} provides Nonetype. Skipping this variable" + f"\t WARNING: Variable {field} for case '{case_name}' provides Nonetype. Skipping this variable" + ) skip_var = True continue # If no reference, we still need to check if this is a "2-d" varaible: - if validate_dims: - has_lat_ref, has_lev_ref = pf.zm_validate_dims(c_ts_da) + # check data dimensions: + if validate_dims or model_component != "lnd": + has_lat_case, has_lev_case = pf.zm_validate_dims(c_ts_da) # End if + if model_component != "lnd": + has_lat_case, has_lev_case = pf.zm_validate_dims(c_ts_da) + # If 3-d variable, notify user, flag and move to next test case - if has_lev_ref: + if has_lev_case: print( - f"Variable named {field} has a lev dimension for '{case_name}', which does not work with this script." + f"\t WARNING: Variable {field} has a lev dimension for '{case_name}', which does not work with this script." ) skip_var = True continue # End if + # check if there is a lat dimension: + if model_component != "lnd" and not has_lat_case: + print( + f"\t WARNING: Variable {field} is missing a lat dimension for '{case_name}', cannot continue to plot." 
+ ) + skip_var = True + continue + # End if + # Gather spatial avg for test case - case_ts[labels[case_name]] = pf.annual_mean(c_ts_da_ga, whole_years=True, time_name="time") + if model_component != "lnd": + c_ts_da_ga = pf.spatial_average(c_ts_da, model_component=model_component) + case_ts[labels[case_name]] = pf.annual_mean(c_ts_da_ga) + else: + case_ts[labels[case_name]] = pf.annual_mean(c_ts_da_ga, whole_years=True, time_name="time") + # If this case is 3-d or missing variable, then break the loop and go to next variable if skip_var: continue + lens2_data = Lens2Data( + field + ) # Provides access to LENS2 dataset when available (class defined below) + + ## SPECIAL SECTION -- CESM2 LENS DATA: # Plot the timeseries fig, ax = make_plot( case_ts, lens2_data, label=adfobj.data.ref_nickname, ref_ts_da=ref_ts_da @@ -194,6 +222,53 @@ def global_mean_timeseries_lnd(adfobj): #Notify user that script has ended: print(" ... global mean time series plots have been generated successfully.") +def _extract_and_scale_vars(adfobj, field, vres): + """ + Extract variables, get defaults, and scale for plotting + """ + # Extract variables: + # including a simpler way to get a dataset timeseries + baseline_name = adfobj.get_baseline_info("cam_case_name", required=True) + input_ts_baseline = Path(adfobj.get_baseline_info("cam_ts_loc", required=True)) + # TODO hard wired for single case name: + case_name = adfobj.get_cam_info("cam_case_name", required=True)[0] + input_ts_case = Path(adfobj.get_cam_info("cam_ts_loc", required=True)[0]) + + #Create list of time series files present for variable: + baseline_ts_filenames = f'{baseline_name}.*.{field}.*nc' + baseline_ts_files = sorted(input_ts_baseline.glob(baseline_ts_filenames)) + case_ts_filenames = f'{case_name}.*.{field}.*nc' + case_ts_files = sorted(input_ts_case.glob(case_ts_filenames)) + + ref_ts_ds = pf.load_dataset(baseline_ts_files) + weights = ref_ts_ds.landfrac * ref_ts_ds.area + ref_ts_da= ref_ts_ds[field] + + c_ts_ds = 
pf.load_dataset(case_ts_files) + c_weights = c_ts_ds.landfrac * c_ts_ds.area + c_ts_da= c_ts_ds[field] + + #Extract category (if available): + web_category = vres.get("category", None) + + # get variable defaults + scale_factor = vres.get('scale_factor', 1) + scale_factor_table = vres.get('scale_factor_table', 1) + add_offset = vres.get('add_offset', 0) + avg_method = vres.get('avg_method', 'mean') + # get units for variable + ref_ts_da.attrs['units'] = vres.get("new_unit", ref_ts_da.attrs.get('units', 'none')) + ref_ts_da.attrs['units'] = vres.get("table_unit", ref_ts_da.attrs.get('units', 'none')) + units = ref_ts_da.attrs['units'] + + # scale for plotting, if needed + ref_ts_da = ref_ts_da * scale_factor * scale_factor_table + ref_ts_da.attrs['units'] = units + c_ts_da = c_ts_da * scale_factor * scale_factor_table + c_ts_da.attrs['units'] = units + + return weights,ref_ts_da,c_weights,c_ts_da,units,avg_method + # Helper/plotting functions ###########################