From d6233157d1b2949f85798454334d23c0e6d6470e Mon Sep 17 00:00:00 2001 From: paul-rigby Date: Sat, 21 Sep 2013 13:03:57 +0000 Subject: [PATCH 1/3] Added imosAccumarray.m, a wrapper to allow matlab function accumarray to be used more easily with structures created by ncParse.m --- MATLAB_R2011/commons/NetCDF/imosAccumarray.m | 220 +++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 MATLAB_R2011/commons/NetCDF/imosAccumarray.m diff --git a/MATLAB_R2011/commons/NetCDF/imosAccumarray.m b/MATLAB_R2011/commons/NetCDF/imosAccumarray.m new file mode 100644 index 0000000..4a15f8e --- /dev/null +++ b/MATLAB_R2011/commons/NetCDF/imosAccumarray.m @@ -0,0 +1,220 @@ +function obj = imosAccumarray(obj,unitOfTime, varargin) +%%Facilitates use of inbuilt function accumarray with imos timeseries +% data imported by ncParse. +% +% Elements from a data set are grouped by the input time interval +% and a function is applied to each group ('nanmean' by default). +% This function can be used to compute bin averages and statistical +% summaries for a time series. +% +% TODO force use of nan median? +% Data with Missing Values +% nancov Covariance ignoring NaN values +% nanmax Maximum ignoring NaN values +% nanmean Mean ignoring NaN values +% nanmedian Median ignoring NaN values +% nanmin Minimum ignoring NaN values +% nanstd Standard deviation ignoring NaN values +% nansum Sum ignoring NaN values +% nanvar Variance, ignoring NaN values +% +% +% +%Inputs: +% obj a structure created using ncParse +% unitOfTime 'second', 'minute', 'hour', 'day','month', or 'year'. +% the interval periods are taken to be from the start +% of each unit of measure. +% e.g. specifying 'day', the interval will be +% from 00:00:00 on day n to 23:59:59 on day n+1 +% +% Optional arguments: +% +% [fun],[vars] one or more couplets of a function name fun plus a list of +% variables to apply that function to. +% [fun] any function can be used provided it accepts a vector and +% returns a scalar value. e.g. inbuilt functions +% 'mean','mode','median','max','min','sum' etc. A user defined +% function can also be used if in it's own file and known to +% the matlab path. Default is mean. +% [vars] optional cell array of variable names, if not present 'fun' +% will be applied to all variables. Vars must be specified +% if more than one [fun],[vars] couplet is used. +% 'flags',[flags] vector of qc flags. Only data points with these values will +% be included when 'fun' is applied. The mode of the flags of +% the included data points is taken as the qc flag for the +% new data set. +% +% Output: +% A copy of obj with data fields replaced with the modified time series +% the long_name variable attribute is edited to reflect the function and +% time interval that has been applied. +% +% Examples: +% +% D2 = imosAccumarray(D1,'day','range',{'TEMP','PRESSURE'},... +% 'median',{TURB},'flags',[0 1]); +% Computes daily range for TEMP and PRESSURE, daily median for 'TURB' and +% mean (default) for all other variables. Ignores any datapoints that are not +% flagged with a 1 or 0. +% +% D2 = imosAccumarray(D1,'month','max','flags',[0 1]); +% Computes monthly maximum for all variables ignoring any datapoints that are not +% flagged with a 1 or 0. +% +% Other m-files required: none +% Other files required: none +% Subfunctions: none +% MAT-files required: none +% +% See also: ncParse, outputCSV +% +% Author: Paul Rigby, AIMS/IMOS +% email: p.rigby@aims.gov.au +% Website: http://imos.org.au/ +% Sept 2013; Last revision: 3-Sep-2013 +% +% Copyright 2013 IMOS +% The script is distributed under the terms of the GNU General Public License + +%TODO make sure this doesn't fall over with adcp files e.g. check for 1d +%Could have more averaging options e.g. 6 monthly + +%% Deal with the optional input arguments + +% Set defaults: +flagsToInclude = 0:10; + +% Create a lookup table to map function to variable, default is nanmean +varFunMap = fieldnames(obj.variables); +for i=1:length(varFunMap); + varFunMap{i,2} = 'nanmean'; +end + +if ~isempty(varargin) + for i=1:length(varargin) + if strfind(varargin{i},'flag') %use strfind so flag or flags is ok + %the next entry should be a vector + if isnumeric(varargin{i+1}) + flagsToInclude = varargin{i+1}; + else + error('flags option should be followed by a vector of flags to include.') + end + %remove this couplet from the list + varargin(i:i+1) = []; + break + end + end +end + +if length(varargin) == 1 + for i=1:length(varFunMap); + varFunMap{i,2} = varargin{:}; + end +elseif length(varargin) > 1 + for i = 1:2:length(varargin) + for j = 1:length(varargin{i+1}) + for k = 1:size(varFunMap,1) + if strcmp(varargin{i+1}{j},varFunMap{k,1}) + varFunMap{k,2} = varargin{i}; + end + end + end + end +end + +%debugging, delete me later +disp(varFunMap) +disp(flagsToInclude) + +%% TIME dimension +% FIXME dimension can be time or TIME - make case insensitive? + +%convert to a datevec. +D = datevec(obj.dimensions.TIME.data); + +%enumerate the unitOfTime so it represents a datevec index +%also add a few aliases. +switch unitOfTime + case {'years','year','yearly','Y',1}; Ti = 1; + case {'months','month','monthly','M',2}; Ti = 2; + case {'days','day','daily','D',3}; Ti = 3; + case {'hours','hour','hourly','h',4}; Ti = 4; + case {'minutes','minute','m',5}; Ti = 5; + case {'seconds','second','s',6}; Ti = 6; + otherwise + error('Unit of time not recognised'); +end + +% force a consistent tag so that output files are standard +tags = {'yearly','monthly','daily','hourly','minute','second'}; + +%Find elements in the date vector that are unique up +%until our chosen unit of time. +[uVec, ~, idx] = unique(D(:,1:Ti),'rows'); + +%compute a midpoint based upon the time range within each interval e.g. min + range/2 +intervalMidpoint = accumarray(idx,obj.dimensions.TIME.data,[],@(x) {min(x)+0.5*range(x)}); + +%replace the original data with the new time intervals +obj.dimensions.TIME.data = cell2mat(intervalMidpoint); +obj.dimensions.TIME.name = ['TIME','_',tags{Ti},'_interval_midpoint']; + +%% Variables +vars = fieldnames(obj.variables); + +for i = 1:size(varFunMap,1) + var = varFunMap{i,1}; + fun = varFunMap{i,2}; + try + flags = obj.variables.(var).flag; + %convert to single as int8 trips up mode function (and + %others) + flags = single(flags); + qcAvailable = true; + disp(['QC flags found for variable ',var]); + %get the index of flags that are not on our list + flagsToDiscard = setdiff(flags,flagsToInclude); + badIndex = find(ismember(flags,flagsToDiscard)); + fprintf('Data points discarded: %i \n',length(find(badIndex))); + fprintf('Total flags: %i \n',length(flags)); + catch e + warning(['No QC flags found for variable',var]); + qcAvailable = false; + end + + if sum(sum(flags))==0 + warning(['All QC flags are zero for variable',vars{i}]) + end + + originalData = obj.variables.(vars{i}).data; + if qcAvailable == false + badIndex = []; + end + + %Mark as nan the data points with qc that we want to remove. + originalData(badIndex) = NaN; + + %build an inline function call for accumarray + + inlineFun = ['@(x) {',fun,'(x)}']; + + outputData = accumarray(idx,originalData,[],eval(inlineFun)); + outputData = cell2mat(outputData); + + %replace the original data in the structure + obj.variables.(var).data = outputData; + obj.variables.(var).name = [obj.variables.(var).name,'_',tags{Ti},'_',fun]; + + %In the same manner, roll up the flags using 'mode' to + %give an *indication* of the quality of the processed data. + %'mode' is already resiliant to NaN + flags(badIndex) = NaN; + if qcAvailable + averagedQcFlags = accumarray(idx,flags,[],@(x) {mode(x)}); + averagedQcFlags = cell2mat(averagedQcFlags); + %replace flags with averaged flags + obj.variables.(var).flag = averagedQcFlags; + end +end +end \ No newline at end of file From 3ec9f600e86e51906e8cf86222ec282a7c001ff4 Mon Sep 17 00:00:00 2001 From: Paul Rigby Date: Mon, 23 Sep 2013 14:55:04 +1000 Subject: [PATCH 2/3] Added sub-function to make mean, median etc resilient to NaN --- MATLAB_R2011/commons/NetCDF/imosAccumarray.m | 23 +++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/MATLAB_R2011/commons/NetCDF/imosAccumarray.m b/MATLAB_R2011/commons/NetCDF/imosAccumarray.m index 4a15f8e..9d5d60f 100644 --- a/MATLAB_R2011/commons/NetCDF/imosAccumarray.m +++ b/MATLAB_R2011/commons/NetCDF/imosAccumarray.m @@ -85,10 +85,10 @@ % Set defaults: flagsToInclude = 0:10; -% Create a lookup table to map function to variable, default is nanmean +% Create a lookup table to map function to variable, default is mean varFunMap = fieldnames(obj.variables); for i=1:length(varFunMap); - varFunMap{i,2} = 'nanmean'; + varFunMap{i,2} = 'mean'; end if ~isempty(varargin) @@ -197,9 +197,9 @@ %build an inline function call for accumarray - inlineFun = ['@(x) {',fun,'(x)}']; + %inlineFun = ['@(x) {',fun,'(x)}']; - outputData = accumarray(idx,originalData,[],eval(inlineFun)); + outputData = accumarray(idx,originalData,[],@(x) {executeFunction(x,fun)}); outputData = cell2mat(outputData); %replace the original data in the structure @@ -217,4 +217,17 @@ obj.variables.(var).flag = averagedQcFlags; end end -end \ No newline at end of file +end + +function y = executeFunction(x,fun) + %The NaNs are necessary to keep the array structure, but + %we need to remove them before executing the function as they will + %cause functions such as mean to return NaN + x(isnan(x)) = []; + y = eval([fun,'(x)']); + if isempty(y) + y = single(NaN); + end +end + + From 808b763122bd8da0ea5e9aef6ea34e23b41e63e1 Mon Sep 17 00:00:00 2001 From: paul-rigby Date: Thu, 17 Oct 2013 17:03:28 +1000 Subject: [PATCH 3/3] Added a demo script for imosAccumarray --- MATLAB_R2011/commons/NetCDF/imosAccumarray.m | 37 ++-------- MATLAB_R2011/demos/demo_imosAccumarray.m | 77 ++++++++++++++++++++ 2 files changed, 82 insertions(+), 32 deletions(-) create mode 100644 MATLAB_R2011/demos/demo_imosAccumarray.m diff --git a/MATLAB_R2011/commons/NetCDF/imosAccumarray.m b/MATLAB_R2011/commons/NetCDF/imosAccumarray.m index 9d5d60f..83a4f1a 100644 --- a/MATLAB_R2011/commons/NetCDF/imosAccumarray.m +++ b/MATLAB_R2011/commons/NetCDF/imosAccumarray.m @@ -3,23 +3,10 @@ % data imported by ncParse. % % Elements from a data set are grouped by the input time interval -% and a function is applied to each group ('nanmean' by default). +% and a function is applied to each group ('mean' by default). % This function can be used to compute bin averages and statistical % summaries for a time series. % -% TODO force use of nan median? -% Data with Missing Values -% nancov Covariance ignoring NaN values -% nanmax Maximum ignoring NaN values -% nanmean Mean ignoring NaN values -% nanmedian Median ignoring NaN values -% nanmin Minimum ignoring NaN values -% nanstd Standard deviation ignoring NaN values -% nansum Sum ignoring NaN values -% nanvar Variance, ignoring NaN values -% -% -% %Inputs: % obj a structure created using ncParse % unitOfTime 'second', 'minute', 'hour', 'day','month', or 'year'. @@ -35,7 +22,7 @@ % [fun] any function can be used provided it accepts a vector and % returns a scalar value. e.g. inbuilt functions % 'mean','mode','median','max','min','sum' etc. A user defined -% function can also be used if in it's own file and known to +% function can also be used if in its own file and known to % the matlab path. Default is mean. % [vars] optional cell array of variable names, if not present 'fun' % will be applied to all variables. Vars must be specified @@ -72,14 +59,11 @@ % Author: Paul Rigby, AIMS/IMOS % email: p.rigby@aims.gov.au % Website: http://imos.org.au/ -% Sept 2013; Last revision: 3-Sep-2013 +% Oct 2013; % % Copyright 2013 IMOS % The script is distributed under the terms of the GNU General Public License -%TODO make sure this doesn't fall over with adcp files e.g. check for 1d -%Could have more averaging options e.g. 6 monthly - %% Deal with the optional input arguments % Set defaults: @@ -123,10 +107,6 @@ end end -%debugging, delete me later -disp(varFunMap) -disp(flagsToInclude) - %% TIME dimension % FIXME dimension can be time or TIME - make case insensitive? @@ -151,7 +131,7 @@ %Find elements in the date vector that are unique up %until our chosen unit of time. -[uVec, ~, idx] = unique(D(:,1:Ti),'rows'); +[~, ~, idx] = unique(D(:,1:Ti),'rows'); %compute a midpoint based upon the time range within each interval e.g. min + range/2 intervalMidpoint = accumarray(idx,obj.dimensions.TIME.data,[],@(x) {min(x)+0.5*range(x)}); @@ -172,12 +152,9 @@ %others) flags = single(flags); qcAvailable = true; - disp(['QC flags found for variable ',var]); %get the index of flags that are not on our list flagsToDiscard = setdiff(flags,flagsToInclude); - badIndex = find(ismember(flags,flagsToDiscard)); - fprintf('Data points discarded: %i \n',length(find(badIndex))); - fprintf('Total flags: %i \n',length(flags)); + badIndex = find(ismember(flags,flagsToDiscard)); catch e warning(['No QC flags found for variable',var]); qcAvailable = false; @@ -195,10 +172,6 @@ %Mark as nan the data points with qc that we want to remove. originalData(badIndex) = NaN; - %build an inline function call for accumarray - - %inlineFun = ['@(x) {',fun,'(x)}']; - outputData = accumarray(idx,originalData,[],@(x) {executeFunction(x,fun)}); outputData = cell2mat(outputData); diff --git a/MATLAB_R2011/demos/demo_imosAccumarray.m b/MATLAB_R2011/demos/demo_imosAccumarray.m new file mode 100644 index 0000000..276e17b --- /dev/null +++ b/MATLAB_R2011/demos/demo_imosAccumarray.m @@ -0,0 +1,77 @@ +%% Example use of imosAccumarray +% +% Author: Paul Rigby, AIMS/IMOS +% email: p.rigby@aims.gov.au +% Website: http://imos.org.au/ https://github.com/aodn/imos_user_code_library +% Sep 2013; Last revision: 3-Sep-2013 +% +% Copyright 2013 IMOS +% The script is distributed under the terms of the GNU General Public License + +URL = 'http://thredds.aodn.org.au/thredds/dodsC/IMOS/ANMN/NRS/NRSYON/Biogeochem_timeseries/IMOS_ANMN-NRS_KOSTUZ_20080623T004500Z_NRSYON_FV01_NRSYON-0806-WQM-5_END-20080916T041400Z_C-20130310T112118Z.nc'; +D = ncParse(URL); + +%% Compare different (mean) averaging intervals for TEMP on one plot +month_av = imosAccumarray(D,'month','flags',[0 1],... + 'median',{'TURB','CHLU'}); +day_av = imosAccumarray(D,'day','flags',[0 1],... + 'median',{'TURB','CHLU'}); +hour_av = imosAccumarray(D,'hour','flags',[0 1],... + 'median',{'TURB','CHLU'}); + +figure(1);cla +hold on +plot(month_av.dimensions.TIME.data,month_av.variables.TEMP.data,'r-*') +plot(day_av.dimensions.TIME.data,day_av.variables.TEMP.data,'g-x') +plot(hour_av.dimensions.TIME.data,hour_av.variables.TEMP.data,'b') +datetick('x','yy-mm-dd','keepticks') +axis tight +hold off +title(D.metadata.title) +xlabel(D.dimensions.TIME.standard_name) +ylabel(strrep([D.variables.TEMP.long_name ' in ' D.variables.TEMP.units],'_',' ')) +legend({month_av.variables.TEMP.name,day_av.variables.TEMP.name,hour_av.variables.TEMP.name},'interpreter','none') + +%% Compare different (median) averaging intervals for TURB on one plot + +figure(2);cla +hold on +plot(month_av.dimensions.TIME.data,month_av.variables.TURB.data,'r-*') +plot(day_av.dimensions.TIME.data,day_av.variables.TURB.data,'g-x') +plot(hour_av.dimensions.TIME.data,hour_av.variables.TURB.data,'b') +datetick('x','yy-mm-dd','keepticks') +axis tight +hold off +title(D.metadata.title) +xlabel(D.dimensions.TIME.standard_name) +ylabel(strrep([D.variables.TURB.long_name ' in ' D.variables.TURB.units],'_',' ')) +legend({month_av.variables.TURB.name,day_av.variables.TURB.name,hour_av.variables.TURB.name},'interpreter','none') + +%% Plot daily maximum salinity +day_min = imosAccumarray(D,'day','flags',[0 1],'min'); +day_max = imosAccumarray(D,'day','flags',[0 1],'max'); +figure(3);cla +plot(day_min.dimensions.TIME.data,day_min.variables.PSAL.data,'r-+') +hold on +plot(day_max.dimensions.TIME.data,day_max.variables.PSAL.data,'g-x') +plot(day_av.dimensions.TIME.data,day_av.variables.PSAL.data,'k-.') + +datetick('x','yy-mm-dd','keepticks') +title(D.metadata.title) +xlabel(D.dimensions.TIME.standard_name) +ylabel(strrep([D.variables.PSAL.long_name ' in ' D.variables.PSAL.units],'_',' ')) +legend({day_min.variables.PSAL.name,day_max.variables.PSAL.name,day_av.variables.PSAL.name},'interpreter','none') + + +%% Plot mean DO with 2 sigma error bars +day_std = imosAccumarray(D,'day','flags',[0 1],'std'); +figure(4);cla +errorbar( day_av.dimensions.TIME.data,... + day_av.variables.DOX2.data,... + 2*day_std.variables.DOX2.data,'xk'); +datetick('x','yy-mm-dd','keepticks') +title(D.metadata.title) +xlabel(D.dimensions.TIME.standard_name) +ylabel(strrep([D.variables.DOX2.long_name ' in ' D.variables.DOX2.units],'_',' ')) + + \ No newline at end of file