Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 206 additions & 0 deletions MATLAB_R2011/commons/NetCDF/imosAccumarray.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
function obj = imosAccumarray(obj,unitOfTime, varargin)
%%Facilitates use of inbuilt function accumarray with imos timeseries
% data imported by ncParse.
%
% Elements from a data set are grouped by the input time interval
% and a function is applied to each group ('mean' by default).
% This function can be used to compute bin averages and statistical
% summaries for a time series.
%
%Inputs:
% obj a structure created using ncParse
% unitOfTime 'second', 'minute', 'hour', 'day','month', or 'year'.
% the interval periods are taken to be from the start
% of each unit of measure.
% e.g. specifying 'day', the interval will be
% from 00:00:00 on day n to 23:59:59 on day n+1
%
% Optional arguments:
%
% [fun],[vars] one or more couplets of a function name fun plus a list of
% variables to apply that function to.
% [fun] any function can be used provided it accepts a vector and
% returns a scalar value. e.g. inbuilt functions
% 'mean','mode','median','max','min','sum' etc. A user defined
% function can also be used if in its own file and known to
% the matlab path. Default is mean.
% [vars] optional cell array of variable names, if not present 'fun'
% will be applied to all variables. Vars must be specified
% if more than one [fun],[vars] couplet is used.
% 'flags',[flags] vector of qc flags. Only data points with these values will
% be included when 'fun' is applied. The mode of the flags of
% the included data points is taken as the qc flag for the
% new data set.
%
% Output:
% A copy of obj with data fields replaced with the modified time series
% the long_name variable attribute is edited to reflect the function and
% time interval that has been applied.
%
% Examples:
%
% D2 = imosAccumarray(D1,'day','range',{'TEMP','PRESSURE'},...
% 'median',{TURB},'flags',[0 1]);
% Computes daily range for TEMP and PRESSURE, daily median for 'TURB' and
% mean (default) for all other variables. Ignores any datapoints that are not
% flagged with a 1 or 0.
%
% D2 = imosAccumarray(D1,'month','max','flags',[0 1]);
% Computes monthly maximum for all variables ignoring any datapoints that are not
% flagged with a 1 or 0.
%
% Other m-files required: none
% Other files required: none
% Subfunctions: none
% MAT-files required: none
%
% See also: ncParse, outputCSV
%
% Author: Paul Rigby, AIMS/IMOS
% email: p.rigby@aims.gov.au
% Website: http://imos.org.au/
% Oct 2013;
%
% Copyright 2013 IMOS
% The script is distributed under the terms of the GNU General Public License

%% Deal with the optional input arguments

% Set defaults:
flagsToInclude = 0:10;

% Create a lookup table to map function to variable, default is mean
varFunMap = fieldnames(obj.variables);
for i=1:length(varFunMap);
varFunMap{i,2} = 'mean';
end

if ~isempty(varargin)
for i=1:length(varargin)
if strfind(varargin{i},'flag') %use strfind so flag or flags is ok
%the next entry should be a vector
if isnumeric(varargin{i+1})
flagsToInclude = varargin{i+1};
else
error('flags option should be followed by a vector of flags to include.')
end
%remove this couplet from the list
varargin(i:i+1) = [];
break
end
end
end

if length(varargin) == 1
for i=1:length(varFunMap);
varFunMap{i,2} = varargin{:};
end
elseif length(varargin) > 1
for i = 1:2:length(varargin)
for j = 1:length(varargin{i+1})
for k = 1:size(varFunMap,1)
if strcmp(varargin{i+1}{j},varFunMap{k,1})
varFunMap{k,2} = varargin{i};
end
end
end
end
end

%% TIME dimension
% FIXME dimension can be time or TIME - make case insensitive?

%convert to a datevec.
D = datevec(obj.dimensions.TIME.data);

%enumerate the unitOfTime so it represents a datevec index
%also add a few aliases.
switch unitOfTime
case {'years','year','yearly','Y',1}; Ti = 1;
case {'months','month','monthly','M',2}; Ti = 2;
case {'days','day','daily','D',3}; Ti = 3;
case {'hours','hour','hourly','h',4}; Ti = 4;
case {'minutes','minute','m',5}; Ti = 5;
case {'seconds','second','s',6}; Ti = 6;
otherwise
error('Unit of time not recognised');
end

% force a consistent tag so that output files are standard
tags = {'yearly','monthly','daily','hourly','minute','second'};

%Find elements in the date vector that are unique up
%until our chosen unit of time.
[~, ~, idx] = unique(D(:,1:Ti),'rows');

%compute a midpoint based upon the time range within each interval e.g. min + range/2
intervalMidpoint = accumarray(idx,obj.dimensions.TIME.data,[],@(x) {min(x)+0.5*range(x)});

%replace the original data with the new time intervals
obj.dimensions.TIME.data = cell2mat(intervalMidpoint);
obj.dimensions.TIME.name = ['TIME','_',tags{Ti},'_interval_midpoint'];

%% Variables
vars = fieldnames(obj.variables);

for i = 1:size(varFunMap,1)
var = varFunMap{i,1};
fun = varFunMap{i,2};
try
flags = obj.variables.(var).flag;
%convert to single as int8 trips up mode function (and
%others)
flags = single(flags);
qcAvailable = true;
%get the index of flags that are not on our list
flagsToDiscard = setdiff(flags,flagsToInclude);
badIndex = find(ismember(flags,flagsToDiscard));
catch e
warning(['No QC flags found for variable',var]);
qcAvailable = false;
end

if sum(sum(flags))==0
warning(['All QC flags are zero for variable',vars{i}])
end

originalData = obj.variables.(vars{i}).data;
if qcAvailable == false
badIndex = [];
end

%Mark as nan the data points with qc that we want to remove.
originalData(badIndex) = NaN;

outputData = accumarray(idx,originalData,[],@(x) {executeFunction(x,fun)});
outputData = cell2mat(outputData);

%replace the original data in the structure
obj.variables.(var).data = outputData;
obj.variables.(var).name = [obj.variables.(var).name,'_',tags{Ti},'_',fun];

%In the same manner, roll up the flags using 'mode' to
%give an *indication* of the quality of the processed data.
%'mode' is already resiliant to NaN
flags(badIndex) = NaN;
if qcAvailable
averagedQcFlags = accumarray(idx,flags,[],@(x) {mode(x)});
averagedQcFlags = cell2mat(averagedQcFlags);
%replace flags with averaged flags
obj.variables.(var).flag = averagedQcFlags;
end
end
end

function y = executeFunction(x,fun)
%The NaNs are necessary to keep the array structure, but
%we need to remove them before executing the function as they will
%cause functions such as mean to return NaN
x(isnan(x)) = [];
y = eval([fun,'(x)']);
if isempty(y)
y = single(NaN);
end
end


77 changes: 77 additions & 0 deletions MATLAB_R2011/demos/demo_imosAccumarray.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
%% Example use of imosAccumarray
%
% Author: Paul Rigby, AIMS/IMOS
% email: p.rigby@aims.gov.au
% Website: http://imos.org.au/ https://github.com/aodn/imos_user_code_library
% Sep 2013; Last revision: 3-Sep-2013
%
% Copyright 2013 IMOS
% The script is distributed under the terms of the GNU General Public License

URL = 'http://thredds.aodn.org.au/thredds/dodsC/IMOS/ANMN/NRS/NRSYON/Biogeochem_timeseries/IMOS_ANMN-NRS_KOSTUZ_20080623T004500Z_NRSYON_FV01_NRSYON-0806-WQM-5_END-20080916T041400Z_C-20130310T112118Z.nc';
D = ncParse(URL);

%% Compare different (mean) averaging intervals for TEMP on one plot
month_av = imosAccumarray(D,'month','flags',[0 1],...
'median',{'TURB','CHLU'});
day_av = imosAccumarray(D,'day','flags',[0 1],...
'median',{'TURB','CHLU'});
hour_av = imosAccumarray(D,'hour','flags',[0 1],...
'median',{'TURB','CHLU'});

figure(1);cla
hold on
plot(month_av.dimensions.TIME.data,month_av.variables.TEMP.data,'r-*')
plot(day_av.dimensions.TIME.data,day_av.variables.TEMP.data,'g-x')
plot(hour_av.dimensions.TIME.data,hour_av.variables.TEMP.data,'b')
datetick('x','yy-mm-dd','keepticks')
axis tight
hold off
title(D.metadata.title)
xlabel(D.dimensions.TIME.standard_name)
ylabel(strrep([D.variables.TEMP.long_name ' in ' D.variables.TEMP.units],'_',' '))
legend({month_av.variables.TEMP.name,day_av.variables.TEMP.name,hour_av.variables.TEMP.name},'interpreter','none')

%% Compare different (median) averaging intervals for TURB on one plot

figure(2);cla
hold on
plot(month_av.dimensions.TIME.data,month_av.variables.TURB.data,'r-*')
plot(day_av.dimensions.TIME.data,day_av.variables.TURB.data,'g-x')
plot(hour_av.dimensions.TIME.data,hour_av.variables.TURB.data,'b')
datetick('x','yy-mm-dd','keepticks')
axis tight
hold off
title(D.metadata.title)
xlabel(D.dimensions.TIME.standard_name)
ylabel(strrep([D.variables.TURB.long_name ' in ' D.variables.TURB.units],'_',' '))
legend({month_av.variables.TURB.name,day_av.variables.TURB.name,hour_av.variables.TURB.name},'interpreter','none')

%% Plot daily maximum salinity
day_min = imosAccumarray(D,'day','flags',[0 1],'min');
day_max = imosAccumarray(D,'day','flags',[0 1],'max');
figure(3);cla
plot(day_min.dimensions.TIME.data,day_min.variables.PSAL.data,'r-+')
hold on
plot(day_max.dimensions.TIME.data,day_max.variables.PSAL.data,'g-x')
plot(day_av.dimensions.TIME.data,day_av.variables.PSAL.data,'k-.')

datetick('x','yy-mm-dd','keepticks')
title(D.metadata.title)
xlabel(D.dimensions.TIME.standard_name)
ylabel(strrep([D.variables.PSAL.long_name ' in ' D.variables.PSAL.units],'_',' '))
legend({day_min.variables.PSAL.name,day_max.variables.PSAL.name,day_av.variables.PSAL.name},'interpreter','none')


%% Plot mean DO with 2 sigma error bars
day_std = imosAccumarray(D,'day','flags',[0 1],'std');
figure(4);cla
errorbar( day_av.dimensions.TIME.data,...
day_av.variables.DOX2.data,...
2*day_std.variables.DOX2.data,'xk');
datetick('x','yy-mm-dd','keepticks')
title(D.metadata.title)
xlabel(D.dimensions.TIME.standard_name)
ylabel(strrep([D.variables.DOX2.long_name ' in ' D.variables.DOX2.units],'_',' '))