From 1a4d9a5e436effee930262a0146a4ae712fb8926 Mon Sep 17 00:00:00 2001 From: lbesnard Date: Tue, 27 Jan 2026 15:50:51 +1100 Subject: [PATCH 1/8] Fix: NRS Darwin Yongala - fix http to https - working order --- ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py | 414 +++++++---- lib/python/aims_realtime_util.py | 867 +++++++++++++----------- 2 files changed, 752 insertions(+), 529 deletions(-) diff --git a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py index d7cf77f4..2eb53e75 100755 --- a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py +++ b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py @@ -40,80 +40,102 @@ from itertools import groupby from tendo import singleton -from aims_realtime_util import (convert_time_cf_to_imos, - create_list_of_dates_to_download, download_channel, - fix_data_code_from_filename, - fix_provider_code_from_filename, - has_var_only_fill_value, - is_no_data_found, is_time_monotonic, - is_time_var_empty, logging_aims, md5, - modify_aims_netcdf, parse_aims_xml, - remove_dimension_from_netcdf, - remove_end_date_from_filename, save_channel_info, - set_up, rm_tmp_dir, get_main_netcdf_var, - list_recursively_files_abs_path) +from aims_realtime_util import ( + convert_time_cf_to_imos, + create_list_of_dates_to_download, + download_channel, + fix_data_code_from_filename, + fix_provider_code_from_filename, + has_var_only_fill_value, + is_no_data_found, + is_time_monotonic, + is_time_var_empty, + logging_aims, + md5, + modify_aims_netcdf, + parse_aims_xml, + remove_dimension_from_netcdf, + remove_end_date_from_filename, + save_channel_info, + set_up, + rm_tmp_dir, + get_main_netcdf_var, + list_recursively_files_abs_path, +) from dest_path import get_anmn_nrs_site_name from util import pass_netcdf_checker -DATA_WIP_PATH = os.path.join(os.environ.get('WIP_DIR'), 'ANMN', 'NRS_AIMS_Darwin_Yongala_data_rss_download_temporary') -ANMN_NRS_INCOMING_DIR = os.path.join(os.environ.get('INCOMING_DIR'), 'AODN', 'ANMN_NRS_DAR_YON') -ANMN_NRS_ERROR_DIR = os.path.join(os.environ['ERROR_DIR'], 'ANMN_NRS_DAR_YON') +MD5_EXPECTED_VALUE = "a6207e053f1cc0e00d171701f0cdb186" + +DATA_WIP_PATH = os.path.join( + os.environ.get("WIP_DIR"), + "ANMN", + "NRS_AIMS_Darwin_Yongala_data_rss_download_temporary", +) +ANMN_NRS_INCOMING_DIR = os.path.join( + os.environ.get("INCOMING_DIR"), "AODN", "ANMN_NRS_DAR_YON" +) +ANMN_NRS_ERROR_DIR = os.path.join(os.environ["ERROR_DIR"], "ANMN_NRS_DAR_YON") def modify_anmn_nrs_netcdf(netcdf_file_path, channel_id_info): - """ Modify the downloaded netCDF file so it passes both CF and IMOS checker + """Modify the downloaded netCDF file so it passes both CF and IMOS checker input: netcdf_file_path(str) : path of netcdf file to modify channel_id_index(tupple) : information from xml for the channel """ modify_aims_netcdf(netcdf_file_path, channel_id_info) - netcdf_file_obj = Dataset(netcdf_file_path, 'a', format='NETCDF4') - netcdf_file_obj.aims_channel_id = int(channel_id_info['channel_id']) - - if 'Yongala' in channel_id_info['site_name']: - netcdf_file_obj.site_code = 'NRSYON' - netcdf_file_obj.platform_code = 'Yongala NRS Buoy' - elif 'Darwin' in channel_id_info['site_name']: - netcdf_file_obj.site_code = 'NRSDAR' - netcdf_file_obj.platform_code = 'Darwin NRS Buoy' - elif 'Beagle' in channel_id_info['site_name']: - netcdf_file_obj.site_code = 'DARBGF' - netcdf_file_obj.platform_code = 'Beagle Gulf Mooring' + netcdf_file_obj = Dataset(netcdf_file_path, "a", format="NETCDF4") + netcdf_file_obj.aims_channel_id = int(channel_id_info["channel_id"]) + + if "Yongala" in 
channel_id_info["site_name"]: + netcdf_file_obj.site_code = "NRSYON" + netcdf_file_obj.platform_code = "Yongala NRS Buoy" + elif "Darwin" in channel_id_info["site_name"]: + netcdf_file_obj.site_code = "NRSDAR" + netcdf_file_obj.platform_code = "Darwin NRS Buoy" + elif "Beagle" in channel_id_info["site_name"]: + netcdf_file_obj.site_code = "DARBGF" + netcdf_file_obj.platform_code = "Beagle Gulf Mooring" else: return False - if not (channel_id_info['metadata_uuid'] == 'Not Available'): - netcdf_file_obj.metadata_uuid = channel_id_info['metadata_uuid'] + if not (channel_id_info["metadata_uuid"] == "Not Available"): + netcdf_file_obj.metadata_uuid = channel_id_info["metadata_uuid"] # some weather stations channels don't have a depth variable if sensor above water - if 'depth' in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables['depth'] - var.long_name = 'nominal depth' - var.positive = 'down' - var.axis = 'Z' - var.reference_datum = 'sea surface' - var.valid_min = -10.0 - var.valid_max = 30.0 - var.units = 'm' # some channels put degrees celcius instead ... - netcdf_file_obj.renameVariable('depth', 'NOMINAL_DEPTH') - - if 'DEPTH' in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables['DEPTH'] - var.coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" - var.long_name = 'actual depth' - var.reference_datum = 'sea surface' - var.positive = 'down' - var.valid_min = -10.0 - var.valid_max = 30.0 - var.units = 'm' # some channels put degrees celcius instead ... + if "depth" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["depth"] + var.long_name = "nominal depth" + var.positive = "down" + var.axis = "Z" + var.reference_datum = "sea surface" + var.valid_min = -10.0 + var.valid_max = 30.0 + var.units = "m" # some channels put degrees celcius instead ... + netcdf_file_obj.renameVariable("depth", "NOMINAL_DEPTH") + + if "DEPTH" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["DEPTH"] + var.coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" + var.long_name = "actual depth" + var.reference_datum = "sea surface" + var.positive = "down" + var.valid_min = -10.0 + var.valid_max = 30.0 + var.units = "m" # some channels put degrees celcius instead ... netcdf_file_obj.close() - netcdf_file_obj = Dataset(netcdf_file_path, 'a', format='NETCDF4') # need to close to save to file. as we call get_main_var just after - main_var = get_main_netcdf_var(netcdf_file_path) + netcdf_file_obj = Dataset( + netcdf_file_path, "a", format="NETCDF4" + ) # need to close to save to file. as we call get_main_var just after + main_var = get_main_netcdf_var(netcdf_file_path) # DEPTH, LATITUDE and LONGITUDE are not dimensions, so we make them into auxiliary cooordinate variables by adding this attribute - if 'NOMINAL_DEPTH' in netcdf_file_obj.variables.keys(): - netcdf_file_obj.variables[main_var].coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" + if "NOMINAL_DEPTH" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.variables[ + main_var + ].coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" else: netcdf_file_obj.variables[main_var].coordinates = "TIME LATITUDE LONGITUDE" @@ -122,20 +144,27 @@ def modify_anmn_nrs_netcdf(netcdf_file_path, channel_id_info): if not convert_time_cf_to_imos(netcdf_file_path): return False - remove_dimension_from_netcdf(netcdf_file_path) # last modification to do in this order! + remove_dimension_from_netcdf( + netcdf_file_path + ) # last modification to do in this order! 
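# Aside: a minimal, self-contained sketch (not part of this patch) illustrating the
# auxiliary-coordinate pattern applied above. LATITUDE, LONGITUDE and NOMINAL_DEPTH
# are not dimensions of the data variable, so CF expects them to be listed in its
# "coordinates" attribute. The temporary file and the "TEMP" variable are
# illustrative only; only the netCDF4 library already used in this patch is assumed.
import tempfile

from netCDF4 import Dataset

sketch_path = tempfile.mkstemp(suffix=".nc")[1]
with Dataset(sketch_path, "w", format="NETCDF4") as nc:
    nc.createDimension("TIME", 2)
    time_var = nc.createVariable("TIME", "f8", ("TIME",))
    time_var[:] = [0.0, 1.0]
    # scalar auxiliary coordinate variables (no dimensions of their own)
    for aux in ("LATITUDE", "LONGITUDE", "NOMINAL_DEPTH"):
        nc.createVariable(aux, "f8").assignValue(0.0)
    temp = nc.createVariable("TEMP", "f8", ("TIME",))
    temp[:] = [25.1, 25.3]
    # the attribute that turns the scalars into auxiliary coordinates of TEMP
    temp.coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH"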
return True def move_to_tmp_incoming(netcdf_path): # [org_filename withouth creation date].[md5].nc to have unique filename in - new_filename = '%s.%s.nc' % (os.path.splitext(os.path.basename(remove_end_date_from_filename(netcdf_path)))[0], md5(netcdf_path)) + new_filename = "%s.%s.nc" % ( + os.path.splitext(os.path.basename(remove_end_date_from_filename(netcdf_path)))[ + 0 + ], + md5(netcdf_path), + ) os.chmod(netcdf_path, 0o0664) # change to 664 for pipeline v2 shutil.move(netcdf_path, os.path.join(TMP_MANIFEST_DIR, new_filename)) def process_monthly_channel(channel_id, aims_xml_info, level_qc): - """ Downloads all the data available for one channel_id and moves the file to a wip_path dir + """Downloads all the data available for one channel_id and moves the file to a wip_path dir channel_id(str) aims_xml_info(tuple) level_qc(int) @@ -145,88 +174,139 @@ def process_monthly_channel(channel_id, aims_xml_info, level_qc): 300 -> NRS DATA for monthly data download, only 1 and 300 should be use """ - logger.info('QC{level_qc} - Processing channel {channel_id}'.format(channel_id=str(channel_id), - level_qc=str(level_qc))) + logger.info( + "QC{level_qc} - Processing channel {channel_id}".format( + channel_id=str(channel_id), level_qc=str(level_qc) + ) + ) channel_id_info = aims_xml_info[channel_id] - from_date = channel_id_info['from_date'] - thru_date = channel_id_info['thru_date'] - [start_dates, end_dates] = create_list_of_dates_to_download(channel_id, level_qc, from_date, thru_date) + from_date = channel_id_info["from_date"] + thru_date = channel_id_info["thru_date"] + [start_dates, end_dates] = create_list_of_dates_to_download( + channel_id, level_qc, from_date, thru_date + ) if len(start_dates) != 0: # download monthly file for start_date, end_date in zip(start_dates, end_dates): - start_date = start_date.strftime("%Y-%m-%dT%H:%M:%SZ") - end_date = end_date.strftime("%Y-%m-%dT%H:%M:%SZ") - netcdf_tmp_file_path = download_channel(channel_id, start_date, end_date, level_qc) - contact_aims_msg = "Process of channel aborted - CONTACT AIMS" + start_date = start_date.strftime("%Y-%m-%dT%H:%M:%SZ") + end_date = end_date.strftime("%Y-%m-%dT%H:%M:%SZ") + netcdf_tmp_file_path = download_channel( + channel_id, start_date, end_date, level_qc + ) + contact_aims_msg = "Process of channel aborted - CONTACT AIMS" if netcdf_tmp_file_path is None: - logger.error(' Channel %s - not valid zip file - %s' % (str(channel_id), contact_aims_msg)) + logger.error( + " Channel %s - not valid zip file - %s" + % (str(channel_id), contact_aims_msg) + ) break # NO_DATA_FOUND file only means there is no data for the selected time period. 
Could be some data afterwards if is_no_data_found(netcdf_tmp_file_path): - logger.info('Channel {channel_id}: No data for the time period:[{start_date} - {end_date}]'.format( - channel_id=str(channel_id), - start_date=start_date, - end_date=end_date)) + logger.info( + "Channel {channel_id}: No data for the time period:[{start_date} - {end_date}]".format( + channel_id=str(channel_id), + start_date=start_date, + end_date=end_date, + ) + ) shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) else: if is_time_var_empty(netcdf_tmp_file_path): - logger.error('Channel {channel_id}: No values in TIME variable - {message}'.format( - channel_id=str(channel_id), - message=contact_aims_msg)) + logger.error( + "Channel {channel_id}: No values in TIME variable - {message}".format( + channel_id=str(channel_id), message=contact_aims_msg + ) + ) shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) break if not modify_anmn_nrs_netcdf(netcdf_tmp_file_path, channel_id_info): - logger.error('Channel{channel_id}: Could not modify the NetCDF file - Process of channel aborted'. - format(channel_id=str(channel_id))) + logger.error( + "Channel{channel_id}: Could not modify the NetCDF file - Process of channel aborted".format( + channel_id=str(channel_id) + ) + ) shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) break main_var = get_main_netcdf_var(netcdf_tmp_file_path) if has_var_only_fill_value(netcdf_tmp_file_path, main_var): - logger.error('Channel {channel_id}: _Fillvalues only in main variable - {message}'.format( - channel_id=str(channel_id), - message=contact_aims_msg)) + logger.error( + "Channel {channel_id}: _Fillvalues only in main variable - {message}".format( + channel_id=str(channel_id), message=contact_aims_msg + ) + ) shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) break if get_anmn_nrs_site_name(netcdf_tmp_file_path) == []: - logger.error('Channel {channel_id}: Unknown site_code gatt value - {message}'.format( - channel_id=str(channel_id), - message=contact_aims_msg)) + logger.error( + "Channel {channel_id}: Unknown site_code gatt value - {message}".format( + channel_id=str(channel_id), message=contact_aims_msg + ) + ) shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) break if not is_time_monotonic(netcdf_tmp_file_path): - logger.error('Channel {channel_id}: TIME value is not strictly monotonic \ - - {message}'.format(channel_id=str(channel_id), - message=contact_aims_msg)) + logger.error( + "Channel {channel_id}: TIME value is not strictly monotonic \ + - {message}".format( + channel_id=str(channel_id), message=contact_aims_msg + ) + ) shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) break # check every single file of the list. We don't assume that if one passes, all pass ... 
past proved this - wip_path = os.environ.get('data_wip_path') - checker_retval = pass_netcdf_checker(netcdf_tmp_file_path, tests=['cf:1.6', 'imos:1.3']) + wip_path = os.environ.get("data_wip_path") + checker_retval = pass_netcdf_checker( + netcdf_tmp_file_path, tests=["cf:1.6", "imos:1.3"] + ) if not checker_retval: - logger.error('Channel {channel_id}: File does not pass CF/IMOS compliance checker - Process of channel aborted' - .format(channel_id=str(channel_id))) - shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, 'errors')) - - logger.error('File copied to {path} for debugging'.format( - path=os.path.join(wip_path, 'errors', os.path.basename(netcdf_tmp_file_path)))) + logger.error( + "Channel {channel_id}: File does not pass CF/IMOS compliance checker - Process of channel aborted".format( + channel_id=str(channel_id) + ) + ) + shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, "errors")) + + logger.error( + "File copied to {path} for debugging".format( + path=os.path.join( + wip_path, + "errors", + os.path.basename(netcdf_tmp_file_path), + ) + ) + ) shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) break netcdf_tmp_file_path = fix_data_code_from_filename(netcdf_tmp_file_path) - netcdf_tmp_file_path = fix_provider_code_from_filename(netcdf_tmp_file_path, 'IMOS_ANMN') - - if re.search('IMOS_ANMN_[A-Z]{1}_', netcdf_tmp_file_path) is None: - logger.error(' Channel %s - File name Data code does not pass REGEX - Process of channel aborted' % str(channel_id)) - shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, 'errors')) - logger.error(' File copied to %s for debugging' % (os.path.join(wip_path, 'errors', os.path.basename(netcdf_tmp_file_path)))) + netcdf_tmp_file_path = fix_provider_code_from_filename( + netcdf_tmp_file_path, "IMOS_ANMN" + ) + + if re.search("IMOS_ANMN_[A-Z]{1}_", netcdf_tmp_file_path) is None: + logger.error( + " Channel %s - File name Data code does not pass REGEX - Process of channel aborted" + % str(channel_id) + ) + shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, "errors")) + logger.error( + " File copied to %s for debugging" + % ( + os.path.join( + wip_path, + "errors", + os.path.basename(netcdf_tmp_file_path), + ) + ) + ) shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) break @@ -240,70 +320,89 @@ def process_monthly_channel(channel_id, aims_xml_info, level_qc): save_channel_info(channel_id, aims_xml_info, level_qc, end_date) else: - logger.info('QC{level_qc} - Channel {channel_id}: already up to date'.format(channel_id=str(channel_id), - level_qc=str(level_qc))) + logger.info( + "QC{level_qc} - Channel {channel_id}: already up to date".format( + channel_id=str(channel_id), level_qc=str(level_qc) + ) + ) def process_qc_level(level_qc): - """ Downloads all channels for a QC level + """Downloads all channels for a QC level level_qc(int) : 0 or 1 """ - logger.info('Process ANMN NRS download from AIMS web service - QC level {level_qc}'.format(level_qc=level_qc)) - xml_url = 'https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level{level_qc}/300'.format(level_qc=level_qc) + logger.info( + "Process ANMN NRS download from AIMS web service - QC level {level_qc}".format( + level_qc=level_qc + ) + ) + xml_url = "https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level{level_qc}/300".format( + level_qc=level_qc + ) try: aims_xml_info = parse_aims_xml(xml_url) except Exception as err: - logger.critical('RSS feed not available') + logger.critical("RSS feed not available") exit(1) for channel_id in aims_xml_info.keys(): try: 
process_monthly_channel(channel_id, aims_xml_info, level_qc) except Exception as err: - logger.error('QC{qc_level} - Channel {channel_id}: Failed, unknown reason - manual debug required'.format( - channel_id=str(channel_id), - qc_level=str(level_qc))) + logger.error( + "QC{qc_level} - Channel {channel_id}: Failed, unknown reason - manual debug required".format( + channel_id=str(channel_id), qc_level=str(level_qc) + ) + ) logger.error(traceback.print_exc()) class AimsDataValidationTest(data_validation_test.TestCase): - def setUp(self): - """ Check that a the AIMS system or this script hasn't been modified. + """Check that a the AIMS system or this script hasn't been modified. This function checks that a downloaded file still has the same md5. """ - channel_id = '84329' - from_date = '2016-01-01T00:00:00Z' - thru_date = '2016-01-02T00:00:00Z' - level_qc = 1 - aims_rss_val = 300 - xml_url = 'https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level%s/%s' % (str(level_qc), str(aims_rss_val)) - - logger.info('Data validation unittests...') - aims_xml_info = parse_aims_xml(xml_url) + channel_id = "84329" + from_date = "2016-01-01T00:00:00Z" + thru_date = "2016-01-02T00:00:00Z" + level_qc = 1 + aims_rss_val = 300 + xml_url = ( + "https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level%s/%s" + % (str(level_qc), str(aims_rss_val)) + ) + + logger.info("Data validation unittests...") + aims_xml_info = parse_aims_xml(xml_url) channel_id_info = aims_xml_info[channel_id] - self.netcdf_tmp_file_path = download_channel(channel_id, from_date, thru_date, level_qc) + self.netcdf_tmp_file_path = download_channel( + channel_id, from_date, thru_date, level_qc + ) modify_anmn_nrs_netcdf(self.netcdf_tmp_file_path, channel_id_info) # force values of attributes which change all the time - netcdf_file_obj = Dataset(self.netcdf_tmp_file_path, 'a', format='NETCDF4') + netcdf_file_obj = Dataset(self.netcdf_tmp_file_path, "a", format="NETCDF4") netcdf_file_obj.date_created = "1970-01-01T00:00:00Z" # epoch - netcdf_file_obj.history = 'data validation test only' - netcdf_file_obj.NCO = 'NCO_VERSION' + netcdf_file_obj.history = "data validation test only" + netcdf_file_obj.NCO = "NCO_VERSION" netcdf_file_obj.close() def tearDown(self): - shutil.copy(self.netcdf_tmp_file_path, os.path.join(os.environ['data_wip_path'], 'nc_unittest_%s.nc' % self.md5_netcdf_value)) + shutil.copy( + self.netcdf_tmp_file_path, + os.path.join( + os.environ["data_wip_path"], "nc_unittest_%s.nc" % self.md5_netcdf_value + ), + ) shutil.rmtree(os.path.dirname(self.netcdf_tmp_file_path)) def test_aims_validation(self): if sys.version_info[0] < 3: - self.md5_expected_value = '76c9a595264a8173545b6dc0c518a280' + self.md5_expected_value = "76c9a595264a8173545b6dc0c518a280" else: - self.md5_expected_value = '78c6386529faf9dc2272e9bed5ed7fa2' - + self.md5_expected_value = MD5_EXPECTED_VALUE self.md5_netcdf_value = md5(self.netcdf_tmp_file_path) self.assertEqual(self.md5_netcdf_value, self.md5_expected_value) @@ -315,19 +414,24 @@ def args(): :return: vargs """ parser = argparse.ArgumentParser() - parser.add_argument("-t", "--testing", - action='store_true', - help="testing only - downloads the first month of each channel") + parser.add_argument( + "-t", + "--testing", + action="store_true", + help="testing only - downloads the first month of each channel", + ) return parser.parse_args() -if __name__ == '__main__': +if __name__ == "__main__": vargs = args() me = singleton.SingleInstance() - os.environ['data_wip_path'] = 
os.path.join(os.environ.get('WIP_DIR'), - 'ANMN', - 'NRS_AIMS_Darwin_Yongala_data_rss_download_temporary') + os.environ["data_wip_path"] = os.path.join( + os.environ.get("WIP_DIR"), + "ANMN", + "NRS_AIMS_Darwin_Yongala_data_rss_download_temporary", + ) global TMP_MANIFEST_DIR global TESTING @@ -340,11 +444,13 @@ def args(): # data validation test runner = data_validation_test.TextTestRunner() - itersuite = data_validation_test.TestLoader().loadTestsFromTestCase(AimsDataValidationTest) + itersuite = data_validation_test.TestLoader().loadTestsFromTestCase( + AimsDataValidationTest + ) res = runner.run(itersuite) if not DATA_WIP_PATH: - logger.critical('environment variable data_wip_path is not defined.') + logger.critical("environment variable data_wip_path is not defined.") exit(1) # script optional argument for testing only. used in process_monthly_channel @@ -353,18 +459,19 @@ def args(): rm_tmp_dir(DATA_WIP_PATH) if len(os.listdir(ANMN_NRS_INCOMING_DIR)) >= 2: - logger.critical('Operation aborted, too many files in INCOMING_DIR') + logger.critical("Operation aborted, too many files in INCOMING_DIR") exit(1) if len(os.listdir(ANMN_NRS_ERROR_DIR)) >= 2: - logger.critical('Operation aborted, too many files in ERROR_DIR') + logger.critical("Operation aborted, too many files in ERROR_DIR") exit(1) if not res.failures: for level in [0, 1]: - date_str_now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') - TMP_MANIFEST_DIR = os.path.join(DATA_WIP_PATH, 'manifest_dir_tmp_{date}'.format( - date=date_str_now)) + date_str_now = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + TMP_MANIFEST_DIR = os.path.join( + DATA_WIP_PATH, "manifest_dir_tmp_{date}".format(date=date_str_now) + ) os.makedirs(TMP_MANIFEST_DIR) process_qc_level(level) @@ -372,17 +479,26 @@ def args(): lines_per_file = 2**12 file_list = list_recursively_files_abs_path(TMP_MANIFEST_DIR) if len(file_list) > 0: - for file_number, lines in groupby(enumerate(file_list), key=lambda x: x[0] // lines_per_file): - incoming_file = os.path.join(DATA_WIP_PATH, 'anmn_nrs_aims_FV0{level}_{date}_{file_number}.manifest'.format( - level=str(level), - date=date_str_now, - file_number=file_number)) - with open(incoming_file, 'w') as outfile: + for file_number, lines in groupby( + enumerate(file_list), key=lambda x: x[0] // lines_per_file + ): + incoming_file = os.path.join( + DATA_WIP_PATH, + "anmn_nrs_aims_FV0{level}_{date}_{file_number}.manifest".format( + level=str(level), date=date_str_now, file_number=file_number + ), + ) + with open(incoming_file, "w") as outfile: for item in lines: outfile.write("%s\n" % item[1]) os.chmod(incoming_file, 0o0664) # change to 664 for pipeline v2 - shutil.move(incoming_file, os.path.join(ANMN_NRS_INCOMING_DIR, os.path.basename(incoming_file))) + shutil.move( + incoming_file, + os.path.join( + ANMN_NRS_INCOMING_DIR, os.path.basename(incoming_file) + ), + ) else: - logger.error('Data validation unittests failed') + logger.error("Data validation unittests failed") diff --git a/lib/python/aims_realtime_util.py b/lib/python/aims_realtime_util.py index 4daad1b7..21446e7c 100755 --- a/lib/python/aims_realtime_util.py +++ b/lib/python/aims_realtime_util.py @@ -1,4 +1,4 @@ -""" set of tools to +"""set of tools to - parse AIMS RSS feed web pages - create a list of monthly timestamps to download - generate URL to download (with regards to what has already been downloaded @@ -10,6 +10,7 @@ author Laurent Besnard, laurent.besnard@utas.edu.au """ + import datetime import glob import json @@ -48,34 +49,36 @@ def logging_aims(): 
- """ start logging using logging python library + """start logging using logging python library output: logger - similar to a file handler """ - wip_path = os.environ.get('data_wip_path') + wip_path = os.environ.get("data_wip_path") # this is used for unit testing as data_wip_path env would not be set if wip_path is None: wip_path = tempfile.mkdtemp() - logging_format = "%(asctime)s — %(name)s — %(levelname)s — %(funcName)s:%(lineno)d — %(message)s" + logging_format = ( + "%(asctime)s — %(name)s — %(levelname)s — %(funcName)s:%(lineno)d — %(message)s" + ) # set up logging to file - tmp_filename = tempfile.mkstemp('.log', 'aims_data_download_')[1] - log_path = os.path.join(wip_path, 'aims.log') - logging.basicConfig(level=logging.INFO, - format=logging_format, - filename=tmp_filename, - filemode='a+') + tmp_filename = tempfile.mkstemp(".log", "aims_data_download_")[1] + log_path = os.path.join(wip_path, "aims.log") + logging.basicConfig( + level=logging.INFO, format=logging_format, filename=tmp_filename, filemode="a+" + ) # rotate logs every Day, and keep only the last 5 log files - logHandler = TimedRotatingFileHandler(log_path, - when="D", - interval=1, - backupCount=5, # backupCount files will be kept - ) + logHandler = TimedRotatingFileHandler( + log_path, + when="D", + interval=1, + backupCount=5, # backupCount files will be kept + ) logHandler.setFormatter(logging.Formatter(logging_format)) logHandler.setLevel(logging.DEBUG) - logging.getLogger('').addHandler(logHandler) + logging.getLogger("").addHandler(logHandler) # define a Handler which writes DEBUG messages to the sys.stderr logFormatter = logging.Formatter(logging_format) @@ -84,7 +87,7 @@ def logging_aims(): consoleHandler.setFormatter(logFormatter) # add the console handler to the root logger - logging.getLogger('').addHandler(consoleHandler) + logging.getLogger("").addHandler(consoleHandler) #################### @@ -93,33 +96,33 @@ def logging_aims(): def _pickle_filename(level_qc): - """ returns the pickle filepath according to the QC level being processed + """returns the pickle filepath according to the QC level being processed input: level_qc(int) : 0 or 1 output: picleQc_file(str) : pickle file path """ - wip_path = os.environ.get('data_wip_path') + wip_path = os.environ.get("data_wip_path") if wip_path is None: - raise ValueError('data_wip_path enviromnent variable is not set') + raise ValueError("data_wip_path enviromnent variable is not set") if level_qc == 0: - pickle_qc_file = os.path.join(wip_path, 'aims_qc0.pickle') + pickle_qc_file = os.path.join(wip_path, "aims_qc0.pickle") elif level_qc == 1: - pickle_qc_file = os.path.join(wip_path, 'aims_qc1.pickle') + pickle_qc_file = os.path.join(wip_path, "aims_qc1.pickle") return pickle_qc_file def delete_channel_id_from_pickle(level_qc, channel_id): pickle_file = _pickle_filename(level_qc) - with open(pickle_file, 'rb') as p_read: + with open(pickle_file, "rb") as p_read: aims_xml_info = pickle.load(p_read) if channel_id in aims_xml_info.keys(): - del(aims_xml_info[channel_id]) + del aims_xml_info[channel_id] - with open(pickle_file, 'wb') as p_write: + with open(pickle_file, "wb") as p_write: pickle.dump(aims_xml_info, p_write) @@ -139,74 +142,78 @@ def delete_platform_entries_from_pickle(level_qc, platform): In [2]: delete_platform_entries_from_pickle(2, 'Beagle') """ pickle_file = _pickle_filename(level_qc) - with open(pickle_file, 'rb') as p_read: + with open(pickle_file, "rb") as p_read: aims_xml_info = pickle.load(p_read) def 
delete_over_list_platform(aims_xml_info, platform): for index_platform, value in enumerate(aims_xml_info): if platform in value: for index_field in range(0, len(aims_xml_info)): - del(aims_xml_info[index_field][platform_name]) + del aims_xml_info[index_field][platform_name] aims_xml_info = delete_over_list_platform(aims_xml_info, platform) return aims_xml_info aims_xml_info_clean = delete_over_list_platform(aims_xml_info, platform) - with open(pickle_file, 'wb') as p_write: + with open(pickle_file, "wb") as p_write: pickle.dump(aims_xml_info_clean, p_write) @retry(URLError, tries=10, delay=3, backoff=2) def urlopen_with_retry(url): - """ it will retry a maximum of 10 times, with an exponential backoff delay + """it will retry a maximum of 10 times, with an exponential backoff delay doubling each time, e.g. 3 seconds, 6 seconds, 12 seconds """ return urlopen(url) -def save_channel_info(channel_id, aims_xml_info, level_qc, *last_downloaded_date_channel): +def save_channel_info( + channel_id, aims_xml_info, level_qc, *last_downloaded_date_channel +): """ - if channel_id has been successfuly processed, we write about it in a pickle file - we write the last downloaded data date for each channel - input: - channel_id(str) : channel_id to save information - aims_xml_info(dict) : generated by parser_aims_xml - level_qc(int) : 0 or 1 - last_downloaded_date_channel is a variable argument, not used by soop trv + if channel_id has been successfuly processed, we write about it in a pickle file + we write the last downloaded data date for each channel + input: + channel_id(str) : channel_id to save information + aims_xml_info(dict) : generated by parser_aims_xml + level_qc(int) : 0 or 1 + last_downloaded_date_channel is a variable argument, not used by soop trv """ pickle_file = _pickle_filename(level_qc) last_downloaded_date = dict() # condition in case the pickle file already exists or not. 
In the first case, # aims_xml_info comes from the pickle, file, otherwise comes from the function arg if os.path.isfile(pickle_file): - with open(pickle_file, 'rb') as p_read: + with open(pickle_file, "rb") as p_read: aims_xml_info_file = pickle.load(p_read) last_downloaded_date = aims_xml_info_file if not last_downloaded_date_channel: # soop trv specific, vararg - last_downloaded_date[channel_id] = aims_xml_info[channel_id]['thru_date'] + last_downloaded_date[channel_id] = aims_xml_info[channel_id]["thru_date"] else: last_downloaded_date[channel_id] = last_downloaded_date_channel[0] else: if not last_downloaded_date_channel: # soop trv specific, vararg - last_downloaded_date[channel_id] = aims_xml_info[channel_id]['thru_date'] + last_downloaded_date[channel_id] = aims_xml_info[channel_id]["thru_date"] else: last_downloaded_date[channel_id] = last_downloaded_date_channel[0] - with open(pickle_file, 'wb') as p_write: + with open(pickle_file, "wb") as p_write: pickle.dump(last_downloaded_date, p_write) def get_last_downloaded_date_channel(channel_id, level_qc, from_date): - """ Retrieve the last date sucessfully downloaded for a channel """ + """Retrieve the last date sucessfully downloaded for a channel""" pickle_file = _pickle_filename(level_qc) # different pickle per QC if os.path.isfile(pickle_file): - with open(pickle_file, 'rb') as p_read: + with open(pickle_file, "rb") as p_read: last_downloaded_date = pickle.load(p_read) - if channel_id in last_downloaded_date.keys(): # check the channel is in the pickle file + if ( + channel_id in last_downloaded_date.keys() + ): # check the channel is in the pickle file if last_downloaded_date[channel_id] is not None: return last_downloaded_date[channel_id] @@ -216,11 +223,15 @@ def get_last_downloaded_date_channel(channel_id, level_qc, from_date): def has_channel_already_been_downloaded(channel_id, level_qc): pickle_file = _pickle_filename(level_qc) # different pickle per QC if os.path.isfile(pickle_file): - with open(pickle_file, 'rb') as p_read: + with open(pickle_file, "rb") as p_read: last_downloaded_date = pickle.load(p_read) - if channel_id in last_downloaded_date.keys(): # check the channel is in the pickle file - if last_downloaded_date[channel_id] is not None: # check the last downloaded_date field + if ( + channel_id in last_downloaded_date.keys() + ): # check the channel is in the pickle file + if ( + last_downloaded_date[channel_id] is not None + ): # check the last downloaded_date field return True else: return False @@ -232,22 +243,28 @@ def has_channel_already_been_downloaded(channel_id, level_qc): def create_list_of_dates_to_download(channel_id, level_qc, from_date, thru_date): - """ generate a list of monthly start dates and end dates to download FAIMMS and NRS data """ + """generate a list of monthly start dates and end dates to download FAIMMS and NRS data""" from dateutil import rrule from datetime import datetime from dateutil.relativedelta import relativedelta - last_downloaded_date = get_last_downloaded_date_channel(channel_id, level_qc, from_date) - start_dates = [] - end_dates = [] + last_downloaded_date = get_last_downloaded_date_channel( + channel_id, level_qc, from_date + ) + start_dates = [] + end_dates = [] - from_date = datetime.strptime(from_date, "%Y-%m-%dT%H:%M:%SZ") - thru_date = datetime.strptime(thru_date, "%Y-%m-%dT%H:%M:%SZ") + from_date = datetime.strptime(from_date, "%Y-%m-%dT%H:%M:%SZ") + thru_date = datetime.strptime(thru_date, "%Y-%m-%dT%H:%M:%SZ") last_downloaded_date = 
datetime.strptime(last_downloaded_date, "%Y-%m-%dT%H:%M:%SZ") if last_downloaded_date < thru_date: - for dt in rrule.rrule(rrule.MONTHLY, dtstart=datetime(last_downloaded_date.year, last_downloaded_date.month, 1), until=thru_date): + for dt in rrule.rrule( + rrule.MONTHLY, + dtstart=datetime(last_downloaded_date.year, last_downloaded_date.month, 1), + until=thru_date, + ): start_dates.append(dt) end_dates.append(datetime(dt.year, dt.month, 1) + relativedelta(months=1)) @@ -263,14 +280,14 @@ def list_recursively_files_abs_path(path): :return: """ filelist = [] - for filename in glob.glob('{path}/**'.format(path=path), recursive=True): + for filename in glob.glob("{path}/**".format(path=path), recursive=True): if os.path.isfile(filename): filelist.append(os.path.abspath(filename)) return filelist def md5(fname): - """ return a md5 checksum of a file """ + """return a md5 checksum of a file""" import hashlib hash = hashlib.md5() @@ -281,17 +298,17 @@ def md5(fname): def get_main_netcdf_var(netcdf_file_path): - with Dataset(netcdf_file_path, mode='r') as netcdf_file_obj: + with Dataset(netcdf_file_path, mode="r") as netcdf_file_obj: variables = netcdf_file_obj.variables - variables.pop('TIME') - variables.pop('LATITUDE') - variables.pop('LONGITUDE') + variables.pop("TIME") + variables.pop("LATITUDE") + variables.pop("LONGITUDE") - if 'NOMINAL_DEPTH' in variables: - variables.pop('NOMINAL_DEPTH') + if "NOMINAL_DEPTH" in variables: + variables.pop("NOMINAL_DEPTH") - qc_var = [s for s in variables if '_quality_control' in s] + qc_var = [s for s in variables if "_quality_control" in s] if qc_var != []: variables.pop(qc_var[0]) @@ -301,22 +318,29 @@ def get_main_netcdf_var(netcdf_file_path): def is_above_file_limit(json_watchd_name): - """ check if the number of files in INCOMING DIR as set in watch.d/[JSON_WATCHD_NAME.json is above threshold - SOMETHING quite annoying re the pipeline structure : - * the watchd JSON filename maches the ERROR directory - * BUT doesn't match the INCOMING_DIR. the 'path' in the watch.d json file matches the ERROR_DIR""" - - json_fp = os.path.join(os.environ['DATA_SERVICES_DIR'], 'watch.d', '%s.json' % json_watchd_name) + """check if the number of files in INCOMING DIR as set in watch.d/[JSON_WATCHD_NAME.json is above threshold + SOMETHING quite annoying re the pipeline structure : + * the watchd JSON filename maches the ERROR directory + * BUT doesn't match the INCOMING_DIR. 
the 'path' in the watch.d json file matches the ERROR_DIR""" + + json_fp = os.path.join( + os.environ["DATA_SERVICES_DIR"], "watch.d", "%s.json" % json_watchd_name + ) with open(json_fp) as j_data: parsed_json = json.load(j_data) - if len(os.listdir(os.path.join(os.environ['INCOMING_DIR'], parsed_json['path'][0]))) >= int(parsed_json['files_crit']): + if len( + os.listdir(os.path.join(os.environ["INCOMING_DIR"], parsed_json["path"][0])) + ) >= int(parsed_json["files_crit"]): return True - elif len(os.listdir(os.path.join(os.environ['ERROR_DIR'], json_watchd_name))) >= int(parsed_json['files_crit']): + elif len( + os.listdir(os.path.join(os.environ["ERROR_DIR"], json_watchd_name)) + ) >= int(parsed_json["files_crit"]): return True else: return False + ###################### # XML Info Functions # ###################### @@ -324,65 +348,86 @@ def is_above_file_limit(json_watchd_name): @lru_cache(maxsize=100) def parse_aims_xml(xml_url): - """ Download and parse the AIMS XML rss feed """ + """Download and parse the AIMS XML rss feed""" logger = logging.getLogger(__name__) - logger.info('PARSE AIMS xml RSS feed : %s' % (xml_url)) - response = urlopen(xml_url) - html = response.read() - root = ET.fromstring(html) - - n_item_start = 3 # start number for AIMS xml file - - title = [] - link = [] - metadata_uuid = [] - uom = [] - from_date = [] - thru_date = [] - platform_name = [] - site_name = [] - channel_id = [] - parameter = [] - parameter_type = [] - trip_id = [] # soop trv only + logger.info("PARSE AIMS xml RSS feed : %s" % (xml_url)) + response = urlopen(xml_url) + html = response.read() + root = ET.fromstring(html) + + n_item_start = 3 # start number for AIMS xml file + + title = [] + link = [] + metadata_uuid = [] + uom = [] + from_date = [] + thru_date = [] + platform_name = [] + site_name = [] + channel_id = [] + parameter = [] + parameter_type = [] + trip_id = [] # soop trv only for n_item in range(n_item_start, len(root[0])): - title .append(root[0][n_item][0].text) - link .append(root[0][n_item][1].text) - metadata_uuid .append(root[0][n_item][6].text) - uom .append(root[0][n_item][7].text) - from_date .append(root[0][n_item][8].text) - thru_date .append(root[0][n_item][9].text) - platform_name .append(root[0][n_item][10].text) - site_name .append(root[0][n_item][11].text) - channel_id .append(root[0][n_item][12].text) - parameter .append(root[0][n_item][13].text) + title.append(root[0][n_item][0].text) + link.append(root[0][n_item][1].text) + metadata_uuid.append(root[0][n_item][6].text) + uom.append(root[0][n_item][7].text) + from_date.append(root[0][n_item][8].text) + thru_date.append(root[0][n_item][9].text) + platform_name.append(root[0][n_item][10].text) + site_name.append(root[0][n_item][11].text) + channel_id.append(root[0][n_item][12].text) + parameter.append(root[0][n_item][13].text) parameter_type.append(root[0][n_item][14].text) # in case there is no trip id defined by AIMS, we create a fake one, used by SOOP TRV only try: trip_id.append(root[0][n_item][15].text) except IndexError: - dateObject = time.strptime(root[0][n_item][8].text, "%Y-%m-%dT%H:%M:%SZ") - trip_id_fake = str(dateObject.tm_year) + str(dateObject.tm_mon).zfill(2) + str(dateObject.tm_mday).zfill(2) + dateObject = time.strptime(root[0][n_item][8].text, "%Y-%m-%dT%H:%M:%SZ") + trip_id_fake = ( + str(dateObject.tm_year) + + str(dateObject.tm_mon).zfill(2) + + str(dateObject.tm_mday).zfill(2) + ) trip_id.append(trip_id_fake) response.close() - d = [{c: {'title': ttl, - 'channel_id': c, - 'link': lk, - 
'metadata_uuid': muuid, - 'uom': uo, - 'from_date': fro, - 'thru_date': thr, - 'platform_name': pltname, - 'site_name': stname, - 'parameter': para, - 'parameter_type': paratype, - 'trip_id': trid - }} for c, ttl, lk, muuid, uo, fro, thr, pltname, stname, para, paratype, trid in - zip(channel_id, title, link, metadata_uuid, uom, from_date, - thru_date, platform_name, site_name, parameter, parameter_type, trip_id)] + d = [ + { + c: { + "title": ttl, + "channel_id": c, + "link": lk, + "metadata_uuid": muuid, + "uom": uo, + "from_date": fro, + "thru_date": thr, + "platform_name": pltname, + "site_name": stname, + "parameter": para, + "parameter_type": paratype, + "trip_id": trid, + } + } + for c, ttl, lk, muuid, uo, fro, thr, pltname, stname, para, paratype, trid in zip( + channel_id, + title, + link, + metadata_uuid, + uom, + from_date, + thru_date, + platform_name, + site_name, + parameter, + parameter_type, + trip_id, + ) + ] # re-writting the dict to have the channel key as a key value new_dict = {} @@ -392,6 +437,7 @@ def parse_aims_xml(xml_url): return new_dict + ########################################## # Channel Process/Download/Mod Functions # ########################################## @@ -402,9 +448,11 @@ def retry_if_result_none(result): return result is None -@retry(retry_on_result=retry_if_result_none, stop_max_attempt_number=10, wait_fixed=2000) +@retry( + retry_on_result=retry_if_result_none, stop_max_attempt_number=10, wait_fixed=2000 +) def download_channel(channel_id, from_date, thru_date, level_qc): - """ generated the data link to download, and extract the zip file into a temp file + """generated the data link to download, and extract the zip file into a temp file input: channel_id(str) : channel_id to download from_date(str) : str containing the first time to start the download from written in this format 2009-04-21_t10:43:54Z @@ -412,28 +460,38 @@ def download_channel(channel_id, from_date, thru_date, level_qc): level_qc(int) : 0 or 1 """ logger = logging.getLogger(__name__) - tmp_zip_file = tempfile.mkstemp() - netcdf_tmp_path = tempfile.mkdtemp() - url_data_download = 'http://data.aims.gov.au/gbroosdata/services/data/rtds/%s/level%s/raw/raw/%s/%s/netcdf/2' % \ - (channel_id, str(level_qc), from_date, thru_date) + tmp_zip_file = tempfile.mkstemp() + netcdf_tmp_path = tempfile.mkdtemp() + url_data_download = ( + "https://data.aims.gov.au/gbroosdata/services/data/rtds/%s/level%s/raw/raw/%s/%s/netcdf/2" + % (channel_id, str(level_qc), from_date, thru_date) + ) # set the timeout for no data to 120 seconds and enable streaming responses so we don't have to keep the file in memory - headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'} + headers = { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36" + } request = requests.get(url_data_download, timeout=120, stream=True, headers=headers) if request.status_code == 403: - logger.error('Error 403: access to the requested resource is forbidden - {url}'.format(url=url_data_download)) + logger.error( + "Error 403: access to the requested resource is forbidden - {url}".format( + url=url_data_download + ) + ) return - with open(tmp_zip_file[1], 'wb') as fh: + with open(tmp_zip_file[1], "wb") as fh: # Walk through the request response in chunks of 1024 * 1024 bytes, so 1MiB for chunk in request.iter_content(1024 * 1024): # Write the chunk to the file 
fh.write(chunk) if not zipfile.is_zipfile(tmp_zip_file[1]): - logger.error('%s is not a valid zip file' % url_data_download) + logger.error("%s is not a valid zip file" % url_data_download) os.close(tmp_zip_file[0]) - os.remove(tmp_zip_file[1]) # file object needs to be closed or can end up with too many open files + os.remove( + tmp_zip_file[1] + ) # file object needs to be closed or can end up with too many open files shutil.rmtree(netcdf_tmp_path) return @@ -445,11 +503,14 @@ def download_channel(channel_id, from_date, thru_date, level_qc): zip.close() os.close(tmp_zip_file[0]) - os.remove(tmp_zip_file[1]) # file object needs to be closed or can end up with too many open files + os.remove( + tmp_zip_file[1] + ) # file object needs to be closed or can end up with too many open files - logger.info('%s download: SUCCESS' % url_data_download) + logger.info("%s download: SUCCESS" % url_data_download) return netcdf_file_path + #################################### # Functions to modify NetCDF files # # AIMS NetCDF file specific only # @@ -457,27 +518,27 @@ def download_channel(channel_id, from_date, thru_date, level_qc): def is_no_data_found(netcdf_file_path): - """ Check if the unzipped file is a 'NO_DATA_FOUND' file instead of a netCDF file + """Check if the unzipped file is a 'NO_DATA_FOUND' file instead of a netCDF file this behaviour is correct for FAIMMS and NRS, as it means no data for the selected time period. However it doesn't make sense for SOOP TRV """ - return os.path.basename(netcdf_file_path) == 'NO_DATA_FOUND' + return os.path.basename(netcdf_file_path) == "NO_DATA_FOUND" def rename_netcdf_attribute(object_, old_attribute_name, new_attribute_name): - """ Rename global attribute from netcdf4 dataset object - object = Dataset(netcdf_file, 'a', format='NETCDF4') - old_attribute_name = current gatt name to modify - new_attribute_name = new gatt name + """Rename global attribute from netcdf4 dataset object + object = Dataset(netcdf_file, 'a', format='NETCDF4') + old_attribute_name = current gatt name to modify + new_attribute_name = new gatt name """ setattr(object_, new_attribute_name, getattr(object_, old_attribute_name)) delattr(object_, old_attribute_name) def is_time_var_empty(netcdf_file_path): - """ check if the yet unmodified file (time instead of TIME) has values in its time variable """ - netcdf_file_obj = Dataset(netcdf_file_path, 'r', format='NETCDF4') - var_obj = netcdf_file_obj.variables['time'] + """check if the yet unmodified file (time instead of TIME) has values in its time variable""" + netcdf_file_obj = Dataset(netcdf_file_path, "r", format="NETCDF4") + var_obj = netcdf_file_obj.variables["time"] if var_obj.shape[0] == 0: return True @@ -489,15 +550,19 @@ def is_time_var_empty(netcdf_file_path): def convert_time_cf_to_imos(netcdf_file_path): - """ convert a CF time into an IMOS one forced to be 'days since 1950-01-01 00:00:00' + """convert a CF time into an IMOS one forced to be 'days since 1950-01-01 00:00:00' the variable HAS to be 'TIME' """ try: - netcdf_file_obj = Dataset(netcdf_file_path, 'a', format='NETCDF4') - time = netcdf_file_obj.variables['TIME'] - dtime = num2date(time[:], time.units, time.calendar) # this gives an array of datetime objects - time.units = 'days since 1950-01-01 00:00:00 UTC' - time[:] = date2num(dtime, time.units, time.calendar) # conversion to IMOS recommended time + netcdf_file_obj = Dataset(netcdf_file_path, "a", format="NETCDF4") + time = netcdf_file_obj.variables["TIME"] + dtime = num2date( + time[:], time.units, time.calendar 
+ ) # this gives an array of datetime objects + time.units = "days since 1950-01-01 00:00:00 UTC" + time[:] = date2num( + dtime, time.units, time.calendar + ) # conversion to IMOS recommended time netcdf_file_obj.close() return True except: @@ -507,13 +572,13 @@ def convert_time_cf_to_imos(netcdf_file_path): def strictly_increasing(list): - """ check monotocity of list of values""" + """check monotocity of list of values""" return all(x < y for x, y in zip(list, list[1:])) def is_time_monotonic(netcdf_file_path): - netcdf_file_obj = Dataset(netcdf_file_path, 'r', format='NETCDF4') - time = netcdf_file_obj.variables['TIME'][:] + netcdf_file_obj = Dataset(netcdf_file_path, "r", format="NETCDF4") + time = netcdf_file_obj.variables["TIME"][:] netcdf_file_obj.close() if not strictly_increasing(time): return False @@ -521,77 +586,87 @@ def is_time_monotonic(netcdf_file_path): def modify_aims_netcdf(netcdf_file_path, channel_id_info): - """ Modify the downloaded netCDF file so it passes both CF and IMOS checker + """Modify the downloaded netCDF file so it passes both CF and IMOS checker input: netcdf_file_path(str) : path of netcdf file to modify channel_id_index(dict) : information from xml for the channel """ - imos_env_path = os.path.join(os.environ.get('DATA_SERVICES_DIR'), 'lib', 'netcdf', 'imos_env') + imos_env_path = os.path.join( + os.environ.get("DATA_SERVICES_DIR"), "lib", "netcdf", "imos_env" + ) if not os.path.isfile(imos_env_path): logger = logging.getLogger(__name__) - logger.error('%s is not accessible' % imos_env_path) + logger.error("%s is not accessible" % imos_env_path) sys.exit(1) dotenv.load_dotenv(imos_env_path) - netcdf_file_obj = Dataset(netcdf_file_path, 'a', format='NETCDF4') - netcdf_file_obj.naming_authority = 'IMOS' + netcdf_file_obj = Dataset(netcdf_file_path, "a", format="NETCDF4") + netcdf_file_obj.naming_authority = "IMOS" # add gatts to NetCDF - netcdf_file_obj.aims_channel_id = int(channel_id_info['channel_id']) + netcdf_file_obj.aims_channel_id = int(channel_id_info["channel_id"]) - if not (channel_id_info['metadata_uuid'] == 'Not Available'): - netcdf_file_obj.metadata_uuid = channel_id_info['metadata_uuid'] + if not (channel_id_info["metadata_uuid"] == "Not Available"): + netcdf_file_obj.metadata_uuid = channel_id_info["metadata_uuid"] if not netcdf_file_obj.instrument_serial_number: - del(netcdf_file_obj.instrument_serial_number) + del netcdf_file_obj.instrument_serial_number # add CF gatts, values stored in lib/netcdf/imos_env - netcdf_file_obj.Conventions = os.environ.get('CONVENTIONS') - netcdf_file_obj.data_centre_email = os.environ.get('DATA_CENTRE_EMAIL') - netcdf_file_obj.data_centre = os.environ.get('DATA_CENTRE') - netcdf_file_obj.project = os.environ.get('PROJECT') - netcdf_file_obj.acknowledgement = os.environ.get('ACKNOWLEDGEMENT') - netcdf_file_obj.distribution_statement = os.environ.get('DISTRIBUTION_STATEMENT') - - netcdf_file_obj.date_created = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime()) - netcdf_file_obj.quality_control_set = 1 - imos_qc_convention = 'IMOS standard set using the IODE flags' - netcdf_file_obj.author = 'laurent besnard' - netcdf_file_obj.author_email = 'laurent.besnard@utas.edu.au' - - rename_netcdf_attribute(netcdf_file_obj, 'geospatial_LAT_max', 'geospatial_lat_max') - rename_netcdf_attribute(netcdf_file_obj, 'geospatial_LAT_min', 'geospatial_lat_min') - rename_netcdf_attribute(netcdf_file_obj, 'geospatial_LON_max', 'geospatial_lon_max') - rename_netcdf_attribute(netcdf_file_obj, 'geospatial_LON_min', 'geospatial_lon_min') + 
netcdf_file_obj.Conventions = os.environ.get("CONVENTIONS") + netcdf_file_obj.data_centre_email = os.environ.get("DATA_CENTRE_EMAIL") + netcdf_file_obj.data_centre = os.environ.get("DATA_CENTRE") + netcdf_file_obj.project = os.environ.get("PROJECT") + netcdf_file_obj.acknowledgement = os.environ.get("ACKNOWLEDGEMENT") + netcdf_file_obj.distribution_statement = os.environ.get("DISTRIBUTION_STATEMENT") + + netcdf_file_obj.date_created = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime()) + netcdf_file_obj.quality_control_set = 1 + imos_qc_convention = "IMOS standard set using the IODE flags" + netcdf_file_obj.author = "laurent besnard" + netcdf_file_obj.author_email = "laurent.besnard@utas.edu.au" + + rename_netcdf_attribute(netcdf_file_obj, "geospatial_LAT_max", "geospatial_lat_max") + rename_netcdf_attribute(netcdf_file_obj, "geospatial_LAT_min", "geospatial_lat_min") + rename_netcdf_attribute(netcdf_file_obj, "geospatial_LON_max", "geospatial_lon_max") + rename_netcdf_attribute(netcdf_file_obj, "geospatial_LON_min", "geospatial_lon_min") # variables modifications - time = netcdf_file_obj.variables['time'] - time.calendar = 'gregorian' - time.axis = 'T' + time = netcdf_file_obj.variables["time"] + time.calendar = "gregorian" + time.axis = "T" time.valid_min = 0.0 time.valid_max = 9999999999.0 - netcdf_file_obj.renameDimension('time', 'TIME') - netcdf_file_obj.renameVariable('time', 'TIME') - - netcdf_file_obj.time_coverage_start = num2date(time[:], time.units, time.calendar).min().strftime('%Y-%m-%dT%H:%M:%SZ') - netcdf_file_obj.time_coverage_end = num2date(time[:], time.units, time.calendar).max().strftime('%Y-%m-%dT%H:%M:%SZ') + netcdf_file_obj.renameDimension("time", "TIME") + netcdf_file_obj.renameVariable("time", "TIME") + + netcdf_file_obj.time_coverage_start = ( + num2date(time[:], time.units, time.calendar) + .min() + .strftime("%Y-%m-%dT%H:%M:%SZ") + ) + netcdf_file_obj.time_coverage_end = ( + num2date(time[:], time.units, time.calendar) + .max() + .strftime("%Y-%m-%dT%H:%M:%SZ") + ) # latitude longitude - latitude = netcdf_file_obj.variables['LATITUDE'] - latitude.axis = 'Y' - latitude.valid_min = -90.0 - latitude.valid_max = 90.0 - latitude.reference_datum = 'geographical coordinates, WGS84 projection' - latitude.standard_name = 'latitude' - latitude.long_name = 'latitude' - - longitude = netcdf_file_obj.variables['LONGITUDE'] - longitude.axis = 'X' - longitude.valid_min = -180.0 - longitude.valid_max = 180.0 - longitude.reference_datum = 'geographical coordinates, WGS84 projection' - longitude.standard_name = 'longitude' - longitude.long_name = 'longitude' + latitude = netcdf_file_obj.variables["LATITUDE"] + latitude.axis = "Y" + latitude.valid_min = -90.0 + latitude.valid_max = 90.0 + latitude.reference_datum = "geographical coordinates, WGS84 projection" + latitude.standard_name = "latitude" + latitude.long_name = "latitude" + + longitude = netcdf_file_obj.variables["LONGITUDE"] + longitude.axis = "X" + longitude.valid_min = -180.0 + longitude.valid_max = 180.0 + longitude.reference_datum = "geographical coordinates, WGS84 projection" + longitude.standard_name = "longitude" + longitude.long_name = "longitude" # handle masked arrays lon_array = longitude[:] @@ -612,233 +687,257 @@ def modify_aims_netcdf(netcdf_file_path, channel_id_info): netcdf_file_obj.geospatial_lat_max = numpy.ma.MaskedArray.max(lat_array) # Change variable name, standard name, longname, untis .... 
- if 'Seawater_Intake_Temperature' in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables['Seawater_Intake_Temperature'] - var.units = 'Celsius' - netcdf_file_obj.renameVariable('Seawater_Intake_Temperature', 'TEMP') - netcdf_file_obj.renameVariable('Seawater_Intake_Temperature_quality_control', 'TEMP_quality_control') - var.ancillary_variables = 'TEMP_quality_control' - - if 'PSAL' in netcdf_file_obj.variables.keys(): - netcdf_file_obj.variables['PSAL'].units = '1e-3' - - if 'TURB' in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables['TURB'] - var.units = '1' - var.standard_name = 'sea_water_turbidity' - netcdf_file_obj.variables['TURB_quality_control'].standard_name = 'sea_water_turbidity status_flag' - - if 'DOWN_PHOTOSYNTH_FLUX' in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables['DOWN_PHOTOSYNTH_FLUX'] - var.units = 'W m-2' - - if 'PEAK_WAVE_DIR' in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables['PEAK_WAVE_DIR'] - var.units = 'degree' - - if 'CDIR' in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables['CDIR'] - var.units = 'degree' - var.long_name = 'current_direction' - - if 'CSPD' in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables['CSPD'] - var.long_name = 'current_magnitude' - - if 'ALBD' in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables['ALBD'] - var.units = '1' + if "Seawater_Intake_Temperature" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["Seawater_Intake_Temperature"] + var.units = "Celsius" + netcdf_file_obj.renameVariable("Seawater_Intake_Temperature", "TEMP") + netcdf_file_obj.renameVariable( + "Seawater_Intake_Temperature_quality_control", "TEMP_quality_control" + ) + var.ancillary_variables = "TEMP_quality_control" + + if "PSAL" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.variables["PSAL"].units = "1e-3" + + if "TURB" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["TURB"] + var.units = "1" + var.standard_name = "sea_water_turbidity" + netcdf_file_obj.variables[ + "TURB_quality_control" + ].standard_name = "sea_water_turbidity status_flag" + + if "DOWN_PHOTOSYNTH_FLUX" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["DOWN_PHOTOSYNTH_FLUX"] + var.units = "W m-2" + + if "PEAK_WAVE_DIR" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["PEAK_WAVE_DIR"] + var.units = "degree" + + if "CDIR" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["CDIR"] + var.units = "degree" + var.long_name = "current_direction" + + if "CSPD" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["CSPD"] + var.long_name = "current_magnitude" + + if "ALBD" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["ALBD"] + var.units = "1" def clean_no_cf_variables(var, netcdf_file_obj): """ remove standard name of main variable and of its ancillary qc var if exists """ if var in netcdf_file_obj.variables.keys(): - if hasattr(netcdf_file_obj.variables[var], 'standard_name'): - del(netcdf_file_obj.variables[var].standard_name) - var_qc = '%s_quality_control' % var + if hasattr(netcdf_file_obj.variables[var], "standard_name"): + del netcdf_file_obj.variables[var].standard_name + var_qc = "%s_quality_control" % var if var_qc in netcdf_file_obj.variables.keys(): - if hasattr(netcdf_file_obj.variables[var_qc], 'standard_name'): - del(netcdf_file_obj.variables[var_qc].standard_name) - if hasattr(netcdf_file_obj.variables[var], 
'ancillary_variables'): + if hasattr(netcdf_file_obj.variables[var_qc], "standard_name"): + del netcdf_file_obj.variables[var_qc].standard_name + if hasattr(netcdf_file_obj.variables[var], "ancillary_variables"): netcdf_file_obj.variables[var].ancillary_variables = var_qc - if 'Dissolved_Oxygen_Percent' in netcdf_file_obj.variables.keys(): - clean_no_cf_variables('Dissolved_Oxygen_Percent', netcdf_file_obj) - - if 'ErrorVelocity' in netcdf_file_obj.variables.keys(): - clean_no_cf_variables('ErrorVelocity', netcdf_file_obj) - netcdf_file_obj.variables['ErrorVelocity'].long_name = 'error_velocity' - - if 'Average_Compass_Heading' in netcdf_file_obj.variables.keys(): - clean_no_cf_variables('Average_Compass_Heading', netcdf_file_obj) - var = netcdf_file_obj.variables['Average_Compass_Heading'] - var.units = 'degree' - - if 'Upwelling_longwave_radiation' in netcdf_file_obj.variables.keys(): - var_str = 'Upwelling_longwave_radiation' - var_qc_str = '%s_quality_control' % var_str - var = netcdf_file_obj.variables[var_str] - var_qc = netcdf_file_obj.variables[var_qc_str] - var.units = 'W m-2' - var.standard_name = 'upwelling_longwave_flux_in_air' - var_qc.standard_name = 'upwelling_longwave_flux_in_air status_flag' - - if 'Downwelling_longwave_radiation' in netcdf_file_obj.variables.keys(): - var_str = 'Downwelling_longwave_radiation' - var_qc_str = '%s_quality_control' % var_str - var = netcdf_file_obj.variables[var_str] - var_qc = netcdf_file_obj.variables[var_qc_str] - var.units = 'W m-2' - var.standard_name = 'downwelling_longwave_flux_in_air' - var_qc.standard_name = 'downwelling_longwave_flux_in_air status_flag' - - if 'UP_TOT_RADIATION' in netcdf_file_obj.variables.keys(): - var_str = 'UP_TOT_RADIATION' - var_qc_str = '%s_quality_control' % var_str - var = netcdf_file_obj.variables[var_str] - var_qc = netcdf_file_obj.variables[var_qc_str] - var.units = 'W m-2' - var.standard_name = 'upwelling_longwave_flux_in_air' - var_qc.standard_name = 'upwelling_longwave_flux_in_air status_flag' - - if 'DOWN_TOT_RADIATION' in netcdf_file_obj.variables.keys(): - var_str = 'DOWN_TOT_RADIATION' - var_qc_str = '%s_quality_control' % var_str - var = netcdf_file_obj.variables[var_str] - var_qc = netcdf_file_obj.variables[var_qc_str] - var.units = 'W m-2' - var.standard_name = 'downwelling_longwave_flux_in_air' - var_qc.standard_name = 'downwelling_longwave_flux_in_air status_flag' - - if 'RADIATION_DOWN_NET' in netcdf_file_obj.variables.keys(): - clean_no_cf_variables('RADIATION_DOWN_NET', netcdf_file_obj) - - if 'fluorescence' in netcdf_file_obj.variables.keys(): - netcdf_file_obj.renameVariable('fluorescence', 'CPHL') - netcdf_file_obj.variables['CPHL'].long_name = 'mass_concentration_of_inferred_chlorophyll_from_relative_fluorescence_units_in_sea_water_concentration_of_chlorophyll_in_sea_water' - if 'fluorescence_quality_control' in netcdf_file_obj.variables.keys(): - netcdf_file_obj.renameVariable('fluorescence_quality_control', 'CPHL_quality_control') - netcdf_file_obj.variables['CPHL_quality_control'].long_name = 'mass_concentration_of_inferred_chlorophyll_from_relative_fluorescence_units_in_sea_waterconcentration_of_chlorophyll_in_sea_water status_flag' - clean_no_cf_variables('CPHL', netcdf_file_obj) - - if 'WDIR_10min' in netcdf_file_obj.variables.keys(): - netcdf_file_obj.variables['WDIR_10min'].units = 'degree' - - if 'WDIR_30min' in netcdf_file_obj.variables.keys(): - netcdf_file_obj.variables['WDIR_30min'].units = 'degree' - - if 'R_sigma_30min' in netcdf_file_obj.variables.keys(): - 
netcdf_file_obj.variables['R_sigma_30min'].units = 'degree' - clean_no_cf_variables('R_sigma_30min', netcdf_file_obj) - - if 'WDIR_sigma_10min' in netcdf_file_obj.variables.keys(): - netcdf_file_obj.variables['WDIR_sigma_10min'].units = 'degree' - clean_no_cf_variables('WDIR_sigma_10min', netcdf_file_obj) - - if 'WDIR_sigma_30min' in netcdf_file_obj.variables.keys(): - netcdf_file_obj.variables['WDIR_sigma_30min'].units = 'degree' - clean_no_cf_variables('WDIR_sigma_30min', netcdf_file_obj) - - if 'ATMP' in netcdf_file_obj.variables.keys(): - netcdf_file_obj.variables['ATMP'].units = 'hPa' - - if 'RAIN_DURATION' in netcdf_file_obj.variables.keys(): - clean_no_cf_variables('RAIN_DURATION', netcdf_file_obj) - - if 'HAIL_DURATION' in netcdf_file_obj.variables.keys(): - clean_no_cf_variables('HAIL_DURATION', netcdf_file_obj) - - if 'HAIL_HIT' in netcdf_file_obj.variables.keys(): - clean_no_cf_variables('HAIL_HIT', netcdf_file_obj) - netcdf_file_obj.variables['HAIL_HIT'].comment = netcdf_file_obj.variables['HAIL_HIT'].units - netcdf_file_obj.variables['HAIL_HIT'].units = '1' - - if 'HAIL_INTENSITY_10min' in netcdf_file_obj.variables.keys(): - clean_no_cf_variables('HAIL_INTENSITY_10min', netcdf_file_obj) - netcdf_file_obj.variables['HAIL_INTENSITY_10min'].comment = netcdf_file_obj.variables['HAIL_INTENSITY_10min'].units - netcdf_file_obj.variables['HAIL_INTENSITY_10min'].units = '1' + if "Dissolved_Oxygen_Percent" in netcdf_file_obj.variables.keys(): + clean_no_cf_variables("Dissolved_Oxygen_Percent", netcdf_file_obj) + + if "ErrorVelocity" in netcdf_file_obj.variables.keys(): + clean_no_cf_variables("ErrorVelocity", netcdf_file_obj) + netcdf_file_obj.variables["ErrorVelocity"].long_name = "error_velocity" + + if "Average_Compass_Heading" in netcdf_file_obj.variables.keys(): + clean_no_cf_variables("Average_Compass_Heading", netcdf_file_obj) + var = netcdf_file_obj.variables["Average_Compass_Heading"] + var.units = "degree" + + if "Upwelling_longwave_radiation" in netcdf_file_obj.variables.keys(): + var_str = "Upwelling_longwave_radiation" + var_qc_str = "%s_quality_control" % var_str + var = netcdf_file_obj.variables[var_str] + var_qc = netcdf_file_obj.variables[var_qc_str] + var.units = "W m-2" + var.standard_name = "upwelling_longwave_flux_in_air" + var_qc.standard_name = "upwelling_longwave_flux_in_air status_flag" + + if "Downwelling_longwave_radiation" in netcdf_file_obj.variables.keys(): + var_str = "Downwelling_longwave_radiation" + var_qc_str = "%s_quality_control" % var_str + var = netcdf_file_obj.variables[var_str] + var_qc = netcdf_file_obj.variables[var_qc_str] + var.units = "W m-2" + var.standard_name = "downwelling_longwave_flux_in_air" + var_qc.standard_name = "downwelling_longwave_flux_in_air status_flag" + + if "UP_TOT_RADIATION" in netcdf_file_obj.variables.keys(): + var_str = "UP_TOT_RADIATION" + var_qc_str = "%s_quality_control" % var_str + var = netcdf_file_obj.variables[var_str] + var_qc = netcdf_file_obj.variables[var_qc_str] + var.units = "W m-2" + var.standard_name = "upwelling_longwave_flux_in_air" + var_qc.standard_name = "upwelling_longwave_flux_in_air status_flag" + + if "DOWN_TOT_RADIATION" in netcdf_file_obj.variables.keys(): + var_str = "DOWN_TOT_RADIATION" + var_qc_str = "%s_quality_control" % var_str + var = netcdf_file_obj.variables[var_str] + var_qc = netcdf_file_obj.variables[var_qc_str] + var.units = "W m-2" + var.standard_name = "downwelling_longwave_flux_in_air" + var_qc.standard_name = "downwelling_longwave_flux_in_air status_flag" + + if 
"RADIATION_DOWN_NET" in netcdf_file_obj.variables.keys(): + clean_no_cf_variables("RADIATION_DOWN_NET", netcdf_file_obj) + + if "fluorescence" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.renameVariable("fluorescence", "CPHL") + netcdf_file_obj.variables[ + "CPHL" + ].long_name = "mass_concentration_of_inferred_chlorophyll_from_relative_fluorescence_units_in_sea_water_concentration_of_chlorophyll_in_sea_water" + if "fluorescence_quality_control" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.renameVariable( + "fluorescence_quality_control", "CPHL_quality_control" + ) + netcdf_file_obj.variables[ + "CPHL_quality_control" + ].long_name = "mass_concentration_of_inferred_chlorophyll_from_relative_fluorescence_units_in_sea_waterconcentration_of_chlorophyll_in_sea_water status_flag" + clean_no_cf_variables("CPHL", netcdf_file_obj) + + if "WDIR_10min" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.variables["WDIR_10min"].units = "degree" + + if "WDIR_30min" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.variables["WDIR_30min"].units = "degree" + + if "R_sigma_30min" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.variables["R_sigma_30min"].units = "degree" + clean_no_cf_variables("R_sigma_30min", netcdf_file_obj) + + if "WDIR_sigma_10min" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.variables["WDIR_sigma_10min"].units = "degree" + clean_no_cf_variables("WDIR_sigma_10min", netcdf_file_obj) + + if "WDIR_sigma_30min" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.variables["WDIR_sigma_30min"].units = "degree" + clean_no_cf_variables("WDIR_sigma_30min", netcdf_file_obj) + + if "ATMP" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.variables["ATMP"].units = "hPa" + + if "RAIN_DURATION" in netcdf_file_obj.variables.keys(): + clean_no_cf_variables("RAIN_DURATION", netcdf_file_obj) + + if "HAIL_DURATION" in netcdf_file_obj.variables.keys(): + clean_no_cf_variables("HAIL_DURATION", netcdf_file_obj) + + if "HAIL_HIT" in netcdf_file_obj.variables.keys(): + clean_no_cf_variables("HAIL_HIT", netcdf_file_obj) + netcdf_file_obj.variables["HAIL_HIT"].comment = netcdf_file_obj.variables[ + "HAIL_HIT" + ].units + netcdf_file_obj.variables["HAIL_HIT"].units = "1" + + if "HAIL_INTENSITY_10min" in netcdf_file_obj.variables.keys(): + clean_no_cf_variables("HAIL_INTENSITY_10min", netcdf_file_obj) + netcdf_file_obj.variables[ + "HAIL_INTENSITY_10min" + ].comment = netcdf_file_obj.variables["HAIL_INTENSITY_10min"].units + netcdf_file_obj.variables["HAIL_INTENSITY_10min"].units = "1" # add qc conventions to qc vars variables = netcdf_file_obj.variables.keys() - qc_vars = [s for s in variables if '_quality_control' in s] + qc_vars = [s for s in variables if "_quality_control" in s] if qc_vars != []: for var in qc_vars: - netcdf_file_obj.variables[var].quality_control_conventions = imos_qc_convention + netcdf_file_obj.variables[ + var + ].quality_control_conventions = imos_qc_convention # clean longnames, force lower case, remove space, remove double underscore for var in variables: - if hasattr(netcdf_file_obj.variables[var], 'long_name'): - netcdf_file_obj.variables[var].long_name = netcdf_file_obj.variables[var].long_name.replace('__', '_') - netcdf_file_obj.variables[var].long_name = netcdf_file_obj.variables[var].long_name.replace(' _', '_') - netcdf_file_obj.variables[var].long_name = netcdf_file_obj.variables[var].long_name.lower() + if hasattr(netcdf_file_obj.variables[var], "long_name"): + netcdf_file_obj.variables[var].long_name = 
netcdf_file_obj.variables[ + var + ].long_name.replace("__", "_") + netcdf_file_obj.variables[var].long_name = netcdf_file_obj.variables[ + var + ].long_name.replace(" _", "_") + netcdf_file_obj.variables[var].long_name = netcdf_file_obj.variables[ + var + ].long_name.lower() netcdf_file_obj.close() def fix_provider_code_from_filename(netcdf_file_path, imos_facility_code): - new_filename = re.sub('AIMS_', ('%s_' % imos_facility_code), netcdf_file_path) + new_filename = re.sub("AIMS_", ("%s_" % imos_facility_code), netcdf_file_path) shutil.move(netcdf_file_path, new_filename) return new_filename def fix_data_code_from_filename(netcdf_file_path): - """ Some filename are badly written. + """Some filename are badly written. this function has to run after modifying the file to make it CF and IMOS compliant It physically renames the filename if needed """ - netcdf_file_obj = Dataset(netcdf_file_path, 'r', format='NETCDF4') - if 'CDIR' in netcdf_file_obj.variables.keys(): - new_filename = re.sub('_CDIR_', '_V_', netcdf_file_path) + netcdf_file_obj = Dataset(netcdf_file_path, "r", format="NETCDF4") + if "CDIR" in netcdf_file_obj.variables.keys(): + new_filename = re.sub("_CDIR_", "_V_", netcdf_file_path) netcdf_file_obj.close() shutil.move(netcdf_file_path, new_filename) return new_filename - if 'CSPD' in netcdf_file_obj.variables.keys(): - new_filename = re.sub('_CSPD_', '_V_', netcdf_file_path) + if "CSPD" in netcdf_file_obj.variables.keys(): + new_filename = re.sub("_CSPD_", "_V_", netcdf_file_path) netcdf_file_obj.close() shutil.move(netcdf_file_path, new_filename) return new_filename - if 'DOX1' in netcdf_file_obj.variables.keys(): - new_filename = re.sub('_Dissolved_O2_\(mole\)_', '_K_', netcdf_file_path) + if "DOX1" in netcdf_file_obj.variables.keys(): + new_filename = re.sub("_Dissolved_O2_\(mole\)_", "_K_", netcdf_file_path) netcdf_file_obj.close() shutil.move(netcdf_file_path, new_filename) return new_filename - if 'DEPTH' in netcdf_file_obj.variables.keys(): - new_filename = re.sub('_DEPTH_', '_Z_', netcdf_file_path) + if "DEPTH" in netcdf_file_obj.variables.keys(): + new_filename = re.sub("_DEPTH_", "_Z_", netcdf_file_path) netcdf_file_obj.close() shutil.move(netcdf_file_path, new_filename) return new_filename - if 'Dissolved_Oxygen_Percent' in netcdf_file_obj.variables.keys(): - new_filename = re.sub('_DO_%_', '_O_', netcdf_file_path) + if "Dissolved_Oxygen_Percent" in netcdf_file_obj.variables.keys(): + new_filename = re.sub("_DO_%_", "_O_", netcdf_file_path) netcdf_file_obj.close() shutil.move(netcdf_file_path, new_filename) return new_filename - if 'ErrorVelocity' in netcdf_file_obj.variables.keys(): - new_filename = re.sub('_ErrorVelocity_', '_V_', netcdf_file_path) + if "ErrorVelocity" in netcdf_file_obj.variables.keys(): + new_filename = re.sub("_ErrorVelocity_", "_V_", netcdf_file_path) netcdf_file_obj.close() shutil.move(netcdf_file_path, new_filename) return new_filename - if 'Average_Compass_Heading' in netcdf_file_obj.variables.keys(): - new_filename = re.sub('_Average_Compass_Heading_', '_E_', netcdf_file_path) + if "Average_Compass_Heading" in netcdf_file_obj.variables.keys(): + new_filename = re.sub("_Average_Compass_Heading_", "_E_", netcdf_file_path) netcdf_file_obj.close() shutil.move(netcdf_file_path, new_filename) return new_filename - if 'Upwelling_longwave_radiation' in netcdf_file_obj.variables.keys(): - new_filename = re.sub('_Upwelling_longwave_radiation_', '_F_', netcdf_file_path) + if "Upwelling_longwave_radiation" in netcdf_file_obj.variables.keys(): + 
new_filename = re.sub("_Upwelling_longwave_radiation_", "_F_", netcdf_file_path) netcdf_file_obj.close() shutil.move(netcdf_file_path, new_filename) return new_filename - if 'Downwelling_longwave_radiation' in netcdf_file_obj.variables.keys(): - new_filename = re.sub('_Downwelling_longwave_radiation_', '_F_', netcdf_file_path) + if "Downwelling_longwave_radiation" in netcdf_file_obj.variables.keys(): + new_filename = re.sub( + "_Downwelling_longwave_radiation_", "_F_", netcdf_file_path + ) netcdf_file_obj.close() shutil.move(netcdf_file_path, new_filename) return new_filename @@ -848,49 +947,57 @@ def fix_data_code_from_filename(netcdf_file_path): def has_var_only_fill_value(netcdf_file_path, var): - """ some channels have only _Fillvalues in their main variable. This is not correct and need + """some channels have only _Fillvalues in their main variable. This is not correct and need to be tested var is a string of the variable to test """ - netcdf_file_obj = Dataset(netcdf_file_path, 'r', format='NETCDF4') - var_obj = netcdf_file_obj.variables[var] - var_values = var_obj[:] + netcdf_file_obj = Dataset(netcdf_file_path, "r", format="NETCDF4") + var_obj = netcdf_file_obj.variables[var] + var_values = var_obj[:] netcdf_file_obj.close() # if no fill value in variable, no mask attribute - if hasattr(var_values, 'mask'): + if hasattr(var_values, "mask"): return var_values.mask.all() else: return False def remove_dimension_from_netcdf(netcdf_file_path): - """ DIRTY, calling bash. need to write in Python, or part of the NetCDF4 module + """DIRTY, calling bash. need to write in Python, or part of the NetCDF4 module need to remove the 'single' dimension name from DEPTH or other dim. Unfortunately can't seem to find a way to do it easily with netCDF4 module """ fd, tmp_file = tempfile.mkstemp() os.close(fd) - subprocess.check_call(['ncwa', '-O', '-a', 'single', netcdf_file_path, tmp_file]) - subprocess.check_call(['ncatted', '-O', '-a', 'cell_methods,,d,,', tmp_file, tmp_file]) + subprocess.check_call(["ncwa", "-O", "-a", "single", netcdf_file_path, tmp_file]) + subprocess.check_call( + ["ncatted", "-O", "-a", "cell_methods,,d,,", tmp_file, tmp_file] + ) shutil.move(tmp_file, netcdf_file_path) def remove_end_date_from_filename(netcdf_filename): - """ remove the _END-* part of the file, as we download monthly file. This helps + """remove the _END-* part of the file, as we download monthly file. 
This helps to overwrite file with new data for the same month """ - return re.sub('_END-.*$', '.nc', netcdf_filename) + return re.sub("_END-.*$", ".nc", netcdf_filename) def rm_tmp_dir(data_wip_path): - """ remove temporary directories older than 15 days from data_wip path""" + """remove temporary directories older than 15 days from data_wip path""" for dir_path in os.listdir(data_wip_path): - if dir_path.startswith('manifest_dir_tmp_'): - file_date = datetime.datetime.strptime(dir_path.split('_')[-1], '%Y%m%d%H%M%S') + if dir_path.startswith("manifest_dir_tmp_"): + file_date = datetime.datetime.strptime( + dir_path.split("_")[-1], "%Y%m%d%H%M%S" + ) if (datetime.datetime.now() - file_date).days > 15: logger = logging.getLogger(__name__) - logger.info('DELETE old temporary folder {path}'.format(path=os.path.join(data_wip_path, dir_path))) + logger.info( + "DELETE old temporary folder {path}".format( + path=os.path.join(data_wip_path, dir_path) + ) + ) shutil.rmtree(os.path.join(data_wip_path, dir_path)) @@ -898,7 +1005,7 @@ def set_up(): """ set up wip facility directories """ - wip_path = os.environ.get('data_wip_path') + wip_path = os.environ.get("data_wip_path") # this is used for unit testing as data_wip_path env would not be set if wip_path is None: @@ -906,11 +1013,11 @@ def set_up(): if not wip_path: logger = logging.getLogger(__name__) - logger.error('env data_wip_path not defined') + logger.error("env data_wip_path not defined") exit(1) if not os.path.exists(wip_path): os.makedirs(wip_path) - if not os.path.exists(os.path.join(wip_path, 'errors')): - os.makedirs(os.path.join(wip_path, 'errors')) + if not os.path.exists(os.path.join(wip_path, "errors")): + os.makedirs(os.path.join(wip_path, "errors")) From d995da6458fcb66f9a08972bdf6c6a424aa6cb7f Mon Sep 17 00:00:00 2001 From: lbesnard Date: Tue, 27 Jan 2026 16:08:13 +1100 Subject: [PATCH 2/8] Fix: delete properly tmp dirs --- ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py | 178 ++++--- ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new | 497 ++++++++++++++++++++ 2 files changed, 600 insertions(+), 75 deletions(-) create mode 100755 ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new diff --git a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py index 2eb53e75..ba90f2a0 100755 --- a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py +++ b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py @@ -30,15 +30,13 @@ import datetime import logging import os -import sys import re import shutil +import sys import traceback import unittest as data_validation_test - -from netCDF4 import Dataset from itertools import groupby -from tendo import singleton +from pathlib import Path from aims_realtime_util import ( convert_time_cf_to_imos, @@ -46,23 +44,25 @@ download_channel, fix_data_code_from_filename, fix_provider_code_from_filename, + get_main_netcdf_var, has_var_only_fill_value, is_no_data_found, is_time_monotonic, is_time_var_empty, + list_recursively_files_abs_path, logging_aims, md5, modify_aims_netcdf, parse_aims_xml, remove_dimension_from_netcdf, remove_end_date_from_filename, + rm_tmp_dir, save_channel_info, set_up, - rm_tmp_dir, - get_main_netcdf_var, - list_recursively_files_abs_path, ) from dest_path import get_anmn_nrs_site_name +from netCDF4 import Dataset +from tendo import singleton from util import pass_netcdf_checker MD5_EXPECTED_VALUE = "a6207e053f1cc0e00d171701f0cdb186" @@ -79,88 +79,116 @@ def modify_anmn_nrs_netcdf(netcdf_file_path, channel_id_info): - """Modify the downloaded netCDF file so it passes both CF and IMOS checker - input: - 
netcdf_file_path(str) : path of netcdf file to modify - channel_id_index(tupple) : information from xml for the channel """ + Refines ANMN NRS specific metadata and coordinate variables. + """ + # First pass: Generic AIMS modifications modify_aims_netcdf(netcdf_file_path, channel_id_info) - netcdf_file_obj = Dataset(netcdf_file_path, "a", format="NETCDF4") - netcdf_file_obj.aims_channel_id = int(channel_id_info["channel_id"]) - - if "Yongala" in channel_id_info["site_name"]: - netcdf_file_obj.site_code = "NRSYON" - netcdf_file_obj.platform_code = "Yongala NRS Buoy" - elif "Darwin" in channel_id_info["site_name"]: - netcdf_file_obj.site_code = "NRSDAR" - netcdf_file_obj.platform_code = "Darwin NRS Buoy" - elif "Beagle" in channel_id_info["site_name"]: - netcdf_file_obj.site_code = "DARBGF" - netcdf_file_obj.platform_code = "Beagle Gulf Mooring" - else: - return False - - if not (channel_id_info["metadata_uuid"] == "Not Available"): - netcdf_file_obj.metadata_uuid = channel_id_info["metadata_uuid"] - - # some weather stations channels don't have a depth variable if sensor above water - if "depth" in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables["depth"] - var.long_name = "nominal depth" - var.positive = "down" - var.axis = "Z" - var.reference_datum = "sea surface" - var.valid_min = -10.0 - var.valid_max = 30.0 - var.units = "m" # some channels put degrees celcius instead ... - netcdf_file_obj.renameVariable("depth", "NOMINAL_DEPTH") - - if "DEPTH" in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables["DEPTH"] - var.coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" - var.long_name = "actual depth" - var.reference_datum = "sea surface" - var.positive = "down" - var.valid_min = -10.0 - var.valid_max = 30.0 - var.units = "m" # some channels put degrees celcius instead ... - - netcdf_file_obj.close() - netcdf_file_obj = Dataset( - netcdf_file_path, "a", format="NETCDF4" - ) # need to close to save to file. 
as we call get_main_var just after + # Site and Platform Mapping (The Dictionary approach) + site_map = { + "Yongala": ("NRSYON", "Yongala NRS Buoy"), + "Darwin": ("NRSDAR", "Darwin NRS Buoy"), + "Beagle": ("DARBGF", "Beagle Gulf Mooring"), + } + + site_name = channel_id_info.get("site_name", "") + site_data = next((v for k, v in site_map.items() if k in site_name), None) + + if not site_data: + return False # Site not recognised + + with Dataset(netcdf_file_path, "a") as nc: + nc.site_code, nc.platform_code = site_data + nc.aims_channel_id = int(channel_id_info["channel_id"]) + + if channel_id_info.get("metadata_uuid") != "Not Available": + nc.metadata_uuid = channel_id_info["metadata_uuid"] + + # Depth Variable Attributes (Common configurations) + depth_attrs = { + "positive": "down", + "axis": "Z", + "reference_datum": "sea surface", + "valid_min": -10.0, + "valid_max": 30.0, + "units": "m", + } + + # Handle 'depth' + if "depth" in nc.variables: + var = nc.variables["depth"] + for k, v in depth_attrs.items(): + setattr(var, k, v) + var.long_name = "nominal depth" + nc.renameVariable("depth", "NOMINAL_DEPTH") + + # Handle 'DEPTH' (actual depth) + if "DEPTH" in nc.variables: + var = nc.variables["DEPTH"] + # Standard depth attributes plus coordinates + for k, v in depth_attrs.items(): + setattr(var, k, v) + var.long_name = "actual depth" + var.coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" + + # Coordinate String Assignment + # We close the file above so that the next functions see the changes main_var = get_main_netcdf_var(netcdf_file_path) - # DEPTH, LATITUDE and LONGITUDE are not dimensions, so we make them into auxiliary cooordinate variables by adding this attribute - if "NOMINAL_DEPTH" in netcdf_file_obj.variables.keys(): - netcdf_file_obj.variables[ - main_var - ].coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" - else: - netcdf_file_obj.variables[main_var].coordinates = "TIME LATITUDE LONGITUDE" - netcdf_file_obj.close() + with Dataset(netcdf_file_path, "a") as nc: + if main_var in nc.variables: + coords = "TIME LATITUDE LONGITUDE" + if "NOMINAL_DEPTH" in nc.variables: + coords += " NOMINAL_DEPTH" + nc.variables[main_var].coordinates = coords + # Final transformations if not convert_time_cf_to_imos(netcdf_file_path): return False - remove_dimension_from_netcdf( - netcdf_file_path - ) # last modification to do in this order! + # This MUST be last as it reshapes the file + remove_dimension_from_netcdf(netcdf_file_path) + return True def move_to_tmp_incoming(netcdf_path): - # [org_filename withouth creation date].[md5].nc to have unique filename in - new_filename = "%s.%s.nc" % ( - os.path.splitext(os.path.basename(remove_end_date_from_filename(netcdf_path)))[ - 0 - ], - md5(netcdf_path), - ) + """ + Renames the NetCDF to include its MD5 hash, moves it to the manifest directory, + and cleans up the now-empty source directory. 
+ """ + logger = logging.getLogger(__name__) + # Convert to Path object for easier manipulation + source_file = Path(netcdf_path) + source_dir = source_file.parent + + # Construct the new filename: [name_without_date].[md5].nc + # remove_end_date_from_filename returns a string, so we wrap it in Path + name_no_date = Path(remove_end_date_from_filename(str(source_file))).stem + file_hash = md5(str(source_file)) + new_filename = f"{name_no_date}.{file_hash}.nc" - os.chmod(netcdf_path, 0o0664) # change to 664 for pipeline v2 - shutil.move(netcdf_path, os.path.join(TMP_MANIFEST_DIR, new_filename)) + destination = Path(TMP_MANIFEST_DIR) / new_filename + + try: + # Apply permissions (664) + source_file.chmod(0o664) + + # Perform the move + shutil.move(str(source_file), str(destination)) + logger.info(f"Moved {source_file.name} to {destination}") + + # Cleanup: Delete the source directory if it is now empty + try: + source_dir.rmdir() + logger.debug(f"Cleaned up empty directory: {source_dir}") + except OSError: + logger.debug(f"Source directory not empty; skipping cleanup: {source_dir}") + + except Exception as e: + logger.error(f"Failed to move {source_file} to incoming: {e}") + raise def process_monthly_channel(channel_id, aims_xml_info, level_qc): diff --git a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new new file mode 100755 index 00000000..24143f80 --- /dev/null +++ b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new @@ -0,0 +1,497 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Download ANMN NRS data from AIMS Web Service for Darwin, Yongala and Beagle +The script reads an XML file provided by AIMS and looks for channels with +new data to download. It compares this list with a pickle file (pythonic +way to store python variables) containing what has already been downloaded +in the previous run of this script. +Some modifications on the files have to be done so they comply with CF and +IMOS conventions. +The IOOS compliance checker is used to check if the first downloaded file of +a channel complies once modified. If not, the download of the rest of the +channel is aborted until some modification on the source code is done so +the channel can pass the checker. +Files which don't pass the checker will land in os.path.join(wip_path, 'errors') +for investigation. No need to reprocess them as they will be redownloaded on +next run until they end up passing the checker. Files in the 'errors' dir can be +removed at anytime + +IMPORTANT: +is it essential to look at the logging os.path.join(wip_path, 'aims.log') +to know which channels have problems and why as most of the time, AIMS will +have to be contacted to sort out issues. 
+ + +author Laurent Besnard, laurent.besnard@utas.edu.au +""" + +import argparse +import datetime +import logging +import os +import re +import shutil +import sys +import traceback +import unittest as data_validation_test +from itertools import groupby +from pathlib import Path + +from aims_realtime_util import ( + convert_time_cf_to_imos, + create_list_of_dates_to_download, + download_channel, + fix_data_code_from_filename, + fix_provider_code_from_filename, + get_main_netcdf_var, + has_var_only_fill_value, + is_no_data_found, + is_time_monotonic, + is_time_var_empty, + list_recursively_files_abs_path, + logging_aims, + md5, + modify_aims_netcdf, + parse_aims_xml, + remove_dimension_from_netcdf, + remove_end_date_from_filename, + rm_tmp_dir, + save_channel_info, + set_up, +) +from dest_path import get_anmn_nrs_site_name +from netCDF4 import Dataset +from tendo import singleton +from util import pass_netcdf_checker + +DATA_WIP_PATH = os.path.join( + os.environ.get("WIP_DIR"), + "ANMN", + "NRS_AIMS_Darwin_Yongala_data_rss_download_temporary", +) +ANMN_NRS_INCOMING_DIR = os.path.join( + os.environ.get("INCOMING_DIR"), "AODN", "ANMN_NRS_DAR_YON" +) +ANMN_NRS_ERROR_DIR = os.path.join(os.environ["ERROR_DIR"], "ANMN_NRS_DAR_YON") + + +def modify_anmn_nrs_netcdf(netcdf_file_path, channel_id_info): + """Modify the downloaded netCDF file so it passes both CF and IMOS checker + input: + netcdf_file_path(str) : path of netcdf file to modify + channel_id_index(tupple) : information from xml for the channel + """ + modify_aims_netcdf(netcdf_file_path, channel_id_info) + + netcdf_file_obj = Dataset(netcdf_file_path, "a", format="NETCDF4") + netcdf_file_obj.aims_channel_id = int(channel_id_info["channel_id"]) + + if "Yongala" in channel_id_info["site_name"]: + netcdf_file_obj.site_code = "NRSYON" + netcdf_file_obj.platform_code = "Yongala NRS Buoy" + elif "Darwin" in channel_id_info["site_name"]: + netcdf_file_obj.site_code = "NRSDAR" + netcdf_file_obj.platform_code = "Darwin NRS Buoy" + elif "Beagle" in channel_id_info["site_name"]: + netcdf_file_obj.site_code = "DARBGF" + netcdf_file_obj.platform_code = "Beagle Gulf Mooring" + else: + return False + + if not (channel_id_info["metadata_uuid"] == "Not Available"): + netcdf_file_obj.metadata_uuid = channel_id_info["metadata_uuid"] + + # some weather stations channels don't have a depth variable if sensor above water + if "depth" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["depth"] + var.long_name = "nominal depth" + var.positive = "down" + var.axis = "Z" + var.reference_datum = "sea surface" + var.valid_min = -10.0 + var.valid_max = 30.0 + var.units = "m" # some channels put degrees celcius instead ... + netcdf_file_obj.renameVariable("depth", "NOMINAL_DEPTH") + + if "DEPTH" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["DEPTH"] + var.coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" + var.long_name = "actual depth" + var.reference_datum = "sea surface" + var.positive = "down" + var.valid_min = -10.0 + var.valid_max = 30.0 + var.units = "m" # some channels put degrees celcius instead ... + + netcdf_file_obj.close() + netcdf_file_obj = Dataset( + netcdf_file_path, "a", format="NETCDF4" + ) # need to close to save to file. 
as we call get_main_var just after + main_var = get_main_netcdf_var(netcdf_file_path) + # DEPTH, LATITUDE and LONGITUDE are not dimensions, so we make them into auxiliary cooordinate variables by adding this attribute + if "NOMINAL_DEPTH" in netcdf_file_obj.variables.keys(): + netcdf_file_obj.variables[ + main_var + ].coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" + else: + netcdf_file_obj.variables[main_var].coordinates = "TIME LATITUDE LONGITUDE" + + netcdf_file_obj.close() + + if not convert_time_cf_to_imos(netcdf_file_path): + return False + + remove_dimension_from_netcdf( + netcdf_file_path + ) # last modification to do in this order! + return True + + +def move_to_tmp_incoming(netcdf_path): + """ + Renames the NetCDF to include its MD5 hash, moves it to the manifest directory, + and cleans up the now-empty source directory. + """ + logger = logging.getLogger(__name__) + # Convert to Path object for easier manipulation + source_file = Path(netcdf_path) + source_dir = source_file.parent + + # Construct the new filename: [name_without_date].[md5].nc + # remove_end_date_from_filename returns a string, so we wrap it in Path + name_no_date = Path(remove_end_date_from_filename(str(source_file))).stem + file_hash = md5(str(source_file)) + new_filename = f"{name_no_date}.{file_hash}.nc" + + destination = Path(TMP_MANIFEST_DIR) / new_filename + + try: + # Apply permissions (664) + source_file.chmod(0o664) + + # Perform the move + shutil.move(str(source_file), str(destination)) + logger.info(f"Moved {source_file.name} to {destination}") + + # Cleanup: Delete the source directory if it is now empty + try: + source_dir.rmdir() + logger.debug(f"Cleaned up empty directory: {source_dir}") + except OSError: + logger.debug(f"Source directory not empty; skipping cleanup: {source_dir}") + + except Exception as e: + logger.error(f"Failed to move {source_file} to incoming: {e}") + raise + + +def process_monthly_channel(channel_id, aims_xml_info, level_qc): + """ + Downloads all the data available for one channel_id and moves the file to a wip_path dir + + aims_service : 1 -> FAIMMS data + 100 -> SOOP TRV data + 300 -> NRS DATA + for monthly data download, only 1 and 300 should be use + """ + contact_aims_msg = "Process of channel aborted - CONTACT AIMS" + wip_path = Path(os.environ.get("data_wip_path", "")) + + logger.info(f"QC{level_qc} - Processing channel {channel_id}") + + channel_id_info = aims_xml_info[channel_id] + from_date = channel_id_info["from_date"] + thru_date = channel_id_info["thru_date"] + + # [start_dates, end_dates] generation + start_dates, end_dates = create_list_of_dates_to_download( + channel_id, level_qc, from_date, thru_date + ) + + if not start_dates: + logger.info(f"QC{level_qc} - Channel {channel_id}: already up to date") + return + + # download monthly file + for start_dt, end_dt in zip(start_dates, end_dates): + start_date = start_dt.strftime("%Y-%m-%dT%H:%M:%SZ") + end_date = end_dt.strftime("%Y-%m-%dT%H:%M:%SZ") + + netcdf_tmp_file_path = download_channel( + channel_id, start_date, end_date, level_qc + ) + + if netcdf_tmp_file_path is None: + logger.error( + f" Channel {channel_id} - not valid zip file - {contact_aims_msg}" + ) + break + + tmp_dir = Path(netcdf_tmp_file_path).parent + + # NO_DATA_FOUND file only means there is no data for the selected time period. 
+ # Could be some data afterwards + if is_no_data_found(netcdf_tmp_file_path): + logger.info( + f"Channel {channel_id}: No data for the time period:[{start_date} - {end_date}]" + ) + shutil.rmtree(tmp_dir) + continue # Move to next month + + # Start of validation sequence + error_occurred = False + + if is_time_var_empty(netcdf_tmp_file_path): + logger.error( + f"Channel {channel_id}: No values in TIME variable - {contact_aims_msg}" + ) + error_occurred = True + + elif not modify_anmn_nrs_netcdf(netcdf_tmp_file_path, channel_id_info): + logger.error( + f"Channel {channel_id}: Could not modify the NetCDF file - Process of channel aborted" + ) + error_occurred = True + + else: + main_var = get_main_netcdf_var(netcdf_tmp_file_path) + if has_var_only_fill_value(netcdf_tmp_file_path, main_var): + logger.error( + f"Channel {channel_id}: _Fillvalues only in main variable - {contact_aims_msg}" + ) + error_occurred = True + elif not get_anmn_nrs_site_name(netcdf_tmp_file_path): + logger.error( + f"Channel {channel_id}: Unknown site_code gatt value - {contact_aims_msg}" + ) + error_occurred = True + elif not is_time_monotonic(netcdf_tmp_file_path): + logger.error( + f"Channel {channel_id}: TIME value is not strictly monotonic - {contact_aims_msg}" + ) + error_occurred = True + + if error_occurred: + shutil.rmtree(tmp_dir) + break + + # check every single file of the list. We don't assume that if one passes, all pass ... past proved this + if not pass_netcdf_checker(netcdf_tmp_file_path, tests=["cf:1.6", "imos:1.3"]): + logger.error( + f"Channel {channel_id}: File does not pass CF/IMOS compliance checker - Process of channel aborted" + ) + + err_dest = wip_path / "errors" / os.path.basename(netcdf_tmp_file_path) + shutil.copy(netcdf_tmp_file_path, err_dest) + + logger.error(f"File copied to {err_dest} for debugging") + shutil.rmtree(tmp_dir) + break + + netcdf_tmp_file_path = fix_data_code_from_filename(netcdf_tmp_file_path) + netcdf_tmp_file_path = fix_provider_code_from_filename( + netcdf_tmp_file_path, "IMOS_ANMN" + ) + + if not re.search(r"IMOS_ANMN_[A-Z]{1}_", netcdf_tmp_file_path): + logger.error( + f" Channel {channel_id} - File name Data code does not pass REGEX - Process of channel aborted" + ) + + err_dest = wip_path / "errors" / os.path.basename(netcdf_tmp_file_path) + shutil.copy(netcdf_tmp_file_path, err_dest) + + logger.error(f" File copied to {err_dest} for debugging") + shutil.rmtree(tmp_dir) + break + + move_to_tmp_incoming(netcdf_tmp_file_path) + + # Update tracking + save_channel_info(channel_id, aims_xml_info, level_qc, end_date) + + if TESTING: + # The 2 next lines download the first month only for every single channel. 
+ # This is only used for testing + # Note: save_channel_info already called above + break + + +def process_qc_level(level_qc): + """Downloads all channels for a QC level + level_qc(int) : 0 or 1 + """ + + logger.info( + "Process ANMN NRS download from AIMS web service - QC level {level_qc}".format( + level_qc=level_qc + ) + ) + xml_url = "https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level{level_qc}/300".format( + level_qc=level_qc + ) + try: + aims_xml_info = parse_aims_xml(xml_url) + except Exception as err: + logger.critical("RSS feed not available") + exit(1) + + for channel_id in aims_xml_info.keys(): + try: + process_monthly_channel(channel_id, aims_xml_info, level_qc) + except Exception as err: + logger.error( + "QC{qc_level} - Channel {channel_id}: Failed, unknown reason - manual debug required".format( + channel_id=str(channel_id), qc_level=str(level_qc) + ) + ) + logger.error(traceback.print_exc()) + + +class AimsDataValidationTest(data_validation_test.TestCase): + def setUp(self): + """Check that a the AIMS system or this script hasn't been modified. + This function checks that a downloaded file still has the same md5. + """ + channel_id = "84329" + from_date = "2016-01-01T00:00:00Z" + thru_date = "2016-01-02T00:00:00Z" + level_qc = 1 + aims_rss_val = 300 + xml_url = ( + "https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level%s/%s" + % (str(level_qc), str(aims_rss_val)) + ) + + logger.info("Data validation unittests...") + aims_xml_info = parse_aims_xml(xml_url) + channel_id_info = aims_xml_info[channel_id] + self.netcdf_tmp_file_path = download_channel( + channel_id, from_date, thru_date, level_qc + ) + modify_anmn_nrs_netcdf(self.netcdf_tmp_file_path, channel_id_info) + EPOCH_ISO = "1970-01-01T00:00:00Z" + + netcdf_path = Path(self.netcdf_tmp_file_path) + + with Dataset(netcdf_path, mode="a", format="NETCDF4") as nc: + # force values of attributes which change all the time + nc.date_created = EPOCH_ISO + nc.history = "data validation test only" + nc.NCO = "NCO_VERSION" + + def tearDown(self): + shutil.copy( + self.netcdf_tmp_file_path, + os.path.join( + os.environ["data_wip_path"], "nc_unittest_%s.nc" % self.md5_netcdf_value + ), + ) + shutil.rmtree(os.path.dirname(self.netcdf_tmp_file_path)) + + def test_aims_validation(self): + if sys.version_info[0] < 3: + self.md5_expected_value = "76c9a595264a8173545b6dc0c518a280" + else: + self.md5_expected_value = "1bb65266f8e526ed2087904ae024e33d" + + self.md5_netcdf_value = md5(self.netcdf_tmp_file_path) + + self.assertEqual(self.md5_netcdf_value, self.md5_expected_value) + + +def args(): + """ + define the script arguments + :return: vargs + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "-t", + "--testing", + action="store_true", + help="testing only - downloads the first month of each channel", + ) + + return parser.parse_args() + + +if __name__ == "__main__": + vargs = args() + me = singleton.SingleInstance() + os.environ["data_wip_path"] = os.path.join( + os.environ.get("WIP_DIR"), + "ANMN", + "NRS_AIMS_Darwin_Yongala_data_rss_download_temporary", + ) + global TMP_MANIFEST_DIR + global TESTING + + set_up() + + # initialise logging + logging_aims() + global logger + logger = logging.getLogger(__name__) + + # data validation test + runner = data_validation_test.TextTestRunner() + itersuite = data_validation_test.TestLoader().loadTestsFromTestCase( + AimsDataValidationTest + ) + res = runner.run(itersuite) + + if not DATA_WIP_PATH: + logger.critical("environment variable data_wip_path is not 
defined.") + exit(1) + + # script optional argument for testing only. used in process_monthly_channel + TESTING = vargs.testing + + rm_tmp_dir(DATA_WIP_PATH) + + if len(os.listdir(ANMN_NRS_INCOMING_DIR)) >= 2: + logger.critical("Operation aborted, too many files in INCOMING_DIR") + exit(1) + + if len(os.listdir(ANMN_NRS_ERROR_DIR)) >= 2: + logger.critical("Operation aborted, too many files in ERROR_DIR") + exit(1) + + if not res.failures: + for level in [0, 1]: + date_str_now = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + TMP_MANIFEST_DIR = os.path.join( + DATA_WIP_PATH, "manifest_dir_tmp_{date}".format(date=date_str_now) + ) + os.makedirs(TMP_MANIFEST_DIR) + + process_qc_level(level) + + lines_per_file = 2**12 + file_list = list_recursively_files_abs_path(TMP_MANIFEST_DIR) + if len(file_list) > 0: + for file_number, lines in groupby( + enumerate(file_list), key=lambda x: x[0] // lines_per_file + ): + incoming_file = os.path.join( + DATA_WIP_PATH, + "anmn_nrs_aims_FV0{level}_{date}_{file_number}.manifest".format( + level=str(level), date=date_str_now, file_number=file_number + ), + ) + with open(incoming_file, "w") as outfile: + for item in lines: + outfile.write("%s\n" % item[1]) + + os.chmod(incoming_file, 0o0664) # change to 664 for pipeline v2 + shutil.move( + incoming_file, + os.path.join( + ANMN_NRS_INCOMING_DIR, os.path.basename(incoming_file) + ), + ) + + else: + logger.error("Data validation unittests failed") From ee57da535c9c7c8c71e8716c72a6bcadebd0e8d6 Mon Sep 17 00:00:00 2001 From: lbesnard Date: Thu, 29 Jan 2026 14:45:55 +1100 Subject: [PATCH 3/8] Fix: AIMS NRS - various improvments --- ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py | 315 +++++++++++------------- 1 file changed, 142 insertions(+), 173 deletions(-) diff --git a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py index ba90f2a0..971a20f0 100755 --- a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py +++ b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py @@ -33,7 +33,6 @@ import re import shutil import sys -import traceback import unittest as data_validation_test from itertools import groupby from pathlib import Path @@ -85,7 +84,6 @@ def modify_anmn_nrs_netcdf(netcdf_file_path, channel_id_info): # First pass: Generic AIMS modifications modify_aims_netcdf(netcdf_file_path, channel_id_info) - # Site and Platform Mapping (The Dictionary approach) site_map = { "Yongala": ("NRSYON", "Yongala NRS Buoy"), "Darwin": ("NRSDAR", "Darwin NRS Buoy"), @@ -159,7 +157,7 @@ def move_to_tmp_incoming(netcdf_path): and cleans up the now-empty source directory. 
""" logger = logging.getLogger(__name__) - # Convert to Path object for easier manipulation + source_file = Path(netcdf_path) source_dir = source_file.parent @@ -192,198 +190,165 @@ def move_to_tmp_incoming(netcdf_path): def process_monthly_channel(channel_id, aims_xml_info, level_qc): - """Downloads all the data available for one channel_id and moves the file to a wip_path dir - channel_id(str) - aims_xml_info(tuple) - level_qc(int) + """ + Downloads all the data available for one channel_id and moves the file to a wip_path dir aims_service : 1 -> FAIMMS data 100 -> SOOP TRV data 300 -> NRS DATA for monthly data download, only 1 and 300 should be use """ - logger.info( - "QC{level_qc} - Processing channel {channel_id}".format( - channel_id=str(channel_id), level_qc=str(level_qc) - ) - ) + contact_aims_msg = "Process of channel aborted - CONTACT AIMS" + wip_path = Path(os.environ.get("data_wip_path", "")) + + logger.info(f"QC{level_qc} - Processing channel {channel_id}") + channel_id_info = aims_xml_info[channel_id] from_date = channel_id_info["from_date"] thru_date = channel_id_info["thru_date"] - [start_dates, end_dates] = create_list_of_dates_to_download( + + # [start_dates, end_dates] generation + start_dates, end_dates = create_list_of_dates_to_download( channel_id, level_qc, from_date, thru_date ) - if len(start_dates) != 0: - # download monthly file - for start_date, end_date in zip(start_dates, end_dates): - start_date = start_date.strftime("%Y-%m-%dT%H:%M:%SZ") - end_date = end_date.strftime("%Y-%m-%dT%H:%M:%SZ") - netcdf_tmp_file_path = download_channel( - channel_id, start_date, end_date, level_qc + if not start_dates: + logger.info(f"QC{level_qc} - Channel {channel_id}: already up to date") + return + + # download monthly file + for start_dt, end_dt in zip(start_dates, end_dates): + start_date = start_dt.strftime("%Y-%m-%dT%H:%M:%SZ") + end_date = end_dt.strftime("%Y-%m-%dT%H:%M:%SZ") + + netcdf_tmp_file_path = download_channel( + channel_id, start_date, end_date, level_qc + ) + + if netcdf_tmp_file_path is None: + logger.error( + f" Channel {channel_id} - not valid zip file - {contact_aims_msg}" + ) + break + + tmp_dir = Path(netcdf_tmp_file_path).parent + + # NO_DATA_FOUND file only means there is no data for the selected time period. + # Could be some data afterwards + if is_no_data_found(netcdf_tmp_file_path): + logger.info( + f"Channel {channel_id}: No data for the time period:[{start_date} - {end_date}]" + ) + shutil.rmtree(tmp_dir) + continue # Move to next month + + # Start of validation sequence + error_occurred = False + + if is_time_var_empty(netcdf_tmp_file_path): + logger.error( + f"Channel {channel_id}: No values in TIME variable - {contact_aims_msg}" + ) + error_occurred = True + + elif not modify_anmn_nrs_netcdf(netcdf_tmp_file_path, channel_id_info): + logger.error( + f"Channel {channel_id}: Could not modify the NetCDF file - Process of channel aborted" ) - contact_aims_msg = "Process of channel aborted - CONTACT AIMS" + error_occurred = True - if netcdf_tmp_file_path is None: + else: + main_var = get_main_netcdf_var(netcdf_tmp_file_path) + if has_var_only_fill_value(netcdf_tmp_file_path, main_var): logger.error( - " Channel %s - not valid zip file - %s" - % (str(channel_id), contact_aims_msg) + f"Channel {channel_id}: _Fillvalues only in main variable - {contact_aims_msg}" ) - break - - # NO_DATA_FOUND file only means there is no data for the selected time period. 
Could be some data afterwards - if is_no_data_found(netcdf_tmp_file_path): - logger.info( - "Channel {channel_id}: No data for the time period:[{start_date} - {end_date}]".format( - channel_id=str(channel_id), - start_date=start_date, - end_date=end_date, - ) + error_occurred = True + elif not get_anmn_nrs_site_name(netcdf_tmp_file_path): + logger.error( + f"Channel {channel_id}: Unknown site_code gatt value - {contact_aims_msg}" ) - shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) - else: - if is_time_var_empty(netcdf_tmp_file_path): - logger.error( - "Channel {channel_id}: No values in TIME variable - {message}".format( - channel_id=str(channel_id), message=contact_aims_msg - ) - ) - shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) - break - - if not modify_anmn_nrs_netcdf(netcdf_tmp_file_path, channel_id_info): - logger.error( - "Channel{channel_id}: Could not modify the NetCDF file - Process of channel aborted".format( - channel_id=str(channel_id) - ) - ) - shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) - break - - main_var = get_main_netcdf_var(netcdf_tmp_file_path) - if has_var_only_fill_value(netcdf_tmp_file_path, main_var): - logger.error( - "Channel {channel_id}: _Fillvalues only in main variable - {message}".format( - channel_id=str(channel_id), message=contact_aims_msg - ) - ) - shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) - break - - if get_anmn_nrs_site_name(netcdf_tmp_file_path) == []: - logger.error( - "Channel {channel_id}: Unknown site_code gatt value - {message}".format( - channel_id=str(channel_id), message=contact_aims_msg - ) - ) - shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) - break - - if not is_time_monotonic(netcdf_tmp_file_path): - logger.error( - "Channel {channel_id}: TIME value is not strictly monotonic \ - - {message}".format( - channel_id=str(channel_id), message=contact_aims_msg - ) - ) - shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) - break - - # check every single file of the list. We don't assume that if one passes, all pass ... 
past proved this - wip_path = os.environ.get("data_wip_path") - checker_retval = pass_netcdf_checker( - netcdf_tmp_file_path, tests=["cf:1.6", "imos:1.3"] + error_occurred = True + elif not is_time_monotonic(netcdf_tmp_file_path): + logger.error( + f"Channel {channel_id}: TIME value is not strictly monotonic - {contact_aims_msg}" ) - if not checker_retval: - logger.error( - "Channel {channel_id}: File does not pass CF/IMOS compliance checker - Process of channel aborted".format( - channel_id=str(channel_id) - ) - ) - shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, "errors")) - - logger.error( - "File copied to {path} for debugging".format( - path=os.path.join( - wip_path, - "errors", - os.path.basename(netcdf_tmp_file_path), - ) - ) - ) - shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) - break + error_occurred = True - netcdf_tmp_file_path = fix_data_code_from_filename(netcdf_tmp_file_path) - netcdf_tmp_file_path = fix_provider_code_from_filename( - netcdf_tmp_file_path, "IMOS_ANMN" - ) + if error_occurred: + shutil.rmtree(tmp_dir) + break - if re.search("IMOS_ANMN_[A-Z]{1}_", netcdf_tmp_file_path) is None: - logger.error( - " Channel %s - File name Data code does not pass REGEX - Process of channel aborted" - % str(channel_id) - ) - shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, "errors")) - logger.error( - " File copied to %s for debugging" - % ( - os.path.join( - wip_path, - "errors", - os.path.basename(netcdf_tmp_file_path), - ) - ) - ) - shutil.rmtree(os.path.dirname(netcdf_tmp_file_path)) - break + # check every single file of the list. We don't assume that if one passes, all pass ... past proved this + if not pass_netcdf_checker(netcdf_tmp_file_path, tests=["cf:1.6", "imos:1.3"]): + logger.error( + f"Channel {channel_id}: File does not pass CF/IMOS compliance checker - Process of channel aborted" + ) - move_to_tmp_incoming(netcdf_tmp_file_path) + err_dest = wip_path / "errors" / os.path.basename(netcdf_tmp_file_path) + shutil.copy(netcdf_tmp_file_path, err_dest) - if TESTING: - # The 2 next lines download the first month only for every single channel. This is only used for testing - save_channel_info(channel_id, aims_xml_info, level_qc, end_date) - break + logger.error(f"File copied to {err_dest} for debugging") + shutil.rmtree(tmp_dir) + break - save_channel_info(channel_id, aims_xml_info, level_qc, end_date) + netcdf_tmp_file_path = fix_data_code_from_filename(netcdf_tmp_file_path) + netcdf_tmp_file_path = fix_provider_code_from_filename( + netcdf_tmp_file_path, "IMOS_ANMN" + ) - else: - logger.info( - "QC{level_qc} - Channel {channel_id}: already up to date".format( - channel_id=str(channel_id), level_qc=str(level_qc) + if not re.search(r"IMOS_ANMN_[A-Z]{1}_", netcdf_tmp_file_path): + logger.error( + f" Channel {channel_id} - File name Data code does not pass REGEX - Process of channel aborted" ) - ) + + err_dest = wip_path / "errors" / os.path.basename(netcdf_tmp_file_path) + shutil.copy(netcdf_tmp_file_path, err_dest) + + logger.error(f" File copied to {err_dest} for debugging") + shutil.rmtree(tmp_dir) + break + + move_to_tmp_incoming(netcdf_tmp_file_path) + + # Update tracking + save_channel_info(channel_id, aims_xml_info, level_qc, end_date) + + if TESTING: + # The 2 next lines download the first month only for every single channel. 
+ # This is only used for testing + # Note: save_channel_info already called above + break def process_qc_level(level_qc): - """Downloads all channels for a QC level - level_qc(int) : 0 or 1 """ - + Downloads all channels for a specific QC level (0 or 1). + """ logger.info( - "Process ANMN NRS download from AIMS web service - QC level {level_qc}".format( - level_qc=level_qc - ) + f"Process ANMN NRS download from AIMS web service - QC level {level_qc}" ) - xml_url = "https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level{level_qc}/300".format( - level_qc=level_qc + + xml_url = ( + f"https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level{level_qc}/300" ) + try: aims_xml_info = parse_aims_xml(xml_url) - except Exception as err: - logger.critical("RSS feed not available") + except Exception: + # Use exc_info=True to automatically attach the stack trace to the log + logger.critical(f"RSS feed not available at {xml_url}", exc_info=True) exit(1) - for channel_id in aims_xml_info.keys(): + # Iterate through channels + for channel_id in aims_xml_info: try: process_monthly_channel(channel_id, aims_xml_info, level_qc) - except Exception as err: - logger.error( - "QC{qc_level} - Channel {channel_id}: Failed, unknown reason - manual debug required".format( - channel_id=str(channel_id), qc_level=str(level_qc) - ) + except Exception: + # logger.exception automatically logs the error AND the traceback + logger.exception( + f"QC{level_qc} - Channel {channel_id}: Failed, unknown reason - manual debug required" ) - logger.error(traceback.print_exc()) class AimsDataValidationTest(data_validation_test.TestCase): @@ -404,34 +369,38 @@ def setUp(self): logger.info("Data validation unittests...") aims_xml_info = parse_aims_xml(xml_url) channel_id_info = aims_xml_info[channel_id] - self.netcdf_tmp_file_path = download_channel( - channel_id, from_date, thru_date, level_qc + self.nc_path = Path( + download_channel(channel_id, from_date, thru_date, level_qc) ) - modify_anmn_nrs_netcdf(self.netcdf_tmp_file_path, channel_id_info) + modify_anmn_nrs_netcdf(str(self.nc_path), channel_id_info) # force values of attributes which change all the time - netcdf_file_obj = Dataset(self.netcdf_tmp_file_path, "a", format="NETCDF4") - netcdf_file_obj.date_created = "1970-01-01T00:00:00Z" # epoch - netcdf_file_obj.history = "data validation test only" - netcdf_file_obj.NCO = "NCO_VERSION" - - netcdf_file_obj.close() + with Dataset(self.nc_path, "a") as nc: + nc.date_created = "1970-01-01T00:00:00Z" + nc.history = "data validation test only" + # Check if NCO attribute exists before forcing it + if hasattr(nc, "NCO"): + nc.NCO = "NCO_VERSION" def tearDown(self): - shutil.copy( - self.netcdf_tmp_file_path, - os.path.join( - os.environ["data_wip_path"], "nc_unittest_%s.nc" % self.md5_netcdf_value - ), - ) - shutil.rmtree(os.path.dirname(self.netcdf_tmp_file_path)) + wip_dir = Path(os.environ.get("data_wip_path", ".")) + + # Preserve the file for debugging before cleanup + # self.md5_netcdf_value needs to be calculated in the test method itself + if hasattr(self, "md5_netcdf_value"): + debug_name = f"nc_unittest_{self.md5_netcdf_value}.nc" + shutil.copy(self.nc_path, wip_dir / debug_name) + + # Cleanup: Remove the parent directory of the temp file + if self.nc_path.parent.exists(): + shutil.rmtree(self.nc_path.parent) def test_aims_validation(self): if sys.version_info[0] < 3: self.md5_expected_value = "76c9a595264a8173545b6dc0c518a280" else: self.md5_expected_value = MD5_EXPECTED_VALUE - self.md5_netcdf_value = 
md5(self.netcdf_tmp_file_path) + self.md5_netcdf_value = md5(str(self.nc_path)) self.assertEqual(self.md5_netcdf_value, self.md5_expected_value) From b905823cf1e793e229af1f7745a653897cacebe3 Mon Sep 17 00:00:00 2001 From: lbesnard Date: Mon, 2 Feb 2026 10:50:41 +1100 Subject: [PATCH 4/8] Fix: aims - DOXY var had wrong CF units --- lib/python/aims_realtime_util.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/python/aims_realtime_util.py b/lib/python/aims_realtime_util.py index 21446e7c..f07bae14 100755 --- a/lib/python/aims_realtime_util.py +++ b/lib/python/aims_realtime_util.py @@ -728,6 +728,10 @@ def modify_aims_netcdf(netcdf_file_path, channel_id_info): var = netcdf_file_obj.variables["ALBD"] var.units = "1" + if "DOXY" in netcdf_file_obj.variables.keys(): + var = netcdf_file_obj.variables["DOXY"] + var.units = "kg m-3" # unit was milliliter/Liter which was not CF but equivalent anyway; Example channel 84900 + def clean_no_cf_variables(var, netcdf_file_obj): """ remove standard name of main variable and of its ancillary qc var if exists From 034dcabceabfaed8854115c0cc12f553f9ac8788 Mon Sep 17 00:00:00 2001 From: lbesnard Date: Mon, 2 Feb 2026 13:49:16 +1100 Subject: [PATCH 5/8] Fix: aims_realtime_utils modernisation (2) --- lib/python/aims_realtime_util.py | 394 +++++++++++++++++-------------- 1 file changed, 221 insertions(+), 173 deletions(-) diff --git a/lib/python/aims_realtime_util.py b/lib/python/aims_realtime_util.py index f07bae14..abed4d12 100755 --- a/lib/python/aims_realtime_util.py +++ b/lib/python/aims_realtime_util.py @@ -11,8 +11,8 @@ author Laurent Besnard, laurent.besnard@utas.edu.au """ -import datetime import glob +import hashlib import json import logging import os @@ -25,11 +25,15 @@ import time import xml.etree.ElementTree as ET import zipfile +from datetime import datetime, timedelta +from pathlib import Path from time import gmtime, strftime import dotenv import numpy import requests +from dateutil import rrule +from dateutil.relativedelta import relativedelta from six.moves.urllib.request import urlopen from six.moves.urllib_error import URLError @@ -37,11 +41,10 @@ from functools import lru_cache except ImportError: from functools32 import lru_cache -from netCDF4 import Dataset, date2num, num2date - -from retrying import retry from logging.handlers import TimedRotatingFileHandler +from netCDF4 import Dataset, date2num, num2date +from retrying import retry ##################### # Logging Functions # @@ -49,45 +52,54 @@ def logging_aims(): - """start logging using logging python library - output: - logger - similar to a file handler """ - wip_path = os.environ.get("data_wip_path") - # this is used for unit testing as data_wip_path env would not be set - if wip_path is None: - wip_path = tempfile.mkdtemp() + Starts logging using the standard library. + Returns a configured logger instance. 
+ """ + # Get wip_path from env; fallback to a temp directory for testing + wip_path_env = os.environ.get("data_wip_path") + wip_path = Path(wip_path_env) if wip_path_env else Path(tempfile.mkdtemp()) + + log_path = wip_path / "aims.log" - logging_format = ( + # Centralized Formatting + log_format = ( "%(asctime)s — %(name)s — %(levelname)s — %(funcName)s:%(lineno)d — %(message)s" ) + formatter = logging.Formatter(log_format) - # set up logging to file - tmp_filename = tempfile.mkstemp(".log", "aims_data_download_")[1] - log_path = os.path.join(wip_path, "aims.log") - logging.basicConfig( - level=logging.INFO, format=logging_format, filename=tmp_filename, filemode="a+" - ) + # Initialize Root Logger + root_logger = logging.getLogger() + root_logger.setLevel(logging.DEBUG) # Capture everything at the root level - # rotate logs every Day, and keep only the last 5 log files - logHandler = TimedRotatingFileHandler( - log_path, - when="D", - interval=1, - backupCount=5, # backupCount files will be kept - ) - logHandler.setFormatter(logging.Formatter(logging_format)) - logHandler.setLevel(logging.DEBUG) - logging.getLogger("").addHandler(logHandler) + # Clear existing handlers to prevent duplicate logs if function is called twice + if root_logger.hasHandlers(): + root_logger.handlers.clear() - # define a Handler which writes DEBUG messages to the sys.stderr - logFormatter = logging.Formatter(logging_format) - consoleHandler = logging.StreamHandler() - consoleHandler.setLevel(logging.INFO) - consoleHandler.setFormatter(logFormatter) + # File Handler (Timed Rotation) + # Logic: Daily rotation, keep 5 backups + file_handler = TimedRotatingFileHandler( + filename=log_path, when="D", interval=1, backupCount=5, encoding="utf-8" + ) + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + root_logger.addHandler(file_handler) + + # Console Handler + # Logic: High-level INFO messages to stderr + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_handler.setFormatter(formatter) + root_logger.addHandler(console_handler) + + # Debug logs to verify initialization + root_logger.debug("Logging initialized successfully.") + root_logger.debug(f"Log file location: {log_path}") + root_logger.debug( + f"Environment 'data_wip_path' was: {'Set' if wip_path_env else 'Not Set (using temp)'}" + ) - # add the console handler to the root logger - logging.getLogger("").addHandler(consoleHandler) + return root_logger #################### @@ -178,98 +190,145 @@ def save_channel_info( level_qc(int) : 0 or 1 last_downloaded_date_channel is a variable argument, not used by soop trv """ - pickle_file = _pickle_filename(level_qc) - last_downloaded_date = dict() - # condition in case the pickle file already exists or not. 
In the first case, - # aims_xml_info comes from the pickle, file, otherwise comes from the function arg - if os.path.isfile(pickle_file): - with open(pickle_file, "rb") as p_read: - aims_xml_info_file = pickle.load(p_read) - last_downloaded_date = aims_xml_info_file - - if not last_downloaded_date_channel: - # soop trv specific, vararg - last_downloaded_date[channel_id] = aims_xml_info[channel_id]["thru_date"] - else: - last_downloaded_date[channel_id] = last_downloaded_date_channel[0] + logger = logging.getLogger(__name__) + pickle_file = Path(_pickle_filename(level_qc)) + last_downloaded_data = {} + # Load existing data if file exists + if pickle_file.exists(): + try: + with pickle_file.open("rb") as p_read: + last_downloaded_data = pickle.load(p_read) + logger.debug(f"Loaded existing metadata from {pickle_file}") + except (EOFError, pickle.UnpicklingError): + logger.warning(f"Pickle file {pickle_file} was corrupt. Starting fresh.") + + # Determine the date (DRY - Don't Repeat Yourself) + if last_downloaded_date_channel: + new_date = last_downloaded_date_channel[0] + logger.debug(f"Using provided vararg date for {channel_id}: {new_date}") else: - if not last_downloaded_date_channel: - # soop trv specific, vararg - last_downloaded_date[channel_id] = aims_xml_info[channel_id]["thru_date"] - else: - last_downloaded_date[channel_id] = last_downloaded_date_channel[0] + new_date = aims_xml_info[channel_id]["thru_date"] + logger.debug(f"Extracted date from XML info for {channel_id}: {new_date}") - with open(pickle_file, "wb") as p_write: - pickle.dump(last_downloaded_date, p_write) + # Update and Save + last_downloaded_data[channel_id] = new_date + + with pickle_file.open("wb") as p_write: + pickle.dump(last_downloaded_data, p_write) + + logger.info(f"Successfully saved channel info for {channel_id} to {pickle_file}") def get_last_downloaded_date_channel(channel_id, level_qc, from_date): - """Retrieve the last date sucessfully downloaded for a channel""" - pickle_file = _pickle_filename(level_qc) # different pickle per QC - if os.path.isfile(pickle_file): - with open(pickle_file, "rb") as p_read: - last_downloaded_date = pickle.load(p_read) + """ + Retrieve the last date successfully downloaded for a channel. + Falls back to from_date if no record is found or the file is missing/corrupt. + """ + + logger = logging.getLogger(__name__) + pickle_path = Path(_pickle_filename(level_qc)) - if ( - channel_id in last_downloaded_date.keys() - ): # check the channel is in the pickle file - if last_downloaded_date[channel_id] is not None: - return last_downloaded_date[channel_id] + if not pickle_path.is_file(): + return from_date - return from_date + try: + with pickle_path.open("rb") as p_read: + last_downloaded_map = pickle.load(p_read) + + recorded_date = last_downloaded_map.get(channel_id) + return recorded_date if recorded_date is not None else from_date + + except (EOFError, pickle.UnpicklingError, Exception) as e: + # If the pickle is corrupt, we don't want to kill the pipeline. + # Log it and fall back to the provided from_date. + logger.warning( + f"Failed to read tracking file {pickle_path}: {e}. Falling back to {from_date}" + ) + return from_date def has_channel_already_been_downloaded(channel_id, level_qc): - pickle_file = _pickle_filename(level_qc) # different pickle per QC - if os.path.isfile(pickle_file): - with open(pickle_file, "rb") as p_read: - last_downloaded_date = pickle.load(p_read) + """ + Checks if a channel exists in the tracking pickle and has a valid date. 
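+    Returns False when the tracking pickle is missing or unreadable, or when the
+    recorded date for the channel is None.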
+ """ - if ( - channel_id in last_downloaded_date.keys() - ): # check the channel is in the pickle file - if ( - last_downloaded_date[channel_id] is not None - ): # check the last downloaded_date field - return True - else: - return False - else: - return False + logger = logging.getLogger(__name__) + pickle_path = Path(_pickle_filename(level_qc)) + # + # Early exit if file doesn't exist + if not pickle_path.is_file(): + logger.debug(f"No tracking file found at {pickle_path}") + return False - else: + try: + with pickle_path.open("rb") as p_read: + last_downloaded_date = pickle.load(p_read) + except (EOFError, pickle.UnpicklingError): + logger.error(f"Failed to read pickle file: {pickle_path}") return False + # Dictionary .get() returns None if key is missing + download_date = last_downloaded_date.get(channel_id) + exists = download_date is not None -def create_list_of_dates_to_download(channel_id, level_qc, from_date, thru_date): - """generate a list of monthly start dates and end dates to download FAIMMS and NRS data""" + logger.debug( + f"Channel {channel_id} download status: {exists} (Date: {download_date})" + ) - from dateutil import rrule - from datetime import datetime - from dateutil.relativedelta import relativedelta + return exists + + +def create_list_of_dates_to_download( + channel_id, level_qc, from_date_str, thru_date_str +): + """ + Generates lists of monthly start and end dates for data downloads. + Logic: Starts from the 1st of the month of the last download. + """ + + logger = logging.getLogger(__name__) + # date format + iso_format = "%Y-%m-%dT%H:%M:%SZ" + + # Retrieve last download date + last_dl_str = get_last_downloaded_date_channel(channel_id, level_qc, from_date_str) + + # Convert strings to datetime objects + thru_date = datetime.strptime(thru_date_str, iso_format) + last_dl_date = datetime.strptime(last_dl_str, iso_format) - last_downloaded_date = get_last_downloaded_date_channel( - channel_id, level_qc, from_date - ) start_dates = [] end_dates = [] - from_date = datetime.strptime(from_date, "%Y-%m-%dT%H:%M:%SZ") - thru_date = datetime.strptime(thru_date, "%Y-%m-%dT%H:%M:%SZ") - last_downloaded_date = datetime.strptime(last_downloaded_date, "%Y-%m-%dT%H:%M:%SZ") + # Only process if there is new data to get + if last_dl_date >= thru_date: + logger.info( + f"Channel {channel_id}: No new dates to download. 
" + f"Last download ({last_dl_date}) is >= thru_date ({thru_date})" + ) + return start_dates, end_dates + + # Generate Monthly Ranges + # We start at the beginning (1st) of the month of the last download + month_start = datetime(last_dl_date.year, last_dl_date.month, 1) + + logger.debug( + f"Generating monthly ranges for {channel_id} starting from {month_start}" + ) - if last_downloaded_date < thru_date: - for dt in rrule.rrule( - rrule.MONTHLY, - dtstart=datetime(last_downloaded_date.year, last_downloaded_date.month, 1), - until=thru_date, - ): - start_dates.append(dt) - end_dates.append(datetime(dt.year, dt.month, 1) + relativedelta(months=1)) + for dt in rrule.rrule(rrule.MONTHLY, dtstart=month_start, until=thru_date): + start_dates.append(dt) + # End date is exactly one month after the start of the current iteration + end_dates.append(dt + relativedelta(months=1)) + # Ensure the very last end date doesn't overshoot the requested thru_date + if end_dates: + original_end = end_dates[-1] end_dates[-1] = thru_date + logger.debug(f"Snapped final end date from {original_end} to {thru_date}") + logger.info(f"Generated {len(start_dates)} monthly intervals for {channel_id}") return start_dates, end_dates @@ -287,14 +346,15 @@ def list_recursively_files_abs_path(path): def md5(fname): - """return a md5 checksum of a file""" - import hashlib - - hash = hashlib.md5() + """Return an md5 checksum of a file.""" with open(fname, "rb") as f: + if hasattr(hashlib, "file_digest"): + return hashlib.file_digest(f, "md5").hexdigest() + + hash_obj = hashlib.md5() for chunk in iter(lambda: f.read(4096), b""): - hash.update(chunk) - return hash.hexdigest() + hash_obj.update(chunk) + return hash_obj.hexdigest() def get_main_netcdf_var(netcdf_file_path): @@ -889,64 +949,36 @@ def fix_data_code_from_filename(netcdf_file_path): It physically renames the filename if needed """ - netcdf_file_obj = Dataset(netcdf_file_path, "r", format="NETCDF4") - if "CDIR" in netcdf_file_obj.variables.keys(): - new_filename = re.sub("_CDIR_", "_V_", netcdf_file_path) - netcdf_file_obj.close() - shutil.move(netcdf_file_path, new_filename) - return new_filename - - if "CSPD" in netcdf_file_obj.variables.keys(): - new_filename = re.sub("_CSPD_", "_V_", netcdf_file_path) - netcdf_file_obj.close() - shutil.move(netcdf_file_path, new_filename) - return new_filename - - if "DOX1" in netcdf_file_obj.variables.keys(): - new_filename = re.sub("_Dissolved_O2_\(mole\)_", "_K_", netcdf_file_path) - netcdf_file_obj.close() - shutil.move(netcdf_file_path, new_filename) - return new_filename + logger = logging.getLogger(__name__) - if "DEPTH" in netcdf_file_obj.variables.keys(): - new_filename = re.sub("_DEPTH_", "_Z_", netcdf_file_path) - netcdf_file_obj.close() - shutil.move(netcdf_file_path, new_filename) - return new_filename + # Mapping of {Variable_Internal_Name: (Regex_Pattern, Replacement_Code)} + FILENAME_MAPPING = { + "CDIR": ("_CDIR_", "_V_"), + "CSPD": ("_CSPD_", "_V_"), + "DOX1": (r"_Dissolved_O2_\(mole\)_", "_K_"), + "DEPTH": ("_DEPTH_", "_Z_"), + "Dissolved_Oxygen_Percent": ("_DO_%_", "_O_"), + "ErrorVelocity": ("_ErrorVelocity_", "_V_"), + "Average_Compass_Heading": ("_Average_Compass_Heading_", "_E_"), + "Upwelling_longwave_radiation": ("_Upwelling_longwave_radiation_", "_F_"), + "Downwelling_longwave_radiation": ("_Downwelling_longwave_radiation_", "_F_"), + } - if "Dissolved_Oxygen_Percent" in netcdf_file_obj.variables.keys(): - new_filename = re.sub("_DO_%_", "_O_", netcdf_file_path) - netcdf_file_obj.close() - 
shutil.move(netcdf_file_path, new_filename) - return new_filename + with Dataset(netcdf_file_path, "r", format="NETCDF4") as nc: + found_var = next((var for var in FILENAME_MAPPING if var in nc.variables), None) - if "ErrorVelocity" in netcdf_file_obj.variables.keys(): - new_filename = re.sub("_ErrorVelocity_", "_V_", netcdf_file_path) - netcdf_file_obj.close() - shutil.move(netcdf_file_path, new_filename) - return new_filename + if found_var: + pattern, replacement = FILENAME_MAPPING[found_var] + new_filename = re.sub(pattern, replacement, str(netcdf_file_path)) - if "Average_Compass_Heading" in netcdf_file_obj.variables.keys(): - new_filename = re.sub("_Average_Compass_Heading_", "_E_", netcdf_file_path) - netcdf_file_obj.close() - shutil.move(netcdf_file_path, new_filename) - return new_filename + logger.debug(f"Renaming file based on variable '{found_var}': {new_filename}") - if "Upwelling_longwave_radiation" in netcdf_file_obj.variables.keys(): - new_filename = re.sub("_Upwelling_longwave_radiation_", "_F_", netcdf_file_path) - netcdf_file_obj.close() - shutil.move(netcdf_file_path, new_filename) - return new_filename + old_path = Path(netcdf_file_path) + new_path = old_path.with_name(Path(new_filename).name) - if "Downwelling_longwave_radiation" in netcdf_file_obj.variables.keys(): - new_filename = re.sub( - "_Downwelling_longwave_radiation_", "_F_", netcdf_file_path - ) - netcdf_file_obj.close() - shutil.move(netcdf_file_path, new_filename) - return new_filename + shutil.move(str(old_path), str(new_path)) + return str(new_path) - netcdf_file_obj.close() return netcdf_file_path @@ -989,20 +1021,36 @@ def remove_end_date_from_filename(netcdf_filename): def rm_tmp_dir(data_wip_path): - """remove temporary directories older than 15 days from data_wip path""" - for dir_path in os.listdir(data_wip_path): - if dir_path.startswith("manifest_dir_tmp_"): - file_date = datetime.datetime.strptime( - dir_path.split("_")[-1], "%Y%m%d%H%M%S" - ) - if (datetime.datetime.now() - file_date).days > 15: - logger = logging.getLogger(__name__) - logger.info( - "DELETE old temporary folder {path}".format( - path=os.path.join(data_wip_path, dir_path) - ) - ) - shutil.rmtree(os.path.join(data_wip_path, dir_path)) + """ + Remove temporary directories older than 15 days from data_wip path. 
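+    Folders whose name suffix is not a parsable timestamp are logged and skipped.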
+ Expected folder format: manifest_dir_tmp_YYYYMMDDHHMMSS + """ + + logger = logging.getLogger(__name__) + base_path = Path(data_wip_path) + if not base_path.is_dir(): + logger.warning(f"Cleanup skipped: {data_wip_path} is not a valid directory.") + return + + # Set threshold to 15 days ago + expiry_limit = datetime.now() - timedelta(days=15) + + for folder in base_path.glob("manifest_dir_tmp_*"): + try: + # Extract date string from the end of the folder name + date_str = folder.name.split("_")[-1] + folder_date = datetime.strptime(date_str, "%Y%m%d%H%M%S") + + if folder_date < expiry_limit: + logger.info(f"Deleting old temporary folder: {folder}") + shutil.rmtree(folder) + + except ValueError: + # This handles cases where the folder name matches the prefix + # but the suffix isn't a valid date + logger.debug(f"Skipping folder with invalid date format: {folder.name}") + except Exception as e: + logger.error(f"Failed to delete {folder}: {e}") def set_up(): From 1668f164228bde53bc616bec99949a7f2d58352e Mon Sep 17 00:00:00 2001 From: lbesnard Date: Mon, 2 Feb 2026 14:29:22 +1100 Subject: [PATCH 6/8] Fix: aims_realtime_utils modernisation (3) --- lib/python/aims_realtime_util.py | 38 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/lib/python/aims_realtime_util.py b/lib/python/aims_realtime_util.py index abed4d12..51b7ba75 100755 --- a/lib/python/aims_realtime_util.py +++ b/lib/python/aims_realtime_util.py @@ -358,23 +358,27 @@ def md5(fname): def get_main_netcdf_var(netcdf_file_path): - with Dataset(netcdf_file_path, mode="r") as netcdf_file_obj: - variables = netcdf_file_obj.variables - - variables.pop("TIME") - variables.pop("LATITUDE") - variables.pop("LONGITUDE") - - if "NOMINAL_DEPTH" in variables: - variables.pop("NOMINAL_DEPTH") - - qc_var = [s for s in variables if "_quality_control" in s] - if qc_var != []: - variables.pop(qc_var[0]) - - return [item for item in variables.keys()][0] - - return variables[0] + """ + Identifies the primary data variable in a NetCDF file by excluding + known coordinate and QC variables. + """ + with Dataset(netcdf_file_path, mode="r") as nc: + # Define the set of variables to ignore + excluded_vars = {"TIME", "LATITUDE", "LONGITUDE", "NOMINAL_DEPTH"} + + # Get all variable names as a list to avoid modifying the 'variables' object + var_names = list(nc.variables.keys()) + + # 1. Filter out the static coordinate names + # 2. 
Filter out any variable containing '_quality_control' + remaining_vars = [ + v + for v in var_names + if v not in excluded_vars and "_quality_control" not in v + ] + + # Return the first remaining variable if one exists, else None + return remaining_vars[0] if remaining_vars else None def is_above_file_limit(json_watchd_name): From e1f694efde03606605359eacdc29b83993e8dd22 Mon Sep 17 00:00:00 2001 From: lbesnard Date: Mon, 2 Feb 2026 17:26:18 +1100 Subject: [PATCH 7/8] Fix: aims_realtime_utils modernisation (4) --- ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py | 20 +- lib/python/aims_realtime_util.py | 253 +++++++++++++----------- 2 files changed, 157 insertions(+), 116 deletions(-) diff --git a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py index 971a20f0..ccaa7d98 100755 --- a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py +++ b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py @@ -64,7 +64,8 @@ from tendo import singleton from util import pass_netcdf_checker -MD5_EXPECTED_VALUE = "a6207e053f1cc0e00d171701f0cdb186" +MD5_EXPECTED_VALUE = "ba3bcf5d61134a338ee62c8f98033d00" +# MD5_EXPECTED_VALUE = "a6207e053f1cc0e00d171701f0cdb186" DATA_WIP_PATH = os.path.join( os.environ.get("WIP_DIR"), @@ -201,7 +202,16 @@ def process_monthly_channel(channel_id, aims_xml_info, level_qc): contact_aims_msg = "Process of channel aborted - CONTACT AIMS" wip_path = Path(os.environ.get("data_wip_path", "")) - logger.info(f"QC{level_qc} - Processing channel {channel_id}") + HL = "\x1b[1;35m" # Bold Magenta + RS = "\x1b[0m" + GREEN = "\033[92m" + ORANGE = "\033[38;5;208m" + RESET = "\033[0m" + YELLOW = "\033[33m" + + logger.info( + f"QC{level_qc} - {YELLOW}Processing channel{YELLOW} {HL}{channel_id}{RS}" + ) channel_id_info = aims_xml_info[channel_id] from_date = channel_id_info["from_date"] @@ -213,7 +223,9 @@ def process_monthly_channel(channel_id, aims_xml_info, level_qc): ) if not start_dates: - logger.info(f"QC{level_qc} - Channel {channel_id}: already up to date") + logger.info( + f"{GREEN}QC{level_qc} - Channel {channel_id}: already up to date{RESET}" + ) return # download monthly file @@ -237,7 +249,7 @@ def process_monthly_channel(channel_id, aims_xml_info, level_qc): # Could be some data afterwards if is_no_data_found(netcdf_tmp_file_path): logger.info( - f"Channel {channel_id}: No data for the time period:[{start_date} - {end_date}]" + f"{ORANGE}Channel {channel_id}: No data for the time period:[{start_date} - {end_date}]{RESET}" ) shutil.rmtree(tmp_dir) continue # Move to next month diff --git a/lib/python/aims_realtime_util.py b/lib/python/aims_realtime_util.py index 51b7ba75..e22d358f 100755 --- a/lib/python/aims_realtime_util.py +++ b/lib/python/aims_realtime_util.py @@ -19,13 +19,13 @@ import pickle import re import shutil -import subprocess import sys import tempfile import time import xml.etree.ElementTree as ET import zipfile from datetime import datetime, timedelta +from logging.handlers import TimedRotatingFileHandler from pathlib import Path from time import gmtime, strftime @@ -51,53 +51,71 @@ ##################### +class AimsColorFormatter(logging.Formatter): + """Custom formatter to add colors to console output only.""" + + # ANSI Codes + GREY = "\x1b[38;20m" + CYAN = "\x1b[36;20m" + YELLOW = "\x1b[33;20m" + RED = "\x1b[31;20m" + BOLD_RED = "\x1b[31;1m" + RESET = "\x1b[0m" + + log_format = ( + "%(asctime)s — %(name)s — %(levelname)s — %(funcName)s:%(lineno)d — %(message)s" + ) + + LEVEL_COLORS = { + logging.DEBUG: GREY, + logging.INFO: CYAN, + logging.WARNING: YELLOW, + 
logging.ERROR: RED, + logging.CRITICAL: BOLD_RED, + } + + def format(self, record): + color = self.LEVEL_COLORS.get(record.levelno, self.RESET) + formatter = logging.Formatter(f"{color}{self.log_format}{self.RESET}") + return formatter.format(record) + + def logging_aims(): - """ - Starts logging using the standard library. - Returns a configured logger instance. - """ - # Get wip_path from env; fallback to a temp directory for testing + """Starts logging with colored console and plain-text file output.""" + wip_path_env = os.environ.get("data_wip_path") wip_path = Path(wip_path_env) if wip_path_env else Path(tempfile.mkdtemp()) - log_path = wip_path / "aims.log" - # Centralized Formatting - log_format = ( + # Standard plain formatter for the file + file_format = ( "%(asctime)s — %(name)s — %(levelname)s — %(funcName)s:%(lineno)d — %(message)s" ) - formatter = logging.Formatter(log_format) + file_formatter = logging.Formatter(file_format) - # Initialize Root Logger root_logger = logging.getLogger() - root_logger.setLevel(logging.DEBUG) # Capture everything at the root level + root_logger.setLevel(logging.DEBUG) - # Clear existing handlers to prevent duplicate logs if function is called twice if root_logger.hasHandlers(): root_logger.handlers.clear() - # File Handler (Timed Rotation) - # Logic: Daily rotation, keep 5 backups + # 1. File Handler (Plain text) file_handler = TimedRotatingFileHandler( filename=log_path, when="D", interval=1, backupCount=5, encoding="utf-8" ) file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(formatter) + file_handler.setFormatter(file_formatter) root_logger.addHandler(file_handler) - # Console Handler - # Logic: High-level INFO messages to stderr + # 2. Console Handler (Colored) console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) - console_handler.setFormatter(formatter) + + console_handler.setFormatter(AimsColorFormatter()) root_logger.addHandler(console_handler) - # Debug logs to verify initialization root_logger.debug("Logging initialized successfully.") - root_logger.debug(f"Log file location: {log_path}") - root_logger.debug( - f"Environment 'data_wip_path' was: {'Set' if wip_path_env else 'Not Set (using temp)'}" - ) + root_logger.info(f"Log file location: {log_path}") return root_logger @@ -412,92 +430,47 @@ def is_above_file_limit(json_watchd_name): @lru_cache(maxsize=100) def parse_aims_xml(xml_url): - """Download and parse the AIMS XML rss feed""" + """Download and parse the AIMS XML rss feed using a single-pass loop.""" logger = logging.getLogger(__name__) - logger.info("PARSE AIMS xml RSS feed : %s" % (xml_url)) - response = urlopen(xml_url) - html = response.read() - root = ET.fromstring(html) - - n_item_start = 3 # start number for AIMS xml file - - title = [] - link = [] - metadata_uuid = [] - uom = [] - from_date = [] - thru_date = [] - platform_name = [] - site_name = [] - channel_id = [] - parameter = [] - parameter_type = [] - trip_id = [] # soop trv only - - for n_item in range(n_item_start, len(root[0])): - title.append(root[0][n_item][0].text) - link.append(root[0][n_item][1].text) - metadata_uuid.append(root[0][n_item][6].text) - uom.append(root[0][n_item][7].text) - from_date.append(root[0][n_item][8].text) - thru_date.append(root[0][n_item][9].text) - platform_name.append(root[0][n_item][10].text) - site_name.append(root[0][n_item][11].text) - channel_id.append(root[0][n_item][12].text) - parameter.append(root[0][n_item][13].text) - parameter_type.append(root[0][n_item][14].text) - - # in case 
there is no trip id defined by AIMS, we create a fake one, used by SOOP TRV only + logger.info(f"PARSE AIMS xml RSS feed : {xml_url}") + + with urlopen(xml_url) as response: + root = ET.fromstring(response.read()) + + new_dict = {} + items = root[0] + n_item_start = 3 + + for i in range(n_item_start, len(items)): + node = items[i] + + # Extract channel_id first as it's our primary key + c_id = node[12].text + + # Handle the trip_id logic for SOOP TRV only try: - trip_id.append(root[0][n_item][15].text) + t_id = node[15].text except IndexError: - dateObject = time.strptime(root[0][n_item][8].text, "%Y-%m-%dT%H:%M:%SZ") - trip_id_fake = ( - str(dateObject.tm_year) - + str(dateObject.tm_mon).zfill(2) - + str(dateObject.tm_mday).zfill(2) - ) - trip_id.append(trip_id_fake) - - response.close() - d = [ - { - c: { - "title": ttl, - "channel_id": c, - "link": lk, - "metadata_uuid": muuid, - "uom": uo, - "from_date": fro, - "thru_date": thr, - "platform_name": pltname, - "site_name": stname, - "parameter": para, - "parameter_type": paratype, - "trip_id": trid, - } + # Create fake trip_id from from_date (node[8]) + date_str = node[8].text + date_obj = time.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ") + t_id = time.strftime("%Y%m%d", date_obj) + + # Build the entry directly into the final dictionary + new_dict[c_id] = { + "title": node[0].text, + "channel_id": c_id, + "link": node[1].text, + "metadata_uuid": node[6].text, + "uom": node[7].text, + "from_date": node[8].text, + "thru_date": node[9].text, + "platform_name": node[10].text, + "site_name": node[11].text, + "parameter": node[13].text, + "parameter_type": node[14].text, + "trip_id": t_id, } - for c, ttl, lk, muuid, uo, fro, thr, pltname, stname, para, paratype, trid in zip( - channel_id, - title, - link, - metadata_uuid, - uom, - from_date, - thru_date, - platform_name, - site_name, - parameter, - parameter_type, - trip_id, - ) - ] - - # re-writting the dict to have the channel key as a key value - new_dict = {} - for item in d: - for name in item.keys(): - new_dict[name] = item[name] return new_dict @@ -1003,17 +976,73 @@ def has_var_only_fill_value(netcdf_file_path, var): return False +# +# def remove_dimension_from_netcdf(netcdf_file_path): +# """DIRTY, calling bash. need to write in Python, or part of the NetCDF4 module +# need to remove the 'single' dimension name from DEPTH or other dim. Unfortunately can't seem to find a way to do it easily with netCDF4 module +# """ +# fd, tmp_file = tempfile.mkstemp() +# os.close(fd) +# import subprocess +# +# subprocess.check_call(["ncwa", "-O", "-a", "single", netcdf_file_path, tmp_file]) +# subprocess.check_call( +# ["ncatted", "-O", "-a", "cell_methods,,d,,", tmp_file, tmp_file] +# ) +# shutil.move(tmp_file, netcdf_file_path) +# +# def remove_dimension_from_netcdf(netcdf_file_path): - """DIRTY, calling bash. need to write in Python, or part of the NetCDF4 module - need to remove the 'single' dimension name from DEPTH or other dim. Unfortunately can't seem to find a way to do it easily with netCDF4 module + """ + Python replacement for NCO ncwa/ncatted. + Fixes the _FillValue AttributeError by passing it during variable creation. """ fd, tmp_file = tempfile.mkstemp() os.close(fd) - subprocess.check_call(["ncwa", "-O", "-a", "single", netcdf_file_path, tmp_file]) - subprocess.check_call( - ["ncatted", "-O", "-a", "cell_methods,,d,,", tmp_file, tmp_file] - ) + with Dataset(netcdf_file_path, "r") as src, Dataset(tmp_file, "w") as dst: + # 1. 
Copy global attributes + dst.setncatts(src.__dict__) + + hist_msg = "NetCDF file modified by remove_dimension_from_netcdf function" + if hasattr(dst, "history"): + # Append to existing history with a newline for readability + dst.history = f"{hist_msg}\n{dst.history}" + else: + # Create it if it doesn't exist + dst.history = hist_msg + + # 2. Copy dimensions EXCEPT 'single' + for name, dimension in src.dimensions.items(): + if name != "single": + dst.createDimension( + name, (len(dimension) if not dimension.isunlimited() else None) + ) + + # 3. Copy variables + for name, variable in src.variables.items(): + new_dims = tuple(d for d in variable.dimensions if d != "single") + + # --- THE FIX --- + # Check if source has a fill value. + # We use getattr because _FillValue is a reserved attribute name. + fill_val = getattr(variable, "_FillValue", None) + + # Create the variable with the fill_value already set + dst_var = dst.createVariable( + name, variable.datatype, new_dims, fill_value=fill_val + ) + # ---------------- + + # 4. Copy remaining Attributes (Replaces ncatted logic) + # We skip 'cell_methods' AND '_FillValue' (since we just set it) + for attr_name in variable.ncattrs(): + if attr_name not in ["cell_methods", "_FillValue"]: + dst_var.setncattr(attr_name, variable.getncattr(attr_name)) + + # 5. Copy Data + dst_var[:] = variable[:] + shutil.move(tmp_file, netcdf_file_path) From 64991e688b59ba4b34c99c1e26260e1e051f8a11 Mon Sep 17 00:00:00 2001 From: lbesnard Date: Mon, 2 Feb 2026 17:31:01 +1100 Subject: [PATCH 8/8] Fix: removing badly commited file --- ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new | 497 -------------------- 1 file changed, 497 deletions(-) delete mode 100755 ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new diff --git a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new b/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new deleted file mode 100755 index 24143f80..00000000 --- a/ANMN/NRS_AIMS/REALTIME/anmn_nrs_aims.py.new +++ /dev/null @@ -1,497 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Download ANMN NRS data from AIMS Web Service for Darwin, Yongala and Beagle -The script reads an XML file provided by AIMS and looks for channels with -new data to download. It compares this list with a pickle file (pythonic -way to store python variables) containing what has already been downloaded -in the previous run of this script. -Some modifications on the files have to be done so they comply with CF and -IMOS conventions. -The IOOS compliance checker is used to check if the first downloaded file of -a channel complies once modified. If not, the download of the rest of the -channel is aborted until some modification on the source code is done so -the channel can pass the checker. -Files which don't pass the checker will land in os.path.join(wip_path, 'errors') -for investigation. No need to reprocess them as they will be redownloaded on -next run until they end up passing the checker. Files in the 'errors' dir can be -removed at anytime - -IMPORTANT: -is it essential to look at the logging os.path.join(wip_path, 'aims.log') -to know which channels have problems and why as most of the time, AIMS will -have to be contacted to sort out issues. 
- - -author Laurent Besnard, laurent.besnard@utas.edu.au -""" - -import argparse -import datetime -import logging -import os -import re -import shutil -import sys -import traceback -import unittest as data_validation_test -from itertools import groupby -from pathlib import Path - -from aims_realtime_util import ( - convert_time_cf_to_imos, - create_list_of_dates_to_download, - download_channel, - fix_data_code_from_filename, - fix_provider_code_from_filename, - get_main_netcdf_var, - has_var_only_fill_value, - is_no_data_found, - is_time_monotonic, - is_time_var_empty, - list_recursively_files_abs_path, - logging_aims, - md5, - modify_aims_netcdf, - parse_aims_xml, - remove_dimension_from_netcdf, - remove_end_date_from_filename, - rm_tmp_dir, - save_channel_info, - set_up, -) -from dest_path import get_anmn_nrs_site_name -from netCDF4 import Dataset -from tendo import singleton -from util import pass_netcdf_checker - -DATA_WIP_PATH = os.path.join( - os.environ.get("WIP_DIR"), - "ANMN", - "NRS_AIMS_Darwin_Yongala_data_rss_download_temporary", -) -ANMN_NRS_INCOMING_DIR = os.path.join( - os.environ.get("INCOMING_DIR"), "AODN", "ANMN_NRS_DAR_YON" -) -ANMN_NRS_ERROR_DIR = os.path.join(os.environ["ERROR_DIR"], "ANMN_NRS_DAR_YON") - - -def modify_anmn_nrs_netcdf(netcdf_file_path, channel_id_info): - """Modify the downloaded netCDF file so it passes both CF and IMOS checker - input: - netcdf_file_path(str) : path of netcdf file to modify - channel_id_index(tupple) : information from xml for the channel - """ - modify_aims_netcdf(netcdf_file_path, channel_id_info) - - netcdf_file_obj = Dataset(netcdf_file_path, "a", format="NETCDF4") - netcdf_file_obj.aims_channel_id = int(channel_id_info["channel_id"]) - - if "Yongala" in channel_id_info["site_name"]: - netcdf_file_obj.site_code = "NRSYON" - netcdf_file_obj.platform_code = "Yongala NRS Buoy" - elif "Darwin" in channel_id_info["site_name"]: - netcdf_file_obj.site_code = "NRSDAR" - netcdf_file_obj.platform_code = "Darwin NRS Buoy" - elif "Beagle" in channel_id_info["site_name"]: - netcdf_file_obj.site_code = "DARBGF" - netcdf_file_obj.platform_code = "Beagle Gulf Mooring" - else: - return False - - if not (channel_id_info["metadata_uuid"] == "Not Available"): - netcdf_file_obj.metadata_uuid = channel_id_info["metadata_uuid"] - - # some weather stations channels don't have a depth variable if sensor above water - if "depth" in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables["depth"] - var.long_name = "nominal depth" - var.positive = "down" - var.axis = "Z" - var.reference_datum = "sea surface" - var.valid_min = -10.0 - var.valid_max = 30.0 - var.units = "m" # some channels put degrees celcius instead ... - netcdf_file_obj.renameVariable("depth", "NOMINAL_DEPTH") - - if "DEPTH" in netcdf_file_obj.variables.keys(): - var = netcdf_file_obj.variables["DEPTH"] - var.coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" - var.long_name = "actual depth" - var.reference_datum = "sea surface" - var.positive = "down" - var.valid_min = -10.0 - var.valid_max = 30.0 - var.units = "m" # some channels put degrees celcius instead ... - - netcdf_file_obj.close() - netcdf_file_obj = Dataset( - netcdf_file_path, "a", format="NETCDF4" - ) # need to close to save to file. 
as we call get_main_var just after - main_var = get_main_netcdf_var(netcdf_file_path) - # DEPTH, LATITUDE and LONGITUDE are not dimensions, so we make them into auxiliary cooordinate variables by adding this attribute - if "NOMINAL_DEPTH" in netcdf_file_obj.variables.keys(): - netcdf_file_obj.variables[ - main_var - ].coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" - else: - netcdf_file_obj.variables[main_var].coordinates = "TIME LATITUDE LONGITUDE" - - netcdf_file_obj.close() - - if not convert_time_cf_to_imos(netcdf_file_path): - return False - - remove_dimension_from_netcdf( - netcdf_file_path - ) # last modification to do in this order! - return True - - -def move_to_tmp_incoming(netcdf_path): - """ - Renames the NetCDF to include its MD5 hash, moves it to the manifest directory, - and cleans up the now-empty source directory. - """ - logger = logging.getLogger(__name__) - # Convert to Path object for easier manipulation - source_file = Path(netcdf_path) - source_dir = source_file.parent - - # Construct the new filename: [name_without_date].[md5].nc - # remove_end_date_from_filename returns a string, so we wrap it in Path - name_no_date = Path(remove_end_date_from_filename(str(source_file))).stem - file_hash = md5(str(source_file)) - new_filename = f"{name_no_date}.{file_hash}.nc" - - destination = Path(TMP_MANIFEST_DIR) / new_filename - - try: - # Apply permissions (664) - source_file.chmod(0o664) - - # Perform the move - shutil.move(str(source_file), str(destination)) - logger.info(f"Moved {source_file.name} to {destination}") - - # Cleanup: Delete the source directory if it is now empty - try: - source_dir.rmdir() - logger.debug(f"Cleaned up empty directory: {source_dir}") - except OSError: - logger.debug(f"Source directory not empty; skipping cleanup: {source_dir}") - - except Exception as e: - logger.error(f"Failed to move {source_file} to incoming: {e}") - raise - - -def process_monthly_channel(channel_id, aims_xml_info, level_qc): - """ - Downloads all the data available for one channel_id and moves the file to a wip_path dir - - aims_service : 1 -> FAIMMS data - 100 -> SOOP TRV data - 300 -> NRS DATA - for monthly data download, only 1 and 300 should be use - """ - contact_aims_msg = "Process of channel aborted - CONTACT AIMS" - wip_path = Path(os.environ.get("data_wip_path", "")) - - logger.info(f"QC{level_qc} - Processing channel {channel_id}") - - channel_id_info = aims_xml_info[channel_id] - from_date = channel_id_info["from_date"] - thru_date = channel_id_info["thru_date"] - - # [start_dates, end_dates] generation - start_dates, end_dates = create_list_of_dates_to_download( - channel_id, level_qc, from_date, thru_date - ) - - if not start_dates: - logger.info(f"QC{level_qc} - Channel {channel_id}: already up to date") - return - - # download monthly file - for start_dt, end_dt in zip(start_dates, end_dates): - start_date = start_dt.strftime("%Y-%m-%dT%H:%M:%SZ") - end_date = end_dt.strftime("%Y-%m-%dT%H:%M:%SZ") - - netcdf_tmp_file_path = download_channel( - channel_id, start_date, end_date, level_qc - ) - - if netcdf_tmp_file_path is None: - logger.error( - f" Channel {channel_id} - not valid zip file - {contact_aims_msg}" - ) - break - - tmp_dir = Path(netcdf_tmp_file_path).parent - - # NO_DATA_FOUND file only means there is no data for the selected time period. 
- # Could be some data afterwards - if is_no_data_found(netcdf_tmp_file_path): - logger.info( - f"Channel {channel_id}: No data for the time period:[{start_date} - {end_date}]" - ) - shutil.rmtree(tmp_dir) - continue # Move to next month - - # Start of validation sequence - error_occurred = False - - if is_time_var_empty(netcdf_tmp_file_path): - logger.error( - f"Channel {channel_id}: No values in TIME variable - {contact_aims_msg}" - ) - error_occurred = True - - elif not modify_anmn_nrs_netcdf(netcdf_tmp_file_path, channel_id_info): - logger.error( - f"Channel {channel_id}: Could not modify the NetCDF file - Process of channel aborted" - ) - error_occurred = True - - else: - main_var = get_main_netcdf_var(netcdf_tmp_file_path) - if has_var_only_fill_value(netcdf_tmp_file_path, main_var): - logger.error( - f"Channel {channel_id}: _Fillvalues only in main variable - {contact_aims_msg}" - ) - error_occurred = True - elif not get_anmn_nrs_site_name(netcdf_tmp_file_path): - logger.error( - f"Channel {channel_id}: Unknown site_code gatt value - {contact_aims_msg}" - ) - error_occurred = True - elif not is_time_monotonic(netcdf_tmp_file_path): - logger.error( - f"Channel {channel_id}: TIME value is not strictly monotonic - {contact_aims_msg}" - ) - error_occurred = True - - if error_occurred: - shutil.rmtree(tmp_dir) - break - - # check every single file of the list. We don't assume that if one passes, all pass ... past proved this - if not pass_netcdf_checker(netcdf_tmp_file_path, tests=["cf:1.6", "imos:1.3"]): - logger.error( - f"Channel {channel_id}: File does not pass CF/IMOS compliance checker - Process of channel aborted" - ) - - err_dest = wip_path / "errors" / os.path.basename(netcdf_tmp_file_path) - shutil.copy(netcdf_tmp_file_path, err_dest) - - logger.error(f"File copied to {err_dest} for debugging") - shutil.rmtree(tmp_dir) - break - - netcdf_tmp_file_path = fix_data_code_from_filename(netcdf_tmp_file_path) - netcdf_tmp_file_path = fix_provider_code_from_filename( - netcdf_tmp_file_path, "IMOS_ANMN" - ) - - if not re.search(r"IMOS_ANMN_[A-Z]{1}_", netcdf_tmp_file_path): - logger.error( - f" Channel {channel_id} - File name Data code does not pass REGEX - Process of channel aborted" - ) - - err_dest = wip_path / "errors" / os.path.basename(netcdf_tmp_file_path) - shutil.copy(netcdf_tmp_file_path, err_dest) - - logger.error(f" File copied to {err_dest} for debugging") - shutil.rmtree(tmp_dir) - break - - move_to_tmp_incoming(netcdf_tmp_file_path) - - # Update tracking - save_channel_info(channel_id, aims_xml_info, level_qc, end_date) - - if TESTING: - # The 2 next lines download the first month only for every single channel. 
- # This is only used for testing - # Note: save_channel_info already called above - break - - -def process_qc_level(level_qc): - """Downloads all channels for a QC level - level_qc(int) : 0 or 1 - """ - - logger.info( - "Process ANMN NRS download from AIMS web service - QC level {level_qc}".format( - level_qc=level_qc - ) - ) - xml_url = "https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level{level_qc}/300".format( - level_qc=level_qc - ) - try: - aims_xml_info = parse_aims_xml(xml_url) - except Exception as err: - logger.critical("RSS feed not available") - exit(1) - - for channel_id in aims_xml_info.keys(): - try: - process_monthly_channel(channel_id, aims_xml_info, level_qc) - except Exception as err: - logger.error( - "QC{qc_level} - Channel {channel_id}: Failed, unknown reason - manual debug required".format( - channel_id=str(channel_id), qc_level=str(level_qc) - ) - ) - logger.error(traceback.print_exc()) - - -class AimsDataValidationTest(data_validation_test.TestCase): - def setUp(self): - """Check that a the AIMS system or this script hasn't been modified. - This function checks that a downloaded file still has the same md5. - """ - channel_id = "84329" - from_date = "2016-01-01T00:00:00Z" - thru_date = "2016-01-02T00:00:00Z" - level_qc = 1 - aims_rss_val = 300 - xml_url = ( - "https://data.aims.gov.au/gbroosdata/services/rss/netcdf/level%s/%s" - % (str(level_qc), str(aims_rss_val)) - ) - - logger.info("Data validation unittests...") - aims_xml_info = parse_aims_xml(xml_url) - channel_id_info = aims_xml_info[channel_id] - self.netcdf_tmp_file_path = download_channel( - channel_id, from_date, thru_date, level_qc - ) - modify_anmn_nrs_netcdf(self.netcdf_tmp_file_path, channel_id_info) - EPOCH_ISO = "1970-01-01T00:00:00Z" - - netcdf_path = Path(self.netcdf_tmp_file_path) - - with Dataset(netcdf_path, mode="a", format="NETCDF4") as nc: - # force values of attributes which change all the time - nc.date_created = EPOCH_ISO - nc.history = "data validation test only" - nc.NCO = "NCO_VERSION" - - def tearDown(self): - shutil.copy( - self.netcdf_tmp_file_path, - os.path.join( - os.environ["data_wip_path"], "nc_unittest_%s.nc" % self.md5_netcdf_value - ), - ) - shutil.rmtree(os.path.dirname(self.netcdf_tmp_file_path)) - - def test_aims_validation(self): - if sys.version_info[0] < 3: - self.md5_expected_value = "76c9a595264a8173545b6dc0c518a280" - else: - self.md5_expected_value = "1bb65266f8e526ed2087904ae024e33d" - - self.md5_netcdf_value = md5(self.netcdf_tmp_file_path) - - self.assertEqual(self.md5_netcdf_value, self.md5_expected_value) - - -def args(): - """ - define the script arguments - :return: vargs - """ - parser = argparse.ArgumentParser() - parser.add_argument( - "-t", - "--testing", - action="store_true", - help="testing only - downloads the first month of each channel", - ) - - return parser.parse_args() - - -if __name__ == "__main__": - vargs = args() - me = singleton.SingleInstance() - os.environ["data_wip_path"] = os.path.join( - os.environ.get("WIP_DIR"), - "ANMN", - "NRS_AIMS_Darwin_Yongala_data_rss_download_temporary", - ) - global TMP_MANIFEST_DIR - global TESTING - - set_up() - - # initialise logging - logging_aims() - global logger - logger = logging.getLogger(__name__) - - # data validation test - runner = data_validation_test.TextTestRunner() - itersuite = data_validation_test.TestLoader().loadTestsFromTestCase( - AimsDataValidationTest - ) - res = runner.run(itersuite) - - if not DATA_WIP_PATH: - logger.critical("environment variable data_wip_path is not 
defined.") - exit(1) - - # script optional argument for testing only. used in process_monthly_channel - TESTING = vargs.testing - - rm_tmp_dir(DATA_WIP_PATH) - - if len(os.listdir(ANMN_NRS_INCOMING_DIR)) >= 2: - logger.critical("Operation aborted, too many files in INCOMING_DIR") - exit(1) - - if len(os.listdir(ANMN_NRS_ERROR_DIR)) >= 2: - logger.critical("Operation aborted, too many files in ERROR_DIR") - exit(1) - - if not res.failures: - for level in [0, 1]: - date_str_now = datetime.datetime.now().strftime("%Y%m%d%H%M%S") - TMP_MANIFEST_DIR = os.path.join( - DATA_WIP_PATH, "manifest_dir_tmp_{date}".format(date=date_str_now) - ) - os.makedirs(TMP_MANIFEST_DIR) - - process_qc_level(level) - - lines_per_file = 2**12 - file_list = list_recursively_files_abs_path(TMP_MANIFEST_DIR) - if len(file_list) > 0: - for file_number, lines in groupby( - enumerate(file_list), key=lambda x: x[0] // lines_per_file - ): - incoming_file = os.path.join( - DATA_WIP_PATH, - "anmn_nrs_aims_FV0{level}_{date}_{file_number}.manifest".format( - level=str(level), date=date_str_now, file_number=file_number - ), - ) - with open(incoming_file, "w") as outfile: - for item in lines: - outfile.write("%s\n" % item[1]) - - os.chmod(incoming_file, 0o0664) # change to 664 for pipeline v2 - shutil.move( - incoming_file, - os.path.join( - ANMN_NRS_INCOMING_DIR, os.path.basename(incoming_file) - ), - ) - - else: - logger.error("Data validation unittests failed")