From debef9ce344a9e475cab4defee567edb2e7159e6 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 16 Jan 2026 22:56:16 +0000 Subject: [PATCH 001/132] split into candidate and simple calc phases remove observations from json db and move to sqlite start adding support for distributed processing --- wmpl/Trajectory/CorrelateDB.py | 82 +++ wmpl/Trajectory/CorrelateEngine.py | 790 +++++++++++++++++------------ wmpl/Trajectory/CorrelateRMS.py | 157 ++---- wmpl/Utils/remoteDataHandling.py | 8 +- 4 files changed, 590 insertions(+), 447 deletions(-) create mode 100644 wmpl/Trajectory/CorrelateDB.py diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py new file mode 100644 index 00000000..dca34377 --- /dev/null +++ b/wmpl/Trajectory/CorrelateDB.py @@ -0,0 +1,82 @@ +""" Python scripts to manage the WMPL SQLite databases +""" +import os +import sqlite3 +import logging +import logging.handlers +log = logging.getLogger("traj_correlator") + + +def openObsDatabase(db_path, db_name='observations'): + db_full_name = os.path.join(db_path, f'{db_name}.db') + log.info(f'opening database {db_full_name}') + con = sqlite3.connect(db_full_name) + cur = con.cursor() + res = cur.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") + if res.fetchone() is None: + cur.execute("CREATE TABLE paired_obs(station_code, obs_id unique, status)") + con.commit() + return con + + +def closeObsDatabase(dbhandle): + dbhandle.commit() + dbhandle.close() + return + + +def checkObsPaired(dbhandle, station_code, obs_id): + cur = dbhandle.cursor() + res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}' and status=1") + if res.fetchone() is None: + return False + return True + + +def addPairedObs(dbhandle,station_code, obs_id): + """ + addPairedObs - add a potentially paired Observation to the database + + :param dbhandle: database connection handle + :param station_code: station code eg UK12345 + :param obs_id: met_obs 
observation ID + :return: true if successful, false if the object already exists + :rtype: bool + """ + cur = dbhandle.cursor() + res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}'") + if res.fetchone() is None: + log.info(f'adding {obs_id} to paired_obs table') + sqlstr = f"insert into paired_obs values ('{station_code}','{obs_id}',1)" + else: + log.info(f'updating {obs_id} in paired_obs table') + sqlstr = f"update paired_obs set status=1 where station_code='{station_code}' and obs_id='{obs_id}'" + cur.execute(sqlstr) + dbhandle.commit() + if not checkObsPaired(dbhandle, station_code, obs_id): + log.info(f'failed to add {obs_id} to paired_obs table') + return False + return True + + +def unpairObs(dbhandle, station_code, obs_id): + cur = dbhandle.cursor() + cur.execute(f"update paired_obs set status=0 where station_code='{station_code}' and obs_id='{obs_id}'") + dbhandle.commit() + return True + + +def openTrajDatabase(db_path, db_name='processed_trajectories'): + db_full_name = os.path.join(db_path, f'{db_name}.db') + log.info(f'opening database {db_full_name}') + con = sqlite3.connect(db_full_name) + cur = con.cursor() + res = cur.execute("SELECT name FROM sqlite_master WHERE name='failed_trajectories'") + if res.fetchone() is None: + cur.execute("CREATE TABLE failed_trajectories()") + + res = cur.execute("SELECT name FROM sqlite_master WHERE name='trajectories'") + if res.fetchone() is None: + cur.execute("CREATE TABLE trajectories()") + con.commit() + return con diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 52ff61f1..df208e46 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -8,7 +8,9 @@ import multiprocessing import logging import os - +import glob +from random import randrange +import platform import numpy as np from wmpl.Trajectory.Trajectory import ObservedPoints, PlaneIntersection, Trajectory, moveStateVector @@ 
-18,7 +20,16 @@ from wmpl.Utils.ShowerAssociation import associateShowerTraj from wmpl.Utils.TrajConversions import J2000_JD, geo2Cartesian, cartesian2Geo, raDec2AltAz, altAz2RADec, \ raDec2ECI, datetime2JD, jd2Date, equatorialCoordPrecession_vect +from wmpl.Utils.Pickling import loadPickle, savePickle +from wmpl.Trajectory.CorrelateDB import addPairedObs, unpairObs + +CANDMODE_NONE = 0 +CANDMODE_SAVE = 1 +CANDMODE_LOAD = 2 +MCMODE_NONE = 0 +MCMODE_PHASE1 = 1 +MCMODE_PHASE2 = 2 # Grab the logger from the main thread log = logging.getLogger("traj_correlator") @@ -239,6 +250,37 @@ def __init__(self, data_handle, traj_constraints, v_init_part, data_in_j2000=Tru # enable OS style ground maps if true self.enableOSM = enableOSM + self.candidatemode = None + + + + def getCandidateFolders(self): + """ get candidate folders, if in multi-node candidate mode + """ + self.node_list = {} + node_file = os.path.join(self.dh.dir_path, 'wmpl_nodes.cfg') + if os.path.isfile(node_file): + lis = open(node_file, 'r').readlines() + nodes = [li for li in lis if '#' not in li and len(li) > 2] + for node in nodes: + node_name, node_path, node_max = node.split(',') + node_name = node_name.strip() + node_max = int(node_max.strip()) + node_path = node_path.strip() + if node_path[0]==os.sep or node_path[1]==':': + np = node_path + else: + np = os.path.join(self.dh.output_dir, node_path) + self.node_list[node_name] = {'node_path': np, 'node_max':node_max} + os.makedirs(np, exist_ok=True) + + # add a node for this hardware + master_name = platform.uname()[1] + np = os.path.join(self.dh.output_dir, 'candidates') + self.node_list[master_name] = {'node_path': np, 'node_max':0} + return + + def trajectoryRangeCheck(self, traj_reduced, platepar): """ Check that the trajectory is within the range limits. 
@@ -601,7 +643,7 @@ def initTrajectory(self, jdt_ref, mc_runs, verbose=False): return traj - def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=None): + def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, orig_traj=None): """ Given an initialized Trajectory object with observation, run the solver and automatically reject bad observations. @@ -631,8 +673,8 @@ def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=N initial_ignore_count = len([obs for obs in traj.observations if obs.ignore_station]) log.info(f'initially ignoring {initial_ignore_count} stations...') - # run the first phase of the solver if mcmode is 0 or 1 - if mcmode < 2: + # run the first phase of the solver if mcmode is MCMODE_NONE or MCMODE_PHASE1 + if mcmode != MCMODE_PHASE2: # Disable Monte Carlo runs until an initial stable set of observations is found traj.monte_carlo = False @@ -839,18 +881,12 @@ def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=N # Add the trajectory to the list of failed trajectories self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref) log.info("Trajectory skipped and added to fails!") + if matched_obs: + for _, met_obs_temp, _ in matched_obs: + log.info(f'Marking {met_obs_temp.id} unpaired') + unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) return False - # # If the trajectory solutions was not done at any point, skip the trajectory completely - # if traj_best is None: - # return False - - # # Otherwise, use the best trajectory solution until the solving failed - # else: - # log.info("Using previously estimated best trajectory...") - # traj_status = traj_best - - # If there are only two stations, make sure to reject solutions which have stations with # residuals higher than the maximum limit if len(traj_status.observations) == 2: @@ -861,7 +897,9 @@ def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=N # Add 
the trajectory to the list of failed trajectories self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref) - + for _, met_obs_temp, _ in matched_obs: + log.info(f'Marking {met_obs_temp.id} unpaired') + unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) return False @@ -869,7 +907,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=N traj = traj_status # if we're only doing the simple solution, then print the results - if mcmode == 1: + if mcmode == MCMODE_PHASE1: # Only proceed if the orbit could be computed if traj.orbit.ra_g is not None: # Update trajectory file name @@ -890,8 +928,8 @@ def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=N ##### end of simple soln phase ##### now run the Monte-carlo phase, if the mcmode is 0 (do both) or 2 (mc-only) - if mcmode == 0 or mcmode == 2: - if mcmode == 2: + if mcmode == MCMODE_NONE or mcmode == MCMODE_PHASE2: + if mcmode == MCMODE_PHASE2: traj_status = traj # save the traj in case we need to clean it up @@ -918,7 +956,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=N # Don't do this in mc-only mode since phase1 has already selected the stations and we could # create duplicate orbits if we now exclude some stations from the solution # TODO should we do this here *at all* ? 
- if len(non_ignored_observations) > self.traj_constraints.max_stations and mcmode != 2: + if len(non_ignored_observations) > self.traj_constraints.max_stations and mcmode != MCMODE_PHASE2: # Sort the observations by residuals (smallest first) # TODO: implement better sorting algorithm @@ -959,7 +997,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=N if traj_status is None: # Add the trajectory to the list of failed trajectories - if mcmode != 2: + if mcmode != MCMODE_PHASE2: self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref) log.info('Trajectory failed to solve') self.dh.cleanupPhase2TempPickle(save_traj) @@ -1023,11 +1061,11 @@ def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=N # Save the trajectory if successful. if successful_traj_fit: # restore the original traj_id so that the phase1 and phase 2 results use the same ID - if mcmode == 2: + if mcmode == MCMODE_PHASE2: traj.traj_id = saved_traj_id traj.phase_1_only = False - if mcmode == 1: + if mcmode == MCMODE_PHASE1: traj.phase_1_only = True if orig_traj: @@ -1038,23 +1076,18 @@ def solveTrajectory(self, traj, mc_runs, mcmode=0, matched_obs=None, orig_traj=N log.info('Saving trajectory....') self.dh.saveTrajectoryResults(traj, self.traj_constraints.save_plots) - if mcmode != 2: + if mcmode != MCMODE_PHASE2: # we do not need to update the database for phase2 log.info('Updating database....') self.dh.addTrajectory(traj) - # Mark observations as paired in a trajectory if fit successful - if mcmode != 2 and matched_obs is not None: - for _, met_obs_temp, _ in matched_obs: - self.dh.markObservationAsPaired(met_obs_temp) else: log.info('unable to fit trajectory') return successful_traj_fit - - def run(self, event_time_range=None, bin_time_range=None, mcmode=0): + def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, candidatemode=CANDMODE_NONE): """ Run meteor corellation using available data. 
Keyword arguments: @@ -1064,36 +1097,38 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=0): """ # a bit of logging to let readers know what we're doing - if mcmode == 2: + if mcmode == MCMODE_PHASE2: mcmodestr = ' - MONTE CARLO STAGE' - elif mcmode == 1: + elif mcmode == MCMODE_PHASE1: mcmodestr = ' - SIMPLE STAGE' else: mcmodestr = ' ' - - if mcmode != 2: - # Get unpaired observations, filter out observations with too little points and sort them by time - unpaired_observations_all = self.dh.getUnpairedObservations() - unpaired_observations_all = [mettmp for mettmp in unpaired_observations_all - if len(mettmp.data) >= self.traj_constraints.min_meas_pts] - unpaired_observations_all = sorted(unpaired_observations_all, key=lambda x: x.reference_dt) - - # Remove all observations done prior to 2000, to weed out those with bad time - unpaired_observations_all = [met_obs for met_obs in unpaired_observations_all - if met_obs.reference_dt > datetime.datetime(2000, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)] - - # Normalize all reference times and time data so that the reference time is at t = 0 s - for met_obs in unpaired_observations_all: - - # Correct the reference time - t_zero = met_obs.data[0].time_rel - met_obs.reference_dt = met_obs.reference_dt + datetime.timedelta(seconds=t_zero) - - # Normalize all observation times so that the first time is t = 0 s - for i in range(len(met_obs.data)): - met_obs.data[i].time_rel -= t_zero - - + self.candidatemode = candidatemode + + if mcmode != MCMODE_PHASE2: + if candidatemode != CANDMODE_LOAD: + # Get unpaired observations, filter out observations with too little points and sort them by time + unpaired_observations_all = self.dh.getUnpairedObservations() + unpaired_observations_all = [mettmp for mettmp in unpaired_observations_all + if len(mettmp.data) >= self.traj_constraints.min_meas_pts] + unpaired_observations_all = sorted(unpaired_observations_all, key=lambda x: x.reference_dt) + + # Remove all 
observations done prior to 2000, to weed out those with bad time + unpaired_observations_all = [met_obs for met_obs in unpaired_observations_all + if met_obs.reference_dt > datetime.datetime(2000, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)] + + # Normalize all reference times and time data so that the reference time is at t = 0 s + for met_obs in unpaired_observations_all: + + # Correct the reference time + t_zero = met_obs.data[0].time_rel + met_obs.reference_dt = met_obs.reference_dt + datetime.timedelta(seconds=t_zero) + + # Normalize all observation times so that the first time is t = 0 s + for i in range(len(met_obs.data)): + met_obs.data[i].time_rel -= t_zero + else: + event_time_range = self.dh.dt_range # If the time range was given, only use the events in that time range if event_time_range: @@ -1104,8 +1139,11 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=0): # Data will be divided into time bins, so the pairing function doesn't have to go pair many # observations at once and keep all pairs in memory else: - dt_beg = unpaired_observations_all[0].reference_dt - dt_end = unpaired_observations_all[-1].reference_dt + if candidatemode != CANDMODE_LOAD: + dt_beg = unpaired_observations_all[0].reference_dt + dt_end = unpaired_observations_all[-1].reference_dt + else: + dt_beg, dt_end = self.dh.dt_range dt_bin_list = generateDatetimeBins( dt_beg, dt_end, bin_days=1, utc_hour_break=12, tzinfo=datetime.timezone.utc, reverse=False @@ -1133,410 +1171,483 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=0): traj_solved_count = 0 # if we're in MC mode 0 or 1 we have to find the candidate trajectories - if mcmode < 2: - log.info("") - log.info("-----------------------------------") - log.info(" PAIRING TRAJECTORIES IN TIME BIN:") - log.info(" BIN BEG: {:s} UTC".format(str(bin_beg))) - log.info(" BIN END: {:s} UTC".format(str(bin_end))) - log.info("-----------------------------------") - log.info("") - + if mcmode != 
MCMODE_PHASE2: + ## we are in candidatemode mode 0 or 1 and want to find candidates + if self.candidatemode != CANDMODE_LOAD: + log.info("") + log.info("-----------------------------------") + log.info(" PAIRING TRAJECTORIES IN TIME BIN:") + log.info(" BIN BEG: {:s} UTC".format(str(bin_beg))) + log.info(" BIN END: {:s} UTC".format(str(bin_end))) + log.info("-----------------------------------") + log.info("") - # Select observations in the given time bin - unpaired_observations = [met_obs for met_obs in unpaired_observations_all - if (met_obs.reference_dt >= bin_beg) and (met_obs.reference_dt <= bin_end)] - log.info(f'Analysing {len(unpaired_observations)} observations...') + # Select observations in the given time bin + unpaired_observations = [met_obs for met_obs in unpaired_observations_all + if (met_obs.reference_dt >= bin_beg) and (met_obs.reference_dt <= bin_end)] - ### CHECK FOR PAIRING WITH PREVIOUSLY ESTIMATED TRAJECTORIES ### + total_unpaired = len(unpaired_observations) + remaining_unpaired = total_unpaired + log.info(f'Analysing {total_unpaired} observations in this bucket...') - log.info("") - log.info("--------------------------------------------------------------------------") - log.info(" 1) CHECKING IF PREVIOUSLY ESTIMATED TRAJECTORIES HAVE NEW OBSERVATIONS") - log.info("--------------------------------------------------------------------------") - log.info("") + ### CHECK FOR PAIRING WITH PREVIOUSLY ESTIMATED TRAJECTORIES ### - # Get a list of all already computed trajectories within the given time bin - # Reducted trajectory objects are returned - - if bin_time_range: - # restrict checks to the bin range supplied to run() plus a day to allow for data upload times - log.info(f'Getting computed trajectories for bin {str(bin_time_range[0])} to {str(bin_time_range[1])}') - computed_traj_list = self.dh.getComputedTrajectories(datetime2JD(bin_time_range[0]), datetime2JD(bin_time_range[1])+1) - else: - # use the current bin. 
- log.info(f'Getting computed trajectories for {str(bin_beg)} to {str(bin_end)}') - computed_traj_list = self.dh.getComputedTrajectories(datetime2JD(bin_beg), datetime2JD(bin_end)) + log.info("") + log.info("--------------------------------------------------------------------------") + log.info(" 1) CHECKING IF PREVIOUSLY ESTIMATED TRAJECTORIES HAVE NEW OBSERVATIONS") + log.info("--------------------------------------------------------------------------") + log.info("") - # Find all unpaired observations that match already existing trajectories - for traj_reduced in computed_traj_list: + # Get a list of all already computed trajectories within the given time bin + # Reducted trajectory objects are returned + + if bin_time_range: + # restrict checks to the bin range supplied to run() plus a day to allow for data upload times + log.info(f'Getting computed trajectories for bin {str(bin_time_range[0])} to {str(bin_time_range[1])}') + computed_traj_list = self.dh.getComputedTrajectories(datetime2JD(bin_time_range[0]), datetime2JD(bin_time_range[1])+1) + else: + # use the current bin. 
+ log.info(f'Getting computed trajectories for {str(bin_beg)} to {str(bin_end)}') + computed_traj_list = self.dh.getComputedTrajectories(datetime2JD(bin_beg), datetime2JD(bin_end)) - # If the trajectory already has more than the maximum number of stations, skip it - if len(traj_reduced.participating_stations) >= self.traj_constraints.max_stations: + # Find all unpaired observations that match already existing trajectories + for traj_reduced in computed_traj_list: - log.info( - "Trajectory {:s} has already reached the maximum number of stations, " - "skipping...".format( - str(jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)))) + # If the trajectory already has more than the maximum number of stations, skip it + if len(traj_reduced.participating_stations) >= self.traj_constraints.max_stations: - # TODO DECIDE WHETHER WE ACTUALLY WANT TO DO THIS - # the problem is that we could end up with unpaired observations that form a new trajectory instead of - # being added to an existing one - continue - - # Get all unprocessed observations which are close in time to the reference trajectory - traj_time_pairs = self.dh.getTrajTimePairs(traj_reduced, unpaired_observations, - self.traj_constraints.max_toffset) + log.info( + "Trajectory {:s} has already reached the maximum number of stations, " + "skipping...".format( + str(jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)))) - # Skip trajectory if there are no new obervations - if not traj_time_pairs: - continue + # TODO DECIDE WHETHER WE ACTUALLY WANT TO DO THIS + # the problem is that we could end up with unpaired observations that form a new trajectory instead of + # being added to an existing one + continue + + # Get all unprocessed observations which are close in time to the reference trajectory + traj_time_pairs = self.dh.getTrajTimePairs(traj_reduced, unpaired_observations, + self.traj_constraints.max_toffset) + # Skip trajectory if there are no new obervations + if not 
traj_time_pairs: + continue - log.info("") - log.info("Checking trajectory at {:s} in countries: {:s}".format( - str(jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)), - ", ".join(list(set([stat_id[:2] for stat_id in traj_reduced.participating_stations]))))) - log.info("--------") + log.info("") + log.info("Checking trajectory at {:s} in countries: {:s}".format( + str(jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)), + ", ".join(list(set([stat_id[:2] for stat_id in traj_reduced.participating_stations]))))) + log.info("--------") - # Filter out bad matches and only keep the good ones - candidate_observations = [] - traj_full = None - skip_traj_check = False - for met_obs in traj_time_pairs: - log.info("Candidate observation: {:s}".format(met_obs.station_code)) + # Filter out bad matches and only keep the good ones + candidate_observations = [] + traj_full = None + skip_traj_check = False + for met_obs in traj_time_pairs: - platepar = self.dh.getPlatepar(met_obs) + log.info("Candidate observation: {:s}".format(met_obs.station_code)) - # Check that the trajectory beginning and end are within the distance limit - if not self.trajectoryRangeCheck(traj_reduced, platepar): - continue + platepar = self.dh.getPlatepar(met_obs) + # Check that the trajectory beginning and end are within the distance limit + if not self.trajectoryRangeCheck(traj_reduced, platepar): + continue - # Check that the trajectory is within the field of view - if not self.trajectoryInFOV(traj_reduced, platepar): - continue + # Check that the trajectory is within the field of view + if not self.trajectoryInFOV(traj_reduced, platepar): + continue - # Load the full trajectory object - if traj_full is None: - traj_full = self.dh.loadFullTraj(traj_reduced) - # If the full trajectory couldn't be loaded, skip checking this trajectory + # Load the full trajectory object if traj_full is None: - - skip_traj_check = True - break + traj_full = 
self.dh.loadFullTraj(traj_reduced) + # If the full trajectory couldn't be loaded, skip checking this trajectory + if traj_full is None: + + skip_traj_check = True + break - ### Do a rough trajectory solution and perform a quick quality control ### - # Init observation object using the new meteor observation - obs_new = self.initObservationsObject(met_obs, platepar, - ref_dt=jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)) + ### Do a rough trajectory solution and perform a quick quality control ### + # Init observation object using the new meteor observation + obs_new = self.initObservationsObject(met_obs, platepar, + ref_dt=jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)) + obs_new.id = met_obs.id + obs_new.station_code = met_obs.station_code - # Get an observation from the trajectory object with the maximum convergence angle to - # the reference observations - obs_traj_best = None - qc_max = 0.0 - for obs_tmp in traj_full.observations: - - # Compute the plane intersection between the new and one of trajectory observations - pi = PlaneIntersection(obs_new, obs_tmp) + # Get an observation from the trajectory object with the maximum convergence angle to + # the reference observations + obs_traj_best = None + qc_max = 0.0 + for obs_tmp in traj_full.observations: + + # Compute the plane intersection between the new and one of trajectory observations + pi = PlaneIntersection(obs_new, obs_tmp) - # Take the observation with the maximum convergence angle - if (obs_traj_best is None) or (pi.conv_angle > qc_max): - qc_max = pi.conv_angle - obs_traj_best = obs_tmp + # Take the observation with the maximum convergence angle + if (obs_traj_best is None) or (pi.conv_angle > qc_max): + qc_max = pi.conv_angle + obs_traj_best = obs_tmp - # Do a quick trajectory solution and perform sanity checks - plane_intersection = self.quickTrajectorySolution(obs_traj_best, obs_new) - if plane_intersection is None: - continue + # Do a quick 
trajectory solution and perform sanity checks + plane_intersection = self.quickTrajectorySolution(obs_traj_best, obs_new) + if plane_intersection is None: + continue - ### ### + ### ### - candidate_observations.append([obs_new, met_obs]) + candidate_observations.append([obs_new, met_obs]) - # Skip the candidate trajectory if it couldn't be loaded from disk - if skip_traj_check: - continue + # Skip the candidate trajectory if it couldn't be loaded from disk + if skip_traj_check: + continue - # If there are any good new observations, add them to the trajectory and re-run the solution - if candidate_observations: + # If there are any good new observations, add them to the trajectory and re-run the solution + if candidate_observations: - log.info("Recomputing trajectory with new observations from stations:") + log.info("Recomputing trajectory with new observations from stations:") - # Add new observations to the trajectory object - for obs_new, _ in candidate_observations: - log.info(obs_new.station_id) - traj_full.infillWithObs(obs_new) + # Add new observations to the trajectory object + for obs_new, _ in candidate_observations: + log.info(obs_new.station_id) + traj_full.infillWithObs(obs_new) - # Re-run the trajectory fit - # pass in orig_traj here so that it can be deleted from disk if the new solution succeeds - successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=mcmode, orig_traj=traj_reduced) - - # If the new trajectory solution succeeded, remove the now-paired observations - if successful_traj_fit: + # Re-run the trajectory fit + # pass in orig_traj here so that it can be deleted from disk if the new solution succeeds + successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=mcmode, orig_traj=traj_reduced) + + # If the new trajectory solution succeeded, remove the now-paired observations + if successful_traj_fit: - log.info("Remove paired observations from the processing list...") - for _, met_obs_temp in 
candidate_observations: - self.dh.markObservationAsPaired(met_obs_temp) - unpaired_observations.remove(met_obs_temp) + log.info("Remove paired observations from the processing list...") + for _, met_obs_temp in candidate_observations: + unpaired_observations.remove(met_obs_temp) + remaining_unpaired -= 1 - else: - log.info("New trajectory solution failed, keeping the old trajectory...") + else: + for met_obs_temp, _ in candidate_observations: + unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) + log.info("New trajectory solution failed, keeping the old trajectory...") - ### ### + ### ### - log.info("") - log.info("-------------------------------------------------") - log.info(" 2) PAIRING OBSERVATIONS INTO NEW TRAJECTORIES") - log.info("-------------------------------------------------") - log.info("") + log.info("") + log.info("-------------------------------------------------") + log.info(" 2) PAIRING OBSERVATIONS INTO NEW TRAJECTORIES") + log.info("-------------------------------------------------") + log.info("") - # List of all candidate trajectories - candidate_trajectories = [] + # List of all candidate trajectories + candidate_trajectories = [] - # Go through all unpaired and unprocessed meteor observations - for met_obs in unpaired_observations: + # Go through all unpaired and unprocessed meteor observations + for met_obs in unpaired_observations: - # Skip observations that were processed in the meantime - if met_obs.processed: - continue + # Skip observations that were processed in the meantime + if met_obs.processed: + continue - # Get station platepar - reference_platepar = self.dh.getPlatepar(met_obs) - obs1 = self.initObservationsObject(met_obs, reference_platepar) + # Get station platepar + reference_platepar = self.dh.getPlatepar(met_obs) + obs1 = self.initObservationsObject(met_obs, reference_platepar) - # Keep a list of observations which matched the reference observation - matched_observations = [] + # Keep a list of 
observations which matched the reference observation + matched_observations = [] - # Find all meteors from other stations that are close in time to this meteor - plane_intersection_good = None - time_pairs = self.dh.findTimePairs(met_obs, unpaired_observations, - self.traj_constraints.max_toffset) - for met_pair_candidate in time_pairs: + # Find all meteors from other stations that are close in time to this meteor + plane_intersection_good = None + time_pairs = self.dh.findTimePairs(met_obs, unpaired_observations, + self.traj_constraints.max_toffset) + for met_pair_candidate in time_pairs: - log.info("") - log.info("Processing pair:") - log.info("{:s} and {:s}".format(met_obs.station_code, met_pair_candidate.station_code)) - log.info("{:s} and {:s}".format(str(met_obs.reference_dt), str(met_pair_candidate.reference_dt))) - log.info("-----------------------") + log.info("") + log.info("Processing pair:") + log.info("{:s} and {:s}".format(met_obs.station_code, met_pair_candidate.station_code)) + log.info("{:s} and {:s}".format(str(met_obs.reference_dt), str(met_pair_candidate.reference_dt))) + log.info("-----------------------") - ### Check if the stations are close enough and have roughly overlapping fields of view ### + ### Check if the stations are close enough and have roughly overlapping fields of view ### - # Get candidate station platepar - candidate_platepar = self.dh.getPlatepar(met_pair_candidate) + # Get candidate station platepar + candidate_platepar = self.dh.getPlatepar(met_pair_candidate) - # Check if the stations are within range - if not self.stationRangeCheck(reference_platepar, candidate_platepar): - continue + # Check if the stations are within range + if not self.stationRangeCheck(reference_platepar, candidate_platepar): + continue - # Check the FOV overlap - if not self.checkFOVOverlap(reference_platepar, candidate_platepar): - log.info("Station FOV does not overlap: {:s} and {:s}".format(met_obs.station_code, - met_pair_candidate.station_code)) 
- continue + # Check the FOV overlap + if not self.checkFOVOverlap(reference_platepar, candidate_platepar): + log.info("Station FOV does not overlap: {:s} and {:s}".format(met_obs.station_code, + met_pair_candidate.station_code)) + continue - ### ### + ### ### - ### Do a rough trajectory solution and perform a quick quality control ### + ### Do a rough trajectory solution and perform a quick quality control ### - # Init observations - obs2 = self.initObservationsObject(met_pair_candidate, candidate_platepar, - ref_dt=met_obs.reference_dt) + # Init observations + obs2 = self.initObservationsObject(met_pair_candidate, candidate_platepar, + ref_dt=met_obs.reference_dt) - # Do a quick trajectory solution and perform sanity checks - plane_intersection = self.quickTrajectorySolution(obs1, obs2) - if plane_intersection is None: - continue + # Do a quick trajectory solution and perform sanity checks + plane_intersection = self.quickTrajectorySolution(obs1, obs2) + if plane_intersection is None: + continue - else: - plane_intersection_good = plane_intersection + else: + plane_intersection_good = plane_intersection - ### ### + ### ### - matched_observations.append([obs2, met_pair_candidate, plane_intersection]) + matched_observations.append([obs2, met_pair_candidate, plane_intersection]) - # If there are no matched observations, skip it - if len(matched_observations) == 0: + # If there are no matched observations, skip it + if len(matched_observations) == 0: - if len(time_pairs) > 0: - log.info("") - log.info(" --- NO MATCH ---") + if len(time_pairs) > 0: + log.info("") + log.info(" --- NO MATCH ---") - continue + continue - # Skip if there are not good plane intersections - if plane_intersection_good is None: - continue + # Skip if there are not good plane intersections + if plane_intersection_good is None: + continue - # Add the first observation to matched observations - matched_observations.append([obs1, met_obs, plane_intersection_good]) + # Add the first observation to 
matched observations + matched_observations.append([obs1, met_obs, plane_intersection_good]) - # Mark observations as processed - for _, met_obs_temp, _ in matched_observations: - met_obs_temp.processed = True - self.dh.markObservationAsProcessed(met_obs_temp) + # Mark observations as processed + for _, met_obs_temp, _ in matched_observations: + met_obs_temp.processed = True + if addPairedObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id): + remaining_unpaired -= 1 + # Store candidate trajectories + log.info("") + log.info(" --- ADDING CANDIDATE ---") + candidate_trajectories.append(matched_observations) - # Store candidate trajectories + ### Merge all candidate trajectories which share the same observations ### log.info("") - log.info(" --- ADDING CANDIDATE ---") - candidate_trajectories.append(matched_observations) + log.info("---------------------------") + log.info("MERGING BROKEN OBSERVATIONS") + log.info("---------------------------") + merged_candidate_trajectories = [] + merged_indices = [] + for i, traj_cand_ref in enumerate(candidate_trajectories): + + # Skip candidate trajectories that have already been merged + if i in merged_indices: + continue + + # Stop the search if the end has been reached + if (i + 1) == len(candidate_trajectories): + merged_candidate_trajectories.append(traj_cand_ref) + break - ### Merge all candidate trajectories which share the same observations ### - log.info("") - log.info("---------------------------") - log.info("MERGING BROKEN OBSERVATIONS") - log.info("---------------------------") - merged_candidate_trajectories = [] - merged_indices = [] - for i, traj_cand_ref in enumerate(candidate_trajectories): - - # Skip candidate trajectories that have already been merged - if i in merged_indices: - continue + # Get the mean time of the reference observation + ref_mean_dt = traj_cand_ref[0][1].mean_dt - - # Stop the search if the end has been reached - if (i + 1) == len(candidate_trajectories): - 
merged_candidate_trajectories.append(traj_cand_ref) - break + obs_list_ref = [entry[1] for entry in traj_cand_ref] + merged_candidate = [] + # Compute the mean radiant of the reference solution + plane_radiants_ref = [entry[2].radiant_eq for entry in traj_cand_ref] + ra_mean_ref = meanAngle([ra for ra, _ in plane_radiants_ref]) + dec_mean_ref = np.mean([dec for _, dec in plane_radiants_ref]) - # Get the mean time of the reference observation - ref_mean_dt = traj_cand_ref[0][1].mean_dt - obs_list_ref = [entry[1] for entry in traj_cand_ref] - merged_candidate = [] + # Check for pairs + found_first_pair = False + for j, traj_cand_test in enumerate(candidate_trajectories[(i + 1):]): - # Compute the mean radiant of the reference solution - plane_radiants_ref = [entry[2].radiant_eq for entry in traj_cand_ref] - ra_mean_ref = meanAngle([ra for ra, _ in plane_radiants_ref]) - dec_mean_ref = np.mean([dec for _, dec in plane_radiants_ref]) + # Skip same observations + if traj_cand_ref[0] == traj_cand_test[0]: + continue - # Check for pairs - found_first_pair = False - for j, traj_cand_test in enumerate(candidate_trajectories[(i + 1):]): + # Get the mean time of the test observation + test_mean_dt = traj_cand_test[0][1].mean_dt - # Skip same observations - if traj_cand_ref[0] == traj_cand_test[0]: - continue + # Make sure the observations that are being compared are within the time window + time_diff = (test_mean_dt - ref_mean_dt).total_seconds() + if abs(time_diff) > self.traj_constraints.max_toffset: + continue - # Get the mean time of the test observation - test_mean_dt = traj_cand_test[0][1].mean_dt + # Break the search if the time went beyond the search. 
This can be done as observations + # are ordered in time + if time_diff > self.traj_constraints.max_toffset: + break - # Make sure the observations that are being compared are within the time window - time_diff = (test_mean_dt - ref_mean_dt).total_seconds() - if abs(time_diff) > self.traj_constraints.max_toffset: - continue - # Break the search if the time went beyond the search. This can be done as observations - # are ordered in time - if time_diff > self.traj_constraints.max_toffset: - break + # Create a list of observations + obs_list_test = [entry[1] for entry in traj_cand_test] + # Check if there any any common observations between candidate trajectories and merge them + # if that is the case + found_match = False + for obs1 in obs_list_ref: + if obs1 in obs_list_test: + found_match = True + break - # Create a list of observations - obs_list_test = [entry[1] for entry in traj_cand_test] + # Compute the mean radiant of the reference solution + plane_radiants_test = [entry[2].radiant_eq for entry in traj_cand_test] + ra_mean_test = meanAngle([ra for ra, _ in plane_radiants_test]) + dec_mean_test = np.mean([dec for _, dec in plane_radiants_test]) - # Check if there any any common observations between candidate trajectories and merge them - # if that is the case - found_match = False - for obs1 in obs_list_ref: - if obs1 in obs_list_test: - found_match = True - break + # Skip the mergning attempt if the estimated radiants are too far off + if np.degrees(angleBetweenSphericalCoords(dec_mean_ref, ra_mean_ref, dec_mean_test, ra_mean_test)) > self.traj_constraints.max_merge_radiant_angle: + continue - # Compute the mean radiant of the reference solution - plane_radiants_test = [entry[2].radiant_eq for entry in traj_cand_test] - ra_mean_test = meanAngle([ra for ra, _ in plane_radiants_test]) - dec_mean_test = np.mean([dec for _, dec in plane_radiants_test]) - # Skip the mergning attempt if the estimated radiants are too far off - if 
np.degrees(angleBetweenSphericalCoords(dec_mean_ref, ra_mean_ref, dec_mean_test, ra_mean_test)) > self.traj_constraints.max_merge_radiant_angle: + # Add the candidate trajectory to the common list if a match has been found + if found_match: - continue + ref_stations = [obs.station_code for obs in obs_list_ref] + # Add observations that weren't present in the reference candidate + for entry in traj_cand_test: - # Add the candidate trajectory to the common list if a match has been found - if found_match: + # Make sure the added observation is not from a station that's already added + if entry[1].station_code in ref_stations: + continue - ref_stations = [obs.station_code for obs in obs_list_ref] + if entry[1] not in obs_list_ref: - # Add observations that weren't present in the reference candidate - for entry in traj_cand_test: + # Print the reference and the merged radiants + if not found_first_pair: + log.info("") + log.info("------") + log.info("Reference time: {:s}".format(str(ref_mean_dt))) + log.info("Reference stations: {:s}".format(", ".join(sorted(ref_stations)))) + log.info("Reference radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_ref), np.degrees(dec_mean_ref))) + log.info("") + found_first_pair = True - # Make sure the added observation is not from a station that's already added - if entry[1].station_code in ref_stations: - continue + log.info("Merging: {:s} {:s}".format(str(entry[1].mean_dt), str(entry[1].station_code))) + traj_cand_ref.append(entry) - if entry[1] not in obs_list_ref: + log.info("Merged radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_test), np.degrees(dec_mean_test))) - # Print the reference and the merged radiants - if not found_first_pair: - log.info("") - log.info("------") - log.info("Reference time: {:s}".format(str(ref_mean_dt))) - log.info("Reference stations: {:s}".format(", ".join(sorted(ref_stations)))) - log.info("Reference radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_ref), 
np.degrees(dec_mean_ref))) - log.info("") - found_first_pair = True + - log.info("Merging: {:s} {:s}".format(str(entry[1].mean_dt), str(entry[1].station_code))) - traj_cand_ref.append(entry) - log.info("Merged radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_test), np.degrees(dec_mean_test))) + # Mark that the current index has been processed + merged_indices.append(i + j + 1) - + # Add the reference candidate observations to the list + merged_candidate += traj_cand_ref - # Mark that the current index has been processed - merged_indices.append(i + j + 1) + # Add the merged observation to the final list + merged_candidate_trajectories.append(merged_candidate) - # Add the reference candidate observations to the list - merged_candidate += traj_cand_ref + candidate_trajectories = merged_candidate_trajectories + log.info(f'There are {remaining_unpaired} remaining unpaired observations in this bucket.') - # Add the merged observation to the final list - merged_candidate_trajectories.append(merged_candidate) + # in candidatemode mode 1 we want to save the candidates to disk + if self.candidatemode == CANDMODE_SAVE: + self.getCandidateFolders() + print("-----------------------") + print('SAVING {} CANDIDATES'.format(len(candidate_trajectories))) + print("-----------------------") + for matched_observations in candidate_trajectories: + # randomly select a node from the list of nodes then check that its actually listening + # and hasn't already received its max allocation. 
The master node gets anything left + while True: + curr_node = list(self.node_list.keys())[randrange(len(self.node_list.keys()))] + save_path = self.node_list[curr_node]['node_path'] + if curr_node == platform.uname()[1]: + break + listen_file = os.path.join(save_path, f'{curr_node}.listening') + if os.path.isfile(listen_file): + # if the folder already has enough candidates then use the master node + if len(glob.glob(os.path.join(save_path, '*.pickle'))) >= self.node_list[curr_node]['node_max']: + save_path = self.node_list[platform.uname()[1]]['node_path'] + break + ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) + #log.debug(str(ref_dt).replace(" ", "_")) + picklename = str(ref_dt.timestamp()) + '.pickle' + savePickle(matched_observations, save_path, picklename) - candidate_trajectories = merged_candidate_trajectories + for curr_node in self.node_list.keys(): + save_path = self.node_list[curr_node]['node_path'] + log.info(f'There are {len(glob.glob(os.path.join(save_path, "*.pickle")))} candidates for {curr_node}') + return + else: + print("-----------------------") + print('PROCESSING {} CANDIDATES'.format(len(candidate_trajectories))) + print("-----------------------") + # end of 'if self.candidatemode != CANDMODE_LOAD' ### ### - else: + # otherwise we doing self.candidatemode == CANDMODE_SAVE + else: + traj_solved_count = 0 + candidate_trajectories = [] + print("-----------------------") + print('LOADING CANDIDATES') + print("-----------------------") + self.getCandidateFolders() + # only load candidates from this node's candidate folder + save_path = self.node_list[platform.uname()[1]]['node_path'] + for fil in os.listdir(save_path): + if '.pickle' not in fil: + continue + try: + loadedpickle = loadPickle(save_path, fil) + candidate_trajectories.append(loadedpickle) + # move the loaded file so we don't try to reprocess it on a subsequent pass + procpath = os.path.join(save_path, 'processed') + os.makedirs(procpath, 
exist_ok=True) + procfile = os.path.join(procpath, fil) + if os.path.isfile(procfile): + os.remove(procfile) + os.rename(os.path.join(save_path, fil), procfile) + except Exception: + print(f'Candidate {fil} went away, probably picked up by another process') + print("-----------------------") + print('LOADED {} TRAJECTORIES'.format(len(candidate_trajectories))) + print("-----------------------") + # end of 'self.candidatemode == CANDMODE_LOAD' + # end of 'if mcmode != MCMODE_PHASE2' + else: + # mcmode == MCMODE_PHASE2 so we need to load the phase1 solutions log.info("-----------------------") log.info('LOADING PHASE1 SOLUTIONS') log.info("-----------------------") candidate_trajectories = self.dh.phase1Trajectories - # end of "if mcmode < 2" + # end of "if mcmode == MCMODE_PHASE2" log.info("") log.info("-----------------------") @@ -1552,7 +1663,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=0): # if mcmode is not 2, prepare to calculate the intersecting planes solutions - if mcmode != 2: + if mcmode != MCMODE_PHASE2: # Find unique station counts station_counts = np.unique([entry[1].station_code for entry in matched_observations], return_counts=True) @@ -1621,6 +1732,9 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=0): if qc_max < self.traj_constraints.min_qc: log.info("Max convergence angle too small: {:.1f} < {:.1f} deg".format(qc_max, self.traj_constraints.min_qc)) + for _, met_obs_temp, _ in matched_observations: + log.info(f'Marking {met_obs_temp.id} unpaired') + unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) continue @@ -1685,15 +1799,18 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=0): # new observations are added if self.dh.checkTrajIfFailed(traj): log.info("The same trajectory already failed to be computed in previous runs!") + for _, met_obs_temp, _ in matched_observations: + log.info(f'Marking {met_obs_temp.id} unpaired') + unpairObs(self.dh.observations_db, 
met_obs_temp.station_code, met_obs_temp.id) continue # pass in matched_observations here so that solveTrajectory can mark them paired if they're used result = self.solveTrajectory(traj, mc_runs, mcmode=mcmode, matched_obs=matched_observations) traj_solved_count += int(result) - # end of if mcmode != 2 + # end of if mcmode != MCMODE_PHASE2 else: - # mcmode is 2 and so we have a list of trajectories that were solved in phase 1 + # mcmode is MCMODE_PHASE2 and so we have a list of trajectories that were solved in phase 1 # to prepare for monte-carlo solutions traj = matched_observations @@ -1726,6 +1843,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=0): # Finish the correlation run (update the database with new values) self.dh.saveDatabase() + log.info(f'SOLVED {sum(outcomes)} TRAJECTORIES') log.info("") diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 88c11292..d74d9db2 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -23,11 +23,13 @@ from wmpl.Formats.CAMS import loadFTPDetectInfo from wmpl.Trajectory.CorrelateEngine import TrajectoryCorrelator, TrajectoryConstraints +from wmpl.Trajectory.CorrelateEngine import MCMODE_NONE, MCMODE_PHASE2, CANDMODE_LOAD from wmpl.Utils.Math import generateDatetimeBins from wmpl.Utils.OSTools import mkdirP from wmpl.Utils.Pickling import loadPickle, savePickle from wmpl.Utils.TrajConversions import datetime2JD, jd2Date -from wmpl.Utils.remoteDataHandling import collectRemoteTrajectories, moveRemoteTrajectories, uploadTrajToRemote +from wmpl.Utils.remoteDataHandling import collectRemoteData, moveRemoteData, uploadDataToRemote +from wmpl.Trajectory.CorrelateDB import openObsDatabase, checkObsPaired, addPairedObs, closeObsDatabase ### CONSTANTS ### @@ -149,10 +151,6 @@ def __init__(self, db_file_path, verbose=False): self.db_file_path = db_file_path - # List of processed directories (keys are station codes, values are relative paths to night - 
# directories) - self.processed_dirs = {} - # List of paired observations as a part of a trajectory (keys are station codes, values are unique # observation IDs) self.paired_obs = {} @@ -168,7 +166,6 @@ def __init__(self, db_file_path, verbose=False): # Load the database from a JSON file self.load(verbose=verbose) - def load(self, verbose=False): """ Load the database from a JSON file. """ @@ -250,34 +247,6 @@ def save(self): shutil.copy2(db_bak_file_path, self.db_file_path) log.warning(e) - def addProcessedDir(self, station_name, rel_proc_path): - """ Add the processed directory to the list. """ - - if station_name in self.processed_dirs: - if rel_proc_path not in self.processed_dirs[station_name]: - self.processed_dirs[station_name].append(rel_proc_path) - - - def addPairedObservation(self, met_obs): - """ Mark the given meteor observation as paired in a trajectory. """ - - if met_obs.station_code not in self.paired_obs: - self.paired_obs[met_obs.station_code] = [] - - if met_obs.id not in self.paired_obs[met_obs.station_code]: - self.paired_obs[met_obs.station_code].append(met_obs.id) - - - def checkObsIfPaired(self, met_obs): - """ Check if the given observation has been paired to a trajectory or not. """ - - if met_obs.station_code in self.paired_obs: - return (met_obs.id in self.paired_obs[met_obs.station_code]) - - else: - return False - - def checkTrajIfFailed(self, traj): """ Check if the given trajectory has been computed with the same observations and has failed to be computed before. @@ -517,7 +486,7 @@ def __init__(self, **entries): class RMSDataHandle(object): - def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode=0, max_trajs=1000, remotehost=None, verbose=False): + def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode=MCMODE_NONE, max_trajs=1000, remotehost=None, verbose=False, archivemonths=3): """ Handles data interfacing between the trajectory correlator and RMS data files on disk. 
Arguments: @@ -563,7 +532,7 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.phase1_dir = os.path.join(self.output_dir, 'phase1') # create the directory for phase1 simple trajectories, if needed - if self.mc_mode > 0: + if self.mc_mode > MCMODE_NONE: mkdirP(os.path.join(self.phase1_dir, 'processed')) self.purgePhase1ProcessedData(os.path.join(self.phase1_dir, 'processed')) @@ -578,17 +547,27 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode log.info("") # move any remotely calculated pickles to their target locations if os.path.isdir(os.path.join(self.output_dir, 'remoteuploads')): - moveRemoteTrajectories(self.output_dir) + moveRemoteData(self.output_dir) - if mcmode != 2: + if mcmode != MCMODE_PHASE2: log.info("Loading database: {:s}".format(database_path)) self.db = DatabaseJSON(database_path, verbose=self.verbose) - log.info('Archiving older entries....') - try: - self.archiveOldRecords(older_than=3) - except: - pass - log.info(" ... done!") + self.observations_db = openObsDatabase(db_dir, 'observations') + if len(self.db.paired_obs) > 0: + log.info('moving observations to sqlite') + print('moving observations to sqlite') + for stat_id in self.db.paired_obs.keys(): + for obs_id in self.db.paired_obs[stat_id]: + addPairedObs(self.observations_db, stat_id, obs_id) + del self.dh.db.paired_obs[stat_id][obs_id] + print('done') + if archivemonths != 0: + log.info('Archiving older entries....') + try: + self.archiveOldRecords(older_than=archivemonths) + except: + pass + log.info(" ... 
done!") # Load the list of stations station_list = self.loadStations() @@ -602,13 +581,14 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode else: # retrieve pickles from a remote host, if configured if self.remotehost is not None: - collectRemoteTrajectories(remotehost, max_trajs, self.phase1_dir) + collectRemoteData(remotehost, max_trajs, self.phase1_dir) # reload the phase1 trajectories dt_beg, dt_end = self.loadPhase1Trajectories(max_trajs=max_trajs) self.processing_list = None self.dt_range=[dt_beg, dt_end] self.db = None + self.observations_db = None ### Define country groups to speed up the proceessing ### @@ -651,9 +631,7 @@ def purgePhase1ProcessedData(self, dir_path): log.warning(f"File not found: {file_path}") continue - if ( - os.path.exists(file_path) and (file_dt < refdt) and os.path.isfile(file_path) - ): + if os.path.exists(file_path) and (file_dt < refdt) and os.path.isfile(file_path): try: os.remove(file_path) @@ -684,32 +662,18 @@ def __init__(self, station, obs_id): archdate_jd = datetime2JD(archdate) arch_db_path = os.path.join(self.db_dir, f'{archdate.strftime("%Y%m")}_{JSON_DB_NAME}') - archdb = DatabaseJSON(arch_db_path, verbose=self.verbose) + archdb = DatabaseJSON(arch_db_path, verbose=self.verbose, archiveYM=archdate.strftime("%Y%m")) log.info(f'Archiving db records to {arch_db_path}...') for traj in [t for t in self.db.trajectories if t < archdate_jd]: if traj < archdate_jd: archdb.addTrajectory(None, self.db.trajectories[traj], False) - self.db.removeTrajectory(self.db.trajectories[traj], keepFolder=True) + del self.db.trajectories[traj] for traj in [t for t in self.db.failed_trajectories if t < archdate_jd]: if traj < archdate_jd: archdb.addTrajectory(None, self.db.failed_trajectories[traj], True) - self.db.removeTrajectory(self.db.failed_trajectories[traj], keepFolder=True) - - for station in self.db.processed_dirs: - arch_processed = [dirname for dirname in self.db.processed_dirs[station] if - 
datetime.datetime.strptime(dirname[14:22], '%Y%m%d').replace(tzinfo=datetime.timezone.utc) < archdate] - for dirname in arch_processed: - archdb.addProcessedDir(station, dirname) - self.db.processed_dirs[station].remove(dirname) - - for station in self.db.paired_obs: - arch_processed = [obs_id for obs_id in self.db.paired_obs[station] if - datetime.datetime.strptime(obs_id[7:15], '%Y%m%d').replace(tzinfo=datetime.timezone.utc) < archdate] - for obs_id in arch_processed: - archdb.addPairedObservation(DummyMetObs(station, obs_id)) - self.db.paired_obs[station].remove(obs_id) + del self.db.failed_trajectories[traj] archdb.save() self.db.save() @@ -747,10 +711,6 @@ def findUnprocessedFolders(self, station_list): station_path = os.path.join(self.dir_path, station_name) - # Add the station name to the database if it doesn't exist - if station_name not in self.db.processed_dirs: - self.db.processed_dirs[station_name] = [] - # Go through all directories in stations for night_name in os.listdir(station_path): @@ -862,10 +822,6 @@ def loadUnpairedObservations(self, processing_list, dt_range=None): # Skip these observations if no data files were found inside if (ftpdetectinfo_name is None) or (platepar_recalibrated_name is None): log.info(" Skipping {:s} due to missing data files...".format(rel_proc_path)) - - # Add the folder to the list of processed folders - self.db.addProcessedDir(station_code, rel_proc_path) - continue if station_code != prev_station: @@ -934,11 +890,9 @@ def loadUnpairedObservations(self, processing_list, dt_range=None): continue # Add only unpaired observations - if not self.db.checkObsIfPaired(met_obs): - + if not checkObsPaired(self.observations_db, met_obs.station_code, met_obs.id): # print(" ", station_code, met_obs.reference_dt, rel_proc_path) added_count += 1 - unpaired_met_obs_list.append(met_obs) log.info(" Added {:d} observations!".format(added_count)) @@ -1447,7 +1401,7 @@ def saveTrajectoryResults(self, traj, save_plots): if self.remotehost 
is not None: log.info('saving to remote host') - uploadTrajToRemote(remotehost, traj.file_name + '_trajectory.pickle', output_dir) + uploadDataToRemote(remotehost, traj.file_name + '_trajectory.pickle', output_dir) log.info(' ...done') # Save the plots @@ -1459,26 +1413,6 @@ def saveTrajectoryResults(self, traj, save_plots): pass traj.save_results = False - - - def markObservationAsProcessed(self, met_obs): - """ Mark the given meteor observation as processed. """ - - if self.db is None: - return - self.db.addProcessedDir(met_obs.station_code, met_obs.rel_proc_path) - - - - def markObservationAsPaired(self, met_obs): - """ Mark the given meteor observation as paired in a trajectory. """ - - if self.db is None: - return - self.db.addPairedObservation(met_obs) - - - def addTrajectory(self, traj, failed_jdt_ref=None): """ Add the resulting trajectory to the database. @@ -1508,7 +1442,7 @@ def removeTrajectory(self, traj_reduced): """ Remove the trajectory from the data base and disk. """ # in mcmode 2 the database isn't loaded but we still need to delete updated trajectories - if self.mc_mode == 2: + if self.mc_mode == MCMODE_PHASE2: if os.path.isfile(traj_reduced.traj_file_path): traj_dir = os.path.dirname(traj_reduced.traj_file_path) shutil.rmtree(traj_dir, ignore_errors=True) @@ -1778,6 +1712,12 @@ def _breakHandler(signum, frame): arg_parser.add_argument('--mcmode', '--mcmode', type=int, default=0, help="Run just simple soln (1), just monte-carlos (2) or both (0, default).") + arg_parser.add_argument('--candmode', '--candmode', type=int, default=0, + help="Run normally (0), create candidates only (1), load previously-created candidates(2).") + + arg_parser.add_argument('--archiveoldrecords', '--archiveoldrecords', type=int, default=3, + help="Months back to archive old data. Default 3. 
Zero means don't archive (useful in testing).") + arg_parser.add_argument('--maxtrajs', '--maxtrajs', type=int, default=None, help="Max number of trajectories to reload in each pass when doing the Monte-Carlo phase") @@ -1785,7 +1725,7 @@ def _breakHandler(signum, frame): help="Minutes to wait between runs in auto-mode") arg_parser.add_argument('--remotehost', '--remotehost', type=str, default=None, - help="Remote host to collect and return MC phase solutions to. Supports internet-distributed processing.") + help="Remote host to collect candidates and return solutions to. Supports internet-distributed processing.") arg_parser.add_argument('--verbose', '--verbose', help='Verbose logging.', default=False, action="store_true") @@ -1870,7 +1810,7 @@ def _breakHandler(signum, frame): trajectory_constraints.max_arcsec_err = cml_args.maxerr remotehost = cml_args.remotehost - if cml_args.mcmode !=2 and remotehost is not None: + if cml_args.mcmode !=MCMODE_PHASE2 and remotehost is not None: log.info('remotehost only applicable in mcmode 2') remotehost = None @@ -1883,7 +1823,7 @@ def _breakHandler(signum, frame): if cml_args.maxtrajs is not None: max_trajs = int(cml_args.maxtrajs) - if cml_args.mcmode == 2: + if cml_args.mcmode == MCMODE_PHASE2: log.info(f'Reloading at most {max_trajs} phase1 trajectories.') # Set the number of CPU cores @@ -1949,10 +1889,10 @@ def _breakHandler(signum, frame): dh = RMSDataHandle( cml_args.dir_path, dt_range=event_time_range, db_dir=cml_args.dbdir, output_dir=cml_args.outdir, - mcmode=cml_args.mcmode, max_trajs=max_trajs, remotehost=remotehost, verbose=cml_args.verbose) + mcmode=cml_args.mcmode, max_trajs=max_trajs, remotehost=remotehost, verbose=cml_args.verbose, archivemonths=cml_args.archiveoldrecords) # If there is nothing to process, stop, unless we're in mcmode 2 (processing_list is not used in this case) - if not dh.processing_list and cml_args.mcmode < 2: + if not dh.processing_list and cml_args.mcmode != MCMODE_PHASE2: log.info("")
log.info("Nothing to process!") log.info("Probably everything is already processed.") @@ -1962,7 +1902,7 @@ def _breakHandler(signum, frame): ### GENERATE DAILY TIME BINS ### - if cml_args.mcmode != 2: + if cml_args.mcmode != MCMODE_PHASE2: # Find the range of datetimes of all folders (take only those after the year 2000) proc_dir_dts = [entry[3] for entry in dh.processing_list if entry[3] is not None] proc_dir_dts = [dt for dt in proc_dir_dts if dt > datetime.datetime(2000, 1, 1, 0, 0, 0, @@ -2019,20 +1959,23 @@ def _breakHandler(signum, frame): log.info("") # Load data of unprocessed observations - if cml_args.mcmode != 2: + if cml_args.mcmode != MCMODE_PHASE2 and cml_args.candmode != CANDMODE_LOAD: dh.unpaired_observations = dh.loadUnpairedObservations(dh.processing_list, dt_range=(bin_beg, bin_end)) + log.info(f'loaded {len(dh.unpaired_observations)} observations') # refresh list of calculated trajectories from disk dh.removeDeletedTrajectories() dh.loadComputedTrajectories(os.path.join(dh.output_dir, OUTPUT_TRAJ_DIR), dt_range=[bin_beg, bin_end]) - if cml_args.mcmode != 2: + if cml_args.mcmode != MCMODE_PHASE2: dh.removeDuplicateTrajectories(dt_range=[bin_beg, bin_end]) # Run the trajectory correlator tc = TrajectoryCorrelator(dh, trajectory_constraints, cml_args.velpart, data_in_j2000=True, enableOSM=cml_args.enableOSM) bin_time_range = [bin_beg, bin_end] - tc.run(event_time_range=event_time_range, mcmode=cml_args.mcmode, bin_time_range=bin_time_range) + tc.run(event_time_range=event_time_range, mcmode=cml_args.mcmode, bin_time_range=bin_time_range, candidatemode=cml_args.candmode) + + closeObsDatabase(dh.observations_db) else: # there were no datasets to process log.info('no data to process yet') diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index 59f59a19..f3ccf526 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -33,9 +33,9 @@ log = logging.getLogger("traj_correlator") -def 
collectRemoteTrajectories(remotehost, max_trajs, output_dir): +def collectRemoteData(remotehost, max_trajs, output_dir, datatype='traj'): """ - Collect trajectory pickles from a remote server for local phase2 (monte-carlo) processing + Collect trajectory or candidate pickles from a remote server for local processing NB: do NOT use os.path.join here, as it will break on Windows """ @@ -77,7 +77,7 @@ def collectRemoteTrajectories(remotehost, max_trajs, output_dir): return -def uploadTrajToRemote(remotehost, trajfile, output_dir): +def uploadDataToRemote(remotehost, trajfile, output_dir, datatype='traj'): """ At the end of MC phase, upload the trajectory pickle and report to a remote host for integration into the solved dataset @@ -107,7 +107,7 @@ def uploadTrajToRemote(remotehost, trajfile, output_dir): return -def moveRemoteTrajectories(output_dir): +def moveRemoteData(output_dir, datatype='traj'): """ Move remotely processed pickle files to their target location in the trajectories area, making sure we clean up any previously-calculated trajectory and temporary files From 240a720675c7bc2eb5299eff0cf32ee2adbae219 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 16 Jan 2026 23:06:54 +0000 Subject: [PATCH 002/132] print candmode in logs --- wmpl/Trajectory/CorrelateRMS.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index d74d9db2..2b8ba590 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -23,7 +23,7 @@ from wmpl.Formats.CAMS import loadFTPDetectInfo from wmpl.Trajectory.CorrelateEngine import TrajectoryCorrelator, TrajectoryConstraints -from wmpl.Trajectory.CorrelateEngine import MCMODE_NONE, MCMODE_PHASE2, CANDMODE_LOAD +from wmpl.Trajectory.CorrelateEngine import MCMODE_NONE, MCMODE_PHASE2, CANDMODE_LOAD, CANDMODE_SAVE from wmpl.Utils.Math import generateDatetimeBins from wmpl.Utils.OSTools import mkdirP from wmpl.Utils.Pickling 
import loadPickle, savePickle @@ -1833,6 +1833,11 @@ def _breakHandler(signum, frame): trajectory_constraints.mc_cores = cpu_cores log.info("Running using {:d} CPU cores.".format(cpu_cores)) + if cml_args.candmode == CANDMODE_LOAD: + log.info('Loading Candidates') + elif cml_args.candmode == CANDMODE_SAVE: + log.info('Saving Candidates') + # Run processing. If the auto run more is not on, the loop will break after one run previous_start_time = None while True: From b1040c6a17ccf509de3e0a3fe0502b400a7738a9 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 17 Jan 2026 17:05:22 +0000 Subject: [PATCH 003/132] tidy up logging --- wmpl/Trajectory/CorrelateEngine.py | 33 ++++++++++++++++-------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index df208e46..6b4e33f2 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1570,15 +1570,16 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca merged_candidate_trajectories.append(merged_candidate) candidate_trajectories = merged_candidate_trajectories - + log.info("-----------------------") log.info(f'There are {remaining_unpaired} remaining unpaired observations in this bucket.') + log.info("-----------------------") # in candidatemode mode 1 we want to save the candidates to disk if self.candidatemode == CANDMODE_SAVE: self.getCandidateFolders() - print("-----------------------") - print('SAVING {} CANDIDATES'.format(len(candidate_trajectories))) - print("-----------------------") + log.info("-----------------------") + log.info('SAVING {} CANDIDATES'.format(len(candidate_trajectories))) + log.info("-----------------------") for matched_observations in candidate_trajectories: # randomly select a node from the list of nodes then check that its actually listening @@ -1602,22 +1603,24 @@ def run(self, event_time_range=None, bin_time_range=None, 
mcmode=MCMODE_NONE, ca for curr_node in self.node_list.keys(): save_path = self.node_list[curr_node]['node_path'] + log.info("-----------------------") log.info(f'There are {len(glob.glob(os.path.join(save_path, "*.pickle")))} candidates for {curr_node}') + log.info("-----------------------") return else: - print("-----------------------") - print('PROCESSING {} CANDIDATES'.format(len(candidate_trajectories))) - print("-----------------------") + log.info("-----------------------") + log.info('PROCESSING {} CANDIDATES'.format(len(candidate_trajectories))) + log.info("-----------------------") # end of 'if self.candidatemode != CANDMODE_LOAD' ### ### - # otherwise we doing self.candidatemode == CANDMODE_SAVE else: + # candidatemode is LOAD so load any available candidates for processing traj_solved_count = 0 candidate_trajectories = [] - print("-----------------------") - print('LOADING CANDIDATES') - print("-----------------------") + log.info("-----------------------") + log.info('LOADING CANDIDATES') + log.info("-----------------------") self.getCandidateFolders() # only load candidates from this node's candidate folder save_path = self.node_list[platform.uname()[1]]['node_path'] @@ -1636,9 +1639,9 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca os.rename(os.path.join(save_path, fil), procfile) except Exception: print(f'Candidate {fil} went away, probably picked up by another process') - print("-----------------------") - print('LOADED {} TRAJECTORIES'.format(len(candidate_trajectories))) - print("-----------------------") + log.info("-----------------------") + log.info('LOADED {} CANDIDATES'.format(len(candidate_trajectories))) + log.info("-----------------------") # end of 'self.candidatemode == CANDMODE_LOAD' # end of 'if mcmode != MCMODE_PHASE2' else: @@ -1834,7 +1837,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca # This will increase the number of MC runs while keeping the processing 
time the same mc_runs = int(np.ceil(mc_runs/self.traj_constraints.mc_cores)*self.traj_constraints.mc_cores) - # pass in matched_observations here so that solveTrajectory can mark them paired if they're used + # pass in matched_observations here so that solveTrajectory can mark them unpaired if the solver fails result = self.solveTrajectory(traj, mc_runs, mcmode=mcmode, matched_obs=matched_observations, orig_traj=traj) traj_solved_count += int(result) From aa20b49e57be8c3e047d320feaf851e8626d001e Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 17 Jan 2026 17:06:01 +0000 Subject: [PATCH 004/132] bug in conversion step --- wmpl/Trajectory/CorrelateRMS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 2b8ba590..7992f9e0 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -559,7 +559,7 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode for stat_id in self.db.paired_obs.keys(): for obs_id in self.db.paired_obs[stat_id]: addPairedObs(self.observations_db, stat_id, obs_id) - del self.dh.db.paired_obs[stat_id][obs_id] + del self.db.paired_obs[stat_id][obs_id] print('done') if archivemonths != 0: log.info('Archiving older entries....') From 02a9865d279122ecd2beec23966f56b29856c6b6 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 19 Jan 2026 15:45:34 +0000 Subject: [PATCH 005/132] avoid committing to sqlite after every update --- wmpl/Trajectory/CorrelateDB.py | 12 ++++++++++-- wmpl/Trajectory/CorrelateEngine.py | 8 ++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index dca34377..9215f67d 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -33,7 +33,7 @@ def checkObsPaired(dbhandle, station_code, obs_id): return True -def addPairedObs(dbhandle,station_code, obs_id): +def 
addPairedObs(dbhandle,station_code, obs_id, commitnow=True): """ addPairedObs - add a potentially paired Observation to the database @@ -52,13 +52,21 @@ def addPairedObs(dbhandle,station_code, obs_id): log.info(f'updating {obs_id} in paired_obs table') sqlstr = f"update paired_obs set status=1 where station_code='{station_code}' and obs_id='{obs_id}'" cur.execute(sqlstr) - dbhandle.commit() + if commitnow: + dbhandle.commit() if not checkObsPaired(dbhandle, station_code, obs_id): log.info(f'failed to add {obs_id} to paired_obs table') return False return True +def commitObsDb(dbhandle): + """ commit the obs db, called only during initialisation + """ + dbhandle.commit() + return + + def unpairObs(dbhandle, station_code, obs_id): cur = dbhandle.cursor() cur.execute(f"update paired_obs set status=0 where station_code='{station_code}' and obs_id='{obs_id}'") diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 6b4e33f2..4006eab4 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1443,7 +1443,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca # Store candidate trajectories log.info("") - log.info(" --- ADDING CANDIDATE ---") + log.info(f" --- ADDING CANDIDATE at {met_obs.reference_dt.isoformat()} ---") candidate_trajectories.append(matched_observations) ### Merge all candidate trajectories which share the same observations ### @@ -1569,7 +1569,11 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca # Add the merged observation to the final list merged_candidate_trajectories.append(merged_candidate) - candidate_trajectories = merged_candidate_trajectories + log.info("-----------------------") + log.info('CHECKING FOR ALREADY-FAILED CANDIDATES') + log.info("-----------------------") + candidate_trajectories, remaining_unpaired = self.dh.excludeAlreadyFailedCandidates(merged_candidate_trajectories, remaining_unpaired) + 
log.info("-----------------------") log.info(f'There are {remaining_unpaired} remaining unpaired observations in this bucket.') log.info("-----------------------") From f569ebe3bc4a11cbdd8b5f82bc5f0946fa31f5dc Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 19 Jan 2026 15:46:06 +0000 Subject: [PATCH 006/132] support for excluding already-failed candidates --- wmpl/Trajectory/CorrelateRMS.py | 54 +++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 7992f9e0..6876caee 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -29,7 +29,9 @@ from wmpl.Utils.Pickling import loadPickle, savePickle from wmpl.Utils.TrajConversions import datetime2JD, jd2Date from wmpl.Utils.remoteDataHandling import collectRemoteData, moveRemoteData, uploadDataToRemote -from wmpl.Trajectory.CorrelateDB import openObsDatabase, checkObsPaired, addPairedObs, closeObsDatabase +from wmpl.Trajectory.CorrelateDB import openObsDatabase, closeObsDatabase, commitObsDb +from wmpl.Trajectory.CorrelateDB import checkObsPaired, addPairedObs, unpairObs +from wmpl.Trajectory.Trajectory import Trajectory ### CONSTANTS ### @@ -553,13 +555,15 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode log.info("Loading database: {:s}".format(database_path)) self.db = DatabaseJSON(database_path, verbose=self.verbose) self.observations_db = openObsDatabase(db_dir, 'observations') - if len(self.db.paired_obs) > 0: + if hasattr(self.db, 'paired_obs') and len(self.db.paired_obs) > 0: log.info('moving observations to sqlite') print('moving observations to sqlite') - for stat_id in self.db.paired_obs.keys(): + keylist = self.db.paired_obs.keys() + for stat_id in keylist: for obs_id in self.db.paired_obs[stat_id]: - addPairedObs(self.observations_db, stat_id, obs_id) - del self.db.paired_obs[stat_id][obs_id] + addPairedObs(self.observations_db, 
stat_id, obs_id, commitnow=False) + del self.db.paired_obs + commitObsDb(self.observations_db) print('done') if archivemonths != 0: log.info('Archiving older entries....') @@ -900,7 +904,6 @@ def loadUnpairedObservations(self, processing_list, dt_range=None): log.info("") log.info(" Finished loading unpaired observations!") - self.saveDatabase() return unpaired_met_obs_list @@ -1484,6 +1487,45 @@ def cleanupPhase2TempPickle(self, traj, success=False): savePickle(traj, os.path.join(self.phase1_dir, 'processed'), fldr_name + '_trajectory.pickle') return + def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaired): + # TODO make this function work! + # wants to go through the candidates and check if they correspond to already-failed + candidate_trajectories=[] + for cand in matched_observations: + ref_dt = min([met_obs.reference_dt for _, met_obs, _ in cand]) + jdt_ref = datetime2JD(ref_dt) + traj = Trajectory(jdt_ref, verbose=False) + # Feed the observations into the trajectory solver + for obs_temp, met_obs, _ in cand: + + # Normalize the observations to the reference Julian date + jdt_ref_curr = datetime2JD(met_obs.reference_dt) + obs_temp.time_data += (jdt_ref_curr - jdt_ref)*86400 + + traj.infillWithObs(obs_temp) + + ### Recompute the reference JD and all times so that the first time starts at 0 ### + + # Determine the first relative time from reference JD + t0 = min([obs.time_data[0] for obs in traj.observations if (not obs.ignore_station) + or (not np.all(obs.ignore_list))]) + + # If the first time is not 0, normalize times so that the earliest time is 0 + if t0 != 0.0: + # Recompute the reference JD to corresponds with t0 + traj.jdt_ref = traj.jdt_ref + t0/86400.0 + + if self.checkTrajIfFailed(traj): + log.info('--------') + log.info(f'Trajectory at {jd2Date(traj.jdt_ref,dt_obj=True).isoformat()} already failed, skipping') + for _, met_obs_temp, _ in cand: + log.info(f'Marking {met_obs_temp.id} unpaired') + 
unpairObs(self.observations_db, met_obs_temp.station_code, met_obs_temp.id) + remaining_unpaired -= 1 + else: + candidate_trajectories.append(cand) + + return candidate_trajectories, remaining_unpaired def checkTrajIfFailed(self, traj): """ Check if the given trajectory has been computed with the same observations and has failed to be From 12dab86bdf5505d6a9cdd63e54daf62729c54e70 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 19 Jan 2026 17:45:20 +0000 Subject: [PATCH 007/132] start adding some standalone management to the sqlite db scripts --- wmpl/Trajectory/CorrelateDB.py | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 9215f67d..ba1293d9 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -4,6 +4,8 @@ import sqlite3 import logging import logging.handlers +import argparse +import datetime log = logging.getLogger("traj_correlator") @@ -88,3 +90,63 @@ def openTrajDatabase(db_path, db_name='processed_trajectories'): cur.execute("CREATE TABLE trajectories()") con.commit() return con + + +if __name__ == '__main__': + arg_parser = argparse.ArgumentParser(description="""Automatically compute trajectories from RMS data in the given directory.""", + formatter_class=argparse.RawTextHelpFormatter) + + arg_parser.add_argument('--dir_path', type=str, default=None, help='Path to the directory containing the databases.') + + arg_parser.add_argument('--database', type=str, default=None, help='Database to process, either observations or trajectories') + + arg_parser.add_argument('--action', type=str, default=None, help='Action to take on the database') + + arg_parser.add_argument("--logdir", type=str, default=None, + help="Path to the directory where the log files will be stored. 
If not given, a logs folder will be created in the database folder") + + arg_parser.add_argument('-r', '--timerange', metavar='TIME_RANGE', + help="""Apply action to this date range in the format: "(YYYYMMDD-HHMMSS,YYYYMMDD-HHMMSS)".""", type=str) + + cml_args = arg_parser.parse_args() + # Find the log directory + log_dir = cml_args.logdir + if log_dir is None: + log_dir = os.path.join(cml_args.dir_path, 'logs') + if not os.path.isdir(log_dir): + os.makedirs(log_dir) + log.setLevel(logging.DEBUG) + + # Init the log formatter + log_formatter = logging.Formatter( + fmt='%(asctime)s-%(levelname)-5s-%(module)-15s:%(lineno)-5d- %(message)s', + datefmt='%Y/%m/%d %H:%M:%S') + + # Init the file handler + timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = os.path.join(log_dir, f"correlate_db_{timestamp}.log") + file_handler = logging.handlers.TimedRotatingFileHandler(log_file, when="midnight", backupCount=7) + file_handler.setFormatter(log_formatter) + log.addHandler(file_handler) + + # Init the console handler (i.e. 
print to console) + console_handler = logging.StreamHandler() + console_handler.setFormatter(log_formatter) + log.addHandler(console_handler) + + dbname = cml_args.database.lower() + action = cml_args.action.lower() + + if dbname == 'observations': + dbhandle = openObsDatabase(cml_args.dir_path) + if action == 'read': + cur = dbhandle.cursor() + cur.execute('select * from paired_obs where status=1') + print(f'there are {len(cur.fetchall())} paired obs') + cur.execute('select * from paired_obs where status=0') + print(f'and {len(cur.fetchall())} unpaired obs') + closeObsDatabase(dbhandle) + elif dbname == 'trajectories': + print('hello') + else: + log.info('valid database not specified') From 9b18baac92e95dea810de353b80c6763dd48af99 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 19 Jan 2026 22:45:23 +0000 Subject: [PATCH 008/132] add obs-date to database add support to archive old records --- wmpl/Trajectory/CorrelateDB.py | 70 ++++++++++++++++++++++++++---- wmpl/Trajectory/CorrelateEngine.py | 2 +- wmpl/Trajectory/CorrelateRMS.py | 18 +++++--- 3 files changed, 76 insertions(+), 14 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index ba1293d9..4d774201 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -6,21 +6,42 @@ import logging.handlers import argparse import datetime + +from wmpl.Utils.TrajConversions import datetime2JD + log = logging.getLogger("traj_correlator") def openObsDatabase(db_path, db_name='observations'): + """ + openObsDatabase - open the observations sqlite database and return a database handle + + The database is created if it doesn't exist. 
+ + :param db_path: the path to the database + :param db_name: the name of the database to open, default 'observations' + :return: database handle + """ + db_full_name = os.path.join(db_path, f'{db_name}.db') log.info(f'opening database {db_full_name}') con = sqlite3.connect(db_full_name) cur = con.cursor() res = cur.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") if res.fetchone() is None: - cur.execute("CREATE TABLE paired_obs(station_code, obs_id unique, status)") + cur.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") con.commit() + cur.close() return con +def commitObsDatabase(dbhandle): + """ commit the obs db + """ + dbhandle.commit() + return + + def closeObsDatabase(dbhandle): dbhandle.commit() dbhandle.close() @@ -28,20 +49,32 @@ def closeObsDatabase(dbhandle): def checkObsPaired(dbhandle, station_code, obs_id): + """ + checkObsPaired - check if an observation is already paired + + :param dbhandle: the database + :param station_code: the station ID + :param obs_id; the observation id + :return: true if matched, false otherwise + + """ cur = dbhandle.cursor() res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}' and status=1") if res.fetchone() is None: return False + cur.close() return True -def addPairedObs(dbhandle,station_code, obs_id, commitnow=True): +def addPairedObs(dbhandle,station_code, obs_id, obs_date, commitnow=True): """ addPairedObs - add a potentially paired Observation to the database :param dbhandle: database connection handle :param station_code: station code eg UK12345 :param obs_id: met_obs observation ID + :param commitnow: boolean true to force commit immediately + :return: true if successful, false if the object already exists :rtype: bool """ @@ -49,11 +82,12 @@ def addPairedObs(dbhandle,station_code, obs_id, commitnow=True): res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE 
station_code='{station_code}' and obs_id='{obs_id}'") if res.fetchone() is None: log.info(f'adding {obs_id} to paired_obs table') - sqlstr = f"insert into paired_obs values ('{station_code}','{obs_id}',1)" + sqlstr = f"insert into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" else: log.info(f'updating {obs_id} in paired_obs table') sqlstr = f"update paired_obs set status=1 where station_code='{station_code}' and obs_id='{obs_id}'" cur.execute(sqlstr) + cur.close() if commitnow: dbhandle.commit() if not checkObsPaired(dbhandle, station_code, obs_id): @@ -62,20 +96,40 @@ def addPairedObs(dbhandle,station_code, obs_id, commitnow=True): return True -def commitObsDb(dbhandle): - """ commit the obs db, called only during initialisation +def unpairObs(dbhandle, station_code, obs_id): """ - dbhandle.commit() - return + unpairObs - mark an observation unpaired by setting the status to zero + :param dbhandle: the database + :param station_code: the station ID + :param obs_id; the observation id + + """ -def unpairObs(dbhandle, station_code, obs_id): cur = dbhandle.cursor() cur.execute(f"update paired_obs set status=0 where station_code='{station_code}' and obs_id='{obs_id}'") dbhandle.commit() + cur.close() return True +def archiveObsDatabase(dbhandle, db_path, arch_prefix, archdate_jd): + # create the database if it doesnt exist + archdb_name = f'{arch_prefix}_observations' + archdb = openObsDatabase(db_path, archdb_name) + closeObsDatabase(archdb) + + # attach the arch db and copy the records then delete them + cur = dbhandle.cursor() + archdb_fullname = os.path.join(db_path, f'{archdb_name}.db') + cur.execute(f"attach database '{archdb_fullname}' as archdb") + cur.execute(f'insert into archdb.paired_obs select * from paired_obs where obs_date < {archdate_jd}') + cur.execute(f'delete from paired_obs where obs_date < {archdate_jd}') + commitObsDatabase() + cur.close() + return + + def openTrajDatabase(db_path, 
db_name='processed_trajectories'): db_full_name = os.path.join(db_path, f'{db_name}.db') log.info(f'opening database {db_full_name}') diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 4006eab4..acb62a67 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1438,7 +1438,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca # Mark observations as processed for _, met_obs_temp, _ in matched_observations: met_obs_temp.processed = True - if addPairedObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id): + if addPairedObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.mean_dt, met_obs_temp.id): remaining_unpaired -= 1 # Store candidate trajectories diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 6876caee..86720880 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -29,7 +29,7 @@ from wmpl.Utils.Pickling import loadPickle, savePickle from wmpl.Utils.TrajConversions import datetime2JD, jd2Date from wmpl.Utils.remoteDataHandling import collectRemoteData, moveRemoteData, uploadDataToRemote -from wmpl.Trajectory.CorrelateDB import openObsDatabase, closeObsDatabase, commitObsDb +from wmpl.Trajectory.CorrelateDB import openObsDatabase, closeObsDatabase, commitObsDatabase, archiveObsDatabase from wmpl.Trajectory.CorrelateDB import checkObsPaired, addPairedObs, unpairObs from wmpl.Trajectory.Trajectory import Trajectory @@ -556,15 +556,21 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.db = DatabaseJSON(database_path, verbose=self.verbose) self.observations_db = openObsDatabase(db_dir, 'observations') if hasattr(self.db, 'paired_obs') and len(self.db.paired_obs) > 0: + log.info('-----------------------------') log.info('moving observations to sqlite') - print('moving observations to sqlite') keylist = 
self.db.paired_obs.keys() for stat_id in keylist: for obs_id in self.db.paired_obs[stat_id]: - addPairedObs(self.observations_db, stat_id, obs_id, commitnow=False) + try: + obs_date = datetime.datetime.strptime(obs_id.split('_')[1], '%Y%m%d-%H%M%S.%f') + except Exception: + obs_date = datetime.datetime(2000,1,1,0,0,0) + addPairedObs(self.observations_db, stat_id, obs_id, obs_date, commitnow=False) del self.db.paired_obs - commitObsDb(self.observations_db) - print('done') + commitObsDatabase(self.observations_db) + self.saveDatabase() + log.info('done') + log.info('-----------------------------') if archivemonths != 0: log.info('Archiving older entries....') try: @@ -665,6 +671,8 @@ def __init__(self, station, obs_id): archdate = datetime.datetime.now(datetime.timezone.utc) - relativedelta(months=older_than) archdate_jd = datetime2JD(archdate) + archiveObsDatabase(self.observations_db, self.db_dir, archdate.strftime("%Y%m"), archdate_jd) + arch_db_path = os.path.join(self.db_dir, f'{archdate.strftime("%Y%m")}_{JSON_DB_NAME}') archdb = DatabaseJSON(arch_db_path, verbose=self.verbose, archiveYM=archdate.strftime("%Y%m")) log.info(f'Archiving db records to {arch_db_path}...') From 8208b773c07645cb16b024a9e00d62818cadd4cc Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 20 Jan 2026 00:13:35 +0000 Subject: [PATCH 009/132] got params in wrong order arg --- wmpl/Trajectory/CorrelateEngine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index acb62a67..8a76a852 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1438,7 +1438,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca # Mark observations as processed for _, met_obs_temp, _ in matched_observations: met_obs_temp.processed = True - if addPairedObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.mean_dt, met_obs_temp.id): + if 
addPairedObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt): remaining_unpaired -= 1 # Store candidate trajectories From 1dc6ae54335fdd5b7e8f0fd1e32fa75318902120 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 20 Jan 2026 00:33:03 +0000 Subject: [PATCH 010/132] improve archiving process --- wmpl/Trajectory/CorrelateDB.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 4d774201..116b876f 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -73,6 +73,7 @@ def addPairedObs(dbhandle,station_code, obs_id, obs_date, commitnow=True): :param dbhandle: database connection handle :param station_code: station code eg UK12345 :param obs_id: met_obs observation ID + :param obs_date: observation date/time :param commitnow: boolean true to force commit immediately :return: true if successful, false if the object already exists @@ -123,9 +124,19 @@ def archiveObsDatabase(dbhandle, db_path, arch_prefix, archdate_jd): cur = dbhandle.cursor() archdb_fullname = os.path.join(db_path, f'{archdb_name}.db') cur.execute(f"attach database '{archdb_fullname}' as archdb") - cur.execute(f'insert into archdb.paired_obs select * from paired_obs where obs_date < {archdate_jd}') + try: + cur.execute(f'insert into archdb.paired_obs select * from paired_obs where obs_date < {archdate_jd}') + except Exception: + log.info('Some records already exist in archdb, doing row-wise copy') + cur.execute(f'select * from paired_obs where obs_date < {archdate_jd}') + for row in cur.fetchall(): + try: + cur.execute(f"insert into archdb.paired_obs values('{row[0]}','{row[1]}',{row[2]},{row[3]})") + except Exception: + log.info(f'{row[1]} already exists in target') + cur.execute(f'delete from paired_obs where obs_date < {archdate_jd}') - commitObsDatabase() + commitObsDatabase(dbhandle) cur.close() return From 
332c5f3f7334bf268754199daa46172af67b5922 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 20 Jan 2026 11:38:23 +0000 Subject: [PATCH 011/132] log how many obs got moved to sqlite --- wmpl/Trajectory/CorrelateRMS.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 86720880..14c0763d 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -557,7 +557,8 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.observations_db = openObsDatabase(db_dir, 'observations') if hasattr(self.db, 'paired_obs') and len(self.db.paired_obs) > 0: log.info('-----------------------------') - log.info('moving observations to sqlite') + log.info('moving observations to sqlite - this may take some time....') + i = 0 keylist = self.db.paired_obs.keys() for stat_id in keylist: for obs_id in self.db.paired_obs[stat_id]: @@ -566,10 +567,13 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode except Exception: obs_date = datetime.datetime(2000,1,1,0,0,0) addPairedObs(self.observations_db, stat_id, obs_id, obs_date, commitnow=False) + i += 1 + if i % 1000: + log.info(f'moved {i} observations') del self.db.paired_obs commitObsDatabase(self.observations_db) self.saveDatabase() - log.info('done') + log.info(f'done - moved {i} observations') log.info('-----------------------------') if archivemonths != 0: log.info('Archiving older entries....') @@ -1527,7 +1531,6 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire log.info('--------') log.info(f'Trajectory at {jd2Date(traj.jdt_ref,dt_obj=True).isoformat()} already failed, skipping') for _, met_obs_temp, _ in cand: - log.info(f'Marking {met_obs_temp.id} unpaired') unpairObs(self.observations_db, met_obs_temp.station_code, met_obs_temp.id) remaining_unpaired -= 1 else: From db77938674c9b89794bb0d79c0ebe5a0d9960a8b Mon Sep 
17 00:00:00 2001 From: Mark McIntyre Date: Tue, 20 Jan 2026 11:38:40 +0000 Subject: [PATCH 012/132] make logging provisional --- wmpl/Trajectory/CorrelateDB.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 116b876f..c5e273da 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -66,7 +66,7 @@ def checkObsPaired(dbhandle, station_code, obs_id): return True -def addPairedObs(dbhandle,station_code, obs_id, obs_date, commitnow=True): +def addPairedObs(dbhandle,station_code, obs_id, obs_date, commitnow=True, verbose=False): """ addPairedObs - add a potentially paired Observation to the database @@ -82,22 +82,24 @@ def addPairedObs(dbhandle,station_code, obs_id, obs_date, commitnow=True): cur = dbhandle.cursor() res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}'") if res.fetchone() is None: - log.info(f'adding {obs_id} to paired_obs table') + if verbose: + log.info(f'adding {obs_id} to paired_obs table') sqlstr = f"insert into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" else: - log.info(f'updating {obs_id} in paired_obs table') + if verbose: + log.info(f'updating {obs_id} in paired_obs table') sqlstr = f"update paired_obs set status=1 where station_code='{station_code}' and obs_id='{obs_id}'" cur.execute(sqlstr) cur.close() if commitnow: dbhandle.commit() if not checkObsPaired(dbhandle, station_code, obs_id): - log.info(f'failed to add {obs_id} to paired_obs table') + log.warning(f'failed to add {obs_id} to paired_obs table') return False return True -def unpairObs(dbhandle, station_code, obs_id): +def unpairObs(dbhandle, station_code, obs_id, verbose=False): """ unpairObs - mark an observation unpaired by setting the status to zero @@ -108,6 +110,8 @@ def unpairObs(dbhandle, station_code, obs_id): """ cur = dbhandle.cursor() + if verbose: + 
log.info(f'unpairing {obs_id}') cur.execute(f"update paired_obs set status=0 where station_code='{station_code}' and obs_id='{obs_id}'") dbhandle.commit() cur.close() From 1089a1b1fc14ec10df77c161df9879478fc3665c Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 20 Jan 2026 11:38:58 +0000 Subject: [PATCH 013/132] remove unnecessary logging --- wmpl/Trajectory/CorrelateEngine.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 8a76a852..a8b3396e 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1740,7 +1740,6 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca log.info("Max convergence angle too small: {:.1f} < {:.1f} deg".format(qc_max, self.traj_constraints.min_qc)) for _, met_obs_temp, _ in matched_observations: - log.info(f'Marking {met_obs_temp.id} unpaired') unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) continue @@ -1807,7 +1806,6 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca if self.dh.checkTrajIfFailed(traj): log.info("The same trajectory already failed to be computed in previous runs!") for _, met_obs_temp, _ in matched_observations: - log.info(f'Marking {met_obs_temp.id} unpaired') unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) continue From e4320a51471a74470fe6877475f002973b19afd3 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 21 Jan 2026 21:05:22 +0000 Subject: [PATCH 014/132] make obs database a class --- wmpl/Trajectory/CorrelateDB.py | 273 +++++++++++++++-------------- wmpl/Trajectory/CorrelateEngine.py | 27 ++- wmpl/Trajectory/CorrelateRMS.py | 24 ++- 3 files changed, 164 insertions(+), 160 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index c5e273da..4e51b04f 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py 
@@ -12,137 +12,144 @@ log = logging.getLogger("traj_correlator") -def openObsDatabase(db_path, db_name='observations'): - """ - openObsDatabase - open the observations sqlite database and return a database handle - - The database is created if it doesn't exist. - - :param db_path: the path to the database - :param db_name: the name of the database to open, default 'observations' - :return: database handle - """ - - db_full_name = os.path.join(db_path, f'{db_name}.db') - log.info(f'opening database {db_full_name}') - con = sqlite3.connect(db_full_name) - cur = con.cursor() - res = cur.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") - if res.fetchone() is None: - cur.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") - con.commit() - cur.close() - return con - - -def commitObsDatabase(dbhandle): - """ commit the obs db - """ - dbhandle.commit() - return - - -def closeObsDatabase(dbhandle): - dbhandle.commit() - dbhandle.close() - return - - -def checkObsPaired(dbhandle, station_code, obs_id): - """ - checkObsPaired - check if an observation is already paired - - :param dbhandle: the database - :param station_code: the station ID - :param obs_id; the observation id - :return: true if matched, false otherwise - - """ - cur = dbhandle.cursor() - res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}' and status=1") - if res.fetchone() is None: - return False - cur.close() - return True - - -def addPairedObs(dbhandle,station_code, obs_id, obs_date, commitnow=True, verbose=False): - """ - addPairedObs - add a potentially paired Observation to the database - - :param dbhandle: database connection handle - :param station_code: station code eg UK12345 - :param obs_id: met_obs observation ID - :param obs_date: observation date/time - :param commitnow: boolean true to force commit immediately - - :return: true if successful, false if the 
object already exists - :rtype: bool - """ - cur = dbhandle.cursor() - res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}'") - if res.fetchone() is None: +class ObservationDatabase(): + def __init__(self, db_path, db_name='observations'): + self.dbhandle = self.openObsDatabase(db_path, db_name) + + def openObsDatabase(self, db_path, db_name='observations'): + """ + openObsDatabase - open the observations sqlite database and return a database handle + + The database is created if it doesn't exist. + + :param db_path: the path to the database + :param db_name: the name of the database to open, default 'observations' + :return: database handle + """ + + db_full_name = os.path.join(db_path, f'{db_name}.db') + log.info(f'opening database {db_full_name}') + con = sqlite3.connect(db_full_name) + cur = con.cursor() + res = cur.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") + if res.fetchone() is None: + cur.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") + con.commit() + cur.close() + return con + + def commitObsDatabase(self): + """ commit the obs db + """ + self.dbhandle.commit() + return + + def closeObsDatabase(self): + self.dbhandle.commit() + self.dbhandle.close() + return + + + def checkObsPaired(self, station_code, obs_id): + """ + checkObsPaired - check if an observation is already paired + + :param dbhandle: the database + :param station_code: the station ID + :param obs_id; the observation id + :return: true if matched, false otherwise + + """ + cur = self.dbhandle.cursor() + res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}' and status=1") + if res.fetchone() is None: + return False + cur.close() + return True + + + def addPairedObs(self, station_code, obs_id, obs_date, commitnow=True, verbose=False): + """ + addPairedObs - add a potentially paired Observation to 
the database + + :param dbhandle: database connection handle + :param station_code: station code eg UK12345 + :param obs_id: met_obs observation ID + :param obs_date: observation date/time + :param commitnow: boolean true to force commit immediately + + :return: true if successful, false if the object already exists + :rtype: bool + """ + cur = self.dbhandle.cursor() + res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}'") + if res.fetchone() is None: + if verbose: + log.info(f'adding {obs_id} to paired_obs table') + sqlstr = f"insert into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" + else: + if verbose: + log.info(f'updating {obs_id} in paired_obs table') + sqlstr = f"update paired_obs set status=1 where station_code='{station_code}' and obs_id='{obs_id}'" + cur.execute(sqlstr) + cur.close() + if commitnow: + self.dbhandle.commit() + if not self.checkObsPaired(station_code, obs_id): + log.warning(f'failed to add {obs_id} to paired_obs table') + return False + return True + + + def unpairObs(self, station_code, obs_id, verbose=False): + """ + unpairObs - mark an observation unpaired by setting the status to zero + + :param dbhandle: the database + :param station_code: the station ID + :param obs_id; the observation id + + """ + + cur = self.dbhandle.cursor() if verbose: - log.info(f'adding {obs_id} to paired_obs table') - sqlstr = f"insert into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" - else: - if verbose: - log.info(f'updating {obs_id} in paired_obs table') - sqlstr = f"update paired_obs set status=1 where station_code='{station_code}' and obs_id='{obs_id}'" - cur.execute(sqlstr) - cur.close() - if commitnow: - dbhandle.commit() - if not checkObsPaired(dbhandle, station_code, obs_id): - log.warning(f'failed to add {obs_id} to paired_obs table') - return False - return True - - -def unpairObs(dbhandle, station_code, obs_id, verbose=False): - 
""" - unpairObs - mark an observation unpaired by setting the status to zero - - :param dbhandle: the database - :param station_code: the station ID - :param obs_id; the observation id - - """ - - cur = dbhandle.cursor() - if verbose: - log.info(f'unpairing {obs_id}') - cur.execute(f"update paired_obs set status=0 where station_code='{station_code}' and obs_id='{obs_id}'") - dbhandle.commit() - cur.close() - return True - - -def archiveObsDatabase(dbhandle, db_path, arch_prefix, archdate_jd): - # create the database if it doesnt exist - archdb_name = f'{arch_prefix}_observations' - archdb = openObsDatabase(db_path, archdb_name) - closeObsDatabase(archdb) - - # attach the arch db and copy the records then delete them - cur = dbhandle.cursor() - archdb_fullname = os.path.join(db_path, f'{archdb_name}.db') - cur.execute(f"attach database '{archdb_fullname}' as archdb") - try: - cur.execute(f'insert into archdb.paired_obs select * from paired_obs where obs_date < {archdate_jd}') - except Exception: - log.info('Some records already exist in archdb, doing row-wise copy') - cur.execute(f'select * from paired_obs where obs_date < {archdate_jd}') - for row in cur.fetchall(): - try: - cur.execute(f"insert into archdb.paired_obs values('{row[0]}','{row[1]}',{row[2]},{row[3]})") - except Exception: - log.info(f'{row[1]} already exists in target') - - cur.execute(f'delete from paired_obs where obs_date < {archdate_jd}') - commitObsDatabase(dbhandle) - cur.close() - return + log.info(f'unpairing {obs_id}') + try: + cur.execute(f"update paired_obs set status=0 where station_code='{station_code}' and obs_id='{obs_id}'") + except Exception: + # obs wasn't in the database so no need to unpair it + pass + self.dbhandle.commit() + cur.close() + return True + + + def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): + # create the database if it doesnt exist + archdb_name = f'{arch_prefix}_observations' + archdb = self.openObsDatabase(db_path, archdb_name) + archdb.commit() 
+ archdb.close() + + # attach the arch db and copy the records then delete them + cur = self.dbhandle.cursor() + archdb_fullname = os.path.join(db_path, f'{archdb_name}.db') + cur.execute(f"attach database '{archdb_fullname}' as archdb") + try: + cur.execute(f'insert into archdb.paired_obs select * from paired_obs where obs_date < {archdate_jd}') + except Exception: + log.info('Some records already exist in archdb, doing row-wise copy') + cur.execute(f'select * from paired_obs where obs_date < {archdate_jd}') + for row in cur.fetchall(): + try: + cur.execute(f"insert into archdb.paired_obs values('{row[0]}','{row[1]}',{row[2]},{row[3]})") + except Exception: + log.info(f'{row[1]} already exists in target') + + cur.execute(f'delete from paired_obs where obs_date < {archdate_jd}') + self.commitObsDatabase() + cur.close() + return def openTrajDatabase(db_path, db_name='processed_trajectories'): @@ -207,14 +214,14 @@ def openTrajDatabase(db_path, db_name='processed_trajectories'): action = cml_args.action.lower() if dbname == 'observations': - dbhandle = openObsDatabase(cml_args.dir_path) + obsdb = ObservationDatabase(cml_args.dir_path) if action == 'read': - cur = dbhandle.cursor() + cur = obsdb.dbhandle.cursor() cur.execute('select * from paired_obs where status=1') print(f'there are {len(cur.fetchall())} paired obs') cur.execute('select * from paired_obs where status=0') print(f'and {len(cur.fetchall())} unpaired obs') - closeObsDatabase(dbhandle) + obsdb.closeObsDatabase() elif dbname == 'trajectories': print('hello') else: diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index a8b3396e..a36b39f3 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -21,7 +21,6 @@ from wmpl.Utils.TrajConversions import J2000_JD, geo2Cartesian, cartesian2Geo, raDec2AltAz, altAz2RADec, \ raDec2ECI, datetime2JD, jd2Date, equatorialCoordPrecession_vect from wmpl.Utils.Pickling import loadPickle, savePickle -from 
wmpl.Trajectory.CorrelateDB import addPairedObs, unpairObs CANDMODE_NONE = 0 CANDMODE_SAVE = 1 @@ -883,8 +882,8 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, o log.info("Trajectory skipped and added to fails!") if matched_obs: for _, met_obs_temp, _ in matched_obs: - log.info(f'Marking {met_obs_temp.id} unpaired') - unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) + # log.info(f'Marking {met_obs_temp.id} unpaired') + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) return False # If there are only two stations, make sure to reject solutions which have stations with @@ -899,7 +898,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, o self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref) for _, met_obs_temp, _ in matched_obs: log.info(f'Marking {met_obs_temp.id} unpaired') - unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) return False @@ -1337,7 +1336,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca else: for met_obs_temp, _ in candidate_observations: - unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) log.info("New trajectory solution failed, keeping the old trajectory...") ### ### @@ -1438,7 +1437,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca # Mark observations as processed for _, met_obs_temp, _ in matched_observations: met_obs_temp.processed = True - if addPairedObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt): + if self.dh.observations_db.addPairedObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt): remaining_unpaired -= 1 # Store candidate trajectories @@ -1481,7 +1480,6 @@ 
def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca # Check for pairs found_first_pair = False for j, traj_cand_test in enumerate(candidate_trajectories[(i + 1):]): - # Skip same observations if traj_cand_ref[0] == traj_cand_test[0]: continue @@ -1520,9 +1518,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca ra_mean_test = meanAngle([ra for ra, _ in plane_radiants_test]) dec_mean_test = np.mean([dec for _, dec in plane_radiants_test]) - # Skip the mergning attempt if the estimated radiants are too far off + # Skip the merging attempt if the estimated radiants are too far off if np.degrees(angleBetweenSphericalCoords(dec_mean_ref, ra_mean_ref, dec_mean_test, ra_mean_test)) > self.traj_constraints.max_merge_radiant_angle: - continue @@ -1535,9 +1532,9 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca for entry in traj_cand_test: # Make sure the added observation is not from a station that's already added - if entry[1].station_code in ref_stations: - continue - + #if entry[1].station_code in ref_stations: + # print('station code already in ref stations') + # continue if entry[1] not in obs_list_ref: # Print the reference and the merged radiants @@ -1554,6 +1551,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca traj_cand_ref.append(entry) log.info("Merged radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_test), np.degrees(dec_mean_test))) + log.info(f'Candidate contains {len(traj_cand_ref)} obs') @@ -1572,6 +1570,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca log.info("-----------------------") log.info('CHECKING FOR ALREADY-FAILED CANDIDATES') log.info("-----------------------") + candidate_trajectories, remaining_unpaired = self.dh.excludeAlreadyFailedCandidates(merged_candidate_trajectories, remaining_unpaired) log.info("-----------------------") @@ -1740,7 +1739,7 @@ def run(self, 
event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca log.info("Max convergence angle too small: {:.1f} < {:.1f} deg".format(qc_max, self.traj_constraints.min_qc)) for _, met_obs_temp, _ in matched_observations: - unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) continue @@ -1806,7 +1805,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca if self.dh.checkTrajIfFailed(traj): log.info("The same trajectory already failed to be computed in previous runs!") for _, met_obs_temp, _ in matched_observations: - unpairObs(self.dh.observations_db, met_obs_temp.station_code, met_obs_temp.id) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) continue # pass in matched_observations here so that solveTrajectory can mark them paired if they're used diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 14c0763d..aa76e5aa 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -29,8 +29,7 @@ from wmpl.Utils.Pickling import loadPickle, savePickle from wmpl.Utils.TrajConversions import datetime2JD, jd2Date from wmpl.Utils.remoteDataHandling import collectRemoteData, moveRemoteData, uploadDataToRemote -from wmpl.Trajectory.CorrelateDB import openObsDatabase, closeObsDatabase, commitObsDatabase, archiveObsDatabase -from wmpl.Trajectory.CorrelateDB import checkObsPaired, addPairedObs, unpairObs +from wmpl.Trajectory.CorrelateDB import ObservationDatabase from wmpl.Trajectory.Trajectory import Trajectory ### CONSTANTS ### @@ -554,7 +553,7 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode if mcmode != MCMODE_PHASE2: log.info("Loading database: {:s}".format(database_path)) self.db = DatabaseJSON(database_path, verbose=self.verbose) - self.observations_db = openObsDatabase(db_dir, 'observations') + 
self.observations_db = ObservationDatabase(db_dir, 'observations') if hasattr(self.db, 'paired_obs') and len(self.db.paired_obs) > 0: log.info('-----------------------------') log.info('moving observations to sqlite - this may take some time....') @@ -566,15 +565,15 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode obs_date = datetime.datetime.strptime(obs_id.split('_')[1], '%Y%m%d-%H%M%S.%f') except Exception: obs_date = datetime.datetime(2000,1,1,0,0,0) - addPairedObs(self.observations_db, stat_id, obs_id, obs_date, commitnow=False) + self.observations_db.addPairedObs(stat_id, obs_id, obs_date, commitnow=False) i += 1 - if i % 1000: + if not i % 100000: log.info(f'moved {i} observations') del self.db.paired_obs - commitObsDatabase(self.observations_db) - self.saveDatabase() + self.observations_db.commitObsDatabase() log.info(f'done - moved {i} observations') log.info('-----------------------------') + self.saveDatabase() if archivemonths != 0: log.info('Archiving older entries....') try: @@ -675,7 +674,7 @@ def __init__(self, station, obs_id): archdate = datetime.datetime.now(datetime.timezone.utc) - relativedelta(months=older_than) archdate_jd = datetime2JD(archdate) - archiveObsDatabase(self.observations_db, self.db_dir, archdate.strftime("%Y%m"), archdate_jd) + self.observations_db.archiveObsDatabase(self.db_dir, archdate.strftime("%Y%m"), archdate_jd) arch_db_path = os.path.join(self.db_dir, f'{archdate.strftime("%Y%m")}_{JSON_DB_NAME}') archdb = DatabaseJSON(arch_db_path, verbose=self.verbose, archiveYM=archdate.strftime("%Y%m")) @@ -906,7 +905,7 @@ def loadUnpairedObservations(self, processing_list, dt_range=None): continue # Add only unpaired observations - if not checkObsPaired(self.observations_db, met_obs.station_code, met_obs.id): + if not self.observations_db.checkObsPaired(met_obs.station_code, met_obs.id): # print(" ", station_code, met_obs.reference_dt, rel_proc_path) added_count += 1 
unpaired_met_obs_list.append(met_obs) @@ -1500,8 +1499,7 @@ def cleanupPhase2TempPickle(self, traj, success=False): return def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaired): - # TODO make this function work! - # wants to go through the candidates and check if they correspond to already-failed + # go through the candidates and check if they correspond to already-failed candidate_trajectories=[] for cand in matched_observations: ref_dt = min([met_obs.reference_dt for _, met_obs, _ in cand]) @@ -1531,7 +1529,7 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire log.info('--------') log.info(f'Trajectory at {jd2Date(traj.jdt_ref,dt_obj=True).isoformat()} already failed, skipping') for _, met_obs_temp, _ in cand: - unpairObs(self.observations_db, met_obs_temp.station_code, met_obs_temp.id) + self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) remaining_unpaired -= 1 else: candidate_trajectories.append(cand) @@ -2033,7 +2031,7 @@ def _breakHandler(signum, frame): bin_time_range = [bin_beg, bin_end] tc.run(event_time_range=event_time_range, mcmode=cml_args.mcmode, bin_time_range=bin_time_range, candidatemode=cml_args.candmode) - closeObsDatabase(dh.observations_db) + dh.observations_db.closeObsDatabase() else: # there were no datasets to process log.info('no data to process yet') From 75f11eab78a69b1fedf0f666a4ccca06931a4b4e Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 21 Jan 2026 21:11:18 +0000 Subject: [PATCH 015/132] improve documentation --- wmpl/Trajectory/CorrelateDB.py | 58 ++++++++++------------------------ 1 file changed, 17 insertions(+), 41 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 4e51b04f..bb159ced 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -11,21 +11,17 @@ log = logging.getLogger("traj_correlator") +# classes to handle the Observation and Trajectory databases + class 
ObservationDatabase(): + + # A class to handle the sqlite observations database transparently. + def __init__(self, db_path, db_name='observations'): self.dbhandle = self.openObsDatabase(db_path, db_name) def openObsDatabase(self, db_path, db_name='observations'): - """ - openObsDatabase - open the observations sqlite database and return a database handle - - The database is created if it doesn't exist. - - :param db_path: the path to the database - :param db_name: the name of the database to open, default 'observations' - :return: database handle - """ db_full_name = os.path.join(db_path, f'{db_name}.db') log.info(f'opening database {db_full_name}') @@ -51,15 +47,8 @@ def closeObsDatabase(self): def checkObsPaired(self, station_code, obs_id): - """ - checkObsPaired - check if an observation is already paired - - :param dbhandle: the database - :param station_code: the station ID - :param obs_id; the observation id - :return: true if matched, false otherwise - - """ + # return True if there is an observation with the correct station code, id and status = 1 + cur = self.dbhandle.cursor() res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}' and status=1") if res.fetchone() is None: @@ -69,18 +58,7 @@ def checkObsPaired(self, station_code, obs_id): def addPairedObs(self, station_code, obs_id, obs_date, commitnow=True, verbose=False): - """ - addPairedObs - add a potentially paired Observation to the database - - :param dbhandle: database connection handle - :param station_code: station code eg UK12345 - :param obs_id: met_obs observation ID - :param obs_date: observation date/time - :param commitnow: boolean true to force commit immediately - - :return: true if successful, false if the object already exists - :rtype: bool - """ + # add or update an entry in the database, setting status = 1 cur = self.dbhandle.cursor() res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and 
obs_id='{obs_id}'") if res.fetchone() is None: @@ -102,15 +80,9 @@ def addPairedObs(self, station_code, obs_id, obs_date, commitnow=True, verbose=F def unpairObs(self, station_code, obs_id, verbose=False): - """ - unpairObs - mark an observation unpaired by setting the status to zero - - :param dbhandle: the database - :param station_code: the station ID - :param obs_id; the observation id - - """ - + # if an entry exists, update the status to 0. + # this allows us to mark an observation paired, then unpair it later if the solution fails + # or we want to force a rerun. cur = self.dbhandle.cursor() if verbose: log.info(f'unpairing {obs_id}') @@ -125,19 +97,23 @@ def unpairObs(self, station_code, obs_id, verbose=False): def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): - # create the database if it doesnt exist + # archive records older than archdate_jd to a database {arch_prefix}_observations.db + + # create the database and table if it doesnt exist archdb_name = f'{arch_prefix}_observations' archdb = self.openObsDatabase(db_path, archdb_name) archdb.commit() archdb.close() - # attach the arch db and copy the records then delete them + # attach the arch db, copy the records then delete them cur = self.dbhandle.cursor() archdb_fullname = os.path.join(db_path, f'{archdb_name}.db') cur.execute(f"attach database '{archdb_fullname}' as archdb") try: + # bulk-copy if possible cur.execute(f'insert into archdb.paired_obs select * from paired_obs where obs_date < {archdate_jd}') except Exception: + # otherwise, one by one log.info('Some records already exist in archdb, doing row-wise copy') cur.execute(f'select * from paired_obs where obs_date < {archdate_jd}') for row in cur.fetchall(): From 405d6c0cb74439f1ef03fdc71ac3e220afbbe64f Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 21 Jan 2026 21:15:07 +0000 Subject: [PATCH 016/132] some tidying up --- wmpl/Trajectory/CorrelateDB.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 
deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index bb159ced..d0f9dc6e 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -22,6 +22,7 @@ def __init__(self, db_path, db_name='observations'): self.dbhandle = self.openObsDatabase(db_path, db_name) def openObsDatabase(self, db_path, db_name='observations'): + # open the database, creating it and adding the required table if necessary db_full_name = os.path.join(db_path, f'{db_name}.db') log.info(f'opening database {db_full_name}') @@ -35,26 +36,29 @@ def openObsDatabase(self, db_path, db_name='observations'): return con def commitObsDatabase(self): - """ commit the obs db - """ + # commit the obs db. This function exists so we can do lazy writes in some cases + self.dbhandle.commit() return def closeObsDatabase(self): + # close the database, making sure we commit any pending updates + self.dbhandle.commit() self.dbhandle.close() return def checkObsPaired(self, station_code, obs_id): - # return True if there is an observation with the correct station code, id and status = 1 + # return True if there is an observation with the correct station code, obs id and with status = 1 + paired = True cur = self.dbhandle.cursor() res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}' and status=1") if res.fetchone() is None: - return False + paired = False cur.close() - return True + return paired def addPairedObs(self, station_code, obs_id, obs_date, commitnow=True, verbose=False): @@ -71,6 +75,7 @@ def addPairedObs(self, station_code, obs_id, obs_date, commitnow=True, verbose=F sqlstr = f"update paired_obs set status=1 where station_code='{station_code}' and obs_id='{obs_id}'" cur.execute(sqlstr) cur.close() + if commitnow: self.dbhandle.commit() if not self.checkObsPaired(station_code, obs_id): @@ -83,15 +88,17 @@ def unpairObs(self, station_code, obs_id, verbose=False): # if an entry exists, update 
the status to 0. # this allows us to mark an observation paired, then unpair it later if the solution fails # or we want to force a rerun. - cur = self.dbhandle.cursor() if verbose: log.info(f'unpairing {obs_id}') + + cur = self.dbhandle.cursor() try: cur.execute(f"update paired_obs set status=0 where station_code='{station_code}' and obs_id='{obs_id}'") + self.dbhandle.commit() except Exception: # obs wasn't in the database so no need to unpair it pass - self.dbhandle.commit() + cur.close() return True From 81a29695b1d7aa83289a2c9da513b9e8f18a402e Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 21 Jan 2026 22:51:49 +0000 Subject: [PATCH 017/132] simplify migration to sqlite --- wmpl/Trajectory/CorrelateRMS.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index aa76e5aa..e9388e84 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -553,27 +553,14 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode if mcmode != MCMODE_PHASE2: log.info("Loading database: {:s}".format(database_path)) self.db = DatabaseJSON(database_path, verbose=self.verbose) - self.observations_db = ObservationDatabase(db_dir, 'observations') + + self.observations_db = ObservationDatabase(db_dir) + # move any legacy paired obs data into sqlite if hasattr(self.db, 'paired_obs') and len(self.db.paired_obs) > 0: - log.info('-----------------------------') - log.info('moving observations to sqlite - this may take some time....') - i = 0 - keylist = self.db.paired_obs.keys() - for stat_id in keylist: - for obs_id in self.db.paired_obs[stat_id]: - try: - obs_date = datetime.datetime.strptime(obs_id.split('_')[1], '%Y%m%d-%H%M%S.%f') - except Exception: - obs_date = datetime.datetime(2000,1,1,0,0,0) - self.observations_db.addPairedObs(stat_id, obs_id, obs_date, commitnow=False) - i += 1 - if not i % 100000: - log.info(f'moved 
{i} observations') + self.observations_db.moveJsonRecords(self.db.paired_obs) del self.db.paired_obs - self.observations_db.commitObsDatabase() - log.info(f'done - moved {i} observations') - log.info('-----------------------------') self.saveDatabase() + if archivemonths != 0: log.info('Archiving older entries....') try: From 9a71d85bb448b873e9b2fbaff852477b88927200 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 21 Jan 2026 22:52:04 +0000 Subject: [PATCH 018/132] keep track of which traj we're doing --- wmpl/Trajectory/CorrelateEngine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index a36b39f3..296cb62c 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1662,10 +1662,11 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca log.info("") # Go through all candidate trajectories and compute the complete trajectory solution - for matched_observations in candidate_trajectories: + for i, matched_observations in enumerate(candidate_trajectories): log.info("") log.info("-----------------------") + log.info(f'processing candidate {i+1}') # if mcmode is not 2, prepare to calculate the intersecting planes solutions From 297da7cc9ec28bd97f433ca77a8180eb77227f9d Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 21 Jan 2026 22:52:31 +0000 Subject: [PATCH 019/132] improve archiving and add merging support --- wmpl/Trajectory/CorrelateDB.py | 72 ++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index d0f9dc6e..eeb0129c 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -11,23 +11,28 @@ log = logging.getLogger("traj_correlator") +############################################################ # classes to handle the Observation and Trajectory databases 
+############################################################ class ObservationDatabase(): # A class to handle the sqlite observations database transparently. - def __init__(self, db_path, db_name='observations'): - self.dbhandle = self.openObsDatabase(db_path, db_name) + def __init__(self, db_path, db_name='observations.db', purge_records=False): + self.dbhandle = self.openObsDatabase(db_path, db_name, purge_records) - def openObsDatabase(self, db_path, db_name='observations'): - # open the database, creating it and adding the required table if necessary + def openObsDatabase(self, db_path, db_name='observations.db', purge_records=False): + # Open the database, creating it and adding the required table if necessary. + # If purge_records is true, delete any existing records. - db_full_name = os.path.join(db_path, f'{db_name}.db') + db_full_name = os.path.join(db_path, f'{db_name}') log.info(f'opening database {db_full_name}') con = sqlite3.connect(db_full_name) cur = con.cursor() + if purge_records: + cur.execute('drop table paired_obs') res = cur.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") if res.fetchone() is None: cur.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") @@ -107,21 +112,20 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): # archive records older than archdate_jd to a database {arch_prefix}_observations.db # create the database and table if it doesnt exist - archdb_name = f'{arch_prefix}_observations' + archdb_name = f'{arch_prefix}_observations.db' archdb = self.openObsDatabase(db_path, archdb_name) archdb.commit() archdb.close() # attach the arch db, copy the records then delete them cur = self.dbhandle.cursor() - archdb_fullname = os.path.join(db_path, f'{archdb_name}.db') + archdb_fullname = os.path.join(db_path, f'{archdb_name}') cur.execute(f"attach database '{archdb_fullname}' as archdb") try: # bulk-copy if possible - 
cur.execute(f'insert into archdb.paired_obs select * from paired_obs where obs_date < {archdate_jd}') + cur.execute(f'insert or replace into archdb.paired_obs select * from paired_obs where obs_date < {archdate_jd}') except Exception: # otherwise, one by one - log.info('Some records already exist in archdb, doing row-wise copy') cur.execute(f'select * from paired_obs where obs_date < {archdate_jd}') for row in cur.fetchall(): try: @@ -134,9 +138,55 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): cur.close() return + def moveJsonRecords(self, paired_obs): + log.info('-----------------------------') + log.info('moving observations to sqlite - this may take some time....') + i = 0 + keylist = paired_obs.keys() + for stat_id in keylist: + for obs_id in paired_obs[stat_id]: + try: + obs_date = datetime.datetime.strptime(obs_id.split('_')[1], '%Y%m%d-%H%M%S.%f') + except Exception: + obs_date = datetime.datetime(2000,1,1,0,0,0) + self.addPairedObs(stat_id, obs_id, obs_date, commitnow=False) + i += 1 + if not i % 100000: + log.info(f'moved {i} observations') + self.commitObsDatabase() + log.info(f'done - moved {i} observations') + log.info('-----------------------------') + + return + + def mergeObsDatabase(self, source_db_path): + # merge in records from another observation database, for example from a remote node + + if not os.path.isfile(source_db_path): + log.warning(f'source database missing: {source_db_path}') + return + # attach the other db, copy the records then detach it + cur = self.dbhandle.cursor() + cur.execute(f"attach database '{source_db_path}' as sourcedb") + try: + # bulk-copy if possible + cur.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') + except Exception: + # otherwise, one by one + log.info('Some records already exist, doing row-wise copy') + cur.execute('select * from sourcedb.paired_obs') + for row in cur.fetchall(): + self.addPairedObs(row[0], row[1],row[2]) + self.commitObsDatabase() + 
cur.execute("detach database 'sourcedb'") + cur.close() + return + + +############################################################ -def openTrajDatabase(db_path, db_name='processed_trajectories'): - db_full_name = os.path.join(db_path, f'{db_name}.db') +def openTrajDatabase(db_path, db_name='processed_trajectories.db'): + db_full_name = os.path.join(db_path, f'{db_name}') log.info(f'opening database {db_full_name}') con = sqlite3.connect(db_full_name) cur = con.cursor() From f1f5f5e1298e275ba7f69e909229e838dbc4c74e Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 24 Jan 2026 13:28:12 +0000 Subject: [PATCH 020/132] updates to improve handling of operation modes and remote data --- wmpl/Trajectory/CorrelateEngine.py | 82 +++++++---- wmpl/Trajectory/CorrelateRMS.py | 96 +++++++----- wmpl/Utils/remoteDataHandling.py | 229 ++++++++++++++++------------- 3 files changed, 245 insertions(+), 162 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 296cb62c..85feb48f 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -22,13 +22,13 @@ raDec2ECI, datetime2JD, jd2Date, equatorialCoordPrecession_vect from wmpl.Utils.Pickling import loadPickle, savePickle -CANDMODE_NONE = 0 -CANDMODE_SAVE = 1 -CANDMODE_LOAD = 2 - MCMODE_NONE = 0 MCMODE_PHASE1 = 1 MCMODE_PHASE2 = 2 +MCMODE_CANDS = 4 +MCMODE_ALL = MCMODE_CANDS + MCMODE_PHASE1 + MCMODE_PHASE2 +MCMODE_SIMPLE = MCMODE_CANDS + MCMODE_PHASE1 + # Grab the logger from the main thread log = logging.getLogger("traj_correlator") @@ -642,7 +642,7 @@ def initTrajectory(self, jdt_ref, mc_runs, verbose=False): return traj - def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, orig_traj=None): + def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, orig_traj=None): """ Given an initialized Trajectory object with observation, run the solver and automatically reject bad observations. 
@@ -671,9 +671,10 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, o # make a note of how many observations are already marked ignored. initial_ignore_count = len([obs for obs in traj.observations if obs.ignore_station]) log.info(f'initially ignoring {initial_ignore_count} stations...') + successful_traj_fit = False - # run the first phase of the solver if mcmode is MCMODE_NONE or MCMODE_PHASE1 - if mcmode != MCMODE_PHASE2: + # run the first phase of the solver if mcmode is MCMODE_PHASE1 + if mcmode & MCMODE_PHASE1: # Disable Monte Carlo runs until an initial stable set of observations is found traj.monte_carlo = False @@ -841,14 +842,21 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, o # Disable Monte Carlo runs until an initial stable set of observations is found traj.monte_carlo = False - # Reinitialize the observations, rejecting the ignored stations + # Reinitialize the observations. Note we *include* the ignored obs as they're internally marked ignored + # and so will be skipped, but to avoid confusion in the logs we only print the names of the non-ignored ones for obs in traj_status.observations: + traj.infillWithObs(obs) if not obs.ignore_station: log.info(f'Adding {obs.station_id}') - traj.infillWithObs(obs) log.info("") - log.info(f'Rerunning the trajectory solution with {len(traj.observations)} stations...') + active_stns = len([obs for obs in traj.observations if not obs.ignore_station]) + if active_stns < 2: + log.info(f"Only {active_stns} stations left - trajectory estimation failed!") + skip_trajectory = True + break + + log.info(f'Rerunning the trajectory solution with {active_stns} stations...') # Re-run the trajectory solution try: traj_status = traj.run() @@ -879,10 +887,10 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, o # Add the trajectory to the list of failed trajectories self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref) - 
log.info("Trajectory skipped and added to fails!") + log.info(f"Trajectory at {jdt_ref} skipped and added to fails!") if matched_obs: for _, met_obs_temp, _ in matched_obs: - # log.info(f'Marking {met_obs_temp.id} unpaired') + log.info(f'Marking {met_obs_temp.id} unpaired') self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) return False @@ -922,14 +930,15 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, o else: shower_code = shower_obj.IAU_code log.info("Shower: {:s}".format(shower_code)) + + if mcmode & MCMODE_PHASE1: successful_traj_fit = True log.info('finished initial solution') ##### end of simple soln phase ##### now run the Monte-carlo phase, if the mcmode is 0 (do both) or 2 (mc-only) - if mcmode == MCMODE_NONE or mcmode == MCMODE_PHASE2: - if mcmode == MCMODE_PHASE2: - traj_status = traj + if mcmode & MCMODE_PHASE2: + traj_status = traj # save the traj in case we need to clean it up save_traj = traj @@ -1069,7 +1078,8 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, o if orig_traj: log.info(f"Removing the previous solution {os.path.dirname(orig_traj.traj_file_path)} ...") - self.dh.removeTrajectory(orig_traj) + remove_phase1 = True if abs(round((traj.jdt_ref-orig_traj.jdt_ref)*86400000,0)) > 0 else False + self.dh.removeTrajectory(orig_traj, remove_phase1) traj.pre_mc_longname = os.path.split(self.dh.generateTrajOutputDirectoryPath(orig_traj, make_dirs=False))[-1] log.info('Saving trajectory....') @@ -1086,7 +1096,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_NONE, matched_obs=None, o return successful_traj_fit - def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, candidatemode=CANDMODE_NONE): + def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): """ Run meteor corellation using available data. 
Keyword arguments: @@ -1100,12 +1110,13 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca mcmodestr = ' - MONTE CARLO STAGE' elif mcmode == MCMODE_PHASE1: mcmodestr = ' - SIMPLE STAGE' + elif mcmode == MCMODE_CANDS: + mcmodestr = ' - CANDIDATE STAGE' else: - mcmodestr = ' ' - self.candidatemode = candidatemode + mcmodestr = 'FULL SOLVER' if mcmode != MCMODE_PHASE2: - if candidatemode != CANDMODE_LOAD: + if mcmode & MCMODE_CANDS: # Get unpaired observations, filter out observations with too little points and sort them by time unpaired_observations_all = self.dh.getUnpairedObservations() unpaired_observations_all = [mettmp for mettmp in unpaired_observations_all @@ -1138,7 +1149,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca # Data will be divided into time bins, so the pairing function doesn't have to go pair many # observations at once and keep all pairs in memory else: - if candidatemode != CANDMODE_LOAD: + if mcmode & MCMODE_CANDS: dt_beg = unpaired_observations_all[0].reference_dt dt_end = unpaired_observations_all[-1].reference_dt else: @@ -1163,6 +1174,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca log.info("---------------------------------") log.info("") + log.info(f'mcmode is {mcmode}') # Go though all time bins and split the list of observations for bin_beg, bin_end in dt_bin_list: @@ -1172,7 +1184,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca # if we're in MC mode 0 or 1 we have to find the candidate trajectories if mcmode != MCMODE_PHASE2: ## we are in candidatemode mode 0 or 1 and want to find candidates - if self.candidatemode != CANDMODE_LOAD: + if mcmode & MCMODE_CANDS: log.info("") log.info("-----------------------------------") log.info(" PAIRING TRAJECTORIES IN TIME BIN:") @@ -1332,7 +1344,9 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca log.info("Remove paired 
observations from the processing list...") for _, met_obs_temp in candidate_observations: unpaired_observations.remove(met_obs_temp) - remaining_unpaired -= 1 + if self.dh.observations_db.addPairedObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt): + remaining_unpaired -= 1 + else: for met_obs_temp, _ in candidate_observations: @@ -1577,8 +1591,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca log.info(f'There are {remaining_unpaired} remaining unpaired observations in this bucket.') log.info("-----------------------") - # in candidatemode mode 1 we want to save the candidates to disk - if self.candidatemode == CANDMODE_SAVE: + # in candidate mode we want to save the candidates to disk + if mcmode == MCMODE_CANDS: self.getCandidateFolders() log.info("-----------------------") log.info('SAVING {} CANDIDATES'.format(len(candidate_trajectories))) @@ -1739,9 +1753,25 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_NONE, ca if qc_max < self.traj_constraints.min_qc: log.info("Max convergence angle too small: {:.1f} < {:.1f} deg".format(qc_max, self.traj_constraints.min_qc)) + + # create a traj object to add to the failed database so we don't try to recompute this one again + ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) + jdt_ref = datetime2JD(ref_dt) + + failed_traj = self.initTrajectory(jdt_ref, 0, verbose=False) + for obs_temp, met_obs, _ in matched_observations: + failed_traj.infillWithObs(obs_temp) + + t0 = min([obs.time_data[0] for obs in failed_traj.observations if (not obs.ignore_station) + or (not np.all(obs.ignore_list))]) + if t0 != 0.0: + failed_traj.jdt_ref = failed_traj.jdt_ref + t0/86400.0 + + self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref) + for _, met_obs_temp, _ in matched_observations: self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) - + log.info("Trajectory skipped and added to fails!") continue diff 
--git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index e9388e84..c2bbe6e6 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -23,15 +23,16 @@ from wmpl.Formats.CAMS import loadFTPDetectInfo from wmpl.Trajectory.CorrelateEngine import TrajectoryCorrelator, TrajectoryConstraints -from wmpl.Trajectory.CorrelateEngine import MCMODE_NONE, MCMODE_PHASE2, CANDMODE_LOAD, CANDMODE_SAVE from wmpl.Utils.Math import generateDatetimeBins from wmpl.Utils.OSTools import mkdirP from wmpl.Utils.Pickling import loadPickle, savePickle from wmpl.Utils.TrajConversions import datetime2JD, jd2Date -from wmpl.Utils.remoteDataHandling import collectRemoteData, moveRemoteData, uploadDataToRemote +from wmpl.Utils.remoteDataHandling import RemoteDataHandler from wmpl.Trajectory.CorrelateDB import ObservationDatabase from wmpl.Trajectory.Trajectory import Trajectory +from wmpl.Trajectory.CorrelateEngine import MCMODE_CANDS, MCMODE_PHASE1, MCMODE_PHASE2, MCMODE_ALL + ### CONSTANTS ### # Name of the ouput trajectory directory @@ -80,6 +81,10 @@ def __init__(self, traj_file_path, json_dict=None, traj_obj=None): except FileNotFoundError: log.info("Pickle file not found: " + traj_file_path) return None + + finally: + log.info("Pickle file could not be loaded: " + traj_file_path) + return None else: @@ -338,7 +343,7 @@ def removeTrajectory(self, traj_reduced, keepFolder=False): traj_dir = os.path.dirname(traj_reduced.traj_file_path) shutil.rmtree(traj_dir, ignore_errors=True) if os.path.isfile(traj_reduced.traj_file_path): - log.info(f'unable to remove {traj_dir}') + log.info(f'unable to remove {traj_dir}') @@ -487,7 +492,7 @@ def __init__(self, **entries): class RMSDataHandle(object): - def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode=MCMODE_NONE, max_trajs=1000, remotehost=None, verbose=False, archivemonths=3): + def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode=MCMODE_ALL, 
max_trajs=1000, remotehost=None, verbose=False, archivemonths=3): """ Handles data interfacing between the trajectory correlator and RMS data files on disk. Arguments: @@ -500,6 +505,7 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode database file will be loaded from the dir_path. output_dir: [str] Path to the directory where the output files will be saved. None by default, in which case the output files will be saved in the dir_path. + mcmode: [int] the operation mode, candidates, phase1 simple solns, mc phase or a combination max_trajs: [int] maximum number of phase1 trajectories to load at a time when adding uncertainties. Improves throughput. """ @@ -533,7 +539,7 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.phase1_dir = os.path.join(self.output_dir, 'phase1') # create the directory for phase1 simple trajectories, if needed - if self.mc_mode > MCMODE_NONE: + if self.mc_mode & MCMODE_PHASE1: mkdirP(os.path.join(self.phase1_dir, 'processed')) self.purgePhase1ProcessedData(os.path.join(self.phase1_dir, 'processed')) @@ -546,9 +552,11 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode # Load database of processed folders database_path = os.path.join(self.db_dir, JSON_DB_NAME) log.info("") + remote_cfg = os.path.join(self.db_dir, 'wmpl_remote.cfg') + self.remotedatahandler = RemoteDataHandler(remote_cfg) # move any remotely calculated pickles to their target locations if os.path.isdir(os.path.join(self.output_dir, 'remoteuploads')): - moveRemoteData(self.output_dir) + self.remotedatahandler.moveRemoteData(self.output_dir) if mcmode != MCMODE_PHASE2: log.info("Loading database: {:s}".format(database_path)) @@ -581,7 +589,7 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode else: # retrieve pickles from a remote host, if configured if self.remotehost is not None: - collectRemoteData(remotehost, max_trajs, self.phase1_dir) + 
self.remotedatahandler.collectRemoteData(remotehost, max_trajs, self.phase1_dir) # reload the phase1 trajectories dt_beg, dt_end = self.loadPhase1Trajectories(max_trajs=max_trajs) @@ -1382,7 +1390,7 @@ def saveTrajectoryResults(self, traj, save_plots): # if additional observations are found then the refdt or country list may change quite a bit traj.longname = os.path.split(output_dir)[-1] - if self.mc_mode == 1: + if self.mc_mode & MCMODE_PHASE1: # The MC phase may change the refdt so save a copy of the the original name. traj.pre_mc_longname = traj.longname @@ -1393,16 +1401,16 @@ def saveTrajectoryResults(self, traj, save_plots): savePickle(traj, output_dir, traj.file_name + '_trajectory.pickle') log.info(f'saved {traj.traj_id} to {output_dir}') - if self.mc_mode == 1: + if self.mc_mode == MCMODE_PHASE1: savePickle(traj, self.phase1_dir, traj.pre_mc_longname + '_trajectory.pickle') - elif self.mc_mode == 2: + elif self.mc_mode & MCMODE_PHASE2: # we save this in MC mode the MC phase may alter the trajectory details and if later on # we're including additional observations we need to use the most recent version of the trajectory savePickle(traj, os.path.join(self.phase1_dir, 'processed'), traj.pre_mc_longname + '_trajectory.pickle') if self.remotehost is not None: log.info('saving to remote host') - uploadDataToRemote(remotehost, traj.file_name + '_trajectory.pickle', output_dir) + self.remotedatahandler.uploadDataToRemote(remotehost, traj.file_name + '_trajectory.pickle', output_dir) log.info(' ...done') # Save the plots @@ -1439,11 +1447,11 @@ def addTrajectory(self, traj, failed_jdt_ref=None): - def removeTrajectory(self, traj_reduced): + def removeTrajectory(self, traj_reduced, remove_phase1=False): """ Remove the trajectory from the data base and disk. 
""" # in mcmode 2 the database isn't loaded but we still need to delete updated trajectories - if self.mc_mode == MCMODE_PHASE2: + if self.mc_mode & MCMODE_PHASE2: if os.path.isfile(traj_reduced.traj_file_path): traj_dir = os.path.dirname(traj_reduced.traj_file_path) shutil.rmtree(traj_dir, ignore_errors=True) @@ -1460,8 +1468,16 @@ def removeTrajectory(self, traj_reduced): # remove the processed pickle now we're done with it self.cleanupPhase2TempPickle(traj_reduced, True) + return + if self.mcmode & MCMODE_PHASE1 and remove_phase1: + # remove any solution from the phase1 folder + phase1_traj = os.path.join(self.phase1_dir, os.path.basename(traj_reduced.traj_file_path)) + if os.path.isfile(phase1_traj): + try: + os.remove(phase1_traj) + except Exception: + pass - return self.db.removeTrajectory(traj_reduced) @@ -1472,7 +1488,7 @@ def cleanupPhase2TempPickle(self, traj, success=False): the pickle, because we might later on get new data and it might become solvable. Otherwise, we can just delete the file since the MC solver will have saved an updated one already. 
""" - if self.mc_mode != 2: + if not self.mc_mode & MCMODE_PHASE2: return fldr_name = os.path.split(self.generateTrajOutputDirectoryPath(traj, make_dirs=False))[-1] pick = os.path.join(self.phase1_dir, fldr_name + '_trajectory.pickle_processing') @@ -1513,7 +1529,6 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire traj.jdt_ref = traj.jdt_ref + t0/86400.0 if self.checkTrajIfFailed(traj): - log.info('--------') log.info(f'Trajectory at {jd2Date(traj.jdt_ref,dt_obj=True).isoformat()} already failed, skipping') for _, met_obs_temp, _ in cand: self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) @@ -1521,7 +1536,7 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire else: candidate_trajectories.append(cand) - return candidate_trajectories, remaining_unpaired + return candidate_trajectories, max(0,remaining_unpaired) def checkTrajIfFailed(self, traj): """ Check if the given trajectory has been computed with the same observations and has failed to be @@ -1750,9 +1765,6 @@ def _breakHandler(signum, frame): arg_parser.add_argument('--mcmode', '--mcmode', type=int, default=0, help="Run just simple soln (1), just monte-carlos (2) or both (0, default).") - arg_parser.add_argument('--candmode', '--candmode', type=int, default=0, - help="Run normally (0), create candidates only (1), load previously-created candidates(2).") - arg_parser.add_argument('--archiveoldrecords', '--archiveoldrecords', type=int, default=3, help="Months back to archive old data. Default 3. 
Zero means don't archive (useful in testing).") @@ -1847,8 +1859,17 @@ def _breakHandler(signum, frame): if cml_args.maxerr is not None: trajectory_constraints.max_arcsec_err = cml_args.maxerr + # mcmode values + # mcmode = 1 -> load candidates and do simple solutions + # mcmode = 2 -> load simple solns and do MC solutions + # mcmode = 4 -> find candidates only + # mcmode = 7 -> do everything + # mcmode = 0 -> same as mode 7 + + mcmode = MCMODE_ALL if cml_args.mcmode == 0 else cml_args.mcmode + remotehost = cml_args.remotehost - if cml_args.mcmode !=MCMODE_PHASE2 and remotehost is not None: + if mcmode !=MCMODE_PHASE2 and remotehost is not None: log.info('remotehost only applicable in mcmode 2') remotehost = None @@ -1861,7 +1882,7 @@ def _breakHandler(signum, frame): if cml_args.maxtrajs is not None: max_trajs = int(cml_args.maxtrajs) - if cml_args.mcmode == MCMODE_PHASE2: + if mcmode == MCMODE_PHASE2: log.info(f'Reloading at most {max_trajs} phase1 trajectories.') # Set the number of CPU cores @@ -1871,10 +1892,12 @@ def _breakHandler(signum, frame): trajectory_constraints.mc_cores = cpu_cores log.info("Running using {:d} CPU cores.".format(cpu_cores)) - if cml_args.candmode == CANDMODE_LOAD: - log.info('Loading Candidates') - elif cml_args.candmode == CANDMODE_SAVE: - log.info('Saving Candidates') + if mcmode == MCMODE_CANDS: + log.info('Saving Candidates only') + elif mcmode == MCMODE_PHASE1: + log.info('Loading Candidates if needed') + elif mcmode == MCMODE_ALL: + log.info('Full processing mode') # Run processing. 
If the auto run more is not on, the loop will break after one run previous_start_time = None @@ -1932,10 +1955,10 @@ def _breakHandler(signum, frame): dh = RMSDataHandle( cml_args.dir_path, dt_range=event_time_range, db_dir=cml_args.dbdir, output_dir=cml_args.outdir, - mcmode=cml_args.mcmode, max_trajs=max_trajs, remotehost=remotehost, verbose=cml_args.verbose, archivemonths=cml_args.archiveoldrecords) + mcmode=mcmode, max_trajs=max_trajs, remotehost=remotehost, verbose=cml_args.verbose, archivemonths=cml_args.archiveoldrecords) - # If there is nothing to process, stop, unless we're in mcmode 2 (processing_list is not used in this case) - if not dh.processing_list and cml_args.mcmode != MCMODE_PHASE2: + # If there is nothing to process and we're in Candidate mode, stop + if not dh.processing_list and (mcmode & MCMODE_CANDS): log.info("") log.info("Nothing to process!") log.info("Probably everything is already processed.") @@ -1945,7 +1968,7 @@ def _breakHandler(signum, frame): ### GENERATE DAILY TIME BINS ### - if cml_args.mcmode != MCMODE_PHASE2: + if mcmode != MCMODE_PHASE2: # Find the range of datetimes of all folders (take only those after the year 2000) proc_dir_dts = [entry[3] for entry in dh.processing_list if entry[3] is not None] proc_dir_dts = [dt for dt in proc_dir_dts if dt > datetime.datetime(2000, 1, 1, 0, 0, 0, @@ -2001,8 +2024,8 @@ def _breakHandler(signum, frame): log.info("-----------------------------") log.info("") - # Load data of unprocessed observations - if cml_args.mcmode != MCMODE_PHASE2 and cml_args.candmode != CANDMODE_LOAD: + # Load data of unprocessed observations only if creating candidates + if mcmode & MCMODE_CANDS: dh.unpaired_observations = dh.loadUnpairedObservations(dh.processing_list, dt_range=(bin_beg, bin_end)) log.info(f'loaded {len(dh.unpaired_observations)} observations') @@ -2010,15 +2033,16 @@ def _breakHandler(signum, frame): # refresh list of calculated trajectories from disk dh.removeDeletedTrajectories() 
dh.loadComputedTrajectories(os.path.join(dh.output_dir, OUTPUT_TRAJ_DIR), dt_range=[bin_beg, bin_end]) - if cml_args.mcmode != MCMODE_PHASE2: + if mcmode != MCMODE_PHASE2: dh.removeDuplicateTrajectories(dt_range=[bin_beg, bin_end]) # Run the trajectory correlator tc = TrajectoryCorrelator(dh, trajectory_constraints, cml_args.velpart, data_in_j2000=True, enableOSM=cml_args.enableOSM) bin_time_range = [bin_beg, bin_end] - tc.run(event_time_range=event_time_range, mcmode=cml_args.mcmode, bin_time_range=bin_time_range, candidatemode=cml_args.candmode) + tc.run(event_time_range=event_time_range, mcmode=mcmode, bin_time_range=bin_time_range) - dh.observations_db.closeObsDatabase() + if mcmode & MCMODE_CANDS: + dh.observations_db.closeObsDatabase() else: # there were no datasets to process log.info('no data to process yet') @@ -2056,4 +2080,4 @@ def _breakHandler(signum, frame): while next_run_time > datetime.datetime.now(datetime.timezone.utc): print("Waiting {:s} to run the trajectory solver... 
".format(str(next_run_time - datetime.datetime.now(datetime.timezone.utc)))) - time.sleep(2) + time.sleep(10) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index f3ccf526..a0a7675f 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -25,86 +25,153 @@ import logging import glob import shutil +from configparser import ConfigParser from wmpl.Utils.OSTools import mkdirP from wmpl.Utils.Pickling import loadPickle + log = logging.getLogger("traj_correlator") -def collectRemoteData(remotehost, max_trajs, output_dir, datatype='traj'): - """ - Collect trajectory or candidate pickles from a remote server for local processing - NB: do NOT use os.path.join here, as it will break on Windows - """ - - ftpcli, remote_dir, sshcli = getSFTPConnection(remotehost) - if ftpcli is None: +class RemoteDataHandler(): + def __init__(self, cfg_file): + self.initialised = False + if not os.path.isfile(cfg_file): + log.warning(f'unable to find {cfg_file}, aborting remote processing') + return + + cfg = ConfigParser() + cfg.read(cfg_file) + self.mode = cfg['mode']['mode'] + if self.mode not in ['master', 'child']: + log.warning('remote cfg: mode must be master or child, aborting remote processing') + return + if self.mode == 'master': + if 'children' not in cfg.sections() or 'capacity' not in cfg.sections(): + log.warning('remote cfg: capacity or children sections missing, aborting remote processing') + return + + self.nodes = [k for k in cfg['children'].values()] + self.capacity = [int(k) for k in cfg['capacity'].values()] + if len(self.nodes) != len(self.capacity): + log.warning('remote cfg: capacity and children not same length, aborting remote processing') + return + else: + if 'key' not in cfg['sftp'] or 'host' not in cfg['sftp'] or 'user' not in cfg['sftp']: + log.warning('remote cfg: child user, key or host missing, aborting remote processing') + return + + self.remotehost = cfg['sftp']['host'] + self.user = 
cfg['sftp']['user'] + self.key = os.path.normpath(os.path.expanduser(cfg['sftp']['key'])) + if 'port' not in cfg['sftp']: + self.port = 22 + else: + self.port = int(cfg['sftp']['port']) + + self.initialised = True + self.ssh_client = None + self.sftp_client = None return - remote_phase1_dir = os.path.join(remote_dir, 'phase1').replace('\\','/') - - log.info(f'Looking in {remote_phase1_dir} on remote host for up to {max_trajs} trajectories') - - try: - files = ftpcli.listdir(remote_phase1_dir) - files = [f for f in files if '.pickle' in f and 'processing' not in f] - files = files[:max_trajs] - - if len(files) == 0: - log.info('no data available at this time') - ftpcli.close() - sshcli.close() - return + def getSFTPConnection(self): + if not self.initialised: + return False + log.info(f'Connecting to {self.host}:{self.port} as {self.user}....') + + if not os.path.isfile(os.path.expanduser(self.key)): + log.warning(f'ssh keyfile {self.key} missing') + return False + + self.ssh_client = paramiko.SSHClient() + self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + pkey = paramiko.RSAKey.from_private_key_file(self.key) + try: + self.ssh_client.connect(hostname=self.host, username=self.user, port=self.port, pkey=pkey, look_for_keys=False) + self.ftp_client = self.ssh_client.open_sftp() + return True - for trajfile in files: - fullname = os.path.join(remote_phase1_dir, trajfile).replace('\\','/') - localname = os.path.join(output_dir, trajfile) - ftpcli.get(fullname, localname) - ftpcli.rename(fullname, f'{fullname}_processing') + except Exception as e: - log.info(f'Obtained {len(files)} trajectories') + log.warning('sftp connection to remote host failed') + log.warning(e) + self.ssh_client.close() + return False + + def closeSFTPConnection(self): + if self.sftp_client: + self.sftp_client.close() + if self.ssh_client: + self.ssh_client.close() + return + + def getRemoteCandidates(self): + return + + def collectRemotePhase1(self, max_trajs, output_dir): + 
""" + Collect trajectory or candidate pickles from a remote server for local processing + NB: do NOT use os.path.join here, as it will break on Windows + """ - except Exception as e: - log.warning('Problem with download') - log.info(e) + if not self.initialised or not self.getSFTPConnection(): + return + + try: + files = self.ftp_client.listdir('phase1') + files = [f for f in files if '.pickle' in f and 'processing' not in f] + files = files[:max_trajs] + + if len(files) == 0: + log.info('no data available at this time') + self.closeSFTPConnection() + return + + for trajfile in files: + fullname = os.path.join('phase1', trajfile).replace('\\','/') + localname = os.path.join(output_dir, trajfile) + self.ftp_client.get(fullname, localname) + self.ftp_client.rename(fullname, f'{fullname}_processing') + log.info(f'Obtained {len(files)} trajectories') + + + except Exception as e: + log.warning('Problem with download') + log.info(e) + + self.closeSFTPConnection() + return - ftpcli.close() - sshcli.close() - return + def uploadToRemote(self, trajfile, output_dir, operation_mode=None): + """ + upload the trajectory pickle and report to a remote host for integration + into the solved dataset + """ + if not self.initialised or not self.getSFTPConnection(): + return -def uploadDataToRemote(remotehost, trajfile, output_dir, datatype='traj'): - """ - At the end of MC phase, upload the trajectory pickle and report to a remote host for integration - into the solved dataset - """ + remote_phase2_dir = '' + try: + self.sftp_client.mkdir(remote_phase2_dir) + except Exception: + pass - ftpcli, remote_dir, sshcli = getSFTPConnection(remotehost) - if ftpcli is None: - return - - remote_phase2_dir = os.path.join(remote_dir, 'remoteuploads').replace('\\','/') - try: - ftpcli.mkdir(remote_phase2_dir) - except Exception: - pass - - localname = os.path.join(output_dir, trajfile) - remotename = os.path.join(remote_phase2_dir, trajfile).replace('\\','/') - ftpcli.put(localname, remotename) - 
- localname = localname.replace('_trajectory.pickle', '_report.txt') - remotename = remotename.replace('_trajectory.pickle', '_report.txt') - if os.path.isfile(localname): - ftpcli.put(localname, remotename) + localname = os.path.join(output_dir, trajfile) + remotename = os.path.join(remote_phase2_dir, trajfile).replace('\\','/') + self.ftp_client.put(localname, remotename) + + localname = localname.replace('_trajectory.pickle', '_report.txt') + remotename = remotename.replace('_trajectory.pickle', '_report.txt') + if os.path.isfile(localname): + self.ftp_client.put(localname, remotename) - ftpcli.close() - sshcli.close() - return + self.closeSFTPConnection() + return def moveRemoteData(output_dir, datatype='traj'): @@ -154,45 +221,7 @@ def moveRemoteData(output_dir, datatype='traj'): return -def getSFTPConnection(remotehost): - - hostdets = remotehost.split(':') - - if len(hostdets) < 2 or '@' not in hostdets[0]: - log.warning(f'{remotehost} malformed, should be user@host:port:/path/to/dataroot') - return None, None, None - - if len(hostdets) == 3: - port = int(hostdets[1]) - remote_data_dir = hostdets[2] - - else: - port = 22 - remote_data_dir = hostdets[1] - user,host = hostdets[0].split('@') - log.info(f'Connecting to {host}....') - - ssh_client = paramiko.SSHClient() - ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - - - if not os.path.isfile(os.path.expanduser('~/.ssh/trajsolver')): - log.warning('ssh keyfile ~/.ssh/trajsolver missing') - ssh_client.close() - return None, None, None - - pkey = paramiko.RSAKey.from_private_key_file(os.path.expanduser('~/.ssh/trajsolver')) - try: - ssh_client.connect(hostname=host, username=user, port=port, pkey=pkey, look_for_keys=False) - ftp_client = ssh_client.open_sftp() - return ftp_client, remote_data_dir, ssh_client - - except Exception as e: - - log.warning('sftp connection to remote host failed') - log.warning(e) - ssh_client.close() - - return None, None, None +def putPhase1Trajectories(): + return 
From 6eb44fa872c26f8c857292b0df158b0df3fdfb8c Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sun, 25 Jan 2026 12:47:20 +0000 Subject: [PATCH 021/132] improve documentation --- wmpl/Trajectory/CorrelateRMS.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index c2bbe6e6..09a9d48f 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1865,6 +1865,9 @@ def _breakHandler(signum, frame): # mcmode = 4 -> find candidates only # mcmode = 7 -> do everything # mcmode = 0 -> same as mode 7 + # bitwise combinations are permissioble so: + # 4+1 will find candidates and then run simple solutions to populate "phase1" + # 1+2 will load candidates from "candidates" and solve them completely mcmode = MCMODE_ALL if cml_args.mcmode == 0 else cml_args.mcmode From 1afaff12c91c2af9bb508d1b929daa613757aecc Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 26 Jan 2026 15:12:06 +0000 Subject: [PATCH 022/132] bugfix to create phase1 trajs properly --- wmpl/Trajectory/CorrelateRMS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 09a9d48f..a79d5818 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1401,7 +1401,7 @@ def saveTrajectoryResults(self, traj, save_plots): savePickle(traj, output_dir, traj.file_name + '_trajectory.pickle') log.info(f'saved {traj.traj_id} to {output_dir}') - if self.mc_mode == MCMODE_PHASE1: + if self.mc_mode & MCMODE_PHASE1 and self.mc_mode != MCMODE_ALL: savePickle(traj, self.phase1_dir, traj.pre_mc_longname + '_trajectory.pickle') elif self.mc_mode & MCMODE_PHASE2: # we save this in MC mode the MC phase may alter the trajectory details and if later on From 625ee4dff30992ce3dfac361ccfa5b4851bd3608 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 26 Jan 2026 20:45:11 +0000 Subject: [PATCH 023/132] bugfix in when to save 
phase1 --- wmpl/Trajectory/CorrelateRMS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index a79d5818..0e3071b0 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1401,7 +1401,7 @@ def saveTrajectoryResults(self, traj, save_plots): savePickle(traj, output_dir, traj.file_name + '_trajectory.pickle') log.info(f'saved {traj.traj_id} to {output_dir}') - if self.mc_mode & MCMODE_PHASE1 and self.mc_mode != MCMODE_ALL: + if self.mc_mode & MCMODE_PHASE1 and not self.mc_mode & MCMODE_PHASE2: savePickle(traj, self.phase1_dir, traj.pre_mc_longname + '_trajectory.pickle') elif self.mc_mode & MCMODE_PHASE2: # we save this in MC mode the MC phase may alter the trajectory details and if later on From 36840144e97b3e82a3615ce9890fd288380238ac Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 27 Jan 2026 11:55:21 +0000 Subject: [PATCH 024/132] be a bit less verbose --- wmpl/Trajectory/Trajectory.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/Trajectory.py b/wmpl/Trajectory/Trajectory.py index 72cf0f8d..7c02e9ba 100644 --- a/wmpl/Trajectory/Trajectory.py +++ b/wmpl/Trajectory/Trajectory.py @@ -2757,7 +2757,8 @@ def generateFileName(self): def infillTrajectory(self, meas1, meas2, time_data, lat, lon, ele, station_id=None, excluded_time=None, - ignore_list=None, magnitudes=None, fov_beg=None, fov_end=None, obs_id=None, comment='', ignore_station=False): + ignore_list=None, magnitudes=None, fov_beg=None, fov_end=None, obs_id=None, comment='', ignore_station=False, + verbose=False): """ Initialize a set of measurements for a given station. 
Arguments: @@ -2835,7 +2836,7 @@ def infillTrajectory(self, meas1, meas2, time_data, lat, lon, ele, station_id=No # Skip the observation if all points were ignored if ignore_list is not None: - if np.all(ignore_list): + if np.all(ignore_list) and verbose: print('All points from station {:s} are ignored, not using this station in the solution!'.format(station_id)) From 7f5677a4669501d9e68489cfe13030b00d02a1f2 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 27 Jan 2026 11:55:40 +0000 Subject: [PATCH 025/132] remove unnecessary message --- wmpl/Rebound/REBOUND.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Rebound/REBOUND.py b/wmpl/Rebound/REBOUND.py index 92ef3330..75620897 100644 --- a/wmpl/Rebound/REBOUND.py +++ b/wmpl/Rebound/REBOUND.py @@ -14,7 +14,7 @@ REBOUND_FOUND = True except ImportError: - print("REBOUND package not found. Install REBOUND and reboundx packages to use the REBOUND functions.") + # don't print a message here as its already printed whenever REBOUND_FOUND is False REBOUND_FOUND = False from wmpl.Utils.TrajConversions import ( From 21fa47e5f7c4bb265cfe9c16a0789eba1169eba8 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 27 Jan 2026 11:56:15 +0000 Subject: [PATCH 026/132] reinstate prior behaviour to skip ignored stations when retrying solutions --- wmpl/Trajectory/CorrelateEngine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 85feb48f..87e83e0e 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -749,7 +749,8 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or max_rejections_possible = int(np.ceil(0.5*len(traj_status.observations))) + initial_ignore_count log.info(f'max stations allowed to be rejected is {max_rejections_possible}') for i, obs in enumerate(traj_status.observations): - + if obs.ignore_station: + continue # Compute the 
median angular uncertainty of all other non-ignored stations ang_res_list = [obstmp.ang_res_std for j, obstmp in enumerate(traj_status.observations) if (i != j) and not obstmp.ignore_station] From 2c1ab8be3b4e54c53c9c924ef00c610683272251 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 27 Jan 2026 15:09:13 +0000 Subject: [PATCH 027/132] simplify calls to addTrajectory --- wmpl/Trajectory/CorrelateRMS.py | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 0e3071b0..fa8e6ab9 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -283,7 +283,7 @@ def checkTrajIfFailed(self, traj): return False - def addTrajectory(self, traj_file_path, traj_obj=None, failed=False): + def addTrajectory(self, traj_reduced, failed=False): """ Add a computed trajectory to the list. Arguments: @@ -294,25 +294,11 @@ def addTrajectory(self, traj_file_path, traj_obj=None, failed=False): failed: [bool] Add as a failed trajectory. False by default. 
""" - # Load the trajectory from disk - if traj_obj is None: - - # Init the reduced trajectory object - traj_reduced = TrajectoryReduced(traj_file_path) - if self.verbose: - log.info(f' loaded {traj_file_path}, traj_id {traj_reduced.traj_id}') - # Skip if failed - if traj_reduced is None: - return None - - if not hasattr(traj_reduced, "jdt_ref"): - return None + if traj_reduced is None or not hasattr(traj_reduced, "jdt_ref"): + return None - else: - # Use the provided trajectory object - traj_reduced = traj_obj - if self.verbose: - log.info(f' loaded {traj_obj.traj_file_path}, traj_id {traj_reduced.traj_id}') + if self.verbose: + log.info(f' loaded {traj_reduced.traj_file_path}, traj_id {traj_reduced.traj_id}') # Choose to which dictionary the trajectory will be added @@ -677,12 +663,12 @@ def __init__(self, station, obs_id): for traj in [t for t in self.db.trajectories if t < archdate_jd]: if traj < archdate_jd: - archdb.addTrajectory(None, self.db.trajectories[traj], False) + archdb.addTrajectory(self.db.trajectories[traj], False) del self.db.trajectories[traj] for traj in [t for t in self.db.failed_trajectories if t < archdate_jd]: if traj < archdate_jd: - archdb.addTrajectory(None, self.db.failed_trajectories[traj], True) + archdb.addTrajectory(self.db.failed_trajectories[traj], True) del self.db.failed_trajectories[traj] archdb.save() @@ -1150,7 +1136,7 @@ def loadComputedTrajectories(self, traj_dir_path, dt_range=None): if self.trajectoryFileInDtRange(file_name, dt_range=dt_range): - self.db.addTrajectory(os.path.join(full_traj_dir, file_name)) + self.db.addTrajectory(TrajectoryReduced(os.path.join(full_traj_dir, file_name))) # Print every 1000th trajectory if counter % 1000 == 0: @@ -1443,7 +1429,7 @@ def addTrajectory(self, traj, failed_jdt_ref=None): if failed_jdt_ref is not None: traj_reduced.jdt_ref = failed_jdt_ref - self.db.addTrajectory(None, traj_obj=traj_reduced, failed=(failed_jdt_ref is not None)) + self.db.addTrajectory(traj_reduced, 
failed=(failed_jdt_ref is not None)) From 610cfa5e1bda4c811521407c52606f6f869215e7 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 27 Jan 2026 21:33:36 +0000 Subject: [PATCH 028/132] create trajectories db in sqlite --- wmpl/Trajectory/CorrelateDB.py | 314 +++++++++++++++++++++++++++++++-- 1 file changed, 300 insertions(+), 14 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index eeb0129c..c634950a 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -6,9 +6,12 @@ import logging.handlers import argparse import datetime +import json +import shutil from wmpl.Utils.TrajConversions import datetime2JD + log = logging.getLogger("traj_correlator") ############################################################ @@ -185,21 +188,304 @@ def mergeObsDatabase(self, source_db_path): ############################################################ -def openTrajDatabase(db_path, db_name='processed_trajectories.db'): - db_full_name = os.path.join(db_path, f'{db_name}') - log.info(f'opening database {db_full_name}') - con = sqlite3.connect(db_full_name) - cur = con.cursor() - res = cur.execute("SELECT name FROM sqlite_master WHERE name='failed_trajectories'") - if res.fetchone() is None: - cur.execute("CREATE TABLE failed_trajectories()") - - res = cur.execute("SELECT name FROM sqlite_master WHERE name='trajectories'") - if res.fetchone() is None: - cur.execute("CREATE TABLE trajectories()") - con.commit() - return con +class DummyTrajReduced(): + # a dummy class for use in a couple of functions in the TrajectoryDatabase + def __init__(self, jdt_ref, traj_id, traj_file_path): + self.jdt_ref = jdt_ref + self.traj_id = traj_id + self.traj_file_path = traj_file_path + + +class TrajectoryDatabase(): + + # A class to handle the sqlite trajectory database transparently. 
+ + def __init__(self, db_path, db_name='trajectories.db', purge_records=False): + self.dbhandle = self.openTrajDatabase(db_path, db_name, purge_records) + + def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=False): + # Open the database, creating it and adding the required table if necessary. + # If purge_records is true, delete any existing records. + + db_full_name = os.path.join(db_path, f'{db_name}') + log.info(f'opening database {db_full_name}') + con = sqlite3.connect(db_full_name) + cur = con.cursor() + if purge_records: + cur.execute('drop table trajectories') + cur.execute('drop table failed_trajectories') + res = cur.execute("SELECT name FROM sqlite_master WHERE name='trajectories'") + if res.fetchone() is None: + cur.execute("""CREATE TABLE trajectories( + jdt_ref REAL UNIQUE, + traj_id VARCHAR UNIQUE, + traj_file_path VARCHAR, + participating_stations VARCHAR, + radiant_eci_mini VARCHAR, + state_vect_mini VARCHAR, + ignored_stations VARCHAR, + phase_1_only INTEGER, + v_init REAL, + gravity_factor REAL, + v0z REAL, + v_avg REAL, + rbeg_jd REAL, + rend_jd REAL, + rbeg_lat REAL, + rbeg_lon REAL, + rbeg_ele REAL, + rend_lat REAL, + rend_lon REAL, + rend_ele REAL, + status INTEGER) """) + + res = cur.execute("SELECT name FROM sqlite_master WHERE name='failed_trajectories'") + if res.fetchone() is None: + cur.execute("""CREATE TABLE failed_trajectories( + jdt_ref REAL UNIQUE, + traj_id VARCHAR UNIQUE, + traj_file_path VARCHAR, + participating_stations VARCHAR, + ignored_stations VARCHAR, + radiant_eci_mini VARCHAR, + state_vect_mini VARCHAR, + phase_1_only INTEGER, + v_init REAL, + gravity_factor REAL, + status INTEGER) """) + + con.commit() + cur.close() + return con + + def commitTrajDatabase(self): + # commit the obs db. 
This function exists so we can do lazy writes in some cases + + self.dbhandle.commit() + return + + def closeTrajDatabase(self): + # close the database, making sure we commit any pending updates + + self.dbhandle.commit() + self.dbhandle.close() + return + + + def checkTrajIfFailed(self, traj_reduced, verbose=False): + # return True if there is an observation with the same jdt_ref and matching list of stations + + if not hasattr(traj_reduced, 'jdt_ref'): + return False + + station_list = list(set(traj_reduced.participating_stations + traj_reduced.ignored_stations)) + cur = self.dbhandle.cursor() + res = cur.execute(f"SELECT traj_id FROM failed_trajectories WHERE jdt_ref={traj_reduced.jdt_ref} and status=1") + if res.fetchone() is None: + cur.close() + return False + else: + res = cur.execute(f"SELECT participating_stations, ignored_stations FROM failed_trajectories WHERE jdt_ref={traj_reduced.jdt_ref}") + row = res.fetchone() + traj_stations = list(set(json.loads(row[0]) + json.loads(row[1]))) + if traj_stations == station_list: + cur.close() + return True + else: + cur.close() + return False + + + def addTrajectory(self, traj_reduced, failed=False, verbose=False, commitnow=True): + # add or update an entry in the database, setting status = 1 + + if verbose: + log.info(f'adding {traj_reduced.traj_id} with jdt {traj_reduced.jdt_ref}') + cur = self.dbhandle.cursor() + if failed: + cur.execute(f'insert or replace into failed_trajectories values (' + f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_reduced.traj_file_path}'," + f"'{json.dumps(traj_reduced.participating_stations)}'," + f"'{json.dumps(traj_reduced.ignored_stations)}'," + f"'{json.dumps(traj_reduced.radiant_eci_mini)}'," + f"'{json.dumps(traj_reduced.state_vect_mini)}'," + f"{traj_reduced.phase_1_only},{traj_reduced.v_init},{traj_reduced.gravity_factor},1)") + else: + cur.execute(f'insert or replace into trajectories values (' + f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', 
'{traj_reduced.traj_file_path}'," + f"'{json.dumps(traj_reduced.participating_stations)}'," + f"'{json.dumps(traj_reduced.ignored_stations)}'," + f"'{json.dumps(traj_reduced.radiant_eci_mini)}'," + f"'{json.dumps(traj_reduced.state_vect_mini)}'," + f"{traj_reduced.phase_1_only},{traj_reduced.v_init},{traj_reduced.gravity_factor}," + f"{traj_reduced.v0z},{traj_reduced.v_avg}," + f"{traj_reduced.rbeg_jd},{traj_reduced.rend_jd}," + f"{traj_reduced.rbeg_lat},{traj_reduced.rbeg_lon},{traj_reduced.rbeg_ele}," + f"{traj_reduced.rend_lat},{traj_reduced.rend_lon},{traj_reduced.rend_ele},1)") + + if commitnow: + self.dbhandle.commit() + + cur.close() + return True + + + def removeTrajectory(self, traj_reduced, keepFolder=False, failed=False, verbose=False): + # if an entry exists, update the status to 0. + # this allows us to mark an observation paired, then unpair it later if the solution fails + # or we want to force a rerun. + if verbose: + log.info(f'removing {traj_reduced.traj_id}') + table_name = 'failed_trajectories' if failed else 'trajectories' + + cur = self.dbhandle.cursor() + try: + cur.execute(f"update {table_name} set status=0 where jdt_ref='{traj_reduced.jdt_ref}'") + self.dbhandle.commit() + except Exception: + # traj wasn't in the database so no action required + pass + cur.close() + + # Remove the trajectory folder on the disk + if not keepFolder and os.path.isfile(traj_reduced.traj_file_path): + traj_dir = os.path.dirname(traj_reduced.traj_file_path) + shutil.rmtree(traj_dir, ignore_errors=True) + if os.path.isfile(traj_reduced.traj_file_path): + log.info(f'unable to remove {traj_dir}') + + return True + + + def getTrajectories(self, jdt_start, jdt_end=None, failed=False, verbose=False): + + table_name = 'failed_trajectories' if failed else 'trajectories' + if verbose: + log.info(f'getting trajectories between {jdt_start} and {jdt_end}') + print(f'getting trajectories between {jdt_start} and {jdt_end}') + + + cur = self.dbhandle.cursor() + if not jdt_end: 
+ res = cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") + rows = res.fetchall() + else: + res = cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") + rows = res.fetchall() + cur.close() + trajs = [] + for rw in rows: + json_dict = {'jdt_ref':rw[0], 'traj_id':rw[1], 'traj_file_path':rw[2], + 'participating_stations': json.loads(rw[3]), + 'ignored_stations': json.loads(rw[4]), + 'radiant_eci_mini': json.loads(rw[5]), + 'state_vect_mini': json.loads(rw[6]), + 'phase_1_only': rw[7], 'v_init': rw[8],'gravity_factor': rw[9], + 'v0z': rw[10], 'v_avg': rw[11], + 'rbeg_jd': rw[12], 'rend_id': rw[13], + 'rbeg_lat': rw[14], 'rbeg_lon': rw[15], 'rbeg_ele': rw[16], + 'rend_lat': rw[17], 'rend_lon': rw[18], 'rend_ele': rw[19] + } + + trajs.append(json_dict) + return trajs + + + def removeDeletedTrajectories(self, jdt_start, jdt_end=None, failed=False, verbose=False): + + table_name = 'failed_trajectories' if failed else 'trajectories' + if verbose: + log.info(f'getting trajectories between {jdt_start} and {jdt_end}') + + cur = self.dbhandle.cursor() + if not jdt_end: + res = cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") + rows = res.fetchall() + else: + res = cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") + rows = res.fetchall() + cur.close() + for rw in rows: + if not os.path.isfile(rw[2]): + log.info(f'removing traj {rw[0]} from database') + self.removeTrajectory(DummyTrajReduced(rw[0], rw[1], rw[2]), keepFolder=True) + return + + def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): + # archive records older than archdate_jd to a database {arch_prefix}_trajectories.db + + # create the database and table if it doesnt exist + archdb_name = f'{arch_prefix}_trajectories.db' + archdb = self.openObsDatabase(db_path, archdb_name) + archdb.commit() + archdb.close() + + # attach the arch db, copy the records then delete them + cur = 
self.dbhandle.cursor() + archdb_fullname = os.path.join(db_path, f'{archdb_name}') + cur.execute(f"attach database '{archdb_fullname}' as archdb") + for table_name in ['trajectories', 'failed_trajectories']: + try: + # bulk-copy if possible + cur.execute(f'insert or replace into archdb.{table_name} select * from {table_name} where jdt_ref < {archdate_jd}') + cur.execute(f'delete from {table_name} where jdt_ref < {archdate_jd}') + except Exception: + log.warning(f'unable to archive {table_name}') + + self.commitTrajDatabase() + cur.close() + return + + def moveJsonRecords(self, trajectories, failed_trajectories): + log.info('-----------------------------') + log.info('moving trajectories to sqlite - this may take some time....') + i = 0 + keylist = trajectories.keys() + for jdt_ref in keylist: + self.addTrajectory(trajectories[jdt_ref]) + i += 1 + if not i % 100000: + log.info(f'moved {i} trajectories') + log.info(f'done - moved {i} trajectories') + log.info('-----------------------------') + keylist = failed_trajectories.keys() + for jdt_ref in keylist: + self.addTrajectory(failed_trajectories[jdt_ref], failed=True) + i += 1 + if not i % 100000: + log.info(f'moved {i} failed_trajectories') + self.commitTrajDatabase() + log.info(f'done - moved {i} failed_trajectories') + log.info('-----------------------------') + + return + + def mergeTrajDatabase(self, source_db_path): + # merge in records from another observation database, for example from a remote node + + if not os.path.isfile(source_db_path): + log.warning(f'source database missing: {source_db_path}') + return + # attach the other db, copy the records then detach it + cur = self.dbhandle.cursor() + cur.execute(f"attach database '{source_db_path}' as sourcedb") + + # TODO need to correct the traj_file_path to account for server locations + + for table_name in ['trajectories', 'failed_trajectories']: + try: + # bulk-copy if possible + cur.execute(f'insert or replace into {table_name} select * from 
sourcedb.{table_name}') + except Exception: + log.warning(f'unable to merge data from {source_db_path}') + self.commitTrajDatabase() + cur.execute("detach database 'sourcedb'") + cur.close() + return + + +############################################################ if __name__ == '__main__': arg_parser = argparse.ArgumentParser(description="""Automatically compute trajectories from RMS data in the given directory.""", From 22e993242b863f72280e991db1e012d72c6ee68d Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 27 Jan 2026 21:35:29 +0000 Subject: [PATCH 029/132] add trajectories db as json file --- wmpl/Trajectory/CorrelateRMS.py | 128 ++++++-------------------------- 1 file changed, 24 insertions(+), 104 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index fa8e6ab9..bab25a81 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -17,7 +17,6 @@ import logging import logging.handlers import glob -import pandas as pd from dateutil.relativedelta import relativedelta import numpy as np @@ -28,7 +27,7 @@ from wmpl.Utils.Pickling import loadPickle, savePickle from wmpl.Utils.TrajConversions import datetime2JD, jd2Date from wmpl.Utils.remoteDataHandling import RemoteDataHandler -from wmpl.Trajectory.CorrelateDB import ObservationDatabase +from wmpl.Trajectory.CorrelateDB import ObservationDatabase, TrajectoryDatabase from wmpl.Trajectory.Trajectory import Trajectory from wmpl.Trajectory.CorrelateEngine import MCMODE_CANDS, MCMODE_PHASE1, MCMODE_PHASE2, MCMODE_ALL @@ -82,7 +81,7 @@ def __init__(self, traj_file_path, json_dict=None, traj_obj=None): log.info("Pickle file not found: " + traj_file_path) return None - finally: + except: log.info("Pickle file could not be loaded: " + traj_file_path) return None @@ -92,7 +91,6 @@ def __init__(self, traj_file_path, json_dict=None, traj_obj=None): traj = traj_obj self.traj_file_path = os.path.join(traj.output_dir, traj.file_name + 
"_trajectory.pickle") - # Reference Julian date (beginning of the meteor) self.jdt_ref = traj.jdt_ref @@ -229,6 +227,8 @@ def save(self): db_bak_file_path = self.db_file_path + ".bak" if os.path.exists(self.db_file_path): shutil.copy2(self.db_file_path, db_bak_file_path) + else: + return # Save the data base try: @@ -546,13 +546,20 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode if mcmode != MCMODE_PHASE2: log.info("Loading database: {:s}".format(database_path)) - self.db = DatabaseJSON(database_path, verbose=self.verbose) + self.old_db = DatabaseJSON(database_path, verbose=self.verbose) + self.db = TrajectoryDatabase(db_dir) + # move any legacy paired obs data into sqlite + if hasattr(self.old_db, 'trajectories'): + self.db.moveJsonRecords(self.old_db.trajectories, self.old_db.failed_trajectories) + del self.old_db.trajectories + del self.old_db.failed_trajectories + self.saveDatabase() self.observations_db = ObservationDatabase(db_dir) # move any legacy paired obs data into sqlite - if hasattr(self.db, 'paired_obs') and len(self.db.paired_obs) > 0: - self.observations_db.moveJsonRecords(self.db.paired_obs) - del self.db.paired_obs + if hasattr(self.old_db, 'paired_obs'): + self.observations_db.moveJsonRecords(self.old_db.paired_obs) + del self.old_db.paired_obs self.saveDatabase() if archivemonths != 0: @@ -1038,29 +1045,7 @@ def removeDeletedTrajectories(self): jdt_start = datetime2JD(self.dt_range[0]) jdt_end = datetime2JD(self.dt_range[1]) - trajs_to_remove = [] - - keys = [k for k in self.db.trajectories.keys() if k >= jdt_start and k <= jdt_end] - for trajkey in keys: - traj_reduced = self.db.trajectories[trajkey] - # Update the trajectory path to make sure we're working with the correct filesystem - traj_path = self.generateTrajOutputDirectoryPath(traj_reduced) - traj_file_name = os.path.split(traj_reduced.traj_file_path)[1] - traj_path = os.path.join(traj_path, traj_file_name) - - if self.verbose: - log.info(f' testing 
{traj_path}') - - if not os.path.isfile(traj_path): - traj_reduced.traj_file_path = traj_path - trajs_to_remove.append(traj_reduced) - - for traj in trajs_to_remove: - log.info(f' removing deleted {traj.traj_file_path}') - - # remove from the database but not from the disk: they're already not on the disk and this avoids - # accidentally deleting a different traj with a timestamp which is within a millisecond - self.db.removeTrajectory(traj, keepFolder=True) + self.db.removeDeletedTrajectories(jdt_start, jdt_end) return @@ -1153,74 +1138,10 @@ def loadComputedTrajectories(self, traj_dir_path, dt_range=None): def getComputedTrajectories(self, jd_beg, jd_end): """ Returns a list of computed trajectories between the Julian dates. """ - - return [self.db.trajectories[key] for key in self.db.trajectories - if (self.db.trajectories[key].jdt_ref >= jd_beg) - and (self.db.trajectories[key].jdt_ref <= jd_end)] - - - def removeDuplicateTrajectories(self, dt_range): - """ Remove trajectories with duplicate IDs - keeping the one with the most station observations - """ - - log.info('removing duplicate trajectories') - - tr_in_scope = self.getComputedTrajectories(datetime2JD(dt_range[0]), datetime2JD(dt_range[1])) - tr_to_check = [{'jdt_ref':traj.jdt_ref,'traj_id':traj.traj_id, 'traj': traj} for traj in tr_in_scope if hasattr(traj,'traj_id')] - - if len(tr_to_check) == 0: - log.info('no trajectories in range') - return - - tr_df = pd.DataFrame(tr_to_check) - tr_df['dupe']=tr_df.duplicated(subset=['traj_id']) - dupeids = tr_df[tr_df.dupe].sort_values(by=['traj_id']).traj_id - duperows = tr_df[tr_df.traj_id.isin(dupeids)] - - log.info(f'there are {len(duperows.traj_id.unique())} duplicate trajectories') - - # iterate over the duplicates, finding the best and removing the others - for traj_id in duperows.traj_id.unique(): - num_stats = 0 - best_traj_dt = None - best_traj_path = None - # find duplicate with largest number of observations - for testdt in 
duperows[duperows.traj_id==traj_id].jdt_ref.values: - - if len(dh.db.trajectories[testdt].participating_stations) > num_stats: - - best_traj_dt = testdt - num_stats = len(dh.db.trajectories[testdt].participating_stations) - # sometimes the database contains duplicates that differ by microseconds in jdt. These - # will have overwritten each other in the folder so make a note of the location. - best_traj_path = dh.db.trajectories[testdt].traj_file_path - - # now remove all except the best - for testdt in duperows[duperows.traj_id==traj_id].jdt_ref.values: - - traj = dh.db.trajectories[testdt] - if testdt != best_traj_dt: - - # get the current trajectory's location. If its the same as that of the best trajectory - # don't try to delete the solution from disk even if there's a small difference in jdt_ref - keepFolder = False - if traj.traj_file_path == best_traj_path: - keepFolder = True - # Update the trajectory path to make sure we're working with the correct filesystem - traj_path = self.generateTrajOutputDirectoryPath(traj) - traj_file_name = os.path.split(traj.traj_file_path)[1] - traj.traj_file_path = os.path.join(traj_path, traj_file_name) - log.info(f'removing duplicate {traj.traj_id} keep {traj_file_name} {keepFolder}') - - self.db.removeTrajectory(traj, keepFolder=keepFolder) - - else: - if self.verbose: - log.info(f'keeping {traj.traj_id} {traj.traj_file_path}') - - return - + json_dicts = self.db.getTrajectories(jd_beg, jd_end) + trajs = [TrajectoryReduced(None, json_dict=j) for j in json_dicts] + return trajs + def getPlatepar(self, met_obs): """ Return the platepar of the meteor observation. 
""" @@ -1531,8 +1452,9 @@ def checkTrajIfFailed(self, traj): """ if self.db is None: - return - return self.db.checkTrajIfFailed(traj) + return + traj_reduced = TrajectoryReduced(None, traj_obj=traj) + return self.db.checkTrajIfFailed(traj_reduced) @@ -1652,7 +1574,7 @@ def _breakHandler(signum, frame): # Save the data base log.info("Saving data base to disk...") - self.db.save() + self.old_db.save() # Restore the signal functionality signal.signal(signal.SIGINT, original_signal) @@ -2022,8 +1944,6 @@ def _breakHandler(signum, frame): # refresh list of calculated trajectories from disk dh.removeDeletedTrajectories() dh.loadComputedTrajectories(os.path.join(dh.output_dir, OUTPUT_TRAJ_DIR), dt_range=[bin_beg, bin_end]) - if mcmode != MCMODE_PHASE2: - dh.removeDuplicateTrajectories(dt_range=[bin_beg, bin_end]) # Run the trajectory correlator tc = TrajectoryCorrelator(dh, trajectory_constraints, cml_args.velpart, data_in_j2000=True, enableOSM=cml_args.enableOSM) From 558ce812abbc1b2f8096fb34e4f97c5d99d97593 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 27 Jan 2026 23:21:26 +0000 Subject: [PATCH 030/132] bugfixes --- wmpl/Trajectory/CorrelateDB.py | 21 ++++++++++++--------- wmpl/Trajectory/CorrelateRMS.py | 9 ++++++--- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index c634950a..41ddda51 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -9,7 +9,7 @@ import json import shutil -from wmpl.Utils.TrajConversions import datetime2JD +from wmpl.Utils.TrajConversions import datetime2JD, jd2Date log = logging.getLogger("traj_correlator") @@ -303,13 +303,17 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False, commitnow=Tru log.info(f'adding {traj_reduced.traj_id} with jdt {traj_reduced.jdt_ref}') cur = self.dbhandle.cursor() if failed: + v_init = 0 if traj_reduced.v_init is None else traj_reduced.v_init + radiant_eci_mini = [0,0,0] if 
traj_reduced.radiant_eci_mini is None else traj_reduced.radiant_eci_mini + state_vect_mini = [0,0,0] if traj_reduced.state_vect_mini is None else traj_reduced.state_vect_mini + cur.execute(f'insert or replace into failed_trajectories values (' f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_reduced.traj_file_path}'," f"'{json.dumps(traj_reduced.participating_stations)}'," f"'{json.dumps(traj_reduced.ignored_stations)}'," - f"'{json.dumps(traj_reduced.radiant_eci_mini)}'," - f"'{json.dumps(traj_reduced.state_vect_mini)}'," - f"{traj_reduced.phase_1_only},{traj_reduced.v_init},{traj_reduced.gravity_factor},1)") + f"'{json.dumps(radiant_eci_mini)}'," + f"'{json.dumps(state_vect_mini)}'," + f"0,{v_init},{traj_reduced.gravity_factor},1)") else: cur.execute(f'insert or replace into trajectories values (' f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_reduced.traj_file_path}'," @@ -361,9 +365,7 @@ def getTrajectories(self, jdt_start, jdt_end=None, failed=False, verbose=False): table_name = 'failed_trajectories' if failed else 'trajectories' if verbose: - log.info(f'getting trajectories between {jdt_start} and {jdt_end}') - print(f'getting trajectories between {jdt_start} and {jdt_end}') - + log.info(f'getting trajectories between {jd2Date(jdt_start, dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")} and {jd2Date(jdt_end, dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")}') cur = self.dbhandle.cursor() if not jdt_end: @@ -382,7 +384,7 @@ def getTrajectories(self, jdt_start, jdt_end=None, failed=False, verbose=False): 'state_vect_mini': json.loads(rw[6]), 'phase_1_only': rw[7], 'v_init': rw[8],'gravity_factor': rw[9], 'v0z': rw[10], 'v_avg': rw[11], - 'rbeg_jd': rw[12], 'rend_id': rw[13], + 'rbeg_jd': rw[12], 'rend_jd': rw[13], 'rbeg_lat': rw[14], 'rbeg_lon': rw[15], 'rbeg_ele': rw[16], 'rend_lat': rw[17], 'rend_lon': rw[18], 'rend_ele': rw[19] } @@ -407,7 +409,8 @@ def removeDeletedTrajectories(self, jdt_start, jdt_end=None, failed=False, verbo cur.close() for rw in 
rows: if not os.path.isfile(rw[2]): - log.info(f'removing traj {rw[0]} from database') + if verbose: + log.info(f'removing traj {jd2Date(rw[0], dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")} from database') self.removeTrajectory(DummyTrajReduced(rw[0], rw[1], rw[2]), keepFolder=True) return diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index bab25a81..aef4627e 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -203,7 +203,8 @@ def load(self, verbose=False): # Overwrite the database path with the saved one self.db_file_path = db_file_path_saved - if db_is_ok: + # if the trajectories attribute is not present, then the database has been converted to sqlite + if db_is_ok and hasattr(self, 'trajectories'): # Convert trajectories from JSON to TrajectoryReduced objects for traj_dict_str in ["trajectories", "failed_trajectories"]: traj_dict = getattr(self, traj_dict_str) @@ -236,8 +237,10 @@ def save(self): self2 = copy.deepcopy(self) # Convert reduced trajectory objects to JSON objects - self2.trajectories = {key: self.trajectories[key].__dict__ for key in self.trajectories} - self2.failed_trajectories = {key: self.failed_trajectories[key].__dict__ + if hasattr(self2,'trajectories'): + self2.trajectories = {key: self.trajectories[key].__dict__ for key in self.trajectories} + if hasattr(self2, 'failed_trajectories'): + self2.failed_trajectories = {key: self.failed_trajectories[key].__dict__ for key in self.failed_trajectories} if hasattr(self2, 'phase1Trajectories'): delattr(self2, 'phase1Trajectories') From fd228a93ba7d5ac2d9bec1b111c7dc7d80749945 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 28 Jan 2026 22:26:29 +0000 Subject: [PATCH 031/132] updates to handle trajectory sqlite db better --- wmpl/Trajectory/CorrelateDB.py | 33 ++++++++++++++++++---- wmpl/Trajectory/CorrelateRMS.py | 50 ++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 23 deletions(-) diff --git 
a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 41ddda51..6c7358c4 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -303,19 +303,20 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False, commitnow=Tru log.info(f'adding {traj_reduced.traj_id} with jdt {traj_reduced.jdt_ref}') cur = self.dbhandle.cursor() if failed: + traj_id = 'None' if not hasattr(traj_reduced, 'traj_id') or traj_reduced.traj_id is None else traj_reduced.traj_id v_init = 0 if traj_reduced.v_init is None else traj_reduced.v_init radiant_eci_mini = [0,0,0] if traj_reduced.radiant_eci_mini is None else traj_reduced.radiant_eci_mini state_vect_mini = [0,0,0] if traj_reduced.state_vect_mini is None else traj_reduced.state_vect_mini cur.execute(f'insert or replace into failed_trajectories values (' - f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_reduced.traj_file_path}'," + f"{traj_reduced.jdt_ref}, '{traj_id}', '{traj_reduced.traj_file_path}'," f"'{json.dumps(traj_reduced.participating_stations)}'," f"'{json.dumps(traj_reduced.ignored_stations)}'," f"'{json.dumps(radiant_eci_mini)}'," f"'{json.dumps(state_vect_mini)}'," f"0,{v_init},{traj_reduced.gravity_factor},1)") else: - cur.execute(f'insert or replace into trajectories values (' + sql_str = (f'insert or replace into trajectories values (' f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_reduced.traj_file_path}'," f"'{json.dumps(traj_reduced.participating_stations)}'," f"'{json.dumps(traj_reduced.ignored_stations)}'," @@ -326,7 +327,8 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False, commitnow=Tru f"{traj_reduced.rbeg_jd},{traj_reduced.rend_jd}," f"{traj_reduced.rbeg_lat},{traj_reduced.rbeg_lon},{traj_reduced.rbeg_ele}," f"{traj_reduced.rend_lat},{traj_reduced.rend_lon},{traj_reduced.rend_ele},1)") - + sql_str = sql_str.replace('nan','"NaN"') + cur.execute(sql_str) if commitnow: self.dbhandle.commit() @@ -407,11 +409,13 @@ def 
removeDeletedTrajectories(self, jdt_start, jdt_end=None, failed=False, verbo res = cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") rows = res.fetchall() cur.close() - for rw in rows: + i = 0 # initial value in case there are zero rows + for i, rw in enumerate(rows): if not os.path.isfile(rw[2]): if verbose: log.info(f'removing traj {jd2Date(rw[0], dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")} from database') self.removeTrajectory(DummyTrajReduced(rw[0], rw[1], rw[2]), keepFolder=True) + log.info(f'removed {i} deleted trajectories') return @@ -448,7 +452,7 @@ def moveJsonRecords(self, trajectories, failed_trajectories): for jdt_ref in keylist: self.addTrajectory(trajectories[jdt_ref]) i += 1 - if not i % 100000: + if not i % 10000: log.info(f'moved {i} trajectories') log.info(f'done - moved {i} trajectories') log.info('-----------------------------') @@ -456,7 +460,7 @@ def moveJsonRecords(self, trajectories, failed_trajectories): for jdt_ref in keylist: self.addTrajectory(failed_trajectories[jdt_ref], failed=True) i += 1 - if not i % 100000: + if not i % 10000: log.info(f'moved {i} failed_trajectories') self.commitTrajDatabase() log.info(f'done - moved {i} failed_trajectories') @@ -464,6 +468,23 @@ def moveJsonRecords(self, trajectories, failed_trajectories): return + def moveFailedTrajectories(self, failed_trajectories, dt_range): + jd_beg = datetime2JD(dt_range[0]) + jd_end = datetime2JD(dt_range[1]) + log.info('moving trajectories to sqlite - this may take some time....') + keylist = [k for k in failed_trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] + i = 0 # just in case there aren't any trajectories to move + for i,jdt_ref in enumerate(keylist): + self.addTrajectory(failed_trajectories[jdt_ref], failed=True, commitnow=False) + i += 1 + if not i % 10000: + self.commitTrajDatabase() + log.info(f'moved {i} failed_trajectories') + self.commitTrajDatabase() + log.info(f'done - moved {i} 
failed_trajectories') + + return + def mergeTrajDatabase(self, source_db_path): # merge in records from another observation database, for example from a remote node diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index aef4627e..b09095cf 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -335,6 +335,20 @@ def removeTrajectory(self, traj_reduced, keepFolder=False): log.info(f'unable to remove {traj_dir}') + def removeTrajectories(self, dt_range, failed=False): + jd_beg = datetime2JD(dt_range[0]) + jd_end = datetime2JD(dt_range[1]) + if not failed: + keys = [k for k in self.trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] + for jdt in keys: + del self.trajectories[jdt] + else: + keys = [k for k in self.failed_trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] + for jdt in keys: + del self.failed_trajectories[jdt] + log.info(f'deleted {len(keys)} keys from {"failed_trajectories" if failed else "trajectories"}') + return len(keys) + class MeteorPointRMS(object): def __init__(self, frame, time_rel, x, y, ra, dec, azim, alt, mag): @@ -550,15 +564,10 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode if mcmode != MCMODE_PHASE2: log.info("Loading database: {:s}".format(database_path)) self.old_db = DatabaseJSON(database_path, verbose=self.verbose) - self.db = TrajectoryDatabase(db_dir) - # move any legacy paired obs data into sqlite - if hasattr(self.old_db, 'trajectories'): - self.db.moveJsonRecords(self.old_db.trajectories, self.old_db.failed_trajectories) - del self.old_db.trajectories - del self.old_db.failed_trajectories - self.saveDatabase() + self.db = TrajectoryDatabase(db_dir) self.observations_db = ObservationDatabase(db_dir) + # move any legacy paired obs data into sqlite if hasattr(self.old_db, 'paired_obs'): self.observations_db.moveJsonRecords(self.old_db.paired_obs) @@ -1053,13 +1062,13 @@ def removeDeletedTrajectories(self): 
return - def loadComputedTrajectories(self, traj_dir_path, dt_range=None): + def loadComputedTrajectories(self, dt_range=None): """ Load already estimated trajectories from disk within a date range. Arguments: - traj_dir_path: [str] Full path to a directory with trajectory pickles. + dt_range: [datetime, datetime] range of dates to load data for """ - + traj_dir_path = os.path.join(self.output_dir, OUTPUT_TRAJ_DIR) # defend against the case where there are no existing trajectories and traj_dir_path doesn't exist if not os.path.isdir(traj_dir_path): return @@ -1072,7 +1081,7 @@ def loadComputedTrajectories(self, traj_dir_path, dt_range=None): else: dt_beg, dt_end = dt_range - log.info(" Loading trajectories from: " + traj_dir_path) + log.info(" Loading found trajectories from: " + traj_dir_path) if self.dt_range is not None: log.info(" Datetime range: {:s} - {:s}".format( dt_beg.strftime("%Y-%m-%d %H:%M:%S"), @@ -1097,17 +1106,17 @@ def loadComputedTrajectories(self, traj_dir_path, dt_range=None): curr_dt = jd2Date(jdt, dt_obj=True) if curr_dt.year != yyyy: yyyy = curr_dt.year - log.info("- year " + str(yyyy)) + #log.info("- year " + str(yyyy)) if curr_dt.month != mm: mm = curr_dt.month yyyymm = f'{yyyy}{mm:02d}' - log.info(" - month " + str(yyyymm)) + #log.info(" - month " + str(yyyymm)) if curr_dt.day != dd: dd = curr_dt.day yyyymmdd = f'{yyyy}{mm:02d}{dd:02d}' - log.info(" - day " + str(yyyymmdd)) + #log.info(" - day " + str(yyyymmdd)) yyyymmdd_dir_path = os.path.join(traj_dir_path, f'{yyyy}', f'{yyyymm}', f'{yyyymmdd}') @@ -1128,7 +1137,7 @@ def loadComputedTrajectories(self, traj_dir_path, dt_range=None): # Print every 1000th trajectory if counter % 1000 == 0: - log.info(f" Loaded {counter:6d} trajectories, currently on {file_name}") + log.info(f" Loaded {counter:6d} trajectories") counter += 1 dir_paths.append(full_traj_dir) @@ -1944,9 +1953,16 @@ def _breakHandler(signum, frame): dt_range=(bin_beg, bin_end)) log.info(f'loaded {len(dh.unpaired_observations)} 
observations') - # refresh list of calculated trajectories from disk + # remove any trajectories that no longer exist on disk dh.removeDeletedTrajectories() - dh.loadComputedTrajectories(os.path.join(dh.output_dir, OUTPUT_TRAJ_DIR), dt_range=[bin_beg, bin_end]) + # load computed trajectories from disk into sqlite + dh.loadComputedTrajectories(dt_range=(bin_beg, bin_end)) + # move any legacy failed traj into sqlite + if hasattr(dh.old_db, 'failed_trajectories'): + dh.db.moveFailedTrajectories(dh.old_db.failed_trajectories, (bin_beg, bin_end)) + if dh.old_db.removeTrajectories((bin_beg, bin_end), failed=True) > 0: + dh.saveDatabase() + # Run the trajectory correlator tc = TrajectoryCorrelator(dh, trajectory_constraints, cml_args.velpart, data_in_j2000=True, enableOSM=cml_args.enableOSM) From b82689b78ebb8df3448a117910a1882533a3fc4b Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 28 Jan 2026 23:39:13 +0000 Subject: [PATCH 032/132] dont load the traj db in MCMODE2 as we don't need it --- wmpl/Trajectory/CorrelateRMS.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index b09095cf..12af67ec 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1953,15 +1953,16 @@ def _breakHandler(signum, frame): dt_range=(bin_beg, bin_end)) log.info(f'loaded {len(dh.unpaired_observations)} observations') - # remove any trajectories that no longer exist on disk - dh.removeDeletedTrajectories() - # load computed trajectories from disk into sqlite - dh.loadComputedTrajectories(dt_range=(bin_beg, bin_end)) - # move any legacy failed traj into sqlite - if hasattr(dh.old_db, 'failed_trajectories'): - dh.db.moveFailedTrajectories(dh.old_db.failed_trajectories, (bin_beg, bin_end)) - if dh.old_db.removeTrajectories((bin_beg, bin_end), failed=True) > 0: - dh.saveDatabase() + if mcmode != MCMODE_PHASE2: + # remove any trajectories that no longer exist 
on disk + dh.removeDeletedTrajectories() + # load computed trajectories from disk into sqlite + dh.loadComputedTrajectories(dt_range=(bin_beg, bin_end)) + # move any legacy failed traj into sqlite + if hasattr(dh.old_db, 'failed_trajectories'): + dh.db.moveFailedTrajectories(dh.old_db.failed_trajectories, (bin_beg, bin_end)) + if dh.old_db.removeTrajectories((bin_beg, bin_end), failed=True) > 0: + dh.saveDatabase() # Run the trajectory correlator From 83311488b83c8af0cc2ead3f5cff90e460fb8711 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 28 Jan 2026 23:39:24 +0000 Subject: [PATCH 033/132] some support to analyse the traj db --- wmpl/Trajectory/CorrelateDB.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 6c7358c4..df187af4 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -566,6 +566,12 @@ def mergeTrajDatabase(self, source_db_path): print(f'and {len(cur.fetchall())} unpaired obs') obsdb.closeObsDatabase() elif dbname == 'trajectories': - print('hello') + trajdb = TrajectoryDatabase(cml_args.dir_path) + if action == 'read': + cur = trajdb.dbhandle.cursor() + cur.execute('select * from trajectories where status=1') + print(f'there are {len(cur.fetchall())} successful trajectories') + cur.execute('select * from failed_trajectories') + print(f'and {len(cur.fetchall())} failed trajectories') else: log.info('valid database not specified') From 62b45cd5ae93b925c8d1603767e16f05034423e3 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 00:26:23 +0000 Subject: [PATCH 034/132] improve logging --- wmpl/Trajectory/CorrelateDB.py | 2 +- wmpl/Trajectory/CorrelateEngine.py | 22 ++++++++++------------ wmpl/Trajectory/CorrelateRMS.py | 4 ++-- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index df187af4..3f1122a2 100644 --- 
a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -300,7 +300,7 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False, commitnow=Tru # add or update an entry in the database, setting status = 1 if verbose: - log.info(f'adding {traj_reduced.traj_id} with jdt {traj_reduced.jdt_ref}') + log.info(f'adding {traj_reduced.traj_id} with jdt {traj_reduced.jdt_ref} to {"failed" if failed else "traj"}') cur = self.dbhandle.cursor() if failed: traj_id = 'None' if not hasattr(traj_reduced, 'traj_id') or traj_reduced.traj_id is None else traj_reduced.traj_id diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 87e83e0e..48b2150c 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -887,12 +887,11 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or if skip_trajectory: # Add the trajectory to the list of failed trajectories - self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref) + self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=True) log.info(f"Trajectory at {jdt_ref} skipped and added to fails!") if matched_obs: for _, met_obs_temp, _ in matched_obs: - log.info(f'Marking {met_obs_temp.id} unpaired') - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) return False # If there are only two stations, make sure to reject solutions which have stations with @@ -904,10 +903,9 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info("2 station only solution, one station has an error above the maximum limit, skipping!") # Add the trajectory to the list of failed trajectories - self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref) - for _, met_obs_temp, _ in matched_obs: - log.info(f'Marking {met_obs_temp.id} unpaired') - 
self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) + self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=True) + for _, met_obs_temp, _ in traj_status.observations: + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) return False @@ -1007,7 +1005,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Add the trajectory to the list of failed trajectories if mcmode != MCMODE_PHASE2: - self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref) + self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=True) log.info('Trajectory failed to solve') self.dh.cleanupPhase2TempPickle(save_traj) return False @@ -1351,7 +1349,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): else: for met_obs_temp, _ in candidate_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) log.info("New trajectory solution failed, keeping the old trajectory...") ### ### @@ -1768,10 +1766,10 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): if t0 != 0.0: failed_traj.jdt_ref = failed_traj.jdt_ref + t0/86400.0 - self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref) + self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref, verbose=True) for _, met_obs_temp, _ in matched_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) log.info("Trajectory skipped and added to fails!") continue @@ -1837,7 +1835,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): if self.dh.checkTrajIfFailed(traj): log.info("The same trajectory already failed to be computed in previous runs!") for _, met_obs_temp, _ in matched_observations: - 
self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) continue # pass in matched_observations here so that solveTrajectory can mark them paired if they're used diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 12af67ec..e35fb220 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1362,7 +1362,7 @@ def addTrajectory(self, traj, failed_jdt_ref=None): if failed_jdt_ref is not None: traj_reduced.jdt_ref = failed_jdt_ref - self.db.addTrajectory(traj_reduced, failed=(failed_jdt_ref is not None)) + self.db.addTrajectory(traj_reduced, failed=(failed_jdt_ref is not None), verbose=True) @@ -1450,7 +1450,7 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire if self.checkTrajIfFailed(traj): log.info(f'Trajectory at {jd2Date(traj.jdt_ref,dt_obj=True).isoformat()} already failed, skipping') for _, met_obs_temp, _ in cand: - self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id) + self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) remaining_unpaired -= 1 else: candidate_trajectories.append(cand) From 02b6c0eaa4bab0f15bf4c813c53bf8f903474598 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 00:33:11 +0000 Subject: [PATCH 035/132] handle verbose flags better --- wmpl/Trajectory/CorrelateRMS.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index e35fb220..ad79ea1d 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1341,7 +1341,7 @@ def saveTrajectoryResults(self, traj, save_plots): pass traj.save_results = False - def addTrajectory(self, traj, failed_jdt_ref=None): + def addTrajectory(self, traj, failed_jdt_ref=None, verbose=False): """ Add the resulting trajectory to 
the database. Arguments: @@ -1362,7 +1362,7 @@ def addTrajectory(self, traj, failed_jdt_ref=None): if failed_jdt_ref is not None: traj_reduced.jdt_ref = failed_jdt_ref - self.db.addTrajectory(traj_reduced, failed=(failed_jdt_ref is not None), verbose=True) + self.db.addTrajectory(traj_reduced, failed=(failed_jdt_ref is not None), verbose=verbose) From 13e6836d588e56dad45f6c44d938b74b1efce910 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 01:05:21 +0000 Subject: [PATCH 036/132] more debug --- wmpl/Trajectory/CorrelateDB.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 3f1122a2..f6fb3210 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -308,13 +308,16 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False, commitnow=Tru radiant_eci_mini = [0,0,0] if traj_reduced.radiant_eci_mini is None else traj_reduced.radiant_eci_mini state_vect_mini = [0,0,0] if traj_reduced.state_vect_mini is None else traj_reduced.state_vect_mini - cur.execute(f'insert or replace into failed_trajectories values (' + sql_str = (f'insert or replace into failed_trajectories values (' f"{traj_reduced.jdt_ref}, '{traj_id}', '{traj_reduced.traj_file_path}'," f"'{json.dumps(traj_reduced.participating_stations)}'," f"'{json.dumps(traj_reduced.ignored_stations)}'," f"'{json.dumps(radiant_eci_mini)}'," f"'{json.dumps(state_vect_mini)}'," f"0,{v_init},{traj_reduced.gravity_factor},1)") + if verbose: + log.info(sql_str) + cur.execute(sql_str) else: sql_str = (f'insert or replace into trajectories values (' f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_reduced.traj_file_path}'," From 9ec2eaa44c488105a4bb03b7e3337b7d9eee2de0 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 01:13:38 +0000 Subject: [PATCH 037/132] remove commitnow as its causing issues --- wmpl/Trajectory/CorrelateDB.py | 14 +++++--------- 1 file 
changed, 5 insertions(+), 9 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index f6fb3210..0efd53fa 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -69,7 +69,7 @@ def checkObsPaired(self, station_code, obs_id): return paired - def addPairedObs(self, station_code, obs_id, obs_date, commitnow=True, verbose=False): + def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): # add or update an entry in the database, setting status = 1 cur = self.dbhandle.cursor() res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}'") @@ -84,8 +84,6 @@ def addPairedObs(self, station_code, obs_id, obs_date, commitnow=True, verbose=F cur.execute(sqlstr) cur.close() - if commitnow: - self.dbhandle.commit() if not self.checkObsPaired(station_code, obs_id): log.warning(f'failed to add {obs_id} to paired_obs table') return False @@ -152,7 +150,7 @@ def moveJsonRecords(self, paired_obs): obs_date = datetime.datetime.strptime(obs_id.split('_')[1], '%Y%m%d-%H%M%S.%f') except Exception: obs_date = datetime.datetime(2000,1,1,0,0,0) - self.addPairedObs(stat_id, obs_id, obs_date, commitnow=False) + self.addPairedObs(stat_id, obs_id, obs_date) i += 1 if not i % 100000: log.info(f'moved {i} observations') @@ -296,7 +294,7 @@ def checkTrajIfFailed(self, traj_reduced, verbose=False): return False - def addTrajectory(self, traj_reduced, failed=False, verbose=False, commitnow=True): + def addTrajectory(self, traj_reduced, failed=False, verbose=False): # add or update an entry in the database, setting status = 1 if verbose: @@ -332,9 +330,7 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False, commitnow=Tru f"{traj_reduced.rend_lat},{traj_reduced.rend_lon},{traj_reduced.rend_ele},1)") sql_str = sql_str.replace('nan','"NaN"') cur.execute(sql_str) - if commitnow: - self.dbhandle.commit() - + self.dbhandle.commit() cur.close() return True @@ -478,7 
+474,7 @@ def moveFailedTrajectories(self, failed_trajectories, dt_range): keylist = [k for k in failed_trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] i = 0 # just in case there aren't any trajectories to move for i,jdt_ref in enumerate(keylist): - self.addTrajectory(failed_trajectories[jdt_ref], failed=True, commitnow=False) + self.addTrajectory(failed_trajectories[jdt_ref], failed=True) i += 1 if not i % 10000: self.commitTrajDatabase() From 7a8d4e36076b10eb3019e919bcfd762cc82d361d Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 01:50:13 +0000 Subject: [PATCH 038/132] debug --- wmpl/Trajectory/CorrelateDB.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 0efd53fa..9c94fe56 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -316,6 +316,8 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): if verbose: log.info(sql_str) cur.execute(sql_str) + cur.execute(f"select * from failed_trajectories where jdt_ref = '{traj_reduced.jdt_ref}") + print(cur.fetchall()) else: sql_str = (f'insert or replace into trajectories values (' f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_reduced.traj_file_path}'," From 36a9f0b33281359efd7bd4c2ed42b626798dd9dd Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 01:51:44 +0000 Subject: [PATCH 039/132] debug --- wmpl/Trajectory/CorrelateDB.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 9c94fe56..a8953f69 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -316,7 +316,7 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): if verbose: log.info(sql_str) cur.execute(sql_str) - cur.execute(f"select * from failed_trajectories where jdt_ref = '{traj_reduced.jdt_ref}") + cur.execute(f"select * from failed_trajectories 
where jdt_ref = {traj_reduced.jdt_ref}) print(cur.fetchall()) else: sql_str = (f'insert or replace into trajectories values (' From ebf54735d51c88f7485e8ff324b4f219837d0327 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 01:52:21 +0000 Subject: [PATCH 040/132] typo --- wmpl/Trajectory/CorrelateDB.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index a8953f69..f79de020 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -316,7 +316,7 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): if verbose: log.info(sql_str) cur.execute(sql_str) - cur.execute(f"select * from failed_trajectories where jdt_ref = {traj_reduced.jdt_ref}) + cur.execute(f"select * from failed_trajectories where jdt_ref = {traj_reduced.jdt_ref}") print(cur.fetchall()) else: sql_str = (f'insert or replace into trajectories values (' From 7e884680339a441a85fd2e6fe37d8a63120a0f4a Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 15:05:39 +0000 Subject: [PATCH 041/132] more doc and debug to track down issue --- wmpl/Trajectory/CorrelateDB.py | 40 ++++++++++++++++----------------- wmpl/Trajectory/CorrelateRMS.py | 3 ++- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index f79de020..b5f7bcd5 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -273,41 +273,37 @@ def closeTrajDatabase(self): def checkTrajIfFailed(self, traj_reduced, verbose=False): # return True if there is an observation with the same jdt_ref and matching list of stations - if not hasattr(traj_reduced, 'jdt_ref'): + if not hasattr(traj_reduced, 'jdt_ref') or not hasattr(traj_reduced, 'participating_stations') or not hasattr(traj_reduced, 'ignored_stations'): return False + found = False station_list = list(set(traj_reduced.participating_stations + 
traj_reduced.ignored_stations)) cur = self.dbhandle.cursor() - res = cur.execute(f"SELECT traj_id FROM failed_trajectories WHERE jdt_ref={traj_reduced.jdt_ref} and status=1") - if res.fetchone() is None: - cur.close() - return False + res = cur.execute(f"SELECT traj_id,participating_stations, ignored_stations FROM failed_trajectories WHERE jdt_ref={traj_reduced.jdt_ref} and status=1") + row = res.fetchone() + if row is None: + found = False else: - res = cur.execute(f"SELECT participating_stations, ignored_stations FROM failed_trajectories WHERE jdt_ref={traj_reduced.jdt_ref}") - row = res.fetchone() - traj_stations = list(set(json.loads(row[0]) + json.loads(row[1]))) - if traj_stations == station_list: - cur.close() - return True - else: - cur.close() - return False - + traj_stations = list(set(json.loads(row[1]) + json.loads(row[2]))) + found = True if (traj_stations == station_list) else False + cur.close() + return found def addTrajectory(self, traj_reduced, failed=False, verbose=False): # add or update an entry in the database, setting status = 1 if verbose: - log.info(f'adding {traj_reduced.traj_id} with jdt {traj_reduced.jdt_ref} to {"failed" if failed else "traj"}') + log.info(f'adding jdt {traj_reduced.jdt_ref} to {"failed" if failed else "trajectories"}') cur = self.dbhandle.cursor() if failed: traj_id = 'None' if not hasattr(traj_reduced, 'traj_id') or traj_reduced.traj_id is None else traj_reduced.traj_id v_init = 0 if traj_reduced.v_init is None else traj_reduced.v_init radiant_eci_mini = [0,0,0] if traj_reduced.radiant_eci_mini is None else traj_reduced.radiant_eci_mini state_vect_mini = [0,0,0] if traj_reduced.state_vect_mini is None else traj_reduced.state_vect_mini + traj_file_path = traj_reduced.traj_file_path[traj_reduced.traj_file_path.find('trajectories'):] sql_str = (f'insert or replace into failed_trajectories values (' - f"{traj_reduced.jdt_ref}, '{traj_id}', '{traj_reduced.traj_file_path}'," + f"{traj_reduced.jdt_ref}, '{traj_id}', 
'{traj_file_path}'," f"'{json.dumps(traj_reduced.participating_stations)}'," f"'{json.dumps(traj_reduced.ignored_stations)}'," f"'{json.dumps(radiant_eci_mini)}'," @@ -318,9 +314,12 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): cur.execute(sql_str) cur.execute(f"select * from failed_trajectories where jdt_ref = {traj_reduced.jdt_ref}") print(cur.fetchall()) + cur.execute("select count(jdt_ref) from failed_trajectories") + print('there are', cur.fetchall(),'fails') else: + traj_file_path = traj_reduced.traj_file_path[traj_reduced.traj_file_path.find('trajectories'):] sql_str = (f'insert or replace into trajectories values (' - f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_reduced.traj_file_path}'," + f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_file_path}'," f"'{json.dumps(traj_reduced.participating_stations)}'," f"'{json.dumps(traj_reduced.ignored_stations)}'," f"'{json.dumps(traj_reduced.radiant_eci_mini)}'," @@ -336,7 +335,6 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): cur.close() return True - def removeTrajectory(self, traj_reduced, keepFolder=False, failed=False, verbose=False): # if an entry exists, update the status to 0. 
# this allows us to mark an observation paired, then unpair it later if the solution fails @@ -364,7 +362,7 @@ def removeTrajectory(self, traj_reduced, keepFolder=False, failed=False, verbose return True - def getTrajectories(self, jdt_start, jdt_end=None, failed=False, verbose=False): + def getTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, verbose=False): table_name = 'failed_trajectories' if failed else 'trajectories' if verbose: @@ -380,7 +378,7 @@ def getTrajectories(self, jdt_start, jdt_end=None, failed=False, verbose=False): cur.close() trajs = [] for rw in rows: - json_dict = {'jdt_ref':rw[0], 'traj_id':rw[1], 'traj_file_path':rw[2], + json_dict = {'jdt_ref':rw[0], 'traj_id':rw[1], 'traj_file_path':os.path.join(output_dir, rw[2]), 'participating_stations': json.loads(rw[3]), 'ignored_stations': json.loads(rw[4]), 'radiant_eci_mini': json.loads(rw[5]), diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index ad79ea1d..3335aa51 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -671,6 +671,7 @@ def __init__(self, station, obs_id): self.station_code = station self.id = obs_id + # TODO FIX ME I AM BROKEN BECAUSE OF NOO DATABASE archdate = datetime.datetime.now(datetime.timezone.utc) - relativedelta(months=older_than) archdate_jd = datetime2JD(archdate) @@ -1150,7 +1151,7 @@ def loadComputedTrajectories(self, dt_range=None): def getComputedTrajectories(self, jd_beg, jd_end): """ Returns a list of computed trajectories between the Julian dates. 
""" - json_dicts = self.db.getTrajectories(jd_beg, jd_end) + json_dicts = self.db.getTrajectories(self.output_dir, jd_beg, jd_end) trajs = [TrajectoryReduced(None, json_dict=j) for j in json_dicts] return trajs From 2eaeb91c23b5ef36054b9ff65943130aedce28af Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 15:16:38 +0000 Subject: [PATCH 042/132] fix bug in fails table --- wmpl/Trajectory/CorrelateDB.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index b5f7bcd5..432d8ac6 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -239,9 +239,10 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal res = cur.execute("SELECT name FROM sqlite_master WHERE name='failed_trajectories'") if res.fetchone() is None: + # note: traj_id not unique here as some fails will have traj-id None cur.execute("""CREATE TABLE failed_trajectories( jdt_ref REAL UNIQUE, - traj_id VARCHAR UNIQUE, + traj_id VARCHAR, traj_file_path VARCHAR, participating_stations VARCHAR, ignored_stations VARCHAR, From ad4eec4f354ae9694b057e82f3d6db038c21bc86 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 15:24:36 +0000 Subject: [PATCH 043/132] bugfix --- wmpl/Trajectory/CorrelateEngine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 48b2150c..5cd448a6 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -904,7 +904,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Add the trajectory to the list of failed trajectories self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=True) - for _, met_obs_temp, _ in traj_status.observations: + for _, met_obs_temp, _ in matched_obs: self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, 
verbose=True) return False From d27a13b0a95fcd127742cf3240adc26daa14241d Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 15:25:16 +0000 Subject: [PATCH 044/132] simplify code --- wmpl/Trajectory/CorrelateDB.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 432d8ac6..dc493c30 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -296,12 +296,15 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): if verbose: log.info(f'adding jdt {traj_reduced.jdt_ref} to {"failed" if failed else "trajectories"}') cur = self.dbhandle.cursor() + # remove the output_dir part from the path so that the data are location-independent + traj_file_path = traj_reduced.traj_file_path[traj_reduced.traj_file_path.find('trajectories'):] + if failed: + # fixup possible bad values traj_id = 'None' if not hasattr(traj_reduced, 'traj_id') or traj_reduced.traj_id is None else traj_reduced.traj_id v_init = 0 if traj_reduced.v_init is None else traj_reduced.v_init radiant_eci_mini = [0,0,0] if traj_reduced.radiant_eci_mini is None else traj_reduced.radiant_eci_mini state_vect_mini = [0,0,0] if traj_reduced.state_vect_mini is None else traj_reduced.state_vect_mini - traj_file_path = traj_reduced.traj_file_path[traj_reduced.traj_file_path.find('trajectories'):] sql_str = (f'insert or replace into failed_trajectories values (' f"{traj_reduced.jdt_ref}, '{traj_id}', '{traj_file_path}'," @@ -310,15 +313,7 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): f"'{json.dumps(radiant_eci_mini)}'," f"'{json.dumps(state_vect_mini)}'," f"0,{v_init},{traj_reduced.gravity_factor},1)") - if verbose: - log.info(sql_str) - cur.execute(sql_str) - cur.execute(f"select * from failed_trajectories where jdt_ref = {traj_reduced.jdt_ref}") - print(cur.fetchall()) - cur.execute("select count(jdt_ref) from failed_trajectories") - 
print('there are', cur.fetchall(),'fails') else: - traj_file_path = traj_reduced.traj_file_path[traj_reduced.traj_file_path.find('trajectories'):] sql_str = (f'insert or replace into trajectories values (' f"{traj_reduced.jdt_ref}, '{traj_reduced.traj_id}', '{traj_file_path}'," f"'{json.dumps(traj_reduced.participating_stations)}'," @@ -330,8 +325,9 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): f"{traj_reduced.rbeg_jd},{traj_reduced.rend_jd}," f"{traj_reduced.rbeg_lat},{traj_reduced.rbeg_lon},{traj_reduced.rbeg_ele}," f"{traj_reduced.rend_lat},{traj_reduced.rend_lon},{traj_reduced.rend_ele},1)") - sql_str = sql_str.replace('nan','"NaN"') - cur.execute(sql_str) + + sql_str = sql_str.replace('nan','"NaN"') + cur.execute(sql_str) self.dbhandle.commit() cur.close() return True From 6c28d9d7b06d64b0a3749e45ef503750210c5821 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 15:37:15 +0000 Subject: [PATCH 045/132] bugfixes in correlateDB --- wmpl/Trajectory/CorrelateDB.py | 36 ++++++++++++++-------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index dc493c30..059106d8 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -36,8 +36,8 @@ def openObsDatabase(self, db_path, db_name='observations.db', purge_records=Fals cur = con.cursor() if purge_records: cur.execute('drop table paired_obs') - res = cur.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") - if res.fetchone() is None: + cur.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") + if cur.fetchone() is None: cur.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") con.commit() cur.close() @@ -62,8 +62,8 @@ def checkObsPaired(self, station_code, obs_id): paired = True cur = self.dbhandle.cursor() - res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE 
station_code='{station_code}' and obs_id='{obs_id}' and status=1") - if res.fetchone() is None: + cur.execute(f"SELECT obs_id FROM paired_obs WHERE obs_id='{obs_id}' and status=1") + if cur.fetchone() is None: paired = False cur.close() return paired @@ -71,16 +71,10 @@ def checkObsPaired(self, station_code, obs_id): def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): # add or update an entry in the database, setting status = 1 + if verbose: + log.info(f'adding {obs_id} to paired_obs table') cur = self.dbhandle.cursor() - res = cur.execute(f"SELECT obs_id FROM paired_obs WHERE station_code='{station_code}' and obs_id='{obs_id}'") - if res.fetchone() is None: - if verbose: - log.info(f'adding {obs_id} to paired_obs table') - sqlstr = f"insert into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" - else: - if verbose: - log.info(f'updating {obs_id} in paired_obs table') - sqlstr = f"update paired_obs set status=1 where station_code='{station_code}' and obs_id='{obs_id}'" + sqlstr = f"insert or replace into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" cur.execute(sqlstr) cur.close() @@ -367,11 +361,11 @@ def getTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, ver cur = self.dbhandle.cursor() if not jdt_end: - res = cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") - rows = res.fetchall() + cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") + rows = cur.fetchall() else: - res = cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") - rows = res.fetchall() + cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") + rows = cur.fetchall() cur.close() trajs = [] for rw in rows: @@ -399,11 +393,11 @@ def removeDeletedTrajectories(self, jdt_start, jdt_end=None, failed=False, verbo cur = self.dbhandle.cursor() if not jdt_end: - res = cur.execute(f"SELECT * FROM 
{table_name} WHERE jdt_ref={jdt_start}") - rows = res.fetchall() + cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") + rows = cur.fetchall() else: - res = cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") - rows = res.fetchall() + cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") + rows = cur.fetchall() cur.close() i = 0 # initial value in case there are zero rows for i, rw in enumerate(rows): From 863d7e2f2b77aefb7b0c6c65222c1e5b54e4ba4c Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 16:33:41 +0000 Subject: [PATCH 046/132] avoid removing trajectories incorrectly --- wmpl/Trajectory/CorrelateDB.py | 4 ++-- wmpl/Trajectory/CorrelateRMS.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 059106d8..91418313 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -385,7 +385,7 @@ def getTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, ver return trajs - def removeDeletedTrajectories(self, jdt_start, jdt_end=None, failed=False, verbose=False): + def removeDeletedTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, verbose=False): table_name = 'failed_trajectories' if failed else 'trajectories' if verbose: @@ -401,7 +401,7 @@ def removeDeletedTrajectories(self, jdt_start, jdt_end=None, failed=False, verbo cur.close() i = 0 # initial value in case there are zero rows for i, rw in enumerate(rows): - if not os.path.isfile(rw[2]): + if not os.path.isfile(os.path.join(output_dir, rw[2])): if verbose: log.info(f'removing traj {jd2Date(rw[0], dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")} from database') self.removeTrajectory(DummyTrajReduced(rw[0], rw[1], rw[2]), keepFolder=True) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 3335aa51..14aff2a0 100644 --- 
a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1058,7 +1058,7 @@ def removeDeletedTrajectories(self): jdt_start = datetime2JD(self.dt_range[0]) jdt_end = datetime2JD(self.dt_range[1]) - self.db.removeDeletedTrajectories(jdt_start, jdt_end) + self.db.removeDeletedTrajectories(self.output_dir, jdt_start, jdt_end) return From 50eb9dfc49f7b50cdb29030eadf0d1dbdd5d8a20 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 29 Jan 2026 16:44:21 +0000 Subject: [PATCH 047/132] ting bug in reporting of removed traj --- wmpl/Trajectory/CorrelateDB.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 91418313..f8573d4c 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -399,12 +399,13 @@ def removeDeletedTrajectories(self, output_dir, jdt_start, jdt_end=None, failed= cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") rows = cur.fetchall() cur.close() - i = 0 # initial value in case there are zero rows - for i, rw in enumerate(rows): + i = 0 + for rw in rows: if not os.path.isfile(os.path.join(output_dir, rw[2])): if verbose: log.info(f'removing traj {jd2Date(rw[0], dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")} from database') self.removeTrajectory(DummyTrajReduced(rw[0], rw[1], rw[2]), keepFolder=True) + i += 1 log.info(f'removed {i} deleted trajectories') return From 36a880bbb6dfb04923d9ee284aeeceb03f64e2cb Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 30 Jan 2026 21:28:52 +0000 Subject: [PATCH 048/132] Client-mode remote data processing --- wmpl/Trajectory/CorrelateDB.py | 2 + wmpl/Trajectory/CorrelateEngine.py | 33 +--- wmpl/Trajectory/CorrelateRMS.py | 158 +++++++++++---- wmpl/Utils/remoteDataHandling.py | 304 +++++++++++++++++------------ 4 files changed, 309 insertions(+), 188 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 
f8573d4c..64351660 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -54,6 +54,7 @@ def closeObsDatabase(self): self.dbhandle.commit() self.dbhandle.close() + self.dbhandle = None return @@ -262,6 +263,7 @@ def closeTrajDatabase(self): self.dbhandle.commit() self.dbhandle.close() + self.dbhandle = None return diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 5cd448a6..1bf956b1 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -8,8 +8,6 @@ import multiprocessing import logging import os -import glob -from random import randrange import platform import numpy as np @@ -20,14 +18,15 @@ from wmpl.Utils.ShowerAssociation import associateShowerTraj from wmpl.Utils.TrajConversions import J2000_JD, geo2Cartesian, cartesian2Geo, raDec2AltAz, altAz2RADec, \ raDec2ECI, datetime2JD, jd2Date, equatorialCoordPrecession_vect -from wmpl.Utils.Pickling import loadPickle, savePickle +from wmpl.Utils.Pickling import loadPickle MCMODE_NONE = 0 MCMODE_PHASE1 = 1 MCMODE_PHASE2 = 2 MCMODE_CANDS = 4 -MCMODE_ALL = MCMODE_CANDS + MCMODE_PHASE1 + MCMODE_PHASE2 MCMODE_SIMPLE = MCMODE_CANDS + MCMODE_PHASE1 +MCMODE_BOTH = MCMODE_PHASE1 + MCMODE_PHASE2 +MCMODE_ALL = MCMODE_CANDS + MCMODE_PHASE1 + MCMODE_PHASE2 # Grab the logger from the main thread @@ -1597,31 +1596,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): log.info('SAVING {} CANDIDATES'.format(len(candidate_trajectories))) log.info("-----------------------") - for matched_observations in candidate_trajectories: - # randomly select a node from the list of nodes then check that its actually listening - # and hasn't already received its max allocation. 
The master node gets anything left - while True: - curr_node = list(self.node_list.keys())[randrange(len(self.node_list.keys()))] - save_path = self.node_list[curr_node]['node_path'] - if curr_node == platform.uname()[1]: - break - listen_file = os.path.join(save_path, f'{curr_node}.listening') - if os.path.isfile(listen_file): - # if the folder already has enough candidates then use the master node - if len(glob.glob(os.path.join(save_path, '*.pickle'))) >= self.node_list[curr_node]['node_max']: - save_path = self.node_list[platform.uname()[1]]['node_path'] - break - - ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) - #log.debug(str(ref_dt).replace(" ", "_")) - picklename = str(ref_dt.timestamp()) + '.pickle' - savePickle(matched_observations, save_path, picklename) - - for curr_node in self.node_list.keys(): - save_path = self.node_list[curr_node]['node_path'] - log.info("-----------------------") - log.info(f'There are {len(glob.glob(os.path.join(save_path, "*.pickle")))} candidates for {curr_node}') - log.info("-----------------------") + self.dh.saveCandidates(candidate_trajectories) return else: log.info("-----------------------") diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 14aff2a0..53c9774a 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -19,6 +19,8 @@ import glob from dateutil.relativedelta import relativedelta import numpy as np +from random import randrange +import platform from wmpl.Formats.CAMS import loadFTPDetectInfo from wmpl.Trajectory.CorrelateEngine import TrajectoryCorrelator, TrajectoryConstraints @@ -30,7 +32,7 @@ from wmpl.Trajectory.CorrelateDB import ObservationDatabase, TrajectoryDatabase from wmpl.Trajectory.Trajectory import Trajectory -from wmpl.Trajectory.CorrelateEngine import MCMODE_CANDS, MCMODE_PHASE1, MCMODE_PHASE2, MCMODE_ALL +from wmpl.Trajectory.CorrelateEngine import MCMODE_CANDS, MCMODE_PHASE1, MCMODE_PHASE2, 
MCMODE_ALL, MCMODE_BOTH ### CONSTANTS ### @@ -349,6 +351,25 @@ def removeTrajectories(self, dt_range, failed=False): log.info(f'deleted {len(keys)} keys from {"failed_trajectories" if failed else "trajectories"}') return len(keys) + def archiveRecords(self, db_dir, arch_prefix, archdate_jd): + arch_db_path = os.path.join(db_dir, f'{arch_prefix}_{JSON_DB_NAME}') + archdb = DatabaseJSON(arch_db_path, verbose=self.verbose, archiveYM=arch_prefix) + log.info(f'Archiving db records to {arch_db_path}...') + + for traj in [t for t in self.old_db.trajectories if t < archdate_jd]: + if traj < archdate_jd: + archdb.addTrajectory(self.old_db.trajectories[traj], False) + del self.old_db.trajectories[traj] + + for traj in [t for t in self.old_db.failed_trajectories if t < archdate_jd]: + if traj < archdate_jd: + archdb.addTrajectory(self.old_db.failed_trajectories[traj], True) + del self.old_db.failed_trajectories[traj] + + archdb.save() + self.db.save() + + class MeteorPointRMS(object): def __init__(self, frame, time_rel, x, y, ra, dec, azim, alt, mag): @@ -495,7 +516,7 @@ def __init__(self, **entries): class RMSDataHandle(object): - def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode=MCMODE_ALL, max_trajs=1000, remotehost=None, verbose=False, archivemonths=3): + def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode=MCMODE_ALL, max_trajs=1000, verbose=False, archivemonths=3): """ Handles data interfacing between the trajectory correlator and RMS data files on disk. 
Arguments: @@ -538,28 +559,34 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode # Create the output directory if it doesn't exist mkdirP(self.output_dir) + # Candidate directory, if running in create or load cands modes + self.candidate_dir = os.path.join(self.output_dir, 'candidates') + if not self.mc_mode & MCMODE_PHASE2: + mkdirP(os.path.join(self.candidate_dir, 'processed')) + # Phase 1 trajectory pickle directory needed to reload previous results. self.phase1_dir = os.path.join(self.output_dir, 'phase1') - - # create the directory for phase1 simple trajectories, if needed if self.mc_mode & MCMODE_PHASE1: mkdirP(os.path.join(self.phase1_dir, 'processed')) self.purgePhase1ProcessedData(os.path.join(self.phase1_dir, 'processed')) - self.remotehost = remotehost - self.verbose = verbose ############################ # Load database of processed folders database_path = os.path.join(self.db_dir, JSON_DB_NAME) + log.info("") + + # Initialise remote data handling, if the config file is present remote_cfg = os.path.join(self.db_dir, 'wmpl_remote.cfg') - self.remotedatahandler = RemoteDataHandler(remote_cfg) - # move any remotely calculated pickles to their target locations - if os.path.isdir(os.path.join(self.output_dir, 'remoteuploads')): - self.remotedatahandler.moveRemoteData(self.output_dir) + if os.path.isfile(remote_cfg): + log.info('remote data management requested, initialising') + self.RemoteDatahandler = RemoteDataHandler(remote_cfg) + self.gatherRemoteData(mcmode, verbose=False) + else: + self.RemoteDatahandler = None if mcmode != MCMODE_PHASE2: log.info("Loading database: {:s}".format(database_path)) @@ -592,9 +619,6 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode log.info(" ... 
done!") else: - # retrieve pickles from a remote host, if configured - if self.remotehost is not None: - self.remotedatahandler.collectRemoteData(remotehost, max_trajs, self.phase1_dir) # reload the phase1 trajectories dt_beg, dt_end = self.loadPhase1Trajectories(max_trajs=max_trajs) @@ -671,28 +695,14 @@ def __init__(self, station, obs_id): self.station_code = station self.id = obs_id - # TODO FIX ME I AM BROKEN BECAUSE OF NOO DATABASE archdate = datetime.datetime.now(datetime.timezone.utc) - relativedelta(months=older_than) archdate_jd = datetime2JD(archdate) + arch_prefix = archdate.strftime("%Y%m") - self.observations_db.archiveObsDatabase(self.db_dir, archdate.strftime("%Y%m"), archdate_jd) - - arch_db_path = os.path.join(self.db_dir, f'{archdate.strftime("%Y%m")}_{JSON_DB_NAME}') - archdb = DatabaseJSON(arch_db_path, verbose=self.verbose, archiveYM=archdate.strftime("%Y%m")) - log.info(f'Archiving db records to {arch_db_path}...') - - for traj in [t for t in self.db.trajectories if t < archdate_jd]: - if traj < archdate_jd: - archdb.addTrajectory(self.db.trajectories[traj], False) - del self.db.trajectories[traj] - - for traj in [t for t in self.db.failed_trajectories if t < archdate_jd]: - if traj < archdate_jd: - archdb.addTrajectory(self.db.failed_trajectories[traj], True) - del self.db.failed_trajectories[traj] + self.observations_db.archiveObsDatabase(self.db_dir, arch_prefix, archdate_jd) + self.db.archiveTrajDatabase(self.db_dir, arch_prefix, archdate_jd) + self.old_db.archiveRecords(self.db_dir, arch_prefix, archdate_jd) - archdb.save() - self.db.save() return def loadStations(self): @@ -1328,11 +1338,6 @@ def saveTrajectoryResults(self, traj, save_plots): # we're including additional observations we need to use the most recent version of the trajectory savePickle(traj, os.path.join(self.phase1_dir, 'processed'), traj.pre_mc_longname + '_trajectory.pickle') - if self.remotehost is not None: - log.info('saving to remote host') - 
self.remotedatahandler.uploadDataToRemote(remotehost, traj.file_name + '_trajectory.pickle', output_dir) - log.info(' ...done') - # Save the plots if save_plots: traj.save_results = True @@ -1591,8 +1596,78 @@ def _breakHandler(signum, frame): # Restore the signal functionality signal.signal(signal.SIGINT, original_signal) + + def distributeToChildren(self, verbose=False): + """ + In 'master' mode this distributes candidates or phase1 trajectories to the children + """ + return + + def uploadToMaster(self, verbose=False): + """ + In 'child' mode this sends solved data back to the master node + """ + # close the databases and upload the data to the master node + self.db.closeTrajDatabase() + self.observations_db.closeObsDatabase() + + self.RemoteDatahandler.uploadToMaster(self.output_dir, verbose=True) + + # truncate the tables here so they are clean for the next run + self.db = TrajectoryDatabase(self.db_dir, purge_records=True) + self.observations_db = ObservationDatabase(self.db_dir, purge_records=True) + return + + def gatherRemoteData(self, mcmode, verbose=False): + """ + In master mode this gathers data thats been uploaded by the children and relocates + it to the correct places. + In 'child' mode it downloads data from the master for local processing. 
+ """ + if self.RemoteDatahandler.mode == 'master': + # TODO make this bit work properly + # move remotely processed data from upload folders to the correct locations on the master node + self.RemoteDatahandler.moveRemoteData(self.output_dir) + self.RemoteDatahandler.moveRemoteSimpleData(self.output_dir) + + else: + # collect candidates or phase1 solutions from the master node + if mcmode == MCMODE_PHASE1 or mcmode == MCMODE_BOTH: + self.RemoteDatahandler.collectRemoteData('candidates', self.output_dir) + elif mcmode == MCMODE_PHASE2: + self.RemoteDatahandler.collectRemoteData('phase1', self.output_dir) + return + + def saveCandidates(self, candidate_trajectories): + for matched_observations in candidate_trajectories: + ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) + picklename = str(ref_dt.timestamp()) + '.pickle' + + if self.RemoteDatahandler: + #TODO get candidate folder name here + # randomly select a node from the list of nodes then check that its actually listening + # and hasn't already received its max allocation. 
The master node gets anything left + while True: + curr_node = list(self.node_list.keys())[randrange(len(self.node_list.keys()))] + save_path = self.node_list[curr_node]['node_path'] + if curr_node == platform.uname()[1]: + break + listen_file = os.path.join(save_path, f'{curr_node}.listening') + if os.path.isfile(listen_file): + # if the folder already has enough candidates then use the master node + if len(glob.glob(os.path.join(save_path, '*.pickle'))) >= self.node_list[curr_node]['node_max']: + save_path = self.node_list[platform.uname()[1]]['node_path'] + break + else: + save_folder = self.candidate_dir + + savePickle(matched_observations, save_folder, picklename) + + log.info("-----------------------") + log.info(f'Saved {len(candidate_trajectories)} candidates') + log.info("-----------------------") + - @@ -1879,7 +1954,7 @@ def _breakHandler(signum, frame): dh = RMSDataHandle( cml_args.dir_path, dt_range=event_time_range, db_dir=cml_args.dbdir, output_dir=cml_args.outdir, - mcmode=mcmode, max_trajs=max_trajs, remotehost=remotehost, verbose=cml_args.verbose, archivemonths=cml_args.archiveoldrecords) + mcmode=mcmode, max_trajs=max_trajs, verbose=cml_args.verbose, archivemonths=cml_args.archiveoldrecords) # If there is nothing to process and we're in Candidate mode, stop if not dh.processing_list and (mcmode & MCMODE_CANDS): @@ -1982,6 +2057,13 @@ def _breakHandler(signum, frame): # Store the previous start time previous_start_time = copy.deepcopy(t1) + if dh.RemoteDatahandler: + if dh.RemoteDatahandler.mode == 'child': + dh.uploadToMaster(verbose=True) + else: + # dh.distributeForChildren(verbose=True) + pass + # Break after one loop if auto mode is not on if cml_args.auto is None: break diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index a0a7675f..6e13eec7 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -39,31 +39,44 @@ class RemoteDataHandler(): def __init__(self, cfg_file): 
self.initialised = False if not os.path.isfile(cfg_file): - log.warning(f'unable to find {cfg_file}, aborting remote processing') + log.warning(f'unable to find {cfg_file}, not enabling remote processing') return + self.nodenames = None + self.nodes = None + self.capacity = None + + self.host = None + self.user = None + self.key = None + + self.ssh_client = None + self.sftp_client = None + cfg = ConfigParser() cfg.read(cfg_file) - self.mode = cfg['mode']['mode'] + self.mode = cfg['mode']['mode'].lower() if self.mode not in ['master', 'child']: - log.warning('remote cfg: mode must be master or child, aborting remote processing') + log.warning('remote cfg: mode must be master or child, not enabling remote processing') return if self.mode == 'master': - if 'children' not in cfg.sections() or 'capacity' not in cfg.sections(): - log.warning('remote cfg: capacity or children sections missing, aborting remote processing') + if 'children' not in cfg.sections(): + log.warning('remote cfg: children section missing, not enabling remote processing') return - self.nodes = [k for k in cfg['children'].values()] - self.capacity = [int(k) for k in cfg['capacity'].values()] - if len(self.nodes) != len(self.capacity): - log.warning('remote cfg: capacity and children not same length, aborting remote processing') - return + self.nodenames = [k for k in cfg['children'].keys()] + self.nodes = [k.split(',') for k in cfg['children'].values()] + for i in range(len(self.nodes)): + if len(self.nodes[i]) < 3: + print(f'disabling node {self.nodenames[i]} due to missing config') + while len(self.nodes[i]) < 3: + self.nodes[i].append(0) else: - if 'key' not in cfg['sftp'] or 'host' not in cfg['sftp'] or 'user' not in cfg['sftp']: - log.warning('remote cfg: child user, key or host missing, aborting remote processing') + if 'sftp' not in cfg.sections() or 'key' not in cfg['sftp'] or 'host' not in cfg['sftp'] or 'user' not in cfg['sftp']: + log.warning('remote cfg: sftp user, key or host missing, 
not enabling remote processing') return - self.remotehost = cfg['sftp']['host'] + self.host = cfg['sftp']['host'] self.user = cfg['sftp']['user'] self.key = os.path.normpath(os.path.expanduser(cfg['sftp']['key'])) if 'port' not in cfg['sftp']: @@ -72,71 +85,51 @@ def __init__(self, cfg_file): self.port = int(cfg['sftp']['port']) self.initialised = True - self.ssh_client = None - self.sftp_client = None return - def getSFTPConnection(self): - if not self.initialised: - return False - log.info(f'Connecting to {self.host}:{self.port} as {self.user}....') + ######################################################## + # functions used by the client nodes - if not os.path.isfile(os.path.expanduser(self.key)): - log.warning(f'ssh keyfile {self.key} missing') - return False - - self.ssh_client = paramiko.SSHClient() - self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - pkey = paramiko.RSAKey.from_private_key_file(self.key) - try: - self.ssh_client.connect(hostname=self.host, username=self.user, port=self.port, pkey=pkey, look_for_keys=False) - self.ftp_client = self.ssh_client.open_sftp() - return True - - except Exception as e: - - log.warning('sftp connection to remote host failed') - log.warning(e) - self.ssh_client.close() - return False - - def closeSFTPConnection(self): - if self.sftp_client: - self.sftp_client.close() - if self.ssh_client: - self.ssh_client.close() - return - - def getRemoteCandidates(self): - return - - - def collectRemotePhase1(self, max_trajs, output_dir): + def collectRemoteData(self, datatype, output_dir, verbose=False): """ Collect trajectory or candidate pickles from a remote server for local processing - NB: do NOT use os.path.join here, as it will break on Windows + + parameters: + datatype = 'candidates' or 'phase1' + output_dir = folder to put the pickles into generally dh.output_dir """ if not self.initialised or not self.getSFTPConnection(): return + + for pth in ['files', 'files/candidates', 'files/phase1', 
'files/trajectories', + 'files/candidates/processed','files/phase1/processed']: + try: + self.sftp_client.mkdir(pth) + except Exception: + pass try: - files = self.ftp_client.listdir('phase1') + rem_dir = f'files/{datatype}' + files = self.sftp_client.listdir(rem_dir) files = [f for f in files if '.pickle' in f and 'processing' not in f] - files = files[:max_trajs] - if len(files) == 0: log.info('no data available at this time') self.closeSFTPConnection() return for trajfile in files: - fullname = os.path.join('phase1', trajfile).replace('\\','/') - localname = os.path.join(output_dir, trajfile) - self.ftp_client.get(fullname, localname) - self.ftp_client.rename(fullname, f'{fullname}_processing') - log.info(f'Obtained {len(files)} trajectories') - + fullname = f'{rem_dir}/{trajfile}' + localname = os.path.join(output_dir, datatype, trajfile) + if verbose: + log.info(f'downloading {fullname} to {localname}') + self.sftp_client.get(fullname, localname) + try: + self.sftp_client.rename(fullname, f'{rem_dir}/processed/{trajfile}') + except: + self.sftp_client.remove(fullname) + + log.info(f'Obtained {len(files)} {"trajectories" if datatype=="phase1" else "candidates"}') except Exception as e: log.warning('Problem with download') @@ -146,82 +139,151 @@ def collectRemotePhase1(self, max_trajs, output_dir): return - def uploadToRemote(self, trajfile, output_dir, operation_mode=None): + def uploadToMaster(self, source_dir, verbose=False): """ upload the trajectory pickle and report to a remote host for integration into the solved dataset + + parameters: + source_dir = root folder containing data, generally dh.output_dir """ if not self.initialised or not self.getSFTPConnection(): return - remote_phase2_dir = '' - try: - self.sftp_client.mkdir(remote_phase2_dir) - except Exception: - pass - - localname = os.path.join(output_dir, trajfile) - remotename = os.path.join(remote_phase2_dir, trajfile).replace('\\','/') - self.ftp_client.put(localname, remotename) - - localname = 
localname.replace('_trajectory.pickle', '_report.txt') - remotename = remotename.replace('_trajectory.pickle', '_report.txt') - if os.path.isfile(localname): - self.ftp_client.put(localname, remotename) + for pth in ['files', 'files/candidates', 'files/phase1', 'files/trajectories', + 'files/candidates/processed','files/phase1/processed']: + try: + self.sftp_client.mkdir(pth) + except Exception: + pass + if os.path.isdir(os.path.join(source_dir, 'phase1')): + # upload any phase1 trajectories + i=0 + proc_dir = os.path.join(source_dir, 'phase1', 'processed') + os.makedirs(proc_dir, exist_ok=True) + for (dirpath, dirnames, filenames) in os.walk(os.path.join(source_dir, 'phase1')): + if len(filenames) > 0 and 'processed' not in dirpath: + for fil in filenames: + local_name = os.path.join(source_dir, 'phase1', fil) + remname = f'files/phase1/{fil}' + if verbose: + log.info(f'uploading {local_name} to {remname}') + self.sftp_client.put(local_name, remname) + if os.path.isfile(os.path.join(proc_dir, fil)): + os.remove(os.path.join(proc_dir, fil)) + shutil.move(local_name, proc_dir) + i += 1 + log.info(f'uploaded {i} phase1 solutions') + # now upload any data in the 'trajectories' folder, flattening it to make it simpler + i=0 + if os.path.isdir(os.path.join(source_dir, 'trajectories')): + traj_dir = f'{source_dir}/trajectories' + for (dirpath, dirnames, filenames) in os.walk(traj_dir): + if len(filenames) > 0: + rem_path = f'files/trajectories/{os.path.basename(dirpath)}' + try: + self.sftp_client.mkdir(rem_path) + except Exception: + pass + for fil in filenames: + local_name = os.path.join(dirpath, fil) + rem_file = f'{rem_path}/{fil}' + if verbose: + log.info(f'uploading {local_name} to {rem_file}') + self.sftp_client.put(local_name, rem_file) + i += 1 + log.info(f'uploaded {int(i/2)} trajectories') + + # finally the databases + for fname in ['observations.db', 'trajectories.db']: + local_name = os.path.join(source_dir, fname) + if os.path.isfile(local_name): + 
rem_file = f'files/{fname}' + if verbose: + log.info(f'uploading {local_name} to {rem_file}') + self.sftp_client.put(local_name, rem_file) + + log.info('uploaded databases') self.closeSFTPConnection() return -def moveRemoteData(output_dir, datatype='traj'): - """ - Move remotely processed pickle files to their target location in the trajectories area, - making sure we clean up any previously-calculated trajectory and temporary files - """ - - phase2_dir = os.path.join(output_dir, 'remoteuploads') - - if os.path.isdir(phase2_dir): - log.info('Checking for remotely calculated trajectories...') - pickles = glob.glob1(phase2_dir, '*.pickle') - - for pick in pickles: - traj = loadPickle(phase2_dir, pick) - phase1_name = traj.pre_mc_longname - traj_dir = f'{output_dir}/trajectories/{phase1_name[:4]}/{phase1_name[:6]}/{phase1_name[:8]}/{phase1_name}' - if os.path.isdir(traj_dir): - shutil.rmtree(traj_dir) - processed_traj_file = os.path.join(output_dir, 'phase1', phase1_name + '_trajectory.pickle_processing') - - if os.path.isfile(processed_traj_file): - log.info(f' Moving {phase1_name} to processed folder...') - dst = os.path.join(output_dir, 'phase1', 'processed', phase1_name + '_trajectory.pickle') - shutil.copyfile(processed_traj_file, dst) - os.remove(processed_traj_file) - - phase2_name = traj.longname - traj_dir = f'{output_dir}/trajectories/{phase2_name[:4]}/{phase2_name[:6]}/{phase2_name[:8]}/{phase2_name}' - mkdirP(traj_dir) - log.info(f' Moving {phase2_name} to {traj_dir}...') - src = os.path.join(phase2_dir, pick) - dst = os.path.join(traj_dir, pick[:15]+'_trajectory.pickle') - - shutil.copyfile(src, dst) - os.remove(src) - - report_file = src.replace('_trajectory.pickle','_report.txt') - if os.path.isfile(report_file): - dst = dst.replace('_trajectory.pickle','_report.txt') - shutil.copyfile(report_file, dst) - os.remove(report_file) - - log.info(f'Moved {len(pickles)} trajectories.') + def moveRemoteData(output_dir): + """ + Move remotely processed pickle 
files to their target location in the trajectories area, + making sure we clean up any previously-calculated trajectory and temporary files + """ - return + # TODO NEED TO REWORK THIS + phase2_dir = os.path.join(output_dir, 'remoteuploads') + + if os.path.isdir(phase2_dir): + log.info('Checking for remotely calculated trajectories...') + pickles = glob.glob1(phase2_dir, '*.pickle') + + for pick in pickles: + traj = loadPickle(phase2_dir, pick) + phase1_name = traj.pre_mc_longname + traj_dir = f'{output_dir}/trajectories/{phase1_name[:4]}/{phase1_name[:6]}/{phase1_name[:8]}/{phase1_name}' + if os.path.isdir(traj_dir): + shutil.rmtree(traj_dir) + processed_traj_file = os.path.join(output_dir, 'phase1', phase1_name + '_trajectory.pickle_processing') + + if os.path.isfile(processed_traj_file): + log.info(f' Moving {phase1_name} to processed folder...') + dst = os.path.join(output_dir, 'phase1', 'processed', phase1_name + '_trajectory.pickle') + shutil.copyfile(processed_traj_file, dst) + os.remove(processed_traj_file) + + phase2_name = traj.longname + traj_dir = f'{output_dir}/trajectories/{phase2_name[:4]}/{phase2_name[:6]}/{phase2_name[:8]}/{phase2_name}' + mkdirP(traj_dir) + log.info(f' Moving {phase2_name} to {traj_dir}...') + src = os.path.join(phase2_dir, pick) + dst = os.path.join(traj_dir, pick[:15]+'_trajectory.pickle') + + shutil.copyfile(src, dst) + os.remove(src) + + report_file = src.replace('_trajectory.pickle','_report.txt') + if os.path.isfile(report_file): + dst = dst.replace('_trajectory.pickle','_report.txt') + shutil.copyfile(report_file, dst) + os.remove(report_file) + + log.info(f'Moved {len(pickles)} trajectories.') + return + def getSFTPConnection(self): + if not self.initialised: + return False + log.info(f'Connecting to {self.host}:{self.port} as {self.user}....') + if not os.path.isfile(os.path.expanduser(self.key)): + log.warning(f'ssh keyfile {self.key} missing') + return False + + self.ssh_client = paramiko.SSHClient() + 
self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + pkey = paramiko.RSAKey.from_private_key_file(self.key) + try: + self.ssh_client.connect(hostname=self.host, username=self.user, port=self.port, pkey=pkey, look_for_keys=False) + self.sftp_client = self.ssh_client.open_sftp() + return True + + except Exception as e: -def putPhase1Trajectories(): - return + log.warning('sftp connection to remote host failed') + log.warning(e) + self.ssh_client.close() + return False + + def closeSFTPConnection(self): + if self.sftp_client: + self.sftp_client.close() + if self.ssh_client: + self.ssh_client.close() + return From bb7b50235e982f64ab7bd0fd96b5a07da05b5eb9 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 30 Jan 2026 23:11:04 +0000 Subject: [PATCH 049/132] reduce amount of legacy data thats loaded --- wmpl/Trajectory/CorrelateDB.py | 45 ++++++++++++--------------------- wmpl/Trajectory/CorrelateRMS.py | 14 ++++++---- 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 64351660..960b7d28 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -134,10 +134,16 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): cur.close() return - def moveJsonRecords(self, paired_obs): + def moveObsJsonRecords(self, paired_obs, dt_range): log.info('-----------------------------') - log.info('moving observations to sqlite - this may take some time....') + log.info('moving recent observations to sqlite - this may take some time....') i = 0 + + # only copy recent observations since if we ever run for an historic date + # its likely we will want to reanalyse all available obs anyway + dt_end = dt_range[1] + dt_beg = max(dt_range[0], dt_end + datetime.timedelta(days=-7)) + keylist = paired_obs.keys() for stat_id in keylist: for obs_id in paired_obs[stat_id]: @@ -145,7 +151,8 @@ def moveJsonRecords(self, paired_obs): obs_date = 
datetime.datetime.strptime(obs_id.split('_')[1], '%Y%m%d-%H%M%S.%f') except Exception: obs_date = datetime.datetime(2000,1,1,0,0,0) - self.addPairedObs(stat_id, obs_id, obs_date) + if obs_date >= dt_beg and obs_date < dt_end: + self.addPairedObs(stat_id, obs_id, obs_date) i += 1 if not i % 100000: log.info(f'moved {i} observations') @@ -437,34 +444,14 @@ def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): cur.close() return - def moveJsonRecords(self, trajectories, failed_trajectories): - log.info('-----------------------------') - log.info('moving trajectories to sqlite - this may take some time....') - i = 0 - keylist = trajectories.keys() - for jdt_ref in keylist: - self.addTrajectory(trajectories[jdt_ref]) - i += 1 - if not i % 10000: - log.info(f'moved {i} trajectories') - log.info(f'done - moved {i} trajectories') - log.info('-----------------------------') - keylist = failed_trajectories.keys() - for jdt_ref in keylist: - self.addTrajectory(failed_trajectories[jdt_ref], failed=True) - i += 1 - if not i % 10000: - log.info(f'moved {i} failed_trajectories') - self.commitTrajDatabase() - log.info(f'done - moved {i} failed_trajectories') - log.info('-----------------------------') - - return - def moveFailedTrajectories(self, failed_trajectories, dt_range): - jd_beg = datetime2JD(dt_range[0]) + log.info('moving recent trajectories to sqlite - this may take some time....') + + # only copy recent records since if we ever run for an historic date + # its likely we will want to reanalyse all available obs anyway jd_end = datetime2JD(dt_range[1]) - log.info('moving trajectories to sqlite - this may take some time....') + jd_beg = max(datetime2JD(dt_range[0]), jd_end - 7) + keylist = [k for k in failed_trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] i = 0 # just in case there aren't any trajectories to move for i,jdt_ref in enumerate(keylist): diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 
53c9774a..e1213e6c 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -589,17 +589,21 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.RemoteDatahandler = None if mcmode != MCMODE_PHASE2: - log.info("Loading database: {:s}".format(database_path)) - self.old_db = DatabaseJSON(database_path, verbose=self.verbose) + + # no need to load the legacy JSON file if we already have the sqlite databases + if not os.path.isfile(os.path.join(db_dir, 'observations.db')) and \ + not os.path.isfile(os.path.join(db_dir, 'trajectories.db')): + log.info("Loading database: {:s}".format(database_path)) + self.old_db = DatabaseJSON(database_path, verbose=self.verbose) + else: + self.old_db = None self.db = TrajectoryDatabase(db_dir) self.observations_db = ObservationDatabase(db_dir) # move any legacy paired obs data into sqlite if hasattr(self.old_db, 'paired_obs'): - self.observations_db.moveJsonRecords(self.old_db.paired_obs) - del self.old_db.paired_obs - self.saveDatabase() + self.observations_db.moveObsJsonRecords(self.old_db.paired_obs, dt_range) if archivemonths != 0: log.info('Archiving older entries....') From 23b258b6d1c5359d4b26d5da936f40ec07d3b319 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 30 Jan 2026 23:15:02 +0000 Subject: [PATCH 050/132] make dates tz aware --- wmpl/Trajectory/CorrelateDB.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 960b7d28..de8ba81c 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -151,6 +151,8 @@ def moveObsJsonRecords(self, paired_obs, dt_range): obs_date = datetime.datetime.strptime(obs_id.split('_')[1], '%Y%m%d-%H%M%S.%f') except Exception: obs_date = datetime.datetime(2000,1,1,0,0,0) + obs_date = obs_date.replace(tzinfo=datetime.timezone.utc) + if obs_date >= dt_beg and obs_date < dt_end: self.addPairedObs(stat_id, obs_id, obs_date) i += 1 From 
be2995197518813239c5dcb1dffa8b61349d73d6 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 30 Jan 2026 23:22:03 +0000 Subject: [PATCH 051/132] improve logging --- wmpl/Trajectory/CorrelateDB.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index de8ba81c..45811606 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -135,15 +135,16 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): return def moveObsJsonRecords(self, paired_obs, dt_range): - log.info('-----------------------------') - log.info('moving recent observations to sqlite - this may take some time....') - i = 0 - # only copy recent observations since if we ever run for an historic date # its likely we will want to reanalyse all available obs anyway dt_end = dt_range[1] dt_beg = max(dt_range[0], dt_end + datetime.timedelta(days=-7)) + log.info('-----------------------------') + log.info('moving recent observations to sqlite - this may take some time....') + log.info(f'observation date range {dt_beg.isoformat()} to {dt_end.isoformat()}') + + i = 0 keylist = paired_obs.keys() for stat_id in keylist: for obs_id in paired_obs[stat_id]: @@ -152,7 +153,7 @@ def moveObsJsonRecords(self, paired_obs, dt_range): except Exception: obs_date = datetime.datetime(2000,1,1,0,0,0) obs_date = obs_date.replace(tzinfo=datetime.timezone.utc) - + if obs_date >= dt_beg and obs_date < dt_end: self.addPairedObs(stat_id, obs_id, obs_date) i += 1 @@ -447,13 +448,15 @@ def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): return def moveFailedTrajectories(self, failed_trajectories, dt_range): - log.info('moving recent trajectories to sqlite - this may take some time....') # only copy recent records since if we ever run for an historic date # its likely we will want to reanalyse all available obs anyway jd_end = datetime2JD(dt_range[1]) jd_beg = 
max(datetime2JD(dt_range[0]), jd_end - 7) + log.info('moving recent failed trajectories to sqlite - this may take some time....') + log.info(f'observation date range {jd2Date(jd_beg, dt_obj=True).isoformat()} to {dt_range[1].isoformat()}') + keylist = [k for k in failed_trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] i = 0 # just in case there aren't any trajectories to move for i,jdt_ref in enumerate(keylist): From 5eebd9dc1858b9550b29a126d1747a1bc5da315a Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 30 Jan 2026 23:34:54 +0000 Subject: [PATCH 052/132] more tweaks to copying legacy data --- wmpl/Trajectory/CorrelateRMS.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index e1213e6c..d668ed9d 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -589,7 +589,7 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.RemoteDatahandler = None if mcmode != MCMODE_PHASE2: - + # no need to load the legacy JSON file if we already have the sqlite databases if not os.path.isfile(os.path.join(db_dir, 'observations.db')) and \ not os.path.isfile(os.path.join(db_dir, 'trajectories.db')): @@ -598,13 +598,17 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode else: self.old_db = None - self.db = TrajectoryDatabase(db_dir) self.observations_db = ObservationDatabase(db_dir) - - # move any legacy paired obs data into sqlite if hasattr(self.old_db, 'paired_obs'): + # move any legacy paired obs data into sqlite + log.info(dt_range) self.observations_db.moveObsJsonRecords(self.old_db.paired_obs, dt_range) + self.db = TrajectoryDatabase(db_dir) + if hasattr(self.old_db, 'failed_trajectories'): + # move any legacy failed traj data into sqlite + self.db.moveFailedTrajectories(self.old_db.failed_trajectories, dt_range) + if archivemonths != 0: log.info('Archiving 
older entries....') try: @@ -2039,10 +2043,6 @@ def saveCandidates(self, candidate_trajectories): # load computed trajectories from disk into sqlite dh.loadComputedTrajectories(dt_range=(bin_beg, bin_end)) # move any legacy failed traj into sqlite - if hasattr(dh.old_db, 'failed_trajectories'): - dh.db.moveFailedTrajectories(dh.old_db.failed_trajectories, (bin_beg, bin_end)) - if dh.old_db.removeTrajectories((bin_beg, bin_end), failed=True) > 0: - dh.saveDatabase() # Run the trajectory correlator From a87fb1648e43b1a986c2acf462a1cf80ed531ae2 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 30 Jan 2026 23:39:18 +0000 Subject: [PATCH 053/132] bugfix --- wmpl/Trajectory/CorrelateDB.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 45811606..9827fb8f 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -156,7 +156,7 @@ def moveObsJsonRecords(self, paired_obs, dt_range): if obs_date >= dt_beg and obs_date < dt_end: self.addPairedObs(stat_id, obs_id, obs_date) - i += 1 + i += 1 if not i % 100000: log.info(f'moved {i} observations') self.commitObsDatabase() From 71e20aab5fa141d8fd3727f1d53e92253b782e56 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 30 Jan 2026 23:42:23 +0000 Subject: [PATCH 054/132] bugfix --- wmpl/Trajectory/CorrelateDB.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 9827fb8f..62c6d27f 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -157,7 +157,7 @@ def moveObsJsonRecords(self, paired_obs, dt_range): if obs_date >= dt_beg and obs_date < dt_end: self.addPairedObs(stat_id, obs_id, obs_date) i += 1 - if not i % 100000: + if not i % 100000 and i != 0: log.info(f'moved {i} observations') self.commitObsDatabase() log.info(f'done - moved {i} observations') From 6083b4694b8d15452c3ae78995f9ffdc18abb4af 
Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 31 Jan 2026 00:13:01 +0000 Subject: [PATCH 055/132] remove unncessary debug --- wmpl/Trajectory/CorrelateRMS.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index d668ed9d..4715a5f9 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -601,7 +601,6 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.observations_db = ObservationDatabase(db_dir) if hasattr(self.old_db, 'paired_obs'): # move any legacy paired obs data into sqlite - log.info(dt_range) self.observations_db.moveObsJsonRecords(self.old_db.paired_obs, dt_range) self.db = TrajectoryDatabase(db_dir) From 5f986c7007755f9c25748fa296050233c86aec49 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 31 Jan 2026 00:33:35 +0000 Subject: [PATCH 056/132] don't try to save an unopened database --- wmpl/Trajectory/CorrelateRMS.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 4715a5f9..4ecc1baa 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -708,7 +708,8 @@ def __init__(self, station, obs_id): self.observations_db.archiveObsDatabase(self.db_dir, arch_prefix, archdate_jd) self.db.archiveTrajDatabase(self.db_dir, arch_prefix, archdate_jd) - self.old_db.archiveRecords(self.db_dir, arch_prefix, archdate_jd) + if self.old_db: + self.old_db.archiveRecords(self.db_dir, arch_prefix, archdate_jd) return @@ -1585,6 +1586,8 @@ def loadPhase1Trajectories(self, max_trajs=1000): def saveDatabase(self): """ Save the data base. """ + if self.old_db is None: + return def _breakHandler(signum, frame): """ Do nothing if CTRL + C is pressed. 
""" From 6e8bb887336e9aae403e7c2697b763e5d4f81c5c Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sun, 1 Feb 2026 10:05:26 +0000 Subject: [PATCH 057/132] Updates for remote client datahandling --- wmpl/Trajectory/CorrelateEngine.py | 44 ++-------- wmpl/Trajectory/CorrelateRMS.py | 111 ++++++------------------- wmpl/Utils/remoteDataHandling.py | 129 ++++++++++++++++++----------- 3 files changed, 109 insertions(+), 175 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 1bf956b1..1502163c 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -8,7 +8,6 @@ import multiprocessing import logging import os -import platform import numpy as np from wmpl.Trajectory.Trajectory import ObservedPoints, PlaneIntersection, Trajectory, moveStateVector @@ -250,35 +249,6 @@ def __init__(self, data_handle, traj_constraints, v_init_part, data_in_j2000=Tru self.candidatemode = None - - - def getCandidateFolders(self): - """ get candidate folders, if in multi-node candidate mode - """ - self.node_list = {} - node_file = os.path.join(self.dh.dir_path, 'wmpl_nodes.cfg') - if os.path.isfile(node_file): - lis = open(node_file, 'r').readlines() - nodes = [li for li in lis if '#' not in li and len(li) > 2] - for node in nodes: - node_name, node_path, node_max = node.split(',') - node_name = node_name.strip() - node_max = int(node_max.strip()) - node_path = node_path.strip() - if node_path[0]==os.sep or node_path[1]==':': - np = node_path - else: - np = os.path.join(self.dh.output_dir, node_path) - self.node_list[node_name] = {'node_path': np, 'node_max':node_max} - os.makedirs(np, exist_ok=True) - - # add a node for this hardware - master_name = platform.uname()[1] - np = os.path.join(self.dh.output_dir, 'candidates') - self.node_list[master_name] = {'node_path': np, 'node_max':0} - return - - def trajectoryRangeCheck(self, traj_reduced, platepar): """ Check that the trajectory is within the range 
limits. @@ -1597,13 +1567,13 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): log.info("-----------------------") self.dh.saveCandidates(candidate_trajectories) - return + return len(candidate_trajectories) else: log.info("-----------------------") log.info('PROCESSING {} CANDIDATES'.format(len(candidate_trajectories))) log.info("-----------------------") - # end of 'if self.candidatemode != CANDMODE_LOAD' + # end of 'if mcmode & MCMODE_CANDS' ### ### else: # candidatemode is LOAD so load any available candidates for processing @@ -1612,9 +1582,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): log.info("-----------------------") log.info('LOADING CANDIDATES') log.info("-----------------------") - self.getCandidateFolders() - # only load candidates from this node's candidate folder - save_path = self.node_list[platform.uname()[1]]['node_path'] + + save_path = self.dh.candidate_dir for fil in os.listdir(save_path): if '.pickle' not in fil: continue @@ -1850,12 +1819,11 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): # end of "for matched_observations in candidate_trajectories" outcomes = [traj_solved_count] - # Finish the correlation run (update the database with new values) - self.dh.saveDatabase() - log.info(f'SOLVED {sum(outcomes)} TRAJECTORIES') log.info("") log.info("-----------------") log.info("SOLVING RUN DONE!") log.info("-----------------") + + return sum(outcomes) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 4ecc1baa..aaa259ba 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -12,7 +12,6 @@ import datetime import shutil import time -import signal import multiprocessing import logging import logging.handlers @@ -222,7 +221,6 @@ def load(self, verbose=False): # do this here because the object dict is overwritten during the load operation above self.verbose = verbose - def save(self): """ 
Save the database of processed meteors to disk. """ @@ -284,10 +282,8 @@ def checkTrajIfFailed(self, traj): if all_match: return True - return False - def addTrajectory(self, traj_reduced, failed=False): """ Add a computed trajectory to the list. @@ -320,8 +316,6 @@ def addTrajectory(self, traj_reduced, failed=False): else: traj_dict[traj_reduced.jdt_ref].traj_id = traj_reduced.traj_id - - def removeTrajectory(self, traj_reduced, keepFolder=False): """ Remove the trajectory from the data base and disk. """ @@ -337,40 +331,6 @@ def removeTrajectory(self, traj_reduced, keepFolder=False): log.info(f'unable to remove {traj_dir}') - def removeTrajectories(self, dt_range, failed=False): - jd_beg = datetime2JD(dt_range[0]) - jd_end = datetime2JD(dt_range[1]) - if not failed: - keys = [k for k in self.trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] - for jdt in keys: - del self.trajectories[jdt] - else: - keys = [k for k in self.failed_trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] - for jdt in keys: - del self.failed_trajectories[jdt] - log.info(f'deleted {len(keys)} keys from {"failed_trajectories" if failed else "trajectories"}') - return len(keys) - - def archiveRecords(self, db_dir, arch_prefix, archdate_jd): - arch_db_path = os.path.join(db_dir, f'{arch_prefix}_{JSON_DB_NAME}') - archdb = DatabaseJSON(arch_db_path, verbose=self.verbose, archiveYM=arch_prefix) - log.info(f'Archiving db records to {arch_db_path}...') - - for traj in [t for t in self.old_db.trajectories if t < archdate_jd]: - if traj < archdate_jd: - archdb.addTrajectory(self.old_db.trajectories[traj], False) - del self.old_db.trajectories[traj] - - for traj in [t for t in self.old_db.failed_trajectories if t < archdate_jd]: - if traj < archdate_jd: - archdb.addTrajectory(self.old_db.failed_trajectories[traj], True) - del self.old_db.failed_trajectories[traj] - - archdb.save() - self.db.save() - - - class MeteorPointRMS(object): def __init__(self, frame, time_rel, x, 
y, ra, dec, azim, alt, mag): """ Container for individual meteor picks. """ @@ -398,7 +358,6 @@ def __init__(self, frame, time_rel, x, y, ra, dec, azim, alt, mag): self.mag = mag - class MeteorObsRMS(object): def __init__(self, station_code, reference_dt, platepar, data, rel_proc_path, ff_name=None): """ Container for meteor observations with the interface compatible with the trajectory correlator @@ -584,7 +543,10 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode if os.path.isfile(remote_cfg): log.info('remote data management requested, initialising') self.RemoteDatahandler = RemoteDataHandler(remote_cfg) - self.gatherRemoteData(mcmode, verbose=False) + remote_count = self.gatherRemoteData(mcmode, verbose=False) + if remote_count == 0: + log.info('no remote data yet') + # TODO probably want to loop here looking for data for 10-15 minutes else: self.RemoteDatahandler = None @@ -706,10 +668,9 @@ def __init__(self, station, obs_id): archdate_jd = datetime2JD(archdate) arch_prefix = archdate.strftime("%Y%m") + # TODO check if this works self.observations_db.archiveObsDatabase(self.db_dir, arch_prefix, archdate_jd) self.db.archiveTrajDatabase(self.db_dir, arch_prefix, archdate_jd) - if self.old_db: - self.old_db.archiveRecords(self.db_dir, arch_prefix, archdate_jd) return @@ -862,11 +823,6 @@ def loadUnpairedObservations(self, processing_list, dt_range=None): station_count += 1 prev_station = station_code - # Save database to mark those with missing data files (only every 250th station, to speed things up) - if (station_count % 250 == 0) and (station_code != prev_station): - self.saveDatabase() - - # Load platepars with open(os.path.join(proc_path, platepar_recalibrated_name)) as f: platepars_recalibrated_dict = json.load(f) @@ -1340,6 +1296,7 @@ def saveTrajectoryResults(self, traj, save_plots): log.info(f'saved {traj.traj_id} to {output_dir}') if self.mc_mode & MCMODE_PHASE1 and not self.mc_mode & MCMODE_PHASE2: + # TODO distribute 
phase1 pickles here savePickle(traj, self.phase1_dir, traj.pre_mc_longname + '_trajectory.pickle') elif self.mc_mode & MCMODE_PHASE2: # we save this in MC mode the MC phase may alter the trajectory details and if later on @@ -1582,35 +1539,15 @@ def loadPhase1Trajectories(self, max_trajs=1000): # if the file couldn't be read, then skip it for now - we'll get it in the next pass log.info(f'File {pick} skipped for now') return dt_beg, dt_end - - - def saveDatabase(self): - """ Save the data base. """ - if self.old_db is None: - return - - def _breakHandler(signum, frame): - """ Do nothing if CTRL + C is pressed. """ - log.info("The data base is being saved, the program cannot be exited right now!") - pass - - if self.db is None: - return - # Prevent quitting while a data base is being saved - original_signal = signal.getsignal(signal.SIGINT) - signal.signal(signal.SIGINT, _breakHandler) - - # Save the data base - log.info("Saving data base to disk...") - self.old_db.save() - - # Restore the signal functionality - signal.signal(signal.SIGINT, original_signal) - - def distributeToChildren(self, verbose=False): + + def distributeCandsForChildren(self, verbose=False): """ In 'master' mode this distributes candidates or phase1 trajectories to the children """ + if self.mc_mode != MCMODE_CANDS: + log.warning('candidate distribution only applicable in MCMODE_CANDS') + return + # TODO - distribute return def uploadToMaster(self, verbose=False): @@ -1621,7 +1558,7 @@ def uploadToMaster(self, verbose=False): self.db.closeTrajDatabase() self.observations_db.closeObsDatabase() - self.RemoteDatahandler.uploadToMaster(self.output_dir, verbose=True) + self.RemoteDatahandler.uploadToMaster(self.output_dir, verbose=verbose) # truncate the tables here so they are clean for the next run self.db = TrajectoryDatabase(self.db_dir, purge_records=True) @@ -1643,10 +1580,10 @@ def gatherRemoteData(self, mcmode, verbose=False): else: # collect candidates or phase1 solutions from the master 
node if mcmode == MCMODE_PHASE1 or mcmode == MCMODE_BOTH: - self.RemoteDatahandler.collectRemoteData('candidates', self.output_dir) + remote_count = self.RemoteDatahandler.collectRemoteData('candidates', self.output_dir, verbose=verbose) elif mcmode == MCMODE_PHASE2: - self.RemoteDatahandler.collectRemoteData('phase1', self.output_dir) - return + remote_count = self.RemoteDatahandler.collectRemoteData('phase1', self.output_dir, verbose=verbose) + return remote_count def saveCandidates(self, candidate_trajectories): for matched_observations in candidate_trajectories: @@ -1654,7 +1591,7 @@ def saveCandidates(self, candidate_trajectories): picklename = str(ref_dt.timestamp()) + '.pickle' if self.RemoteDatahandler: - #TODO get candidate folder name here + # TODO get candidate folder name here # randomly select a node from the list of nodes then check that its actually listening # and hasn't already received its max allocation. The master node gets anything left while True: @@ -2050,28 +1987,28 @@ def saveCandidates(self, candidate_trajectories): # Run the trajectory correlator tc = TrajectoryCorrelator(dh, trajectory_constraints, cml_args.velpart, data_in_j2000=True, enableOSM=cml_args.enableOSM) bin_time_range = [bin_beg, bin_end] - tc.run(event_time_range=event_time_range, mcmode=mcmode, bin_time_range=bin_time_range) + num_done = tc.run(event_time_range=event_time_range, mcmode=mcmode, bin_time_range=bin_time_range) if mcmode & MCMODE_CANDS: dh.observations_db.closeObsDatabase() + else: # there were no datasets to process log.info('no data to process yet') log.info("Total run time: {:s}".format(str(datetime.datetime.now(datetime.timezone.utc) - t1))) + if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child' and num_done > 0: + dh.RemoteDatahandler.uploadToMaster(dh.output_dir, verbose=False) + # Store the previous start time previous_start_time = copy.deepcopy(t1) - if dh.RemoteDatahandler: - if dh.RemoteDatahandler.mode == 'child': - 
dh.uploadToMaster(verbose=True) - else: - # dh.distributeForChildren(verbose=True) - pass + # Break after one loop if auto mode is not on if cml_args.auto is None: + dh.RemoteDatahandler.clearReadyFlag() break else: diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index 6e13eec7..e72021b9 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -87,6 +87,37 @@ def __init__(self, cfg_file): self.initialised = True return + def getSFTPConnection(self): + if not self.initialised: + return False + log.info(f'Connecting to {self.host}:{self.port} as {self.user}....') + + if not os.path.isfile(os.path.expanduser(self.key)): + log.warning(f'ssh keyfile {self.key} missing') + return False + + self.ssh_client = paramiko.SSHClient() + self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + pkey = paramiko.RSAKey.from_private_key_file(self.key) + try: + self.ssh_client.connect(hostname=self.host, username=self.user, port=self.port, pkey=pkey, look_for_keys=False) + self.sftp_client = self.ssh_client.open_sftp() + return True + + except Exception as e: + + log.warning('sftp connection to remote host failed') + log.warning(e) + self.ssh_client.close() + return False + + def closeSFTPConnection(self): + if self.sftp_client: + self.sftp_client.close() + if self.ssh_client: + self.ssh_client.close() + return + ######################################################## # functions used by the client nodes @@ -110,6 +141,11 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): pass try: + readyfile = os.path.join(os.getenv('TMP', default='/tmp'),'ready') + open(readyfile,'w').write('ready') + self.sftp_client.put(readyfile, 'files/ready') + log.info('set ready flag') + rem_dir = f'files/{datatype}' files = self.sftp_client.listdir(rem_dir) files = [f for f in files if '.pickle' in f and 'processing' not in f] @@ -136,8 +172,7 @@ def collectRemoteData(self, datatype, output_dir, 
verbose=False): log.info(e) self.closeSFTPConnection() - return - + return len(files) def uploadToMaster(self, source_dir, verbose=False): """ @@ -157,24 +192,26 @@ def uploadToMaster(self, source_dir, verbose=False): self.sftp_client.mkdir(pth) except Exception: pass - if os.path.isdir(os.path.join(source_dir, 'phase1')): + phase1_dir = os.path.join(source_dir, 'phase1') + if os.path.isdir(phase1_dir): # upload any phase1 trajectories i=0 - proc_dir = os.path.join(source_dir, 'phase1', 'processed') + proc_dir = os.path.join(phase1_dir, 'processed') os.makedirs(proc_dir, exist_ok=True) - for (dirpath, dirnames, filenames) in os.walk(os.path.join(source_dir, 'phase1')): - if len(filenames) > 0 and 'processed' not in dirpath: - for fil in filenames: - local_name = os.path.join(source_dir, 'phase1', fil) - remname = f'files/phase1/{fil}' - if verbose: - log.info(f'uploading {local_name} to {remname}') - self.sftp_client.put(local_name, remname) - if os.path.isfile(os.path.join(proc_dir, fil)): - os.remove(os.path.join(proc_dir, fil)) - shutil.move(local_name, proc_dir) - i += 1 - log.info(f'uploaded {i} phase1 solutions') + for fil in os.listdir(phase1_dir): + local_name = os.path.join(phase1_dir, fil) + if os.path.isdir(local_name): + continue + remname = f'files/phase1/{fil}' + if verbose: + log.info(f'uploading {local_name} to {remname}') + self.sftp_client.put(local_name, remname) + if os.path.isfile(os.path.join(proc_dir, fil)): + os.remove(os.path.join(proc_dir, fil)) + shutil.move(local_name, proc_dir) + i += 1 + if i > 0: + log.info(f'uploaded {i} phase1 solutions') # now upload any data in the 'trajectories' folder, flattening it to make it simpler i=0 if os.path.isdir(os.path.join(source_dir, 'trajectories')): @@ -193,7 +230,9 @@ def uploadToMaster(self, source_dir, verbose=False): log.info(f'uploading {local_name} to {rem_file}') self.sftp_client.put(local_name, rem_file) i += 1 - log.info(f'uploaded {int(i/2)} trajectories') + shutil.rmtree(traj_dir, 
ignore_errors=True) + if i > 0: + log.info(f'uploaded {int(i/2)} trajectories') # finally the databases for fname in ['observations.db', 'trajectories.db']: @@ -209,8 +248,29 @@ def uploadToMaster(self, source_dir, verbose=False): self.closeSFTPConnection() return + def clearReadyFlag(self, verbose=False): + """ + upload the trajectory pickle and report to a remote host for integration + into the solved dataset + + parameters: + source_dir = root folder containing data, generally dh.output_dir + """ + + if not self.initialised or not self.getSFTPConnection(): + return + try: + self.sftp_client.remove('files/ready') + log.info('removed ready flag') + except: + log.warning('unable to clear ready flag, master continue to assign data') + self.closeSFTPConnection() + return + + ######################################################## + # functions used by the master node - def moveRemoteData(output_dir): + def moveRemoteData(self, output_dir): """ Move remotely processed pickle files to their target location in the trajectories area, making sure we clean up any previously-calculated trajectory and temporary files @@ -256,34 +316,3 @@ def moveRemoteData(output_dir): log.info(f'Moved {len(pickles)} trajectories.') return - - def getSFTPConnection(self): - if not self.initialised: - return False - log.info(f'Connecting to {self.host}:{self.port} as {self.user}....') - - if not os.path.isfile(os.path.expanduser(self.key)): - log.warning(f'ssh keyfile {self.key} missing') - return False - - self.ssh_client = paramiko.SSHClient() - self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - pkey = paramiko.RSAKey.from_private_key_file(self.key) - try: - self.ssh_client.connect(hostname=self.host, username=self.user, port=self.port, pkey=pkey, look_for_keys=False) - self.sftp_client = self.ssh_client.open_sftp() - return True - - except Exception as e: - - log.warning('sftp connection to remote host failed') - log.warning(e) - self.ssh_client.close() - return False 
- - def closeSFTPConnection(self): - if self.sftp_client: - self.sftp_client.close() - if self.ssh_client: - self.ssh_client.close() - return From 9cd2d009108cf0c1b6c7b3461316c1ff39026bc7 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 3 Feb 2026 12:45:54 +0000 Subject: [PATCH 058/132] support to signal when a client is ready for data and master mode remote data processing --- wmpl/Trajectory/CorrelateDB.py | 32 +++- wmpl/Trajectory/CorrelateEngine.py | 1 - wmpl/Trajectory/CorrelateRMS.py | 257 +++++++++++++++++++---------- wmpl/Utils/remoteDataHandling.py | 117 +++++-------- 4 files changed, 231 insertions(+), 176 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 62c6d27f..5ff9f941 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -175,14 +175,11 @@ def mergeObsDatabase(self, source_db_path): cur = self.dbhandle.cursor() cur.execute(f"attach database '{source_db_path}' as sourcedb") try: - # bulk-copy if possible + # bulk-copy cur.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') except Exception: - # otherwise, one by one - log.info('Some records already exist, doing row-wise copy') - cur.execute('select * from sourcedb.paired_obs') - for row in cur.fetchall(): - self.addPairedObs(row[0], row[1],row[2]) + log.info('unable to merge child observations') + self.commitObsDatabase() cur.execute("detach database 'sourcedb'") cur.close() @@ -302,9 +299,13 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): if verbose: log.info(f'adding jdt {traj_reduced.jdt_ref} to {"failed" if failed else "trajectories"}') cur = self.dbhandle.cursor() + # remove the output_dir part from the path so that the data are location-independent traj_file_path = traj_reduced.traj_file_path[traj_reduced.traj_file_path.find('trajectories'):] + # and remove windows-style path separators + traj_file_path = traj_file_path.replace('\\','/') + if failed: # fixup 
possible bad values traj_id = 'None' if not hasattr(traj_reduced, 'traj_id') or traj_reduced.traj_id is None else traj_reduced.traj_id @@ -396,6 +397,25 @@ def getTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, ver trajs.append(json_dict) return trajs + def getTrajNames(self, jdt_start=None, jdt_end=None, failed=False, verbose=False): + + table_name = 'failed_trajectories' if failed else 'trajectories' + cur = self.dbhandle.cursor() + if not jdt_start: + cur.execute(f"SELECT * FROM {table_name}") + rows = cur.fetchall() + elif not jdt_end: + cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") + rows = cur.fetchall() + else: + cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") + rows = cur.fetchall() + cur.close() + trajs = [] + for rw in rows: + trajs.append(rw[2]) + return trajs + def removeDeletedTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, verbose=False): diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 1502163c..8f0559ad 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1561,7 +1561,6 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): # in candidate mode we want to save the candidates to disk if mcmode == MCMODE_CANDS: - self.getCandidateFolders() log.info("-----------------------") log.info('SAVING {} CANDIDATES'.format(len(candidate_trajectories))) log.info("-----------------------") diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index aaa259ba..d3016e06 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -18,8 +18,9 @@ import glob from dateutil.relativedelta import relativedelta import numpy as np -from random import randrange -import platform +import sys +import signal +import secrets from wmpl.Formats.CAMS import loadFTPDetectInfo from wmpl.Trajectory.CorrelateEngine import 
TrajectoryCorrelator, TrajectoryConstraints @@ -543,8 +544,12 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode if os.path.isfile(remote_cfg): log.info('remote data management requested, initialising') self.RemoteDatahandler = RemoteDataHandler(remote_cfg) - remote_count = self.gatherRemoteData(mcmode, verbose=False) - if remote_count == 0: + if self.RemoteDatahandler.mode == 'child': + self.RemoteDatahandler.clearStopFlag() + status = self.getRemoteData(verbose=False) + else: + status = self.moveUploadedData(verbose=False) + if not status: log.info('no remote data yet') # TODO probably want to loop here looking for data for 10-15 minutes else: @@ -565,10 +570,10 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode # move any legacy paired obs data into sqlite self.observations_db.moveObsJsonRecords(self.old_db.paired_obs, dt_range) - self.db = TrajectoryDatabase(db_dir) + self.traj_db = TrajectoryDatabase(db_dir) if hasattr(self.old_db, 'failed_trajectories'): # move any legacy failed traj data into sqlite - self.db.moveFailedTrajectories(self.old_db.failed_trajectories, dt_range) + self.traj_db.moveFailedTrajectories(self.old_db.failed_trajectories, dt_range) if archivemonths != 0: log.info('Archiving older entries....') @@ -593,7 +598,7 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode dt_beg, dt_end = self.loadPhase1Trajectories(max_trajs=max_trajs) self.processing_list = None self.dt_range=[dt_beg, dt_end] - self.db = None + self.traj_db = None self.observations_db = None ### Define country groups to speed up the proceessing ### @@ -670,7 +675,7 @@ def __init__(self, station, obs_id): # TODO check if this works self.observations_db.archiveObsDatabase(self.db_dir, arch_prefix, archdate_jd) - self.db.archiveTrajDatabase(self.db_dir, arch_prefix, archdate_jd) + self.traj_db.archiveTrajDatabase(self.db_dir, arch_prefix, archdate_jd) return @@ -725,10 +730,6 @@ def 
findUnprocessedFolders(self, station_list): night_path = os.path.join(station_path, night_name) night_path_rel = os.path.join(station_name, night_name) - # # If the night path is not in the processed list, add it to the processing list - # if night_path_rel not in self.db.processed_dirs[station_name]: - # processing_list.append([station_name, night_path_rel, night_path, night_dt]) - processing_list.append([station_name, night_path_rel, night_path, night_dt]) # else: @@ -1020,7 +1021,7 @@ def removeDeletedTrajectories(self): if not os.path.isdir(self.output_dir): return - if self.db is None: + if self.traj_db is None: return log.info(" Removing deleted trajectories from: " + self.output_dir) @@ -1032,7 +1033,7 @@ def removeDeletedTrajectories(self): jdt_start = datetime2JD(self.dt_range[0]) jdt_end = datetime2JD(self.dt_range[1]) - self.db.removeDeletedTrajectories(self.output_dir, jdt_start, jdt_end) + self.traj_db.removeDeletedTrajectories(self.output_dir, jdt_start, jdt_end) return @@ -1048,7 +1049,7 @@ def loadComputedTrajectories(self, dt_range=None): if not os.path.isdir(traj_dir_path): return - if self.db is None: + if self.traj_db is None: return if dt_range is None: @@ -1108,7 +1109,7 @@ def loadComputedTrajectories(self, dt_range=None): if self.trajectoryFileInDtRange(file_name, dt_range=dt_range): - self.db.addTrajectory(TrajectoryReduced(os.path.join(full_traj_dir, file_name))) + self.traj_db.addTrajectory(TrajectoryReduced(os.path.join(full_traj_dir, file_name))) # Print every 1000th trajectory if counter % 1000 == 0: @@ -1125,7 +1126,7 @@ def loadComputedTrajectories(self, dt_range=None): def getComputedTrajectories(self, jd_beg, jd_end): """ Returns a list of computed trajectories between the Julian dates. 
""" - json_dicts = self.db.getTrajectories(self.output_dir, jd_beg, jd_end) + json_dicts = self.traj_db.getTrajectories(self.output_dir, jd_beg, jd_end) trajs = [TrajectoryReduced(None, json_dict=j) for j in json_dicts] return trajs @@ -1297,9 +1298,10 @@ def saveTrajectoryResults(self, traj, save_plots): if self.mc_mode & MCMODE_PHASE1 and not self.mc_mode & MCMODE_PHASE2: # TODO distribute phase1 pickles here - savePickle(traj, self.phase1_dir, traj.pre_mc_longname + '_trajectory.pickle') + self.savePhase1Trajectory(traj, self.phase1_dir, traj.pre_mc_longname + '_trajectory.pickle', verbose=True) + elif self.mc_mode & MCMODE_PHASE2: - # we save this in MC mode the MC phase may alter the trajectory details and if later on + # the MC phase may alter the trajectory details and if later on # we're including additional observations we need to use the most recent version of the trajectory savePickle(traj, os.path.join(self.phase1_dir, 'processed'), traj.pre_mc_longname + '_trajectory.pickle') @@ -1320,7 +1322,7 @@ def addTrajectory(self, traj, failed_jdt_ref=None, verbose=False): failed_jdt_ref: [float] Reference Julian date of the failed trajectory. None by default. 
""" - if self.db is None: + if self.traj_db is None: return # Set the correct output path traj.output_dir = self.generateTrajOutputDirectoryPath(traj) @@ -1333,7 +1335,7 @@ def addTrajectory(self, traj, failed_jdt_ref=None, verbose=False): if failed_jdt_ref is not None: traj_reduced.jdt_ref = failed_jdt_ref - self.db.addTrajectory(traj_reduced, failed=(failed_jdt_ref is not None), verbose=verbose) + self.traj_db.addTrajectory(traj_reduced, failed=(failed_jdt_ref is not None), verbose=verbose) @@ -1368,7 +1370,7 @@ def removeTrajectory(self, traj_reduced, remove_phase1=False): except Exception: pass - self.db.removeTrajectory(traj_reduced) + self.traj_db.removeTrajectory(traj_reduced) def cleanupPhase2TempPickle(self, traj, success=False): @@ -1392,12 +1394,14 @@ def cleanupPhase2TempPickle(self, traj, success=False): return def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaired): + # go through the candidates and check if they correspond to already-failed candidate_trajectories=[] for cand in matched_observations: ref_dt = min([met_obs.reference_dt for _, met_obs, _ in cand]) jdt_ref = datetime2JD(ref_dt) traj = Trajectory(jdt_ref, verbose=False) + # Feed the observations into the trajectory solver for obs_temp, met_obs, _ in cand: @@ -1434,10 +1438,10 @@ def checkTrajIfFailed(self, traj): """ - if self.db is None: + if self.traj_db is None: return traj_reduced = TrajectoryReduced(None, traj_obj=traj) - return self.db.checkTrajIfFailed(traj_reduced) + return self.traj_db.checkTrajIfFailed(traj_reduced) @@ -1540,81 +1544,145 @@ def loadPhase1Trajectories(self, max_trajs=1000): log.info(f'File {pick} skipped for now') return dt_beg, dt_end - def distributeCandsForChildren(self, verbose=False): - """ - In 'master' mode this distributes candidates or phase1 trajectories to the children - """ - if self.mc_mode != MCMODE_CANDS: - log.warning('candidate distribution only applicable in MCMODE_CANDS') - return - # TODO - distribute - return - def 
uploadToMaster(self, verbose=False): """ - In 'child' mode this sends solved data back to the master node + Used in 'child' mode: this sends solved data back to the master node """ # close the databases and upload the data to the master node - self.db.closeTrajDatabase() + self.traj_db.closeTrajDatabase() self.observations_db.closeObsDatabase() self.RemoteDatahandler.uploadToMaster(self.output_dir, verbose=verbose) # truncate the tables here so they are clean for the next run - self.db = TrajectoryDatabase(self.db_dir, purge_records=True) + self.traj_db = TrajectoryDatabase(self.db_dir, purge_records=True) self.observations_db = ObservationDatabase(self.db_dir, purge_records=True) return - def gatherRemoteData(self, mcmode, verbose=False): + def moveUploadedData(self, verbose=False): """ - In master mode this gathers data thats been uploaded by the children and relocates - it to the correct places. - In 'child' mode it downloads data from the master for local processing. + Used in 'master' mode: this moves uploaded data to the target locations on the server """ - if self.RemoteDatahandler.mode == 'master': - # TODO make this bit work properly - # move remotely processed data from upload folders to the correct locations on the master node - self.RemoteDatahandler.moveRemoteData(self.output_dir) - self.RemoteDatahandler.moveRemoteSimpleData(self.output_dir) + for node in self.RemoteDatahandler.nodes: + if node.nodename == 'localhost': + continue - else: - # collect candidates or phase1 solutions from the master node - if mcmode == MCMODE_PHASE1 or mcmode == MCMODE_BOTH: - remote_count = self.RemoteDatahandler.collectRemoteData('candidates', self.output_dir, verbose=verbose) - elif mcmode == MCMODE_PHASE2: - remote_count = self.RemoteDatahandler.collectRemoteData('phase1', self.output_dir, verbose=verbose) - return remote_count + # merge the databases + if not os.path.isdir(os.path.join(node.dirpath,'files')): + continue + for obsdb_path in 
glob.glob(os.path.join(node.dirpath,'files','observations*.db')): + self.observations_db.mergeObsDatabase(obsdb_path) + os.remove(obsdb_path) + + for trajdb_path in glob.glob(os.path.join(node.dirpath,'files','trajectories*.db')): + self.traj_db.mergeTrajDatabase(trajdb_path) + + rem_db = TrajectoryDatabase(*os.path.split(trajdb_path)) + i = 0 + for i, traj in enumerate(i, rem_db.getTrajNames()): + traj_path, traj_name = os.path.split(traj) + local_path = os.path.split(traj_path)[1] + targ_path = os.path.join(self.output_dir, traj_path) + src_path = os.path.join(node.dirpath,'files', 'trajectories', local_path) + + src_name = os.path.join(src_path, traj_name) + shutil.copy(src_name, targ_path) + src_name = src_name.replace('trajectory.pickle', 'report.txt') + shutil.copy(src_name, targ_path) + + shutil.rmtree(src_path) + os.remove(trajdb_path) + if i > 0: + log.info(f'moved {i} trajectories in {trajdb_path}') + + remote_ph1dir = os.path.join(node.dirpath, 'files', 'phase1') + i = 0 + for i, fil in enumerate([x for x in os.listdir(remote_ph1dir) if '.pickle' in x]): + full_name = os.path.join(remote_ph1dir, fil) + shutil.copy(full_name, self.phase1_dir) + os.remove(full_name) + + if i > 0: + log.info(f'moved {i} phase 1 files from {node.nodename}') + + return True + def getRemoteData(self, verbose=False): + """ + Used in 'child' mode: this downloads data from the master for local processing. 
+ """ + if not self.RemoteDatahandler: + log.info('remote data handler not initialised') + return False + + # collect candidates or phase1 solutions from the master node + if self.mc_mode == MCMODE_PHASE1 or self.mc_mode == MCMODE_BOTH: + status = self.RemoteDatahandler.collectRemoteData('candidates', self.output_dir, verbose=verbose) + elif mcmode == MCMODE_PHASE2: + status = self.RemoteDatahandler.collectRemoteData('phase1', self.output_dir, verbose=verbose) + else: + status = False + return status + def saveCandidates(self, candidate_trajectories): for matched_observations in candidate_trajectories: ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) picklename = str(ref_dt.timestamp()) + '.pickle' - if self.RemoteDatahandler: - # TODO get candidate folder name here - # randomly select a node from the list of nodes then check that its actually listening - # and hasn't already received its max allocation. The master node gets anything left - while True: - curr_node = list(self.node_list.keys())[randrange(len(self.node_list.keys()))] - save_path = self.node_list[curr_node]['node_path'] - if curr_node == platform.uname()[1]: - break - listen_file = os.path.join(save_path, f'{curr_node}.listening') - if os.path.isfile(listen_file): - # if the folder already has enough candidates then use the master node - if len(glob.glob(os.path.join(save_path, '*.pickle'))) >= self.node_list[curr_node]['node_max']: - save_path = self.node_list[platform.uname()[1]]['node_path'] - break - else: - save_folder = self.candidate_dir - - savePickle(matched_observations, save_folder, picklename) + # this function can also save a candidate + self.savePhase1Trajectory(matched_observations, picklename, 'candidates', verbose=True) log.info("-----------------------") log.info(f'Saved {len(candidate_trajectories)} candidates') log.info("-----------------------") - + def savePhase1Trajectory(self, traj, file_name, savetype='phase1', verbose=False): + """ + in mcmode 
MCMODE_PHASE1 or MCMODE_SIMPLE , save the candidates or phase 1 trajectories + and distribute as appropriate + + """ + if savetype == 'phase1': + save_dir = self.phase1_dir + required_mode = 2 + else: + save_dir = self.candidate_dir + required_mode = 1 + + if self.RemoteDatahandler and self.RemoteDatahandler.mode == 'master': + + # Select a random bucket, check its not already full, and then save the pickle there. + # Make sure to break out once all buckets have been tested + # Fallback/default is to use the local phase_1 dir. + tested_buckets = [] + bucket_num = -1 + bucket_list = self.RemoteDatahandler.nodes + bucket_list[-1].dirpath = save_dir + + while bucket_num not in tested_buckets: + bucket_num = secrets.randbelow(len(bucket_list)) + bucket = bucket_list[bucket_num] + if verbose: + log.info(f'testing node {bucket.nodename} {bucket.dirpath}, {bucket.capacity}, {bucket.mode}') + # if the child isn't in mc mode, skip it + if bucket.mode != required_mode and bucket.mode != -1: + log.info(f'wrong mode - {bucket.mode} instead of {required_mode}') + tested_buckets.append(bucket_num) + continue + tmp_save_dir = os.path.join(bucket.dirpath, 'files', savetype) + os.makedirs(tmp_save_dir, exist_ok=True) + if os.path.isfile(os.path.join(bucket.dirpath, 'files', 'stop')): + tested_buckets.append(bucket_num) + log.info('stopfile present') + continue + if bucket.capacity < 0 or len(glob.glob(os.path.join(tmp_save_dir, '*.pickle'))) < bucket.capacity: + if verbose: + save_dir = tmp_save_dir + log.info(f'saving {file_name} to {bucket.dirpath}') + break + tested_buckets.append(bucket_num) + + savePickle(traj, save_dir, file_name) @@ -1717,9 +1785,6 @@ def saveCandidates(self, candidate_trajectories): arg_parser.add_argument('--autofreq', '--autofreq', type=int, default=360, help="Minutes to wait between runs in auto-mode") - arg_parser.add_argument('--remotehost', '--remotehost', type=str, default=None, - help="Remote host to collect candiates and return solutions to. 
Supports internet-distributed processing.") - arg_parser.add_argument('--verbose', '--verbose', help='Verbose logging.', default=False, action="store_true") # Parse the command line arguments @@ -1727,7 +1792,23 @@ def saveCandidates(self, candidate_trajectories): ############################ + db_dir = cml_args.dbdir + if db_dir is None: + db_dir = cml_args.dir_path + + def signal_handler(sig, frame): + signal.signal(sig, signal.SIG_IGN) # ignore additional signals + log.info('======================================') + log.info('CTRL-C pressed, exiting gracefully....') + log.info('======================================') + remote_cfg = os.path.join(db_dir, 'wmpl_remote.cfg') + if os.path.isfile(remote_cfg): + rdh = RemoteDataHandler(remote_cfg) + if rdh.mode == 'child': + rdh.setStopFlag() + sys.exit(0) + signal.signal(signal.SIGINT, signal_handler) ### Init logging - roll over every day ### @@ -1814,17 +1895,9 @@ def saveCandidates(self, candidate_trajectories): mcmode = MCMODE_ALL if cml_args.mcmode == 0 else cml_args.mcmode - remotehost = cml_args.remotehost - if mcmode !=MCMODE_PHASE2 and remotehost is not None: - log.info('remotehost only applicable in mcmode 2') - remotehost = None - # set the maximum number of trajectories to reprocess when doing the MC uncertainties # set a default of 10 for remote processing and 1000 for local processing - if cml_args.remotehost is not None: - max_trajs = 10 - else: - max_trajs = 1000 + max_trajs = 1000 if cml_args.maxtrajs is not None: max_trajs = int(cml_args.maxtrajs) @@ -1989,6 +2062,9 @@ def saveCandidates(self, candidate_trajectories): bin_time_range = [bin_beg, bin_end] num_done = tc.run(event_time_range=event_time_range, mcmode=mcmode, bin_time_range=bin_time_range) + if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child' and num_done > 0: + dh.RemoteDatahandler.uploadToMaster(dh.output_dir, verbose=False) + if mcmode & MCMODE_CANDS: dh.observations_db.closeObsDatabase() @@ -1998,9 +2074,6 @@ def 
saveCandidates(self, candidate_trajectories): log.info("Total run time: {:s}".format(str(datetime.datetime.now(datetime.timezone.utc) - t1))) - if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child' and num_done > 0: - dh.RemoteDatahandler.uploadToMaster(dh.output_dir, verbose=False) - # Store the previous start time previous_start_time = copy.deepcopy(t1) @@ -2008,7 +2081,9 @@ def saveCandidates(self, candidate_trajectories): # Break after one loop if auto mode is not on if cml_args.auto is None: - dh.RemoteDatahandler.clearReadyFlag() + # clear the remote data ready flag to indicate we're shutting down + if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child': + dh.RemoteDatahandler.setStopFlag() break else: @@ -2017,6 +2092,10 @@ def saveCandidates(self, candidate_trajectories): wait_time = (datetime.timedelta(hours=AUTO_RUN_FREQUENCY) - (datetime.datetime.now(datetime.timezone.utc) - t1)).total_seconds() + # remove the remote data stop flag to indicate we're open for business + if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child': + dh.RemoteDatahandler.clearStopFlag() + # Run immediately if the wait time has elapsed if wait_time < 0: continue diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index e72021b9..c9ebe02d 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -23,16 +23,20 @@ import os import paramiko import logging -import glob import shutil from configparser import ConfigParser -from wmpl.Utils.OSTools import mkdirP -from wmpl.Utils.Pickling import loadPickle +log = logging.getLogger("traj_correlator") -log = logging.getLogger("traj_correlator") +class RemoteNode(): + def __init__(self, nodename, dirpath, capacity, mode, active=False): + self.nodename = nodename + self.dirpath = dirpath + self.capacity = int(capacity) + self.mode = int(mode) + self.active = active class RemoteDataHandler(): @@ -64,14 +68,13 @@ def __init__(self, cfg_file): 
log.warning('remote cfg: children section missing, not enabling remote processing') return + # create a list of available nodes, disabling any that are malformed in the config file self.nodenames = [k for k in cfg['children'].keys()] self.nodes = [k.split(',') for k in cfg['children'].values()] - for i in range(len(self.nodes)): - if len(self.nodes[i]) < 3: - print(f'disabling node {self.nodenames[i]} due to missing config') - while len(self.nodes[i]) < 3: - self.nodes[i].append(0) + self.nodes = [RemoteNode(nn,x[0],x[1],x[2]) for nn,x in zip(self.nodenames,self.nodes) if len(x)==3] + self.nodes.append(RemoteNode('localhost', None, -1, -1)) else: + # 'child' mode if 'sftp' not in cfg.sections() or 'key' not in cfg['sftp'] or 'host' not in cfg['sftp'] or 'user' not in cfg['sftp']: log.warning('remote cfg: sftp user, key or host missing, not enabling remote processing') return @@ -90,6 +93,10 @@ def __init__(self, cfg_file): def getSFTPConnection(self): if not self.initialised: return False + + if self.sftp_client: + return True + log.info(f'Connecting to {self.host}:{self.port} as {self.user}....') if not os.path.isfile(os.path.expanduser(self.key)): @@ -114,8 +121,10 @@ def getSFTPConnection(self): def closeSFTPConnection(self): if self.sftp_client: self.sftp_client.close() + self.sftp_client = None if self.ssh_client: self.ssh_client.close() + self.ssh_client = None return ######################################################## @@ -131,7 +140,7 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): """ if not self.initialised or not self.getSFTPConnection(): - return + return False for pth in ['files', 'files/candidates', 'files/phase1', 'files/trajectories', 'files/candidates/processed','files/phase1/processed']: @@ -141,18 +150,13 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): pass try: - readyfile = os.path.join(os.getenv('TMP', default='/tmp'),'ready') - open(readyfile,'w').write('ready') - self.sftp_client.put(readyfile, 
'files/ready') - log.info('set ready flag') - rem_dir = f'files/{datatype}' files = self.sftp_client.listdir(rem_dir) files = [f for f in files if '.pickle' in f and 'processing' not in f] if len(files) == 0: log.info('no data available at this time') self.closeSFTPConnection() - return + return False for trajfile in files: fullname = f'{rem_dir}/{trajfile}' @@ -172,7 +176,7 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): log.info(e) self.closeSFTPConnection() - return len(files) + return True def uploadToMaster(self, source_dir, verbose=False): """ @@ -244,75 +248,28 @@ def uploadToMaster(self, source_dir, verbose=False): self.sftp_client.put(local_name, rem_file) log.info('uploaded databases') - self.closeSFTPConnection() return + + def setStopFlag(self, verbose=False): + if not self.initialised or not self.getSFTPConnection(): + return + try: + readyfile = os.path.join(os.getenv('TMP', default='/tmp'),'stop') + open(readyfile,'w').write('stop') + self.sftp_client.put(readyfile, 'files/stop') + except Exception: + log.warning('unable to set stop flag, master will not continue to assign data') + self.closeSFTPConnection() + log.info('set stop flag') - def clearReadyFlag(self, verbose=False): - """ - upload the trajectory pickle and report to a remote host for integration - into the solved dataset - - parameters: - source_dir = root folder containing data, generally dh.output_dir - """ - + def clearStopFlag(self, verbose=False): if not self.initialised or not self.getSFTPConnection(): return try: - self.sftp_client.remove('files/ready') - log.info('removed ready flag') + self.sftp_client.remove('files/stop') + log.info('removed stop flag') except: - log.warning('unable to clear ready flag, master continue to assign data') + pass self.closeSFTPConnection() return - - ######################################################## - # functions used by the master node - - def moveRemoteData(self, output_dir): - """ - Move remotely processed pickle files 
to their target location in the trajectories area, - making sure we clean up any previously-calculated trajectory and temporary files - """ - - # TODO NEED TO REWORK THIS - phase2_dir = os.path.join(output_dir, 'remoteuploads') - - if os.path.isdir(phase2_dir): - log.info('Checking for remotely calculated trajectories...') - pickles = glob.glob1(phase2_dir, '*.pickle') - - for pick in pickles: - traj = loadPickle(phase2_dir, pick) - phase1_name = traj.pre_mc_longname - traj_dir = f'{output_dir}/trajectories/{phase1_name[:4]}/{phase1_name[:6]}/{phase1_name[:8]}/{phase1_name}' - if os.path.isdir(traj_dir): - shutil.rmtree(traj_dir) - processed_traj_file = os.path.join(output_dir, 'phase1', phase1_name + '_trajectory.pickle_processing') - - if os.path.isfile(processed_traj_file): - log.info(f' Moving {phase1_name} to processed folder...') - dst = os.path.join(output_dir, 'phase1', 'processed', phase1_name + '_trajectory.pickle') - shutil.copyfile(processed_traj_file, dst) - os.remove(processed_traj_file) - - phase2_name = traj.longname - traj_dir = f'{output_dir}/trajectories/{phase2_name[:4]}/{phase2_name[:6]}/{phase2_name[:8]}/{phase2_name}' - mkdirP(traj_dir) - log.info(f' Moving {phase2_name} to {traj_dir}...') - src = os.path.join(phase2_dir, pick) - dst = os.path.join(traj_dir, pick[:15]+'_trajectory.pickle') - - shutil.copyfile(src, dst) - os.remove(src) - - report_file = src.replace('_trajectory.pickle','_report.txt') - if os.path.isfile(report_file): - dst = dst.replace('_trajectory.pickle','_report.txt') - shutil.copyfile(report_file, dst) - os.remove(report_file) - - log.info(f'Moved {len(pickles)} trajectories.') - - return From 458170d2d30f88b55f15dfa85ebb277c41a7886e Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 3 Feb 2026 18:25:19 +0000 Subject: [PATCH 059/132] more work on master mode --- wmpl/Trajectory/CorrelateRMS.py | 96 ++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 42 deletions(-) diff --git 
a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index d3016e06..8077caf0 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -539,22 +539,6 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode log.info("") - # Initialise remote data handling, if the config file is present - remote_cfg = os.path.join(self.db_dir, 'wmpl_remote.cfg') - if os.path.isfile(remote_cfg): - log.info('remote data management requested, initialising') - self.RemoteDatahandler = RemoteDataHandler(remote_cfg) - if self.RemoteDatahandler.mode == 'child': - self.RemoteDatahandler.clearStopFlag() - status = self.getRemoteData(verbose=False) - else: - status = self.moveUploadedData(verbose=False) - if not status: - log.info('no remote data yet') - # TODO probably want to loop here looking for data for 10-15 minutes - else: - self.RemoteDatahandler = None - if mcmode != MCMODE_PHASE2: # no need to load the legacy JSON file if we already have the sqlite databases @@ -601,6 +585,22 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.traj_db = None self.observations_db = None + # Initialise remote data handling, if the config file is present + remote_cfg = os.path.join(self.db_dir, 'wmpl_remote.cfg') + if os.path.isfile(remote_cfg): + log.info('remote data management requested, initialising') + self.RemoteDatahandler = RemoteDataHandler(remote_cfg) + if self.RemoteDatahandler.mode == 'child': + self.RemoteDatahandler.clearStopFlag() + status = self.getRemoteData(verbose=False) + else: + status = self.moveUploadedData(verbose=False) + if not status: + log.info('no remote data yet') + # TODO probably want to loop here looking for data for 10-15 minutes + else: + self.RemoteDatahandler = None + ### Define country groups to speed up the proceessing ### north_america_group = ["CA", "US", "MX"] @@ -1298,7 +1298,7 @@ def saveTrajectoryResults(self, traj, save_plots): if self.mc_mode & 
MCMODE_PHASE1 and not self.mc_mode & MCMODE_PHASE2: # TODO distribute phase1 pickles here - self.savePhase1Trajectory(traj, self.phase1_dir, traj.pre_mc_longname + '_trajectory.pickle', verbose=True) + self.savePhase1Trajectory(traj, traj.pre_mc_longname + '_trajectory.pickle', verbose=True) elif self.mc_mode & MCMODE_PHASE2: # the MC phase may alter the trajectory details and if later on @@ -1567,9 +1567,11 @@ def moveUploadedData(self, verbose=False): if node.nodename == 'localhost': continue - # merge the databases + # if the remote node upload path doesn't exist skip it if not os.path.isdir(os.path.join(node.dirpath,'files')): continue + + # merge the databases for obsdb_path in glob.glob(os.path.join(node.dirpath,'files','observations*.db')): self.observations_db.mergeObsDatabase(obsdb_path) os.remove(obsdb_path) @@ -1579,31 +1581,40 @@ def moveUploadedData(self, verbose=False): rem_db = TrajectoryDatabase(*os.path.split(trajdb_path)) i = 0 - for i, traj in enumerate(i, rem_db.getTrajNames()): + for i, traj in enumerate(rem_db.getTrajNames()): traj_path, traj_name = os.path.split(traj) local_path = os.path.split(traj_path)[1] targ_path = os.path.join(self.output_dir, traj_path) src_path = os.path.join(node.dirpath,'files', 'trajectories', local_path) src_name = os.path.join(src_path, traj_name) - shutil.copy(src_name, targ_path) + os.makedirs(targ_path, exist_ok=True) + if not os.path.isfile(src_name): + log.info(f'trajectory {src_name} missing') + else: + shutil.copy(src_name, targ_path) src_name = src_name.replace('trajectory.pickle', 'report.txt') - shutil.copy(src_name, targ_path) + if not os.path.isfile(src_name): + log.info(f'report {src_name} missing') + else: + shutil.copy(src_name, targ_path) - shutil.rmtree(src_path) + shutil.rmtree(src_path,ignore_errors=True) + rem_db.closeTrajDatabase() os.remove(trajdb_path) if i > 0: - log.info(f'moved {i} trajectories in {trajdb_path}') + log.info(f'moved {i+1} trajectories in {trajdb_path}') remote_ph1dir = 
os.path.join(node.dirpath, 'files', 'phase1') - i = 0 - for i, fil in enumerate([x for x in os.listdir(remote_ph1dir) if '.pickle' in x]): - full_name = os.path.join(remote_ph1dir, fil) - shutil.copy(full_name, self.phase1_dir) - os.remove(full_name) - - if i > 0: - log.info(f'moved {i} phase 1 files from {node.nodename}') + if os.path.isdir(remote_ph1dir): + i = 0 + for i, fil in enumerate([x for x in os.listdir(remote_ph1dir) if '.pickle' in x]): + full_name = os.path.join(remote_ph1dir, fil) + shutil.copy(full_name, self.phase1_dir) + os.remove(full_name) + + if i > 0: + log.info(f'moved {i+1} phase 1 files from {node.nodename}') return True @@ -1624,13 +1635,14 @@ def getRemoteData(self, verbose=False): status = False return status - def saveCandidates(self, candidate_trajectories): + def saveCandidates(self, candidate_trajectories, verbose=False): for matched_observations in candidate_trajectories: - ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) - picklename = str(ref_dt.timestamp()) + '.pickle' + ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) + ctries = '_'.join(list(set([met_obs.station_code[:2] for _, met_obs, _ in matched_observations]))) + picklename = str(ref_dt.timestamp()) + '_' + ctries + '.pickle' # this function can also save a candidate - self.savePhase1Trajectory(matched_observations, picklename, 'candidates', verbose=True) + self.savePhase1Trajectory(matched_observations, picklename, 'candidates', verbose=verbose) log.info("-----------------------") log.info(f'Saved {len(candidate_trajectories)} candidates') @@ -1662,26 +1674,26 @@ def savePhase1Trajectory(self, traj, file_name, savetype='phase1', verbose=False while bucket_num not in tested_buckets: bucket_num = secrets.randbelow(len(bucket_list)) bucket = bucket_list[bucket_num] - if verbose: - log.info(f'testing node {bucket.nodename} {bucket.dirpath}, {bucket.capacity}, {bucket.mode}') # if the child isn't in mc mode, skip it if 
bucket.mode != required_mode and bucket.mode != -1: - log.info(f'wrong mode - {bucket.mode} instead of {required_mode}') tested_buckets.append(bucket_num) continue - tmp_save_dir = os.path.join(bucket.dirpath, 'files', savetype) + if bucket.nodename != 'localhost': + tmp_save_dir = os.path.join(bucket.dirpath, 'files', savetype) + else: + tmp_save_dir = save_dir os.makedirs(tmp_save_dir, exist_ok=True) if os.path.isfile(os.path.join(bucket.dirpath, 'files', 'stop')): tested_buckets.append(bucket_num) - log.info('stopfile present') continue if bucket.capacity < 0 or len(glob.glob(os.path.join(tmp_save_dir, '*.pickle'))) < bucket.capacity: - if verbose: + if bucket.nodename != 'localhost': save_dir = tmp_save_dir - log.info(f'saving {file_name} to {bucket.dirpath}') break tested_buckets.append(bucket_num) + if verbose: + log.info(f'saving {file_name} to {save_dir}') savePickle(traj, save_dir, file_name) From 138d67233dc66eba87daceb5f8181fa3a321733e Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 3 Feb 2026 22:25:30 +0000 Subject: [PATCH 060/132] fix bug in unpairing if db is empty --- wmpl/Trajectory/CorrelateDB.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 5ff9f941..bd3a2159 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -85,7 +85,7 @@ def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): return True - def unpairObs(self, station_code, obs_id, verbose=False): + def unpairObs(self, station_code, obs_id, obs_date, verbose=False): # if an entry exists, update the status to 0. # this allows us to mark an observation paired, then unpair it later if the solution fails # or we want to force a rerun. 
@@ -94,6 +94,7 @@ def unpairObs(self, station_code, obs_id, verbose=False): cur = self.dbhandle.cursor() try: + cur.execute(f"insert or ignore into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)") cur.execute(f"update paired_obs set status=0 where station_code='{station_code}' and obs_id='{obs_id}'") self.dbhandle.commit() except Exception: @@ -130,7 +131,7 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): log.info(f'{row[1]} already exists in target') cur.execute(f'delete from paired_obs where obs_date < {archdate_jd}') - self.commitObsDatabase() + self.dbhandle.commit() cur.close() return @@ -159,7 +160,7 @@ def moveObsJsonRecords(self, paired_obs, dt_range): i += 1 if not i % 100000 and i != 0: log.info(f'moved {i} observations') - self.commitObsDatabase() + self.dbhandle.commit() log.info(f'done - moved {i} observations') log.info('-----------------------------') @@ -180,7 +181,7 @@ def mergeObsDatabase(self, source_db_path): except Exception: log.info('unable to merge child observations') - self.commitObsDatabase() + self.dbhandle.commit() cur.execute("detach database 'sourcedb'") cur.close() return @@ -212,8 +213,9 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal con = sqlite3.connect(db_full_name) cur = con.cursor() if purge_records: - cur.execute('drop table trajectories') - cur.execute('drop table failed_trajectories') + cur.execute('drop table if exists trajectories') + cur.execute('drop table if exists failed_trajectories') + con.commit() res = cur.execute("SELECT name FROM sqlite_master WHERE name='trajectories'") if res.fetchone() is None: cur.execute("""CREATE TABLE trajectories( @@ -463,7 +465,7 @@ def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): except Exception: log.warning(f'unable to archive {table_name}') - self.commitTrajDatabase() + self.dbhandle.commit() cur.close() return @@ -485,7 +487,7 @@ def moveFailedTrajectories(self, failed_trajectories, 
dt_range): if not i % 10000: self.commitTrajDatabase() log.info(f'moved {i} failed_trajectories') - self.commitTrajDatabase() + self.dbhandle.commit() log.info(f'done - moved {i} failed_trajectories') return @@ -508,7 +510,7 @@ def mergeTrajDatabase(self, source_db_path): cur.execute(f'insert or replace into {table_name} select * from sourcedb.{table_name}') except Exception: log.warning(f'unable to merge data from {source_db_path}') - self.commitTrajDatabase() + self.dbhandle.commit() cur.execute("detach database 'sourcedb'") cur.close() return From 69c99ceff3518cb387d8ee7b052a72999f1c3364 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 3 Feb 2026 22:25:58 +0000 Subject: [PATCH 061/132] fix bug in unpairing if db is empty --- wmpl/Trajectory/CorrelateEngine.py | 10 +++++----- wmpl/Trajectory/CorrelateRMS.py | 27 +++++++++++---------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 8f0559ad..13e6b25d 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -860,7 +860,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info(f"Trajectory at {jdt_ref} skipped and added to fails!") if matched_obs: for _, met_obs_temp, _ in matched_obs: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) return False # If there are only two stations, make sure to reject solutions which have stations with @@ -874,7 +874,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Add the trajectory to the list of failed trajectories self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=True) for _, met_obs_temp, _ in matched_obs: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, 
verbose=True) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) return False @@ -1318,7 +1318,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): else: for met_obs_temp, _ in candidate_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) log.info("New trajectory solution failed, keeping the old trajectory...") ### ### @@ -1712,7 +1712,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref, verbose=True) for _, met_obs_temp, _ in matched_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) log.info("Trajectory skipped and added to fails!") continue @@ -1778,7 +1778,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): if self.dh.checkTrajIfFailed(traj): log.info("The same trajectory already failed to be computed in previous runs!") for _, met_obs_temp, _ in matched_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) continue # pass in matched_observations here so that solveTrajectory can mark them paired if they're used diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 8077caf0..c15b9fbd 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1425,7 +1425,7 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire if self.checkTrajIfFailed(traj): 
log.info(f'Trajectory at {jd2Date(traj.jdt_ref,dt_obj=True).isoformat()} already failed, skipping') for _, met_obs_temp, _ in cand: - self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, verbose=True) + self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) remaining_unpaired -= 1 else: candidate_trajectories.append(cand) @@ -1544,24 +1544,10 @@ def loadPhase1Trajectories(self, max_trajs=1000): log.info(f'File {pick} skipped for now') return dt_beg, dt_end - def uploadToMaster(self, verbose=False): - """ - Used in 'child' mode: this sends solved data back to the master node - """ - # close the databases and upload the data to the master node - self.traj_db.closeTrajDatabase() - self.observations_db.closeObsDatabase() - - self.RemoteDatahandler.uploadToMaster(self.output_dir, verbose=verbose) - - # truncate the tables here so they are clean for the next run - self.traj_db = TrajectoryDatabase(self.db_dir, purge_records=True) - self.observations_db = ObservationDatabase(self.db_dir, purge_records=True) - return - def moveUploadedData(self, verbose=False): """ Used in 'master' mode: this moves uploaded data to the target locations on the server + and merges in the databases """ for node in self.RemoteDatahandler.nodes: if node.nodename == 'localhost': @@ -2075,8 +2061,17 @@ def signal_handler(sig, frame): num_done = tc.run(event_time_range=event_time_range, mcmode=mcmode, bin_time_range=bin_time_range) if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child' and num_done > 0: + log.info('uploading to master node') + # close the databases and upload the data to the master node + dh.traj_db.closeTrajDatabase() + dh.observations_db.closeObsDatabase() + dh.RemoteDatahandler.uploadToMaster(dh.output_dir, verbose=False) + # truncate the tables here so they are clean for the next run + dh.traj_db = TrajectoryDatabase(dh.db_dir, purge_records=True) + dh.observations_db = 
ObservationDatabase(dh.db_dir, purge_records=True) + if mcmode & MCMODE_CANDS: dh.observations_db.closeObsDatabase() From fd38bd5ee5eef8fef0902a615ca16a3faead06f1 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 3 Feb 2026 22:26:40 +0000 Subject: [PATCH 062/132] make uploaded database names unique --- wmpl/Utils/remoteDataHandling.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index c9ebe02d..52195512 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -24,6 +24,8 @@ import paramiko import logging import shutil +import uuid + from configparser import ConfigParser @@ -239,10 +241,11 @@ def uploadToMaster(self, source_dir, verbose=False): log.info(f'uploaded {int(i/2)} trajectories') # finally the databases - for fname in ['observations.db', 'trajectories.db']: - local_name = os.path.join(source_dir, fname) + uuid_str = str(uuid.uuid4()) + for fname in ['observations', 'trajectories']: + local_name = os.path.join(source_dir, f'{fname}.db') if os.path.isfile(local_name): - rem_file = f'files/{fname}' + rem_file = f'files/{fname}-{uuid_str}.db' if verbose: log.info(f'uploading {local_name} to {rem_file}') self.sftp_client.put(local_name, rem_file) From 6aac11ff89d53dc109a17d6215d9cf59e2313964 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 3 Feb 2026 23:52:40 +0000 Subject: [PATCH 063/132] oops, make sure phase1 dir exists even in mc mode --- wmpl/Trajectory/CorrelateRMS.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index c15b9fbd..771053f4 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1592,6 +1592,7 @@ def moveUploadedData(self, verbose=False): log.info(f'moved {i+1} trajectories in {trajdb_path}') remote_ph1dir = os.path.join(node.dirpath, 'files', 'phase1') + os.makedirs(self.phase1_dir, exist_ok=True) if 
os.path.isdir(remote_ph1dir): i = 0 for i, fil in enumerate([x for x in os.listdir(remote_ph1dir) if '.pickle' in x]): From ce2877bbce6939a537a4a95a01c75d67a57f77f6 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 4 Feb 2026 19:34:19 +0000 Subject: [PATCH 064/132] small bugfixes --- wmpl/Trajectory/CorrelateEngine.py | 31 +++++++++++++++--------------- wmpl/Utils/remoteDataHandling.py | 2 ++ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 13e6b25d..58335e1c 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -611,7 +611,7 @@ def initTrajectory(self, jdt_ref, mc_runs, verbose=False): return traj - def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, orig_traj=None): + def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, orig_traj=None, verbose=False): """ Given an initialized Trajectory object with observation, run the solver and automatically reject bad observations. 
@@ -807,7 +807,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Init a new trajectory object (make sure to use the new reference Julian date) - traj = self.initTrajectory(traj_status.jdt_ref, mc_runs, verbose=False) + traj = self.initTrajectory(traj_status.jdt_ref, mc_runs, verbose=verbose) # Disable Monte Carlo runs until an initial stable set of observations is found traj.monte_carlo = False @@ -860,7 +860,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info(f"Trajectory at {jdt_ref} skipped and added to fails!") if matched_obs: for _, met_obs_temp, _ in matched_obs: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) return False # If there are only two stations, make sure to reject solutions which have stations with @@ -874,7 +874,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Add the trajectory to the list of failed trajectories self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=True) for _, met_obs_temp, _ in matched_obs: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) return False @@ -919,7 +919,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info("Stable set of observations found, computing uncertainties using Monte Carlo...") # Init a new trajectory object (make sure to use the new reference Julian date) - traj = self.initTrajectory(traj_status.jdt_ref, mc_runs, verbose=False) + traj = self.initTrajectory(traj_status.jdt_ref, mc_runs, verbose=verbose) # Enable Monte Carlo traj.monte_carlo = True @@ -1064,7 +1064,7 @@ 
def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or return successful_traj_fit - def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): + def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, verbose=False): """ Run meteor corellation using available data. Keyword arguments: @@ -1260,6 +1260,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): ref_dt=jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)) obs_new.id = met_obs.id obs_new.station_code = met_obs.station_code + obs_new.mean_dt = met_obs.mean_dt # Get an observation from the trajectory object with the maximum convergence angle to # the reference observations @@ -1304,7 +1305,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): # Re-run the trajectory fit # pass in orig_traj here so that it can be deleted from disk if the new solution succeeds - successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=mcmode, orig_traj=traj_reduced) + successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=mcmode, orig_traj=traj_reduced, verbose=verbose) # If the new trajectory solution succeeded, remove the now-paired observations if successful_traj_fit: @@ -1318,7 +1319,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): else: for met_obs_temp, _ in candidate_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) log.info("New trajectory solution failed, keeping the old trajectory...") ### ### @@ -1565,7 +1566,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): log.info('SAVING {} CANDIDATES'.format(len(candidate_trajectories))) log.info("-----------------------") - 
self.dh.saveCandidates(candidate_trajectories) + self.dh.saveCandidates(candidate_trajectories, verbose=verbose) return len(candidate_trajectories) else: log.info("-----------------------") @@ -1700,7 +1701,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) jdt_ref = datetime2JD(ref_dt) - failed_traj = self.initTrajectory(jdt_ref, 0, verbose=False) + failed_traj = self.initTrajectory(jdt_ref, 0, verbose=verbose) for obs_temp, met_obs, _ in matched_observations: failed_traj.infillWithObs(obs_temp) @@ -1712,7 +1713,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref, verbose=True) for _, met_obs_temp, _ in matched_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) + self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) log.info("Trajectory skipped and added to fails!") continue @@ -1743,7 +1744,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): # Init the solver (use the earliest date as the reference) ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) jdt_ref = datetime2JD(ref_dt) - traj = self.initTrajectory(jdt_ref, mc_runs, verbose=False) + traj = self.initTrajectory(jdt_ref, mc_runs, verbose=verbose) # Feed the observations into the trajectory solver @@ -1778,11 +1779,11 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): if self.dh.checkTrajIfFailed(traj): log.info("The same trajectory already failed to be computed in previous runs!") for _, met_obs_temp, _ in matched_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) + 
self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) continue # pass in matched_observations here so that solveTrajectory can mark them paired if they're used - result = self.solveTrajectory(traj, mc_runs, mcmode=mcmode, matched_obs=matched_observations) + result = self.solveTrajectory(traj, mc_runs, mcmode=mcmode, matched_obs=matched_observations, verbose=verbose) traj_solved_count += int(result) # end of if mcmode != MCMODE_PHASE2 @@ -1812,7 +1813,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL): mc_runs = int(np.ceil(mc_runs/self.traj_constraints.mc_cores)*self.traj_constraints.mc_cores) # pass in matched_observations here so that solveTrajectory can mark them unpaired if the solver fails - result = self.solveTrajectory(traj, mc_runs, mcmode=mcmode, matched_obs=matched_observations, orig_traj=traj) + result = self.solveTrajectory(traj, mc_runs, mcmode=mcmode, matched_obs=matched_observations, orig_traj=traj, verbose=verbose) traj_solved_count += int(result) # end of "for matched_observations in candidate_trajectories" diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index 52195512..e83e3234 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -75,6 +75,8 @@ def __init__(self, cfg_file): self.nodes = [k.split(',') for k in cfg['children'].values()] self.nodes = [RemoteNode(nn,x[0],x[1],x[2]) for nn,x in zip(self.nodenames,self.nodes) if len(x)==3] self.nodes.append(RemoteNode('localhost', None, -1, -1)) + activenodes = [n.nodename for n in self.nodes if n.capacity!=0] + log.info(f' using nodes {activenodes}') else: # 'child' mode if 'sftp' not in cfg.sections() or 'key' not in cfg['sftp'] or 'host' not in cfg['sftp'] or 'user' not in cfg['sftp']: From 3622d0c2cda5803247b1070a52123ba753400f08 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 4 Feb 2026 19:36:57 +0000 Subject: [PATCH 065/132] 
remove some excess verbosity --- wmpl/Trajectory/CorrelateEngine.py | 8 ++++---- wmpl/Trajectory/CorrelateRMS.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 58335e1c..147941a9 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -856,7 +856,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or if skip_trajectory: # Add the trajectory to the list of failed trajectories - self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=True) + self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=verbose) log.info(f"Trajectory at {jdt_ref} skipped and added to fails!") if matched_obs: for _, met_obs_temp, _ in matched_obs: @@ -872,7 +872,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info("2 station only solution, one station has an error above the maximum limit, skipping!") # Add the trajectory to the list of failed trajectories - self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=True) + self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=verbose) for _, met_obs_temp, _ in matched_obs: self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) return False @@ -974,7 +974,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Add the trajectory to the list of failed trajectories if mcmode != MCMODE_PHASE2: - self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=True) + self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=verbose) log.info('Trajectory failed to solve') self.dh.cleanupPhase2TempPickle(save_traj) return False @@ -1710,7 +1710,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if t0 != 0.0: failed_traj.jdt_ref = failed_traj.jdt_ref + t0/86400.0 - 
self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref, verbose=True) + self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref, verbose=verbose) for _, met_obs_temp, _ in matched_observations: self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 771053f4..3fcca36f 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1270,7 +1270,7 @@ def generateTrajOutputDirectoryPath(self, traj, make_dirs=False): return out_path - def saveTrajectoryResults(self, traj, save_plots): + def saveTrajectoryResults(self, traj, save_plots, verbose=False): """ Save trajectory results to the disk. """ @@ -1298,7 +1298,7 @@ def saveTrajectoryResults(self, traj, save_plots): if self.mc_mode & MCMODE_PHASE1 and not self.mc_mode & MCMODE_PHASE2: # TODO distribute phase1 pickles here - self.savePhase1Trajectory(traj, traj.pre_mc_longname + '_trajectory.pickle', verbose=True) + self.savePhase1Trajectory(traj, traj.pre_mc_longname + '_trajectory.pickle', verbose=verbose) elif self.mc_mode & MCMODE_PHASE2: # the MC phase may alter the trajectory details and if later on @@ -1393,7 +1393,7 @@ def cleanupPhase2TempPickle(self, traj, success=False): savePickle(traj, os.path.join(self.phase1_dir, 'processed'), fldr_name + '_trajectory.pickle') return - def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaired): + def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaired, verbose=False): # go through the candidates and check if they correspond to already-failed candidate_trajectories=[] @@ -1425,7 +1425,7 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire if self.checkTrajIfFailed(traj): log.info(f'Trajectory at {jd2Date(traj.jdt_ref,dt_obj=True).isoformat()} already failed, skipping') for _, met_obs_temp, _ in cand: - 
self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=True) + self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) remaining_unpaired -= 1 else: candidate_trajectories.append(cand) From 67fefbef1096cdc117839e14abf3c8f73ee4a940 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 4 Feb 2026 21:06:37 +0000 Subject: [PATCH 066/132] indicate how many are being solved --- wmpl/Trajectory/CorrelateEngine.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 147941a9..3cd9af0b 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1612,9 +1612,10 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver candidate_trajectories = self.dh.phase1Trajectories # end of "if mcmode == MCMODE_PHASE2" + num_traj = len(candidate_trajectories) log.info("") log.info("-----------------------") - log.info(f'SOLVING {len(candidate_trajectories)} TRAJECTORIES {mcmodestr}') + log.info(f'SOLVING {num_traj} TRAJECTORIES {mcmodestr}') log.info("-----------------------") log.info("") @@ -1623,7 +1624,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info("") log.info("-----------------------") - log.info(f'processing candidate {i+1}') + log.info(f'processing {"candidate" if mcmode==MCMODE_PHASE1 else "trajectory"} {i+1}/{num_traj}') # if mcmode is not 2, prepare to calculate the intersecting planes solutions From c4eecace6a3f8182746010e14ea46c45bf51ce7f Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 4 Feb 2026 22:34:51 +0000 Subject: [PATCH 067/132] bugfix to handle nans in the database --- wmpl/Trajectory/CorrelateDB.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 
bd3a2159..674d3dca 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -8,6 +8,7 @@ import datetime import json import shutil +import numpy as np from wmpl.Utils.TrajConversions import datetime2JD, jd2Date @@ -223,9 +224,9 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal traj_id VARCHAR UNIQUE, traj_file_path VARCHAR, participating_stations VARCHAR, + ignored_stations VARCHAR, radiant_eci_mini VARCHAR, state_vect_mini VARCHAR, - ignored_stations VARCHAR, phase_1_only INTEGER, v_init REAL, gravity_factor REAL, @@ -384,6 +385,7 @@ def getTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, ver cur.close() trajs = [] for rw in rows: + rw = [np.nan if x == 'NaN' else x for x in rw] json_dict = {'jdt_ref':rw[0], 'traj_id':rw[1], 'traj_file_path':os.path.join(output_dir, rw[2]), 'participating_stations': json.loads(rw[3]), 'ignored_stations': json.loads(rw[4]), From 92f865a2deb313f660fd9f30667bd56fdc69e2f9 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 4 Feb 2026 22:35:18 +0000 Subject: [PATCH 068/132] add log suffix --- wmpl/Trajectory/CorrelateRMS.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 3fcca36f..0137671f 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1773,7 +1773,7 @@ def savePhase1Trajectory(self, traj, file_name, savetype='phase1', verbose=False help="Use best N stations in the solution (default is use 15 stations).") arg_parser.add_argument('--mcmode', '--mcmode', type=int, default=0, - help="Run just simple soln (1), just monte-carlos (2) or both (0, default).") + help="Operation mode - see readme. For standalone solving either don't set this or set it to 0") arg_parser.add_argument('--archiveoldrecords', '--archiveoldrecords', type=int, default=3, help="Months back to archive old data. Default 3. 
Zero means don't archive (useful in testing).") @@ -1786,6 +1786,8 @@ def savePhase1Trajectory(self, traj, file_name, savetype='phase1', verbose=False arg_parser.add_argument('--verbose', '--verbose', help='Verbose logging.', default=False, action="store_true") + arg_parser.add_argument('--addlogsuffix', '--addlogsuffix', help='add a suffix to the log to show what stage it is.', default=False, action="store_true") + # Parse the command line arguments cml_args = arg_parser.parse_args() @@ -1834,6 +1836,12 @@ def signal_handler(sig, frame): # Init the file handler timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(log_dir, f"correlate_rms_{timestamp}.log") + if cml_args.logsuffix: + modestrs = {4:'cands', 1:'simple', 2:'mcphase', 5:'candsimple', 3:'simplemc',7:'full',0:'full'} + if cml_args.mcmode in modestrs.keys(): + modestr = modestrs[cml_args.mcmode] + log_file = os.path.join(log_dir, f"correlate_rms_{timestamp}_{modestr}.log") + file_handler = logging.handlers.TimedRotatingFileHandler(log_file, when="midnight", backupCount=7) file_handler.setFormatter(log_formatter) log.addHandler(file_handler) From b2710374e3c30bd459b25e5a6cdb8a9521c43e09 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 4 Feb 2026 22:37:40 +0000 Subject: [PATCH 069/132] add log suffix properly --- wmpl/Trajectory/CorrelateRMS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 0137671f..ac2662a1 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1836,7 +1836,7 @@ def signal_handler(sig, frame): # Init the file handler timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(log_dir, f"correlate_rms_{timestamp}.log") - if cml_args.logsuffix: + if cml_args.addlogsuffix: modestrs = {4:'cands', 1:'simple', 2:'mcphase', 5:'candsimple', 3:'simplemc',7:'full',0:'full'} if cml_args.mcmode in modestrs.keys(): 
modestr = modestrs[cml_args.mcmode] From 1d366bf231e37c48292949113fd1fcf8d8895f0b Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 4 Feb 2026 23:39:46 +0000 Subject: [PATCH 070/132] catch SFTP fails --- wmpl/Utils/remoteDataHandling.py | 39 ++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index e83e3234..09c88ca9 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -25,6 +25,7 @@ import logging import shutil import uuid +import time from configparser import ConfigParser @@ -123,13 +124,26 @@ def getSFTPConnection(self): return False def closeSFTPConnection(self): - if self.sftp_client: + if self.sftp_client: self.sftp_client.close() self.sftp_client = None if self.ssh_client: self.ssh_client.close() self.ssh_client = None return + + def putWithRetry(self, local_name, remname): + i = 0 + while i < 10: + try: + self.sftp_client.put(local_name, remname) + except Exception as e: + time.sleep(1) + i += 1 + if i == 10: + log.warning(f'upload of {local_name} failed after 10 retries') + log.warning(e) + return ######################################################## # functions used by the client nodes @@ -167,11 +181,16 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): localname = os.path.join(output_dir, datatype, trajfile) if verbose: log.info(f'downloading {fullname} to {localname}') - self.sftp_client.get(fullname, localname) - try: - self.sftp_client.rename(fullname, f'{rem_dir}/processed/{trajfile}') - except: - self.sftp_client.remove(fullname) + i = 0 + while i < 10: + try: + self.sftp_client.get(fullname, localname) + except: + i += 1 + try: + self.sftp_client.rename(fullname, f'{rem_dir}/processed/{trajfile}') + except: + self.sftp_client.remove(fullname) log.info(f'Obtained {len(files)} {"trajectories" if datatype=="phase1" else "candidates"}') @@ -213,7 +232,7 @@ def uploadToMaster(self, 
source_dir, verbose=False): remname = f'files/phase1/{fil}' if verbose: log.info(f'uploading {local_name} to {remname}') - self.sftp_client.put(local_name, remname) + self.putWithRetry(local_name, remname) if os.path.isfile(os.path.join(proc_dir, fil)): os.remove(os.path.join(proc_dir, fil)) shutil.move(local_name, proc_dir) @@ -236,7 +255,7 @@ def uploadToMaster(self, source_dir, verbose=False): rem_file = f'{rem_path}/{fil}' if verbose: log.info(f'uploading {local_name} to {rem_file}') - self.sftp_client.put(local_name, rem_file) + self.putWithRetry(local_name, rem_file) i += 1 shutil.rmtree(traj_dir, ignore_errors=True) if i > 0: @@ -250,7 +269,7 @@ def uploadToMaster(self, source_dir, verbose=False): rem_file = f'files/{fname}-{uuid_str}.db' if verbose: log.info(f'uploading {local_name} to {rem_file}') - self.sftp_client.put(local_name, rem_file) + self.putWithRetry(local_name, rem_file) log.info('uploaded databases') self.closeSFTPConnection() @@ -262,7 +281,7 @@ def setStopFlag(self, verbose=False): try: readyfile = os.path.join(os.getenv('TMP', default='/tmp'),'stop') open(readyfile,'w').write('stop') - self.sftp_client.put(readyfile, 'files/stop') + self.putWithRetry(readyfile, 'files/stop') except Exception: log.warning('unable to set stop flag, master will not continue to assign data') self.closeSFTPConnection() From 333b48ecde0d8d9f733080725b534b3dddeb3661 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 4 Feb 2026 23:41:26 +0000 Subject: [PATCH 071/132] more small changes to protect against sftp fails --- wmpl/Utils/remoteDataHandling.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index 09c88ca9..2b625804 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -186,11 +186,15 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): try: self.sftp_client.get(fullname, localname) except: + time.sleep(1) i += 
1 + try: + self.sftp_client.rename(fullname, f'{rem_dir}/processed/{trajfile}') + except: try: - self.sftp_client.rename(fullname, f'{rem_dir}/processed/{trajfile}') - except: self.sftp_client.remove(fullname) + except: + pass log.info(f'Obtained {len(files)} {"trajectories" if datatype=="phase1" else "candidates"}') From c224ac16de3d4db82b46f67b2d4dc25e5bf83ead Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 11 Feb 2026 14:53:24 +0000 Subject: [PATCH 072/132] defend against sftp failures add some verbosity flags to support more advanced logging --- wmpl/Trajectory/CorrelateRMS.py | 5 ++-- wmpl/Utils/remoteDataHandling.py | 40 +++++++++++++++++++------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index ac2662a1..7f454c36 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -592,12 +592,11 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.RemoteDatahandler = RemoteDataHandler(remote_cfg) if self.RemoteDatahandler.mode == 'child': self.RemoteDatahandler.clearStopFlag() - status = self.getRemoteData(verbose=False) + status = self.getRemoteData(verbose=True) else: status = self.moveUploadedData(verbose=False) if not status: log.info('no remote data yet') - # TODO probably want to loop here looking for data for 10-15 minutes else: self.RemoteDatahandler = None @@ -1805,7 +1804,7 @@ def signal_handler(sig, frame): remote_cfg = os.path.join(db_dir, 'wmpl_remote.cfg') if os.path.isfile(remote_cfg): rdh = RemoteDataHandler(remote_cfg) - if rdh.mode == 'child': + if rdh and rdh.mode == 'child': rdh.setStopFlag() sys.exit(0) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index 2b625804..bd433ac1 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -95,7 +95,7 @@ def __init__(self, cfg_file): self.initialised = True return - def 
getSFTPConnection(self): + def getSFTPConnection(self, verbose=False): if not self.initialised: return False @@ -109,11 +109,20 @@ def getSFTPConnection(self): return False self.ssh_client = paramiko.SSHClient() + if verbose: + log.info('created paramiko ssh client....') self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) pkey = paramiko.RSAKey.from_private_key_file(self.key) try: - self.ssh_client.connect(hostname=self.host, username=self.user, port=self.port, pkey=pkey, look_for_keys=False) + if verbose: + log.info('connecting....') + self.ssh_client.connect(hostname=self.host, username=self.user, port=self.port, + pkey=pkey, look_for_keys=False, timeout=10) + if verbose: + log.info('connected....') self.sftp_client = self.ssh_client.open_sftp() + if verbose: + log.info('created client') return True except Exception as e: @@ -133,16 +142,14 @@ def closeSFTPConnection(self): return def putWithRetry(self, local_name, remname): - i = 0 - while i < 10: + for i in range(10): try: self.sftp_client.put(local_name, remname) - except Exception as e: + break + except Exception: time.sleep(1) - i += 1 - if i == 10: - log.warning(f'upload of {local_name} failed after 10 retries') - log.warning(e) + if i == 10: + log.warning(f'upload of {local_name} failed after 10 retries') return ######################################################## @@ -157,7 +164,7 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): output_dir = folder to put the pickles into generally dh.output_dir """ - if not self.initialised or not self.getSFTPConnection(): + if not self.initialised or not self.getSFTPConnection(verbose=verbose): return False for pth in ['files', 'files/candidates', 'files/phase1', 'files/trajectories', @@ -181,13 +188,12 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): localname = os.path.join(output_dir, datatype, trajfile) if verbose: log.info(f'downloading {fullname} to {localname}') - i = 0 - while i < 10: + for i in 
range(10): try: self.sftp_client.get(fullname, localname) - except: + break + except Exception: time.sleep(1) - i += 1 try: self.sftp_client.rename(fullname, f'{rem_dir}/processed/{trajfile}') except: @@ -214,7 +220,7 @@ def uploadToMaster(self, source_dir, verbose=False): source_dir = root folder containing data, generally dh.output_dir """ - if not self.initialised or not self.getSFTPConnection(): + if not self.initialised or not self.getSFTPConnection(verbose=verbose): return for pth in ['files', 'files/candidates', 'files/phase1', 'files/trajectories', @@ -285,11 +291,13 @@ def setStopFlag(self, verbose=False): try: readyfile = os.path.join(os.getenv('TMP', default='/tmp'),'stop') open(readyfile,'w').write('stop') - self.putWithRetry(readyfile, 'files/stop') + self.sftp_client.put(readyfile, 'files/stop') except Exception: log.warning('unable to set stop flag, master will not continue to assign data') + time.sleep(2) self.closeSFTPConnection() log.info('set stop flag') + return def clearStopFlag(self, verbose=False): if not self.initialised or not self.getSFTPConnection(): From 47a2d27f03ee57bdcf1b45cf73426fa5d5e22b72 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 11 Feb 2026 21:55:35 +0000 Subject: [PATCH 073/132] bugfix to prevent phase2 crash merging data --- wmpl/Trajectory/CorrelateRMS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 7f454c36..0c08a94d 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1549,7 +1549,7 @@ def moveUploadedData(self, verbose=False): and merges in the databases """ for node in self.RemoteDatahandler.nodes: - if node.nodename == 'localhost': + if node.nodename == 'localhost' or self.observations_db is None or self.traj_db is None: continue # if the remote node upload path doesn't exist skip it From 6da8bbba180a061856183cf3492e0ea2cc62cbb2 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 
11 Feb 2026 21:55:49 +0000 Subject: [PATCH 074/132] add function to copy json data over --- wmpl/Trajectory/CorrelateDB.py | 100 ++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 25 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 674d3dca..2965e6db 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -192,10 +192,13 @@ def mergeObsDatabase(self, source_db_path): class DummyTrajReduced(): # a dummy class for use in a couple of fuctions in the TrajectoryDatabase - def __init__(self, jdt_ref, traj_id, traj_file_path): - self.jdt_ref = jdt_ref - self.traj_id = traj_id - self.traj_file_path = traj_file_path + def __init__(self, jdt_ref=None, traj_id=None, traj_file_path=None, json_dict=None): + if json_dict is None: + self.jdt_ref = jdt_ref + self.traj_id = traj_id + self.traj_file_path = traj_file_path + else: + self.__dict__ = json_dict class TrajectoryDatabase(): @@ -440,7 +443,7 @@ def removeDeletedTrajectories(self, output_dir, jdt_start, jdt_end=None, failed= if not os.path.isfile(os.path.join(output_dir, rw[2])): if verbose: log.info(f'removing traj {jd2Date(rw[0], dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")} from database') - self.removeTrajectory(DummyTrajReduced(rw[0], rw[1], rw[2]), keepFolder=True) + self.removeTrajectory(DummyTrajReduced(jdt_ref=rw[0], traj_id=rw[1], traj_file_path=rw[2]), keepFolder=True) i += 1 log.info(f'removed {i} deleted trajectories') return @@ -517,8 +520,30 @@ def mergeTrajDatabase(self, source_db_path): cur.close() return +################################################################################## +# dummy classes for moving data from the old JSON database. 
Created here to +# avoid a circular import + + +class dummyDatabaseJSON(): + def __init__(self, db_dir, dt_range=None): + self.db_file_path = os.path.join(db_dir, 'processed_trajectories.json') + self.paired_obs = {} + self.failed_trajectories = {} + if os.path.exists(self.db_file_path): + self.__dict__ = json.load(open(self.db_file_path)) + if hasattr(self, 'trajectories'): + # Convert trajectories from JSON to TrajectoryReduced objects + traj_dict = getattr(self, "failed_trajectories") + trajectories_obj_dict = {} + for traj_json in traj_dict: + traj_reduced_tmp = DummyTrajReduced(json_dict=traj_dict[traj_json]) + trajectories_obj_dict[traj_reduced_tmp.jdt_ref] = traj_reduced_tmp + setattr(self, "failed_trajectories", trajectories_obj_dict) + + +################################################################################## -############################################################ if __name__ == '__main__': arg_parser = argparse.ArgumentParser(description="""Automatically compute trajectories from RMS data in the given directory.""", @@ -562,25 +587,50 @@ def mergeTrajDatabase(self, source_db_path): console_handler.setFormatter(log_formatter) log.addHandler(console_handler) - dbname = cml_args.database.lower() + if cml_args.database: + dbname = cml_args.database.lower() action = cml_args.action.lower() - if dbname == 'observations': - obsdb = ObservationDatabase(cml_args.dir_path) - if action == 'read': - cur = obsdb.dbhandle.cursor() - cur.execute('select * from paired_obs where status=1') - print(f'there are {len(cur.fetchall())} paired obs') - cur.execute('select * from paired_obs where status=0') - print(f'and {len(cur.fetchall())} unpaired obs') - obsdb.closeObsDatabase() - elif dbname == 'trajectories': - trajdb = TrajectoryDatabase(cml_args.dir_path) - if action == 'read': - cur = trajdb.dbhandle.cursor() - cur.execute('select * from trajectories where status=1') - print(f'there are {len(cur.fetchall())} successful trajectories') - 
cur.execute('select * from failed_trajectories') - print(f'and {len(cur.fetchall())} failed trajectories') + dt_range = None + if cml_args.timerange is not None: + time_beg, time_end = cml_args.timerange.strip("(").strip(")").split(",") + dt_beg = datetime.datetime.strptime(time_beg, "%Y%m%d-%H%M%S").replace(tzinfo=datetime.timezone.utc) + dt_end = datetime.datetime.strptime(time_end, "%Y%m%d-%H%M%S").replace(tzinfo=datetime.timezone.utc) + log.info("Custom time range:") + log.info(" BEG: {:s}".format(str(dt_beg))) + log.info(" END: {:s}".format(str(dt_end))) + dt_range = [dt_beg, dt_end] + + + if action == 'copy': + if dt_range is None: + log.info('Date range must be provided for copy operation') + else: + dt_range_jd = [datetime2JD(dt_range[0]),datetime2JD(dt_range[1])] + jsondb = dummyDatabaseJSON(db_dir=cml_args.dir_path) + obsdb = ObservationDatabase(cml_args.dir_path) + obsdb.moveObsJsonRecords(jsondb.paired_obs, dt_range) + obsdb.closeObsDatabase() + trajdb = TrajectoryDatabase(cml_args.dir_path) + trajdb.moveFailedTrajectories(jsondb.failed_trajectories, dt_range) + trajdb.closeTrajDatabase() else: - log.info('valid database not specified') + if dbname == 'observations': + obsdb = ObservationDatabase(cml_args.dir_path) + if action == 'status': + cur = obsdb.dbhandle.cursor() + cur.execute('select * from paired_obs where status=1') + print(f'there are {len(cur.fetchall())} paired obs') + cur.execute('select * from paired_obs where status=0') + print(f'and {len(cur.fetchall())} unpaired obs') + + elif dbname == 'trajectories': + trajdb = TrajectoryDatabase(cml_args.dir_path) + if action == 'read': + cur = trajdb.dbhandle.cursor() + cur.execute('select * from trajectories where status=1') + print(f'there are {len(cur.fetchall())} successful trajectories') + cur.execute('select * from failed_trajectories') + print(f'and {len(cur.fetchall())} failed trajectories') + else: + log.info('valid database not specified') From 83a2b7263c54fbb97380dba3f330fe2a52f5d77b 
Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 11 Feb 2026 23:14:03 +0000 Subject: [PATCH 075/132] bugfix: only move phase1 if client was doing phase1 --- wmpl/Trajectory/CorrelateRMS.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 0c08a94d..fe6c4745 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1590,9 +1590,10 @@ def moveUploadedData(self, verbose=False): if i > 0: log.info(f'moved {i+1} trajectories in {trajdb_path}') + # if we're in mode 1 then move any uploaded phase1 solutions remote_ph1dir = os.path.join(node.dirpath, 'files', 'phase1') os.makedirs(self.phase1_dir, exist_ok=True) - if os.path.isdir(remote_ph1dir): + if os.path.isdir(remote_ph1dir) and node.mode==1: i = 0 for i, fil in enumerate([x for x in os.listdir(remote_ph1dir) if '.pickle' in x]): full_name = os.path.join(remote_ph1dir, fil) From 6ece9aba2c2e7efd4097f0473877ee18df9aa3e6 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 12 Feb 2026 00:44:11 +0000 Subject: [PATCH 076/132] make sure folders exist --- wmpl/Utils/remoteDataHandling.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index bd433ac1..dec5c708 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -183,9 +183,12 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): self.closeSFTPConnection() return False + local_dir = os.path.join(output_dir, datatype) + if not os.path.isdir(local_dir): + os.makedirs(local_dir, exist_ok=True) for trajfile in files: fullname = f'{rem_dir}/{trajfile}' - localname = os.path.join(output_dir, datatype, trajfile) + localname = os.path.join(local_dir, trajfile) if verbose: log.info(f'downloading {fullname} to {localname}') for i in range(10): From c9bb9bd277add661c9bb361373ea73f2ad8a0eb7 Mon Sep 17 00:00:00 2001 From: 
Mark McIntyre Date: Thu, 12 Feb 2026 00:44:45 +0000 Subject: [PATCH 077/132] initialise remote data handling in the right place for phase2 --- wmpl/Trajectory/CorrelateRMS.py | 43 +++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index fe6c4745..fc356ab1 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -576,30 +576,19 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.processing_list = self.findUnprocessedFolders(station_list) log.info(" ... done!") + # in phase 1, initialise and collect data second as we load candidates dynamically + self.initialiseRemoteDataHandling() + else: + # in phase 2, initialise and collect data first as we need the phase1 traj on disk already + self.initialiseRemoteDataHandling() - # reload the phase1 trajectories dt_beg, dt_end = self.loadPhase1Trajectories(max_trajs=max_trajs) self.processing_list = None self.dt_range=[dt_beg, dt_end] self.traj_db = None self.observations_db = None - # Initialise remote data handling, if the config file is present - remote_cfg = os.path.join(self.db_dir, 'wmpl_remote.cfg') - if os.path.isfile(remote_cfg): - log.info('remote data management requested, initialising') - self.RemoteDatahandler = RemoteDataHandler(remote_cfg) - if self.RemoteDatahandler.mode == 'child': - self.RemoteDatahandler.clearStopFlag() - status = self.getRemoteData(verbose=True) - else: - status = self.moveUploadedData(verbose=False) - if not status: - log.info('no remote data yet') - else: - self.RemoteDatahandler = None - ### Define country groups to speed up the proceessing ### north_america_group = ["CA", "US", "MX"] @@ -623,6 +612,23 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode ### ### + def initialiseRemoteDataHandling(self): + # Initialise remote data handling, if the config file is present + remote_cfg 
= os.path.join(self.db_dir, 'wmpl_remote.cfg') + if os.path.isfile(remote_cfg): + log.info('remote data management requested, initialising') + self.RemoteDatahandler = RemoteDataHandler(remote_cfg) + if self.RemoteDatahandler.mode == 'child': + self.RemoteDatahandler.clearStopFlag() + status = self.getRemoteData(verbose=True) + else: + status = self.moveUploadedData(verbose=False) + if not status: + log.info('no remote data yet') + else: + self.RemoteDatahandler = None + + def purgePhase1ProcessedData(self, dir_path): """ Purge old phase1 processed data if it is older than 90 days. """ @@ -1590,10 +1596,11 @@ def moveUploadedData(self, verbose=False): if i > 0: log.info(f'moved {i+1} trajectories in {trajdb_path}') - # if we're in mode 1 then move any uploaded phase1 solutions + # if the node was in mode 1 then move any uploaded phase1 solutions remote_ph1dir = os.path.join(node.dirpath, 'files', 'phase1') - os.makedirs(self.phase1_dir, exist_ok=True) if os.path.isdir(remote_ph1dir) and node.mode==1: + if not os.path.isdir(self.phase1_dir): + os.makedirs(self.phase1_dir, exist_ok=True) i = 0 for i, fil in enumerate([x for x in os.listdir(remote_ph1dir) if '.pickle' in x]): full_name = os.path.join(remote_ph1dir, fil) From c9ede51d75b1cc36b902d9a5fc56d7cff673cbd8 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 12 Feb 2026 00:51:23 +0000 Subject: [PATCH 078/132] improve logging --- wmpl/Utils/remoteDataHandling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index dec5c708..23bc44cc 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -203,7 +203,7 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): try: self.sftp_client.remove(fullname) except: - pass + log.info(f'unable to rename or remove {fullname}') log.info(f'Obtained {len(files)} {"trajectories" if datatype=="phase1" else "candidates"}') From 
cda60ba91d0dd9d3a5daaac6705ffb56e227f4f0 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 12 Feb 2026 09:35:45 +0000 Subject: [PATCH 079/132] prevent crash in mcmode 2 --- wmpl/Trajectory/CorrelateRMS.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index fc356ab1..b3de251d 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -2079,14 +2079,16 @@ def signal_handler(sig, frame): if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child' and num_done > 0: log.info('uploading to master node') # close the databases and upload the data to the master node - dh.traj_db.closeTrajDatabase() - dh.observations_db.closeObsDatabase() + if mcmode != MCMODE_PHASE2: + dh.traj_db.closeTrajDatabase() + dh.observations_db.closeObsDatabase() dh.RemoteDatahandler.uploadToMaster(dh.output_dir, verbose=False) # truncate the tables here so they are clean for the next run - dh.traj_db = TrajectoryDatabase(dh.db_dir, purge_records=True) - dh.observations_db = ObservationDatabase(dh.db_dir, purge_records=True) + if mcmode != MCMODE_PHASE2: + dh.traj_db = TrajectoryDatabase(dh.db_dir, purge_records=True) + dh.observations_db = ObservationDatabase(dh.db_dir, purge_records=True) if mcmode & MCMODE_CANDS: dh.observations_db.closeObsDatabase() From fbd78990f2d3e42e45d54408a7ed263729ea6f92 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 12 Feb 2026 15:05:37 +0000 Subject: [PATCH 080/132] print operation mode more frequently --- wmpl/Trajectory/CorrelateEngine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 3cd9af0b..be2b04de 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1142,7 +1142,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver 
log.info("---------------------------------") log.info("") - log.info(f'mcmode is {mcmode}') + log.info(f'mcmode is {mcmodestr}') # Go though all time bins and split the list of observations for bin_beg, bin_end in dt_bin_list: From 7677accf00393551c19b9eab83dcbaae90b36611 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 12 Feb 2026 15:06:01 +0000 Subject: [PATCH 081/132] update remote data handling to actually move trajectories! --- wmpl/Trajectory/CorrelateRMS.py | 45 ++++++++++++++------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index b3de251d..2ba9469e 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1302,7 +1302,6 @@ def saveTrajectoryResults(self, traj, save_plots, verbose=False): log.info(f'saved {traj.traj_id} to {output_dir}') if self.mc_mode & MCMODE_PHASE1 and not self.mc_mode & MCMODE_PHASE2: - # TODO distribute phase1 pickles here self.savePhase1Trajectory(traj, traj.pre_mc_longname + '_trajectory.pickle', verbose=verbose) elif self.mc_mode & MCMODE_PHASE2: @@ -1567,34 +1566,28 @@ def moveUploadedData(self, verbose=False): self.observations_db.mergeObsDatabase(obsdb_path) os.remove(obsdb_path) + for trajdb_path in glob.glob(os.path.join(node.dirpath,'files','trajectories*.db')): self.traj_db.mergeTrajDatabase(trajdb_path) - - rem_db = TrajectoryDatabase(*os.path.split(trajdb_path)) - i = 0 - for i, traj in enumerate(rem_db.getTrajNames()): - traj_path, traj_name = os.path.split(traj) - local_path = os.path.split(traj_path)[1] - targ_path = os.path.join(self.output_dir, traj_path) - src_path = os.path.join(node.dirpath,'files', 'trajectories', local_path) - - src_name = os.path.join(src_path, traj_name) - os.makedirs(targ_path, exist_ok=True) - if not os.path.isfile(src_name): - log.info(f'trajectory {src_name} missing') - else: - shutil.copy(src_name, targ_path) - src_name = 
src_name.replace('trajectory.pickle', 'report.txt') - if not os.path.isfile(src_name): - log.info(f'report {src_name} missing') - else: - shutil.copy(src_name, targ_path) - - shutil.rmtree(src_path,ignore_errors=True) - rem_db.closeTrajDatabase() os.remove(trajdb_path) - if i > 0: - log.info(f'moved {i+1} trajectories in {trajdb_path}') + + i = 0 + remote_trajdir = os.path.join(node.dirpath, 'files', 'trajectories') + if os.path.isdir(remote_trajdir): + for i,traj in enumerate(os.listdir(remote_trajdir)): + if os.path.isdir(os.path.join(remote_trajdir, traj)): + targ_path = os.path.join(self.output_dir, 'trajectories', traj[:4], traj[:6], traj[:8], traj) + src_path = os.path.join(node.dirpath,'files', 'trajectories', traj) + for src_name in os.path.listdir(src_path): + src_name = os.path.join(src_path, src_name) + if not os.path.isfile(src_name): + log.info(f'{src_name} missing') + else: + os.makedirs(targ_path, exist_ok=True) + shutil.copy(src_name, targ_path) + shutil.rmtree(src_path,ignore_errors=True) + if i > 0: + log.info(f'moved {i+1} trajectories') # if the node was in mode 1 then move any uploaded phase1 solutions remote_ph1dir = os.path.join(node.dirpath, 'files', 'phase1') From 21c94b4ed4d9910022907071ad5f819e7f50ea95 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 12 Feb 2026 15:08:43 +0000 Subject: [PATCH 082/132] slight improvement in shutdown messaging --- wmpl/Trajectory/CorrelateRMS.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 2ba9469e..4eb99337 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1807,6 +1807,8 @@ def signal_handler(sig, frame): rdh = RemoteDataHandler(remote_cfg) if rdh and rdh.mode == 'child': rdh.setStopFlag() + log.info('DONE') + log.info('======================================') sys.exit(0) signal.signal(signal.SIGINT, signal_handler) From 1b73810b8a962b312a8a991911832c85fbdc0d4d Mon Sep 17 00:00:00 2001 
From: Mark McIntyre Date: Thu, 12 Feb 2026 15:17:28 +0000 Subject: [PATCH 083/132] bugfix --- wmpl/Trajectory/CorrelateRMS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 4eb99337..4ac090f5 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1578,7 +1578,7 @@ def moveUploadedData(self, verbose=False): if os.path.isdir(os.path.join(remote_trajdir, traj)): targ_path = os.path.join(self.output_dir, 'trajectories', traj[:4], traj[:6], traj[:8], traj) src_path = os.path.join(node.dirpath,'files', 'trajectories', traj) - for src_name in os.path.listdir(src_path): + for src_name in os.listdir(src_path): src_name = os.path.join(src_path, src_name) if not os.path.isfile(src_name): log.info(f'{src_name} missing') From 8d8a325ae3b60de9d3ebfc1c2a5b6129f2a9ddcc Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 12 Feb 2026 15:59:06 +0000 Subject: [PATCH 084/132] chmod upload folders as needed --- wmpl/Utils/remoteDataHandling.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index 23bc44cc..4a249088 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -142,7 +142,7 @@ def closeSFTPConnection(self): return def putWithRetry(self, local_name, remname): - for i in range(10): + for i in range(10): try: self.sftp_client.put(local_name, remname) break @@ -171,6 +171,7 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): 'files/candidates/processed','files/phase1/processed']: try: self.sftp_client.mkdir(pth) + self.sftp_client.chmod(pth, 0o777) except Exception: pass @@ -230,6 +231,7 @@ def uploadToMaster(self, source_dir, verbose=False): 'files/candidates/processed','files/phase1/processed']: try: self.sftp_client.mkdir(pth) + self.sftp_client.chmod(pth, 0o777) except Exception: pass phase1_dir = 
os.path.join(source_dir, 'phase1') @@ -261,6 +263,7 @@ def uploadToMaster(self, source_dir, verbose=False): rem_path = f'files/trajectories/{os.path.basename(dirpath)}' try: self.sftp_client.mkdir(rem_path) + self.sftp_client.chmod(rem_path, 0o777) except Exception: pass for fil in filenames: From cb20be5c4ee6d09121fdda5f617398f677792464 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 12 Feb 2026 16:03:25 +0000 Subject: [PATCH 085/132] fix perms on upload folders --- wmpl/Utils/remoteDataHandling.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index 4a249088..9c68e6f4 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -171,9 +171,9 @@ def collectRemoteData(self, datatype, output_dir, verbose=False): 'files/candidates/processed','files/phase1/processed']: try: self.sftp_client.mkdir(pth) - self.sftp_client.chmod(pth, 0o777) except Exception: pass + self.sftp_client.chmod(pth, 0o777) try: rem_dir = f'files/{datatype}' @@ -231,9 +231,10 @@ def uploadToMaster(self, source_dir, verbose=False): 'files/candidates/processed','files/phase1/processed']: try: self.sftp_client.mkdir(pth) - self.sftp_client.chmod(pth, 0o777) except Exception: pass + self.sftp_client.chmod(pth, 0o777) + phase1_dir = os.path.join(source_dir, 'phase1') if os.path.isdir(phase1_dir): # upload any phase1 trajectories @@ -263,9 +264,9 @@ def uploadToMaster(self, source_dir, verbose=False): rem_path = f'files/trajectories/{os.path.basename(dirpath)}' try: self.sftp_client.mkdir(rem_path) - self.sftp_client.chmod(rem_path, 0o777) except Exception: pass + self.sftp_client.chmod(rem_path, 0o777) for fil in filenames: local_name = os.path.join(dirpath, fil) rem_file = f'{rem_path}/{fil}' From db2f17c2b0432f586c697fb5b111e90abaa8e108 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 13 Feb 2026 16:45:11 +0000 Subject: [PATCH 086/132] add some verbosity for debug 
--- wmpl/Trajectory/CorrelateDB.py | 4 +++- wmpl/Trajectory/CorrelateRMS.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 2965e6db..485e1a2f 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -59,7 +59,7 @@ def closeObsDatabase(self): return - def checkObsPaired(self, station_code, obs_id): + def checkObsPaired(self, station_code, obs_id, verbose=False): # return True if there is an observation with the correct station code, obs id and with status = 1 paired = True @@ -68,6 +68,8 @@ def checkObsPaired(self, station_code, obs_id): if cur.fetchone() is None: paired = False cur.close() + if verbose: + log.info(f'{obs_id} is {"Paired" if paired else "Unpaired"}') return paired diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 4ac090f5..6425cdab 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1165,7 +1165,7 @@ def countryFilter(self, station_code1, station_code2): return True - def findTimePairs(self, met_obs, unpaired_observations, max_toffset): + def findTimePairs(self, met_obs, unpaired_observations, max_toffset, verbose=False): """ Finds pairs in time between the given meteor observations and all other observations from different stations. 
@@ -1185,6 +1185,9 @@ def findTimePairs(self, met_obs, unpaired_observations, max_toffset): # Go through all meteors from other stations for met_obs2 in unpaired_observations: + if self.observations_db.checkObsPaired(met_obs2.station_code, met_obs2.id, verbose=verbose): + continue + # Take only observations from different stations if met_obs.station_code == met_obs2.station_code: continue @@ -1626,7 +1629,7 @@ def saveCandidates(self, candidate_trajectories, verbose=False): for matched_observations in candidate_trajectories: ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) ctries = '_'.join(list(set([met_obs.station_code[:2] for _, met_obs, _ in matched_observations]))) - picklename = str(ref_dt.timestamp()) + '_' + ctries + '.pickle' + picklename = f'{ref_dt.timestamp():.6f}_{ctries}.pickle' # this function can also save a candidate self.savePhase1Trajectory(matched_observations, picklename, 'candidates', verbose=verbose) From e753a1ee8d6af0e6b3c22cc71a70940d6308040d Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 13 Feb 2026 16:45:59 +0000 Subject: [PATCH 087/132] bugfix in pairing routine --- wmpl/Trajectory/CorrelateEngine.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index be2b04de..713c3cba 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1341,6 +1341,9 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if met_obs.processed: continue + if self.dh.observations_db.checkObsPaired(met_obs.station_code, met_obs.id, verbose=verbose): + continue + # Get station platepar reference_platepar = self.dh.getPlatepar(met_obs) obs1 = self.initObservationsObject(met_obs, reference_platepar) @@ -1420,7 +1423,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Mark observations as processed for _, met_obs_temp, _ in 
matched_observations: met_obs_temp.processed = True - if self.dh.observations_db.addPairedObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt): + if self.dh.observations_db.addPairedObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose): remaining_unpaired -= 1 # Store candidate trajectories @@ -1490,8 +1493,9 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Check if there any any common observations between candidate trajectories and merge them # if that is the case found_match = False + test_ids = [x.id for x in obs_list_test] for obs1 in obs_list_ref: - if obs1 in obs_list_test: + if obs1.id in test_ids: found_match = True break From b12504a346079738e79aa230c925cf68943cf052 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 14 Feb 2026 13:38:20 +0000 Subject: [PATCH 088/132] rename archiveoldrecords to archivemonths --- wmpl/MetSim/GUI.py | 115 ++++-- wmpl/MetSim/MetSimErosion.py | 37 +- wmpl/Trajectory/CorrelateRMS.py | 3 +- wmpl/Utils/CalibrateLuxLC.py | 611 ++++++++++++++++++++++++++++++++ 4 files changed, 736 insertions(+), 30 deletions(-) create mode 100644 wmpl/Utils/CalibrateLuxLC.py diff --git a/wmpl/MetSim/GUI.py b/wmpl/MetSim/GUI.py index 04306263..7e29e475 100644 --- a/wmpl/MetSim/GUI.py +++ b/wmpl/MetSim/GUI.py @@ -1917,8 +1917,16 @@ def extractWake(sr, wake_containers, wake_fraction=0.5, peak_region=20, site_id= # Get the two containers with observations wake_container_ref = wake_containers[ht_ref_idx] - # Find the wake index closest to the given wake height, ignoring nana - wake_res_indx_ref = np.nanargmin(np.abs(ht_ref - sr.leading_frag_height_arr)) + # Find indices where the wake result is not None + valid_wake_indices = [i for i, w in enumerate(sr.wake_results) if w is not None] + + if not valid_wake_indices: + # Should ideally handle this gracefully, but for now fallback to previous behavior which might error or return None + wake_res_indx_ref = 
np.nanargmin(np.abs(ht_ref - sr.brightest_height_arr)) + else: + # Find the wake index closest to the given wake height, considering only valid wakes + closest_idx_in_valid = np.argmin(np.abs(ht_ref - sr.brightest_height_arr[valid_wake_indices])) + wake_res_indx_ref = valid_wake_indices[closest_idx_in_valid] # Extract the wake results wake_ref = sr.wake_results[wake_res_indx_ref] @@ -2776,10 +2784,15 @@ def __init__(self, traj_path, const_json_file=None, ecsv_files=None, met_path=No if self.wake_meas: self.wake_heights = [] for wake_container in self.wake_meas: - for wake_pt in wake_container.points: - if int(wake_pt.n ) == 0: - self.wake_heights.append([wake_pt.ht, wake_container]) - break + + # Find the point with the minimum lag (closest to n=0) + if len(wake_container.points): + # Sort points by lag n + points_sorted = sorted(wake_container.points, key=lambda x: x.n) + + # Use the height of the first point + self.wake_heights.append([points_sorted[0].ht, wake_container]) + # Sort wake height list by height self.wake_heights = sorted(self.wake_heights, key=lambda x: x[0]) @@ -2819,8 +2832,8 @@ def __init__(self, traj_path, const_json_file=None, ecsv_files=None, met_path=No else: self.wake_plot_ht = self.traj.rbeg_ele # m - self.wake_normalization_method = 'area' - self.wake_align_method = 'none' + self.wake_normalization_method = 'peak' + self.wake_align_method = 'correlate' self.magnitudePlotWakeLines = None @@ -4910,11 +4923,18 @@ def updateWakePlot(self, show_previous=False): sim_wake_exists = False if sr is not None: - # Find the wake index closest to the given wake height - wake_res_indx = np.argmin(np.abs(self.wake_plot_ht - sr.brightest_height_arr)) + # Find indices where the wake result is not None + valid_wake_indices = [i for i, w in enumerate(sr.wake_results) if w is not None] + + wake = None + if valid_wake_indices: + + # Find the wake index closest to the given wake height, considering only valid wakes + closest_idx_in_valid = 
np.argmin(np.abs(self.wake_plot_ht - sr.brightest_height_arr[valid_wake_indices])) + wake_res_indx = valid_wake_indices[closest_idx_in_valid] - # Get the approprate wake results - wake = sr.wake_results[wake_res_indx] + # Get the appropriate wake results + wake = sr.wake_results[wake_res_indx] if wake is not None: @@ -5069,11 +5089,18 @@ def updateWakePlot(self, show_previous=False): wake = None if sr is not None: - # Find the wake index closest to the given wake height - wake_res_indx = np.argmin(np.abs(plot_ht - sr.brightest_height_arr)) - - # Get the approprate wake results - wake = sr.wake_results[wake_res_indx] + # Find indices where the wake result is not None + valid_wake_indices = [i for i, w in enumerate(sr.wake_results) if w is not None] + + if not valid_wake_indices: + wake = None + else: + # Find the wake index closest to the given wake height, considering only valid wakes + closest_idx_in_valid = np.argmin(np.abs(plot_ht - sr.brightest_height_arr[valid_wake_indices])) + wake_res_indx = valid_wake_indices[closest_idx_in_valid] + + # Get the appropriate wake results + wake = sr.wake_results[wake_res_indx] # Plot the simulated wake @@ -5178,9 +5205,9 @@ def updateWakePlot(self, show_previous=False): - # Enable/disable wake normalization and alignment dpending on availability of simulated data - self.wakeNormalizeGroup.setDisabled(wake is None) - self.wakeAlignGroup.setDisabled(wake is None) + # Enable/disable wake normalization and alignment depending on availability of simulated data + self.wakeNormalizeGroup.setDisabled(not self.wake_on) + self.wakeAlignGroup.setDisabled(not self.wake_on) @@ -5595,6 +5622,12 @@ def runSimulationGUI(self): print('Running simulation...') t1 = time.time() + # Pass the observed wake heights to the simulation to speed it up + if hasattr(self, 'wake_heights') and (self.wake_heights is not None): + self.const.wake_heights = [x[0] for x in self.wake_heights] + else: + self.const.wake_heights = None + # Run the simulation 
frag_main, results_list, wake_results = runSimulation(self.const, compute_wake=self.wake_on) @@ -6296,19 +6329,31 @@ def saveVideo(self, event): # Get the screen resolution app = QApplication([]) screen = app.primaryScreen() + + # Get the OS display scale factor (e.g., 200% scaling = 2.0) + device_pixel_ratio = screen.devicePixelRatio() + + # Get the logical screen size (already accounts for OS scaling) screen_size = screen.size() screen_width = screen_size.width() screen_height = screen_size.height() + + # Calculate the physical resolution by multiplying by device pixel ratio + physical_height = int(screen_height * device_pixel_ratio) - # Only scale the window if the screen resolution is not 1080p - if screen_height != 1080: + # Only scale the window if the physical screen resolution is not 1080p + if physical_height != 1080: # Compute the scaling factor, taking 1080p as the reference resolution (compute the ratio of # diagonals) # Use the screen height as the reference to avoid issues with very wide screens, and assume that the # screen size ratio is 1.6 (16:10) - screen_width_calc = int(screen_height*1.6) - scaling_factor = np.sqrt(screen_width_calc**2 + screen_height**2) / np.sqrt(1920**2 + 1080**2) + screen_width_calc = int(physical_height*1.6) + scaling_factor = np.sqrt(screen_width_calc**2 + physical_height**2) / np.sqrt(1920**2 + 1080**2) + + # Divide by device pixel ratio to account for OS-level scaling + # This prevents double scaling (resolution scaling * OS scaling) + scaling_factor = scaling_factor / device_pixel_ratio # If the scaling factor is > 1, reduce it by 2% to avoid too large fonts if scaling_factor > 1: @@ -6320,7 +6365,8 @@ def saveVideo(self, event): os.environ["QT_SCALE_FACTOR"] = str(scaling_factor) else: - scaling_factor = 1 + scaling_factor = 1.0 / device_pixel_ratio + os.environ["QT_SCALE_FACTOR"] = str(scaling_factor) # Destroy the QApplication object app.quit() @@ -6332,12 +6378,27 @@ def saveVideo(self, event): # Init PyQt5 window 
app = QApplication([]) - # Set a font for the whole application + # Set font size compensated for QT_SCALE_FACTOR + # Base is 7pt, divide by scaling_factor so when Qt scales it back up, it's 7pt effective font = QFont() font.setFamily("Arial") - font.setPointSize(int(np.ceil(8/scaling_factor))) + font.setPointSize(int(np.ceil(7 / scaling_factor))) app.setFont(font) + # Scale down checkboxes and radio buttons to compensate for QT_SCALE_FACTOR + checkbox_size = int(26 / scaling_factor) # 26px base size (2x larger than default) + stylesheet = f""" + QCheckBox::indicator {{ + width: {checkbox_size}px; + height: {checkbox_size}px; + }} + QRadioButton::indicator {{ + width: {checkbox_size}px; + height: {checkbox_size}px; + }} + """ + app.setStyleSheet(stylesheet) + # Automatically find all input files if the --all option is given diff --git a/wmpl/MetSim/MetSimErosion.py b/wmpl/MetSim/MetSimErosion.py index 9226c640..e9a50e5e 100644 --- a/wmpl/MetSim/MetSimErosion.py +++ b/wmpl/MetSim/MetSimErosion.py @@ -89,6 +89,9 @@ def __init__(self): # Wake extension from the leading fragment (m) self.wake_extension = 200 + # Specific heights at which the wake should be simulated (m) + self.wake_heights = None + ### ### @@ -683,7 +686,7 @@ def killFragment(const, frag): const.main_mass_exhaustion_ht = frag.h -def ablateAll(fragments, const, compute_wake=False): +def ablateAll(fragments, const, compute_wake=False, wake_heights_queue=None): """ Perform single body ablation of all fragments using the 4th order Runge-Kutta method. Arguments: @@ -692,6 +695,7 @@ def ablateAll(fragments, const, compute_wake=False): Keyword arguments: compute_wake: [bool] If True, the wake profile will be computed. False by default. + wake_heights_queue: [list] A list of heights at which the wake should be computed. None by default. Return: ... 
@@ -1240,6 +1244,28 @@ def ablateAll(fragments, const, compute_wake=False): leading_frag_dyn_press = None ### Compute the wake profile ### + + # If the specific wake heights are given, check if the current height is below the next wake height + if (wake_heights_queue is not None) and (leading_frag_height is not None): + + # If there are any heights left in the queue + if len(wake_heights_queue): + + # If the current height is below the next wake height, compute the wake + if leading_frag_height <= wake_heights_queue[0]: + compute_wake = True + + # Pop all heights that are above the current height (including the one we just passed) + while len(wake_heights_queue) and (leading_frag_height <= wake_heights_queue[0]): + wake_heights_queue.pop(0) + + else: + compute_wake = False + + else: + compute_wake = False + + if compute_wake and (leading_frag_length is not None): @@ -1354,6 +1380,13 @@ def runSimulation(const, compute_wake=False): if const.rho > const.rho_grain: const.rho_grain = const.rho + + # If the wake heights are given, sort them by height descending + wake_heights_queue = None + if (const.wake_heights is not None) and compute_wake: + wake_heights_queue = sorted(const.wake_heights, reverse=True) + + # Run the simulation until all fragments stop ablating results_list = [] wake_results = [] @@ -1364,7 +1397,7 @@ def runSimulation(const, compute_wake=False): tau_total, tau_main, tau_eroded, brightest_height, brightest_length, brightest_vel, \ leading_frag_height, leading_frag_length, leading_frag_vel, leading_frag_dyn_press, \ mass_total_active, main_mass, main_height, main_length, main_vel, main_dyn_press, \ - wake = ablateAll(fragments, const, compute_wake=compute_wake) + wake = ablateAll(fragments, const, compute_wake=compute_wake, wake_heights_queue=wake_heights_queue) # Track the bottom height of the main fragment if main_height > 0: diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 6425cdab..8f99499b 100644 --- 
a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1778,7 +1778,7 @@ def savePhase1Trajectory(self, traj, file_name, savetype='phase1', verbose=False arg_parser.add_argument('--mcmode', '--mcmode', type=int, default=0, help="Operation mode - see readme. For standalone solving either don't set this or set it to 0") - arg_parser.add_argument('--archiveoldrecords', '--archiveoldrecords', type=int, default=3, + arg_parser.add_argument('--archivemonths', '--archivemonths', type=int, default=3, help="Months back to archive old data. Default 3. Zero means don't archive (useful in testing).") arg_parser.add_argument('--maxtrajs', '--maxtrajs', type=int, default=None, @@ -1800,6 +1800,7 @@ def savePhase1Trajectory(self, traj, file_name, savetype='phase1', verbose=False if db_dir is None: db_dir = cml_args.dir_path + # signal handler created inline here as it needs access to db_dir def signal_handler(sig, frame): signal.signal(sig, signal.SIG_IGN) # ignore additional signals log.info('======================================') diff --git a/wmpl/Utils/CalibrateLuxLC.py b/wmpl/Utils/CalibrateLuxLC.py new file mode 100644 index 00000000..cd2c517d --- /dev/null +++ b/wmpl/Utils/CalibrateLuxLC.py @@ -0,0 +1,611 @@ +""" Given raw light curve data, this script will take the trajectory and compute the calibrated light curve.""" + +import os + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import scipy.optimize +import scipy.signal +import scipy.interpolate +import scipy.integrate + +from wmpl.Trajectory.Trajectory import Trajectory +from wmpl.Utils.Math import lineFunc, vectMag, vectNorm, angleBetweenSphericalCoords +from wmpl.Utils.Pickling import loadPickle +from wmpl.Utils.Physics import calcMass, calcRadiatedEnergy +from wmpl.Utils.TrajConversions import jd2Date, geo2Cartesian, cartesian2Geo, eci2RaDec, raDec2AltAz + + + +def atmosphericExtinction(altitude, obs_ht, k=0.2): + """ Compute the atmospheric extinction given the 
altitude above horizon. + + Arguments: + mag: [float] Magnitude. + altitude: [float] Altitude above horizon in degrees. + obs_ht: [float] Observer height in meters. + + Keyword arguments: + k: [float] Extinction coefficient. Default is 0.2 (V band). + + Returns: + [float] Magnitude correction for extinction. + """ + + # Compute the zenith angle + z = np.radians(90.0 - altitude) + + # Convert height to km + h = obs_ht/1000 + + # Compute the air mass + x = (np.cos(z) + 0.025*np.exp(-11*np.cos(z)))**(-1) + + # Compute the correction + return k*x + + + + + + +if __name__ == "__main__": + + import argparse + + + ### Define the command line arguments ### + arg_parser = argparse.ArgumentParser(description="Calibrate light curve data collected by the lux meter given a trajectory file.") + + arg_parser.add_argument("light_curve_file", help="The light curve file to calibrate.") + + arg_parser.add_argument("trajectory_file", help="The trajectory file to use for calibration.") + + arg_parser.add_argument("lc_peak_ht", help="The height of the light curve peak in km. This will be used to align the light curve with the trajectory.") + + arg_parser.add_argument("time_range", help="The comma-separated relative time range speficying where the fireball is in the LC data. E.g. 2.5,6.0") + + arg_parser.add_argument("obs_geocoords", help="The comma-separated coordinates of the observer (lat,lon,ele) in degrees and meters. If the latitude is negative, put the coordinates in quotes and have a leading space, e.g. \" -45.0,120.0,100\".") + + arg_parser.add_argument("--pointing", help="The pointing direction of the sensor. Assumed zenith by default. 
If given, it must be a comma-separated list of (az,el) in degrees (azimuth is +E of due N).", default="0,90") + + arg_parser.add_argument("--timehtfit", type=int, choices=[1, 2, 3], default=None, help="If given, a polynomial fit of the specified order (1, 2, or 3) will be used for the time vs height relationship instead of interpolation.") + + arg_parser.add_argument("--tau", type=float, default=None, help="The luminous efficiency of fireballs in %. If given, it will be used instead of the default value of 5% which is appropraite for low speed fireballs.") + + + # Parse the command line arguments + cml_args = arg_parser.parse_args() + + ### ### + + + ### CONSTANTS ### + + # Power of a zero magnitude meteor + P_0M = 1500 + + # Luminous efficacy at 5800 K + LUM_EFFICACY = 0.0079 # 1 lux in W/m^2 + + # Luminous efficiency of fireballs at low speeds + if cml_args.tau is None: + TAU = 5.0/100 + else: + TAU = cml_args.tau/100 + + + ## Sensor parameters + + # Re irradiance responsivity from TSL2591 datasheet, white light on "visible" sensor channel 0 + # The 100 scaling factor is to convert the RE_WHITE_CHANNEL0 from the datsheet units of counts/(μW/cm2) to counts/(W/m2) + RE_WHITE_CHANNEL0 = 264.1*100 + + # High gain factor 428x from https://github.com/adafruit/Adafruit_CircuitPython_TSL2591/blob/main/adafruit_tsl2591.py + GAIN_HIGH = 428 + + ## + + + ### ### + + + + + + # Check that the light curve file exists + if not os.path.exists(cml_args.light_curve_file): + raise ValueError("The light curve file does not exist.") + + # Get the directory path of the light curve file + dir_path = os.path.dirname(os.path.realpath(cml_args.light_curve_file)) + + # Load the lux sensor data (handle both number and text column) + columns = ["Date", "Time", "Lux", "Visible", "IR", "Gain", "IntTime"] + lc_data = pd.read_csv(cml_args.light_curve_file, header=None, delimiter=' ', names=columns) + print(lc_data) + + + # Load the trajectory data + if not 
os.path.exists(cml_args.trajectory_file): + raise ValueError("The trajectory file does not exist.") + + traj = loadPickle(*os.path.split(cml_args.trajectory_file)) + + # Extract the peak height + peak_ht = float(cml_args.lc_peak_ht) + + + # Get the fireball time range + if "," not in cml_args.time_range: + raise ValueError("The time range must be comma-separated.") + + time_range = np.array(cml_args.time_range.split(','), dtype=float) + + # Extract the observer coordinates + if "," not in cml_args.obs_geocoords: + raise ValueError("The observer coordinates must be comma-separated.") + + obs_lat, obs_lon, obs_ht = np.array(cml_args.obs_geocoords.split(','), dtype=float) + + print("Observer coordinates:") + print(" Lat: {:.5f} deg +N".format(obs_lat)) + print(" Lon: {:.5f} deg +E".format(obs_lon)) + print(" Ele: {:.2f} m".format(obs_ht)) + + + # Read the sensor pointing + if "," not in cml_args.pointing: + raise ValueError("The sensor pointing must be comma-separated.") + + sensor_azim, sensor_alt = np.array(cml_args.pointing.split(','), dtype=float) + + + + # Extract the relative time and lux values + # The columns are: "Date", "Time", "Lux", "Visible", "IR", "Gain", "IntTime" + date_date = lc_data["Date"].values + time_data = lc_data["Time"].values + + + ### EXTRACT LUX VALUES ### + + # # Use the computed lux values + # lux_data = lc_data["Lux"].values + + + # OR: + + # Calculate the gain scaling + # The RE_WHITE_CHANNEL0 measured in the datasheet is measured at high gain, so divide by the GAIN_HIGH factor + # Note: datasheet says the gain scaling for max gain is 9200/400 + gain_scaling = np.array(lc_data["Gain"].values, dtype=float)/GAIN_HIGH + + # Get watts/m2 from visibale data + visible_data = np.array(lc_data["Visible"].values, dtype=float) + watts_per_square_meter = visible_data/(RE_WHITE_CHANNEL0*gain_scaling) + + # Compute the lux from visible data + lux_data = watts_per_square_meter/LUM_EFFICACY + + ### ### + + + + + # Merge the date and time columns and 
convert to datetime + date_time = pd.to_datetime(date_date + ' ' + time_data) + + # Compute relative time in seconds + time_data = (date_time - date_time[0]).total_seconds().values + + + ### Fit a line to the lux for background subtraction (use soft l1 for outlier rejection) ### + + # Iteratively fit a line to the background + # Initial mask includes the faintest 80% of data to exclude the fireball peak + bg_mask = lux_data < np.percentile(lux_data, 80) + + # Perform 10 iterations + for _ in range(10): + + # Fit a line to the data + bg_params = np.polyfit(time_data[bg_mask], lux_data[bg_mask], 1) + + # Compute the model + bg_model = np.polyval(bg_params, time_data) + + # Compute residuals + residuals = lux_data - bg_model + + # Compute the standard deviation of the residuals + sigma = np.std(residuals[bg_mask]) + + # Update the mask (reject positive outliers > 2 sigma) + # We only care about positive outliers (fireball) + new_bg_mask = residuals < 2*sigma + + # Check if the mask has converged + if np.sum(new_bg_mask) == np.sum(bg_mask): + bg_mask = new_bg_mask + break + + bg_mask = new_bg_mask + + print("Background subtraction parameters: m={:.3e}, k={:.3e} (iterations={:d})".format(bg_params[0], bg_params[1], 10)) + + # Convert to the format expected by lineFunc (m, k) -> (x, m, k) + # np.polyfit returns (m, k) + # lineFunc takes (x, m, k) + # So we can just unpack *bg_params in the next step, but be careful of order. + # np.polyfit returns highest power first, so [slope, intercept]. + # lineFunc definition is m*x + k. So it matches. 
+ + + ### ### + + # Compute background-subtracted lux + lux_data = lux_data - np.polyval(bg_params, time_data) + + + # Only take the data within the specified time range and with lux > 0 + filter_mask = (time_data >= time_range[0]) & (time_data <= time_range[1]) & (lux_data > 0) + time_data_fireball = time_data[filter_mask] + lux_data_fireball = lux_data[filter_mask] + + # Identify the peak lux and its time + peak_lux = np.max(lux_data_fireball) + peak_time = time_data_fireball[np.where(lux_data_fireball == peak_lux)[0][0]] + + + # Plot the extracted lux to check that the fireball data was extracted correctly + + # Plot all data + plt.scatter(time_data, lux_data, c='k', s=1, label="All data") + + # Plot the fireball data + plt.plot(time_data_fireball, lux_data_fireball, color='r', linewidth=1, label="Fireball data") + + # Plot the peak lux with an empty red circle + plt.plot(peak_time, peak_lux, 'ro', markerfacecolor='none', label="Peak\nt = {:.3f} s\nh = {:.2f} km".format(peak_time, peak_ht)) + + # Plot a horizontal zero line + plt.plot([time_data[0], time_data[-1]], [0, 0], 'k--', label='Background') + + plt.xlabel('Time (s)') + plt.ylabel('Apparent illuminance (lux)') + + plt.legend() + + plot_name = os.path.basename(os.path.realpath(cml_args.light_curve_file)).replace(".csv", "_fireball_lux.png") + plt.savefig(os.path.join(dir_path, plot_name), dpi=300) + + plt.show() + + + ### Interpolate the trajectory time vs height ### + + time_data_traj = np.concatenate([obs.time_data for obs in traj.observations]) + ht_data_traj = np.concatenate([obs.model_ht for obs in traj.observations]) + + + # Sort the trajectory by time + sort_idx = np.argsort(time_data_traj) + time_data_traj, ht_data_traj = time_data_traj[sort_idx], ht_data_traj[sort_idx] + + + # Check if a fit should be performed instead of interpolation + if cml_args.timehtfit is not None: + + print("Fitting a polynomial of order {:d} to time vs height...".format(cml_args.timehtfit)) + + # Fit a polynomial to the 
data + poly_ht = np.poly1d(np.polyfit(time_data_traj, ht_data_traj, cml_args.timehtfit)) + + # Define the height interpolator as a function of time + ht_interp = lambda t: poly_ht(t) + + # Compute the interpolated data for plotting + time_data_interp = np.linspace(time_data_traj[0], time_data_traj[-1], 1000) + ht_data_interp = ht_interp(time_data_interp) + + # Define the inverse interpolator (time as a function of height) + def time_ht_interp(h): + + # Find the roots of the polynomial minus the height + roots = (poly_ht - h).roots + + # Take the real roots + roots = roots[np.isreal(roots)].real + + # Take the root that is within the time range + # If there are multiple, take the one closest to the middle of the time range + if len(roots) > 0: + return roots[np.argmin(np.abs(roots - np.mean(time_data_traj)))] + else: + return np.nan + + else: + + # Interpolate the trajectory time vs height + ht_interp = scipy.interpolate.PchipInterpolator(time_data_traj, ht_data_traj) + + # Compute the interpolated data + time_data_interp = np.linspace(time_data_traj[0], time_data_traj[-1], 1000) + ht_data_interp = ht_interp(time_data_interp) + + # Smooth the interpolated data + ht_data_interp = scipy.signal.savgol_filter(ht_data_interp, 21, 3) + + # Interpolate again after smoothing + ht_interp = scipy.interpolate.PchipInterpolator(time_data_interp, ht_data_interp) + + # Interpolate the inverse, i.e. 
height vs time (sort the interpolated data by height first) + sort_idx = np.argsort(ht_data_interp) + time_ht_interp = scipy.interpolate.PchipInterpolator(ht_data_interp[sort_idx], time_data_interp[sort_idx]) + + + ### ### + + + # Plot time vs height from the trajectory + for obs in traj.observations: + + plt.scatter(obs.time_data, obs.model_ht/1000, label=obs.station_id, marker='x') + + # Plot the interpolated data + plt.plot(time_data_interp, ht_data_interp/1000, 'r-', label='Interpolated') + + plt.xlabel('Time (s)') + plt.ylabel('Height (km)') + + plt.legend() + + plot_name = os.path.basename(os.path.realpath(cml_args.light_curve_file)).replace(".csv", "_time_vs_ht.png") + plt.savefig(os.path.join(dir_path, plot_name), dpi=300) + + plt.show() + + + ### Interpolate the trajectory time vs length ### + + time_data_traj = np.concatenate([obs.time_data for obs in traj.observations]) + len_data_traj = np.concatenate([obs.state_vect_dist for obs in traj.observations]) + + # Sort the trajectory by time + sort_idx = np.argsort(time_data_traj) + time_data_traj, len_data_traj = time_data_traj[sort_idx], len_data_traj[sort_idx] + + + # Check if a fit should be performed instead of interpolation + if cml_args.timehtfit is not None: + + print("Fitting a polynomial of order {:d} to time vs length...".format(cml_args.timehtfit)) + + # Fit a polynomial to the data + poly_len = np.poly1d(np.polyfit(time_data_traj, len_data_traj, cml_args.timehtfit)) + + # Define the length interpolator as a function of time + len_interp = lambda t: poly_len(t) + + # Compute the interpolated data for plotting + time_data_interp = np.linspace(time_data_traj[0], time_data_traj[-1], 1000) + len_data_interp = len_interp(time_data_interp) + + else: + + # Interpolate the trajectory time vs length + len_interp = scipy.interpolate.PchipInterpolator(time_data_traj, len_data_traj) + + # Compute the interpolated data + time_data_interp = np.linspace(time_data_traj[0], time_data_traj[-1], 1000) + 
len_data_interp = len_interp(time_data_interp) + + # Smooth the interpolated data + len_data_interp = scipy.signal.savgol_filter(len_data_interp, 21, 3) + + # Interpolate again after smoothing + len_interp = scipy.interpolate.PchipInterpolator(time_data_interp, len_data_interp) + + ### ### + + + # Plot time vs length from the trajectory + for obs in traj.observations: + + plt.scatter(obs.time_data, obs.state_vect_dist/1000, label=obs.station_id, marker='x') + + # Plot the interpolated data + plt.plot(time_data_interp, len_data_interp/1000, 'r-', label='Interpolated') + + plt.xlabel('Time (s)') + plt.ylabel('Trajectory length (km)') + + plt.legend() + + plot_name = os.path.basename(os.path.realpath(cml_args.light_curve_file)).replace(".csv", "_time_vs_len.png") + plt.savefig(os.path.join(dir_path, plot_name), dpi=300) + + plt.show() + + + + ### Compute the corrected light curve ### + + # Find the trajectory time at the peak height + traj_peak_time = time_ht_interp(1000*peak_ht) + + print("Peak time: {:.3f} s".format(peak_time)) + print("Trajectory peak time: {:.3f} s".format(traj_peak_time)) + + # Compute the LC time in the trajectory frame + lc_time_traj = time_data_fireball + traj_peak_time - peak_time + + # Compute the LC height + lc_ht = ht_interp(lc_time_traj) + + # Compute the LC length + lc_len = len_interp(lc_time_traj) + + + ## Compute the ECI coordinates of the fireball over time + + lat_fireball_data = [] + lon_fireball_data = [] + height_fireball_data = [] + range_fireball_data = [] + alt_fireball_data = [] + power_data = [] + extinction_data = [] + abs_mag_fireball_data = [] + + # Get ECI coordinates of the state vector over time + for t, lux in zip(lc_time_traj, lux_data_fireball): + + # Compute the Julian date of the point + jd = traj.jdt_ref + t/86400 + + # Compute the distance traveled from the state vector at the given time + sv_dist = len_interp(t) + + # Compute the ECI coordinates of the fireball at the given time + fireball_eci = 
traj.state_vect_mini - sv_dist*traj.radiant_eci_mini + + # Compute the height of the fireball above the ground + lat_fireball, lon_fireball, ht_fireball = cartesian2Geo(jd, *fireball_eci) + + + # Compute ECI coordinates of the observer at the given time + obs_eci = np.array(geo2Cartesian(np.radians(obs_lat), np.radians(obs_lon), obs_ht, jd)) + + # Compute the distance between the fireball and the observer + range_fireball = vectMag(fireball_eci - obs_eci) + + + # Compute the vector pointing from the observer to the fireball + fireball_obs_vect = vectNorm(fireball_eci - obs_eci) + + # Compute the apparent altitude of the fireball + ra, dec = eci2RaDec(fireball_obs_vect) + azim, alt = raDec2AltAz(ra, dec, jd, np.radians(obs_lat), np.radians(obs_lon)) + + print() + print("Time: {:.3f} s".format(t)) + print("Fireball range: {:.3f} km".format(range_fireball/1000)) + print("Fireball altitude: {:.3f} deg".format(np.degrees(alt))) + + + # Compute the angular distance between the sensor centre and the fireball + sensor_ang_dist = angleBetweenSphericalCoords(alt, azim, np.radians(sensor_alt), np.radians(sensor_azim)) + + + # Compute sensor sensitivity correction (assuming a simple cosine response) + sensor_corr = 1/np.cos(sensor_ang_dist) + + # Compute the corrected lux + lux_corr = lux*sensor_corr + + # Compute the power over area using an assumed luminous efficacy + power_area = lux_corr*LUM_EFFICACY # W/m^2 + + # Compute the total power emitted by the fireball by applying a range correction + power = power_area*4*np.pi*range_fireball**2 # W + + # Compute the absolute magnitude of the fireball + abs_mag = -2.5*np.log10(power/P_0M) + + # Correct the absolute magnitude for extinction + extinction = atmosphericExtinction(np.degrees(alt), obs_ht) + abs_mag -= extinction + + # Compute the extinction-corrected power + power = P_0M*10**(abs_mag/-2.5) + + print("Power: {:.3f} W".format(power)) + print("Extinction correction: {:.3f} mag".format(extinction)) + print("Absolute 
magnitude: {:.3f}".format(abs_mag)) + + + # Save the data + lat_fireball_data.append(lat_fireball) + lon_fireball_data.append(lon_fireball) + height_fireball_data.append(ht_fireball) + range_fireball_data.append(range_fireball) + alt_fireball_data.append(alt) + power_data.append(power) + extinction_data.append(extinction) + abs_mag_fireball_data.append(abs_mag) + + + + + + ## + + # Compute the total radiate energy + energy = calcRadiatedEnergy(lc_time_traj, np.array(abs_mag_fireball_data), P_0m=P_0M) + + # Compute the photometric mass + mass = calcMass(lc_time_traj, np.array(abs_mag_fireball_data), traj.orbit.v_avg_norot, tau=TAU, P_0m=P_0M) + + print() + print("-" * 50) + print("Peak absolute magnitude: {:.3f}".format(np.min(abs_mag_fireball_data))) + print("Average velocity: {:.3f} km/s".format(traj.orbit.v_avg_norot/1000)) + print("Radiated energy: {:.3f} J".format(energy)) + print("Assumed luminous efficacy (5800 K): {:.3f} lm/W".format(LUM_EFFICACY)) + print("Assumed luminous efficiency: {:.2f} %".format(100*TAU)) + print("Photometric mass: {:.3f} kg".format(mass)) + + + + # Save the computed parameters to an output file next to the input file + file_name = os.path.basename(os.path.realpath(cml_args.light_curve_file)).replace(".csv", "_calibrated.csv") + + # Compute the reference time + ref_dt = jd2Date(traj.jdt_ref, dt_obj=True).strftime("%Y-%m-%d %H:%M:%S.%f") + + with open(os.path.join(dir_path, file_name), 'w') as f: + f.write("# Reference time: {:s}\n".format(ref_dt)) + f.write("# Time (s), Lat (deg), Lon (deg), Height (m), Range (m), Alt (deg), Power (W), Extinction (mag), Abs mag\n") + + for t, lat, lon, ht, r, alt, p, ext, abs_mag in zip(lc_time_traj, lat_fireball_data, + lon_fireball_data, height_fireball_data, + range_fireball_data, alt_fireball_data, + power_data, extinction_data, + abs_mag_fireball_data): + + f.write("{:.3f}, {:.6f}, {:.6f}, {:.3f}, {:.3f}, {:.6f}, {:.3f}, {:.6f}, {:.6f}\n".format(t, + np.degrees(lat), np.degrees(lon), ht, r, 
np.degrees(alt), p, ext, abs_mag)) + + + + + fig, (ax_time, ax_ht) = plt.subplots(ncols=2, sharex=True, figsize=(10, 8)) + + ax_time.plot(abs_mag_fireball_data, lc_time_traj, color='k') + ax_time.invert_xaxis() + ax_time.invert_yaxis() + + ax_time.set_xlabel("Absolute magnitude") + ax_time.set_ylabel("Time after {:s} (s)".format(ref_dt)) + + + # Plot the LC in the trajectory frame + ax_ht.plot(abs_mag_fireball_data, lc_ht/1000, color='k') + + ax_ht.set_ylabel("Height (km)") + ax_ht.set_xlabel("Absolute magnitude") + + plt.tight_layout() + + # Save the plot + plot_name = os.path.basename(os.path.realpath(cml_args.light_curve_file)).replace(".csv", "_lc.png") + plt.savefig(os.path.join(dir_path, plot_name), dpi=300) + + plt.show() + + + + ### ### \ No newline at end of file From 4d212afa149cf1c9f2adcb5e4dd1df2a702ab1e1 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 14 Feb 2026 13:53:50 +0000 Subject: [PATCH 089/132] remove unnecessasry flag --- wmpl/Trajectory/Trajectory.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/wmpl/Trajectory/Trajectory.py b/wmpl/Trajectory/Trajectory.py index 7c02e9ba..72cf0f8d 100644 --- a/wmpl/Trajectory/Trajectory.py +++ b/wmpl/Trajectory/Trajectory.py @@ -2757,8 +2757,7 @@ def generateFileName(self): def infillTrajectory(self, meas1, meas2, time_data, lat, lon, ele, station_id=None, excluded_time=None, - ignore_list=None, magnitudes=None, fov_beg=None, fov_end=None, obs_id=None, comment='', ignore_station=False, - verbose=False): + ignore_list=None, magnitudes=None, fov_beg=None, fov_end=None, obs_id=None, comment='', ignore_station=False): """ Initialize a set of measurements for a given station. 
Arguments: @@ -2836,7 +2835,7 @@ def infillTrajectory(self, meas1, meas2, time_data, lat, lon, ele, station_id=No # Skip the observation if all points were ignored if ignore_list is not None: - if np.all(ignore_list) and verbose: + if np.all(ignore_list): print('All points from station {:s} are ignored, not using this station in the solution!'.format(station_id)) From 22dadc5cc74eb75f7519811ab1158a81f2cb5df6 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 14 Feb 2026 18:30:27 +0000 Subject: [PATCH 090/132] whoops changed param name incorrectly --- wmpl/Trajectory/CorrelateRMS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 8f99499b..2517f910 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1987,7 +1987,7 @@ def signal_handler(sig, frame): dh = RMSDataHandle( cml_args.dir_path, dt_range=event_time_range, db_dir=cml_args.dbdir, output_dir=cml_args.outdir, - mcmode=mcmode, max_trajs=max_trajs, verbose=cml_args.verbose, archivemonths=cml_args.archiveoldrecords) + mcmode=mcmode, max_trajs=max_trajs, verbose=cml_args.verbose, archivemonths=cml_args.archivemonths) # If there is nothing to process and we're in Candidate mode, stop if not dh.processing_list and (mcmode & MCMODE_CANDS): From 86721cd372b6d5e6ca79f2e7843bc88cb5133734 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 14 Feb 2026 21:24:15 +0000 Subject: [PATCH 091/132] initialise obs and traj db --- wmpl/Trajectory/CorrelateRMS.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 2517f910..e007417e 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -581,13 +581,13 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode else: # in phase 2, initialise and collect data first as we need the phase1 traj on disk already + 
self.traj_db = None + self.observations_db = None self.initialiseRemoteDataHandling() dt_beg, dt_end = self.loadPhase1Trajectories(max_trajs=max_trajs) self.processing_list = None self.dt_range=[dt_beg, dt_end] - self.traj_db = None - self.observations_db = None ### Define country groups to speed up the proceessing ### From 508f40e7fcd30e2a7216dff2a0ffeaedd87b0695 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 16 Feb 2026 16:33:57 +0000 Subject: [PATCH 092/132] a bit of documentation --- wmpl/Trajectory/CorrelateEngine.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 713c3cba..081520bd 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1426,7 +1426,10 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if self.dh.observations_db.addPairedObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose): remaining_unpaired -= 1 - # Store candidate trajectories + # Store candidate trajectory group + # Note that this will include candidate groups that already failed on previous runs. + # We will exclude these later - we can't do it just yet as if new data has arrived, then + # in the next step, the group might be merged with another group creating a solvable set. 
log.info("") log.info(f" --- ADDING CANDIDATE at {met_obs.reference_dt.isoformat()} ---") candidate_trajectories.append(matched_observations) @@ -1518,10 +1521,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Add observations that weren't present in the reference candidate for entry in traj_cand_test: - # Make sure the added observation is not from a station that's already added - #if entry[1].station_code in ref_stations: - # print('station code already in ref stations') - # continue + # Make sure the added observation is not already added if entry[1] not in obs_list_ref: # Print the reference and the merged radiants @@ -1558,6 +1558,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info('CHECKING FOR ALREADY-FAILED CANDIDATES') log.info("-----------------------") + # okay now we can remove any already-failed combinations. This wasn't safe to do earlier + # because we first needed to see if we could merge any groups. 
candidate_trajectories, remaining_unpaired = self.dh.excludeAlreadyFailedCandidates(merged_candidate_trajectories, remaining_unpaired) log.info("-----------------------") From bd31fa05200b93088b51eb9795d2ff58ac181f91 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 16 Feb 2026 16:34:35 +0000 Subject: [PATCH 093/132] check status from merge functions --- wmpl/Trajectory/CorrelateDB.py | 30 ++++++++++++++++++++++++++++-- wmpl/Trajectory/CorrelateRMS.py | 8 ++++---- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 485e1a2f..f9e84062 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -1,3 +1,25 @@ +# The MIT License + +# Copyright (c) 2024 Mark McIntyre + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ """ Python scripts to manage the WMPL SQLite databases """ import os @@ -181,13 +203,15 @@ def mergeObsDatabase(self, source_db_path): try: # bulk-copy cur.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') + status = True except Exception: log.info('unable to merge child observations') + status = False self.dbhandle.commit() cur.execute("detach database 'sourcedb'") cur.close() - return + return status ############################################################ @@ -511,16 +535,18 @@ def mergeTrajDatabase(self, source_db_path): # TODO need to correct the traj_file_path to account for server locations + status = True for table_name in ['trajectories', 'failed_trajectories']: try: # bulk-copy if possible cur.execute(f'insert or replace into {table_name} select * from sourcedb.{table_name}') except Exception: log.warning(f'unable to merge data from {source_db_path}') + status = False self.dbhandle.commit() cur.execute("detach database 'sourcedb'") cur.close() - return + return status ################################################################################## # dummy classes for moving data from the old JSON database. 
Created here to diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index e007417e..6625eaa4 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1566,13 +1566,13 @@ def moveUploadedData(self, verbose=False): # merge the databases for obsdb_path in glob.glob(os.path.join(node.dirpath,'files','observations*.db')): - self.observations_db.mergeObsDatabase(obsdb_path) - os.remove(obsdb_path) + if self.observations_db.mergeObsDatabase(obsdb_path): + os.remove(obsdb_path) for trajdb_path in glob.glob(os.path.join(node.dirpath,'files','trajectories*.db')): - self.traj_db.mergeTrajDatabase(trajdb_path) - os.remove(trajdb_path) + if self.traj_db.mergeTrajDatabase(trajdb_path): + os.remove(trajdb_path) i = 0 remote_trajdir = os.path.join(node.dirpath, 'files', 'trajectories') From 8de1c47802150d4e109a510073bc4c25071c3afd Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 16 Feb 2026 16:35:10 +0000 Subject: [PATCH 094/132] if connection fails, close gracefully and retry later --- wmpl/Utils/remoteDataHandling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Utils/remoteDataHandling.py b/wmpl/Utils/remoteDataHandling.py index 9c68e6f4..b4125642 100644 --- a/wmpl/Utils/remoteDataHandling.py +++ b/wmpl/Utils/remoteDataHandling.py @@ -129,7 +129,7 @@ def getSFTPConnection(self, verbose=False): log.warning('sftp connection to remote host failed') log.warning(e) - self.ssh_client.close() + self.closeSFTPConnection() return False def closeSFTPConnection(self): From e04f9f4907172df92a726e00c587ef9806cca412 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 16 Feb 2026 17:33:46 +0000 Subject: [PATCH 095/132] write pidfile --- wmpl/Trajectory/CorrelateEngine.py | 24 +++++++++++------ wmpl/Trajectory/CorrelateRMS.py | 42 +++++++++++++++++++----------- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 
081520bd..ce61d5d6 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -32,6 +32,21 @@ log = logging.getLogger("traj_correlator") +def getMcModeStr(mcmode, strtype=0): + modestrs = {4:'cands', 1:'simple', 2:'mcphase', 5:'candsimple', 3:'simplemc',7:'full',0:'full'} + fullmodestrs = {4:'CANDIDATE STAGE', 1:'SIMPLE STAGE', 2:'MONTE CARLO STAGE', 7:'FULL',0:'FULL'} + if strtype == 0: + if mcmode in fullmodestrs.keys(): + return fullmodestrs[mcmode] + else: + return 'MIXED' + else: + if mcmode in modestrs.keys(): + return modestrs[mcmode] + else: + return False + + def pickBestStations(obslist, max_stns): """ Find the stations with the best statistics @@ -1074,14 +1089,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver """ # a bit of logging to let readers know what we're doing - if mcmode == MCMODE_PHASE2: - mcmodestr = ' - MONTE CARLO STAGE' - elif mcmode == MCMODE_PHASE1: - mcmodestr = ' - SIMPLE STAGE' - elif mcmode == MCMODE_CANDS: - mcmodestr = ' - CANDIDATE STAGE' - else: - mcmodestr = 'FULL SOLVER' + mcmodestr = getMcModeStr(mcmode, strtype=1) if mcmode != MCMODE_PHASE2: if mcmode & MCMODE_CANDS: diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 6625eaa4..b29ba8f5 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -23,7 +23,7 @@ import secrets from wmpl.Formats.CAMS import loadFTPDetectInfo -from wmpl.Trajectory.CorrelateEngine import TrajectoryCorrelator, TrajectoryConstraints +from wmpl.Trajectory.CorrelateEngine import TrajectoryCorrelator, TrajectoryConstraints, getMcModeStr from wmpl.Utils.Math import generateDatetimeBins from wmpl.Utils.OSTools import mkdirP from wmpl.Utils.Pickling import loadPickle, savePickle @@ -1800,6 +1800,25 @@ def savePhase1Trajectory(self, traj, file_name, savetype='phase1', verbose=False if db_dir is None: db_dir = cml_args.dir_path + # mcmode values + # mcmode = 1 -> load candidates 
and do simple solutions + # mcmode = 2 -> load simple solns and do MC solutions + # mcmode = 4 -> find candidates only + # mcmode = 7 -> do everything + # mcmode = 0 -> same as mode 7 + # bitwise combinations are permissioble so: + # 4+1 will find candidates and then run simple solutions to populate "phase1" + # 1+2 will load candidates from "candidates" and solve them completely + + mcmode = MCMODE_ALL if cml_args.mcmode == 0 else cml_args.mcmode + + + mcmodestr = getMcModeStr(mcmode, 1) + pid_file = None + if mcmodestr: + pid_file = os.path.join(db_dir, f'.{mcmodestr}.pid') + open(pid_file,'w').write(f'{os.getpid()}') + # signal handler created inline here as it needs access to db_dir def signal_handler(sig, frame): signal.signal(sig, signal.SIG_IGN) # ignore additional signals @@ -1811,6 +1830,8 @@ def signal_handler(sig, frame): rdh = RemoteDataHandler(remote_cfg) if rdh and rdh.mode == 'child': rdh.setStopFlag() + if os.path.isfile(pid_file): + os.remove(pid_file) log.info('DONE') log.info('======================================') sys.exit(0) @@ -1843,9 +1864,8 @@ def signal_handler(sig, frame): timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(log_dir, f"correlate_rms_{timestamp}.log") if cml_args.addlogsuffix: - modestrs = {4:'cands', 1:'simple', 2:'mcphase', 5:'candsimple', 3:'simplemc',7:'full',0:'full'} - if cml_args.mcmode in modestrs.keys(): - modestr = modestrs[cml_args.mcmode] + modestr = getMcModeStr(cml_args.mcmode, 1) + if modestr: log_file = os.path.join(log_dir, f"correlate_rms_{timestamp}_{modestr}.log") file_handler = logging.handlers.TimedRotatingFileHandler(log_file, when="midnight", backupCount=7) @@ -1896,17 +1916,6 @@ def signal_handler(sig, frame): if cml_args.maxerr is not None: trajectory_constraints.max_arcsec_err = cml_args.maxerr - # mcmode values - # mcmode = 1 -> load candidates and do simple solutions - # mcmode = 2 -> load simple solns and do MC solutions - # mcmode = 4 -> find candidates only 
- # mcmode = 7 -> do everything - # mcmode = 0 -> same as mode 7 - # bitwise combinations are permissioble so: - # 4+1 will find candidates and then run simple solutions to populate "phase1" - # 1+2 will load candidates from "candidates" and solve them completely - - mcmode = MCMODE_ALL if cml_args.mcmode == 0 else cml_args.mcmode # set the maximum number of trajectories to reprocess when doing the MC uncertainties # set a default of 10 for remote processing and 1000 for local processing @@ -1933,6 +1942,7 @@ def signal_handler(sig, frame): # Run processing. If the auto run more is not on, the loop will break after one run previous_start_time = None + while True: # Clock for measuring script time @@ -2108,6 +2118,8 @@ def signal_handler(sig, frame): # clear the remote data ready flag to indicate we're shutting down if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child': dh.RemoteDatahandler.setStopFlag() + if pid_file and os.path.isfile(pid_file): + os.remove(pid_file) break else: From 17ef66d4dbe8515f9fc1a99efd02a51eb0a25b59 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 16 Feb 2026 21:55:42 +0000 Subject: [PATCH 096/132] remove unnecessary saving of temp pickles in ph2 --- wmpl/Trajectory/CorrelateEngine.py | 9 ----- wmpl/Trajectory/CorrelateRMS.py | 64 +++--------------------------- 2 files changed, 5 insertions(+), 68 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index ce61d5d6..e2f8818f 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -923,9 +923,6 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or if mcmode & MCMODE_PHASE2: traj_status = traj - # save the traj in case we need to clean it up - save_traj = traj - # Only proceed if the orbit could be computed if traj.orbit.ra_g is not None: @@ -980,7 +977,6 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or except ValueError as e: 
log.info("Error during trajectory estimation!") print(e) - self.dh.cleanupPhase2TempPickle(save_traj) return False @@ -991,7 +987,6 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or if mcmode != MCMODE_PHASE2: self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=verbose) log.info('Trajectory failed to solve') - self.dh.cleanupPhase2TempPickle(save_traj) return False @@ -1004,7 +999,6 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info("Average velocity outside range: {:.1f} < {:.1f} < {:.1f} km/s, skipping...".format(self.traj_constraints.v_avg_min, traj.orbit.v_avg/1000, self.traj_constraints.v_avg_max)) - self.dh.cleanupPhase2TempPickle(save_traj) return False @@ -1012,14 +1006,12 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or for obs in traj.observations: if (obs.rbeg_ele is None) and (not obs.ignore_station): log.info("Heights from observations failed to be estimated!") - self.dh.cleanupPhase2TempPickle(save_traj) return False # Check that the orbit could be computed if traj.orbit.ra_g is None: log.info("The orbit could not be computed!") - self.dh.cleanupPhase2TempPickle(save_traj) return False # Set the trajectory fit as successful @@ -1044,7 +1036,6 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or else: log.info("The orbit could not be computed!") - self.dh.cleanupPhase2TempPickle(save_traj) return False diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index b29ba8f5..8a8149b2 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -611,7 +611,6 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode ### ### - def initialiseRemoteDataHandling(self): # Initialise remote data handling, if the config file is present remote_cfg = os.path.join(self.db_dir, 'wmpl_remote.cfg') @@ -628,7 +627,6 @@ def 
initialiseRemoteDataHandling(self): else: self.RemoteDatahandler = None - def purgePhase1ProcessedData(self, dir_path): """ Purge old phase1 processed data if it is older than 90 days. """ @@ -661,7 +659,6 @@ def purgePhase1ProcessedData(self, dir_path): return result - def archiveOldRecords(self, older_than=3): """ Archive off old records to keep the database size down @@ -702,8 +699,6 @@ def loadStations(self): return station_list - - def findUnprocessedFolders(self, station_list): """ Go through directories and find folders with unprocessed data. """ @@ -746,8 +741,6 @@ def findUnprocessedFolders(self, station_list): return processing_list - - def initMeteorObs(self, station_code, ftpdetectinfo_path, platepars_recalibrated_dict): """ Init meteor observations from the FTPdetectinfo file and recalibrated platepars. """ @@ -767,8 +760,6 @@ def initMeteorObs(self, station_code, ftpdetectinfo_path, platepars_recalibrated return meteor_list - - def loadUnpairedObservations(self, processing_list, dt_range=None): """ Load unpaired meteor observations, i.e. observations that are not a part of any trajectory. """ @@ -898,7 +889,6 @@ def loadUnpairedObservations(self, processing_list, dt_range=None): log.info(" Finished loading unpaired observations!") return unpaired_met_obs_list - def yearMonthDayDirInDtRange(self, dir_name): """ Given a directory name which is either YYYY, YYYYMM or YYYYMMDD, check if it is in the given @@ -988,8 +978,7 @@ def yearMonthDayDirInDtRange(self, dir_name): return True else: - return False - + return False def trajectoryFileInDtRange(self, file_name, dt_range=None): """ Check if the trajectory file is in the given datetime range. """ @@ -1018,7 +1007,6 @@ def trajectoryFileInDtRange(self, file_name, dt_range=None): else: return False - def removeDeletedTrajectories(self): """ Purge the database of any trajectories that no longer exist on disk. These can arise because the monte-carlo stage may update the data. 
@@ -1042,7 +1030,6 @@ def removeDeletedTrajectories(self): return - def loadComputedTrajectories(self, dt_range=None): """ Load already estimated trajectories from disk within a date range. @@ -1125,8 +1112,6 @@ def loadComputedTrajectories(self, dt_range=None): dur = (datetime.datetime.now() - start_time).total_seconds() log.info(f" Loaded {counter:6d} trajectories in {dur:.0f} seconds") - - def getComputedTrajectories(self, jd_beg, jd_end): """ Returns a list of computed trajectories between the Julian dates. @@ -1134,21 +1119,17 @@ def getComputedTrajectories(self, jd_beg, jd_end): json_dicts = self.traj_db.getTrajectories(self.output_dir, jd_beg, jd_end) trajs = [TrajectoryReduced(None, json_dict=j) for j in json_dicts] return trajs - def getPlatepar(self, met_obs): """ Return the platepar of the meteor observation. """ return met_obs.platepar - - def getUnpairedObservations(self): """ Returns a list of unpaired meteor observations. """ return self.unpaired_observations - def countryFilter(self, station_code1, station_code2): """ Only pair observations if they are in proximity to a given country. """ @@ -1164,7 +1145,6 @@ def countryFilter(self, station_code1, station_code2): # If a given country is not in any of the groups, allow it to be paired return True - def findTimePairs(self, met_obs, unpaired_observations, max_toffset, verbose=False): """ Finds pairs in time between the given meteor observations and all other observations from different stations. @@ -1203,7 +1183,6 @@ def findTimePairs(self, met_obs, unpaired_observations, max_toffset, verbose=Fal return found_pairs - def getTrajTimePairs(self, traj_reduced, unpaired_observations, max_toffset): """ Find unpaired observations which are close in time to the given trajectory. 
""" @@ -1232,7 +1211,6 @@ def getTrajTimePairs(self, traj_reduced, unpaired_observations, max_toffset): return found_traj_obs_pairs - def generateTrajOutputDirectoryPath(self, traj, make_dirs=False): """ Generate a path to the trajectory output directory. @@ -1277,7 +1255,6 @@ def generateTrajOutputDirectoryPath(self, traj, make_dirs=False): return out_path - def saveTrajectoryResults(self, traj, save_plots, verbose=False): """ Save trajectory results to the disk. """ @@ -1344,8 +1321,6 @@ def addTrajectory(self, traj, failed_jdt_ref=None, verbose=False): self.traj_db.addTrajectory(traj_reduced, failed=(failed_jdt_ref is not None), verbose=verbose) - - def removeTrajectory(self, traj_reduced, remove_phase1=False): """ Remove the trajectory from the data base and disk. """ @@ -1360,14 +1335,8 @@ def removeTrajectory(self, traj_reduced, remove_phase1=False): traj_dir = os.path.join(base_dir, traj_reduced.pre_mc_longname) if os.path.isdir(traj_dir): shutil.rmtree(traj_dir, ignore_errors=True) - else: - log.warning(f'unable to find {traj_dir}') - else: - log.warning(f'unable to find {traj_reduced.traj_file_path}') - - # remove the processed pickle now we're done with it - self.cleanupPhase2TempPickle(traj_reduced, True) return + if self.mcmode & MCMODE_PHASE1 and remove_phase1: # remove any solution from the phase1 folder phase1_traj = os.path.join(self.phase1_dir, os.path.basename(traj_reduced.traj_file_path)) @@ -1379,27 +1348,6 @@ def removeTrajectory(self, traj_reduced, remove_phase1=False): self.traj_db.removeTrajectory(traj_reduced) - - def cleanupPhase2TempPickle(self, traj, success=False): - """ - At the start of phase 2 monte-carlo sim calculation, the phase1 pickles are renamed to indicate they're being processed. - Once each one is processed (fail or succeed) we need to clean up the file. If the MC step failed, we still want to keep - the pickle, because we might later on get new data and it might become solvable. 
Otherwise, we can just delete the file - since the MC solver will have saved an updated one already. - """ - if not self.mc_mode & MCMODE_PHASE2: - return - fldr_name = os.path.split(self.generateTrajOutputDirectoryPath(traj, make_dirs=False))[-1] - pick = os.path.join(self.phase1_dir, fldr_name + '_trajectory.pickle_processing') - if os.path.isfile(pick): - os.remove(pick) - else: - log.warning(f'unable to find _processing file {pick}') - if not success: - # save the pickle in case we get new data later and can solve it - savePickle(traj, os.path.join(self.phase1_dir, 'processed'), fldr_name + '_trajectory.pickle') - return - def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaired, verbose=False): # go through the candidates and check if they correspond to already-failed @@ -1450,8 +1398,6 @@ def checkTrajIfFailed(self, traj): traj_reduced = TrajectoryReduced(None, traj_obj=traj) return self.traj_db.checkTrajIfFailed(traj_reduced) - - def loadFullTraj(self, traj_reduced): """ Load the full trajectory object. 
@@ -1534,12 +1480,12 @@ def loadPhase1Trajectories(self, max_trajs=1000): if not hasattr(traj, 'pre_mc_longname'): traj.pre_mc_longname = os.path.split(traj_dir)[-1] - # Check if the traj object as fixed time offsets + # Check if the traj object has fixed time offsets if not hasattr(traj, 'fixed_time_offsets'): traj.fixed_time_offsets = {} - # now we've loaded the phase 1 solution, move it to prevent accidental reprocessing - procfile = os.path.join(self.phase1_dir, pick + '_processing') + # now we've loaded the phase 1 solution, move it to prevent reprocessing + procfile = os.path.join(self.phase1_dir, 'processed', pick) if os.path.isfile(procfile): os.remove(procfile) os.rename(os.path.join(self.phase1_dir, pick), procfile) From 0a102b92377fb68c5ad97805749b1a946af4143f Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 18 Feb 2026 23:58:28 +0000 Subject: [PATCH 097/132] tidying up the date formats when reporting candidates, fails etc --- wmpl/Trajectory/CorrelateEngine.py | 14 +++++++++----- wmpl/Trajectory/CorrelateRMS.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index e2f8818f..1457d65c 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -869,10 +869,11 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Skip the trajectory if no good solution was found if skip_trajectory: - # Add the trajectory to the list of failed trajectories self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=verbose) - log.info(f"Trajectory at {jdt_ref} skipped and added to fails!") + ref_dt = jd2Date(min([met_obs.jdt_ref for met_obs in traj.observations]), dt_obj=True) + log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") + if matched_obs: for _, met_obs_temp, _ in matched_obs: self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, 
verbose=verbose) @@ -884,7 +885,9 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or if np.any([(obstmp.ang_res_std > np.radians(self.traj_constraints.max_arcsec_err/3600)) for obstmp in traj_status.observations]): + ref_dt = jd2Date(min([met_obs.jdt_ref for met_obs in traj.observations]), dt_obj=True) log.info("2 station only solution, one station has an error above the maximum limit, skipping!") + log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") # Add the trajectory to the list of failed trajectories self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=verbose) @@ -986,7 +989,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Add the trajectory to the list of failed trajectories if mcmode != MCMODE_PHASE2: self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=verbose) - log.info('Trajectory failed to solve') + log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") return False @@ -1430,7 +1433,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # We will exclude these later - we can't do it just yet as if new data has arrived, then # in the next step, the group might be merged with another group creating a solvable set. 
log.info("") - log.info(f" --- ADDING CANDIDATE at {met_obs.reference_dt.isoformat()} ---") + ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) + log.info(f" --- ADDING CANDIDATE at {ref_dt.isoformat()} ---") candidate_trajectories.append(matched_observations) ### Merge all candidate trajectories which share the same observations ### @@ -1720,7 +1724,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver for _, met_obs_temp, _ in matched_observations: self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) - log.info("Trajectory skipped and added to fails!") + log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") continue diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 8a8149b2..20b1ce63 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1378,7 +1378,7 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire traj.jdt_ref = traj.jdt_ref + t0/86400.0 if self.checkTrajIfFailed(traj): - log.info(f'Trajectory at {jd2Date(traj.jdt_ref,dt_obj=True).isoformat()} already failed, skipping') + log.info(f'Candidate at {ref_dt.isoformat()} already failed, skipping') for _, met_obs_temp, _ in cand: self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) remaining_unpaired -= 1 From b73f07d57d8ccf1bab9321c6cec821d2dd5c7280 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 19 Feb 2026 15:36:07 +0000 Subject: [PATCH 098/132] bugfix failed to define ref_dt --- wmpl/Trajectory/CorrelateEngine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 1457d65c..fa80df0a 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -989,6 +989,7 @@ def solveTrajectory(self, traj, 
mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Add the trajectory to the list of failed trajectories if mcmode != MCMODE_PHASE2: self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=verbose) + ref_dt = jd2Date(min([met_obs.jdt_ref for met_obs in traj.observations]), dt_obj=True) log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") return False From 7b45db1fad1c7c5ca4900bd9b3720358dbd9e3f0 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 26 Feb 2026 17:20:43 +0000 Subject: [PATCH 099/132] add some comments --- wmpl/Trajectory/CorrelateDB.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index f9e84062..e02d4f18 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -97,6 +97,12 @@ def checkObsPaired(self, station_code, obs_id, verbose=False): def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): # add or update an entry in the database, setting status = 1 + + # Note that we do not commit the database as this would cause problems if we have to + # stop and restart processing mid-way through a pairing run. By leaving the data uncommitted + # we ensure that if the process crashes, then data will be left unpaired and we can rerun the + # pairing routine safely. + if verbose: log.info(f'adding {obs_id} to paired_obs table') cur = self.dbhandle.cursor() @@ -112,7 +118,7 @@ def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): def unpairObs(self, station_code, obs_id, obs_date, verbose=False): # if an entry exists, update the status to 0. - # this allows us to mark an observation paired, then unpair it later if the solution fails + # this allows us to mark an observation paired during candidate creation, then unpair it later if the solution fails # or we want to force a rerun. 
if verbose: log.info(f'unpairing {obs_id}') @@ -328,6 +334,9 @@ def checkTrajIfFailed(self, traj_reduced, verbose=False): def addTrajectory(self, traj_reduced, failed=False, verbose=False): # add or update an entry in the database, setting status = 1 + # note that unlike the observations db we DO commit here because as soon as a solution is found + # we want to ensure we don't try to find it again on a rerun + if verbose: log.info(f'adding jdt {traj_reduced.jdt_ref} to {"failed" if failed else "trajectories"}') cur = self.dbhandle.cursor() From 91ab59ee04f9d95e9964090bb10adb48060f2594 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 26 Feb 2026 17:22:11 +0000 Subject: [PATCH 100/132] create function to merge broken so we can call it elsewhere if needed --- wmpl/Trajectory/CorrelateEngine.py | 260 ++++++++++++++--------------- 1 file changed, 129 insertions(+), 131 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index fa80df0a..76e3f91e 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1073,6 +1073,124 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or return successful_traj_fit + def mergeBrokenCandidates(self, candidate_trajectories): + ### Merge all candidate trajectories which share the same observations ### + log.info("") + log.info("---------------------------") + log.info("3) MERGING BROKEN OBSERVATIONS") + log.info("---------------------------") + merged_candidate_trajectories = [] + merged_indices = [] + for i, traj_cand_ref in enumerate(candidate_trajectories): + + # Skip candidate trajectories that have already been merged + if i in merged_indices: + continue + + + # Stop the search if the end has been reached + if (i + 1) == len(candidate_trajectories): + merged_candidate_trajectories.append(traj_cand_ref) + break + + + # Get the mean time of the reference observation + ref_mean_dt = traj_cand_ref[0][1].mean_dt + + 
obs_list_ref = [entry[1] for entry in traj_cand_ref]
+        merged_candidate = []
+
+        # Compute the mean radiant of the reference solution
+        plane_radiants_ref = [entry[2].radiant_eq for entry in traj_cand_ref]
+        ra_mean_ref = meanAngle([ra for ra, _ in plane_radiants_ref])
+        dec_mean_ref = np.mean([dec for _, dec in plane_radiants_ref])
+
+
+        # Check for pairs
+        found_first_pair = False
+        for j, traj_cand_test in enumerate(candidate_trajectories[(i + 1):]):
+            # Skip same observations
+            if traj_cand_ref[0] == traj_cand_test[0]:
+                continue
+
+
+            # Get the mean time of the test observation
+            test_mean_dt = traj_cand_test[0][1].mean_dt
+
+            # Make sure the observations that are being compared are within the time window
+            time_diff = (test_mean_dt - ref_mean_dt).total_seconds()
+            if abs(time_diff) > self.traj_constraints.max_toffset:
+                continue
+
+
+            # Break the search if the time went beyond the search. This can be done as observations
+            # are ordered in time
+            if time_diff > self.traj_constraints.max_toffset:
+                break
+
+
+
+            # Create a list of observations
+            obs_list_test = [entry[1] for entry in traj_cand_test]
+
+            # Check if there are any common observations between candidate trajectories and merge them
+            # if that is the case
+            found_match = False
+            test_ids = [x.id for x in obs_list_test]
+            for obs1 in obs_list_ref:
+                if obs1.id in test_ids:
+                    found_match = True
+                    break
+
+
+            # Compute the mean radiant of the test solution
+            plane_radiants_test = [entry[2].radiant_eq for entry in traj_cand_test]
+            ra_mean_test = meanAngle([ra for ra, _ in plane_radiants_test])
+            dec_mean_test = np.mean([dec for _, dec in plane_radiants_test])
+
+            # Skip the merging attempt if the estimated radiants are too far off
+            if np.degrees(angleBetweenSphericalCoords(dec_mean_ref, ra_mean_ref, dec_mean_test, ra_mean_test)) > self.traj_constraints.max_merge_radiant_angle:
+                continue
+
+
+            # Add the candidate trajectory to the common list if a match has been found
+            if found_match:
+
ref_stations = [obs.station_code for obs in obs_list_ref] + + # Add observations that weren't present in the reference candidate + for entry in traj_cand_test: + + # Make sure the added observation is not already added + if entry[1] not in obs_list_ref: + + # Print the reference and the merged radiants + if not found_first_pair: + log.info("") + log.info("------") + log.info("Reference time: {:s}".format(str(ref_mean_dt))) + log.info("Reference stations: {:s}".format(", ".join(sorted(ref_stations)))) + log.info("Reference radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_ref), np.degrees(dec_mean_ref))) + log.info("") + found_first_pair = True + + log.info("Merging: {:s} {:s}".format(str(entry[1].mean_dt), str(entry[1].station_code))) + traj_cand_ref.append(entry) + + log.info("Merged radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_test), np.degrees(dec_mean_test))) + log.info(f'Candidate contains {len(traj_cand_ref)} obs') + + # Mark that the current index has been processed + merged_indices.append(i + j + 1) + + # Add the reference candidate observations to the list + merged_candidate += traj_cand_ref + + # Add the merged observation to the final list + merged_candidate_trajectories.append(merged_candidate) + + return merged_candidate_trajectories + def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, verbose=False): """ Run meteor corellation using available data. 
@@ -1123,11 +1241,14 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if mcmode & MCMODE_CANDS: dt_beg = unpaired_observations_all[0].reference_dt dt_end = unpaired_observations_all[-1].reference_dt + bin_days = 0.25 else: dt_beg, dt_end = self.dh.dt_range + bin_days = 1 + dt_bin_list = generateDatetimeBins( dt_beg, dt_end, - bin_days=1, utc_hour_break=12, tzinfo=datetime.timezone.utc, reverse=False + bin_days=bin_days, utc_hour_break=12, tzinfo=datetime.timezone.utc, reverse=False ) else: @@ -1158,7 +1279,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if mcmode & MCMODE_CANDS: log.info("") log.info("-----------------------------------") - log.info(" PAIRING TRAJECTORIES IN TIME BIN:") + log.info("0) PAIRING TRAJECTORIES IN TIME BIN:") log.info(" BIN BEG: {:s} UTC".format(str(bin_beg))) log.info(" BIN END: {:s} UTC".format(str(bin_end))) log.info("-----------------------------------") @@ -1438,132 +1559,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info(f" --- ADDING CANDIDATE at {ref_dt.isoformat()} ---") candidate_trajectories.append(matched_observations) - ### Merge all candidate trajectories which share the same observations ### - log.info("") - log.info("---------------------------") - log.info("MERGING BROKEN OBSERVATIONS") - log.info("---------------------------") - merged_candidate_trajectories = [] - merged_indices = [] - for i, traj_cand_ref in enumerate(candidate_trajectories): - - # Skip candidate trajectories that have already been merged - if i in merged_indices: - continue - - - # Stop the search if the end has been reached - if (i + 1) == len(candidate_trajectories): - merged_candidate_trajectories.append(traj_cand_ref) - break - - - # Get the mean time of the reference observation - ref_mean_dt = traj_cand_ref[0][1].mean_dt - - obs_list_ref = [entry[1] for entry in traj_cand_ref] - merged_candidate = [] - - # Compute the mean 
radiant of the reference solution - plane_radiants_ref = [entry[2].radiant_eq for entry in traj_cand_ref] - ra_mean_ref = meanAngle([ra for ra, _ in plane_radiants_ref]) - dec_mean_ref = np.mean([dec for _, dec in plane_radiants_ref]) - - - # Check for pairs - found_first_pair = False - for j, traj_cand_test in enumerate(candidate_trajectories[(i + 1):]): - # Skip same observations - if traj_cand_ref[0] == traj_cand_test[0]: - continue - - - # Get the mean time of the test observation - test_mean_dt = traj_cand_test[0][1].mean_dt - - # Make sure the observations that are being compared are within the time window - time_diff = (test_mean_dt - ref_mean_dt).total_seconds() - if abs(time_diff) > self.traj_constraints.max_toffset: - continue - - - # Break the search if the time went beyond the search. This can be done as observations - # are ordered in time - if time_diff > self.traj_constraints.max_toffset: - break - - - - # Create a list of observations - obs_list_test = [entry[1] for entry in traj_cand_test] - - # Check if there any any common observations between candidate trajectories and merge them - # if that is the case - found_match = False - test_ids = [x.id for x in obs_list_test] - for obs1 in obs_list_ref: - if obs1.id in test_ids: - found_match = True - break - - - # Compute the mean radiant of the reference solution - plane_radiants_test = [entry[2].radiant_eq for entry in traj_cand_test] - ra_mean_test = meanAngle([ra for ra, _ in plane_radiants_test]) - dec_mean_test = np.mean([dec for _, dec in plane_radiants_test]) - - # Skip the merging attempt if the estimated radiants are too far off - if np.degrees(angleBetweenSphericalCoords(dec_mean_ref, ra_mean_ref, dec_mean_test, ra_mean_test)) > self.traj_constraints.max_merge_radiant_angle: - continue - - - # Add the candidate trajectory to the common list if a match has been found - if found_match: - - ref_stations = [obs.station_code for obs in obs_list_ref] - - # Add observations that weren't present in 
the reference candidate - for entry in traj_cand_test: - - # Make sure the added observation is not already added - if entry[1] not in obs_list_ref: - - # Print the reference and the merged radiants - if not found_first_pair: - log.info("") - log.info("------") - log.info("Reference time: {:s}".format(str(ref_mean_dt))) - log.info("Reference stations: {:s}".format(", ".join(sorted(ref_stations)))) - log.info("Reference radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_ref), np.degrees(dec_mean_ref))) - log.info("") - found_first_pair = True - - log.info("Merging: {:s} {:s}".format(str(entry[1].mean_dt), str(entry[1].station_code))) - traj_cand_ref.append(entry) - - log.info("Merged radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_test), np.degrees(dec_mean_test))) - log.info(f'Candidate contains {len(traj_cand_ref)} obs') - - - - - # Mark that the current index has been processed - merged_indices.append(i + j + 1) - - - # Add the reference candidate observations to the list - merged_candidate += traj_cand_ref - - - # Add the merged observation to the final list - merged_candidate_trajectories.append(merged_candidate) - - log.info("-----------------------") - log.info('CHECKING FOR ALREADY-FAILED CANDIDATES') - log.info("-----------------------") - - # okay now we can remove any already-failed combinations. This wasn't safe to do earlier - # because we first needed to see if we could merge any groups. + # Check for mergeable candidate combinations then remove any that already failed. 
+ merged_candidate_trajectories = self.mergeBrokenCandidates(candidate_trajectories) candidate_trajectories, remaining_unpaired = self.dh.excludeAlreadyFailedCandidates(merged_candidate_trajectories, remaining_unpaired) log.info("-----------------------") @@ -1571,16 +1568,17 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info("-----------------------") # in candidate mode we want to save the candidates to disk - if mcmode == MCMODE_CANDS: + if mcmode == MCMODE_CANDS: log.info("-----------------------") - log.info('SAVING {} CANDIDATES'.format(len(candidate_trajectories))) + log.info('5) SAVING {} CANDIDATES'.format(len(candidate_trajectories))) log.info("-----------------------") self.dh.saveCandidates(candidate_trajectories, verbose=verbose) return len(candidate_trajectories) + else: log.info("-----------------------") - log.info('PROCESSING {} CANDIDATES'.format(len(candidate_trajectories))) + log.info('5) PROCESSING {} CANDIDATES'.format(len(candidate_trajectories))) log.info("-----------------------") # end of 'if mcmode & MCMODE_CANDS' From 615ea336a67499c995ec4ef551d3e2c425f62ea3 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 07:50:13 +0000 Subject: [PATCH 101/132] commit the write-ahead-log after every candidate save op --- wmpl/Trajectory/CorrelateDB.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index e02d4f18..0c2304eb 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -56,6 +56,7 @@ def openObsDatabase(self, db_path, db_name='observations.db', purge_records=Fals db_full_name = os.path.join(db_path, f'{db_name}') log.info(f'opening database {db_full_name}') con = sqlite3.connect(db_full_name) + con.execute('pragma journal_mode=wal') cur = con.cursor() if purge_records: cur.execute('drop table paired_obs') @@ -68,8 +69,11 @@ def openObsDatabase(self, db_path, 
db_name='observations.db', purge_records=Fals def commitObsDatabase(self): # commit the obs db. This function exists so we can do lazy writes in some cases - self.dbhandle.commit() + try: + self.dbhandle.execute('pragma wal_checkpoint(TRUNCATE)') + except Exception: + self.dbhandle.execute('pragma wal_checkpoint(PASSIVE)') return def closeObsDatabase(self): From 43a4e8dead2fa29cb49926ffaaefb1dd08bfc3ae Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 07:50:41 +0000 Subject: [PATCH 102/132] commit the obs database after saving candidates --- wmpl/Trajectory/CorrelateEngine.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 76e3f91e..9f7c1c83 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1574,6 +1574,10 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info("-----------------------") self.dh.saveCandidates(candidate_trajectories, verbose=verbose) + + # now we've saved the candidates we can commit the changes + self.dh.observations_db.commitObsDatabase() + return len(candidate_trajectories) else: From 1a5d5927091d457d943b84d62aeea325c5d33f3d Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 07:51:25 +0000 Subject: [PATCH 103/132] don't load stations or scan folders if in phase1 or phase2 only modes --- wmpl/Trajectory/CorrelateRMS.py | 65 +++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 20b1ce63..1ab6a37b 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -537,6 +537,9 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode # Load database of processed folders database_path = os.path.join(self.db_dir, JSON_DB_NAME) + # create an empty processing list + self.processing_list = [] + 
log.info("") if mcmode != MCMODE_PHASE2: @@ -567,14 +570,15 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode pass log.info(" ... done!") - # Load the list of stations - station_list = self.loadStations() + if mcmode & MCMODE_CANDS: + # Load the list of stations + station_list = self.loadStations() - # Find unprocessed meteor files - log.info("") - log.info("Finding unprocessed data...") - self.processing_list = self.findUnprocessedFolders(station_list) - log.info(" ... done!") + # Find unprocessed meteor files + log.info("") + log.info("Finding unprocessed data...") + self.processing_list = self.findUnprocessedFolders(station_list) + log.info(" ... done!") # in phase 1, initialise and collect data second as we load candidates dynamically self.initialiseRemoteDataHandling() @@ -684,7 +688,7 @@ def __init__(self, station, obs_id): def loadStations(self): """ Load the station names in the processing folder. """ - station_list = [] + avail_station_list = [] for dir_name in sorted(os.listdir(self.dir_path)): @@ -692,12 +696,12 @@ def loadStations(self): if os.path.isdir(os.path.join(self.dir_path, dir_name)): if re.match("^[A-Z]{2}[A-Z0-9]{4}$", dir_name): log.info("Using station: " + dir_name) - station_list.append(dir_name) + avail_station_list.append(dir_name) else: log.info("Skipping directory: " + dir_name) - return station_list + return avail_station_list def findUnprocessedFolders(self, station_list): """ Go through directories and find folders with unprocessed data. 
""" @@ -1221,11 +1225,11 @@ def generateTrajOutputDirectoryPath(self, traj, make_dirs=False): # Generate a list of station codes if isinstance(traj, TrajectoryReduced): # If the reducted trajectory object is given - station_list = traj.participating_stations + traj_station_list = traj.participating_stations else: # If the full trajectory object is given - station_list = [obs.station_id for obs in traj.observations if obs.ignore_station is False] + traj_station_list = [obs.station_id for obs in traj.observations if obs.ignore_station is False] # Datetime of the reference trajectory time @@ -1243,7 +1247,7 @@ def generateTrajOutputDirectoryPath(self, traj, make_dirs=False): # Name of the trajectory directory # sort the list of country codes otherwise we can end up with duplicate trajectories - ctry_list = list(set([stat_id[:2] for stat_id in station_list])) + ctry_list = list(set([stat_id[:2] for stat_id in traj_station_list])) ctry_list.sort() traj_dir = dt.strftime("%Y%m%d_%H%M%S.%f")[:-3] + "_" + "_".join(ctry_list) @@ -1978,10 +1982,12 @@ def signal_handler(sig, frame): proc_dir_dt_beg = min(proc_dir_dts) proc_dir_dt_end = max(proc_dir_dts) + bin_length = 0.25 if mcmode == MCMODE_CANDS else 1.0 + # Split the processing into daily chunks dt_bins = generateDatetimeBins( proc_dir_dt_beg, proc_dir_dt_end, - bin_days=1, tzinfo=datetime.timezone.utc, reverse=False) + bin_days=bin_length, tzinfo=datetime.timezone.utc, reverse=False) # check if we've created an extra bucket (might happen if requested timeperiod is less than 24h) if event_time_range is not None: @@ -1992,12 +1998,13 @@ def signal_handler(sig, frame): dt_bins = [(dh.dt_range[0], dh.dt_range[1])] if dh.dt_range is not None: - # there's some data to process - log.info("") - log.info("ALL TIME BINS:") - log.info("----------") - for bin_beg, bin_end in dt_bins: - log.info("{:s}, {:s}".format(str(bin_beg), str(bin_end))) + # there's some data to process and we're in candidate mode + if mcmode & MCMODE_CANDS: + 
log.info("") + log.info("ALL TIME BINS:") + log.info("----------") + for bin_beg, bin_end in dt_bins: + log.info("{:s}, {:s}".format(str(bin_beg), str(bin_end))) ### ### @@ -2006,14 +2013,13 @@ def signal_handler(sig, frame): # Go through all chunks in time for bin_beg, bin_end in dt_bins: - log.info("") - log.info("PROCESSING TIME BIN:") - log.info("{:s}, {:s}".format(str(bin_beg), str(bin_end))) - log.info("-----------------------------") - log.info("") - - # Load data of unprocessed observations only if creating candidates if mcmode & MCMODE_CANDS: + log.info("") + log.info("PROCESSING TIME BIN:") + log.info("{:s}, {:s}".format(str(bin_beg), str(bin_end))) + log.info("-----------------------------") + log.info("") + dh.unpaired_observations = dh.loadUnpairedObservations(dh.processing_list, dt_range=(bin_beg, bin_end)) log.info(f'loaded {len(dh.unpaired_observations)} observations') @@ -2045,6 +2051,11 @@ def signal_handler(sig, frame): dh.traj_db = TrajectoryDatabase(dh.db_dir, purge_records=True) dh.observations_db = ObservationDatabase(dh.db_dir, purge_records=True) + # If we're in either of these modes, the correlator will have scooped up available data + # from candidates or phase1 folders so no need to keep looping. 
+ if mcmode == MCMODE_PHASE1 or mcmode == MCMODE_PHASE2: + break + if mcmode & MCMODE_CANDS: dh.observations_db.closeObsDatabase() From 4ba68fc1e739100ab0920d6f1c44575441ddc331 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 07:52:13 +0000 Subject: [PATCH 104/132] support fractional bins --- wmpl/Utils/Math.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/wmpl/Utils/Math.py b/wmpl/Utils/Math.py index bb6069b5..d916bc28 100644 --- a/wmpl/Utils/Math.py +++ b/wmpl/Utils/Math.py @@ -1113,11 +1113,13 @@ def generateDatetimeBins(dt_beg, dt_end, bin_days=7, utc_hour_break=12, tzinfo=N else: bin_beg = dt_beg + datetime.timedelta(days=i * bin_days) - bin_beg = bin_beg.replace(hour=int(utc_hour_break), minute=0, second=0, microsecond=0) + if bin_days > 0.999: + bin_beg = bin_beg.replace(hour=int(utc_hour_break), minute=0, second=0, microsecond=0) # Generate the bin ending edge bin_end = bin_beg + datetime.timedelta(days=bin_days) - bin_end = bin_end.replace(hour=int(utc_hour_break), minute=0, second=0, microsecond=0) + if bin_days > 0.999: + bin_end = bin_end.replace(hour=int(utc_hour_break), minute=0, second=0, microsecond=0) # Check that the ending bin is not beyond the end dt end_reached = False From 3c0ae48b5232123ebfbeadc8a565f8a2922c4ec5 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 13:28:41 +0000 Subject: [PATCH 105/132] small correction in correlateDB --- wmpl/Trajectory/CorrelateDB.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 0c2304eb..daaedff8 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -667,7 +667,7 @@ def __init__(self, db_dir, dt_range=None): elif dbname == 'trajectories': trajdb = TrajectoryDatabase(cml_args.dir_path) - if action == 'read': + if action == 'status': cur = trajdb.dbhandle.cursor() cur.execute('select * from trajectories where status=1') 
print(f'there are {len(cur.fetchall())} successful trajectories') From f51e56c17515f25934fa497489f12197a4102524 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 16:31:52 +0000 Subject: [PATCH 106/132] remove unnecessary cursors --- wmpl/Trajectory/CorrelateDB.py | 125 +++++++++++++-------------------- 1 file changed, 50 insertions(+), 75 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index daaedff8..19aaca12 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -57,14 +57,12 @@ def openObsDatabase(self, db_path, db_name='observations.db', purge_records=Fals log.info(f'opening database {db_full_name}') con = sqlite3.connect(db_full_name) con.execute('pragma journal_mode=wal') - cur = con.cursor() if purge_records: - cur.execute('drop table paired_obs') - cur.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") - if cur.fetchone() is None: - cur.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") + con.execute('drop table paired_obs') + res = con.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") + if res.fetchone() is None: + con.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") con.commit() - cur.close() return con def commitObsDatabase(self): @@ -89,11 +87,9 @@ def checkObsPaired(self, station_code, obs_id, verbose=False): # return True if there is an observation with the correct station code, obs id and with status = 1 paired = True - cur = self.dbhandle.cursor() - cur.execute(f"SELECT obs_id FROM paired_obs WHERE obs_id='{obs_id}' and status=1") + cur = self.dbhandle.execute(f"SELECT obs_id FROM paired_obs WHERE obs_id='{obs_id}' and status=1") if cur.fetchone() is None: paired = False - cur.close() if verbose: log.info(f'{obs_id} is {"Paired" if paired else "Unpaired"}') return paired @@ -109,10 +105,8 @@ def 
addPairedObs(self, station_code, obs_id, obs_date, verbose=False): if verbose: log.info(f'adding {obs_id} to paired_obs table') - cur = self.dbhandle.cursor() sqlstr = f"insert or replace into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" - cur.execute(sqlstr) - cur.close() + self.dbhandle.execute(sqlstr) if not self.checkObsPaired(station_code, obs_id): log.warning(f'failed to add {obs_id} to paired_obs table') @@ -120,23 +114,20 @@ def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): return True - def unpairObs(self, station_code, obs_id, obs_date, verbose=False): + def unpairObs(self, obs_ids, verbose=True): # if an entry exists, update the status to 0. # this allows us to mark an observation paired during candidate creation, then unpair it later if the solution fails # or we want to force a rerun. if verbose: - log.info(f'unpairing {obs_id}') - - cur = self.dbhandle.cursor() + log.info(f'unpairing {obs_ids}') try: - cur.execute(f"insert or ignore into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)") - cur.execute(f"update paired_obs set status=0 where station_code='{station_code}' and obs_id='{obs_id}'") - self.dbhandle.commit() + obs_ids_str = ','.join(obs_ids) + self.dbhandle.execute(f"update paired_obs set status=0 where obs_id in ('{obs_ids_str}')") except Exception: # obs wasn't in the database so no need to unpair it pass - cur.close() + self.dbhandle.commit() return True @@ -150,24 +141,22 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): archdb.close() # attach the arch db, copy the records then delete them - cur = self.dbhandle.cursor() archdb_fullname = os.path.join(db_path, f'{archdb_name}') - cur.execute(f"attach database '{archdb_fullname}' as archdb") + self.dbhandle.execute(f"attach database '{archdb_fullname}' as archdb") try: # bulk-copy if possible - cur.execute(f'insert or replace into archdb.paired_obs select * from paired_obs where obs_date < 
{archdate_jd}') + self.dbhandle.execute(f'insert or replace into archdb.paired_obs select * from paired_obs where obs_date < {archdate_jd}') except Exception: # otherwise, one by one - cur.execute(f'select * from paired_obs where obs_date < {archdate_jd}') + cur = self.dbhandle.execute(f'select * from paired_obs where obs_date < {archdate_jd}') for row in cur.fetchall(): try: - cur.execute(f"insert into archdb.paired_obs values('{row[0]}','{row[1]}',{row[2]},{row[3]})") + self.dbhandle.execute(f"insert into archdb.paired_obs values('{row[0]}','{row[1]}',{row[2]},{row[3]})") except Exception: log.info(f'{row[1]} already exists in target') - cur.execute(f'delete from paired_obs where obs_date < {archdate_jd}') + self.dbhandle.execute(f'delete from paired_obs where obs_date < {archdate_jd}') self.dbhandle.commit() - cur.close() return def moveObsJsonRecords(self, paired_obs, dt_range): @@ -208,19 +197,17 @@ def mergeObsDatabase(self, source_db_path): log.warning(f'source database missing: {source_db_path}') return # attach the other db, copy the records then detach it - cur = self.dbhandle.cursor() - cur.execute(f"attach database '{source_db_path}' as sourcedb") + self.dbhandle.execute(f"attach database '{source_db_path}' as sourcedb") try: # bulk-copy - cur.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') + self.dbhandle.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') status = True except Exception: log.info('unable to merge child observations') status = False self.dbhandle.commit() - cur.execute("detach database 'sourcedb'") - cur.close() + self.dbhandle.execute("detach database 'sourcedb'") return status @@ -251,14 +238,13 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal db_full_name = os.path.join(db_path, f'{db_name}') log.info(f'opening database {db_full_name}') con = sqlite3.connect(db_full_name) - cur = con.cursor() if purge_records: - cur.execute('drop table if 
exists trajectories') - cur.execute('drop table if exists failed_trajectories') + con.execute('drop table if exists trajectories') + con.execute('drop table if exists failed_trajectories') con.commit() - res = cur.execute("SELECT name FROM sqlite_master WHERE name='trajectories'") + res = con.execute("SELECT name FROM sqlite_master WHERE name='trajectories'") if res.fetchone() is None: - cur.execute("""CREATE TABLE trajectories( + con.execute("""CREATE TABLE trajectories( jdt_ref REAL UNIQUE, traj_id VARCHAR UNIQUE, traj_file_path VARCHAR, @@ -281,10 +267,10 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal rend_ele REAL, status INTEGER) """) - res = cur.execute("SELECT name FROM sqlite_master WHERE name='failed_trajectories'") + res = con.execute("SELECT name FROM sqlite_master WHERE name='failed_trajectories'") if res.fetchone() is None: # note: traj_id not unique here as some fails will have traj-id None - cur.execute("""CREATE TABLE failed_trajectories( + con.execute("""CREATE TABLE failed_trajectories( jdt_ref REAL UNIQUE, traj_id VARCHAR, traj_file_path VARCHAR, @@ -298,7 +284,6 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal status INTEGER) """) con.commit() - cur.close() return con def commitTrajDatabase(self): @@ -332,7 +317,6 @@ def checkTrajIfFailed(self, traj_reduced, verbose=False): else: traj_stations = list(set(json.loads(row[1]) + json.loads(row[2]))) found = True if (traj_stations == station_list) else False - cur.close() return found def addTrajectory(self, traj_reduced, failed=False, verbose=False): @@ -343,7 +327,6 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): if verbose: log.info(f'adding jdt {traj_reduced.jdt_ref} to {"failed" if failed else "trajectories"}') - cur = self.dbhandle.cursor() # remove the output_dir part from the path so that the data are location-independent traj_file_path = 
traj_reduced.traj_file_path[traj_reduced.traj_file_path.find('trajectories'):] @@ -379,9 +362,8 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): f"{traj_reduced.rend_lat},{traj_reduced.rend_lon},{traj_reduced.rend_ele},1)") sql_str = sql_str.replace('nan','"NaN"') - cur.execute(sql_str) + self.dbhandle.execute(sql_str) self.dbhandle.commit() - cur.close() return True def removeTrajectory(self, traj_reduced, keepFolder=False, failed=False, verbose=False): @@ -392,14 +374,12 @@ def removeTrajectory(self, traj_reduced, keepFolder=False, failed=False, verbose log.info(f'removing {traj_reduced.traj_id}') table_name = 'failed_trajectories' if failed else 'trajectories' - cur = self.dbhandle.cursor() try: - cur.execute(f"update {table_name} set status=0 where jdt_ref='{traj_reduced.jdt_ref}'") + self.dbhandle.execute(f"update {table_name} set status=0 where jdt_ref='{traj_reduced.jdt_ref}'") self.dbhandle.commit() except Exception: # traj wasn't in the database so no action required pass - cur.close() # Remove the trajectory folder on the disk if not keepFolder and os.path.isfile(traj_reduced.traj_file_path): @@ -417,16 +397,13 @@ def getTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, ver if verbose: log.info(f'getting trajectories between {jd2Date(jdt_start, dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")} and {jd2Date(jdt_end, dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")}') - cur = self.dbhandle.cursor() if not jdt_end: - cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") + self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") rows = cur.fetchall() else: - cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") - rows = cur.fetchall() - cur.close() + rows = self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") trajs = [] - for rw in rows: + for rw in rows.fetchall(): rw = [np.nan if x == 'NaN' else x for x in 
rw] json_dict = {'jdt_ref':rw[0], 'traj_id':rw[1], 'traj_file_path':os.path.join(output_dir, rw[2]), 'participating_stations': json.loads(rw[3]), @@ -446,17 +423,15 @@ def getTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, ver def getTrajNames(self, jdt_start=None, jdt_end=None, failed=False, verbose=False): table_name = 'failed_trajectories' if failed else 'trajectories' - cur = self.dbhandle.cursor() if not jdt_start: - cur.execute(f"SELECT * FROM {table_name}") + cur = self.dbhandle.execute(f"SELECT * FROM {table_name}") rows = cur.fetchall() elif not jdt_end: - cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") + cur = self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") rows = cur.fetchall() else: - cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") + cur = self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") rows = cur.fetchall() - cur.close() trajs = [] for rw in rows: trajs.append(rw[2]) @@ -469,14 +444,12 @@ def removeDeletedTrajectories(self, output_dir, jdt_start, jdt_end=None, failed= if verbose: log.info(f'getting trajectories between {jdt_start} and {jdt_end}') - cur = self.dbhandle.cursor() if not jdt_end: - cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") + cur = self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") rows = cur.fetchall() else: - cur.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") + cur = self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") rows = cur.fetchall() - cur.close() i = 0 for rw in rows: if not os.path.isfile(os.path.join(output_dir, rw[2])): @@ -498,9 +471,8 @@ def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): archdb.close() # attach the arch db, copy the records then delete them - cur = self.dbhandle.cursor() 
archdb_fullname = os.path.join(db_path, f'{archdb_name}') - cur.execute(f"attach database '{archdb_fullname}' as archdb") + cur = self.dbhandle.execute(f"attach database '{archdb_fullname}' as archdb") for table_name in ['trajectories', 'failed_trajectories']: try: # bulk-copy if possible @@ -510,7 +482,6 @@ def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): log.warning(f'unable to archive {table_name}') self.dbhandle.commit() - cur.close() return def moveFailedTrajectories(self, failed_trajectories, dt_range): @@ -531,7 +502,7 @@ def moveFailedTrajectories(self, failed_trajectories, dt_range): if not i % 10000: self.commitTrajDatabase() log.info(f'moved {i} failed_trajectories') - self.dbhandle.commit() + self.commitTrajDatabase() log.info(f'done - moved {i} failed_trajectories') return @@ -543,8 +514,7 @@ def mergeTrajDatabase(self, source_db_path): log.warning(f'source database missing: {source_db_path}') return # attach the other db, copy the records then detach it - cur = self.dbhandle.cursor() - cur.execute(f"attach database '{source_db_path}' as sourcedb") + cur = self.dbhandle.execute(f"attach database '{source_db_path}' as sourcedb") # TODO need to correct the traj_file_path to account for server locations @@ -558,7 +528,6 @@ def mergeTrajDatabase(self, source_db_path): status = False self.dbhandle.commit() cur.execute("detach database 'sourcedb'") - cur.close() return status ################################################################################## @@ -596,6 +565,8 @@ def __init__(self, db_dir, dt_range=None): arg_parser.add_argument('--action', type=str, default=None, help='Action to take on the database') + arg_parser.add_argument('--stmt', type=str, default=None, help='statement to execute eg "select * from paired_obs"') + arg_parser.add_argument("--logdir", type=str, default=None, help="Path to the directory where the log files will be stored. 
If not given, a logs folder will be created in the database folder") @@ -632,6 +603,8 @@ def __init__(self, db_dir, dt_range=None): dbname = cml_args.database.lower() action = cml_args.action.lower() + stmt = cml_args.stmt + dt_range = None if cml_args.timerange is not None: time_beg, time_end = cml_args.timerange.strip("(").strip(")").split(",") @@ -659,19 +632,21 @@ def __init__(self, db_dir, dt_range=None): if dbname == 'observations': obsdb = ObservationDatabase(cml_args.dir_path) if action == 'status': - cur = obsdb.dbhandle.cursor() - cur.execute('select * from paired_obs where status=1') + cur = obsdb.dbhandle.execute('select * from paired_obs where status=1') print(f'there are {len(cur.fetchall())} paired obs') - cur.execute('select * from paired_obs where status=0') + cur = obsdb.dbhandle.execute('select * from paired_obs where status=0') print(f'and {len(cur.fetchall())} unpaired obs') + if action == 'execute': + print(stmt) + cur = obsdb.dbhandle.execute(stmt) + print(cur.fetchall()) elif dbname == 'trajectories': trajdb = TrajectoryDatabase(cml_args.dir_path) if action == 'status': - cur = trajdb.dbhandle.cursor() - cur.execute('select * from trajectories where status=1') + cur = trajdb.dbhandle.execute('select * from trajectories where status=1') print(f'there are {len(cur.fetchall())} successful trajectories') - cur.execute('select * from failed_trajectories') + cur = trajdb.dbhandle.execute('select * from failed_trajectories') print(f'and {len(cur.fetchall())} failed trajectories') else: log.info('valid database not specified') From d898e1fe70475c7e9eea81dc085551a589857861 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 16:34:50 +0000 Subject: [PATCH 107/132] support to purge local processed data simplify unpairing code --- wmpl/Trajectory/CorrelateEngine.py | 21 ++++++++++----------- wmpl/Trajectory/CorrelateRMS.py | 15 ++++++++------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git 
a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 9f7c1c83..f2399807 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -875,8 +875,8 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") if matched_obs: - for _, met_obs_temp, _ in matched_obs: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) + obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in matched_obs] + self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) return False # If there are only two stations, make sure to reject solutions which have stations with @@ -891,8 +891,8 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Add the trajectory to the list of failed trajectories self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=verbose) - for _, met_obs_temp, _ in matched_obs: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) + obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in matched_obs] + self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) return False @@ -1440,11 +1440,10 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if self.dh.observations_db.addPairedObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt): remaining_unpaired -= 1 - else: - for met_obs_temp, _ in candidate_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) log.info("New trajectory solution failed, keeping the old trajectory...") + obs_ids = [met_obs_temp.id for met_obs_temp, _ in candidate_observations] + self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) ### ### @@ -1725,9 +1724,9 @@ def run(self, event_time_range=None, 
bin_time_range=None, mcmode=MCMODE_ALL, ver self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref, verbose=verbose) - for _, met_obs_temp, _ in matched_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") + obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in matched_observations] + self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) continue @@ -1791,8 +1790,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # new observations are added if self.dh.checkTrajIfFailed(traj): log.info("The same trajectory already failed to be computed in previous runs!") - for _, met_obs_temp, _ in matched_observations: - self.dh.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) + obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in matched_observations] + self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) continue # pass in matched_observations here so that solveTrajectory can mark them paired if they're used diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 1ab6a37b..00b67676 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -523,12 +523,13 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.candidate_dir = os.path.join(self.output_dir, 'candidates') if not self.mc_mode & MCMODE_PHASE2: mkdirP(os.path.join(self.candidate_dir, 'processed')) + self.purgeProcessedData(os.path.join(self.candidate_dir, 'processed')) # Phase 1 trajectory pickle directory needed to reload previous results. 
self.phase1_dir = os.path.join(self.output_dir, 'phase1') if self.mc_mode & MCMODE_PHASE1: mkdirP(os.path.join(self.phase1_dir, 'processed')) - self.purgePhase1ProcessedData(os.path.join(self.phase1_dir, 'processed')) + self.purgeProcessedData(os.path.join(self.phase1_dir, 'processed')) self.verbose = verbose @@ -631,10 +632,10 @@ def initialiseRemoteDataHandling(self): else: self.RemoteDatahandler = None - def purgePhase1ProcessedData(self, dir_path): - """ Purge old phase1 processed data if it is older than 90 days. """ + def purgeProcessedData(self, dir_path, days_back=30): + """ Purge processed candidate or phase1 data if it is older than 30 days. """ - refdt = time.time() - 90*86400 + refdt = time.time() - days_back*86400 result = [] for path, _, files in os.walk(dir_path): @@ -1383,9 +1384,9 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire if self.checkTrajIfFailed(traj): log.info(f'Candidate at {ref_dt.isoformat()} already failed, skipping') - for _, met_obs_temp, _ in cand: - self.observations_db.unpairObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose) - remaining_unpaired -= 1 + obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in cand] + self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) + remaining_unpaired -= len(obs_ids) else: candidate_trajectories.append(cand) From 704aa0a8714748c6516861e6619da739a6aefc6c Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 16:45:27 +0000 Subject: [PATCH 108/132] make sure dbs are closed in the maintenance section --- wmpl/Trajectory/CorrelateDB.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 19aaca12..73d1c71c 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -124,7 +124,6 @@ def unpairObs(self, obs_ids, verbose=True): obs_ids_str = ','.join(obs_ids) self.dbhandle.execute(f"update paired_obs set 
status=0 where obs_id in ('{obs_ids_str}')") except Exception: - # obs wasn't in the database so no need to unpair it pass self.dbhandle.commit() @@ -640,6 +639,7 @@ def __init__(self, db_dir, dt_range=None): print(stmt) cur = obsdb.dbhandle.execute(stmt) print(cur.fetchall()) + obsdb.closeObsDatabase() elif dbname == 'trajectories': trajdb = TrajectoryDatabase(cml_args.dir_path) @@ -648,5 +648,6 @@ def __init__(self, db_dir, dt_range=None): print(f'there are {len(cur.fetchall())} successful trajectories') cur = trajdb.dbhandle.execute('select * from failed_trajectories') print(f'and {len(cur.fetchall())} failed trajectories') + trajdb.closeObsDatabase() else: log.info('valid database not specified') From 51b637a3a0731d9b08215813809fc2d6699fda09 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 18:33:22 +0000 Subject: [PATCH 109/132] add verbosity --- wmpl/Trajectory/CorrelateRMS.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 00b67676..b7c0759d 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -2036,7 +2036,7 @@ def signal_handler(sig, frame): # Run the trajectory correlator tc = TrajectoryCorrelator(dh, trajectory_constraints, cml_args.velpart, data_in_j2000=True, enableOSM=cml_args.enableOSM) bin_time_range = [bin_beg, bin_end] - num_done = tc.run(event_time_range=event_time_range, mcmode=mcmode, bin_time_range=bin_time_range) + num_done = tc.run(event_time_range=event_time_range, mcmode=mcmode, bin_time_range=bin_time_range, verbose=cml_args.verbose) if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child' and num_done > 0: log.info('uploading to master node') @@ -2045,13 +2045,16 @@ def signal_handler(sig, frame): dh.traj_db.closeTrajDatabase() dh.observations_db.closeObsDatabase() - dh.RemoteDatahandler.uploadToMaster(dh.output_dir, verbose=False) + dh.RemoteDatahandler.uploadToMaster(dh.output_dir, 
verbose=cml_args.verbose) # truncate the tables here so they are clean for the next run if mcmode != MCMODE_PHASE2: dh.traj_db = TrajectoryDatabase(dh.db_dir, purge_records=True) dh.observations_db = ObservationDatabase(dh.db_dir, purge_records=True) + if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'master': + dh.moveUploadedData(verbose=cml_args.verbose) + pass # If we're in either of these modes, the correlator will have scooped up available data # from candidates or phase1 folders so no need to keep looping. if mcmode == MCMODE_PHASE1 or mcmode == MCMODE_PHASE2: From 3e7ac46910822e7ec44176ff8b17554efb6bda2f Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 18:39:49 +0000 Subject: [PATCH 110/132] fix verbosity --- wmpl/Trajectory/CorrelateRMS.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index b7c0759d..ea81fe6b 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1891,6 +1891,12 @@ def signal_handler(sig, frame): elif mcmode == MCMODE_ALL: log.info('Full processing mode') + if cml_args.verbose: + log.info('verbose flag set') + verbose = True + else: + verbose = False + # Run processing. 
If the auto run more is not on, the loop will break after one run previous_start_time = None @@ -1948,7 +1954,7 @@ def signal_handler(sig, frame): dh = RMSDataHandle( cml_args.dir_path, dt_range=event_time_range, db_dir=cml_args.dbdir, output_dir=cml_args.outdir, - mcmode=mcmode, max_trajs=max_trajs, verbose=cml_args.verbose, archivemonths=cml_args.archivemonths) + mcmode=mcmode, max_trajs=max_trajs, verbose=verbose, archivemonths=cml_args.archivemonths) # If there is nothing to process and we're in Candidate mode, stop if not dh.processing_list and (mcmode & MCMODE_CANDS): @@ -2036,7 +2042,7 @@ def signal_handler(sig, frame): # Run the trajectory correlator tc = TrajectoryCorrelator(dh, trajectory_constraints, cml_args.velpart, data_in_j2000=True, enableOSM=cml_args.enableOSM) bin_time_range = [bin_beg, bin_end] - num_done = tc.run(event_time_range=event_time_range, mcmode=mcmode, bin_time_range=bin_time_range, verbose=cml_args.verbose) + num_done = tc.run(event_time_range=event_time_range, mcmode=mcmode, bin_time_range=bin_time_range, verbose=verbose) if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'child' and num_done > 0: log.info('uploading to master node') @@ -2045,7 +2051,7 @@ def signal_handler(sig, frame): dh.traj_db.closeTrajDatabase() dh.observations_db.closeObsDatabase() - dh.RemoteDatahandler.uploadToMaster(dh.output_dir, verbose=cml_args.verbose) + dh.RemoteDatahandler.uploadToMaster(dh.output_dir, verbose=verbose) # truncate the tables here so they are clean for the next run if mcmode != MCMODE_PHASE2: @@ -2053,7 +2059,7 @@ def signal_handler(sig, frame): dh.observations_db = ObservationDatabase(dh.db_dir, purge_records=True) if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'master': - dh.moveUploadedData(verbose=cml_args.verbose) + dh.moveUploadedData(verbose=verbose) pass # If we're in either of these modes, the correlator will have scooped up available data # from candidates or phase1 folders so no need to keep looping. 
From 7c16e371478f21b26c50d123eef29a7d233c787b Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 18:43:09 +0000 Subject: [PATCH 111/132] error handling in db merge process --- wmpl/Trajectory/CorrelateDB.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 73d1c71c..9c787cb3 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -201,8 +201,9 @@ def mergeObsDatabase(self, source_db_path): # bulk-copy self.dbhandle.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') status = True - except Exception: - log.info('unable to merge child observations') + except Exception as e: + log.info(f'unable to merge child observations from {source_db_path}') + log.info(e) status = False self.dbhandle.commit() From 739ae2553009a5760390cc9b7dfbd5cf38ebff29 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 18:44:55 +0000 Subject: [PATCH 112/132] remote remote WAL dbs after merger --- wmpl/Trajectory/CorrelateRMS.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index ea81fe6b..b5045abc 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1519,6 +1519,11 @@ def moveUploadedData(self, verbose=False): for obsdb_path in glob.glob(os.path.join(node.dirpath,'files','observations*.db')): if self.observations_db.mergeObsDatabase(obsdb_path): os.remove(obsdb_path) + try: + os.remove(f'{obsdb_path}-wal') + os.remove(f'{obsdb_path}-shm') + except Exception: + pass for trajdb_path in glob.glob(os.path.join(node.dirpath,'files','trajectories*.db')): From dcc904a406411501d0a5a060074e1f39ccb7badb Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 18:46:21 +0000 Subject: [PATCH 113/132] avoid trying to merge empty databases --- wmpl/Trajectory/CorrelateDB.py | 4 +++- 1 file changed, 3 insertions(+), 
1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 9c787cb3..cc990f6b 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -199,7 +199,9 @@ def mergeObsDatabase(self, source_db_path): self.dbhandle.execute(f"attach database '{source_db_path}' as sourcedb") try: # bulk-copy - self.dbhandle.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') + con = self.dbhandle.execute('select * from sourcedb.paired_obs') + if len(con.fetchall()) > 0: + self.dbhandle.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') status = True except Exception as e: log.info(f'unable to merge child observations from {source_db_path}') From 6415260d651b2a3187e711992315a5aac98a73d0 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 18:53:41 +0000 Subject: [PATCH 114/132] more fixes for database or table not existing --- wmpl/Trajectory/CorrelateDB.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index cc990f6b..4b4268fb 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -197,16 +197,17 @@ def mergeObsDatabase(self, source_db_path): return # attach the other db, copy the records then detach it self.dbhandle.execute(f"attach database '{source_db_path}' as sourcedb") - try: - # bulk-copy - con = self.dbhandle.execute('select * from sourcedb.paired_obs') - if len(con.fetchall()) > 0: - self.dbhandle.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') - status = True - except Exception as e: - log.info(f'unable to merge child observations from {source_db_path}') - log.info(e) + res = self.dbhandle.execute("SELECT name FROM sourcedb.sqlite_master WHERE name='paired_obs'") + if res.fetchone() is None: status = False + else: + try: + self.dbhandle.execute('insert or replace into paired_obs select 
* from sourcedb.paired_obs') + status = True + except Exception as e: + log.info(f'unable to merge child observations from {source_db_path}') + log.info(e) + status = False self.dbhandle.commit() self.dbhandle.execute("detach database 'sourcedb'") From d46b06f474746108fbc4750b1583ce678cb381b1 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 18:55:57 +0000 Subject: [PATCH 115/132] documentation --- wmpl/Trajectory/CorrelateDB.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 4b4268fb..d08fa283 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -199,7 +199,8 @@ def mergeObsDatabase(self, source_db_path): self.dbhandle.execute(f"attach database '{source_db_path}' as sourcedb") res = self.dbhandle.execute("SELECT name FROM sourcedb.sqlite_master WHERE name='paired_obs'") if res.fetchone() is None: - status = False + # table is missing so nothing to do + status = True else: try: self.dbhandle.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') From 70b59ce4a7e804cfcedd4d9c01ac93cb05a47499 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 19:00:48 +0000 Subject: [PATCH 116/132] add more debug --- wmpl/Trajectory/CorrelateDB.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index d08fa283..4dac856a 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -126,7 +126,11 @@ def unpairObs(self, obs_ids, verbose=True): except Exception: pass + if verbose: + log.info('committing') self.dbhandle.commit() + if verbose: + log.info('done') return True From 8071f49b1f7b0d58f0abfcd28a2adda4dfb76fdc Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 19:21:20 +0000 Subject: [PATCH 117/132] delete rather than update when unpairing --- wmpl/Trajectory/CorrelateDB.py | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 4dac856a..8c665e20 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -122,7 +122,7 @@ def unpairObs(self, obs_ids, verbose=True): log.info(f'unpairing {obs_ids}') try: obs_ids_str = ','.join(obs_ids) - self.dbhandle.execute(f"update paired_obs set status=0 where obs_id in ('{obs_ids_str}')") + self.dbhandle.execute(f"delete from paired_obs where obs_id in ('{obs_ids_str}')") except Exception: pass From f66330834f3ab80d7c69da97914858997500fc65 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 27 Feb 2026 22:23:52 +0000 Subject: [PATCH 118/132] handle unpairing more efficiently with multi-insert --- wmpl/Trajectory/CorrelateDB.py | 24 ++++++++++++------------ wmpl/Trajectory/CorrelateEngine.py | 16 ++++++---------- wmpl/Trajectory/CorrelateRMS.py | 9 +++++---- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 8c665e20..497aa487 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -114,23 +114,23 @@ def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): return True - def unpairObs(self, obs_ids, verbose=True): + def unpairObs(self, met_obs_list, verbose=True): # if an entry exists, update the status to 0. # this allows us to mark an observation paired during candidate creation, then unpair it later if the solution fails # or we want to force a rerun. 
+ obs_ids_str = ','.join([f"'{met_obs.id}'" for met_obs in met_obs_list]) + if verbose: - log.info(f'unpairing {obs_ids}') - try: - obs_ids_str = ','.join(obs_ids) - self.dbhandle.execute(f"delete from paired_obs where obs_id in ('{obs_ids_str}')") - except Exception: - pass - - if verbose: - log.info('committing') + log.info(f'unpairing {obs_ids_str}') + self.dbhandle.execute(f"delete from paired_obs where obs_id in ({obs_ids_str})") + data = [] + for met_obs in met_obs_list: + data.append(f"('{met_obs.station_code}','{met_obs.id}', {datetime2JD(met_obs.mean_dt)}, 0)") + data_str = ','.join(data) + sqlstr = f"insert or replace into paired_obs values {data_str}" + self.dbhandle.execute(sqlstr) + self.dbhandle.commit() - if verbose: - log.info('done') return True diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index f2399807..b1f13aee 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -875,8 +875,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") if matched_obs: - obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in matched_obs] - self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) + self.dh.observations_db.unpairObs([obs[1] for obs in matched_obs], verbose=verbose) return False # If there are only two stations, make sure to reject solutions which have stations with @@ -891,8 +890,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Add the trajectory to the list of failed trajectories self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=verbose) - obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in matched_obs] - self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) + self.dh.observations_db.unpairObs([obs[1] for obs in matched_obs], verbose=verbose) return False @@ -1442,8 +1440,8 @@ def run(self, 
event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver else: log.info("New trajectory solution failed, keeping the old trajectory...") - obs_ids = [met_obs_temp.id for met_obs_temp, _ in candidate_observations] - self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) + # note: candidate_observations has a different structure to elsewhere! + self.dh.observations_db.unpairObs([obs[0] for obs in candidate_observations], verbose=verbose) ### ### @@ -1725,8 +1723,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref, verbose=verbose) log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") - obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in matched_observations] - self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) + self.dh.observations_db.unpairObs([obs[1] for obs in matched_observations], verbose=verbose) continue @@ -1790,8 +1787,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # new observations are added if self.dh.checkTrajIfFailed(traj): log.info("The same trajectory already failed to be computed in previous runs!") - obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in matched_observations] - self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) + self.dh.observations_db.unpairObs([obs[1] for obs in matched_observations], verbose=verbose) continue # pass in matched_observations here so that solveTrajectory can mark them paired if they're used diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index b5045abc..b6f0aabc 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1384,9 +1384,8 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire if self.checkTrajIfFailed(traj): log.info(f'Candidate at {ref_dt.isoformat()} already failed, skipping') - obs_ids = [met_obs_temp.id for _, met_obs_temp, _ in 
cand] - self.dh.observations_db.unpairObs(obs_ids, verbose=verbose) - remaining_unpaired -= len(obs_ids) + self.dh.observations_db.unpairObs([obs[1] for obs in cand], verbose=verbose) + remaining_unpaired -= len(cand) else: candidate_trajectories.append(cand) @@ -1584,7 +1583,9 @@ def getRemoteData(self, verbose=False): def saveCandidates(self, candidate_trajectories, verbose=False): for matched_observations in candidate_trajectories: ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) - ctries = '_'.join(list(set([met_obs.station_code[:2] for _, met_obs, _ in matched_observations]))) + ctry_list = list(set([met_obs.station_code[:2] for _, met_obs, _ in matched_observations])) + ctry_list.sort() + ctries = '_'.join(ctry_list) picklename = f'{ref_dt.timestamp():.6f}_{ctries}.pickle' # this function can also save a candidate From 6e20cf71798b04a1aa10f8538ae51610849e6256 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 3 Mar 2026 11:40:43 +0000 Subject: [PATCH 119/132] rename function more sensibly --- wmpl/Trajectory/CorrelateRMS.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index b6f0aabc..219c6b2d 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1287,7 +1287,7 @@ def saveTrajectoryResults(self, traj, save_plots, verbose=False): log.info(f'saved {traj.traj_id} to {output_dir}') if self.mc_mode & MCMODE_PHASE1 and not self.mc_mode & MCMODE_PHASE2: - self.savePhase1Trajectory(traj, traj.pre_mc_longname + '_trajectory.pickle', verbose=verbose) + self.saveCandOrTraj(traj, traj.pre_mc_longname + '_trajectory.pickle', verbose=verbose) elif self.mc_mode & MCMODE_PHASE2: # the MC phase may alter the trajectory details and if later on @@ -1589,13 +1589,13 @@ def saveCandidates(self, candidate_trajectories, verbose=False): picklename = f'{ref_dt.timestamp():.6f}_{ctries}.pickle' # this function can also save a 
candidate - self.savePhase1Trajectory(matched_observations, picklename, 'candidates', verbose=verbose) + self.saveCandOrTraj(matched_observations, picklename, 'candidates', verbose=verbose) log.info("-----------------------") log.info(f'Saved {len(candidate_trajectories)} candidates') log.info("-----------------------") - def savePhase1Trajectory(self, traj, file_name, savetype='phase1', verbose=False): + def saveCandOrTraj(self, traj, file_name, savetype='phase1', verbose=False): """ in mcmode MCMODE_PHASE1 or MCMODE_SIMPLE , save the candidates or phase 1 trajectories and distribute as appropriate From 821bf681d716819c240c032ad8e00754fa25a1d2 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 3 Mar 2026 23:29:42 +0000 Subject: [PATCH 120/132] switch logic round to mark paired after successful phase1. This reduces db writes --- wmpl/Trajectory/CorrelateDB.py | 44 ++++++++------------- wmpl/Trajectory/CorrelateEngine.py | 61 +++++++++++++----------------- wmpl/Trajectory/CorrelateRMS.py | 34 +++++++++++++---- 3 files changed, 68 insertions(+), 71 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 497aa487..26ee91b3 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -77,13 +77,12 @@ def commitObsDatabase(self): def closeObsDatabase(self): # close the database, making sure we commit any pending updates - self.dbhandle.commit() + self.commitObsDatabase() self.dbhandle.close() self.dbhandle = None return - - def checkObsPaired(self, station_code, obs_id, verbose=False): + def checkObsPaired(self, obs_id, verbose=False): # return True if there is an observation with the correct station code, obs id and with status = 1 paired = True @@ -94,45 +93,33 @@ def checkObsPaired(self, station_code, obs_id, verbose=False): log.info(f'{obs_id} is {"Paired" if paired else "Unpaired"}') return paired - def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): # add or update an entry in 
the database, setting status = 1 - # Note that we do not commit the database as this would cause problems if we have to - # stop and restart processing mid-way through a pairing run. By leaving the data uncommitted - # we ensure that if the process crashes, then data will be left unpaired and we can rerun the - # pairing routine safely. - if verbose: log.info(f'adding {obs_id} to paired_obs table') sqlstr = f"insert or replace into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" - self.dbhandle.execute(sqlstr) - - if not self.checkObsPaired(station_code, obs_id): + try: + self.dbhandle.execute(sqlstr) + self.dbhandle.commit() + return True + except Exception: log.warning(f'failed to add {obs_id} to paired_obs table') - return False - return True - + return False def unpairObs(self, met_obs_list, verbose=True): # if an entry exists, update the status to 0. - # this allows us to mark an observation paired during candidate creation, then unpair it later if the solution fails - # or we want to force a rerun. 
obs_ids_str = ','.join([f"'{met_obs.id}'" for met_obs in met_obs_list]) if verbose: log.info(f'unpairing {obs_ids_str}') - self.dbhandle.execute(f"delete from paired_obs where obs_id in ({obs_ids_str})") - data = [] - for met_obs in met_obs_list: - data.append(f"('{met_obs.station_code}','{met_obs.id}', {datetime2JD(met_obs.mean_dt)}, 0)") - data_str = ','.join(data) - sqlstr = f"insert or replace into paired_obs values {data_str}" - self.dbhandle.execute(sqlstr) - - self.dbhandle.commit() - return True - + try: + self.dbhandle.execute(f"update paired_obs set status = 0 where obs_id in ({obs_ids_str})") + self.dbhandle.commit() + return True + except Exception: + log.warning(f'failed to unpair {obs_ids_str}') + return False def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): # archive records older than archdate_jd to a database {arch_prefix}_observations.db @@ -190,7 +177,6 @@ def moveObsJsonRecords(self, paired_obs, dt_range): self.dbhandle.commit() log.info(f'done - moved {i} observations') log.info('-----------------------------') - return def mergeObsDatabase(self, source_db_path): diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index b1f13aee..560ff8e8 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -670,6 +670,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or except ValueError as e: log.info("Error during trajectory estimation!") print(e) + # TODO do we need to add the trajectory to the failed traj database here? 
return False @@ -745,10 +746,6 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or ang_res_median = np.median(ang_res_list) - # ### DEBUG PRINT - # print(obs.station_id, 'ang res:', np.degrees(obs.ang_res_std)*3600, \ - # np.degrees(ang_res_median)*3600) - # Check if the current observations is larger than the minimum limit, and # outside the median limit or larger than the maximum limit if (obs.ang_res_std > np.radians(self.traj_constraints.min_arcsec_err/3600)) \ @@ -850,7 +847,8 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or except ValueError as e: log.info("Error during trajectory estimation!") print(e) - return False + skip_trajectory = True + break # If the trajectory estimation failed, skip this trajectory @@ -870,12 +868,9 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # Skip the trajectory if no good solution was found if skip_trajectory: # Add the trajectory to the list of failed trajectories - self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=verbose) ref_dt = jd2Date(min([met_obs.jdt_ref for met_obs in traj.observations]), dt_obj=True) log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") - - if matched_obs: - self.dh.observations_db.unpairObs([obs[1] for obs in matched_obs], verbose=verbose) + self.dh.addTrajectory(traj, failed_jdt_ref=jdt_ref, verbose=verbose) return False # If there are only two stations, make sure to reject solutions which have stations with @@ -886,11 +881,9 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or ref_dt = jd2Date(min([met_obs.jdt_ref for met_obs in traj.observations]), dt_obj=True) log.info("2 station only solution, one station has an error above the maximum limit, skipping!") - log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") - # Add the trajectory to the list of failed trajectories + log.info(f"Trajectory at 
{ref_dt.isoformat()} skipped and added to fails!") self.dh.addTrajectory(traj_status, failed_jdt_ref=jdt_ref, verbose=verbose) - self.dh.observations_db.unpairObs([obs[1] for obs in matched_obs], verbose=verbose) return False @@ -1061,13 +1054,20 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info('Saving trajectory....') self.dh.saveTrajectoryResults(traj, self.traj_constraints.save_plots) + + # we do not need to update the database for phase2 if mcmode != MCMODE_PHASE2: - # we do not need to update the database for phase2 log.info('Updating database....') self.dh.addTrajectory(traj) + if matched_obs is not None: + for _, obs, _ in matched_obs: + self.dh.observations_db.addPairedObs(obs.station_code, obs.id, obs.mean_dt, verbose=verbose) + else: log.info('unable to fit trajectory') + # TODO add failed traj to database here ? + # self.dh.addTrajectory(traj, blabla) return successful_traj_fit @@ -1077,8 +1077,10 @@ def mergeBrokenCandidates(self, candidate_trajectories): log.info("---------------------------") log.info("3) MERGING BROKEN OBSERVATIONS") log.info("---------------------------") + log.info(f"Initially {len(candidate_trajectories)} candidates") merged_candidate_trajectories = [] merged_indices = [] + total_obs_used = 0 for i, traj_cand_ref in enumerate(candidate_trajectories): # Skip candidate trajectories that have already been merged @@ -1089,6 +1091,7 @@ def mergeBrokenCandidates(self, candidate_trajectories): # Stop the search if the end has been reached if (i + 1) == len(candidate_trajectories): merged_candidate_trajectories.append(traj_cand_ref) + total_obs_used += len(traj_cand_ref) break @@ -1176,18 +1179,18 @@ def mergeBrokenCandidates(self, candidate_trajectories): traj_cand_ref.append(entry) log.info("Merged radiant: RA = {:.2f}, Dec = {:.2f}".format(np.degrees(ra_mean_test), np.degrees(dec_mean_test))) - log.info(f'Candidate contains {len(traj_cand_ref)} obs') # Mark that the current index has been 
processed merged_indices.append(i + j + 1) # Add the reference candidate observations to the list merged_candidate += traj_cand_ref + total_obs_used += len(traj_cand_ref) # Add the merged observation to the final list merged_candidate_trajectories.append(merged_candidate) - return merged_candidate_trajectories + return merged_candidate_trajectories, total_obs_used def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, verbose=False): @@ -1204,7 +1207,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if mcmode != MCMODE_PHASE2: if mcmode & MCMODE_CANDS: - # Get unpaired observations, filter out observations with too little points and sort them by time + # Get unpaired observations, filter out observations with too few points and sort them by time unpaired_observations_all = self.dh.getUnpairedObservations() unpaired_observations_all = [mettmp for mettmp in unpaired_observations_all if len(mettmp.data) >= self.traj_constraints.min_meas_pts] @@ -1289,7 +1292,6 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if (met_obs.reference_dt >= bin_beg) and (met_obs.reference_dt <= bin_end)] total_unpaired = len(unpaired_observations) - remaining_unpaired = total_unpaired log.info(f'Analysing {total_unpaired} observations in this bucket...') ### CHECK FOR PAIRING WITH PREVIOUSLY ESTIMATED TRAJECTORIES ### @@ -1429,19 +1431,15 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # pass in orig_traj here so that it can be deleted from disk if the new solution succeeds successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=mcmode, orig_traj=traj_reduced, verbose=verbose) - # If the new trajectory solution succeeded, remove the now-paired observations + # If the new trajectory solution succeeded, remove the now-paired observations from the in memory list if successful_traj_fit: log.info("Remove paired observations from the processing 
list...") for _, met_obs_temp in candidate_observations: unpaired_observations.remove(met_obs_temp) - if self.dh.observations_db.addPairedObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt): - remaining_unpaired -= 1 else: log.info("New trajectory solution failed, keeping the old trajectory...") - # note: candidate_observations has a different structure to elsewhere! - self.dh.observations_db.unpairObs([obs[0] for obs in candidate_observations], verbose=verbose) ### ### @@ -1462,7 +1460,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if met_obs.processed: continue - if self.dh.observations_db.checkObsPaired(met_obs.station_code, met_obs.id, verbose=verbose): + if self.dh.observations_db.checkObsPaired(met_obs.id, verbose=True): continue # Get station platepar @@ -1544,8 +1542,6 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Mark observations as processed for _, met_obs_temp, _ in matched_observations: met_obs_temp.processed = True - if self.dh.observations_db.addPairedObs(met_obs_temp.station_code, met_obs_temp.id, met_obs_temp.mean_dt, verbose=verbose): - remaining_unpaired -= 1 # Store candidate trajectory group # Note that this will include candidate groups that already failed on previous runs. @@ -1557,11 +1553,11 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver candidate_trajectories.append(matched_observations) # Check for mergeable candidate combinations then remove any that already failed. 
- merged_candidate_trajectories = self.mergeBrokenCandidates(candidate_trajectories) - candidate_trajectories, remaining_unpaired = self.dh.excludeAlreadyFailedCandidates(merged_candidate_trajectories, remaining_unpaired) + merged_candidate_trajectories, num_obs_paired = self.mergeBrokenCandidates(candidate_trajectories) + candidate_trajectories, num_obs_paired = self.dh.excludeAlreadyFailedCandidates(merged_candidate_trajectories, num_obs_paired) log.info("-----------------------") - log.info(f'There are {remaining_unpaired} remaining unpaired observations in this bucket.') + log.info(f'There are {total_unpaired - num_obs_paired} remaining unpaired observations in this bucket.') log.info("-----------------------") # in candidate mode we want to save the candidates to disk @@ -1720,10 +1716,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if t0 != 0.0: failed_traj.jdt_ref = failed_traj.jdt_ref + t0/86400.0 - self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref, verbose=verbose) - log.info(f"Trajectory at {ref_dt.isoformat()} skipped and added to fails!") - self.dh.observations_db.unpairObs([obs[1] for obs in matched_observations], verbose=verbose) + self.dh.addTrajectory(failed_traj, failed_traj.jdt_ref, verbose=verbose) continue @@ -1787,10 +1781,9 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # new observations are added if self.dh.checkTrajIfFailed(traj): log.info("The same trajectory already failed to be computed in previous runs!") - self.dh.observations_db.unpairObs([obs[1] for obs in matched_observations], verbose=verbose) continue - # pass in matched_observations here so that solveTrajectory can mark them paired if they're used + # pass in matched_observations here so that we can mark them paired if they're used result = self.solveTrajectory(traj, mc_runs, mcmode=mcmode, matched_obs=matched_observations, verbose=verbose) traj_solved_count += int(result) @@ -1820,7 +1813,7 @@ def 
run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # This will increase the number of MC runs while keeping the processing time the same mc_runs = int(np.ceil(mc_runs/self.traj_constraints.mc_cores)*self.traj_constraints.mc_cores) - # pass in matched_observations here so that solveTrajectory can mark them unpaired if the solver fails + # pass in matched_observations here so that we can mark them unpaired if the solver fails result = self.solveTrajectory(traj, mc_runs, mcmode=mcmode, matched_obs=matched_observations, orig_traj=traj, verbose=verbose) traj_solved_count += int(result) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 219c6b2d..0508550f 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -548,11 +548,15 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode # no need to load the legacy JSON file if we already have the sqlite databases if not os.path.isfile(os.path.join(db_dir, 'observations.db')) and \ not os.path.isfile(os.path.join(db_dir, 'trajectories.db')): - log.info("Loading database: {:s}".format(database_path)) + log.info("Loading old JSON database: {:s}".format(database_path)) self.old_db = DatabaseJSON(database_path, verbose=self.verbose) else: self.old_db = None + # REVISIT THIS LATER + #if mcmode == MCMODE_PHASE1 and self.checkRemoteDataMode() == 'master': + # self.observations_db = ObservationDatabase(self.phase1_dir, 'phase1_paired.db') + #else: self.observations_db = ObservationDatabase(db_dir) if hasattr(self.old_db, 'paired_obs'): # move any legacy paired obs data into sqlite @@ -584,6 +588,11 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode # in phase 1, initialise and collect data second as we load candidates dynamically self.initialiseRemoteDataHandling() + # in phase1, if we're the master node, write observations updates to a temp database + if self.RemoteDatahandler and 
self.RemoteDatahandler.mode == 'master' and mcmode == MCMODE_PHASE1: + self.observations_db.closeObsDatabase() + self.observations_db = ObservationDatabase(self.phase1_dir) + else: # in phase 2, initialise and collect data first as we need the phase1 traj on disk already self.traj_db = None @@ -616,6 +625,15 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode ### ### + def checkRemoteDataMode(self): + remote_cfg = os.path.join(self.db_dir, 'wmpl_remote.cfg') + if os.path.isfile(remote_cfg): + self.RemoteDatahandler = RemoteDataHandler(remote_cfg) + return self.RemoteDatahandler.mode + else: + return 'none' + + def initialiseRemoteDataHandling(self): # Initialise remote data handling, if the config file is present remote_cfg = os.path.join(self.db_dir, 'wmpl_remote.cfg') @@ -882,7 +900,7 @@ def loadUnpairedObservations(self, processing_list, dt_range=None): continue # Add only unpaired observations - if not self.observations_db.checkObsPaired(met_obs.station_code, met_obs.id): + if not self.observations_db.checkObsPaired(met_obs.id, verbose=verbose): # print(" ", station_code, met_obs.reference_dt, rel_proc_path) added_count += 1 unpaired_met_obs_list.append(met_obs) @@ -1170,7 +1188,7 @@ def findTimePairs(self, met_obs, unpaired_observations, max_toffset, verbose=Fal # Go through all meteors from other stations for met_obs2 in unpaired_observations: - if self.observations_db.checkObsPaired(met_obs2.station_code, met_obs2.id, verbose=verbose): + if self.observations_db.checkObsPaired(met_obs2.id, verbose=verbose): continue # Take only observations from different stations @@ -1353,7 +1371,7 @@ def removeTrajectory(self, traj_reduced, remove_phase1=False): self.traj_db.removeTrajectory(traj_reduced) - def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaired, verbose=False): + def excludeAlreadyFailedCandidates(self, matched_observations, num_obs_paired, verbose=False): # go through the candidates and check if 
they correspond to already-failed candidate_trajectories=[] @@ -1384,12 +1402,11 @@ def excludeAlreadyFailedCandidates(self, matched_observations, remaining_unpaire if self.checkTrajIfFailed(traj): log.info(f'Candidate at {ref_dt.isoformat()} already failed, skipping') - self.dh.observations_db.unpairObs([obs[1] for obs in cand], verbose=verbose) - remaining_unpaired -= len(cand) + num_obs_paired -= len(cand) else: candidate_trajectories.append(cand) - return candidate_trajectories, max(0,remaining_unpaired) + return candidate_trajectories, num_obs_paired def checkTrajIfFailed(self, traj): """ Check if the given trajectory has been computed with the same observations and has failed to be @@ -1588,7 +1605,8 @@ def saveCandidates(self, candidate_trajectories, verbose=False): ctries = '_'.join(ctry_list) picklename = f'{ref_dt.timestamp():.6f}_{ctries}.pickle' - # this function can also save a candidate + if verbose: + log.info(f'Candidate {picklename} contains {len(matched_observations)} observations') self.saveCandOrTraj(matched_observations, picklename, 'candidates', verbose=verbose) log.info("-----------------------") From 5fe13e04b40ed678ce716aa71cfc113e86e9a811 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 3 Mar 2026 23:56:41 +0000 Subject: [PATCH 121/132] remove unnecessary commit --- wmpl/Trajectory/CorrelateEngine.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 560ff8e8..1491aa6c 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1460,7 +1460,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if met_obs.processed: continue - if self.dh.observations_db.checkObsPaired(met_obs.id, verbose=True): + if self.dh.observations_db.checkObsPaired(met_obs.id, verbose=verbose): continue # Get station platepar @@ -1567,9 +1567,6 @@ def run(self, event_time_range=None, 
bin_time_range=None, mcmode=MCMODE_ALL, ver log.info("-----------------------") self.dh.saveCandidates(candidate_trajectories, verbose=verbose) - - # now we've saved the candidates we can commit the changes - self.dh.observations_db.commitObsDatabase() return len(candidate_trajectories) From cfc0b8112250aa130e93bd56427622b495d4befb Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Wed, 4 Mar 2026 00:47:29 +0000 Subject: [PATCH 122/132] pass new obs to solveTrajectory so they can be marked paired if needed --- wmpl/Trajectory/CorrelateEngine.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 1491aa6c..23884255 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1429,7 +1429,9 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Re-run the trajectory fit # pass in orig_traj here so that it can be deleted from disk if the new solution succeeds - successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=mcmode, orig_traj=traj_reduced, verbose=verbose) + # pass the new candidates in so that they can be marked paired if the new soln succeeds + successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=mcmode, + matched_obs=candidate_observations, orig_traj=traj_reduced, verbose=verbose) # If the new trajectory solution succeeded, remove the now-paired observations from the in memory list if successful_traj_fit: @@ -1567,7 +1569,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info("-----------------------") self.dh.saveCandidates(candidate_trajectories, verbose=verbose) - + return len(candidate_trajectories) else: From 7db6bdfd19d81126c15e3e902c8ca3263ee66d22 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 5 Mar 2026 22:28:58 +0000 Subject: [PATCH 123/132] bug in phase1 processing --- 
wmpl/Trajectory/CorrelateRMS.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 0508550f..3b33eceb 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -1360,7 +1360,7 @@ def removeTrajectory(self, traj_reduced, remove_phase1=False): shutil.rmtree(traj_dir, ignore_errors=True) return - if self.mcmode & MCMODE_PHASE1 and remove_phase1: + if self.mc_mode & MCMODE_PHASE1 and remove_phase1: # remove any solution from the phase1 folder phase1_traj = os.path.join(self.phase1_dir, os.path.basename(traj_reduced.traj_file_path)) if os.path.isfile(phase1_traj): @@ -1386,6 +1386,7 @@ def excludeAlreadyFailedCandidates(self, matched_observations, num_obs_paired, v # Normalize the observations to the reference Julian date jdt_ref_curr = datetime2JD(met_obs.reference_dt) obs_temp.time_data += (jdt_ref_curr - jdt_ref)*86400 + obs_temp.jdt_ref = jdt_ref traj.infillWithObs(obs_temp) From ac19e59a0d6c94867c5246c542e6434f3f01f12f Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 5 Mar 2026 22:29:49 +0000 Subject: [PATCH 124/132] working on ref_dt bug --- wmpl/Trajectory/CorrelateEngine.py | 31 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 23884255..041a1524 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1048,7 +1048,7 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or if orig_traj: log.info(f"Removing the previous solution {os.path.dirname(orig_traj.traj_file_path)} ...") remove_phase1 = True if abs(round((traj.jdt_ref-orig_traj.jdt_ref)*86400000,0)) > 0 else False - self.dh.removeTrajectory(orig_traj, remove_phase1) + self.dh.removeTrajectory(orig_traj, remove_phase1=remove_phase1) traj.pre_mc_longname = 
os.path.split(self.dh.generateTrajOutputDirectoryPath(orig_traj, make_dirs=False))[-1] log.info('Saving trajectory....') @@ -1060,14 +1060,16 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or log.info('Updating database....') self.dh.addTrajectory(traj) if matched_obs is not None: - for _, obs, _ in matched_obs: - self.dh.observations_db.addPairedObs(obs.station_code, obs.id, obs.mean_dt, verbose=verbose) + if len(matched_obs[0])==3: + for _, obs, _ in matched_obs: + self.dh.observations_db.addPairedObs(obs.station_code, obs.id, obs.mean_dt, verbose=verbose) + else: + for _, obs in matched_obs: + self.dh.observations_db.addPairedObs(obs.station_code, obs.id, obs.mean_dt, verbose=verbose) else: log.info('unable to fit trajectory') - # TODO add failed traj to database here ? - # self.dh.addTrajectory(traj, blabla) return successful_traj_fit @@ -1430,7 +1432,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Re-run the trajectory fit # pass in orig_traj here so that it can be deleted from disk if the new solution succeeds # pass the new candidates in so that they can be marked paired if the new soln succeeds - successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=mcmode, + # Note: mcmode must be phase1 here to force a recompute + successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=MCMODE_PHASE1, matched_obs=candidate_observations, orig_traj=traj_reduced, verbose=verbose) # If the new trajectory solution succeeded, remove the now-paired observations from the in memory list @@ -1744,8 +1747,11 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Init the solver (use the earliest date as the reference) - ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) - jdt_ref = datetime2JD(ref_dt) + #ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) + #jdt_ref = 
datetime2JD(ref_dt) + jdt_ref = min([obs_temp.jdt_ref for obs_temp, _, _ in matched_observations]) + + log.info(f'ref_dt {jd2Date(jdt_ref, dt_obj=True)}') traj = self.initTrajectory(jdt_ref, mc_runs, verbose=verbose) @@ -1753,8 +1759,10 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver for obs_temp, met_obs, _ in matched_observations: # Normalize the observations to the reference Julian date - jdt_ref_curr = datetime2JD(met_obs.reference_dt) + jdt_ref_curr = obs_temp.jdt_ref # datetime2JD(met_obs.reference_dt) obs_temp.time_data += (jdt_ref_curr - jdt_ref)*86400 + # we have normalised the time data to jdt_ref, now we need to reset jdt_ref for each obs too + obs_temp.jdt_ref = jdt_ref traj.infillWithObs(obs_temp) @@ -1766,16 +1774,17 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # If the first time is not 0, normalize times so that the earliest time is 0 if t0 != 0.0: - + log.info(f'adjusting by {t0}') # Offset all times by t0 for i in range(len(traj.observations)): traj.observations[i].time_data -= t0 - + log.info(f'obs jdt_ref is {jd2Date(traj.observations[i].jdt_ref, dt_obj=True)}') # Recompute the reference JD to corresponds with t0 traj.jdt_ref = traj.jdt_ref + t0/86400.0 + log.info(f'ref_dt {jd2Date(traj.jdt_ref, dt_obj=True)}') # If this trajectory already failed to be computed, don't try to recompute it again unless # new observations are added if self.dh.checkTrajIfFailed(traj): From ced94c2156297942a126d7edfc4d355cd16c669b Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Thu, 5 Mar 2026 22:30:03 +0000 Subject: [PATCH 125/132] add debug messages to help testing --- wmpl/Trajectory/CorrelateDB.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 26ee91b3..1d0fc9c0 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -58,15 +58,18 @@ def openObsDatabase(self, db_path, 
db_name='observations.db', purge_records=Fals con = sqlite3.connect(db_full_name) con.execute('pragma journal_mode=wal') if purge_records: + log.info('purge: write to obsdb') con.execute('drop table paired_obs') res = con.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") if res.fetchone() is None: + log.info('create table: write to obsdb') con.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") con.commit() return con def commitObsDatabase(self): # commit the obs db. This function exists so we can do lazy writes in some cases + log.info('commit: write to obsdb') self.dbhandle.commit() try: self.dbhandle.execute('pragma wal_checkpoint(TRUNCATE)') @@ -100,6 +103,7 @@ def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): log.info(f'adding {obs_id} to paired_obs table') sqlstr = f"insert or replace into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" try: + log.info('update: write to obsdb') self.dbhandle.execute(sqlstr) self.dbhandle.commit() return True @@ -114,6 +118,7 @@ def unpairObs(self, met_obs_list, verbose=True): if verbose: log.info(f'unpairing {obs_ids_str}') try: + log.info('update: write to obsdb') self.dbhandle.execute(f"update paired_obs set status = 0 where obs_id in ({obs_ids_str})") self.dbhandle.commit() return True @@ -145,6 +150,7 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): except Exception: log.info(f'{row[1]} already exists in target') + log.info('delete: write to obsdb') self.dbhandle.execute(f'delete from paired_obs where obs_date < {archdate_jd}') self.dbhandle.commit() return @@ -193,6 +199,7 @@ def mergeObsDatabase(self, source_db_path): status = True else: try: + log.info('insert: write to obsdb') self.dbhandle.execute('insert or replace into paired_obs select * from sourcedb.paired_obs') status = True except Exception as e: @@ -233,11 +240,13 @@ def openTrajDatabase(self, db_path, 
db_name='trajectories.db', purge_records=Fal log.info(f'opening database {db_full_name}') con = sqlite3.connect(db_full_name) if purge_records: + log.info('purge: write to trajdb') con.execute('drop table if exists trajectories') con.execute('drop table if exists failed_trajectories') con.commit() res = con.execute("SELECT name FROM sqlite_master WHERE name='trajectories'") if res.fetchone() is None: + log.info('create table: write to trajdb') con.execute("""CREATE TABLE trajectories( jdt_ref REAL UNIQUE, traj_id VARCHAR UNIQUE, @@ -264,6 +273,7 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal res = con.execute("SELECT name FROM sqlite_master WHERE name='failed_trajectories'") if res.fetchone() is None: # note: traj_id not unique here as some fails will have traj-id None + log.info('create table: write to trajdb') con.execute("""CREATE TABLE failed_trajectories( jdt_ref REAL UNIQUE, traj_id VARCHAR, @@ -283,12 +293,14 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal def commitTrajDatabase(self): # commit the obs db. 
This function exists so we can do lazy writes in some cases + log.info('commit: write to trajdb') self.dbhandle.commit() return def closeTrajDatabase(self): # close the database, making sure we commit any pending updates + log.info('commit: write to trajdb') self.dbhandle.commit() self.dbhandle.close() self.dbhandle = None @@ -356,6 +368,7 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): f"{traj_reduced.rend_lat},{traj_reduced.rend_lon},{traj_reduced.rend_ele},1)") sql_str = sql_str.replace('nan','"NaN"') + log.info('insert: write to trajdb') self.dbhandle.execute(sql_str) self.dbhandle.commit() return True @@ -369,6 +382,7 @@ def removeTrajectory(self, traj_reduced, keepFolder=False, failed=False, verbose table_name = 'failed_trajectories' if failed else 'trajectories' try: + log.info('update: write to trajdb') self.dbhandle.execute(f"update {table_name} set status=0 where jdt_ref='{traj_reduced.jdt_ref}'") self.dbhandle.commit() except Exception: @@ -467,6 +481,7 @@ def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): # attach the arch db, copy the records then delete them archdb_fullname = os.path.join(db_path, f'{archdb_name}') cur = self.dbhandle.execute(f"attach database '{archdb_fullname}' as archdb") + log.info('delete: write to trajdb') for table_name in ['trajectories', 'failed_trajectories']: try: # bulk-copy if possible @@ -508,6 +523,7 @@ def mergeTrajDatabase(self, source_db_path): log.warning(f'source database missing: {source_db_path}') return # attach the other db, copy the records then detach it + log.info('insert: write to trajdb') cur = self.dbhandle.execute(f"attach database '{source_db_path}' as sourcedb") # TODO need to correct the traj_file_path to account for server locations From 9db92f76a9a456e87189edf2c144acee290ee5d3 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Fri, 6 Mar 2026 00:47:13 +0000 Subject: [PATCH 126/132] tidying up --- wmpl/Trajectory/CorrelateEngine.py | 4 +--- 1 file changed, 1 
insertion(+), 3 deletions(-) diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 041a1524..7ba3a0e0 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1747,8 +1747,6 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Init the solver (use the earliest date as the reference) - #ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) - #jdt_ref = datetime2JD(ref_dt) jdt_ref = min([obs_temp.jdt_ref for obs_temp, _, _ in matched_observations]) log.info(f'ref_dt {jd2Date(jdt_ref, dt_obj=True)}') @@ -1778,7 +1776,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Offset all times by t0 for i in range(len(traj.observations)): traj.observations[i].time_data -= t0 - log.info(f'obs jdt_ref is {jd2Date(traj.observations[i].jdt_ref, dt_obj=True)}') + # log.info(f'obs jdt_ref is {jd2Date(traj.observations[i].jdt_ref, dt_obj=True)}') # Recompute the reference JD to corresponds with t0 traj.jdt_ref = traj.jdt_ref + t0/86400.0 From e9d3c6e46ecfda7a39ab244c498f6f05967aaf0b Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sat, 7 Mar 2026 23:40:53 +0000 Subject: [PATCH 127/132] avoid reprocessing already processed candidates improve some db functions update documentation --- wmpl/Trajectory/CorrelateDB.py | 386 ++++++++++++++++++++--------- wmpl/Trajectory/CorrelateEngine.py | 38 ++- wmpl/Trajectory/CorrelateRMS.py | 93 ++++--- 3 files changed, 341 insertions(+), 176 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 1d0fc9c0..1dbde64e 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -29,7 +29,6 @@ import argparse import datetime import json -import shutil import numpy as np from wmpl.Utils.TrajConversions import datetime2JD, jd2Date @@ -43,33 +42,42 @@ class ObservationDatabase(): + """ + A class to handle the sqlite observations 
database transparently. + """ - # A class to handle the sqlite observations database transparently. + def __init__(self, db_path, db_name='observations.db', purge_records=False, verbose=False): + """ + Create an observations database instance - def __init__(self, db_path, db_name='observations.db', purge_records=False): - self.dbhandle = self.openObsDatabase(db_path, db_name, purge_records) - - def openObsDatabase(self, db_path, db_name='observations.db', purge_records=False): - # Open the database, creating it and adding the required table if necessary. - # If purge_records is true, delete any existing records. + Parameters: + db_path : path to the location of the database + db_name : name to use, typically observations.db + purge_records : boolean, if true then delete any existing records + """ db_full_name = os.path.join(db_path, f'{db_name}') log.info(f'opening database {db_full_name}') con = sqlite3.connect(db_full_name) con.execute('pragma journal_mode=wal') if purge_records: - log.info('purge: write to obsdb') + if verbose: + log.info('purge: write to obsdb') con.execute('drop table paired_obs') res = con.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") if res.fetchone() is None: - log.info('create table: write to obsdb') + if verbose: + log.info('create table: write to obsdb') con.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") con.commit() - return con - - def commitObsDatabase(self): - # commit the obs db. This function exists so we can do lazy writes in some cases - log.info('commit: write to obsdb') + self.dbhandle = con + + def _commitObsDatabase(self, verbose=False): + """ + Commit the obs db. 
This function exists so we can do lazy writes + """ + if verbose: + log.info('commit: write to obsdb') self.dbhandle.commit() try: self.dbhandle.execute('pragma wal_checkpoint(TRUNCATE)') @@ -78,15 +86,26 @@ def commitObsDatabase(self): return def closeObsDatabase(self): - # close the database, making sure we commit any pending updates + """ + Close the database, making sure we commit any pending updates + """ - self.commitObsDatabase() + self._commitObsDatabase() self.dbhandle.close() self.dbhandle = None return def checkObsPaired(self, obs_id, verbose=False): - # return True if there is an observation with the correct station code, obs id and with status = 1 + """ + Check if an observation is already marked paired + return True if there is an observation with the correct station code, obs id and with status = 1 + + Parameters: + obs_id : observation ID to check + + Returns: + True if paired, False otherwise + """ paired = True cur = self.dbhandle.execute(f"SELECT obs_id FROM paired_obs WHERE obs_id='{obs_id}' and status=1") @@ -97,13 +116,21 @@ def checkObsPaired(self, obs_id, verbose=False): return paired def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): - # add or update an entry in the database, setting status = 1 + """ + Add or update an entry in the database to mark an observation paired, setting status = 1 + + Parameters: + station_code : observation's station ID eg UK12345 + obs_id : observation ID + obs_date: : observation mean date + """ if verbose: log.info(f'adding {obs_id} to paired_obs table') sqlstr = f"insert or replace into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" try: - log.info('update: write to obsdb') + if verbose: + log.info('update: write to obsdb') self.dbhandle.execute(sqlstr) self.dbhandle.commit() return True @@ -111,8 +138,15 @@ def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): log.warning(f'failed to add {obs_id} to paired_obs table') return False - def 
unpairObs(self, met_obs_list, verbose=True): - # if an entry exists, update the status to 0. + def unpairObs(self, met_obs_list, verbose=False): + """ + Mark an observation unpaired. + If an entry exists in the database, update the status to 0. + Currently unused. + + Parameters: + met_obs_list : a list of MeteorObsRMS objects + """ obs_ids_str = ','.join([f"'{met_obs.id}'" for met_obs in met_obs_list]) if verbose: @@ -127,13 +161,18 @@ def unpairObs(self, met_obs_list, verbose=True): return False def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): - # archive records older than archdate_jd to a database {arch_prefix}_observations.db - - # create the database and table if it doesnt exist + """ + archive records older than archdate_jd to a database {arch_prefix}_observations.db + + Parameters: + db_path : path to the location of the archive database + arch_prefix : prefix to apply - typically of the form yyyymm + archdate_jd : julian date before which to archive data + """ + # create the database if it doesnt exist archdb_name = f'{arch_prefix}_observations.db' - archdb = self.openObsDatabase(db_path, archdb_name) - archdb.commit() - archdb.close() + archdb = ObservationDatabase(db_path, archdb_name) + archdb.closeObsDatabase() # attach the arch db, copy the records then delete them archdb_fullname = os.path.join(db_path, f'{archdb_name}') @@ -156,8 +195,17 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): return def moveObsJsonRecords(self, paired_obs, dt_range): - # only copy recent observations since if we ever run for an historic date - # its likely we will want to reanalyse all available obs anyway + """ + Copy recent data from the legacy Json database to the new database. + By design this only copies at most the last seven days, but a date-range can be + provided so that relevant data is copied. 
+ + Parameters: + paired_obs : a json list of paired observations from the old database + dt_range : a date range to operate on - at most seven days duration + + """ + # only copy recent observations since dt_end = dt_range[1] dt_beg = max(dt_range[0], dt_end + datetime.timedelta(days=-7)) @@ -186,7 +234,12 @@ def moveObsJsonRecords(self, paired_obs, dt_range): return def mergeObsDatabase(self, source_db_path): - # merge in records from another observation database, for example from a remote node + """ + Merge in records from another observation database 'source_db_path', for example from a remote node + + Parameters: + source_db_path : full name and path to the source database to merge from + """ if not os.path.isfile(source_db_path): log.warning(f'source database missing: {source_db_path}') @@ -214,27 +267,21 @@ def mergeObsDatabase(self, source_db_path): ############################################################ -class DummyTrajReduced(): - # a dummy class for use in a couple of fuctions in the TrajectoryDatabase - def __init__(self, jdt_ref=None, traj_id=None, traj_file_path=None, json_dict=None): - if json_dict is None: - self.jdt_ref = jdt_ref - self.traj_id = traj_id - self.traj_file_path = traj_file_path - else: - self.__dict__ = json_dict - class TrajectoryDatabase(): + """ + A class to handle the sqlite trajectory database transparently. + """ - # A class to handle the sqlite trajectory database transparently. + def __init__(self, db_path, db_name='trajectories.db', purge_records=False, verbose=False): + """ + initialise the trajectory database - def __init__(self, db_path, db_name='trajectories.db', purge_records=False): - self.dbhandle = self.openTrajDatabase(db_path, db_name, purge_records) - - def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=False): - # Open the database, creating it and adding the required table if necessary. - # If purge_records is true, delete any existing records. 
+ Parameters: + db_path : path to the location to store the database + db_name : database name + purge_records : boolean, if true, delete any existing records + """ db_full_name = os.path.join(db_path, f'{db_name}') log.info(f'opening database {db_full_name}') @@ -246,7 +293,8 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal con.commit() res = con.execute("SELECT name FROM sqlite_master WHERE name='trajectories'") if res.fetchone() is None: - log.info('create table: write to trajdb') + if verbose: + log.info('create table: write to trajdb') con.execute("""CREATE TABLE trajectories( jdt_ref REAL UNIQUE, traj_id VARCHAR UNIQUE, @@ -272,8 +320,9 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal res = con.execute("SELECT name FROM sqlite_master WHERE name='failed_trajectories'") if res.fetchone() is None: - # note: traj_id not unique here as some fails will have traj-id None - log.info('create table: write to trajdb') + # note: traj_id not set as unique as some fails will have traj-id None + if verbose: + log.info('create table: write to trajdb') con.execute("""CREATE TABLE failed_trajectories( jdt_ref REAL UNIQUE, traj_id VARCHAR, @@ -288,35 +337,83 @@ def openTrajDatabase(self, db_path, db_name='trajectories.db', purge_records=Fal status INTEGER) """) con.commit() - return con + self.dbhandle = con + return - def commitTrajDatabase(self): - # commit the obs db. This function exists so we can do lazy writes in some cases + def _commitTrajDatabase(self, verbose=False): + """ + commit the traj db. 
+ This function exists so we can do lazy writes in some cases + """ - log.info('commit: write to trajdb') + if verbose: + log.info('commit: write to trajdb') self.dbhandle.commit() return - def closeTrajDatabase(self): - # close the database, making sure we commit any pending updates + def closeTrajDatabase(self, verbose=False): + """ + close the database, making sure we commit any pending updates + """ - log.info('commit: write to trajdb') - self.dbhandle.commit() + if verbose: + log.info('commit: write to trajdb') + self._commitTrajDatabase() self.dbhandle.close() self.dbhandle = None return + def checkCandIfProcessed(self, jdt_ref, station_list, verbose=False): + """ + check if a candidate was already processed into the database + This function is not currently used. + + Parameters: + jdt_ref : candidate's julian reference date + station_list : candidate's list of stations + + Returns: + True if there is a trajectory with the same jdt_ref and matching list of stations as the candidate + """ + + found = False + res = self.dbhandle.execute(f"SELECT traj_id,participating_stations, ignored_stations FROM failed_trajectories WHERE jdt_ref={jdt_ref} and status=1") + row = res.fetchone() + if row is None: + found = False + else: + traj_stations = list(set(json.loads(row[1]) + json.loads(row[2]))) + found = True if (traj_stations == station_list) else False + if found: + return found + + res = self.dbhandle.execute(f"SELECT traj_id,participating_stations, ignored_stations FROM trajectories WHERE jdt_ref={jdt_ref} and status=1") + row = res.fetchone() + if row is None: + found = False + else: + traj_stations = list(set(json.loads(row[1]) + json.loads(row[2]))) + found = True if (traj_stations == station_list) else False + return found + def checkTrajIfFailed(self, traj_reduced, verbose=False): - # return True if there is an observation with the same jdt_ref and matching list of stations + """ + Check if a Trajectory was marked failed + + Parameters: + traj_reduced : a 
TrajReduced object + + Returns + True if there is a failed trajectory with the same jdt_ref and matching list of stations + """ if not hasattr(traj_reduced, 'jdt_ref') or not hasattr(traj_reduced, 'participating_stations') or not hasattr(traj_reduced, 'ignored_stations'): return False found = False station_list = list(set(traj_reduced.participating_stations + traj_reduced.ignored_stations)) - cur = self.dbhandle.cursor() - res = cur.execute(f"SELECT traj_id,participating_stations, ignored_stations FROM failed_trajectories WHERE jdt_ref={traj_reduced.jdt_ref} and status=1") + res = self.dbhandle.execute(f"SELECT traj_id,participating_stations, ignored_stations FROM failed_trajectories WHERE jdt_ref={traj_reduced.jdt_ref} and status=1") row = res.fetchone() if row is None: found = False @@ -326,10 +423,14 @@ def checkTrajIfFailed(self, traj_reduced, verbose=False): return found def addTrajectory(self, traj_reduced, failed=False, verbose=False): - # add or update an entry in the database, setting status = 1 + """ + add or update an entry in the database, setting status = 1 - # note that unlike the observations db we DO commit here because as soon as a solution is found - # we want to ensure we don't try to find it again on a rerun + Parameters: + traj_reduced : a TrajReduced object + failed : boolean, if true, add the traj to the fails list + + """ if verbose: log.info(f'adding jdt {traj_reduced.jdt_ref} to {"failed" if failed else "trajectories"}') @@ -368,38 +469,47 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): f"{traj_reduced.rend_lat},{traj_reduced.rend_lon},{traj_reduced.rend_ele},1)") sql_str = sql_str.replace('nan','"NaN"') - log.info('insert: write to trajdb') + if verbose: + log.info('insert: write to trajdb') self.dbhandle.execute(sql_str) self.dbhandle.commit() return True - def removeTrajectory(self, traj_reduced, keepFolder=False, failed=False, verbose=False): - # if an entry exists, update the status to 0. 
- # this allows us to mark an observation paired, then unpair it later if the solution fails - # or we want to force a rerun. + def removeTrajectory(self, traj_reduced, failed=False, verbose=False): + """ + Mark a trajectory unsolved + If an entry exists, update the status to 0. + + Parameters: + traj_reduced : a TrajReduced object + failed : boolean, if true then remove from the fails list + """ if verbose: log.info(f'removing {traj_reduced.traj_id}') table_name = 'failed_trajectories' if failed else 'trajectories' - try: + if verbose: log.info('update: write to trajdb') - self.dbhandle.execute(f"update {table_name} set status=0 where jdt_ref='{traj_reduced.jdt_ref}'") - self.dbhandle.commit() - except Exception: - # traj wasn't in the database so no action required - pass - - # Remove the trajectory folder on the disk - if not keepFolder and os.path.isfile(traj_reduced.traj_file_path): - traj_dir = os.path.dirname(traj_reduced.traj_file_path) - shutil.rmtree(traj_dir, ignore_errors=True) - if os.path.isfile(traj_reduced.traj_file_path): - log.info(f'unable to remove {traj_dir}') + self.dbhandle.execute(f"update {table_name} set status=0 where jdt_ref='{traj_reduced.jdt_ref}'") + self.dbhandle.commit() return True - def getTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, verbose=False): + def getTrajectories(self, output_dir, jdt_range, failed=False, verbose=False): + """ + Get a list of trajectories between two julian dates + + Parameters: + output_dir : output_dir specified when invoking CorrelateRMS - will be prepended to the trajectory path + jdt_range : tuple of julian dates to retrieve data between. 
if the 2nd date is None, retrieve all data to today + failed : boolean - if true, retrieve failed traj rather than successful ones + + Returns: + trajs: json list of traj_reduced objects + """ + + jdt_start, jdt_end = jdt_range table_name = 'failed_trajectories' if failed else 'trajectories' if verbose: @@ -428,55 +538,51 @@ def getTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, ver trajs.append(json_dict) return trajs - def getTrajNames(self, jdt_start=None, jdt_end=None, failed=False, verbose=False): + def getTrajBasics(self, output_dir, jdt_range, failed=False, verbose=False): + """ + Get a list of minimal trajectory details between two dates + + Parameters: + output_dir : output_dir specified when invoking CorrelateRMS - will be prepended to the trajectory path + jdt_range : tuple of julian dates to retrieve data betwee + failed : boolean, if true retrieve names of fails, otherwise retrieve successful + + Returns: + trajs: a json list of tuples of {jdt_ref, traj_id, traj_file_path} + + """ + jdt_start, jdt_end = jdt_range table_name = 'failed_trajectories' if failed else 'trajectories' if not jdt_start: - cur = self.dbhandle.execute(f"SELECT * FROM {table_name}") + cur = self.dbhandle.execute(f"SELECT jdt_ref, traj_id, traj_file_path FROM {table_name}") rows = cur.fetchall() elif not jdt_end: - cur = self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") + cur = self.dbhandle.execute(f"SELECT jdt_ref, traj_id, traj_file_path FROM {table_name} WHERE jdt_ref={jdt_start}") rows = cur.fetchall() else: - cur = self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") + cur = self.dbhandle.execute(f"SELECT jdt_ref, traj_id, traj_file_path FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") rows = cur.fetchall() trajs = [] for rw in rows: - trajs.append(rw[2]) + trajs.append({'jdt_ref':rw[0], 'traj_id':rw[1], 'traj_file_path':os.path.join(output_dir, rw[2])}) 
return trajs - - def removeDeletedTrajectories(self, output_dir, jdt_start, jdt_end=None, failed=False, verbose=False): - - table_name = 'failed_trajectories' if failed else 'trajectories' - if verbose: - log.info(f'getting trajectories between {jdt_start} and {jdt_end}') - - if not jdt_end: - cur = self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref={jdt_start}") - rows = cur.fetchall() - else: - cur = self.dbhandle.execute(f"SELECT * FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") - rows = cur.fetchall() - i = 0 - for rw in rows: - if not os.path.isfile(os.path.join(output_dir, rw[2])): - if verbose: - log.info(f'removing traj {jd2Date(rw[0], dt_obj=True).strftime("%Y%m%d_%M%M%S.%f")} from database') - self.removeTrajectory(DummyTrajReduced(jdt_ref=rw[0], traj_id=rw[1], traj_file_path=rw[2]), keepFolder=True) - i += 1 - log.info(f'removed {i} deleted trajectories') - return - - def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): + """ # archive records older than archdate_jd to a database {arch_prefix}_trajectories.db - # create the database and table if it doesnt exist + Parameters: + db_path : path to the location of the archive database + arch_prefix : prefix to apply - typically of the form yyyymm + archdate_jd : julian date before which to archive data + + """ + + # create the archive database if it doesnt exist archdb_name = f'{arch_prefix}_trajectories.db' - archdb = self.openObsDatabase(db_path, archdb_name) - archdb.commit() - archdb.close() + archdb = TrajectoryDatabase(db_path, archdb_name) + archdb.closeTrajDatabase() # attach the arch db, copy the records then delete them archdb_fullname = os.path.join(db_path, f'{archdb_name}') @@ -494,9 +600,17 @@ def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): return def moveFailedTrajectories(self, failed_trajectories, dt_range): + """ + Copy failed trajectories from the old Json database + We only copy recent records since if we ever run for an 
historic date + its likely we will want to reanalyse all available obs anyway + + Parameters: - # only copy recent records since if we ever run for an historic date - # its likely we will want to reanalyse all available obs anyway + failed_trajectories : json list of fails extracted from the old Json DB + dt_range: : date range to use, at most seven days at a time + + """ jd_end = datetime2JD(dt_range[1]) jd_beg = max(datetime2JD(dt_range[0]), jd_end - 7) @@ -509,15 +623,21 @@ def moveFailedTrajectories(self, failed_trajectories, dt_range): self.addTrajectory(failed_trajectories[jdt_ref], failed=True) i += 1 if not i % 10000: - self.commitTrajDatabase() + self._commitTrajDatabase() log.info(f'moved {i} failed_trajectories') - self.commitTrajDatabase() + self._commitTrajDatabase() log.info(f'done - moved {i} failed_trajectories') return def mergeTrajDatabase(self, source_db_path): - # merge in records from another observation database, for example from a remote node + """ + merge in records from another observation database, for example from a remote node + + Parameters: + source_db_path : the full name of the source database from which to merge in records + + """ if not os.path.isfile(source_db_path): log.warning(f'source database missing: {source_db_path}') @@ -541,11 +661,29 @@ def mergeTrajDatabase(self, source_db_path): return status ################################################################################## -# dummy classes for moving data from the old JSON database. Created here to -# avoid a circular import +# dummy classes for use in the above. +# We can't import from CorrelateRMS as this would create a circular reference + + +class DummyTrajReduced(): + """ + a dummy class for handling TrajReduced objects. 
+ We can't import CorrelateRMS as that would create a circular dependency + """ + def __init__(self, jdt_ref=None, traj_id=None, traj_file_path=None, json_dict=None): + if json_dict is None: + self.jdt_ref = jdt_ref + self.traj_id = traj_id + self.traj_file_path = traj_file_path + else: + self.__dict__ = json_dict class dummyDatabaseJSON(): + """ + Dummy class to handle the old Json data format + We can't import CorrelateRMS as that would create a circular dependency + """ def __init__(self, db_dir, dt_range=None): self.db_file_path = os.path.join(db_dir, 'processed_trajectories.json') self.paired_obs = {} diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 7ba3a0e0..23d09eb0 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1192,6 +1192,7 @@ def mergeBrokenCandidates(self, candidate_trajectories): # Add the merged observation to the final list merged_candidate_trajectories.append(merged_candidate) + log.info(f"After merging, there are {len(merged_candidate_trajectories)} candidates") return merged_candidate_trajectories, total_obs_used @@ -1557,9 +1558,12 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info(f" --- ADDING CANDIDATE at {ref_dt.isoformat()} ---") candidate_trajectories.append(matched_observations) - # Check for mergeable candidate combinations then remove any that already failed. 
+ # Check for mergeable candidate combinations merged_candidate_trajectories, num_obs_paired = self.mergeBrokenCandidates(candidate_trajectories) - candidate_trajectories, num_obs_paired = self.dh.excludeAlreadyFailedCandidates(merged_candidate_trajectories, num_obs_paired) + + # Now check and exclude already-processed candidates + # We can't do this earlier as we need to check mergeability first + candidate_trajectories = self.dh.checkAlreadyProcessed(merged_candidate_trajectories, verbose=verbose) log.info("-----------------------") log.info(f'There are {total_unpaired - num_obs_paired} remaining unpaired observations in this bucket.') @@ -1571,6 +1575,8 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info('5) SAVING {} CANDIDATES'.format(len(candidate_trajectories))) log.info("-----------------------") + # Save candidates. This will check and skip over already-processed + # combinations self.dh.saveCandidates(candidate_trajectories, verbose=verbose) return len(candidate_trajectories) @@ -1591,21 +1597,26 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info("-----------------------") save_path = self.dh.candidate_dir + procpath = os.path.join(save_path, 'processed') + os.makedirs(procpath, exist_ok=True) + # TODO use glob.glob here for fil in os.listdir(save_path): if '.pickle' not in fil: continue try: - loadedpickle = loadPickle(save_path, fil) - candidate_trajectories.append(loadedpickle) - # move the loaded file so we don't try to reprocess it on a subsequent pass - procpath = os.path.join(save_path, 'processed') - os.makedirs(procpath, exist_ok=True) procfile = os.path.join(procpath, fil) if os.path.isfile(procfile): - os.remove(procfile) + # Skip the trajectory if we already processed it. 
+ # To force reprocessing, move the candidate from 'candidates/processed' to 'candidates' + log.info(f'Candidate {fil} already processed') + os.remove(os.path.join(save_path, fil)) + continue + loadedpickle = loadPickle(save_path, fil) + candidate_trajectories.append(loadedpickle) + # now move the loaded file so we don't try to reprocess it os.rename(os.path.join(save_path, fil), procfile) except Exception: - print(f'Candidate {fil} went away, probably picked up by another process') + log.info(f'Candidate {fil} went away, probably picked up by another process') log.info("-----------------------") log.info('LOADED {} CANDIDATES'.format(len(candidate_trajectories))) log.info("-----------------------") @@ -1619,7 +1630,9 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver candidate_trajectories = self.dh.phase1Trajectories # end of "if mcmode == MCMODE_PHASE2" + # avoid reprocessing candidates that were already processed num_traj = len(candidate_trajectories) + log.info("") log.info("-----------------------") log.info(f'SOLVING {num_traj} TRAJECTORIES {mcmodestr}') @@ -1633,7 +1646,6 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver log.info("-----------------------") log.info(f'processing {"candidate" if mcmode==MCMODE_PHASE1 else "trajectory"} {i+1}/{num_traj}') - # if mcmode is not 2, prepare to calculate the intersecting planes solutions if mcmode != MCMODE_PHASE2: # Find unique station counts @@ -1749,7 +1761,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # Init the solver (use the earliest date as the reference) jdt_ref = min([obs_temp.jdt_ref for obs_temp, _, _ in matched_observations]) - log.info(f'ref_dt {jd2Date(jdt_ref, dt_obj=True)}') + #log.info(f'ref_dt {jd2Date(jdt_ref, dt_obj=True)}') traj = self.initTrajectory(jdt_ref, mc_runs, verbose=verbose) @@ -1772,7 +1784,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # 
If the first time is not 0, normalize times so that the earliest time is 0 if t0 != 0.0: - log.info(f'adjusting by {t0}') + #log.info(f'adjusting by {t0}') # Offset all times by t0 for i in range(len(traj.observations)): traj.observations[i].time_data -= t0 @@ -1782,7 +1794,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver traj.jdt_ref = traj.jdt_ref + t0/86400.0 - log.info(f'ref_dt {jd2Date(traj.jdt_ref, dt_obj=True)}') + #log.info(f'ref_dt {jd2Date(traj.jdt_ref, dt_obj=True)}') # If this trajectory already failed to be computed, don't try to recompute it again unless # new observations are added if self.dh.checkTrajIfFailed(traj): diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 3b33eceb..1fff4027 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -30,7 +30,7 @@ from wmpl.Utils.TrajConversions import datetime2JD, jd2Date from wmpl.Utils.remoteDataHandling import RemoteDataHandler from wmpl.Trajectory.CorrelateDB import ObservationDatabase, TrajectoryDatabase -from wmpl.Trajectory.Trajectory import Trajectory +# from wmpl.Trajectory.Trajectory import Trajectory from wmpl.Trajectory.CorrelateEngine import MCMODE_CANDS, MCMODE_PHASE1, MCMODE_PHASE2, MCMODE_ALL, MCMODE_BOTH @@ -553,10 +553,6 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode else: self.old_db = None - # REVISIT THIS LATER - #if mcmode == MCMODE_PHASE1 and self.checkRemoteDataMode() == 'master': - # self.observations_db = ObservationDatabase(self.phase1_dir, 'phase1_paired.db') - #else: self.observations_db = ObservationDatabase(db_dir) if hasattr(self.old_db, 'paired_obs'): # move any legacy paired obs data into sqlite @@ -1046,10 +1042,17 @@ def removeDeletedTrajectories(self): self.dt_range[0].strftime("%Y-%m-%d %H:%M:%S"), self.dt_range[1].strftime("%Y-%m-%d %H:%M:%S"))) - jdt_start = datetime2JD(self.dt_range[0]) - jdt_end = datetime2JD(self.dt_range[1]) - - 
self.traj_db.removeDeletedTrajectories(self.output_dir, jdt_start, jdt_end) + jdt_range = [datetime2JD(self.dt_range[0]), datetime2JD(self.dt_range[1])] + + traj_list = self.traj_db.getTrajMinDetails(self.output_dir, jdt_range) + i = 0 + for traj in traj_list: + if not os.path.isfile(os.path.join(self.output_dir, traj['traj_file_path'])): + if verbose: + log.info(f'removing traj {jd2Date(traj["jdt_ref"]).strftime("%Y%m%d_%M%M%S.%f")} from database') + self.removeTrajectory(TrajectoryReduced(None, json_dict=traj)) + i += 1 + log.info(f'removed {i} deleted trajectories') return @@ -1139,7 +1142,8 @@ def loadComputedTrajectories(self, dt_range=None): def getComputedTrajectories(self, jd_beg, jd_end): """ Returns a list of computed trajectories between the Julian dates. """ - json_dicts = self.traj_db.getTrajectories(self.output_dir, jd_beg, jd_end) + jd_range = [jd_beg, jd_end] + json_dicts = self.traj_db.getTrajectories(self.output_dir, jd_range) trajs = [TrajectoryReduced(None, json_dict=j) for j in json_dicts] return trajs @@ -1369,45 +1373,49 @@ def removeTrajectory(self, traj_reduced, remove_phase1=False): except Exception: pass + # Remove the trajectory folder from the disk + if os.path.isfile(traj_reduced.traj_file_path): + traj_dir = os.path.dirname(traj_reduced.traj_file_path) + shutil.rmtree(traj_dir, ignore_errors=True) + if os.path.isfile(traj_reduced.traj_file_path): + log.warning(f'unable to remove {traj_dir}') + self.traj_db.removeTrajectory(traj_reduced) - def excludeAlreadyFailedCandidates(self, matched_observations, num_obs_paired, verbose=False): + def checkAlreadyProcessed(self, matched_observations, verbose=False): + """ + Check if a list of candidates has already been processed, and return only the new ones + """ # go through the candidates and check if they correspond to already-failed candidate_trajectories=[] for cand in matched_observations: ref_dt = min([met_obs.reference_dt for _, met_obs, _ in cand]) - jdt_ref = datetime2JD(ref_dt) - traj 
= Trajectory(jdt_ref, verbose=False) - - # Feed the observations into the trajectory solver - for obs_temp, met_obs, _ in cand: - - # Normalize the observations to the reference Julian date - jdt_ref_curr = datetime2JD(met_obs.reference_dt) - obs_temp.time_data += (jdt_ref_curr - jdt_ref)*86400 - obs_temp.jdt_ref = jdt_ref - - traj.infillWithObs(obs_temp) - - ### Recompute the reference JD and all times so that the first time starts at 0 ### - - # Determine the first relative time from reference JD - t0 = min([obs.time_data[0] for obs in traj.observations if (not obs.ignore_station) - or (not np.all(obs.ignore_list))]) + ctry_list = list(set([met_obs.station_code[:2] for _, met_obs, _ in cand])) + ctry_list.sort() + ctries = '_'.join(ctry_list) + file_name = f'{ref_dt.timestamp():.6f}_{ctries}.pickle' + save_dir = self.candidate_dir + if verbose: + log.info(f'Candidate {file_name} contains {len(cand)} observations') - # If the first time is not 0, normalize times so that the earliest time is 0 - if t0 != 0.0: - # Recompute the reference JD to corresponds with t0 - traj.jdt_ref = traj.jdt_ref + t0/86400.0 + if os.path.isfile(os.path.join(save_dir, file_name)) or os.path.isfile(os.path.join(save_dir, 'processed', file_name)): + if verbose: + log.info(f'candidate {file_name} already processed') + continue - if self.checkTrajIfFailed(traj): - log.info(f'Candidate at {ref_dt.isoformat()} already failed, skipping') - num_obs_paired -= len(cand) else: candidate_trajectories.append(cand) - return candidate_trajectories, num_obs_paired + return candidate_trajectories + + def checkCandIfFailed(self, candidate): + """ Check if the given candidate has been processed with the same observations and has failed to be + computed before. 
+ """ + jdt_ref = min([obs.jdt_ref for obs, _, _ in candidate]) + stations = [obs.station_id for obs, _, _ in candidate] + return self.traj_db.checkCandIfFailed(jdt_ref, stations) def checkTrajIfFailed(self, traj): """ Check if the given trajectory has been computed with the same observations and has failed to be @@ -1599,6 +1607,7 @@ def getRemoteData(self, verbose=False): return status def saveCandidates(self, candidate_trajectories, verbose=False): + num_saved = 0 for matched_observations in candidate_trajectories: ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) ctry_list = list(set([met_obs.station_code[:2] for _, met_obs, _ in matched_observations])) @@ -1608,10 +1617,11 @@ def saveCandidates(self, candidate_trajectories, verbose=False): if verbose: log.info(f'Candidate {picklename} contains {len(matched_observations)} observations') - self.saveCandOrTraj(matched_observations, picklename, 'candidates', verbose=verbose) + if self.saveCandOrTraj(matched_observations, picklename, 'candidates', verbose=verbose): + num_saved += 1 log.info("-----------------------") - log.info(f'Saved {len(candidate_trajectories)} candidates') + log.info(f'Saved {len(num_saved)} candidates') log.info("-----------------------") def saveCandOrTraj(self, traj, file_name, savetype='phase1', verbose=False): @@ -1626,6 +1636,10 @@ def saveCandOrTraj(self, traj, file_name, savetype='phase1', verbose=False): else: save_dir = self.candidate_dir required_mode = 1 + # don't resave the same candidate + if os.path.isfile(os.path.join(save_dir, file_name)) or os.path.isfile(os.path.join(save_dir, 'processed', file_name)): + log.info(f'candidate {file_name} already processed') + return False if self.RemoteDatahandler and self.RemoteDatahandler.mode == 'master': @@ -1661,6 +1675,7 @@ def saveCandOrTraj(self, traj, file_name, savetype='phase1', verbose=False): if verbose: log.info(f'saving {file_name} to {save_dir}') savePickle(traj, save_dir, file_name) + return True 
From c2572deab884c47b77c3a59955a611b154bcac84 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sun, 8 Mar 2026 11:18:42 +0000 Subject: [PATCH 128/132] bugfixes and performance improvements --- wmpl/Trajectory/CorrelateDB.py | 6 +- wmpl/Trajectory/CorrelateEngine.py | 390 +++++++++++++++-------------- wmpl/Trajectory/CorrelateRMS.py | 15 +- 3 files changed, 206 insertions(+), 205 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index 1dbde64e..d35a87fa 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -555,13 +555,13 @@ def getTrajBasics(self, output_dir, jdt_range, failed=False, verbose=False): jdt_start, jdt_end = jdt_range table_name = 'failed_trajectories' if failed else 'trajectories' if not jdt_start: - cur = self.dbhandle.execute(f"SELECT jdt_ref, traj_id, traj_file_path FROM {table_name}") + cur = self.dbhandle.execute(f"SELECT jdt_ref, traj_id, traj_file_path FROM {table_name} where status=1") rows = cur.fetchall() elif not jdt_end: - cur = self.dbhandle.execute(f"SELECT jdt_ref, traj_id, traj_file_path FROM {table_name} WHERE jdt_ref={jdt_start}") + cur = self.dbhandle.execute(f"SELECT jdt_ref, traj_id, traj_file_path FROM {table_name} WHERE jdt_ref={jdt_start} and status=1") rows = cur.fetchall() else: - cur = self.dbhandle.execute(f"SELECT jdt_ref, traj_id, traj_file_path FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end}") + cur = self.dbhandle.execute(f"SELECT jdt_ref, traj_id, traj_file_path FROM {table_name} WHERE jdt_ref>={jdt_start} and jdt_ref<={jdt_end} and status=1") rows = cur.fetchall() trajs = [] for rw in rows: diff --git a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 23d09eb0..461a52af 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1296,274 +1296,276 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver total_unpaired = 
len(unpaired_observations) log.info(f'Analysing {total_unpaired} observations in this bucket...') + num_obs_paired = 0 + # List of all candidate trajectories + candidate_trajectories = [] + ### CHECK FOR PAIRING WITH PREVIOUSLY ESTIMATED TRAJECTORIES ### + if total_unpaired > 0: + log.info("") + log.info("--------------------------------------------------------------------------") + log.info(" 1) CHECKING IF PREVIOUSLY ESTIMATED TRAJECTORIES HAVE NEW OBSERVATIONS") + log.info("--------------------------------------------------------------------------") + log.info("") - log.info("") - log.info("--------------------------------------------------------------------------") - log.info(" 1) CHECKING IF PREVIOUSLY ESTIMATED TRAJECTORIES HAVE NEW OBSERVATIONS") - log.info("--------------------------------------------------------------------------") - log.info("") + # Get a list of all already computed trajectories within the given time bin + # Reducted trajectory objects are returned + + if bin_time_range: + # restrict checks to the bin range supplied to run() plus a day to allow for data upload times + log.info(f'Getting computed trajectories for bin {str(bin_time_range[0])} to {str(bin_time_range[1])}') + computed_traj_list = self.dh.getComputedTrajectories(datetime2JD(bin_time_range[0]), datetime2JD(bin_time_range[1])+1) + else: + # use the current bin. 
+ log.info(f'Getting computed trajectories for {str(bin_beg)} to {str(bin_end)}') + computed_traj_list = self.dh.getComputedTrajectories(datetime2JD(bin_beg), datetime2JD(bin_end)) + + # Find all unpaired observations that match already existing trajectories + for traj_reduced in computed_traj_list: + + # If the trajectory already has more than the maximum number of stations, skip it + if len(traj_reduced.participating_stations) >= self.traj_constraints.max_stations: + + log.info( + "Trajectory {:s} has already reached the maximum number of stations, " + "skipping...".format( + str(jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)))) + + # TODO DECIDE WHETHER WE ACTUALLY WANT TO DO THIS + # the problem is that we could end up with unpaired observations that form a new trajectory instead of + # being added to an existing one + continue + + # Get all unprocessed observations which are close in time to the reference trajectory + traj_time_pairs = self.dh.getTrajTimePairs(traj_reduced, unpaired_observations, + self.traj_constraints.max_toffset) - # Get a list of all already computed trajectories within the given time bin - # Reducted trajectory objects are returned - - if bin_time_range: - # restrict checks to the bin range supplied to run() plus a day to allow for data upload times - log.info(f'Getting computed trajectories for bin {str(bin_time_range[0])} to {str(bin_time_range[1])}') - computed_traj_list = self.dh.getComputedTrajectories(datetime2JD(bin_time_range[0]), datetime2JD(bin_time_range[1])+1) - else: - # use the current bin. 
- log.info(f'Getting computed trajectories for {str(bin_beg)} to {str(bin_end)}') - computed_traj_list = self.dh.getComputedTrajectories(datetime2JD(bin_beg), datetime2JD(bin_end)) + # Skip trajectory if there are no new obervations + if not traj_time_pairs: + continue - # Find all unpaired observations that match already existing trajectories - for traj_reduced in computed_traj_list: - # If the trajectory already has more than the maximum number of stations, skip it - if len(traj_reduced.participating_stations) >= self.traj_constraints.max_stations: + log.info("") + log.info("Checking trajectory at {:s} in countries: {:s}".format( + str(jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)), + ", ".join(list(set([stat_id[:2] for stat_id in traj_reduced.participating_stations]))))) + log.info("--------") - log.info( - "Trajectory {:s} has already reached the maximum number of stations, " - "skipping...".format( - str(jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)))) - # TODO DECIDE WHETHER WE ACTUALLY WANT TO DO THIS - # the problem is that we could end up with unpaired observations that form a new trajectory instead of - # being added to an existing one - continue - - # Get all unprocessed observations which are close in time to the reference trajectory - traj_time_pairs = self.dh.getTrajTimePairs(traj_reduced, unpaired_observations, - self.traj_constraints.max_toffset) + # Filter out bad matches and only keep the good ones + candidate_observations = [] + traj_full = None + skip_traj_check = False + for met_obs in traj_time_pairs: - # Skip trajectory if there are no new obervations - if not traj_time_pairs: - continue + log.info("Candidate observation: {:s}".format(met_obs.station_code)) + platepar = self.dh.getPlatepar(met_obs) - log.info("") - log.info("Checking trajectory at {:s} in countries: {:s}".format( - str(jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)), - ", 
".join(list(set([stat_id[:2] for stat_id in traj_reduced.participating_stations]))))) - log.info("--------") - + # Check that the trajectory beginning and end are within the distance limit + if not self.trajectoryRangeCheck(traj_reduced, platepar): + continue - # Filter out bad matches and only keep the good ones - candidate_observations = [] - traj_full = None - skip_traj_check = False - for met_obs in traj_time_pairs: - log.info("Candidate observation: {:s}".format(met_obs.station_code)) + # Check that the trajectory is within the field of view + if not self.trajectoryInFOV(traj_reduced, platepar): + continue - platepar = self.dh.getPlatepar(met_obs) - # Check that the trajectory beginning and end are within the distance limit - if not self.trajectoryRangeCheck(traj_reduced, platepar): - continue + # Load the full trajectory object + if traj_full is None: + traj_full = self.dh.loadFullTraj(traj_reduced) + # If the full trajectory couldn't be loaded, skip checking this trajectory + if traj_full is None: + + skip_traj_check = True + break - # Check that the trajectory is within the field of view - if not self.trajectoryInFOV(traj_reduced, platepar): - continue + ### Do a rough trajectory solution and perform a quick quality control ### - # Load the full trajectory object - if traj_full is None: - traj_full = self.dh.loadFullTraj(traj_reduced) + # Init observation object using the new meteor observation + obs_new = self.initObservationsObject(met_obs, platepar, + ref_dt=jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)) + obs_new.id = met_obs.id + obs_new.station_code = met_obs.station_code + obs_new.mean_dt = met_obs.mean_dt - # If the full trajectory couldn't be loaded, skip checking this trajectory - if traj_full is None: + # Get an observation from the trajectory object with the maximum convergence angle to + # the reference observations + obs_traj_best = None + qc_max = 0.0 + for obs_tmp in traj_full.observations: - skip_traj_check = True 
- break + # Compute the plane intersection between the new and one of trajectory observations + pi = PlaneIntersection(obs_new, obs_tmp) + # Take the observation with the maximum convergence angle + if (obs_traj_best is None) or (pi.conv_angle > qc_max): + qc_max = pi.conv_angle + obs_traj_best = obs_tmp - ### Do a rough trajectory solution and perform a quick quality control ### - # Init observation object using the new meteor observation - obs_new = self.initObservationsObject(met_obs, platepar, - ref_dt=jd2Date(traj_reduced.jdt_ref, dt_obj=True, tzinfo=datetime.timezone.utc)) - obs_new.id = met_obs.id - obs_new.station_code = met_obs.station_code - obs_new.mean_dt = met_obs.mean_dt + # Do a quick trajectory solution and perform sanity checks + plane_intersection = self.quickTrajectorySolution(obs_traj_best, obs_new) + if plane_intersection is None: + continue - # Get an observation from the trajectory object with the maximum convergence angle to - # the reference observations - obs_traj_best = None - qc_max = 0.0 - for obs_tmp in traj_full.observations: - - # Compute the plane intersection between the new and one of trajectory observations - pi = PlaneIntersection(obs_new, obs_tmp) + ### ### - # Take the observation with the maximum convergence angle - if (obs_traj_best is None) or (pi.conv_angle > qc_max): - qc_max = pi.conv_angle - obs_traj_best = obs_tmp + candidate_observations.append([obs_new, met_obs]) - # Do a quick trajectory solution and perform sanity checks - plane_intersection = self.quickTrajectorySolution(obs_traj_best, obs_new) - if plane_intersection is None: + # Skip the candidate trajectory if it couldn't be loaded from disk + if skip_traj_check: continue - ### ### - - candidate_observations.append([obs_new, met_obs]) - - - # Skip the candidate trajectory if it couldn't be loaded from disk - if skip_traj_check: - continue - - # If there are any good new observations, add them to the trajectory and re-run the solution - if 
candidate_observations: + # If there are any good new observations, add them to the trajectory and re-run the solution + if candidate_observations: - log.info("Recomputing trajectory with new observations from stations:") + log.info("Recomputing trajectory with new observations from stations:") - # Add new observations to the trajectory object - for obs_new, _ in candidate_observations: - log.info(obs_new.station_id) - traj_full.infillWithObs(obs_new) + # Add new observations to the trajectory object + for obs_new, _ in candidate_observations: + log.info(obs_new.station_id) + traj_full.infillWithObs(obs_new) - # Re-run the trajectory fit - # pass in orig_traj here so that it can be deleted from disk if the new solution succeeds - # pass the new candidates in so that they can be marked paired if the new soln succeeds - # Note: mcmode must be phase1 here to force a recompute - successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=MCMODE_PHASE1, - matched_obs=candidate_observations, orig_traj=traj_reduced, verbose=verbose) - - # If the new trajectory solution succeeded, remove the now-paired observations from the in memory list - if successful_traj_fit: + # Re-run the trajectory fit + # pass in orig_traj here so that it can be deleted from disk if the new solution succeeds + # pass the new candidates in so that they can be marked paired if the new soln succeeds + # Note: mcmode must be phase1 here to force a recompute + successful_traj_fit = self.solveTrajectory(traj_full, traj_full.mc_runs, mcmode=MCMODE_PHASE1, + matched_obs=candidate_observations, orig_traj=traj_reduced, verbose=verbose) + + # If the new trajectory solution succeeded, remove the now-paired observations from the in memory list + if successful_traj_fit: - log.info("Remove paired observations from the processing list...") - for _, met_obs_temp in candidate_observations: - unpaired_observations.remove(met_obs_temp) + log.info("Remove paired observations from the processing 
list...") + for _, met_obs_temp in candidate_observations: + unpaired_observations.remove(met_obs_temp) - else: - log.info("New trajectory solution failed, keeping the old trajectory...") + else: + log.info("New trajectory solution failed, keeping the old trajectory...") - ### ### + ### ### - log.info("") - log.info("-------------------------------------------------") - log.info(" 2) PAIRING OBSERVATIONS INTO NEW TRAJECTORIES") - log.info("-------------------------------------------------") - log.info("") + log.info("") + log.info("-------------------------------------------------") + log.info(" 2) PAIRING OBSERVATIONS INTO NEW TRAJECTORIES") + log.info("-------------------------------------------------") + log.info("") - # List of all candidate trajectories - candidate_trajectories = [] - # Go through all unpaired and unprocessed meteor observations - for met_obs in unpaired_observations: + # Go through all unpaired and unprocessed meteor observations + for met_obs in unpaired_observations: - # Skip observations that were processed in the meantime - if met_obs.processed: - continue + # Skip observations that were processed in the meantime + if met_obs.processed: + continue - if self.dh.observations_db.checkObsPaired(met_obs.id, verbose=verbose): - continue + if self.dh.observations_db.checkObsPaired(met_obs.id, verbose=verbose): + continue - # Get station platepar - reference_platepar = self.dh.getPlatepar(met_obs) - obs1 = self.initObservationsObject(met_obs, reference_platepar) + # Get station platepar + reference_platepar = self.dh.getPlatepar(met_obs) + obs1 = self.initObservationsObject(met_obs, reference_platepar) - # Keep a list of observations which matched the reference observation - matched_observations = [] + # Keep a list of observations which matched the reference observation + matched_observations = [] - # Find all meteors from other stations that are close in time to this meteor - plane_intersection_good = None - time_pairs = 
self.dh.findTimePairs(met_obs, unpaired_observations, - self.traj_constraints.max_toffset) - for met_pair_candidate in time_pairs: + # Find all meteors from other stations that are close in time to this meteor + plane_intersection_good = None + time_pairs = self.dh.findTimePairs(met_obs, unpaired_observations, + self.traj_constraints.max_toffset) + for met_pair_candidate in time_pairs: - log.info("") - log.info("Processing pair:") - log.info("{:s} and {:s}".format(met_obs.station_code, met_pair_candidate.station_code)) - log.info("{:s} and {:s}".format(str(met_obs.reference_dt), str(met_pair_candidate.reference_dt))) - log.info("-----------------------") + log.info("") + log.info("Processing pair:") + log.info("{:s} and {:s}".format(met_obs.station_code, met_pair_candidate.station_code)) + log.info("{:s} and {:s}".format(str(met_obs.reference_dt), str(met_pair_candidate.reference_dt))) + log.info("-----------------------") - ### Check if the stations are close enough and have roughly overlapping fields of view ### + ### Check if the stations are close enough and have roughly overlapping fields of view ### - # Get candidate station platepar - candidate_platepar = self.dh.getPlatepar(met_pair_candidate) + # Get candidate station platepar + candidate_platepar = self.dh.getPlatepar(met_pair_candidate) - # Check if the stations are within range - if not self.stationRangeCheck(reference_platepar, candidate_platepar): - continue + # Check if the stations are within range + if not self.stationRangeCheck(reference_platepar, candidate_platepar): + continue - # Check the FOV overlap - if not self.checkFOVOverlap(reference_platepar, candidate_platepar): - log.info("Station FOV does not overlap: {:s} and {:s}".format(met_obs.station_code, - met_pair_candidate.station_code)) - continue + # Check the FOV overlap + if not self.checkFOVOverlap(reference_platepar, candidate_platepar): + log.info("Station FOV does not overlap: {:s} and {:s}".format(met_obs.station_code, + 
met_pair_candidate.station_code)) + continue - ### ### + ### ### - ### Do a rough trajectory solution and perform a quick quality control ### + ### Do a rough trajectory solution and perform a quick quality control ### - # Init observations - obs2 = self.initObservationsObject(met_pair_candidate, candidate_platepar, - ref_dt=met_obs.reference_dt) + # Init observations + obs2 = self.initObservationsObject(met_pair_candidate, candidate_platepar, + ref_dt=met_obs.reference_dt) - # Do a quick trajectory solution and perform sanity checks - plane_intersection = self.quickTrajectorySolution(obs1, obs2) - if plane_intersection is None: - continue + # Do a quick trajectory solution and perform sanity checks + plane_intersection = self.quickTrajectorySolution(obs1, obs2) + if plane_intersection is None: + continue - else: - plane_intersection_good = plane_intersection + else: + plane_intersection_good = plane_intersection - ### ### + ### ### - matched_observations.append([obs2, met_pair_candidate, plane_intersection]) + matched_observations.append([obs2, met_pair_candidate, plane_intersection]) - # If there are no matched observations, skip it - if len(matched_observations) == 0: + # If there are no matched observations, skip it + if len(matched_observations) == 0: - if len(time_pairs) > 0: - log.info("") - log.info(" --- NO MATCH ---") + if len(time_pairs) > 0: + log.info("") + log.info(" --- NO MATCH ---") - continue + continue - # Skip if there are not good plane intersections - if plane_intersection_good is None: - continue + # Skip if there are not good plane intersections + if plane_intersection_good is None: + continue - # Add the first observation to matched observations - matched_observations.append([obs1, met_obs, plane_intersection_good]) + # Add the first observation to matched observations + matched_observations.append([obs1, met_obs, plane_intersection_good]) - # Mark observations as processed - for _, met_obs_temp, _ in matched_observations: - 
met_obs_temp.processed = True + # Mark observations as processed + for _, met_obs_temp, _ in matched_observations: + met_obs_temp.processed = True - # Store candidate trajectory group - # Note that this will include candidate groups that already failed on previous runs. - # We will exclude these later - we can't do it just yet as if new data has arrived, then - # in the next step, the group might be merged with another group creating a solvable set. - log.info("") - ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) - log.info(f" --- ADDING CANDIDATE at {ref_dt.isoformat()} ---") - candidate_trajectories.append(matched_observations) + # Store candidate trajectory group + # Note that this will include candidate groups that already failed on previous runs. + # We will exclude these later - we can't do it just yet as if new data has arrived, then + # in the next step, the group might be merged with another group creating a solvable set. + log.info("") + ref_dt = min([met_obs.reference_dt for _, met_obs, _ in matched_observations]) + log.info(f" --- ADDING CANDIDATE at {ref_dt.isoformat()} ---") + candidate_trajectories.append(matched_observations) - # Check for mergeable candidate combinations - merged_candidate_trajectories, num_obs_paired = self.mergeBrokenCandidates(candidate_trajectories) + # Check for mergeable candidate combinations + merged_candidate_trajectories, num_obs_paired = self.mergeBrokenCandidates(candidate_trajectories) - # Now check and exclude already-processed candidates - # We can't do this earlier as we need to check mergeability first - candidate_trajectories = self.dh.checkAlreadyProcessed(merged_candidate_trajectories, verbose=verbose) + # Now check and exclude already-processed candidates + # We can't do this earlier as we need to check mergeability first + candidate_trajectories = self.dh.checkAlreadyProcessed(merged_candidate_trajectories, verbose=verbose) log.info("-----------------------") log.info(f'There are 
{total_unpaired - num_obs_paired} remaining unpaired observations in this bucket.') diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 1fff4027..5bc2fa04 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -317,7 +317,7 @@ def addTrajectory(self, traj_reduced, failed=False): else: traj_dict[traj_reduced.jdt_ref].traj_id = traj_reduced.traj_id - def removeTrajectory(self, traj_reduced, keepFolder=False): + def removeTrajectory(self, traj_reduced, keep_folder=False): """ Remove the trajectory from the data base and disk. """ # Remove the trajectory data base entry @@ -325,7 +325,7 @@ def removeTrajectory(self, traj_reduced, keepFolder=False): del self.trajectories[traj_reduced.jdt_ref] # Remove the trajectory folder on the disk - if not keepFolder and os.path.isfile(traj_reduced.traj_file_path): + if not keep_folder and os.path.isfile(traj_reduced.traj_file_path): traj_dir = os.path.dirname(traj_reduced.traj_file_path) shutil.rmtree(traj_dir, ignore_errors=True) if os.path.isfile(traj_reduced.traj_file_path): @@ -1026,7 +1026,7 @@ def trajectoryFileInDtRange(self, file_name, dt_range=None): else: return False - def removeDeletedTrajectories(self): + def removeDeletedTrajectories(self, verbose=True): """ Purge the database of any trajectories that no longer exist on disk. These can arise because the monte-carlo stage may update the data. 
""" @@ -1044,12 +1044,12 @@ def removeDeletedTrajectories(self): jdt_range = [datetime2JD(self.dt_range[0]), datetime2JD(self.dt_range[1])] - traj_list = self.traj_db.getTrajMinDetails(self.output_dir, jdt_range) + traj_list = self.traj_db.getTrajBasics(self.output_dir, jdt_range) i = 0 for traj in traj_list: if not os.path.isfile(os.path.join(self.output_dir, traj['traj_file_path'])): if verbose: - log.info(f'removing traj {jd2Date(traj["jdt_ref"]).strftime("%Y%m%d_%M%M%S.%f")} from database') + log.info(f'removing traj {jd2Date(traj["jdt_ref"],dt_obj=True).strftime("%Y%m%d_%H%M%S.%f")} {traj["traj_file_path"]} from database') self.removeTrajectory(TrajectoryReduced(None, json_dict=traj)) i += 1 log.info(f'removed {i} deleted trajectories') @@ -1400,8 +1400,7 @@ def checkAlreadyProcessed(self, matched_observations, verbose=False): log.info(f'Candidate {file_name} contains {len(cand)} observations') if os.path.isfile(os.path.join(save_dir, file_name)) or os.path.isfile(os.path.join(save_dir, 'processed', file_name)): - if verbose: - log.info(f'candidate {file_name} already processed') + log.info(f'candidate {file_name} already processed') continue else: @@ -1621,7 +1620,7 @@ def saveCandidates(self, candidate_trajectories, verbose=False): num_saved += 1 log.info("-----------------------") - log.info(f'Saved {len(num_saved)} candidates') + log.info(f'Saved {num_saved} candidates') log.info("-----------------------") def saveCandOrTraj(self, traj, file_name, savetype='phase1', verbose=False): From e9794a89a9bbc424ee0b87be37957bb930221556 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Sun, 8 Mar 2026 11:32:27 +0000 Subject: [PATCH 129/132] remove incorrect location of Obs DB --- wmpl/Trajectory/CorrelateRMS.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index 5bc2fa04..be8f0f75 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -584,11 +584,6 @@ def 
__init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode # in phase 1, initialise and collect data second as we load candidates dynamically self.initialiseRemoteDataHandling() - # in phase1, if we're the master node, write observations updates to a temp database - if self.RemoteDatahandler and self.RemoteDatahandler.mode == 'master' and mcmode == MCMODE_PHASE1: - self.observations_db.closeObsDatabase() - self.observations_db = ObservationDatabase(self.phase1_dir) - else: # in phase 2, initialise and collect data first as we need the phase1 traj on disk already self.traj_db = None From 9612432838f9d3734af8f1f1c6ac9e4442977a8b Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 9 Mar 2026 18:44:43 +0000 Subject: [PATCH 130/132] various improvements --- wmpl/Trajectory/CorrelateDB.py | 134 +++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 48 deletions(-) diff --git a/wmpl/Trajectory/CorrelateDB.py b/wmpl/Trajectory/CorrelateDB.py index d35a87fa..fa0c2354 100644 --- a/wmpl/Trajectory/CorrelateDB.py +++ b/wmpl/Trajectory/CorrelateDB.py @@ -41,7 +41,7 @@ ############################################################ -class ObservationDatabase(): +class ObservationsDatabase(): """ A class to handle the sqlite observations database transparently. 
""" @@ -57,27 +57,22 @@ def __init__(self, db_path, db_name='observations.db', purge_records=False, verb """ db_full_name = os.path.join(db_path, f'{db_name}') - log.info(f'opening database {db_full_name}') + if verbose: + log.info(f'opening database {db_full_name}') con = sqlite3.connect(db_full_name) con.execute('pragma journal_mode=wal') if purge_records: - if verbose: - log.info('purge: write to obsdb') con.execute('drop table paired_obs') res = con.execute("SELECT name FROM sqlite_master WHERE name='paired_obs'") if res.fetchone() is None: - if verbose: - log.info('create table: write to obsdb') - con.execute("CREATE TABLE paired_obs(station_code VARCHAR(8), obs_id VARCHAR(36) UNIQUE, obs_date REAL, status INTEGER)") + con.execute("CREATE TABLE paired_obs(obs_id VARCHAR(36) UNIQUE, obs_dt REAL, status INTEGER)") con.commit() self.dbhandle = con - def _commitObsDatabase(self, verbose=False): + def _commitObsDatabase(self): """ Commit the obs db. This function exists so we can do lazy writes """ - if verbose: - log.info('commit: write to obsdb') self.dbhandle.commit() try: self.dbhandle.execute('pragma wal_checkpoint(TRUNCATE)') @@ -98,7 +93,7 @@ def closeObsDatabase(self): def checkObsPaired(self, obs_id, verbose=False): """ Check if an observation is already marked paired - return True if there is an observation with the correct station code, obs id and with status = 1 + return True if there is an observation with the correct obs id and with status = 1 Parameters: obs_id : observation ID to check @@ -115,39 +110,58 @@ def checkObsPaired(self, obs_id, verbose=False): log.info(f'{obs_id} is {"Paired" if paired else "Unpaired"}') return paired - def addPairedObs(self, station_code, obs_id, obs_date, verbose=False): + def addPairedObservations(self, obs_ids, jdt_refs, verbose=False): + """ + Add or update a list of observations paired, setting status = 1 + + Parameters: + obs_ids : list of observation IDs + jdt_refs : list of julian reference dates of the 
observations + """ + + vals_str = ','.join(map(str,[(id, dt, 1) for id,dt in zip(obs_ids,jdt_refs)])) + + if verbose: + log.info(f'adding {obs_ids} to paired_obs table') + try: + self.dbhandle.execute(f"insert or replace into paired_obs values {vals_str}") + self.dbhandle.commit() + return True + except Exception: + log.warning(f'failed to add {obs_ids} to paired_obs table') + return False + + return + + def addPairedObs(self, obs_id, jdt_ref, verbose=False): """ - Add or update an entry in the database to mark an observation paired, setting status = 1 + Add or update a single entry in the database to mark an observation paired, setting status = 1 Parameters: - station_code : observation's station ID eg UK12345 obs_id : observation ID - obs_date: : observation mean date + jdt_ref : julian reference date of the observation """ if verbose: log.info(f'adding {obs_id} to paired_obs table') - sqlstr = f"insert or replace into paired_obs values ('{station_code}','{obs_id}', {datetime2JD(obs_date)}, 1)" try: - if verbose: - log.info('update: write to obsdb') - self.dbhandle.execute(sqlstr) + self.dbhandle.execute(f"insert or replace into paired_obs values ('{obs_id}', {jdt_ref}, 1)") self.dbhandle.commit() return True except Exception: log.warning(f'failed to add {obs_id} to paired_obs table') return False - def unpairObs(self, met_obs_list, verbose=False): + def unpairObs(self, obs_ids, verbose=False): """ Mark an observation unpaired. If an entry exists in the database, update the status to 0. - Currently unused. + ** Currently unused. 
** Parameters: - met_obs_list : a list of MeteorObsRMS objects + met_obs_list : a list of observation IDs """ - obs_ids_str = ','.join([f"'{met_obs.id}'" for met_obs in met_obs_list]) + obs_ids_str = ','.join(obs_ids) if verbose: log.info(f'unpairing {obs_ids_str}') @@ -158,7 +172,18 @@ def unpairObs(self, met_obs_list, verbose=False): return True except Exception: log.warning(f'failed to unpair {obs_ids_str}') - return False + return False + + def getLinkedObservations(self, jdt_ref): + """ + Return a list of observation IDs linked with a trajectory based on the jdt_ref of the traj + + Parameters + jdt_ref : the julian reference date of the trajectory + + """ + cur = self.dbhandle.execute(f"SELECT obs_id FROM paired_obs WHERE obs_dt={jdt_ref} and status=1") + return [x[0] for x in cur.fetchall()] def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): """ @@ -171,7 +196,7 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): """ # create the database if it doesnt exist archdb_name = f'{arch_prefix}_observations.db' - archdb = ObservationDatabase(db_path, archdb_name) + archdb = ObservationsDatabase(db_path, archdb_name) archdb.closeObsDatabase() # attach the arch db, copy the records then delete them @@ -185,7 +210,7 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): cur = self.dbhandle.execute(f'select * from paired_obs where obs_date < {archdate_jd}') for row in cur.fetchall(): try: - self.dbhandle.execute(f"insert into archdb.paired_obs values('{row[0]}','{row[1]}',{row[2]},{row[3]})") + self.dbhandle.execute(f"insert into archdb.paired_obs values('{row[0]}','{row[1]}',{row[2]})") except Exception: log.info(f'{row[1]} already exists in target') @@ -194,7 +219,7 @@ def archiveObsDatabase(self, db_path, arch_prefix, archdate_jd): self.dbhandle.commit() return - def moveObsJsonRecords(self, paired_obs, dt_range): + def copyObsJsonRecords(self, paired_obs, dt_range): """ Copy recent data from the legacy Json database to the 
new database. By design this only copies at most the last seven days, but a date-range can be @@ -224,7 +249,7 @@ def moveObsJsonRecords(self, paired_obs, dt_range): obs_date = obs_date.replace(tzinfo=datetime.timezone.utc) if obs_date >= dt_beg and obs_date < dt_end: - self.addPairedObs(stat_id, obs_id, obs_date) + self.addPairedObs(obs_id) i += 1 if not i % 100000 and i != 0: log.info(f'moved {i} observations') @@ -422,7 +447,7 @@ def checkTrajIfFailed(self, traj_reduced, verbose=False): found = True if (traj_stations == station_list) else False return found - def addTrajectory(self, traj_reduced, failed=False, verbose=False): + def addTrajectory(self, traj_reduced, failed=False, force_add=True, verbose=False): """ add or update an entry in the database, setting status = 1 @@ -432,8 +457,17 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): """ + tblname = 'failed_trajectories' if failed else 'trajectories' + + # if force_add is false, don't replace any existing entry + if not force_add: + res = self.dbhandle.execute(f'select traj_id from {tblname} where status =1') + row = res.fetchone() + if row is not None and row[0] !='None': + return True + if verbose: - log.info(f'adding jdt {traj_reduced.jdt_ref} to {"failed" if failed else "trajectories"}') + log.info(f'adding jdt {traj_reduced.jdt_ref} to {tblname}') # remove the output_dir part from the path so that the data are location-independent traj_file_path = traj_reduced.traj_file_path[traj_reduced.traj_file_path.find('trajectories'):] @@ -469,8 +503,7 @@ def addTrajectory(self, traj_reduced, failed=False, verbose=False): f"{traj_reduced.rend_lat},{traj_reduced.rend_lon},{traj_reduced.rend_ele},1)") sql_str = sql_str.replace('nan','"NaN"') - if verbose: - log.info('insert: write to trajdb') + self.dbhandle.execute(sql_str) self.dbhandle.commit() return True @@ -488,8 +521,6 @@ def removeTrajectory(self, traj_reduced, failed=False, verbose=False): log.info(f'removing {traj_reduced.traj_id}') 
table_name = 'failed_trajectories' if failed else 'trajectories' - if verbose: - log.info('update: write to trajdb') self.dbhandle.execute(f"update {table_name} set status=0 where jdt_ref='{traj_reduced.jdt_ref}'") self.dbhandle.commit() @@ -599,16 +630,17 @@ def archiveTrajDatabase(self, db_path, arch_prefix, archdate_jd): self.dbhandle.commit() return - def moveFailedTrajectories(self, failed_trajectories, dt_range): + def copyTrajJsonRecords(self, trajectories, dt_range, failed=True): """ - Copy failed trajectories from the old Json database - We only copy recent records since if we ever run for an historic date - its likely we will want to reanalyse all available obs anyway + Copy trajectories from the old Json database + We only copy recent failed traj records since if we ever run for an historic date + its likely we will want to reanalyse all available data Parameters: - failed_trajectories : json list of fails extracted from the old Json DB - dt_range: : date range to use, at most seven days at a time + trajectories : json list of trajetories extracted from the old Json DB + dt_range: : date range to use, at most seven days at a time + failed : boolean, default true to move failed traj """ jd_end = datetime2JD(dt_range[1]) @@ -617,10 +649,10 @@ def moveFailedTrajectories(self, failed_trajectories, dt_range): log.info('moving recent failed trajectories to sqlite - this may take some time....') log.info(f'observation date range {jd2Date(jd_beg, dt_obj=True).isoformat()} to {dt_range[1].isoformat()}') - keylist = [k for k in failed_trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] + keylist = [k for k in trajectories.keys() if float(k) >= jd_beg and float(k) <= jd_end] i = 0 # just in case there aren't any trajectories to move for i,jdt_ref in enumerate(keylist): - self.addTrajectory(failed_trajectories[jdt_ref], failed=True) + self.addTrajectory(trajectories[jdt_ref], failed=failed) i += 1 if not i % 10000: self._commitTrajDatabase() @@ -770,15 
+802,15 @@ def __init__(self, db_dir, dt_range=None): else: dt_range_jd = [datetime2JD(dt_range[0]),datetime2JD(dt_range[1])] jsondb = dummyDatabaseJSON(db_dir=cml_args.dir_path) - obsdb = ObservationDatabase(cml_args.dir_path) - obsdb.moveObsJsonRecords(jsondb.paired_obs, dt_range) + obsdb = ObservationsDatabase(cml_args.dir_path) + obsdb.copyObsJsonRecords(jsondb.paired_obs, dt_range) obsdb.closeObsDatabase() trajdb = TrajectoryDatabase(cml_args.dir_path) - trajdb.moveFailedTrajectories(jsondb.failed_trajectories, dt_range) + trajdb.copyTrajJsonRecords(jsondb.failed_trajectories, dt_range, failed=True) trajdb.closeTrajDatabase() else: if dbname == 'observations': - obsdb = ObservationDatabase(cml_args.dir_path) + obsdb = ObservationsDatabase(cml_args.dir_path) if action == 'status': cur = obsdb.dbhandle.execute('select * from paired_obs where status=1') print(f'there are {len(cur.fetchall())} paired obs') @@ -787,7 +819,8 @@ def __init__(self, db_dir, dt_range=None): if action == 'execute': print(stmt) cur = obsdb.dbhandle.execute(stmt) - print(cur.fetchall()) + for rw in cur.fetchall(): + print(rw) obsdb.closeObsDatabase() elif dbname == 'trajectories': @@ -797,6 +830,11 @@ def __init__(self, db_dir, dt_range=None): print(f'there are {len(cur.fetchall())} successful trajectories') cur = trajdb.dbhandle.execute('select * from failed_trajectories') print(f'and {len(cur.fetchall())} failed trajectories') - trajdb.closeObsDatabase() + if action == 'execute': + print(stmt) + cur = trajdb.dbhandle.execute(stmt) + for rw in cur.fetchall(): + print(rw) + trajdb.closeTrajDatabase() else: log.info('valid database not specified') From b9d97476094c4bde5a8b6885231711e70bbc25b1 Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Mon, 9 Mar 2026 18:46:13 +0000 Subject: [PATCH 131/132] tidying up --- wmpl/Trajectory/CorrelateEngine.py | 20 ++- wmpl/Trajectory/CorrelateRMS.py | 214 ++++++++++------------------- 2 files changed, 77 insertions(+), 157 deletions(-) diff --git 
a/wmpl/Trajectory/CorrelateEngine.py b/wmpl/Trajectory/CorrelateEngine.py index 461a52af..46dbacdf 100644 --- a/wmpl/Trajectory/CorrelateEngine.py +++ b/wmpl/Trajectory/CorrelateEngine.py @@ -1058,16 +1058,9 @@ def solveTrajectory(self, traj, mc_runs, mcmode=MCMODE_ALL, matched_obs=None, or # we do not need to update the database for phase2 if mcmode != MCMODE_PHASE2: log.info('Updating database....') - self.dh.addTrajectory(traj) + self.dh.addTrajectory(traj, verbose=verbose) if matched_obs is not None: - if len(matched_obs[0])==3: - for _, obs, _ in matched_obs: - self.dh.observations_db.addPairedObs(obs.station_code, obs.id, obs.mean_dt, verbose=verbose) - else: - for _, obs in matched_obs: - self.dh.observations_db.addPairedObs(obs.station_code, obs.id, obs.mean_dt, verbose=verbose) - - + self.dh.addPairedObs(matched_obs, traj.jdt_ref, verbose=verbose) else: log.info('unable to fit trajectory') @@ -1300,7 +1293,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # List of all candidate trajectories candidate_trajectories = [] - + ### CHECK FOR PAIRING WITH PREVIOUSLY ESTIMATED TRAJECTORIES ### if total_unpaired > 0: log.info("") @@ -1468,7 +1461,7 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver if met_obs.processed: continue - if self.dh.observations_db.checkObsPaired(met_obs.id, verbose=verbose): + if self.dh.checkIfObsPaired(met_obs.id, verbose=verbose): continue # Get station platepar @@ -1574,7 +1567,10 @@ def run(self, event_time_range=None, bin_time_range=None, mcmode=MCMODE_ALL, ver # in candidate mode we want to save the candidates to disk if mcmode == MCMODE_CANDS: log.info("-----------------------") - log.info('5) SAVING {} CANDIDATES'.format(len(candidate_trajectories))) + if bin_time_range: + log.info(f'5) SAVING {len(candidate_trajectories)} CANDIDATES for {str(bin_time_range[0])} to {str(bin_time_range[1])}') + else: + log.info(f'5) SAVING {len(candidate_trajectories)} 
CANDIDATES for {str(bin_beg)} to {str(bin_end)}') log.info("-----------------------") # Save candidates. This will check and skip over already-processed diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index be8f0f75..ecc58e00 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -29,7 +29,7 @@ from wmpl.Utils.Pickling import loadPickle, savePickle from wmpl.Utils.TrajConversions import datetime2JD, jd2Date from wmpl.Utils.remoteDataHandling import RemoteDataHandler -from wmpl.Trajectory.CorrelateDB import ObservationDatabase, TrajectoryDatabase +from wmpl.Trajectory.CorrelateDB import ObservationsDatabase, TrajectoryDatabase # from wmpl.Trajectory.Trajectory import Trajectory from wmpl.Trajectory.CorrelateEngine import MCMODE_CANDS, MCMODE_PHASE1, MCMODE_PHASE2, MCMODE_ALL, MCMODE_BOTH @@ -222,115 +222,6 @@ def load(self, verbose=False): # do this here because the object dict is overwritten during the load operation above self.verbose = verbose - def save(self): - """ Save the database of processed meteors to disk. 
""" - - # Back up the existing data base - db_bak_file_path = self.db_file_path + ".bak" - if os.path.exists(self.db_file_path): - shutil.copy2(self.db_file_path, db_bak_file_path) - else: - return - - # Save the data base - try: - with open(self.db_file_path, 'w') as f: - self2 = copy.deepcopy(self) - - # Convert reduced trajectory objects to JSON objects - if hasattr(self2,'trajectories'): - self2.trajectories = {key: self.trajectories[key].__dict__ for key in self.trajectories} - if hasattr(self2, 'failed_trajectories'): - self2.failed_trajectories = {key: self.failed_trajectories[key].__dict__ - for key in self.failed_trajectories} - if hasattr(self2, 'phase1Trajectories'): - delattr(self2, 'phase1Trajectories') - - f.write(json.dumps(self2, default=lambda o: o.__dict__, indent=4, sort_keys=True)) - - # Remove the backup file - if os.path.exists(db_bak_file_path): - os.remove(db_bak_file_path) - - except Exception as e: - log.warning('unable to save the database, likely corrupt data') - shutil.copy2(db_bak_file_path, self.db_file_path) - log.warning(e) - - def checkTrajIfFailed(self, traj): - """ Check if the given trajectory has been computed with the same observations and has failed to be - computed before. 
- - """ - - # Check if the reference time is in the list of failed trajectories - if traj.jdt_ref in self.failed_trajectories: - - # Get the failed trajectory object - failed_traj = self.failed_trajectories[traj.jdt_ref] - - # Check if the same observations participate in the failed trajectory as in the trajectory that - # is being tested - all_match = True - for obs in traj.observations: - - if not ((obs.station_id in failed_traj.participating_stations) or (obs.station_id in failed_traj.ignored_stations)): - - all_match = False - break - - # If the same stations were used, the trajectory estimation failed before - if all_match: - return True - - return False - - def addTrajectory(self, traj_reduced, failed=False): - """ Add a computed trajectory to the list. - - Arguments: - traj_file_path: [str] Full path the trajectory object. - - Keyword arguments: - traj_obj: [bool] Instead of loading a traj object from disk, use the given object. - failed: [bool] Add as a failed trajectory. False by default. - """ - - if traj_reduced is None or not hasattr(traj_reduced, "jdt_ref"): - return None - - if self.verbose: - log.info(f' loaded {traj_reduced.traj_file_path}, traj_id {traj_reduced.traj_id}') - - - # Choose to which dictionary the trajectory will be added - if failed: - traj_dict = self.failed_trajectories - - else: - traj_dict = self.trajectories - - - # Add the trajectory to the list (key is the reference JD) - if traj_reduced.jdt_ref not in traj_dict: - traj_dict[traj_reduced.jdt_ref] = traj_reduced - else: - traj_dict[traj_reduced.jdt_ref].traj_id = traj_reduced.traj_id - - def removeTrajectory(self, traj_reduced, keep_folder=False): - """ Remove the trajectory from the data base and disk. 
""" - - # Remove the trajectory data base entry - if traj_reduced.jdt_ref in self.trajectories: - del self.trajectories[traj_reduced.jdt_ref] - - # Remove the trajectory folder on the disk - if not keep_folder and os.path.isfile(traj_reduced.traj_file_path): - traj_dir = os.path.dirname(traj_reduced.traj_file_path) - shutil.rmtree(traj_dir, ignore_errors=True) - if os.path.isfile(traj_reduced.traj_file_path): - log.info(f'unable to remove {traj_dir}') - class MeteorPointRMS(object): def __init__(self, frame, time_rel, x, y, ra, dec, azim, alt, mag): @@ -553,16 +444,19 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode else: self.old_db = None - self.observations_db = ObservationDatabase(db_dir) + self.observations_db = ObservationsDatabase(db_dir) if hasattr(self.old_db, 'paired_obs'): - # move any legacy paired obs data into sqlite - self.observations_db.moveObsJsonRecords(self.old_db.paired_obs, dt_range) + # copy any legacy paired obs data into sqlite + self.observations_db.copyObsJsonRecords(self.old_db.paired_obs, dt_range) - self.traj_db = TrajectoryDatabase(db_dir) + self.trajectory_db = TrajectoryDatabase(db_dir) if hasattr(self.old_db, 'failed_trajectories'): - # move any legacy failed traj data into sqlite - self.traj_db.moveFailedTrajectories(self.old_db.failed_trajectories, dt_range) + # copy any legacy failed traj data into sqlite, so we avoid recomputing them + self.trajectory_db.copyTrajJsonRecords(self.old_db.failed_trajectories, dt_range, failed=True) + if self.old_db: + del self.old_db + if archivemonths != 0: log.info('Archiving older entries....') try: @@ -586,7 +480,7 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode else: # in phase 2, initialise and collect data first as we need the phase1 traj on disk already - self.traj_db = None + self.trajectory_db = None self.observations_db = None self.initialiseRemoteDataHandling() @@ -691,9 +585,17 @@ def __init__(self, station, 
obs_id): # TODO check if this works self.observations_db.archiveObsDatabase(self.db_dir, arch_prefix, archdate_jd) - self.traj_db.archiveTrajDatabase(self.db_dir, arch_prefix, archdate_jd) + self.trajectory_db.archiveTrajDatabase(self.db_dir, arch_prefix, archdate_jd) return + + def closeObservationsDatabase(self): + self.observations_db.closeObsDatabase() + return + + def closeTrajectoryDatabase(self): + self.trajectory_db.closeTrajDatabase() + return def loadStations(self): """ Load the station names in the processing folder. """ @@ -891,7 +793,7 @@ def loadUnpairedObservations(self, processing_list, dt_range=None): continue # Add only unpaired observations - if not self.observations_db.checkObsPaired(met_obs.id, verbose=verbose): + if not self.checkIfObsPaired(met_obs.id, verbose=verbose): # print(" ", station_code, met_obs.reference_dt, rel_proc_path) added_count += 1 unpaired_met_obs_list.append(met_obs) @@ -1028,7 +930,7 @@ def removeDeletedTrajectories(self, verbose=True): if not os.path.isdir(self.output_dir): return - if self.traj_db is None: + if self.trajectory_db is None: return log.info(" Removing deleted trajectories from: " + self.output_dir) @@ -1039,7 +941,7 @@ def removeDeletedTrajectories(self, verbose=True): jdt_range = [datetime2JD(self.dt_range[0]), datetime2JD(self.dt_range[1])] - traj_list = self.traj_db.getTrajBasics(self.output_dir, jdt_range) + traj_list = self.trajectory_db.getTrajBasics(self.output_dir, jdt_range) i = 0 for traj in traj_list: if not os.path.isfile(os.path.join(self.output_dir, traj['traj_file_path'])): @@ -1062,7 +964,7 @@ def loadComputedTrajectories(self, dt_range=None): if not os.path.isdir(traj_dir_path): return - if self.traj_db is None: + if self.trajectory_db is None: return if dt_range is None: @@ -1122,7 +1024,7 @@ def loadComputedTrajectories(self, dt_range=None): if self.trajectoryFileInDtRange(file_name, dt_range=dt_range): - self.traj_db.addTrajectory(TrajectoryReduced(os.path.join(full_traj_dir, 
file_name))) + self.trajectory_db.addTrajectory(TrajectoryReduced(os.path.join(full_traj_dir, file_name)), force_add=False) # Print every 1000th trajectory if counter % 1000 == 0: @@ -1138,7 +1040,7 @@ def getComputedTrajectories(self, jd_beg, jd_end): """ Returns a list of computed trajectories between the Julian dates. """ jd_range = [jd_beg, jd_end] - json_dicts = self.traj_db.getTrajectories(self.output_dir, jd_range) + json_dicts = self.trajectory_db.getTrajectories(self.output_dir, jd_range) trajs = [TrajectoryReduced(None, json_dict=j) for j in json_dicts] return trajs @@ -1166,6 +1068,28 @@ def countryFilter(self, station_code1, station_code2): # If a given country is not in any of the groups, allow it to be paired return True + + def checkIfObsPaired(self, obs_id, verbose=False): + return self.observations_db.checkObsPaired(obs_id, verbose) + + def addPairedObs(self, matched_obs, jdt_ref, verbose=False): + """ + mark a list of observations as paired + + parameters: + matched_obs : a tuple containing the observations. + jdt_ref : the julian date of the Trajectory they are paired with. 
+ + """ + if len(matched_obs[0])==3: + obs_ids = [met_obs.id for _, met_obs, _ in matched_obs] + else: + obs_ids = [met_obs.id for _, met_obs in matched_obs] + jdt_refs = [jdt_ref] * len(obs_ids) + + self.observations_db.addPairedObservations(obs_ids, jdt_refs, verbose=verbose) + + return def findTimePairs(self, met_obs, unpaired_observations, max_toffset, verbose=False): """ Finds pairs in time between the given meteor observations and all other observations from @@ -1187,7 +1111,7 @@ def findTimePairs(self, met_obs, unpaired_observations, max_toffset, verbose=Fal # Go through all meteors from other stations for met_obs2 in unpaired_observations: - if self.observations_db.checkObsPaired(met_obs2.id, verbose=verbose): + if self.checkIfObsPaired(met_obs2.id, verbose=verbose): continue # Take only observations from different stations @@ -1328,7 +1252,7 @@ def addTrajectory(self, traj, failed_jdt_ref=None, verbose=False): failed_jdt_ref: [float] Reference Julian date of the failed trajectory. None by default. """ - if self.traj_db is None: + if self.trajectory_db is None: return # Set the correct output path traj.output_dir = self.generateTrajOutputDirectoryPath(traj) @@ -1341,7 +1265,7 @@ def addTrajectory(self, traj, failed_jdt_ref=None, verbose=False): if failed_jdt_ref is not None: traj_reduced.jdt_ref = failed_jdt_ref - self.traj_db.addTrajectory(traj_reduced, failed=(failed_jdt_ref is not None), verbose=verbose) + self.trajectory_db.addTrajectory(traj_reduced, failed=(failed_jdt_ref is not None), verbose=verbose) def removeTrajectory(self, traj_reduced, remove_phase1=False): """ Remove the trajectory from the data base and disk. 
""" @@ -1375,7 +1299,7 @@ def removeTrajectory(self, traj_reduced, remove_phase1=False): if os.path.isfile(traj_reduced.traj_file_path): log.warning(f'unable to remove {traj_dir}') - self.traj_db.removeTrajectory(traj_reduced) + self.trajectory_db.removeTrajectory(traj_reduced) def checkAlreadyProcessed(self, matched_observations, verbose=False): """ @@ -1395,7 +1319,8 @@ def checkAlreadyProcessed(self, matched_observations, verbose=False): log.info(f'Candidate {file_name} contains {len(cand)} observations') if os.path.isfile(os.path.join(save_dir, file_name)) or os.path.isfile(os.path.join(save_dir, 'processed', file_name)): - log.info(f'candidate {file_name} already processed') + if verbose: + log.info(f'candidate {file_name} already processed') continue else: @@ -1409,18 +1334,21 @@ def checkCandIfFailed(self, candidate): """ jdt_ref = min([obs.jdt_ref for obs, _, _ in candidate]) stations = [obs.station_id for obs, _, _ in candidate] - return self.traj_db.checkCandIfFailed(jdt_ref, stations) + return self.trajectory_db.checkCandIfFailed(jdt_ref, stations) def checkTrajIfFailed(self, traj): """ Check if the given trajectory has been computed with the same observations and has failed to be computed before. + Parameters: + traj: full trajectory object + """ - if self.traj_db is None: + if self.trajectory_db is None: return traj_reduced = TrajectoryReduced(None, traj_obj=traj) - return self.traj_db.checkTrajIfFailed(traj_reduced) + return self.trajectory_db.checkTrajIfFailed(traj_reduced) def loadFullTraj(self, traj_reduced): """ Load the full trajectory object. 
@@ -1527,7 +1455,7 @@ def moveUploadedData(self, verbose=False): and merges in the databases """ for node in self.RemoteDatahandler.nodes: - if node.nodename == 'localhost' or self.observations_db is None or self.traj_db is None: + if node.nodename == 'localhost' or self.observations_db is None or self.trajectory_db is None: continue # if the remote node upload path doesn't exist skip it @@ -1546,7 +1474,7 @@ def moveUploadedData(self, verbose=False): for trajdb_path in glob.glob(os.path.join(node.dirpath,'files','trajectories*.db')): - if self.traj_db.mergeTrajDatabase(trajdb_path): + if self.trajectory_db.mergeTrajDatabase(trajdb_path): os.remove(trajdb_path) i = 0 @@ -1626,14 +1554,10 @@ def saveCandOrTraj(self, traj, file_name, savetype='phase1', verbose=False): """ if savetype == 'phase1': save_dir = self.phase1_dir - required_mode = 2 + required_mode = MCMODE_PHASE2 else: save_dir = self.candidate_dir - required_mode = 1 - # don't resave the same candidate - if os.path.isfile(os.path.join(save_dir, file_name)) or os.path.isfile(os.path.join(save_dir, 'processed', file_name)): - log.info(f'candidate {file_name} already processed') - return False + required_mode = MCMODE_PHASE1 if self.RemoteDatahandler and self.RemoteDatahandler.mode == 'master': @@ -2082,15 +2006,15 @@ def signal_handler(sig, frame): log.info('uploading to master node') # close the databases and upload the data to the master node if mcmode != MCMODE_PHASE2: - dh.traj_db.closeTrajDatabase() - dh.observations_db.closeObsDatabase() + dh.closeTrajectoryDatabase() + dh.closeObservationsDatabase() dh.RemoteDatahandler.uploadToMaster(dh.output_dir, verbose=verbose) # truncate the tables here so they are clean for the next run if mcmode != MCMODE_PHASE2: - dh.traj_db = TrajectoryDatabase(dh.db_dir, purge_records=True) - dh.observations_db = ObservationDatabase(dh.db_dir, purge_records=True) + dh.trajectory_db = TrajectoryDatabase(dh.db_dir, purge_records=True) + dh.observations_db = 
ObservationsDatabase(dh.db_dir, purge_records=True) if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'master': dh.moveUploadedData(verbose=verbose) @@ -2101,7 +2025,7 @@ def signal_handler(sig, frame): break if mcmode & MCMODE_CANDS: - dh.observations_db.closeObsDatabase() + dh.closeObservationsDatabase() else: # there were no datasets to process From d7fabbd98afb35a29862957f7c162bb61beec89e Mon Sep 17 00:00:00 2001 From: Mark McIntyre Date: Tue, 10 Mar 2026 01:02:45 +0000 Subject: [PATCH 132/132] fixes for issues #86, #87, #88, #94 --- wmpl/Trajectory/CorrelateRMS.py | 174 +++++++++++++++++++++++++------- 1 file changed, 136 insertions(+), 38 deletions(-) diff --git a/wmpl/Trajectory/CorrelateRMS.py b/wmpl/Trajectory/CorrelateRMS.py index ecc58e00..2abea12c 100644 --- a/wmpl/Trajectory/CorrelateRMS.py +++ b/wmpl/Trajectory/CorrelateRMS.py @@ -387,6 +387,9 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode self.mc_mode = mcmode self.dir_path = dir_path + # create the data directory. Of course, if the folder doesn't exist there is nothing to process + # but by creating it we avoid an Exception later. And we can always copy data in. 
+ mkdirP(dir_path) self.dt_range = dt_range @@ -410,17 +413,25 @@ def __init__(self, dir_path, dt_range=None, db_dir=None, output_dir=None, mcmode # Create the output directory if it doesn't exist mkdirP(self.output_dir) + if dt_range is None or dt_range[0] == datetime.datetime(2000,1,1,0,0,0).replace(tzinfo=datetime.timezone.utc): + daysback = 14 + else: + daysback = (datetime.datetime.now().replace(tzinfo=datetime.timezone.utc) - dt_range[0]).days + 1 + # Candidate directory, if running in create or load cands modes self.candidate_dir = os.path.join(self.output_dir, 'candidates') - if not self.mc_mode & MCMODE_PHASE2: - mkdirP(os.path.join(self.candidate_dir, 'processed')) - self.purgeProcessedData(os.path.join(self.candidate_dir, 'processed')) + mkdirP(os.path.join(self.candidate_dir, 'processed')) + num_removed_cands = self.purgeProcessedData(os.path.join(self.candidate_dir, 'processed'), days_back=daysback, verbose=verbose) + log.info(f'removed {num_removed_cands} processed candidates') - # Phase 1 trajectory pickle directory needed to reload previous results. + # Phase 1 trajectory pickle directory needed to reload previous results when running phase2. self.phase1_dir = os.path.join(self.output_dir, 'phase1') - if self.mc_mode & MCMODE_PHASE1: - mkdirP(os.path.join(self.phase1_dir, 'processed')) - self.purgeProcessedData(os.path.join(self.phase1_dir, 'processed')) + mkdirP(os.path.join(self.phase1_dir, 'processed')) + num_removed_ph1 = self.purgeProcessedData(os.path.join(self.phase1_dir, 'processed'), days_back=daysback, verbose=verbose) + log.info(f'removed {num_removed_ph1} processed phase1') + + # In a previous incarnation, if the solver crashed it could leave some `.pickle_processing` files. 
+ self.cleanupPartialProcessing() self.verbose = verbose @@ -535,37 +546,40 @@ def initialiseRemoteDataHandling(self): else: self.RemoteDatahandler = None - def purgeProcessedData(self, dir_path, days_back=30): + def purgeProcessedData(self, dir_path, days_back=14, verbose=False): """ Purge processed candidate or phase1 data if it is older than 30 days. """ refdt = time.time() - days_back*86400 - result = [] - for path, _, files in os.walk(dir_path): - - for file in files: - - file_path = os.path.join(path, file) - - # Check if the file is older than the reference date - try: - file_dt = os.stat(file_path).st_mtime - except FileNotFoundError: - log.warning(f"File not found: {file_path}") - continue - - if os.path.exists(file_path) and (file_dt < refdt) and os.path.isfile(file_path): - - try: - os.remove(file_path) - result.append(file_path) + num_removed = 0 + log.info(f'purging processed data from {dir_path} thats older than {days_back} days') - except FileNotFoundError: - log.warning(f"File not found: {file_path}") - - except Exception as e: - log.error(f"Error removing file {file_path}: {e}") - - return result + for file_name in glob.glob(os.path.join(dir_path,'*.pickle')): + try: + file_dt = os.stat(file_name).st_mtime + if file_dt < refdt: + if verbose: + log.info(f'removing {file_name}') + os.remove(file_name) + num_removed += 1 + except FileNotFoundError: + log.warning(f"File disappeared: {file_name}") + continue + except Exception as e: + log.error(f"Error removing file {file_name}: {e}") + + return num_removed + + def cleanupPartialProcessing(self): + log.info('checking for partially-processed phase1 files') + i=0 + for i, file_name in enumerate(glob.glob(os.path.join(self.phase1_dir, '*.pickle_processing'))): + new_name = file_name.replace('_processing','') + if os.path.isfile(new_name): + os.remove(file_name) + else: + os.rename(file_name, new_name) + log.info(f'updated {i} partially-processed files') + return def archiveOldRecords(self, older_than=3): 
""" @@ -1454,6 +1468,7 @@ def moveUploadedData(self, verbose=False): Used in 'master' mode: this moves uploaded data to the target locations on the server and merges in the databases """ + log.info('merging in any remotely processed data') for node in self.RemoteDatahandler.nodes: if node.nodename == 'localhost' or self.observations_db is None or self.trajectory_db is None: continue @@ -1507,10 +1522,91 @@ def moveUploadedData(self, verbose=False): os.remove(full_name) if i > 0: - log.info(f'moved {i+1} phase 1 files from {node.nodename}') + log.info(f'moved {i+1} phase 1 solutions from {node.nodename}') + + # if the node was in mode 1 then move any uploaded processed candidates + remote_canddir = os.path.join(node.dirpath, 'files', 'candidates', 'processed') + if os.path.isdir(remote_canddir) and node.mode==1: + i = 0 + targ_dir = os.path.join(self.candidate_dir, 'processed') + for i, fil in enumerate([x for x in os.listdir(remote_canddir) if '.pickle' in x]): + full_name = os.path.join(remote_canddir, fil) + shutil.copy(full_name, targ_dir) + os.remove(full_name) + + if i > 0: + log.info(f'moved {i+1} processed candidates from {node.nodename}') return True + def checkAndRedistribCands(self, wait_time=6, verbose=False): + """ + Check child nodes and + 1) if the stop flag has appeared, move any pending data to prevent it getting stuck + 2) move data if it has been waiting more than wait_time hours + 3) if the node is idle, assign it extra data + + Parameters: + wait_time : time in hours to wait before data is considered stale + + """ + for node in self.RemoteDatahandler.nodes: + if node.nodename == 'localhost' or self.observations_db is None or self.trajectory_db is None: + continue + # if the remote node upload path doesn't exist skip it + if not os.path.isdir(os.path.join(node.dirpath,'files')): + continue + + # if the stop file has appeared, then move any pending candidates or phase1 files + if os.path.isfile(os.path.join(node.dirpath, 'files','stop')): + 
 log.info(f'{node.nodename} stopfile has appeared, moving data') + for full_name in glob.glob(os.path.join(node.dirpath, 'files', 'candidates', '*.pickle')): + shutil.copy(full_name, self.candidate_dir) + os.remove(full_name) + for full_name in glob.glob(os.path.join(node.dirpath, 'files', 'phase1', '*.pickle')): + shutil.copy(full_name, self.phase1_dir) + os.remove(full_name) + else: + # if the stop file isn't present and the nodes are idle, give them something to do + targ_dir = os.path.join(node.dirpath, 'files', 'candidates') + if len(glob.glob(os.path.join(targ_dir, '*.pickle'))) == 0 and node.mode == MCMODE_PHASE1: + # the node is waiting for data + log.info(f'{node.nodename} idle, giving it extra candidates') + i = 0 + for i, full_name in enumerate(glob.glob(os.path.join(self.candidate_dir, '*.pickle'))): + shutil.copy(full_name, targ_dir) + os.remove(full_name) + i +=1 + if i == node.capacity: + break + pass + targ_dir = os.path.join(node.dirpath, 'files', 'phase1') + if len(glob.glob(os.path.join(targ_dir, '*.pickle'))) == 0 and node.mode == MCMODE_PHASE2: + # the node is waiting for data + log.info(f'{node.nodename} idle, giving it extra phase1 data') + i = 0 + for i, full_name in enumerate(glob.glob(os.path.join(self.phase1_dir, '*.pickle'))): + shutil.copy(full_name, targ_dir) + os.remove(full_name) + i +=1 + if i == node.capacity: + break + pass + + # if the files have been in the node's folder for more than wait_time hours, move them + refdt = time.time() - wait_time*3600 + log.info(f'moving any stale data assigned to {node.nodename}') + for full_name in glob.glob(os.path.join(node.dirpath, 'files', 'candidates', '*.pickle')): + if os.stat(full_name).st_mtime < refdt: + shutil.copy(full_name, self.candidate_dir) + os.remove(full_name) + for full_name in glob.glob(os.path.join(node.dirpath, 'files', 'phase1', '*.pickle')): + if os.stat(full_name).st_mtime < refdt: + shutil.copy(full_name, self.phase1_dir) + os.remove(full_name) + + return + def 
getRemoteData(self, verbose=False): """ Used in 'child' mode: this downloads data from the master for local processing. @@ -1563,7 +1659,7 @@ def saveCandOrTraj(self, traj, file_name, savetype='phase1', verbose=False): # Select a random bucket, check its not already full, and then save the pickle there. # Make sure to break out once all buckets have been tested - # Fallback/default is to use the local phase_1 dir. + # Fallback/default is to use the local dir. tested_buckets = [] bucket_num = -1 bucket_list = self.RemoteDatahandler.nodes @@ -1572,7 +1668,7 @@ def saveCandOrTraj(self, traj, file_name, savetype='phase1', verbose=False): while bucket_num not in tested_buckets: bucket_num = secrets.randbelow(len(bucket_list)) bucket = bucket_list[bucket_num] - # if the child isn't in mc mode, skip it + # if the child isn't the right mode, skip it if bucket.mode != required_mode and bucket.mode != -1: tested_buckets.append(bucket_num) continue @@ -2017,8 +2113,10 @@ def signal_handler(sig, frame): dh.observations_db = ObservationsDatabase(dh.db_dir, purge_records=True) if dh.RemoteDatahandler and dh.RemoteDatahandler.mode == 'master': + # move any uploaded data and then check and rebalance any pending cands or phase1s dh.moveUploadedData(verbose=verbose) - pass + dh.checkAndRedistribCands(wait_time=6, verbose=verbose) + # If we're in either of these modes, the correlator will have scooped up available data # from candidates or phase1 folders so no need to keep looping. if mcmode == MCMODE_PHASE1 or mcmode == MCMODE_PHASE2: