diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..945f463 --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +ENV/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# PPSD output files +npz_*/ +*.png diff --git a/README.md b/README.md index 8286b4c..289a40f 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,6 @@ This utility automates the calculation, plotting, and export of Power Spectral D ## TODO: -- Custom plotting function and additional plotting parameters (day/night) (?) - Linux building (?) - Major refactor to automate all the data and station information collection (?) @@ -54,6 +53,27 @@ All these parameters are now visible in the GUI. Added a possibility to plot a custom RMS noise level on the plot. +### Time Filtering (Day/Night Mode) + +You can optionally filter PPSD data by time of day using the `time_filter` configuration option. This is useful for analyzing noise levels during specific time periods (e.g., daytime vs. nighttime). + +**Example configuration:** +```yaml +datasets: +- folder: data/station + response: response.xml + channels: [BHZ] + time_filter: + night_start: "22:00" # Start time in HH:MM format + night_stop: "06:00" # End time in HH:MM format +``` + +**Notes:** +- Time format: `HH:MM` or `HH:MM:SS` +- If `night_start` > `night_stop`, the range spans midnight (e.g., 22:00 to 06:00 means nighttime) +- The system calculates PPSD for all data, but only uses filtered .npz files when generating plots +- Omit `time_filter` to use all data (default behavior) + ## Output Structure Depending on the action used, the script generates the following output: diff --git a/example/example_config.yaml b/example/example_config.yaml index ac43113..6a922a4 100644 --- a/example/example_config.yaml +++ b/example/example_config.yaml @@ -18,6 +18,11 @@ datasets: xaxis_frequency: false response: example/IU_ANMO_RESP.xml timewindow: 600 + # Optional: Filter data by time of day (uncomment to enable) + # time_filter: + # night_start: "22:00" # Start time (HH:MM format) + # night_stop: "06:00" # End time (HH:MM format) + # Note: If night_start > night_stop, the range spans midnight - action: full channels: - 00.BH1 diff --git a/src/PPSD_plotter.py b/src/PPSD_plotter.py index c313ff5..794462a 100644 --- a/src/PPSD_plotter.py +++ b/src/PPSD_plotter.py @@ -8,35 +8,25 @@ from obspy.imaging.cm import pqlx from concurrent.futures import ThreadPoolExecutor from tqdm import tqdm +from ppsd_plotter_aux import ( + find_miniseed, + load_inventory, + parse_npz_timestamp, + is_time_in_range, + filter_npz_files_by_time, + split_trace_by_time_filter +) matplotlib.use("Agg") +# Default PPSD time window in seconds +DEFAULT_TIME_WINDOW = 3600 + def load_config(path): with open(path, 'r') as f: return yaml.safe_load(f) -def load_inventory(resp_file): - ext = Path(resp_file).suffix.lower() - - if ext in ['.seed', '.dataless']: - fmt = "SEED" - elif ext == '.xml': - fmt = "STATIONXML" - else: - fmt = None - - try: - if fmt: - inv = read_inventory(resp_file, format=fmt) - else: - inv = read_inventory(resp_file) - return inv - except Exception as e: - print(f"Failed to read inventory {resp_file}: {e}") - return - - def parse_channel(ch_str): parts = ch_str.split(".", 1) if len(parts) == 1: @@ -52,27 +42,7 @@ def safe_bool(val): return False -def find_miniseed(workdir, channel, location=None): - for file in Path(workdir).rglob("*"): - if file.suffix.lower() in [".msd", ".miniseed", ".mseed"]: - try: - st = read(str(file)) - for tr in st: - if location: - if ( - tr.stats.channel == channel and - tr.stats.location == location - ): - return str(file) - else: - if tr.stats.channel == channel: - return str(file) - except Exception as e: - print(f"Skipping {file} due to error: {e}") - return None - - -def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw): +def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw, time_filter=None): workdir = Path(workdir) Path(npzfolder).mkdir(exist_ok=True) @@ -88,13 +58,23 @@ def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw): st = read(str(file)) st = st.select(channel=channel, location=location) for trace in st: - ppsd = PPSD(trace.stats, metadata=inv, ppsd_length=tw) - ppsd.add(trace) - timestamp = trace.stats.starttime.strftime( - '%y-%m-%d_%H-%M-%S.%f' + # Split trace by time filter if needed + trace_segments = split_trace_by_time_filter(trace, time_filter) + + for trace_segment, label in trace_segments: + ppsd = PPSD(trace_segment.stats, metadata=inv, ppsd_length=tw) + ppsd.add(trace_segment) + + # Include label in filename if filtering is applied + timestamp = trace_segment.stats.starttime.strftime( + '%y-%m-%d_%H-%M-%S.%f' ) - outfile = npzfolder / f"{timestamp}.npz" - ppsd.save_npz(str(outfile)) + if label != 'all': + outfile = npzfolder / f"{timestamp}_{label}.npz" + else: + outfile = npzfolder / f"{timestamp}.npz" + + ppsd.save_npz(str(outfile)) except Exception as e: print( f"Error processing {file} for channel={channel}" @@ -104,7 +84,7 @@ def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw): def plot_ppsd( sampledata, channel, location, inv, npzfolder, output_folder, - tw, plot_kwargs=None + tw, plot_kwargs=None, time_filter=None ): if plot_kwargs is None: @@ -125,7 +105,27 @@ def plot_ppsd( ) trace = matches[0] ppsd = PPSD(trace.stats, inv, ppsd_length=tw) - for file in Path(npzfolder).glob("*.npz"): + + # Get all npz files and filter by time if needed + all_files = list(Path(npzfolder).glob("*.npz")) + + # Check if we have labeled files (_day or _night) + labeled_files = [f for f in all_files if f.stem.endswith('_day') or f.stem.endswith('_night')] + unlabeled_files = [f for f in all_files if not (f.stem.endswith('_day') or f.stem.endswith('_night'))] + + if time_filter and unlabeled_files and not labeled_files: + print("Warning: Using time_filter with unlabeled .npz files.") + print("For best results, recalculate with 'action: full' or 'action: calculate' to split traces at boundaries.") + print("Currently using timestamp-based filtering on existing files.") + + filtered_files = filter_npz_files_by_time(all_files, time_filter) + + if time_filter: + nfiltered = len(filtered_files) + ntotal = len(all_files) + print(f"Time filter applied: {nfiltered}/{ntotal} files selected") + + for file in filtered_files: try: ppsd.add_npz(str(file)) except Exception as e: @@ -196,6 +196,8 @@ def process_dataset(entry, tw): channels = entry["channels"] output_folder = entry.get("output_folder", folder) action = str(entry.get("action", "full")) + # Use timewindow from entry if available, otherwise use global tw + tw = entry.get("timewindow", tw) inv = load_inventory(resp_file) if not inv: @@ -247,14 +249,17 @@ def process_dataset(entry, tw): npzfolder = Path(folder) / f"npz_{channel}" if action in ["calculate", "full"]: - calculate_ppsd(folder, npzfolder, channel, loc_code, inv, tw) + time_filter = entry.get("time_filter") + calculate_ppsd(folder, npzfolder, channel, loc_code, inv, tw, time_filter) if action in ["plot", "full"]: sample = find_miniseed(folder, channel, loc_code) if sample: + time_filter = entry.get("time_filter") plot_ppsd( sample, channel, loc_code, inv, npzfolder, - output_folder, tw, plot_kwargs=plot_kwargs.copy() + output_folder, tw, plot_kwargs=plot_kwargs.copy(), + time_filter=time_filter ) else: print(f"No valid trace found in {folder} for {channel}") @@ -265,7 +270,7 @@ def process_dataset(entry, tw): def main(config_path): config = load_config(config_path) - tw = config["timewindow"] + tw = config.get("timewindow", DEFAULT_TIME_WINDOW) num_workers = config.get("num_workers", 1) datasets = config["datasets"] diff --git a/src/gui.py b/src/gui.py index 27f3df0..89554b3 100644 --- a/src/gui.py +++ b/src/gui.py @@ -20,9 +20,16 @@ from concurrent.futures import ProcessPoolExecutor, as_completed from tqdm import tqdm import numpy as np -from ppsd_plotter_aux import calculate_ppsd_worker, load_inventory, \ - find_miniseed_channels, find_miniseed, \ - calculate_noise_line +from ppsd_plotter_aux import ( + calculate_ppsd_worker, + load_inventory, + find_miniseed_channels, + find_miniseed, + calculate_noise_line, + parse_npz_timestamp, + is_time_in_range, + filter_npz_files_by_time +) from localization_dicts import ALL_LABELS, ALL_SOFTWARE_LABELS, ALL_TOOLTIPS matplotlib.use("TkAgg") @@ -85,6 +92,7 @@ "action": "full", "timewindow": 3600, "plot_kwargs": DEFAULT_PLOT_KWARGS.copy(), + "time_filter": None, } ACTIONS = ["plot", "calculate", "full", "convert"] @@ -203,7 +211,7 @@ def safe_bool(val): def calculate_ppsd( - folder, inv, tw, channel_list, callback=None, max_workers=None + folder, inv, tw, channel_list, callback=None, max_workers=None, time_filter=None ): folder = Path(folder) files = list(folder.rglob("*")) @@ -257,7 +265,7 @@ def update_progress(): with ProcessPoolExecutor(max_workers=cpu_count) as executor: futures = [ - executor.submit(calculate_ppsd_worker, chunk, inv, tw, folder) + executor.submit(calculate_ppsd_worker, chunk, inv, tw, folder, time_filter) for chunk in chunks ] for future in as_completed(futures): @@ -294,6 +302,7 @@ def process_dataset_visual(ds, progress_update_callback): tw=int(ds.get("timewindow", 3600)), channel_list=channels, callback=progress_update_callback, + time_filter=ds.get("time_filter"), ) for i, (loc_code, channel) in enumerate(parsed_channels): @@ -314,7 +323,8 @@ def process_dataset_visual(ds, progress_update_callback): npzfolder, int(ds.get("timewindow", 3600)), plot_kwargs.copy(), - custom_noise_line=ds.get("custom_noise_line") + custom_noise_line=ds.get("custom_noise_line"), + time_filter=ds.get("time_filter") ) else: progress_update_callback(progress, f"No data for {ch_label}") @@ -328,7 +338,7 @@ def process_dataset_visual(ds, progress_update_callback): def plot_ppsd_interactive( sampledata, channel, location, inv, npzfolder, tw, plot_kwargs=None, - custom_noise_line=None + custom_noise_line=None, time_filter=None ): if plot_kwargs is None: plot_kwargs = {} @@ -356,7 +366,26 @@ def plot_ppsd_interactive( trace = matches[0] ppsd = PPSD(trace.stats, inv, ppsd_length=tw) - for file in Path(npzfolder).glob("*.npz"): + # Get all npz files and filter by time if needed + all_files = list(Path(npzfolder).glob("*.npz")) + + # Check if we have labeled files (_day or _night) + labeled_files = [f for f in all_files if f.stem.endswith('_day') or f.stem.endswith('_night')] + unlabeled_files = [f for f in all_files if not (f.stem.endswith('_day') or f.stem.endswith('_night'))] + + if time_filter and unlabeled_files and not labeled_files: + print("Warning: Using time_filter with unlabeled .npz files.") + print("For best results, recalculate with action='full' or 'calculate' to split traces at boundaries.") + print("Currently using timestamp-based filtering on existing files.") + + filtered_files = filter_npz_files_by_time(all_files, time_filter) + + if time_filter: + nfiltered = len(filtered_files) + ntotal = len(all_files) + print(f"Time filter applied: {nfiltered}/{ntotal} files selected") + + for file in filtered_files: try: ppsd.add_npz(str(file)) except Exception as e: @@ -597,6 +626,55 @@ def build(self): self.plot_kwargs_vars[key] = var row += 1 + # Time Filter section + textlabel = PARAM_LABELS.get("time_filter", "Time Filter") + label = ttk.Label(self, text=textlabel) + label.grid(row=row, column=0, columnspan=2, sticky="w") + ToolTip(label, PARAM_TOOLTIPS.get("time_filter", "")) + row += 1 + + # Initialize time_filter_vars + self.time_filter_vars = { + "night_start": tk.StringVar(value=""), + "night_stop": tk.StringVar(value="") + } + + # Load existing time_filter values if present + tf = self.dataset.get("time_filter") + if isinstance(tf, dict): + self.time_filter_vars["night_start"].set( + str(tf.get("night_start", "")) + ) + self.time_filter_vars["night_stop"].set( + str(tf.get("night_stop", "")) + ) + + # Night Start Time + label = ttk.Label( + self, text=PARAM_LABELS.get("night_start", "Start Time") + ":" + ) + label.grid(row=row, column=0, sticky="w") + ToolTip(label, PARAM_TOOLTIPS.get("night_start", "")) + entry = ttk.Entry( + self, textvariable=self.time_filter_vars["night_start"], width=10 + ) + entry.grid(row=row, column=1, sticky="w") + entry.bind("", self.update_time_filter) + row += 1 + + # Night Stop Time + label = ttk.Label( + self, text=PARAM_LABELS.get("night_stop", "End Time") + ":" + ) + label.grid(row=row, column=0, sticky="w") + ToolTip(label, PARAM_TOOLTIPS.get("night_stop", "")) + entry = ttk.Entry( + self, textvariable=self.time_filter_vars["night_stop"], width=10 + ) + entry.grid(row=row, column=1, sticky="w") + entry.bind("", self.update_time_filter) + row += 1 + textlabel = ALL_SOFTWARE_LABELS[CURRENT_LANG].get("custom_noise") label = ttk.Label(self, text=textlabel) label.grid(row=row, column=0, columnspan=2, sticky="w") @@ -809,6 +887,26 @@ def update_custom_noise_field(self, field): not line.get("color")): self.dataset["custom_noise_line"] = None + def update_time_filter(self, event=None): + """Update the time_filter in the dataset.""" + night_start = self.time_filter_vars["night_start"].get().strip() + night_stop = self.time_filter_vars["night_stop"].get().strip() + + # If both fields are empty, remove time_filter + if not night_start and not night_stop: + self.dataset["time_filter"] = None + return + + # If both fields are provided, create time_filter dict + if night_start and night_stop: + self.dataset["time_filter"] = { + "night_start": night_start, + "night_stop": night_stop + } + else: + # If only one field is provided, don't set filter (invalid) + self.dataset["time_filter"] = None + def run_this_dataset(self): self.status_label.config(text="Starting...", foreground="orange") self.progress["value"] = 0 diff --git a/src/localization_dicts.py b/src/localization_dicts.py index 0a86ee3..8e0ae2d 100644 --- a/src/localization_dicts.py +++ b/src/localization_dicts.py @@ -19,6 +19,9 @@ "xaxis_frequency": "X Axis Frequency", "action": "Action", "timewindow": "Timewindow", + "time_filter": "Time Filter", + "night_start": "Start Time", + "night_stop": "End Time", } PARAM_TOOLTIPS_EN = { @@ -42,7 +45,10 @@ "xaxis_frequency": "Show frequency instead of period on x-axis.", "amplitude": "Amplitude of the custom noise line (in millig, mg)", "freq_range": "Frequency range in Hz (two floats, e.g. 1.0, 10.0)", - "color": "Line color (select from the list of available names)" + "color": "Line color (select from the list of available names)", + "time_filter": "Filter data by time of day (leave empty to use all data)", + "night_start": "Start time in HH:MM format (e.g. 22:00 for nighttime)", + "night_stop": "End time in HH:MM format (e.g. 06:00 for nighttime)", } SOFTWARE_LABELS_EN = { @@ -94,6 +100,9 @@ "xaxis_frequency": "X-Achse: Frequenz", "action": "Aktion", "timewindow": "Zeitfenster", + "time_filter": "Zeitfilter", + "night_start": "Startzeit", + "night_stop": "Endzeit", } PARAM_TOOLTIPS_DE = { @@ -117,7 +126,10 @@ "xaxis_frequency": "Frequenz statt Periode auf der X-Achse anzeigen.", "amplitude": "Amplitude der benutzerdefinierten Rauschlinie (in Millig, mg)", "freq_range": "Frequenzbereich in Hz (zwei Werte, z. B. 1.0, 10.0)", - "color": "Linienfarbe (aus der Liste verfügbarer Namen wählen)" + "color": "Linienfarbe (aus der Liste verfügbarer Namen wählen)", + "time_filter": "Daten nach Tageszeit filtern (leer lassen für alle Daten)", + "night_start": "Startzeit im Format HH:MM (z. B. 22:00 für Nachtzeit)", + "night_stop": "Endzeit im Format HH:MM (z. B. 06:00 für Nachtzeit)", } SOFTWARE_LABELS_DE = { @@ -168,6 +180,9 @@ "xaxis_frequency": "Частота по оси X", "action": "Действие", "timewindow": "Временное окно", + "time_filter": "Фильтр времени", + "night_start": "Время начала", + "night_stop": "Время окончания", } PARAM_TOOLTIPS_RU = { @@ -191,7 +206,10 @@ "xaxis_frequency": "Показать частоту вместо периода по оси X.", "amplitude": "Амплитуда пользовательской линии шума (в миллиg, mg)", "freq_range": "Частотный диапазон в Гц (две величины, например 1.0, 10.0)", - "color": "Цвет линии (выберите из списка доступных названий)" + "color": "Цвет линии (выберите из списка доступных названий)", + "time_filter": "Фильтровать данные по времени суток (оставьте пустым для использования всех данных)", + "night_start": "Время начала в формате ЧЧ:ММ (например, 22:00 для ночного времени)", + "night_stop": "Время окончания в формате ЧЧ:ММ (например, 06:00 для ночного времени)", } SOFTWARE_LABELS_RU = { @@ -243,6 +261,9 @@ "xaxis_frequency": "X osa: frekvencija", "action": "Akcija", "timewindow": "Vremenski prozor", + "time_filter": "Vremenski filter", + "night_start": "Vreme početka", + "night_stop": "Vreme završetka", } PARAM_TOOLTIPS_RS = { @@ -266,7 +287,10 @@ "xaxis_frequency": "Prikazuj frekvenciju umesto perioda na X osi.", "amplitude": "Amplituda prilagođene linije šuma (u miligima, mg)", "freq_range": "Opseg frekvencije u Hz (dve vrednosti, npr. 1.0, 10.0)", - "color": "Boja linije (izaberite iz liste dostupnih naziva)" + "color": "Boja linije (izaberite iz liste dostupnih naziva)", + "time_filter": "Filtriraj podatke po vremenu dana (ostavite prazno za sve podatke)", + "night_start": "Vreme početka u formatu HH:MM (npr. 22:00 za noćno vreme)", + "night_stop": "Vreme završetka u formatu HH:MM (npr. 06:00 za noćno vreme)", } SOFTWARE_LABELS_RS = { @@ -317,6 +341,9 @@ "xaxis_frequency": "X Ekseninde Frekans", "action": "İşlem", "timewindow": "Zaman Penceresi", + "time_filter": "Zaman Filtresi", + "night_start": "Başlangıç Zamanı", + "night_stop": "Bitiş Zamanı", } PARAM_TOOLTIPS_TR = { @@ -340,7 +367,10 @@ "xaxis_frequency": "X ekseninde periyot yerine frekans göster.", "amplitude": "Özel gürültü çizgisinin genliği (millig cinsinden, mg)", "freq_range": "Frekans aralığı (Hz cinsinden, iki sayı, örn. 1.0, 10.0)", - "color": "Çizgi rengi (mevcut isim listesinden seçiniz)" + "color": "Çizgi rengi (mevcut isim listesinden seçiniz)", + "time_filter": "Verileri günün saatine göre filtrele (tüm verileri kullanmak için boş bırakın)", + "night_start": "Başlangıç zamanı HH:MM formatında (örn. gece için 22:00)", + "night_stop": "Bitiş zamanı HH:MM formatında (örn. gece için 06:00)", } SOFTWARE_LABELS_TR = { diff --git a/src/ppsd_plotter_aux.py b/src/ppsd_plotter_aux.py index cd2cb2a..aa25443 100644 --- a/src/ppsd_plotter_aux.py +++ b/src/ppsd_plotter_aux.py @@ -4,6 +4,8 @@ from obspy.signal import PPSD import sys import numpy as np +from datetime import datetime, time as dt_time, timedelta +from obspy import UTCDateTime def find_miniseed_channels(folder): @@ -46,7 +48,308 @@ def find_miniseed(workdir, channel, location=None): return None -def calculate_ppsd_worker(job_list, inv_path, tw, folder): +def parse_npz_timestamp(filename): + """ + Parse timestamp from npz filename. + Format: yy-mm-dd_HH-MM-SS.ffffff.npz + Returns datetime object or None if parsing fails. + """ + try: + stem = Path(filename).stem # Remove .npz extension + # Parse the timestamp: yy-mm-dd_HH-MM-SS.ffffff + dt = datetime.strptime(stem, '%y-%m-%d_%H-%M-%S.%f') + return dt + except Exception: + return None + + +def is_time_in_range(dt, start_time, end_time): + """ + Check if datetime's time component falls within start_time and end_time. + Handles ranges that span midnight (e.g., 22:00 to 06:00). + + Args: + dt: datetime object + start_time: time object (e.g., time(22, 0)) + end_time: time object (e.g., time(6, 0)) + + Returns: + True if dt.time() is within the range, False otherwise + """ + if start_time is None or end_time is None: + return True + + t = dt.time() + + # Normal range (e.g., 06:00 to 22:00) + if start_time <= end_time: + return start_time <= t <= end_time + # Range spans midnight (e.g., 22:00 to 06:00) + else: + return t >= start_time or t <= end_time + + +def filter_npz_files_by_time(npz_files, time_filter): + """ + Filter npz files based on time_filter configuration. + + With the new approach, files are split during calculation and have + _day or _night suffix based on the original time_filter used during calculation. + This function filters files to match the requested time range. + + Args: + npz_files: list of Path objects + time_filter: dict with optional 'night_start' and 'night_stop' keys + (e.g., {'night_start': '22:00', 'night_stop': '06:00'}) + These specify the desired time range to filter for. + + Returns: + filtered list of Path objects + """ + if not time_filter: + return npz_files + + night_start_str = time_filter.get('night_start') + night_stop_str = time_filter.get('night_stop') + + if not night_start_str or not night_stop_str: + return npz_files + + # Parse time filter + try: + time_formats = ['%H:%M', '%H:%M:%S'] + filter_start = None + filter_stop = None + + for fmt in time_formats: + try: + filter_start = datetime.strptime(night_start_str, fmt).time() + filter_stop = datetime.strptime(night_stop_str, fmt).time() + break + except ValueError: + continue + + if filter_start is None or filter_stop is None: + print(f"Warning: Invalid time format in time_filter. " + f"Expected 'HH:MM' or 'HH:MM:SS', got start='{night_start_str}', " + f"stop='{night_stop_str}'. Using all files.") + return npz_files + except Exception: + return npz_files + + # Determine if the time range spans midnight (night) or not (day) + # If filter_start > filter_stop (e.g., 22:00 to 06:00), it's a night range + # If filter_start <= filter_stop (e.g., 06:00 to 22:00), it's a day range + is_night_range = filter_start > filter_stop + + filtered = [] + for file in npz_files: + filename = file.stem # Get filename without .npz + + # Check if this is a labeled file (new format) + if filename.endswith('_night'): + # This file contains night data + # Include if we're filtering for night range + if is_night_range: + filtered.append(file) + elif filename.endswith('_day'): + # This file contains day data + # Include if we're filtering for day range + if not is_night_range: + filtered.append(file) + else: + # Old format without label - use timestamp-based filtering + # parse_npz_timestamp expects the full filename with .npz + dt = parse_npz_timestamp(file.name) + if dt and is_time_in_range(dt, filter_start, filter_stop): + filtered.append(file) + + return filtered + + +def split_trace_by_time_filter(trace, time_filter): + """ + Split a trace into segments based on time_filter. + + Args: + trace: ObsPy Trace object + time_filter: dict with 'night_start' and 'night_stop' keys (HH:MM format) + or None to return trace as-is + + Returns: + list of tuples: [(trace_segment, label), ...] + where label is 'day' or 'night' or 'all' (if no filter) + """ + if not time_filter: + return [(trace, 'all')] + + night_start_str = time_filter.get('night_start') + night_stop_str = time_filter.get('night_stop') + + if not night_start_str or not night_stop_str: + return [(trace, 'all')] + + try: + # Parse time strings + time_formats = ['%H:%M', '%H:%M:%S'] + night_start = None + night_stop = None + + for fmt in time_formats: + try: + night_start = datetime.strptime(night_start_str, fmt).time() + night_stop = datetime.strptime(night_stop_str, fmt).time() + break + except ValueError: + continue + + if night_start is None or night_stop is None: + return [(trace, 'all')] + + except Exception: + return [(trace, 'all')] + + # Get trace time range + starttime = trace.stats.starttime + endtime = trace.stats.endtime + + # Check if entire trace falls within one period + start_dt = starttime.datetime + end_dt = endtime.datetime + + # If trace is within a single period (day or night), no splitting needed + start_in_range = is_time_in_range(start_dt, night_start, night_stop) + end_in_range = is_time_in_range(end_dt, night_start, night_stop) + + # If both start and end are in the same period and trace is short enough + # (less than 12 hours to avoid spanning multiple days) + trace_duration_hours = (endtime - starttime) / 3600.0 + if start_in_range == end_in_range and trace_duration_hours < 12: + label = 'night' if start_in_range else 'day' + return [(trace, label)] + + # Need to split the trace - find all transition times + result = [] + current_time = UTCDateTime(starttime) + + # Iterate through each day in the trace + while current_time < endtime: + current_date = current_time.datetime.date() + + # Calculate transition times for this day + # Night start time on current day + night_start_utc = UTCDateTime( + datetime.combine(current_date, night_start) + ) + + # Determine night end time + if night_start <= night_stop: + # Normal range (e.g., 06:00 to 22:00 is day) + # So night is outside this range + # This means we have TWO night periods in a day + # This case is complex, let's handle the common case first + night_end_utc = UTCDateTime( + datetime.combine(current_date, night_stop) + ) + else: + # Night spans midnight (e.g., 22:00 to 06:00) + # Night ends next day + next_date = current_date + timedelta(days=1) + night_end_utc = UTCDateTime( + datetime.combine(next_date, night_stop) + ) + + # Create segments for this day + # We'll just split at the boundaries and label each segment + day_boundaries = [] + + if night_start <= night_stop: + # Day period is FROM night_stop TO night_start + # (e.g., 06:00-22:00 is day, rest is night) + day_start = UTCDateTime(datetime.combine(current_date, night_stop)) + day_end = UTCDateTime(datetime.combine(current_date, night_start)) + + # Night before day_start + if current_time < day_start and day_start < endtime: + seg_end = min(day_start, endtime) + if seg_end > current_time: + day_boundaries.append((current_time, seg_end, 'night')) + current_time = seg_end + + # Day period + if current_time < day_end and day_end <= endtime: + seg_end = min(day_end, endtime) + if seg_end > current_time: + day_boundaries.append((current_time, seg_end, 'day')) + current_time = seg_end + + # Night after day_end + next_day_start = UTCDateTime( + datetime.combine(current_date + timedelta(days=1), dt_time(0, 0)) + ) + seg_end = min(next_day_start, endtime) + if seg_end > current_time: + day_boundaries.append((current_time, seg_end, 'night')) + + else: + # Night spans midnight (common case: e.g., 22:00 to 07:00) + # Night is FROM night_start TO night_stop (next day) + # Day is FROM night_stop TO night_start + + # For the current day: + # - Day starts at night_stop (e.g., 07:00) + # - Day ends at night_start (e.g., 22:00) + # - Night is before day_start and after day_end + + day_start = UTCDateTime(datetime.combine(current_date, night_stop)) + day_end = UTCDateTime(datetime.combine(current_date, night_start)) + + # If we're in the night period from previous day (before day_start) + if current_time < day_start and day_start <= endtime: + seg_end = min(day_start, endtime) + if seg_end > current_time: + day_boundaries.append((current_time, seg_end, 'night')) + current_time = seg_end + + # Day period (from day_start to day_end) + if current_time < day_end and day_end <= endtime: + seg_end = min(day_end, endtime) + if seg_end > current_time: + day_boundaries.append((current_time, seg_end, 'day')) + current_time = seg_end + + # Night period starting at day_end (after day_end until midnight) + next_day_start = UTCDateTime( + datetime.combine(current_date + timedelta(days=1), dt_time(0, 0)) + ) + seg_end = min(next_day_start, endtime) + if seg_end > current_time: + day_boundaries.append((current_time, seg_end, 'night')) + + # Move to next day + current_time = UTCDateTime( + datetime.combine(current_date + timedelta(days=1), dt_time(0, 0)) + ) + + # Create trace segments + for seg_start, seg_end, label in day_boundaries: + if seg_end > seg_start: + try: + seg_trace = trace.slice(seg_start, seg_end) + if len(seg_trace.data) > 0: + result.append((seg_trace, label)) + except Exception as e: + print(f"Warning: Could not slice trace: {e}") + continue + + # If we couldn't split for some reason, return original + if not result: + return [(trace, 'all')] + + return result + + +def calculate_ppsd_worker(job_list, inv_path, tw, folder, time_filter=None): inv = load_inventory(inv_path) for file, loc, chan in job_list: @@ -69,11 +372,22 @@ def calculate_ppsd_worker(job_list, inv_path, tw, folder): npzfolder = Path(resource_path(folder)) / f"npz_{chan}" npzfolder.mkdir(exist_ok=True) - ppsd = PPSD(tr.stats, metadata=inv, ppsd_length=tw) - ppsd.add(tr) - timestamp = tr.stats.starttime.strftime("%y-%m-%d_%H-%M-%S.%f") - outfile = npzfolder / f"{timestamp}.npz" - ppsd.save_npz(str(outfile)) + + # Split trace by time filter if needed + trace_segments = split_trace_by_time_filter(tr, time_filter) + + for trace_segment, label in trace_segments: + ppsd = PPSD(trace_segment.stats, metadata=inv, ppsd_length=tw) + ppsd.add(trace_segment) + timestamp = trace_segment.stats.starttime.strftime("%y-%m-%d_%H-%M-%S.%f") + + # Include label in filename if filtering is applied + if label != 'all': + outfile = npzfolder / f"{timestamp}_{label}.npz" + else: + outfile = npzfolder / f"{timestamp}.npz" + + ppsd.save_npz(str(outfile)) except Exception as e: print( f"[{os.getpid()}] Error processing {file.name}"