diff --git a/README.md b/README.md index c4cf8dd..10c79eb 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![PPSD_Plotter Test Linux](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_Linux.yml/badge.svg)](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_Linux.yml) [![PPSD_Plotter Test Windows](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_Windows.yml/badge.svg)](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_Windows.yml) [![PPSD_Plotter Test MacOS](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_MacOS.yml/badge.svg)](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_MacOS.yml) +
![Python versions:](https://img.shields.io/badge/python-3.8_%7C_3.9_%7C_3.10_%7C_3.11_%7C_3.12%7C_3.13-blue?)
![License: GPL v3](https://img.shields.io/badge/License-GPLv3-brightgreen.svg) @@ -17,8 +18,8 @@ This script automates the calculation, plotting, and export of Power Spectral De ## TODO: - Better threading -- Additional configuration options -- Fix location codes ambiguity +- GUI +- Wildcard support --- @@ -42,25 +43,31 @@ pip install obspy pyyaml tqdm ## Configuration File: `config.yaml` -The script uses a YAML file to define how each dataset is processed. +The script uses a YAML file to define how each dataset is processed.
An example configuration is provided in the ```example``` folder.
+You can pass additional plotting parameters to the ```PPSD.plot()``` function from ObsPy. +For the full list of supported options, please refer to the [ObsPy documentation](https://docs.obspy.org/packages/autogen/obspy.signal.spectral_estimation.PPSD.plot.html) ``` -timewindow: 3600 # PPSD time window in seconds -num_workers: 4 # Number of jobs to run in parallel -units: hz +# 3600 should be used as a default; 60 is used for a small example dataset +timewindow: 600 +num_workers: 2 datasets: - - folder: "data/st01" # Path to waveform files (.mseed, .miniseed, .msd) - response: "responses/ST01.xml" # Station response file (.xml, .dataless) - channels: ["HHZ", "HHE"] # List of channels to process - action: full # Processing action (see below) - output_folder: "outputs/st01" # Folder to save output plots - - - folder: "data/st02" - response: "responses/ST02.xml" - channels: ["BHZ"] - action: calculate - output_folder: "outputs/st02" + + - folder: "example/IU.ANMO..D" + response: "example/IU_ANMO_RESP.xml" + channels: ["BHZ", "BH1", "BH2"] + action: full + output_folder: "example/result" + + - folder: "example/IU.GRFO..D" + response: "example/IU_GRFO_RESP.xml" + channels: ["00.BHZ", "00.BH1", "00.BH2"] + action: full + output_folder: "example/result" + figsize: [6, 6] + show_mean: yes + grid: no ``` --- @@ -94,7 +101,7 @@ Depending on the action used, the script generates the following output: - `.npz` files saved to: ``` - /npz_/ + /npz__/ ``` - `.png` plots saved to: @@ -104,7 +111,7 @@ Depending on the action used, the script generates the following output: - `.csv` exported data (for action "convert") saved to: ``` - /npz__text/export.csv + /npz___text/export.csv ``` ## Example Data Acknowledgment diff --git a/example/example_config.yaml b/example/example_config.yaml index 88247a0..cc138a8 100644 --- a/example/example_config.yaml +++ b/example/example_config.yaml @@ -1,7 +1,6 @@ # 3600 should be used as a default; 60 is used for a small example dataset timewindow: 600 num_workers: 2 -units: hz datasets: @@ -13,6 +12,9 @@ datasets: - folder: "example/IU.GRFO..D" response: "example/IU_GRFO_RESP.xml" - channels: ["BHZ", "BH1", "BH2"] + channels: ["00.BHZ", "00.BH1", "00.BH2"] action: full output_folder: "example/result" + figsize: [6, 6] + show_mean: yes + grid: no diff --git a/src/PPSD_plotter.py b/src/PPSD_plotter.py index 8ad814d..c313ff5 100644 --- a/src/PPSD_plotter.py +++ b/src/PPSD_plotter.py @@ -37,22 +37,44 @@ def load_inventory(resp_file): return -def find_miniseed(workdir, channel): +def parse_channel(ch_str): + parts = ch_str.split(".", 1) + if len(parts) == 1: + return None, parts[0] + return parts[0], parts[1] + + +def safe_bool(val): + if isinstance(val, bool): + return val + if isinstance(val, str): + return val.strip().lower() in ("yes", "true", "1", "on") + return False + + +def find_miniseed(workdir, channel, location=None): for file in Path(workdir).rglob("*"): if file.suffix.lower() in [".msd", ".miniseed", ".mseed"]: try: st = read(str(file)) - if any(tr.stats.channel == channel for tr in st): - return str(file) + for tr in st: + if location: + if ( + tr.stats.channel == channel and + tr.stats.location == location + ): + return str(file) + else: + if tr.stats.channel == channel: + return str(file) except Exception as e: print(f"Skipping {file} due to error: {e}") return None -def calculate_ppsd(workdir, channel, inv, tw): +def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw): workdir = Path(workdir) - outdir = workdir / f"npz_{channel}" - outdir.mkdir(exist_ok=True) + Path(npzfolder).mkdir(exist_ok=True) files = [ f for f in workdir.rglob("*") @@ -64,23 +86,45 @@ def calculate_ppsd(workdir, channel, inv, tw): ): try: st = read(str(file)) - for trace in st.select(channel=channel): + st = st.select(channel=channel, location=location) + for trace in st: ppsd = PPSD(trace.stats, metadata=inv, ppsd_length=tw) ppsd.add(trace) timestamp = trace.stats.starttime.strftime( '%y-%m-%d_%H-%M-%S.%f' ) - outfile = outdir / f"{timestamp}.npz" + outfile = npzfolder / f"{timestamp}.npz" ppsd.save_npz(str(outfile)) except Exception as e: - print(f"Error processing {file}: {e}") + print( + f"Error processing {file} for channel={channel}" + f"location={location}: {e}" + ) -def plot_ppsd(sampledata, channel, inv, npzfolder, output_folder, tw, units): +def plot_ppsd( + sampledata, channel, location, inv, npzfolder, output_folder, + tw, plot_kwargs=None + ): + + if plot_kwargs is None: + plot_kwargs = {} + st = read(sampledata) - trace = st.select(channel=channel)[0] + if location: + matches = st.select(channel=channel, location=location) + else: + matches = st.select(channel=channel) + if not matches: + print(f"No matching trace for channel={channel} location={location}") + return + if location is None and len(matches) > 1: + print( + f"Warning: Multiple locations found for {channel}." + f"Using first: {matches[0].stats.location}" + ) + trace = matches[0] ppsd = PPSD(trace.stats, inv, ppsd_length=tw) - for file in Path(npzfolder).glob("*.npz"): try: ppsd.add_npz(str(file)) @@ -89,13 +133,20 @@ def plot_ppsd(sampledata, channel, inv, npzfolder, output_folder, tw, units): output_folder = Path(output_folder) output_folder.mkdir(parents=True, exist_ok=True) - figfile = output_folder / f"{trace.id}.png" + + cmap = plot_kwargs.pop("cmap", pqlx) + figsize = plot_kwargs.pop("figsize", (12, 6)) + dpi = plot_kwargs.pop("dpi", 300) + try: - fig = ppsd.plot(cmap=pqlx, show_mean=True, show_histogram=True, - xaxis_frequency=units, show_mode=True, show=False) - fig.set_size_inches(12, 6) - fig.savefig(figfile, dpi=300) + fig = ppsd.plot( + cmap=cmap, + show=False, + **plot_kwargs + ) + fig.set_size_inches(figsize) + fig.savefig(figfile, dpi=dpi) print(f"Saved plot: {figfile}") except Exception as e: print(f"Error: {e}") @@ -139,30 +190,72 @@ def convert_npz_to_text(npzdir): print("No PSD entries found.") -def process_dataset(entry, tw, units): +def process_dataset(entry, tw): folder = entry["folder"] resp_file = entry["response"] channels = entry["channels"] output_folder = entry.get("output_folder", folder) action = str(entry.get("action", "full")) inv = load_inventory(resp_file) + if not inv: print(f"Failed to read inventory {resp_file}") return - for channel in channels: - print(f"===> {folder} | {channel} | action={action}") - npzfolder = Path(folder) / f"npz_{channel}" + PLOT_KWARGS = { + "show_coverage", + "show_percentiles", + "show_histogram", + "percentiles", + "show_noise_models", + "show_earthquakes", + "grid", + "max_percentage", + "period_lim", + "show_mode", + "show_mean", + "cmap", + "cumulative", + "cumulative_number_of_colors", + "xaxis_frequency", + "dpi", + "figsize", + } + + BOOLEAN_KEYS = { + "show_coverage", "show_percentiles", "show_histogram", + "show_noise_models", "show_earthquakes", "grid", + "show_mode", "show_mean", "cumulative", "xaxis_frequency" + } + + plot_kwargs = {} + + for k in PLOT_KWARGS: + if k in entry: + val = entry[k] + if k in BOOLEAN_KEYS: + plot_kwargs[k] = safe_bool(val) + else: + plot_kwargs[k] = val + + for ch_str in channels: + loc_code, channel = parse_channel(ch_str) + print(f"===> {folder} | {loc_code}.{channel} | action={action}") + if loc_code: + npzfolder = Path(folder) / f"npz_{loc_code}_{channel}" + else: + npzfolder = Path(folder) / f"npz_{channel}" if action in ["calculate", "full"]: - calculate_ppsd(folder, channel, inv, tw) + calculate_ppsd(folder, npzfolder, channel, loc_code, inv, tw) if action in ["plot", "full"]: - sample = find_miniseed(folder, channel) + sample = find_miniseed(folder, channel, loc_code) if sample: plot_ppsd( - sample, channel, inv, npzfolder, output_folder, tw, units - ) + sample, channel, loc_code, inv, npzfolder, + output_folder, tw, plot_kwargs=plot_kwargs.copy() + ) else: print(f"No valid trace found in {folder} for {channel}") @@ -174,17 +267,11 @@ def main(config_path): config = load_config(config_path) tw = config["timewindow"] num_workers = config.get("num_workers", 1) - units = config.get("units", "s").lower() - if units == "hz": - units = True - else: - units = False - datasets = config["datasets"] with ThreadPoolExecutor(max_workers=num_workers) as executor: futures = [ - executor.submit(process_dataset, entry, tw, units) + executor.submit(process_dataset, entry, tw) for entry in tqdm( datasets, desc="Submitting tasks", unit="dataset" )