Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 26 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
[![PPSD_Plotter Test Linux](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_Linux.yml/badge.svg)](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_Linux.yml)
[![PPSD_Plotter Test Windows](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_Windows.yml/badge.svg)](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_Windows.yml)
[![PPSD_Plotter Test MacOS](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_MacOS.yml/badge.svg)](https://github.com/msbdd/PPSD_Plotter/actions/workflows/Test_MacOS.yml)
<br>
![Python versions:](https://img.shields.io/badge/python-3.8_%7C_3.9_%7C_3.10_%7C_3.11_%7C_3.12%7C_3.13-blue?)
<br>
![License: GPL v3](https://img.shields.io/badge/License-GPLv3-brightgreen.svg)
Expand All @@ -17,8 +18,8 @@ This script automates the calculation, plotting, and export of Power Spectral De
## TODO:

- Better threading
- Additional configuration options
- Fix location codes ambiguity
- GUI
- Wildcard support

---

Expand All @@ -42,25 +43,31 @@ pip install obspy pyyaml tqdm

## Configuration File: `config.yaml`

The script uses a YAML file to define how each dataset is processed.
The script uses a YAML file to define how each dataset is processed. <br> An example configuration is provided in the ```example``` folder.<br>
You can pass additional plotting parameters to the ```PPSD.plot()``` function from ObsPy.
For the full list of supported options, please refer to the [ObsPy documentation](https://docs.obspy.org/packages/autogen/obspy.signal.spectral_estimation.PPSD.plot.html)

```
timewindow: 3600 # PPSD time window in seconds
num_workers: 4 # Number of jobs to run in parallel
units: hz
# 3600 should be used as a default; 60 is used for a small example dataset
timewindow: 600
num_workers: 2

datasets:
- folder: "data/st01" # Path to waveform files (.mseed, .miniseed, .msd)
response: "responses/ST01.xml" # Station response file (.xml, .dataless)
channels: ["HHZ", "HHE"] # List of channels to process
action: full # Processing action (see below)
output_folder: "outputs/st01" # Folder to save output plots

- folder: "data/st02"
response: "responses/ST02.xml"
channels: ["BHZ"]
action: calculate
output_folder: "outputs/st02"

- folder: "example/IU.ANMO..D"
response: "example/IU_ANMO_RESP.xml"
channels: ["BHZ", "BH1", "BH2"]
action: full
output_folder: "example/result"

- folder: "example/IU.GRFO..D"
response: "example/IU_GRFO_RESP.xml"
channels: ["00.BHZ", "00.BH1", "00.BH2"]
action: full
output_folder: "example/result"
figsize: [6, 6]
show_mean: yes
grid: no
```

---
Expand Down Expand Up @@ -94,7 +101,7 @@ Depending on the action used, the script generates the following output:

- `.npz` files saved to:
```
<folder>/npz_<channel>/
<folder>/npz_<location>_<channel>/
```

- `.png` plots saved to:
Expand All @@ -104,7 +111,7 @@ Depending on the action used, the script generates the following output:

- `.csv` exported data (for action "convert") saved to:
```
<folder>/npz_<channel>_text/export.csv
<folder>/npz_<location>_<channel>_text/export.csv
```

## Example Data Acknowledgment
Expand Down
6 changes: 4 additions & 2 deletions example/example_config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# 3600 should be used as a default; 60 is used for a small example dataset
timewindow: 600
num_workers: 2
units: hz

datasets:

Expand All @@ -13,6 +12,9 @@ datasets:

- folder: "example/IU.GRFO..D"
response: "example/IU_GRFO_RESP.xml"
channels: ["BHZ", "BH1", "BH2"]
channels: ["00.BHZ", "00.BH1", "00.BH2"]
action: full
output_folder: "example/result"
figsize: [6, 6]
show_mean: yes
grid: no
151 changes: 119 additions & 32 deletions src/PPSD_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,44 @@ def load_inventory(resp_file):
return


def find_miniseed(workdir, channel):
def parse_channel(ch_str):
parts = ch_str.split(".", 1)
if len(parts) == 1:
return None, parts[0]
return parts[0], parts[1]


def safe_bool(val):
if isinstance(val, bool):
return val
if isinstance(val, str):
return val.strip().lower() in ("yes", "true", "1", "on")
return False


def find_miniseed(workdir, channel, location=None):
for file in Path(workdir).rglob("*"):
if file.suffix.lower() in [".msd", ".miniseed", ".mseed"]:
try:
st = read(str(file))
if any(tr.stats.channel == channel for tr in st):
return str(file)
for tr in st:
if location:
if (
tr.stats.channel == channel and
tr.stats.location == location
):
return str(file)
else:
if tr.stats.channel == channel:
return str(file)
except Exception as e:
print(f"Skipping {file} due to error: {e}")
return None


def calculate_ppsd(workdir, channel, inv, tw):
def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw):
workdir = Path(workdir)
outdir = workdir / f"npz_{channel}"
outdir.mkdir(exist_ok=True)
Path(npzfolder).mkdir(exist_ok=True)

files = [
f for f in workdir.rglob("*")
Expand All @@ -64,23 +86,45 @@ def calculate_ppsd(workdir, channel, inv, tw):
):
try:
st = read(str(file))
for trace in st.select(channel=channel):
st = st.select(channel=channel, location=location)
for trace in st:
ppsd = PPSD(trace.stats, metadata=inv, ppsd_length=tw)
ppsd.add(trace)
timestamp = trace.stats.starttime.strftime(
'%y-%m-%d_%H-%M-%S.%f'
)
outfile = outdir / f"{timestamp}.npz"
outfile = npzfolder / f"{timestamp}.npz"
ppsd.save_npz(str(outfile))
except Exception as e:
print(f"Error processing {file}: {e}")
print(
f"Error processing {file} for channel={channel}"
f"location={location}: {e}"
)


def plot_ppsd(sampledata, channel, inv, npzfolder, output_folder, tw, units):
def plot_ppsd(
sampledata, channel, location, inv, npzfolder, output_folder,
tw, plot_kwargs=None
):

if plot_kwargs is None:
plot_kwargs = {}

st = read(sampledata)
trace = st.select(channel=channel)[0]
if location:
matches = st.select(channel=channel, location=location)
else:
matches = st.select(channel=channel)
if not matches:
print(f"No matching trace for channel={channel} location={location}")
return
if location is None and len(matches) > 1:
print(
f"Warning: Multiple locations found for {channel}."
f"Using first: {matches[0].stats.location}"
)
trace = matches[0]
ppsd = PPSD(trace.stats, inv, ppsd_length=tw)

for file in Path(npzfolder).glob("*.npz"):
try:
ppsd.add_npz(str(file))
Expand All @@ -89,13 +133,20 @@ def plot_ppsd(sampledata, channel, inv, npzfolder, output_folder, tw, units):

output_folder = Path(output_folder)
output_folder.mkdir(parents=True, exist_ok=True)

figfile = output_folder / f"{trace.id}.png"

cmap = plot_kwargs.pop("cmap", pqlx)
figsize = plot_kwargs.pop("figsize", (12, 6))
dpi = plot_kwargs.pop("dpi", 300)

try:
fig = ppsd.plot(cmap=pqlx, show_mean=True, show_histogram=True,
xaxis_frequency=units, show_mode=True, show=False)
fig.set_size_inches(12, 6)
fig.savefig(figfile, dpi=300)
fig = ppsd.plot(
cmap=cmap,
show=False,
**plot_kwargs
)
fig.set_size_inches(figsize)
fig.savefig(figfile, dpi=dpi)
print(f"Saved plot: {figfile}")
except Exception as e:
print(f"Error: {e}")
Expand Down Expand Up @@ -139,30 +190,72 @@ def convert_npz_to_text(npzdir):
print("No PSD entries found.")


def process_dataset(entry, tw, units):
def process_dataset(entry, tw):
folder = entry["folder"]
resp_file = entry["response"]
channels = entry["channels"]
output_folder = entry.get("output_folder", folder)
action = str(entry.get("action", "full"))
inv = load_inventory(resp_file)

if not inv:
print(f"Failed to read inventory {resp_file}")
return

for channel in channels:
print(f"===> {folder} | {channel} | action={action}")
npzfolder = Path(folder) / f"npz_{channel}"
PLOT_KWARGS = {
"show_coverage",
"show_percentiles",
"show_histogram",
"percentiles",
"show_noise_models",
"show_earthquakes",
"grid",
"max_percentage",
"period_lim",
"show_mode",
"show_mean",
"cmap",
"cumulative",
"cumulative_number_of_colors",
"xaxis_frequency",
"dpi",
"figsize",
}

BOOLEAN_KEYS = {
"show_coverage", "show_percentiles", "show_histogram",
"show_noise_models", "show_earthquakes", "grid",
"show_mode", "show_mean", "cumulative", "xaxis_frequency"
}

plot_kwargs = {}

for k in PLOT_KWARGS:
if k in entry:
val = entry[k]
if k in BOOLEAN_KEYS:
plot_kwargs[k] = safe_bool(val)
else:
plot_kwargs[k] = val

for ch_str in channels:
loc_code, channel = parse_channel(ch_str)
print(f"===> {folder} | {loc_code}.{channel} | action={action}")
if loc_code:
npzfolder = Path(folder) / f"npz_{loc_code}_{channel}"
else:
npzfolder = Path(folder) / f"npz_{channel}"

if action in ["calculate", "full"]:
calculate_ppsd(folder, channel, inv, tw)
calculate_ppsd(folder, npzfolder, channel, loc_code, inv, tw)

if action in ["plot", "full"]:
sample = find_miniseed(folder, channel)
sample = find_miniseed(folder, channel, loc_code)
if sample:
plot_ppsd(
sample, channel, inv, npzfolder, output_folder, tw, units
)
sample, channel, loc_code, inv, npzfolder,
output_folder, tw, plot_kwargs=plot_kwargs.copy()
)
else:
print(f"No valid trace found in {folder} for {channel}")

Expand All @@ -174,17 +267,11 @@ def main(config_path):
config = load_config(config_path)
tw = config["timewindow"]
num_workers = config.get("num_workers", 1)
units = config.get("units", "s").lower()
if units == "hz":
units = True
else:
units = False

datasets = config["datasets"]

with ThreadPoolExecutor(max_workers=num_workers) as executor:
futures = [
executor.submit(process_dataset, entry, tw, units)
executor.submit(process_dataset, entry, tw)
for entry in tqdm(
datasets, desc="Submitting tasks", unit="dataset"
)
Expand Down
Loading