Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments
venv/
ENV/
env/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# PPSD output files
npz_*/
*.png
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ This utility automates the calculation, plotting, and export of Power Spectral D

## TODO:

- Custom plotting function and additional plotting parameters (day/night) (?)
- Linux building (?)
- Major refactor to automate all the data and station information collection (?)

Expand Down Expand Up @@ -54,6 +53,27 @@ All these parameters are now visible in the GUI.

Added a possibility to plot a custom RMS noise level on the plot.

### Time Filtering (Day/Night Mode)

You can optionally filter PPSD data by time of day using the `time_filter` configuration option. This is useful for analyzing noise levels during specific time periods (e.g., daytime vs. nighttime).

**Example configuration:**
```yaml
datasets:
- folder: data/station
response: response.xml
channels: [BHZ]
time_filter:
night_start: "22:00" # Start time in HH:MM format
night_stop: "06:00" # End time in HH:MM format
```

**Notes:**
- Time format: `HH:MM` or `HH:MM:SS`
- If `night_start` > `night_stop`, the range spans midnight (e.g., 22:00 to 06:00 means nighttime)
- The system calculates PPSD for all data, but only uses filtered .npz files when generating plots
- Omit `time_filter` to use all data (default behavior)

## Output Structure

Depending on the action used, the script generates the following output:
Expand Down
5 changes: 5 additions & 0 deletions example/example_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ datasets:
xaxis_frequency: false
response: example/IU_ANMO_RESP.xml
timewindow: 600
# Optional: Filter data by time of day (uncomment to enable)
# time_filter:
# night_start: "22:00" # Start time (HH:MM format)
# night_stop: "06:00" # End time (HH:MM format)
# Note: If night_start > night_stop, the range spans midnight
- action: full
channels:
- 00.BH1
Expand Down
111 changes: 58 additions & 53 deletions src/PPSD_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,25 @@
from obspy.imaging.cm import pqlx
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from ppsd_plotter_aux import (
find_miniseed,
load_inventory,
parse_npz_timestamp,
is_time_in_range,
filter_npz_files_by_time,
split_trace_by_time_filter
)
matplotlib.use("Agg")

# Default PPSD time window in seconds
DEFAULT_TIME_WINDOW = 3600


def load_config(path):
with open(path, 'r') as f:
return yaml.safe_load(f)


def load_inventory(resp_file):
ext = Path(resp_file).suffix.lower()

if ext in ['.seed', '.dataless']:
fmt = "SEED"
elif ext == '.xml':
fmt = "STATIONXML"
else:
fmt = None

try:
if fmt:
inv = read_inventory(resp_file, format=fmt)
else:
inv = read_inventory(resp_file)
return inv
except Exception as e:
print(f"Failed to read inventory {resp_file}: {e}")
return


def parse_channel(ch_str):
parts = ch_str.split(".", 1)
if len(parts) == 1:
Expand All @@ -52,27 +42,7 @@ def safe_bool(val):
return False


def find_miniseed(workdir, channel, location=None):
for file in Path(workdir).rglob("*"):
if file.suffix.lower() in [".msd", ".miniseed", ".mseed"]:
try:
st = read(str(file))
for tr in st:
if location:
if (
tr.stats.channel == channel and
tr.stats.location == location
):
return str(file)
else:
if tr.stats.channel == channel:
return str(file)
except Exception as e:
print(f"Skipping {file} due to error: {e}")
return None


def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw):
def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw, time_filter=None):
workdir = Path(workdir)
Path(npzfolder).mkdir(exist_ok=True)

Expand All @@ -88,13 +58,23 @@ def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw):
st = read(str(file))
st = st.select(channel=channel, location=location)
for trace in st:
ppsd = PPSD(trace.stats, metadata=inv, ppsd_length=tw)
ppsd.add(trace)
timestamp = trace.stats.starttime.strftime(
'%y-%m-%d_%H-%M-%S.%f'
# Split trace by time filter if needed
trace_segments = split_trace_by_time_filter(trace, time_filter)

for trace_segment, label in trace_segments:
ppsd = PPSD(trace_segment.stats, metadata=inv, ppsd_length=tw)
ppsd.add(trace_segment)

# Include label in filename if filtering is applied
timestamp = trace_segment.stats.starttime.strftime(
'%y-%m-%d_%H-%M-%S.%f'
)
outfile = npzfolder / f"{timestamp}.npz"
ppsd.save_npz(str(outfile))
if label != 'all':
outfile = npzfolder / f"{timestamp}_{label}.npz"
else:
outfile = npzfolder / f"{timestamp}.npz"

ppsd.save_npz(str(outfile))
except Exception as e:
print(
f"Error processing {file} for channel={channel}"
Expand All @@ -104,7 +84,7 @@ def calculate_ppsd(workdir, npzfolder, channel, location, inv, tw):

def plot_ppsd(
sampledata, channel, location, inv, npzfolder, output_folder,
tw, plot_kwargs=None
tw, plot_kwargs=None, time_filter=None
):

if plot_kwargs is None:
Expand All @@ -125,7 +105,27 @@ def plot_ppsd(
)
trace = matches[0]
ppsd = PPSD(trace.stats, inv, ppsd_length=tw)
for file in Path(npzfolder).glob("*.npz"):

# Get all npz files and filter by time if needed
all_files = list(Path(npzfolder).glob("*.npz"))

# Check if we have labeled files (_day or _night)
labeled_files = [f for f in all_files if f.stem.endswith('_day') or f.stem.endswith('_night')]
unlabeled_files = [f for f in all_files if not (f.stem.endswith('_day') or f.stem.endswith('_night'))]

if time_filter and unlabeled_files and not labeled_files:
print("Warning: Using time_filter with unlabeled .npz files.")
print("For best results, recalculate with 'action: full' or 'action: calculate' to split traces at boundaries.")
print("Currently using timestamp-based filtering on existing files.")

filtered_files = filter_npz_files_by_time(all_files, time_filter)

if time_filter:
nfiltered = len(filtered_files)
ntotal = len(all_files)
print(f"Time filter applied: {nfiltered}/{ntotal} files selected")

for file in filtered_files:
try:
ppsd.add_npz(str(file))
except Exception as e:
Expand Down Expand Up @@ -196,6 +196,8 @@ def process_dataset(entry, tw):
channels = entry["channels"]
output_folder = entry.get("output_folder", folder)
action = str(entry.get("action", "full"))
# Use timewindow from entry if available, otherwise use global tw
tw = entry.get("timewindow", tw)
inv = load_inventory(resp_file)

if not inv:
Expand Down Expand Up @@ -247,14 +249,17 @@ def process_dataset(entry, tw):
npzfolder = Path(folder) / f"npz_{channel}"

if action in ["calculate", "full"]:
calculate_ppsd(folder, npzfolder, channel, loc_code, inv, tw)
time_filter = entry.get("time_filter")
calculate_ppsd(folder, npzfolder, channel, loc_code, inv, tw, time_filter)

if action in ["plot", "full"]:
sample = find_miniseed(folder, channel, loc_code)
if sample:
time_filter = entry.get("time_filter")
plot_ppsd(
sample, channel, loc_code, inv, npzfolder,
output_folder, tw, plot_kwargs=plot_kwargs.copy()
output_folder, tw, plot_kwargs=plot_kwargs.copy(),
time_filter=time_filter
)
else:
print(f"No valid trace found in {folder} for {channel}")
Expand All @@ -265,7 +270,7 @@ def process_dataset(entry, tw):

def main(config_path):
config = load_config(config_path)
tw = config["timewindow"]
tw = config.get("timewindow", DEFAULT_TIME_WINDOW)
num_workers = config.get("num_workers", 1)
datasets = config["datasets"]

Expand Down
Loading