From f9ead1918c8cb6e5cbecfb4fb9965493a00ecc0c Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Tue, 6 Jan 2026 16:46:13 +0100 Subject: [PATCH 01/12] sva --- .../science_verification_analysis.py | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 rnog_analysis_tools/data_monitoring/science_verification_analysis.py diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py new file mode 100644 index 0000000..7655ae2 --- /dev/null +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -0,0 +1,127 @@ +from NuRadioReco.modules.io.RNO_G.readRNOGDataMattak import readRNOGData +import rnog_data.runtable as rt +from NuRadioReco.detector import detector +import logging +import os +import datetime +import numpy as np +from matplotlib import pyplot as plt +from NuRadioReco.utilities import units +from tqdm import tqdm +from argparse import ArgumentParser +import warnings +import pandas as pd +import NuRadioReco.framework.event +import NuRadioReco.framework.station +import NuRadioReco.framework.channel +import NuRadioReco.framework.trigger +from NuRadioReco.framework.parameters import channelParameters as chp +from NuRadioReco.modules.RNO_G.dataProviderRNOG import dataProviderRNOG +from NuRadioReco.modules.channelSignalReconstructor import channelSignalReconstructor as csr +from cycler import cycler +import matplotlib as mpl +from cmap import Colormap + +''' +This module can be used to test if the stations are working as expected. +''' + +# Channel mapping for the first seven RNO-G stations: +SURFACE_CHANNELS_LIST = [12, 13, 14, 15, 16, 17, 18 , 19 , 20] +DEEP_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 21, 22, 23] +UPWARD_CHANNELS_LIST = [13, 16, 19] +DOWNWARD_CHANNELS_LIST = [12, 14, 15, 17, 18] +ALL_CHANNELS_LIST = SURFACE_CHANNELS_LIST + DEEP_CHANNELS_LIST + +# Channel mapping for Station 14: +STATION_14_SURFACE_CHANNELS_LIST = [12, 13, 14, 15, 16, 17, 18 , 19] +STATION_14_DEEP_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 22, 23] +STATION_14_UPWARD_CHANNELS_LIST = [13, 15, 16, 18] +STATION_14_DOWNWARD_CHANNELS_LIST = [12, 14, 17, 19] +STATION_14_ALL_CHANNELS_LIST = STATION_14_SURFACE_CHANNELS_LIST + STATION_14_DEEP_CHANNELS_LIST + +# Matplotlib settings +cm = Colormap('tol:muted') +colors = cm.colors + +mpl.rcParams.update({ + 'font.family': 'sans-serif', + 'font.sans-serif': ['Helvetica', 'Arial', 'DejaVu Sans'], + 'font.size': 17, + + 'axes.labelsize': 17, + 'axes.titlesize': 18, + 'axes.linewidth': 1.2, + 'axes.grid': False, + + 'axes.prop_cycle': cycler(colors=colors), + + 'xtick.labelsize': 17, + 'ytick.labelsize': 17, + 'xtick.major.size': 6, + 'ytick.major.size': 6, + 'xtick.major.width': 1.2, + 'ytick.major.width': 1.2, + 'xtick.minor.size': 3, + 'ytick.minor.size': 3, + 'xtick.minor.visible': True, + 'ytick.minor.visible': True, + + 'lines.linewidth': 1.6, + 'lines.antialiased': True, + 'lines.markersize': 6, + + 'legend.fontsize': 14, + 'legend.frameon': False, + 'legend.handlelength': 2.2, + 'legend.borderpad': 0.3, + + 'figure.dpi': 120, + 'savefig.dpi': 300, + 'savefig.bbox': 'tight', +}) + +# Find the correct channel mapping for a given station +def station_channels(station_id: int): + if station_id == 14: + return STATION_14_SURFACE_CHANNELS_LIST, STATION_14_DEEP_CHANNELS_LIST, STATION_14_UPWARD_CHANNELS_LIST, STATION_14_DOWNWARD_CHANNELS_LIST, STATION_14_ALL_CHANNELS_LIST + else: + return SURFACE_CHANNELS_LIST, DEEP_CHANNELS_LIST, UPWARD_CHANNELS_LIST, DOWNWARD_CHANNELS_LIST, ALL_CHANNELS_LIST + +def normalize_all_to_down_reference(spec_arr, frequencies, down_channels, up_channels, f_low, f_high): + freq_mask = (frequencies >= f_low) & (frequencies <= f_high) + spec_arr = np.copy(spec_arr) + + # Use only down channels to define the reference + down = spec_arr[down_channels] + down_band_avg = np.mean(down[:, :, freq_mask], axis=2) # (n_down, n_events) + ref_band_avg = np.mean(down_band_avg, axis=0) # (n_events,) + + all_channels = up_channels + down_channels + all_spectra = spec_arr[all_channels] + + ch_band_avg = np.mean(all_spectra[:, :, freq_mask], axis=2) # (n_ch, n_events) + scale_factors = ref_band_avg[np.newaxis, :] / ch_band_avg # (n_ch, n_events) + + all_spectra_norm = all_spectra * scale_factors[:, :, np.newaxis] + spec_arr[all_channels] = all_spectra_norm + + return spec_arr, scale_factors + + +if __name__ == "__main__": + + argparser = ArgumentParser(description="RNO-G Science Verification Analysis") + argparser.add_argument("-st", "--station_id", type=int, required=True, help="Station to analyze") + + run_selection = argparser.add_mutually_exclusive_group(required=True) + run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS", + help="Run number(s) to analyze. Each run number should be given explicitly separated by a space, e.g. --runs 1001 1002 1005") + run_selection.add_argument("--run_range", nargs=2, type=int, metavar=("START_RUN", "END_RUN"), + help="Range of run numbers to analyze (inclusive). Provide start and end run numbers separated by a space, e.g. --run_range 1000 1050") + run_selection.add_argument("--time_range", nargs=2, type=int, metavar=("START_TIME", "END_TIME"), + help="Range of time numbers to analyze (inclusive). Provide start and end time numbers separated by a space, e.g. --time_range 2024-07-15 2024-09-30") + + surface_channels, deep_channels, upward_channels, downward_channels, all_channels = station_channels(argparser.parse_args().station_id) + + From c2e730dbd53f0ae7eafb6b87052ac9e41f602c92 Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Tue, 6 Jan 2026 20:21:16 +0100 Subject: [PATCH 02/12] sva - data reading --- .../science_verification_analysis.py | 186 ++++++++++++++++-- 1 file changed, 167 insertions(+), 19 deletions(-) diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index 7655ae2..6ea83bb 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -17,10 +17,10 @@ import NuRadioReco.framework.trigger from NuRadioReco.framework.parameters import channelParameters as chp from NuRadioReco.modules.RNO_G.dataProviderRNOG import dataProviderRNOG -from NuRadioReco.modules.channelSignalReconstructor import channelSignalReconstructor as csr -from cycler import cycler + import matplotlib as mpl -from cmap import Colormap +#from cmap import Colormap +from collections import defaultdict ''' This module can be used to test if the stations are working as expected. @@ -41,8 +41,8 @@ STATION_14_ALL_CHANNELS_LIST = STATION_14_SURFACE_CHANNELS_LIST + STATION_14_DEEP_CHANNELS_LIST # Matplotlib settings -cm = Colormap('tol:muted') -colors = cm.colors +# cm = Colormap('tol:muted') +# colors = cm.colors mpl.rcParams.update({ 'font.family': 'sans-serif', @@ -54,7 +54,7 @@ 'axes.linewidth': 1.2, 'axes.grid': False, - 'axes.prop_cycle': cycler(colors=colors), + # 'axes.prop_cycle': cycler(colors=colors), 'xtick.labelsize': 17, 'ytick.labelsize': 17, @@ -81,6 +81,20 @@ 'savefig.bbox': 'tight', }) +def convert_events_information(event_info, convert_to_arrays=True): + + data = defaultdict(list) + + for ele in event_info.values(): + for k, v in ele.items(): + data[k].append(v) + + if convert_to_arrays: + for k in data: + data[k] = np.array(data[k]) + + return data + # Find the correct channel mapping for a given station def station_channels(station_id: int): if station_id == 14: @@ -88,7 +102,10 @@ def station_channels(station_id: int): else: return SURFACE_CHANNELS_LIST, DEEP_CHANNELS_LIST, UPWARD_CHANNELS_LIST, DOWNWARD_CHANNELS_LIST, ALL_CHANNELS_LIST -def normalize_all_to_down_reference(spec_arr, frequencies, down_channels, up_channels, f_low, f_high): +# Normalize surface channel spectra to the average of the down channels in the reference frequency band (500-650 MHz) +def normalize_surface_channels_to_down_reference(spec_arr, frequencies, down_channels, up_channels, f_low=500*units.MHz, f_high=650*units.MHz): + '''Normalize all surface channel spectra to the average of the down channels in the reference frequency band.''' + # spec_arr shape: (n_channels, n_events, n_freqs) freq_mask = (frequencies >= f_low) & (frequencies <= f_high) spec_arr = np.copy(spec_arr) @@ -97,31 +114,162 @@ def normalize_all_to_down_reference(spec_arr, frequencies, down_channels, up_cha down_band_avg = np.mean(down[:, :, freq_mask], axis=2) # (n_down, n_events) ref_band_avg = np.mean(down_band_avg, axis=0) # (n_events,) - all_channels = up_channels + down_channels - all_spectra = spec_arr[all_channels] + all_surface_channels = up_channels + down_channels + all_surface_spectra = spec_arr[all_surface_channels] - ch_band_avg = np.mean(all_spectra[:, :, freq_mask], axis=2) # (n_ch, n_events) + ch_band_avg = np.mean(all_surface_spectra[:, :, freq_mask], axis=2) # (n_ch, n_events) scale_factors = ref_band_avg[np.newaxis, :] / ch_band_avg # (n_ch, n_events) - all_spectra_norm = all_spectra * scale_factors[:, :, np.newaxis] - spec_arr[all_channels] = all_spectra_norm + all_surface_spectra_norm = all_surface_spectra * scale_factors[:, :, np.newaxis] + spec_arr[all_surface_channels] = all_surface_spectra_norm return spec_arr, scale_factors - + +def read_rnog_runtable(station_id: int, start_time: str, stop_time: str): + '''Get run numbers from the runtable tool for a given station and time range.''' + RunTable = rt.RunTable() + testrt = RunTable.get_table( start_time=start_time, stop_time=stop_time, stations=[station_id], run_types = ['physics']) + return testrt + +def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): + '''Read RNO-G data for a given station and list of run numbers using the specified backend.''' + file_list = [ f"/pnfs/ifh.de/acs/radio/diskonly/data/inbox/station14/run{run_id}/combined.root" for run_id in run_numbers] + n_files = len(file_list) + n_batches = n_files // 100 + 1 + print(f"Reading {n_files} files in {n_batches} batches using {backend} backend.") + + event_info = defaultdict(list) + + n_events_total = 0 + spec_batches = [] + trace_batches = [] + times_trace_batches = [] + snr_batches = [] + run_no_all = [] + times_all = [] + freqs = None + + from NuRadioReco.modules.channelSignalReconstructor import channelSignalReconstructor as csr + + for batch in np.array_split(np.array(file_list), n_batches): + tableReader = dataProviderRNOG() + tableReader.begin(files=batch.tolist(), + det=None, + reader_kwargs={"overwrite_sampling_rate":2.4*units.GHz, + "convert_to_voltage":False, + "apply_baseline_correction":"auto", + "mattak_kwargs":{"backend":backend}}) + event_info_tmp = tableReader.reader.get_events_information( + keys=["triggerType", "triggerTime", "readoutTime", "radiantThrs", "lowTrigThrs"]) + + event_info_tmp = convert_events_information(event_info_tmp, False) + for key, value in event_info_tmp.items(): + event_info[key] += value + + n_events = tableReader.reader.get_n_events() + n_events_total += n_events + + channel_list = [i for i in range(24)] + spec_arr = np.zeros((len(channel_list), n_events, 1025)) + trace_arr = np.zeros((len(channel_list), n_events, 2048)) + times_trace_arr = np.zeros((len(channel_list), n_events, 2048)) + snr_arr = np.zeros((len(channel_list), n_events)) + + run_no = [] + times = [] + event_ids = [] + + csr = csr() + csr.begin(debug=False) + + for idx, event in enumerate(tableReader.reader.run()): + station = event.get_station() + time = station.get_station_time().datetime64 + times.append(time) + run_no.append(event.get_run_number()) + + csr.run(evt=event, station=station, det=None, stored_noise=False) + for i_ch, ch in enumerate(channel_list): + channel = station.get_channel(ch) + + times_ch = channel.get_times() + times_trace_arr[i_ch, idx, :] = times_ch + + snr_dict = channel.get_parameter(chp.SNR) + snr_peak = snr_dict["peak_amplitude"] + snr_arr[i_ch, idx] = snr_peak + + spec = channel.get_frequency_spectrum() + spec_arr[i_ch, idx, :] = np.abs(spec) + + trace = channel.get_trace() + trace_arr[i_ch, idx, :] = trace + + if freqs is None and idx == 0 and i_ch == 0: + freqs = channel.get_frequencies() + + spec_batches.append(spec_arr) + trace_batches.append(trace_arr) + times_trace_batches.append(times_trace_arr) + snr_batches.append(snr_arr) + run_no_all.extend(run_no) + times_all.extend(times) + + spec_arr = np.concatenate(spec_batches, axis=1) + trace_arr = np.concatenate(trace_batches, axis=1) + times_trace_arr = np.concatenate(times_trace_batches, axis=1) + snr_arr = np.concatenate(snr_batches, axis=1) + + run_no = np.array(run_no_all) + times = np.array(times_all) + + for key, value in event_info.items(): + event_info[key] = np.array(value) + + inf_mask = np.isinf(event_info["triggerTime"]) + event_info["triggerTime"][inf_mask] = event_info["readoutTime"][inf_mask] + print(f"Found {np.sum(inf_mask)} events with inf trigger time (of {len(inf_mask)} events)") + + #print(f"n_events read: {spec_arr.shape[1]}, n_events_total: {n_events_total}") + #print(f"freqs shape: {freqs.shape}, spec_arr shape: {spec_arr.shape}, trace_arr shape: {trace_arr.shape}, times_trace_arr shape: {times_trace_arr.shape}, snr_arr shape: {snr_arr.shape}") + #print(f"freqs {freqs}") + #print(f"trigger types: {np.unique(event_info['triggerType'])}") if __name__ == "__main__": argparser = ArgumentParser(description="RNO-G Science Verification Analysis") - argparser.add_argument("-st", "--station_id", type=int, required=True, help="Station to analyze") + argparser.add_argument("-st", "--station_id", type=int, required=True, help="Station to analyze, e.g --station_id 14") + argparser.add_argument("-b", "--backend", type=str, default="pyroot", help="Backend to use for reading data, should be either pyroot or uproot (default: pyroot), e.g. --backend pyroot or --backend uproot") run_selection = argparser.add_mutually_exclusive_group(required=True) - run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS", - help="Run number(s) to analyze. Each run number should be given explicitly separated by a space, e.g. --runs 1001 1002 1005") + run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS", + help="Run number(s) to analyze. Each run number should be given explicitly separated by a space, e.g. --runs 1001 1002 1005") run_selection.add_argument("--run_range", nargs=2, type=int, metavar=("START_RUN", "END_RUN"), - help="Range of run numbers to analyze (inclusive). Provide start and end run numbers separated by a space, e.g. --run_range 1000 1050") - run_selection.add_argument("--time_range", nargs=2, type=int, metavar=("START_TIME", "END_TIME"), - help="Range of time numbers to analyze (inclusive). Provide start and end time numbers separated by a space, e.g. --time_range 2024-07-15 2024-09-30") + help="Range of run numbers to analyze (inclusive). Provide start and end run numbers separated by a space, e.g. --run_range 1000 1050") + run_selection.add_argument("--time_range", nargs=2, type=str, metavar=("START_DATE", "END_DATE"), + help="Date range to analyze (inclusive). Provide start and end dates separated by a space in YYYY-MM-DD format, e.g. --time_range 2024-07-15 2024-09-30") + surface_channels, deep_channels, upward_channels, downward_channels, all_channels = station_channels(argparser.parse_args().station_id) + args = argparser.parse_args() + + station_id = args.station_id + backend = args.backend + if backend not in ["pyroot", "uproot"]: + raise ValueError("Backend should be either 'pyroot' or 'uproot'") + + if args.runs: + run_numbers = args.runs + elif args.run_range: + run_numbers = list(range(args.run_range[0], args.run_range[1] + 1)) + elif args.time_range: + start_time = args.time_range[0] + stop_time = args.time_range[1] + runtable = read_rnog_runtable(station_id, start_time, stop_time) + run_numbers = runtable['run_number'].tolist() + + read_rnog_data(station_id, run_numbers, backend=backend) + + From 1139667a864dfda9208b5ac0ee0c127fe939c0c6 Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Tue, 6 Jan 2026 20:59:30 +0100 Subject: [PATCH 03/12] spectra plots --- .../science_verification_analysis.py | 64 +++++++++++++++++-- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index 6ea83bb..70a2ca4 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -17,7 +17,7 @@ import NuRadioReco.framework.trigger from NuRadioReco.framework.parameters import channelParameters as chp from NuRadioReco.modules.RNO_G.dataProviderRNOG import dataProviderRNOG - +from cycler import cycler import matplotlib as mpl #from cmap import Colormap from collections import defaultdict @@ -41,8 +41,18 @@ STATION_14_ALL_CHANNELS_LIST = STATION_14_SURFACE_CHANNELS_LIST + STATION_14_DEEP_CHANNELS_LIST # Matplotlib settings -# cm = Colormap('tol:muted') -# colors = cm.colors + +CALM_DISTINCT = [ + "#4477AA", # blue + "#66CCEE", # cyan + "#228833", # green + "#CCBB44", # olive/yellow + "#EE6677", # soft red + "#AA3377", # purple + "#BBBBBB", # gray + "#332288", # deep indigo +] + mpl.rcParams.update({ 'font.family': 'sans-serif', @@ -54,7 +64,7 @@ 'axes.linewidth': 1.2, 'axes.grid': False, - # 'axes.prop_cycle': cycler(colors=colors), + 'axes.prop_cycle': cycler('color', CALM_DISTINCT), 'xtick.labelsize': 17, 'ytick.labelsize': 17, @@ -156,7 +166,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): tableReader.begin(files=batch.tolist(), det=None, reader_kwargs={"overwrite_sampling_rate":2.4*units.GHz, - "convert_to_voltage":False, + "convert_to_voltage":True, "apply_baseline_correction":"auto", "mattak_kwargs":{"backend":backend}}) event_info_tmp = tableReader.reader.get_events_information( @@ -230,11 +240,49 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): event_info["triggerTime"][inf_mask] = event_info["readoutTime"][inf_mask] print(f"Found {np.sum(inf_mask)} events with inf trigger time (of {len(inf_mask)} events)") + return spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info + #print(f"n_events read: {spec_arr.shape[1]}, n_events_total: {n_events_total}") #print(f"freqs shape: {freqs.shape}, spec_arr shape: {spec_arr.shape}, trace_arr shape: {trace_arr.shape}, times_trace_arr shape: {times_trace_arr.shape}, snr_arr shape: {snr_arr.shape}") #print(f"freqs {freqs}") #print(f"trigger types: {np.unique(event_info['triggerType'])}") +def plot_time_integrated_surface_spectra_unnormalized(spec_arr, freqs, upward_channels, downward_channels): + '''Plot time-integrated surface channel spectra.''' + plt.figure(figsize=(10, 6)) + for ch in upward_channels: + spec_mean = np.mean(spec_arr[ch, :, :], axis=0) + plt.plot(freqs / units.MHz, spec_mean, label=f'Ch {ch} (up)', linestyle='-') + for ch in downward_channels: + spec_mean = np.mean(spec_arr[ch, :, :], axis=0) + plt.plot(freqs / units.MHz, spec_mean, label=f'Ch {ch} (down)', linestyle='--') + plt.xlabel('Frequency [MHz]') + plt.xlim(0, 800) + plt.ylabel('Mean Amplitude Spectrum [V/GHz]') + plt.title('Time-Integrated Spectrum of Surface Channels') + plt.legend() + plt.grid() + plt.tight_layout() + plt.savefig('time_integrated_surface_spectra_unnormalized.png') + +def plot_time_integrated_surface_spectra_normalized(norm_spec_arr, freqs, upward_channels, downward_channels): + '''Plot time-integrated surface channel spectra.''' + plt.figure(figsize=(10, 6)) + for ch in upward_channels: + spec_mean = np.mean(norm_spec_arr[ch, :, :], axis=0) + plt.plot(freqs / units.MHz, spec_mean, label=f'Ch {ch} (up)', linestyle='-') + for ch in downward_channels: + spec_mean = np.mean(norm_spec_arr[ch, :, :], axis=0) + plt.plot(freqs / units.MHz, spec_mean, label=f'Ch {ch} (down)', linestyle='--') + plt.xlabel('Frequency [MHz]') + plt.xlim(0, 800) + plt.ylabel('Mean Amplitude Spectrum [V/GHz]') + plt.title('Time-Integrated Spectrum of Surface Channels') + plt.legend() + plt.grid() + plt.tight_layout() + plt.savefig('time_integrated_surface_spectra_normalized.png') + if __name__ == "__main__": argparser = ArgumentParser(description="RNO-G Science Verification Analysis") @@ -269,7 +317,9 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): runtable = read_rnog_runtable(station_id, start_time, stop_time) run_numbers = runtable['run_number'].tolist() - read_rnog_data(station_id, run_numbers, backend=backend) + spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info = read_rnog_data(station_id, run_numbers, backend=backend) + norm_spec_arr, scale_factors = normalize_surface_channels_to_down_reference(spec_arr, freqs, downward_channels, upward_channels) - + plot_time_integrated_surface_spectra_unnormalized(spec_arr, freqs, upward_channels, downward_channels) + plot_time_integrated_surface_spectra_normalized(norm_spec_arr, freqs, upward_channels, downward_channels) From c475376aaff01eadea062f0cb211eea52a58baf4 Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Wed, 7 Jan 2026 16:53:56 +0100 Subject: [PATCH 04/12] improvements to spectra plots --- .../science_verification_analysis.py | 81 +++++++++++++++---- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index 70a2ca4..260d16d 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -21,6 +21,8 @@ import matplotlib as mpl #from cmap import Colormap from collections import defaultdict +from matplotlib.lines import Line2D +from matplotlib.patches import Patch ''' This module can be used to test if the stations are working as expected. @@ -83,7 +85,7 @@ 'legend.fontsize': 14, 'legend.frameon': False, - 'legend.handlelength': 2.2, + 'legend.handlelength': 1, 'legend.borderpad': 0.3, 'figure.dpi': 120, @@ -161,11 +163,11 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): from NuRadioReco.modules.channelSignalReconstructor import channelSignalReconstructor as csr - for batch in np.array_split(np.array(file_list), n_batches): + for batch in tqdm(np.array_split(np.array(file_list), n_batches), desc="Reading batches", unit="batch"): tableReader = dataProviderRNOG() tableReader.begin(files=batch.tolist(), det=None, - reader_kwargs={"overwrite_sampling_rate":2.4*units.GHz, + reader_kwargs={"overwrite_sampling_rate":2.4, "convert_to_voltage":True, "apply_baseline_correction":"auto", "mattak_kwargs":{"backend":backend}}) @@ -192,7 +194,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): csr = csr() csr.begin(debug=False) - for idx, event in enumerate(tableReader.reader.run()): + for idx, event in enumerate(tqdm(tableReader.reader.run(), total=n_events, desc="Events", unit="evt", leave=False)): station = event.get_station() time = station.get_station_time().datetime64 times.append(time) @@ -247,7 +249,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): #print(f"freqs {freqs}") #print(f"trigger types: {np.unique(event_info['triggerType'])}") -def plot_time_integrated_surface_spectra_unnormalized(spec_arr, freqs, upward_channels, downward_channels): +def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location): '''Plot time-integrated surface channel spectra.''' plt.figure(figsize=(10, 6)) for ch in upward_channels: @@ -256,16 +258,22 @@ def plot_time_integrated_surface_spectra_unnormalized(spec_arr, freqs, upward_ch for ch in downward_channels: spec_mean = np.mean(spec_arr[ch, :, :], axis=0) plt.plot(freqs / units.MHz, spec_mean, label=f'Ch {ch} (down)', linestyle='--') + plt.xlabel('Frequency [MHz]') plt.xlim(0, 800) plt.ylabel('Mean Amplitude Spectrum [V/GHz]') plt.title('Time-Integrated Spectrum of Surface Channels') - plt.legend() + plt.legend(loc="upper right", + frameon=True, + fancybox=True, + framealpha=0.9, + edgecolor="black") plt.grid() plt.tight_layout() - plt.savefig('time_integrated_surface_spectra_unnormalized.png') + plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_unnormalized_{station_id}.pdf")) + -def plot_time_integrated_surface_spectra_normalized(norm_spec_arr, freqs, upward_channels, downward_channels): +def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, freqs, upward_channels, downward_channels, save_location): '''Plot time-integrated surface channel spectra.''' plt.figure(figsize=(10, 6)) for ch in upward_channels: @@ -274,20 +282,62 @@ def plot_time_integrated_surface_spectra_normalized(norm_spec_arr, freqs, upward for ch in downward_channels: spec_mean = np.mean(norm_spec_arr[ch, :, :], axis=0) plt.plot(freqs / units.MHz, spec_mean, label=f'Ch {ch} (down)', linestyle='--') + + periodiccolor2 = "mediumseagreen" + excesscolor = 'grey' + wb_color = "mediumvioletred" + normcolor = "steelblue" + + plt.axvspan(80, 120, color=excesscolor, alpha=0.3, label="_nolegend_") + plt.axvline(x=0.402e3, color=wb_color, linestyle='--', linewidth=1.2, label="_nolegend_", alpha=0.7) + plt.axvspan(0.278e3, 0.285e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") + plt.axvspan(0.482e3, 0.485e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") + plt.axvspan(0.240e3, 0.272e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") + plt.axvspan(0.358e3, 0.378e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") + plt.axvspan(0.136e3, 0.139e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") + plt.axvspan(0.151e3, 0.157e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") + plt.axvspan(0.125e3, 0.127e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") + plt.axvspan(500, 650, color=normcolor, alpha=0.3, label="_nolegend_") + plt.xlabel('Frequency [MHz]') plt.xlim(0, 800) plt.ylabel('Mean Amplitude Spectrum [V/GHz]') plt.title('Time-Integrated Spectrum of Surface Channels') - plt.legend() + + ax = plt.gca() + line_legend = ax.legend( + loc="upper right", + frameon=True, + fancybox=True, + framealpha=0.9, + edgecolor="black") + + annotation_handles = [ + Patch(facecolor=excesscolor, alpha=0.3, label="Galactic Excess"), + Line2D([0], [0], color=wb_color, linestyle="--", linewidth=1.2, label="Weather Balloon"), + Patch(facecolor=periodiccolor2, alpha=0.3, label="Periodic Signal"), + Patch(facecolor=normcolor, alpha=0.3, label="Normalization Region"),] + + annotation_legend = ax.legend( + handles=annotation_handles, + loc="lower right", + frameon=True, + fancybox=True, + framealpha=0.9, + edgecolor="black") + + ax.add_artist(line_legend) + plt.grid() plt.tight_layout() - plt.savefig('time_integrated_surface_spectra_normalized.png') + plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_normalized_{station_id}.pdf")) if __name__ == "__main__": argparser = ArgumentParser(description="RNO-G Science Verification Analysis") argparser.add_argument("-st", "--station_id", type=int, required=True, help="Station to analyze, e.g --station_id 14") argparser.add_argument("-b", "--backend", type=str, default="pyroot", help="Backend to use for reading data, should be either pyroot or uproot (default: pyroot), e.g. --backend pyroot or --backend uproot") + argparser.add_argument("-sl", "--save_location", type=str, default=".", help="Location to save the output plots (default: current directory), e.g. --save_location /path/to/save/plots") run_selection = argparser.add_mutually_exclusive_group(required=True) run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS", @@ -315,11 +365,14 @@ def plot_time_integrated_surface_spectra_normalized(norm_spec_arr, freqs, upward start_time = args.time_range[0] stop_time = args.time_range[1] runtable = read_rnog_runtable(station_id, start_time, stop_time) - run_numbers = runtable['run_number'].tolist() + run_numbers = runtable['run'].tolist() + + # Create save location directory if it doesn't exist + save_location = os.path.expanduser(args.save_location) + os.makedirs(save_location, exist_ok=True) spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info = read_rnog_data(station_id, run_numbers, backend=backend) norm_spec_arr, scale_factors = normalize_surface_channels_to_down_reference(spec_arr, freqs, downward_channels, upward_channels) - plot_time_integrated_surface_spectra_unnormalized(spec_arr, freqs, upward_channels, downward_channels) - plot_time_integrated_surface_spectra_normalized(norm_spec_arr, freqs, upward_channels, downward_channels) - + plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location) + plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, freqs, upward_channels, downward_channels, save_location) \ No newline at end of file From e225d194720153a026da10fbbced9b4f911bbc5e Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Wed, 7 Jan 2026 22:20:17 +0100 Subject: [PATCH 05/12] add sign test for spectra - forgot to slect FORCE trigger data, fix it --- .../science_verification_analysis.py | 244 ++++++++++++++++-- 1 file changed, 221 insertions(+), 23 deletions(-) diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index 260d16d..80f173f 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -32,7 +32,10 @@ SURFACE_CHANNELS_LIST = [12, 13, 14, 15, 16, 17, 18 , 19 , 20] DEEP_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 21, 22, 23] UPWARD_CHANNELS_LIST = [13, 16, 19] -DOWNWARD_CHANNELS_LIST = [12, 14, 15, 17, 18] +DOWNWARD_CHANNELS_LIST = [12, 14, 15, 17, 18, 20] +VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 22, 23] +HPOL_LIST = [4, 8, 11, 21] +PHASED_ARRAY_LIST = [0, 1, 2, 3] ALL_CHANNELS_LIST = SURFACE_CHANNELS_LIST + DEEP_CHANNELS_LIST # Channel mapping for Station 14: @@ -40,6 +43,9 @@ STATION_14_DEEP_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 22, 23] STATION_14_UPWARD_CHANNELS_LIST = [13, 15, 16, 18] STATION_14_DOWNWARD_CHANNELS_LIST = [12, 14, 17, 19] +STATION_14_VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 20, 22, 23] +STATION_14_HPOL_LIST = [4, 8, 11, 21] +STATION_14_PHASED_ARRAY_LIST = [0, 1, 2, 3] STATION_14_ALL_CHANNELS_LIST = STATION_14_SURFACE_CHANNELS_LIST + STATION_14_DEEP_CHANNELS_LIST # Matplotlib settings @@ -110,13 +116,13 @@ def convert_events_information(event_info, convert_to_arrays=True): # Find the correct channel mapping for a given station def station_channels(station_id: int): if station_id == 14: - return STATION_14_SURFACE_CHANNELS_LIST, STATION_14_DEEP_CHANNELS_LIST, STATION_14_UPWARD_CHANNELS_LIST, STATION_14_DOWNWARD_CHANNELS_LIST, STATION_14_ALL_CHANNELS_LIST + return STATION_14_SURFACE_CHANNELS_LIST, STATION_14_DEEP_CHANNELS_LIST, STATION_14_UPWARD_CHANNELS_LIST, STATION_14_DOWNWARD_CHANNELS_LIST, STATION_14_VPOL_LIST, STATION_14_HPOL_LIST, STATION_14_PHASED_ARRAY_LIST, STATION_14_ALL_CHANNELS_LIST else: - return SURFACE_CHANNELS_LIST, DEEP_CHANNELS_LIST, UPWARD_CHANNELS_LIST, DOWNWARD_CHANNELS_LIST, ALL_CHANNELS_LIST + return SURFACE_CHANNELS_LIST, DEEP_CHANNELS_LIST, UPWARD_CHANNELS_LIST, DOWNWARD_CHANNELS_LIST, VPOL_LIST, HPOL_LIST, PHASED_ARRAY_LIST, ALL_CHANNELS_LIST # Normalize surface channel spectra to the average of the down channels in the reference frequency band (500-650 MHz) -def normalize_surface_channels_to_down_reference(spec_arr, frequencies, down_channels, up_channels, f_low=500*units.MHz, f_high=650*units.MHz): - '''Normalize all surface channel spectra to the average of the down channels in the reference frequency band.''' +def normalize_channels(spec_arr, frequencies, down_channels, up_channels, f_low=500*units.MHz, f_high=650*units.MHz): + '''Normalize all surface channel spectra to the average of the down channels in the reference frequency band. The reference band is 500-650 MHz by default. This will be replaced by lab measurements in the future.''' # spec_arr shape: (n_channels, n_events, n_freqs) freq_mask = (frequencies >= f_low) & (frequencies <= f_high) spec_arr = np.copy(spec_arr) @@ -145,7 +151,23 @@ def read_rnog_runtable(station_id: int, start_time: str, stop_time: str): def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): '''Read RNO-G data for a given station and list of run numbers using the specified backend.''' - file_list = [ f"/pnfs/ifh.de/acs/radio/diskonly/data/inbox/station14/run{run_id}/combined.root" for run_id in run_numbers] + file_list = [] + valid_run_numbers = [] + missing_runs = [] + + for run_id in run_numbers: + path = f"/pnfs/ifh.de/acs/radio/diskonly/data/inbox/station{station_id}/run{run_id}/combined.root" + if os.path.isfile(path): + file_list.append(path) + valid_run_numbers.append(run_id) + else: + missing_runs.append(run_id) + + if missing_runs: + print(f"!!!! Skipping {len(missing_runs)} missing runs: {missing_runs} !!!!") + if not file_list: + raise FileNotFoundError("No combined.root files found for selected runs.") + n_files = len(file_list) n_batches = n_files // 100 + 1 print(f"Reading {n_files} files in {n_batches} batches using {backend} backend.") @@ -162,6 +184,8 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): freqs = None from NuRadioReco.modules.channelSignalReconstructor import channelSignalReconstructor as csr + csr = csr() + csr.begin(debug=False) for batch in tqdm(np.array_split(np.array(file_list), n_batches), desc="Reading batches", unit="batch"): tableReader = dataProviderRNOG() @@ -190,9 +214,6 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): run_no = [] times = [] event_ids = [] - - csr = csr() - csr.begin(debug=False) for idx, event in enumerate(tqdm(tableReader.reader.run(), total=n_events, desc="Events", unit="evt", leave=False)): station = event.get_station() @@ -249,7 +270,117 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): #print(f"freqs {freqs}") #print(f"trigger types: {np.unique(event_info['triggerType'])}") -def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location): +def find_amplitude_ratio_in_band(station_id, freqs, norm_spec_arr, upward_channels, downward_channels, reference_channels, freq_min, freq_max): + '''Find normalized amplitude ratio of upward vs downward channels in a given frequency band. This will be replaced by lab measurements in the future.''' + freq_mask = (freqs >= freq_min) & (freqs <= freq_max) + ratio_list = [] + spectrum_list = [] + + ref_specs = np.stack([norm_spec_arr[ch][:, freq_mask] for ch in reference_channels], axis=0) + ref_spectra_per_ch = np.median(ref_specs, axis=2) # (n_ref_ch, n_events) + ref_spec = np.median(ref_spectra_per_ch, axis=0) # (n_events,) + + for ch in upward_channels + downward_channels: + masked_spec = norm_spec_arr[ch][:, freq_mask] # (n_events, n_freqs) + masked_spec_med = np.median(masked_spec, axis=1) # (n_events,) + + spec_ratio = masked_spec_med / ref_spec + ratio_list.append(spec_ratio) + spectrum_list.append(masked_spec_med) + + return ratio_list, spectrum_list + + +def find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr, upward_channels, downward_channels): + '''Find normalized amplitude ratio of upward vs downward channels in specific frequency bands. Backgrouns were defined using wiki page: https://radio.uchicago.edu/wiki/index.php/Features_observed_in_data''' + + # Galactic excess frequency band + if station_id == 14: + reference_channels_gal = [12, 14, 19] + else: + reference_channels_gal = downward_channels + freq_min_gal = 80 * units.MHz + freq_max_gal = 120 * units.MHz + ratio_arr_gal, _ = find_amplitude_ratio_in_band(station_id, freqs, norm_spec_arr, upward_channels, downward_channels, reference_channels_gal, freq_min_gal, freq_max_gal) + + # 360-380 MHz frequency band + reference_channels_360 = downward_channels + freq_min_360 = 360 * units.MHz + freq_max_360 = 380 * units.MHz + ratio_arr_360, _ = find_amplitude_ratio_in_band(station_id, freqs, norm_spec_arr, upward_channels, downward_channels, reference_channels_360, freq_min_360, freq_max_360) + + # 482-485 MHz frequency band + reference_channels_482 = downward_channels + freq_min_482 = 482 * units.MHz + freq_max_485 = 485 * units.MHz + ratio_arr_482, _ = find_amplitude_ratio_in_band(station_id, freqs, norm_spec_arr, upward_channels, downward_channels, reference_channels_482, freq_min_482, freq_max_485) + + return ratio_arr_gal, ratio_arr_360, ratio_arr_482 + +def excess_info_from_ratio(ratio_arr, alpha=0.01): + '''Calculate excess information from amplitude ratios in frequency bands.''' + from scipy.stats import binomtest + + log_ratio = np.log10(np.asarray(ratio_arr)) + median_log_ratio = np.median(log_ratio) + mean_log_ratio = np.mean(log_ratio) + frac_pos = np.mean(log_ratio > 0)/np.mean(log_ratio < 0) if np.mean(log_ratio < 0) != 0 else np.inf + + k = np.sum(log_ratio > 0) + n = int(log_ratio.size) + pval = binomtest(k, n, p=0.5, alternative="greater").pvalue + print(f"Binomial test: k={k}, n={n}, pval={pval:.3e}") + + if pval < alpha: + validation = "EXCESS (sign-test)" + else: + validation = "NO EXCESS (sign-test)" + + return { + "median_log_ratio": median_log_ratio, + "mean_log_ratio": mean_log_ratio, + "frac_pos": frac_pos, + "pval": pval, + "validation": validation + } + +def validate_excess_in_bands(ratio_arr_gal, ratio_arr_360, ratio_arr_482, alpha=0.01): + '''Validate excess information from amplitude ratios in frequency bands.''' + gal_info = excess_info_from_ratio(ratio_arr_gal, alpha) + freq360_info = excess_info_from_ratio(ratio_arr_360, alpha) + freq482_info = excess_info_from_ratio(ratio_arr_482, alpha) + + return { + "galactic_excess": gal_info, + "freq_360_380MHz": freq360_info, + "freq_482_485MHz": freq482_info + } + +def debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, channels_order, save_location, station_id, run_label, bins=30): + fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True) + bands = [("Galactic Excess (80–120 MHz)", ratio_arr_gal), + ("360–380 MHz", ratio_arr_360), + ("482–485 MHz", ratio_arr_482),] + + for ax, (title, ratio_list) in zip(axes, bands): + for ch, r in zip(channels_order, ratio_list): + r = np.asarray(r) + ax.hist(np.log10(r), bins=bins, histtype="step", linewidth=1.3, label=f"Ch {ch}", alpha=0.8) + + ax.set_title(title) + ax.set_xlabel("log10 Amplitude Ratio") + ax.grid(True, alpha=0.3) + + axes[0].set_ylabel("Counts") + + handles, labels = axes[0].get_legend_handles_labels() + fig.legend(handles, labels, loc="upper right", ncol=8, frameon=True) + plt.tight_layout() + fig.savefig(os.path.join(save_location,f"debug_amplitude_ratios_{station_id}_{run_label}.pdf",)) + plt.close(fig) + + +def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location, run_label): '''Plot time-integrated surface channel spectra.''' plt.figure(figsize=(10, 6)) for ch in upward_channels: @@ -261,7 +392,7 @@ def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freq plt.xlabel('Frequency [MHz]') plt.xlim(0, 800) - plt.ylabel('Mean Amplitude Spectrum [V/GHz]') + plt.ylabel('Amplitude Spectrum [V/GHz]') plt.title('Time-Integrated Spectrum of Surface Channels') plt.legend(loc="upper right", frameon=True, @@ -270,10 +401,10 @@ def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freq edgecolor="black") plt.grid() plt.tight_layout() - plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_unnormalized_{station_id}.pdf")) + plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_unnormalized_{station_id}_{run_label}.pdf")) -def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, freqs, upward_channels, downward_channels, save_location): +def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, freqs, upward_channels, downward_channels, save_location, run_label): '''Plot time-integrated surface channel spectra.''' plt.figure(figsize=(10, 6)) for ch in upward_channels: @@ -301,7 +432,7 @@ def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, f plt.xlabel('Frequency [MHz]') plt.xlim(0, 800) - plt.ylabel('Mean Amplitude Spectrum [V/GHz]') + plt.ylabel('Amplitude Spectrum [V/GHz]') plt.title('Time-Integrated Spectrum of Surface Channels') ax = plt.gca() @@ -330,7 +461,30 @@ def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, f plt.grid() plt.tight_layout() - plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_normalized_{station_id}.pdf")) + plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_normalized_{station_id}_{run_label}.pdf")) + +def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels, hpol_channels, save_location, run_label): + '''Plot time-integrated deep channel spectra.''' + plt.figure(figsize=(10, 6)) + for ch in vpol_channels: + spec_mean = np.mean(spec_arr[ch, :, :], axis=0) + plt.plot(freqs / units.MHz, spec_mean, label=f'Ch {ch} (VPOL)', linestyle='-') + for ch in hpol_channels: + spec_mean = np.mean(spec_arr[ch, :, :], axis=0) + plt.plot(freqs / units.MHz, spec_mean, label=f'Ch {ch} (HPOL)', linestyle='--') + + plt.xlabel('Frequency [MHz]') + plt.xlim(0, 800) + plt.ylabel('Amplitude Spectrum [V/GHz]') + plt.title('Time-Integrated Spectrum of Deep Channels') + plt.legend(loc="upper right", + frameon=True, + fancybox=True, + framealpha=0.9, + edgecolor="black") + plt.grid() + plt.tight_layout() + plt.savefig(os.path.join(save_location, f"time_integrated_deep_spectra_unnormalized_{station_id}_{run_label}.pdf")) if __name__ == "__main__": @@ -338,7 +492,9 @@ def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, f argparser.add_argument("-st", "--station_id", type=int, required=True, help="Station to analyze, e.g --station_id 14") argparser.add_argument("-b", "--backend", type=str, default="pyroot", help="Backend to use for reading data, should be either pyroot or uproot (default: pyroot), e.g. --backend pyroot or --backend uproot") argparser.add_argument("-sl", "--save_location", type=str, default=".", help="Location to save the output plots (default: current directory), e.g. --save_location /path/to/save/plots") - + argparser.add_argument("-ex", "--exclude-runs", nargs="+", type=int, default=[], metavar="RUN", help="Run number(s) to exclude, e.g. --exclude-runs 1005 1010") + argparser.add_argument("--debug_plot", action="store_true", help="If set, will create debug plots for amplitude ratios in frequency bands.") + run_selection = argparser.add_mutually_exclusive_group(required=True) run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS", help="Run number(s) to analyze. Each run number should be given explicitly separated by a space, e.g. --runs 1001 1002 1005") @@ -348,7 +504,7 @@ def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, f help="Date range to analyze (inclusive). Provide start and end dates separated by a space in YYYY-MM-DD format, e.g. --time_range 2024-07-15 2024-09-30") - surface_channels, deep_channels, upward_channels, downward_channels, all_channels = station_channels(argparser.parse_args().station_id) + surface_channels, deep_channels, upward_channels, downward_channels, vpol_channels, hpol_channels, phased_array_channels, all_channels = station_channels(argparser.parse_args().station_id) args = argparser.parse_args() @@ -362,17 +518,59 @@ def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, f elif args.run_range: run_numbers = list(range(args.run_range[0], args.run_range[1] + 1)) elif args.time_range: - start_time = args.time_range[0] - stop_time = args.time_range[1] + start_time, stop_time = args.time_range runtable = read_rnog_runtable(station_id, start_time, stop_time) - run_numbers = runtable['run'].tolist() + run_numbers = runtable["run"].tolist() + else: + raise ValueError("No run selection provided") + + # Exclude specified runs + if args.exclude_runs: + exclude_set = set(args.exclude_runs) + run_numbers = [r for r in run_numbers if r not in exclude_set] + + run_numbers = sorted(run_numbers) + first_run = run_numbers[0] + last_run = run_numbers[-1] + + if first_run == last_run: + run_label = f"run_{first_run}" + else: + run_label = f"runs_{first_run}_{last_run}" + # Create save location directory if it doesn't exist save_location = os.path.expanduser(args.save_location) os.makedirs(save_location, exist_ok=True) spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info = read_rnog_data(station_id, run_numbers, backend=backend) - norm_spec_arr, scale_factors = normalize_surface_channels_to_down_reference(spec_arr, freqs, downward_channels, upward_channels) + norm_spec_arr, scale_factors = normalize_channels(spec_arr, freqs, downward_channels, upward_channels) + + ratio_arr_gal, ratio_arr_360, ratio_arr_482 = find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr, upward_channels, downward_channels) + channels_order = upward_channels + downward_channels + ch_to_idx = {ch: i for i, ch in enumerate(channels_order)} + print(ch_to_idx) + + for ch in surface_channels: + print(f"\nAnalyzing channel {ch}") + i = ch_to_idx[ch] + print(i) + + validation_results = validate_excess_in_bands( + ratio_arr_gal[i], + ratio_arr_360[i], + ratio_arr_482[i], + alpha=0.01 + ) + + for band, results in validation_results.items(): + print(f"=== {band} ===") + for key, value in results.items(): + print(f"{key}: {value}") + print("==============") + + plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location, run_label) + plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, freqs, upward_channels, downward_channels, save_location, run_label) - plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location) - plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, freqs, upward_channels, downward_channels, save_location) \ No newline at end of file + if args.debug_plot: + debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) \ No newline at end of file From a38acd86a55fd881547ede890bab7ad1b8caf91c Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Thu, 8 Jan 2026 14:08:38 +0100 Subject: [PATCH 06/12] expand sign test with binomial CI and select FORCE trigger for spectrum analysis --- .../science_verification_analysis.py | 96 ++++++++++++------- 1 file changed, 62 insertions(+), 34 deletions(-) diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index 80f173f..1e9e256 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -50,18 +50,21 @@ # Matplotlib settings -CALM_DISTINCT = [ - "#4477AA", # blue - "#66CCEE", # cyan - "#228833", # green - "#CCBB44", # olive/yellow - "#EE6677", # soft red - "#AA3377", # purple - "#BBBBBB", # gray - "#332288", # deep indigo +COLORS = [ + "#1F77B4", # blue + "#FF4B4B", # bright red + "#2CA02C", # green + "#9467BD", # purple + "#17BECF", # cyan + "#D62728", # deep red + "#8C564B", # warm brown + "#1FA187", # teal-green + "#4E79A7", # steel blue + "#F76E5C", # coral + "#A55194", # magenta-purple + "#3D4ED7", # indigo ] - mpl.rcParams.update({ 'font.family': 'sans-serif', 'font.sans-serif': ['Helvetica', 'Arial', 'DejaVu Sans'], @@ -72,7 +75,7 @@ 'axes.linewidth': 1.2, 'axes.grid': False, - 'axes.prop_cycle': cycler('color', CALM_DISTINCT), + 'axes.prop_cycle': cycler('color', COLORS), 'xtick.labelsize': 17, 'ytick.labelsize': 17, @@ -113,6 +116,12 @@ def convert_events_information(event_info, convert_to_arrays=True): return data +def choose_trigger_type(event_info, trigger_type: str): + '''Choose events based on trigger type.''' + mask = event_info["triggerType"] == trigger_type + + return mask + # Find the correct channel mapping for a given station def station_channels(station_id: int): if station_id == 14: @@ -317,7 +326,7 @@ def find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr, return ratio_arr_gal, ratio_arr_360, ratio_arr_482 -def excess_info_from_ratio(ratio_arr, alpha=0.01): +def excess_info_from_ratio(ratio_arr, alpha=0.01, freq_range_str=""): '''Calculate excess information from amplitude ratios in frequency bands.''' from scipy.stats import binomtest @@ -328,13 +337,22 @@ def excess_info_from_ratio(ratio_arr, alpha=0.01): k = np.sum(log_ratio > 0) n = int(log_ratio.size) - pval = binomtest(k, n, p=0.5, alternative="greater").pvalue - print(f"Binomial test: k={k}, n={n}, pval={pval:.3e}") + result = binomtest(k, n, p=0.5, alternative="greater") + pval = result.pvalue + statistic = result.statistic + confidence_interval = result.proportion_ci(confidence_level=0.95) - if pval < alpha: - validation = "EXCESS (sign-test)" - else: + print(f"Binomial test for frequency range {freq_range_str}: k={k}, n={n}, pval={pval:.3e}, statistic={statistic:.3f}, 95% CI={confidence_interval}") + + if pval > alpha: validation = "NO EXCESS (sign-test)" + else: + if confidence_interval.low > 0.7: + validation = "STRONG EXCESS (sign-test)" + elif confidence_interval.low > 0.55: + validation = "MODERATE EXCESS (sign-test)" + else: + validation = "WEAK EXCESS (sign-test)" return { "median_log_ratio": median_log_ratio, @@ -346,9 +364,9 @@ def excess_info_from_ratio(ratio_arr, alpha=0.01): def validate_excess_in_bands(ratio_arr_gal, ratio_arr_360, ratio_arr_482, alpha=0.01): '''Validate excess information from amplitude ratios in frequency bands.''' - gal_info = excess_info_from_ratio(ratio_arr_gal, alpha) - freq360_info = excess_info_from_ratio(ratio_arr_360, alpha) - freq482_info = excess_info_from_ratio(ratio_arr_482, alpha) + gal_info = excess_info_from_ratio(ratio_arr_gal, alpha, "80–120 MHz") + freq360_info = excess_info_from_ratio(ratio_arr_360, alpha, "360–380 MHz") + freq482_info = excess_info_from_ratio(ratio_arr_482, alpha, "482–485 MHz") return { "galactic_excess": gal_info, @@ -358,9 +376,9 @@ def validate_excess_in_bands(ratio_arr_gal, ratio_arr_360, ratio_arr_482, alpha= def debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, channels_order, save_location, station_id, run_label, bins=30): fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True) - bands = [("Galactic Excess (80–120 MHz)", ratio_arr_gal), - ("360–380 MHz", ratio_arr_360), - ("482–485 MHz", ratio_arr_482),] + bands = [("80–120 MHz (FORCE Trigger)", ratio_arr_gal), + ("360–380 MHz (FORCE Trigger)", ratio_arr_360), + ("482–485 MHz (FORCE Trigger)", ratio_arr_482),] for ax, (title, ratio_list) in zip(axes, bands): for ch, r in zip(channels_order, ratio_list): @@ -368,7 +386,7 @@ def debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, channels_orde ax.hist(np.log10(r), bins=bins, histtype="step", linewidth=1.3, label=f"Ch {ch}", alpha=0.8) ax.set_title(title) - ax.set_xlabel("log10 Amplitude Ratio") + ax.set_xlabel("log10(R)") ax.grid(True, alpha=0.3) axes[0].set_ylabel("Counts") @@ -376,7 +394,7 @@ def debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, channels_orde handles, labels = axes[0].get_legend_handles_labels() fig.legend(handles, labels, loc="upper right", ncol=8, frameon=True) plt.tight_layout() - fig.savefig(os.path.join(save_location,f"debug_amplitude_ratios_{station_id}_{run_label}.pdf",)) + fig.savefig(os.path.join(save_location,f"debug_amplitude_ratios_force_trigger_{station_id}_{run_label}.pdf",)) plt.close(fig) @@ -393,7 +411,7 @@ def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freq plt.xlabel('Frequency [MHz]') plt.xlim(0, 800) plt.ylabel('Amplitude Spectrum [V/GHz]') - plt.title('Time-Integrated Spectrum of Surface Channels') + plt.title('Time-Integrated Spectrum of Surface Channels (FORCE Trigger)') plt.legend(loc="upper right", frameon=True, fancybox=True, @@ -401,7 +419,7 @@ def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freq edgecolor="black") plt.grid() plt.tight_layout() - plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_unnormalized_{station_id}_{run_label}.pdf")) + plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_unnormalized_force_trigger_{station_id}_{run_label}.pdf")) def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, freqs, upward_channels, downward_channels, save_location, run_label): @@ -433,7 +451,7 @@ def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, f plt.xlabel('Frequency [MHz]') plt.xlim(0, 800) plt.ylabel('Amplitude Spectrum [V/GHz]') - plt.title('Time-Integrated Spectrum of Surface Channels') + plt.title('Time-Integrated Spectrum of Surface Channels (FORCE Trigger)') ax = plt.gca() line_legend = ax.legend( @@ -461,7 +479,7 @@ def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, f plt.grid() plt.tight_layout() - plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_normalized_{station_id}_{run_label}.pdf")) + plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_normalized_force_trigger_{station_id}_{run_label}.pdf")) def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels, hpol_channels, save_location, run_label): '''Plot time-integrated deep channel spectra.''' @@ -484,7 +502,7 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels edgecolor="black") plt.grid() plt.tight_layout() - plt.savefig(os.path.join(save_location, f"time_integrated_deep_spectra_unnormalized_{station_id}_{run_label}.pdf")) + plt.savefig(os.path.join(save_location, f"time_integrated_deep_spectra_unnormalized_force_trigger_{station_id}_{run_label}.pdf")) if __name__ == "__main__": @@ -545,8 +563,18 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info = read_rnog_data(station_id, run_numbers, backend=backend) norm_spec_arr, scale_factors = normalize_channels(spec_arr, freqs, downward_channels, upward_channels) + print(f"Event info trigger type: {event_info['triggerType']}, event info trigger type length: {len(event_info['triggerType'])}") + print(f"Spec arr shape: {spec_arr.shape}, Norm spec arr shape: {norm_spec_arr.shape}") + + force_mask = choose_trigger_type(event_info, "FORCE") + spec_arr_force = spec_arr[:, force_mask, :] + norm_spec_arr_force = norm_spec_arr[:, force_mask, :] + print(f"Number of FORCE trigger events: {spec_arr_force.shape[1]}") + + if len(spec_arr_force[1]) < 30: + print("!!!! Warning: Less than 30 FORCE-trigger events, results of the sign test may not be reliable. !!!!") - ratio_arr_gal, ratio_arr_360, ratio_arr_482 = find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr, upward_channels, downward_channels) + ratio_arr_gal, ratio_arr_360, ratio_arr_482 = find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr_force, upward_channels, downward_channels) channels_order = upward_channels + downward_channels ch_to_idx = {ch: i for i, ch in enumerate(channels_order)} print(ch_to_idx) @@ -568,9 +596,9 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels for key, value in results.items(): print(f"{key}: {value}") print("==============") - - plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location, run_label) - plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, freqs, upward_channels, downward_channels, save_location, run_label) + + plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr_force, freqs, upward_channels, downward_channels, save_location, run_label) + plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr_force, freqs, upward_channels, downward_channels, save_location, run_label) if args.debug_plot: debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) \ No newline at end of file From 8f6a82131ab938acaa96eb6307b66e6cee402f1d Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Thu, 8 Jan 2026 22:18:58 +0100 Subject: [PATCH 07/12] spectrum test comlete, snr z score refrence calculation, plots and validation --- .../data_monitoring/expected_values.py | 256 ++++++++++++++++++ .../science_verification_analysis.py | 176 ++++++++++-- 2 files changed, 416 insertions(+), 16 deletions(-) create mode 100644 rnog_analysis_tools/data_monitoring/expected_values.py diff --git a/rnog_analysis_tools/data_monitoring/expected_values.py b/rnog_analysis_tools/data_monitoring/expected_values.py new file mode 100644 index 0000000..6c6f811 --- /dev/null +++ b/rnog_analysis_tools/data_monitoring/expected_values.py @@ -0,0 +1,256 @@ +from NuRadioReco.modules.io.RNO_G.readRNOGDataMattak import readRNOGData +import rnog_data.runtable as rt +from NuRadioReco.detector import detector +import logging +import os +import datetime +import numpy as np +from matplotlib import pyplot as plt +from NuRadioReco.utilities import units +from tqdm import tqdm +from argparse import ArgumentParser +import warnings +import pandas as pd +import NuRadioReco.framework.event +import NuRadioReco.framework.station +import NuRadioReco.framework.channel +import NuRadioReco.framework.trigger +from NuRadioReco.framework.parameters import channelParameters as chp +from NuRadioReco.modules.RNO_G.dataProviderRNOG import dataProviderRNOG +from cycler import cycler +import matplotlib as mpl +#from cmap import Colormap +from collections import defaultdict +from matplotlib.lines import Line2D +from matplotlib.patches import Patch +from science_verification_analysis import convert_events_information, choose_trigger_type, station_channels, read_rnog_runtable, read_rnog_data, calculate_statistics_log_snr, calculate_z_score_snr, outlier_flag, find_outlier_details, print_outlier_summary +import json +import matplotlib.dates as mdates +from datetime import timezone + + +'''This module can be used to find expected parameter values (SNR, glitching) for RNO-G stations from a known stable time period.''' + +# Channel mapping for the first seven RNO-G stations: +SURFACE_CHANNELS_LIST = [12, 13, 14, 15, 16, 17, 18 , 19 , 20] +DEEP_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 21, 22, 23] +UPWARD_CHANNELS_LIST = [13, 16, 19] +DOWNWARD_CHANNELS_LIST = [12, 14, 15, 17, 18, 20] +VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 22, 23] +HPOL_LIST = [4, 8, 11, 21] +PHASED_ARRAY_LIST = [0, 1, 2, 3] +ALL_CHANNELS_LIST = SURFACE_CHANNELS_LIST + DEEP_CHANNELS_LIST + +# Channel mapping for Station 14: +STATION_14_SURFACE_CHANNELS_LIST = [12, 13, 14, 15, 16, 17, 18 , 19] +STATION_14_DEEP_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 22, 23] +STATION_14_UPWARD_CHANNELS_LIST = [13, 15, 16, 18] +STATION_14_DOWNWARD_CHANNELS_LIST = [12, 14, 17, 19] +STATION_14_VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 20, 22, 23] +STATION_14_HPOL_LIST = [4, 8, 11, 21] +STATION_14_PHASED_ARRAY_LIST = [0, 1, 2, 3] +STATION_14_ALL_CHANNELS_LIST = STATION_14_SURFACE_CHANNELS_LIST + STATION_14_DEEP_CHANNELS_LIST + +# Matplotlib settings + +mpl.rcParams.update({ + 'font.family': 'sans-serif', + 'font.sans-serif': ['Helvetica', 'Arial', 'DejaVu Sans'], + 'font.size': 17, + + 'axes.labelsize': 17, + 'axes.titlesize': 18, + 'axes.linewidth': 1.2, + 'axes.grid': False, + + 'xtick.labelsize': 17, + 'ytick.labelsize': 17, + 'xtick.major.size': 6, + 'ytick.major.size': 6, + 'xtick.major.width': 1.2, + 'ytick.major.width': 1.2, + 'xtick.minor.size': 3, + 'ytick.minor.size': 3, + 'xtick.minor.visible': True, + 'ytick.minor.visible': True, + + 'lines.linewidth': 1.6, + 'lines.antialiased': True, + 'lines.markersize': 6, + + 'legend.fontsize': 14, + 'legend.frameon': False, + 'legend.handlelength': 1, + 'legend.borderpad': 0.3, + + 'figure.dpi': 120, + 'savefig.dpi': 300, + 'savefig.bbox': 'tight', +}) + +def find_k_value(z_score_log, channel_list, quantile=0.999): + '''Find the k-value corresponding to the given reference z-score array and quantile.''' + k_ch_list = {} + for ch in channel_list: + z_score_ch = z_score_log[ch] + k_ch = np.quantile(np.abs(z_score_ch), quantile) + k_ch_list[ch] = k_ch + return k_ch_list + +def save_k_values_json(k_values_log, station_id, save_location): + '''Save the k-values as a JSON file.''' + k_values_filename = f"station_{station_id}_k_ref_values_snr.json" + with open(k_values_filename, "w") as f: + json.dump(k_values_log, f, indent=4) + + print(f"K-values saved to {k_values_filename}") + +def choose_day_interval(times): + times = pd.to_datetime(times, utc=True) + total_days = (times.max() - times.min()).days + + if total_days < 10: + return 1 + elif total_days < 20: + return 2 + elif total_days < 40: + return 4 + elif total_days < 80: + return 7 + elif total_days < 150: + return 10 + elif total_days < 300: + return 15 + elif total_days < 600: + return 30 + else: + return 60 + +def plot_snr_against_time(station_id,times,snr_arr,flag,z_log,k_list,channels,nrows=12,ncols=2,day_interval=None): + times = pd.to_datetime(times,utc=True) + channels = list(channels) + n_channels = len(channels) + + if day_interval is None: + day_interval = choose_day_interval(times) + + fig, axs = plt.subplots(nrows,ncols,figsize=(15,24),sharex=True) + axs = np.array(axs) + + for idx, ch in enumerate(channels): + r = idx//ncols + c = idx%ncols + ax = axs[r,c] + good_mask = ~flag[ch] + ax.scatter(times[good_mask], np.log10(snr_arr[ch][good_mask]), s=8,alpha=0.25, color="gray") + zex = np.abs(z_log[ch]) - k_list[ch] + zex = np.clip(zex,0,None) + sc = ax.scatter(times[flag[ch]], np.log10(snr_arr[ch][flag[ch]]), s=8,c=zex[flag[ch]], cmap="Reds") + cax = ax.inset_axes([1.02,0.1,0.05,0.8]) + plt.colorbar(sc,cax=cax) + ax.grid(alpha=0.4) + ax.text(0.85, 0.95, f"Ch {ch}", transform = ax.transAxes, ha = "left",va = "top", bbox = dict(boxstyle = "round, pad = 0.25", facecolor = "white", alpha = 0.8)) + + for idx in range(n_channels, nrows*ncols): + r = idx//ncols + c = idx%ncols + axs[r, c].set_visible(False) + + red = plt.cm.Reds(0.6) + legend_handles = [Line2D([0],[0],marker="o",color="none",markeredgecolor="gray",markerfacecolor="gray",markersize=6,label=r"$|z|\leq k$"), + Line2D([0],[0],marker="o",color="none",markeredgecolor=red,markerfacecolor=red,markersize=6,label=r"$|z|>k$")] + axs[0, 0].legend(handles = legend_handles, loc = "upper left") + + ticks_ax = axs[-1,0] + ticks_ax.xaxis.set_major_locator(mdates.DayLocator(interval = day_interval)) + ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d",tz = timezone.utc)) + fig.autofmt_xdate() + + plt.subplots_adjust(bottom = 0.11, wspace = 0.38, left=0.08) + fig.supxlabel("Date [UTC]", x = 0.5, y = 0.06) + fig.supylabel(r"$\log_{10}(\mathrm{SNR})$", x = 0.02) + plt.savefig(os.path.join(save_location,f"snr_against_time_{station_id}_{run_label}.pdf")) + + +if __name__ == "__main__": + + argparser = ArgumentParser(description="RNO-G Science Verification Analysis") + argparser.add_argument("-st", "--station_id", type=int, required=True, help="Station to analyze, e.g --station_id 14") + argparser.add_argument("-b", "--backend", type=str, default="pyroot", help="Backend to use for reading data, should be either pyroot or uproot (default: pyroot), e.g. --backend pyroot or --backend uproot") + argparser.add_argument("-sl", "--save_location", type=str, default=".", help="Location to save the output plots (default: current directory), e.g. --save_location /path/to/save/plots") + argparser.add_argument("-ex", "--exclude-runs", nargs="+", type=int, default=[], metavar="RUN", help="Run number(s) to exclude, e.g. --exclude-runs 1005 1010") + argparser.add_argument("--debug_plot", action="store_true", help="If set, will create debug plots.") + argparser.add_argument("--save_k_values", action="store_true", help="If set, will save the k-values to a JSON file.") + + run_selection = argparser.add_mutually_exclusive_group(required=True) + run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS", + help="Run number(s) to analyze. Each run number should be given explicitly separated by a space, e.g. --runs 1001 1002 1005") + run_selection.add_argument("--run_range", nargs=2, type=int, metavar=("START_RUN", "END_RUN"), + help="Range of run numbers to analyze (inclusive). Provide start and end run numbers separated by a space, e.g. --run_range 1000 1050") + run_selection.add_argument("--time_range", nargs=2, type=str, metavar=("START_DATE", "END_DATE"), + help="Date range to analyze (inclusive). Provide start and end dates separated by a space in YYYY-MM-DD format, e.g. --time_range 2024-07-15 2024-09-30") + + + surface_channels, deep_channels, upward_channels, downward_channels, vpol_channels, hpol_channels, phased_array_channels, all_channels = station_channels(argparser.parse_args().station_id) + + args = argparser.parse_args() + + station_id = args.station_id + backend = args.backend + if backend not in ["pyroot", "uproot"]: + raise ValueError("Backend should be either 'pyroot' or 'uproot'") + + if args.runs: + run_numbers = args.runs + elif args.run_range: + run_numbers = list(range(args.run_range[0], args.run_range[1] + 1)) + elif args.time_range: + start_time, stop_time = args.time_range + runtable = read_rnog_runtable(station_id, start_time, stop_time) + run_numbers = runtable["run"].tolist() + else: + raise ValueError("No run selection provided") + + # Exclude specified runs + if args.exclude_runs: + exclude_set = set(args.exclude_runs) + run_numbers = [r for r in run_numbers if r not in exclude_set] + + run_numbers = sorted(run_numbers) + first_run = run_numbers[0] + last_run = run_numbers[-1] + + if first_run == last_run: + run_label = f"run_{first_run}" + else: + run_label = f"runs_{first_run}_{last_run}" + + + # Create save location directory if it doesn't exist + save_location = os.path.expanduser(args.save_location) + os.makedirs(save_location, exist_ok=True) + + spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info = read_rnog_data(station_id, run_numbers, backend=backend) + + # Choose FORCE triggered events only + force_mask = choose_trigger_type(event_info, "FORCE") + snr_arr_force = snr_arr[:, force_mask] + event_info_force = {key: np.array(value)[force_mask] for key, value in event_info.items()} + times = np.array(times) + times_force = times[force_mask] + + log_snr_arr, log_mean_list, log_median_list, log_std_list, log_difference_list = calculate_statistics_log_snr(snr_arr_force) + + z_score_arr_log_snr = calculate_z_score_snr(log_snr_arr, log_mean_list, log_std_list, all_channels) + k_values_log_snr = find_k_value(z_score_arr_log_snr, all_channels, quantile=0.999) + + if args.save_k_values: + save_k_values_json(k_values_log_snr, station_id, save_location) + + flag_outliers_snr = outlier_flag(z_score_arr_log_snr, k_values_log_snr, all_channels) + + outlier_details_snr = find_outlier_details(z_score_arr_log_snr, k_values_log_snr, flag_outliers_snr, event_info_force, all_channels) + print_outlier_summary(outlier_details_snr) + + day_interval = choose_day_interval(times) + plot_snr_against_time(station_id, times_force, snr_arr_force, flag_outliers_snr, z_score_arr_log_snr, k_values_log_snr, all_channels, nrows=12, ncols=2, day_interval=day_interval) \ No newline at end of file diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index 1e9e256..c1928dd 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -23,6 +23,11 @@ from collections import defaultdict from matplotlib.lines import Line2D from matplotlib.patches import Patch +from scipy.stats import skew +import json +import matplotlib.dates as mdates +from datetime import timezone + ''' This module can be used to test if the stations are working as expected. @@ -36,7 +41,7 @@ VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 22, 23] HPOL_LIST = [4, 8, 11, 21] PHASED_ARRAY_LIST = [0, 1, 2, 3] -ALL_CHANNELS_LIST = SURFACE_CHANNELS_LIST + DEEP_CHANNELS_LIST +ALL_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 , 19 , 20, 21, 22, 23] # Channel mapping for Station 14: STATION_14_SURFACE_CHANNELS_LIST = [12, 13, 14, 15, 16, 17, 18 , 19] @@ -46,7 +51,7 @@ STATION_14_VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 20, 22, 23] STATION_14_HPOL_LIST = [4, 8, 11, 21] STATION_14_PHASED_ARRAY_LIST = [0, 1, 2, 3] -STATION_14_ALL_CHANNELS_LIST = STATION_14_SURFACE_CHANNELS_LIST + STATION_14_DEEP_CHANNELS_LIST +STATION_14_ALL_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 , 19 , 20, 21, 22, 23] # Matplotlib settings @@ -205,7 +210,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): "apply_baseline_correction":"auto", "mattak_kwargs":{"backend":backend}}) event_info_tmp = tableReader.reader.get_events_information( - keys=["triggerType", "triggerTime", "readoutTime", "radiantThrs", "lowTrigThrs"]) + keys=["triggerType", "triggerTime", "readoutTime", "radiantThrs", "lowTrigThrs", "run", "eventNumber"]) event_info_tmp = convert_events_information(event_info_tmp, False) for key, value in event_info_tmp.items(): @@ -298,7 +303,7 @@ def find_amplitude_ratio_in_band(station_id, freqs, norm_spec_arr, upward_channe spectrum_list.append(masked_spec_med) return ratio_list, spectrum_list - + def find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr, upward_channels, downward_channels): '''Find normalized amplitude ratio of upward vs downward channels in specific frequency bands. Backgrouns were defined using wiki page: https://radio.uchicago.edu/wiki/index.php/Features_observed_in_data''' @@ -324,7 +329,12 @@ def find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr, freq_max_485 = 485 * units.MHz ratio_arr_482, _ = find_amplitude_ratio_in_band(station_id, freqs, norm_spec_arr, upward_channels, downward_channels, reference_channels_482, freq_min_482, freq_max_485) - return ratio_arr_gal, ratio_arr_360, ratio_arr_482 + reference_channels_240 = downward_channels + freq_min_240 = 240 * units.MHz + freq_max_272 = 272 * units.MHz + ratio_arr_240, _ = find_amplitude_ratio_in_band(station_id, freqs, norm_spec_arr, upward_channels, downward_channels, reference_channels_240, freq_min_240, freq_max_272) + + return ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240 def excess_info_from_ratio(ratio_arr, alpha=0.01, freq_range_str=""): '''Calculate excess information from amplitude ratios in frequency bands.''' @@ -347,7 +357,7 @@ def excess_info_from_ratio(ratio_arr, alpha=0.01, freq_range_str=""): if pval > alpha: validation = "NO EXCESS (sign-test)" else: - if confidence_interval.low > 0.7: + if confidence_interval.low > 0.75: validation = "STRONG EXCESS (sign-test)" elif confidence_interval.low > 0.55: validation = "MODERATE EXCESS (sign-test)" @@ -362,23 +372,25 @@ def excess_info_from_ratio(ratio_arr, alpha=0.01, freq_range_str=""): "validation": validation } -def validate_excess_in_bands(ratio_arr_gal, ratio_arr_360, ratio_arr_482, alpha=0.01): +def validate_excess_in_bands(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, alpha=0.01): '''Validate excess information from amplitude ratios in frequency bands.''' gal_info = excess_info_from_ratio(ratio_arr_gal, alpha, "80–120 MHz") freq360_info = excess_info_from_ratio(ratio_arr_360, alpha, "360–380 MHz") freq482_info = excess_info_from_ratio(ratio_arr_482, alpha, "482–485 MHz") - + freq240_info = excess_info_from_ratio(ratio_arr_240, alpha, "240–272 MHz") return { "galactic_excess": gal_info, "freq_360_380MHz": freq360_info, - "freq_482_485MHz": freq482_info + "freq_482_485MHz": freq482_info, + "freq_240_272MHz": freq240_info } -def debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, channels_order, save_location, station_id, run_label, bins=30): - fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True) +def debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order, save_location, station_id, run_label, bins=30): + fig, axes = plt.subplots(1, 4, figsize=(20, 5), sharey=True) bands = [("80–120 MHz (FORCE Trigger)", ratio_arr_gal), ("360–380 MHz (FORCE Trigger)", ratio_arr_360), - ("482–485 MHz (FORCE Trigger)", ratio_arr_482),] + ("482–485 MHz (FORCE Trigger)", ratio_arr_482), + ("240–272 MHz (FORCE Trigger)", ratio_arr_240)] for ax, (title, ratio_list) in zip(axes, bands): for ch, r in zip(channels_order, ratio_list): @@ -438,11 +450,11 @@ def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, f normcolor = "steelblue" plt.axvspan(80, 120, color=excesscolor, alpha=0.3, label="_nolegend_") - plt.axvline(x=0.402e3, color=wb_color, linestyle='--', linewidth=1.2, label="_nolegend_", alpha=0.7) + plt.axvline(x=0.403e3, color=wb_color, linestyle='--', linewidth=1.2, label="_nolegend_", alpha=0.7) plt.axvspan(0.278e3, 0.285e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") plt.axvspan(0.482e3, 0.485e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") plt.axvspan(0.240e3, 0.272e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") - plt.axvspan(0.358e3, 0.378e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") + plt.axvspan(0.360e3, 0.380e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") plt.axvspan(0.136e3, 0.139e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") plt.axvspan(0.151e3, 0.157e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") plt.axvspan(0.125e3, 0.127e3, color=periodiccolor2, alpha=0.3, label="_nolegend_") @@ -504,6 +516,116 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels plt.tight_layout() plt.savefig(os.path.join(save_location, f"time_integrated_deep_spectra_unnormalized_force_trigger_{station_id}_{run_label}.pdf")) + +def calculate_statistics_log_snr(snr_arr): + '''Calculate log10 statistics (mean, median, std, mean-median) for SNR values.''' + log_snr_arr = np.zeros((len(snr_arr), len(snr_arr[0]))) + for ch in range(len(snr_arr)): + snr_arr_ch = snr_arr[ch] + log_snr_arr_ch = np.log10(snr_arr_ch) + log_snr_arr[ch, :] = log_snr_arr_ch + log_mean_list = [] + log_median_list = [] + log_std_list = [] + log_difference_list = [] + + for ch in range(len(log_snr_arr)): + log_mean_list.append(np.mean(log_snr_arr[ch])) + log_median_list.append(np.median(log_snr_arr[ch])) + log_std_list.append(np.std(log_snr_arr[ch])) + log_difference_list.append(np.mean(log_snr_arr[ch]) - np.median(log_snr_arr[ch])) + + return log_snr_arr, log_mean_list, log_median_list, log_std_list, log_difference_list + +def calculate_z_score_snr(snr_arr, mean_list, std_list, channel_list): + '''Calculate the z-score for SNR values given mean and standard deviation lists for each channel.''' + z_score_arr = np.zeros((len(snr_arr), len(snr_arr[0]))) + for ch in channel_list: + snr_arr_ch = snr_arr[ch] + mean_ch = mean_list[ch] + std_ch = std_list[ch] + z_score_arr_ch = (snr_arr_ch - mean_ch) / std_ch + z_score_arr[ch,:] = z_score_arr_ch + + return z_score_arr + +def symmetry_metrics_channel_z_score(z_score): + return { + "mean": np.mean(z_score), + "median": np.median(z_score), + "skew": skew(z_score, bias=False), + "p_pos_3": np.mean(z_score > 3), + "p_neg_3": np.mean(z_score < -3), + "p_pos_5": np.mean(z_score > 5), + "p_neg_5": np.mean(z_score < -5), + } +def symmetry_metrics_z_score(z_score): + metrics = {} + for ch in range(len(z_score)): + z_score_ch = z_score[ch] + metrics[f"ch_{ch}"] = symmetry_metrics_channel_z_score(z_score_ch) + return metrics + +def load_k_values_json(filepath): + ''' Load k-values from a JSON file.''' + with open(filepath, "r") as f: + data = json.load(f) + k_values = {int(ch): float(k) for ch, k in data.items()} + + return k_values + +def outlier_flag(z_score_log, k_values_log, channel_list): + '''Flag outlier events based on the k-values for each channel.''' + flag = np.zeros((len(channel_list), len(z_score_log[0])), dtype=bool) + for ch in channel_list: + flag[ch, :] = np.abs(z_score_log[ch]) > k_values_log[ch] + + return flag + +def find_outlier_details(z_score_log, k_values_log, flag, event_info, channel_list): + '''Find details of outlier events for each channel.''' + outlier_details = {} + + for ch in channel_list: + outlier_indices = np.where(flag[ch, :])[0] + details_ch = [] + for idx in outlier_indices: + z_abs = np.abs(z_score_log[ch, idx]) + k_ch = k_values_log[ch] + delta = z_abs - k_ch + + details_ch.append({ + "run": int(event_info["run"][idx]), + "eventNumber": int(event_info["eventNumber"][idx]), + "z_abs": float(z_abs), + "k": float(k_ch), + "z_minus_k": float(delta), + }) + + outlier_details[ch] = details_ch + + return outlier_details + + +def print_outlier_summary(outlier_details): + '''Print a summary of outlier events for each channel.''' + for ch in sorted(outlier_details.keys()): + entries = outlier_details[ch] + n_outliers = len(entries) + + if n_outliers == 0: + print(f"Channel {ch}: 0 outliers") + continue + + k_ch = entries[0]["k"] + print(f"Channel {ch}: {n_outliers} outliers (k = {k_ch:.2f})") + + for e in entries: + print(f" - run {e['run']}, event {e['eventNumber']}, " + f"|z| = {e['z_abs']:.2f} (delta = {e['z_minus_k']:.2f} above k)") + + + if __name__ == "__main__": argparser = ArgumentParser(description="RNO-G Science Verification Analysis") @@ -561,12 +683,18 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels save_location = os.path.expanduser(args.save_location) os.makedirs(save_location, exist_ok=True) + # Read data spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info = read_rnog_data(station_id, run_numbers, backend=backend) + + # Normalize surface channel spectra norm_spec_arr, scale_factors = normalize_channels(spec_arr, freqs, downward_channels, upward_channels) print(f"Event info trigger type: {event_info['triggerType']}, event info trigger type length: {len(event_info['triggerType'])}") print(f"Spec arr shape: {spec_arr.shape}, Norm spec arr shape: {norm_spec_arr.shape}") + # Select FORCE trigger events force_mask = choose_trigger_type(event_info, "FORCE") + + # Spectral analysis for FORCE trigger events only spec_arr_force = spec_arr[:, force_mask, :] norm_spec_arr_force = norm_spec_arr[:, force_mask, :] print(f"Number of FORCE trigger events: {spec_arr_force.shape[1]}") @@ -574,7 +702,7 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels if len(spec_arr_force[1]) < 30: print("!!!! Warning: Less than 30 FORCE-trigger events, results of the sign test may not be reliable. !!!!") - ratio_arr_gal, ratio_arr_360, ratio_arr_482 = find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr_force, upward_channels, downward_channels) + ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240 = find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr_force, upward_channels, downward_channels) channels_order = upward_channels + downward_channels ch_to_idx = {ch: i for i, ch in enumerate(channels_order)} print(ch_to_idx) @@ -588,6 +716,7 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels ratio_arr_gal[i], ratio_arr_360[i], ratio_arr_482[i], + ratio_arr_240[i], alpha=0.01 ) @@ -600,5 +729,20 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr_force, freqs, upward_channels, downward_channels, save_location, run_label) plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr_force, freqs, upward_channels, downward_channels, save_location, run_label) + # SNR analysis + snr_arr_force = snr_arr[:, force_mask] + event_info_force = {key: np.array(value)[force_mask] for key, value in event_info.items()} + + log_snr_arr, log_mean_list, log_median_list, log_std_list, log_difference_list = calculate_statistics_log_snr(snr_arr_force) + + z_score_arr_log_snr = calculate_z_score_snr(log_snr_arr, log_mean_list, log_std_list, all_channels) + k_values_filename_snr = f"station_{station_id}_k_ref_values_snr.json" + k_values_log_snr = load_k_values_json(k_values_filename_snr) + flag_outliers_snr = outlier_flag(z_score_arr_log_snr, k_values_log_snr, all_channels) + + outlier_details_snr = find_outlier_details(z_score_arr_log_snr, k_values_log_snr, flag_outliers_snr, event_info_force, all_channels) + print_outlier_summary(outlier_details_snr) + + if args.debug_plot: - debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) \ No newline at end of file + debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) \ No newline at end of file From fb8225f7c38b62ca86edf39f6aec88df8194e4b0 Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Fri, 9 Jan 2026 21:47:25 +0100 Subject: [PATCH 08/12] some vrms statistics: peak, skewness - add the kde/peak plot and report the outcome for vrms distributions --- .../data_monitoring/expected_values.py | 94 ++---- .../science_verification_analysis.py | 269 ++++++++++++++++-- 2 files changed, 269 insertions(+), 94 deletions(-) diff --git a/rnog_analysis_tools/data_monitoring/expected_values.py b/rnog_analysis_tools/data_monitoring/expected_values.py index 6c6f811..90d3aaf 100644 --- a/rnog_analysis_tools/data_monitoring/expected_values.py +++ b/rnog_analysis_tools/data_monitoring/expected_values.py @@ -23,13 +23,13 @@ from collections import defaultdict from matplotlib.lines import Line2D from matplotlib.patches import Patch -from science_verification_analysis import convert_events_information, choose_trigger_type, station_channels, read_rnog_runtable, read_rnog_data, calculate_statistics_log_snr, calculate_z_score_snr, outlier_flag, find_outlier_details, print_outlier_summary +from science_verification_analysis import convert_events_information, choose_trigger_type, station_channels, read_rnog_runtable, read_rnog_data, calculate_statistics_log_snr, calculate_z_score_snr, outlier_flag, find_outlier_details, print_outlier_summary, choose_day_interval, plot_snr_against_time import json import matplotlib.dates as mdates from datetime import timezone -'''This module can be used to find expected parameter values (SNR, glitching) for RNO-G stations from a known stable time period.''' +'''This module can be used to find expected parameter values for RNO-G stations from a known stable time period.''' # Channel mapping for the first seven RNO-G stations: SURFACE_CHANNELS_LIST = [12, 13, 14, 15, 16, 17, 18 , 19 , 20] @@ -39,7 +39,7 @@ VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 22, 23] HPOL_LIST = [4, 8, 11, 21] PHASED_ARRAY_LIST = [0, 1, 2, 3] -ALL_CHANNELS_LIST = SURFACE_CHANNELS_LIST + DEEP_CHANNELS_LIST +ALL_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 , 19 , 20, 21, 22, 23] # Channel mapping for Station 14: STATION_14_SURFACE_CHANNELS_LIST = [12, 13, 14, 15, 16, 17, 18 , 19] @@ -49,7 +49,10 @@ STATION_14_VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 20, 22, 23] STATION_14_HPOL_LIST = [4, 8, 11, 21] STATION_14_PHASED_ARRAY_LIST = [0, 1, 2, 3] -STATION_14_ALL_CHANNELS_LIST = STATION_14_SURFACE_CHANNELS_LIST + STATION_14_DEEP_CHANNELS_LIST +STATION_14_ALL_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 , 19 , 20, 21, 22, 23] + +# Script directory +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) # Matplotlib settings @@ -97,80 +100,18 @@ def find_k_value(z_score_log, channel_list, quantile=0.999): k_ch_list[ch] = k_ch return k_ch_list -def save_k_values_json(k_values_log, station_id, save_location): +def save_k_values_json(k_values_log, station_id, filename): '''Save the k-values as a JSON file.''' - k_values_filename = f"station_{station_id}_k_ref_values_snr.json" - with open(k_values_filename, "w") as f: + if not os.path.isabs(filename): + filepath = os.path.join(SCRIPT_DIR, filename) + else: + filepath = filename + + with open(filename, "w") as f: json.dump(k_values_log, f, indent=4) - print(f"K-values saved to {k_values_filename}") - -def choose_day_interval(times): - times = pd.to_datetime(times, utc=True) - total_days = (times.max() - times.min()).days - - if total_days < 10: - return 1 - elif total_days < 20: - return 2 - elif total_days < 40: - return 4 - elif total_days < 80: - return 7 - elif total_days < 150: - return 10 - elif total_days < 300: - return 15 - elif total_days < 600: - return 30 - else: - return 60 - -def plot_snr_against_time(station_id,times,snr_arr,flag,z_log,k_list,channels,nrows=12,ncols=2,day_interval=None): - times = pd.to_datetime(times,utc=True) - channels = list(channels) - n_channels = len(channels) - - if day_interval is None: - day_interval = choose_day_interval(times) - - fig, axs = plt.subplots(nrows,ncols,figsize=(15,24),sharex=True) - axs = np.array(axs) - - for idx, ch in enumerate(channels): - r = idx//ncols - c = idx%ncols - ax = axs[r,c] - good_mask = ~flag[ch] - ax.scatter(times[good_mask], np.log10(snr_arr[ch][good_mask]), s=8,alpha=0.25, color="gray") - zex = np.abs(z_log[ch]) - k_list[ch] - zex = np.clip(zex,0,None) - sc = ax.scatter(times[flag[ch]], np.log10(snr_arr[ch][flag[ch]]), s=8,c=zex[flag[ch]], cmap="Reds") - cax = ax.inset_axes([1.02,0.1,0.05,0.8]) - plt.colorbar(sc,cax=cax) - ax.grid(alpha=0.4) - ax.text(0.85, 0.95, f"Ch {ch}", transform = ax.transAxes, ha = "left",va = "top", bbox = dict(boxstyle = "round, pad = 0.25", facecolor = "white", alpha = 0.8)) - - for idx in range(n_channels, nrows*ncols): - r = idx//ncols - c = idx%ncols - axs[r, c].set_visible(False) - - red = plt.cm.Reds(0.6) - legend_handles = [Line2D([0],[0],marker="o",color="none",markeredgecolor="gray",markerfacecolor="gray",markersize=6,label=r"$|z|\leq k$"), - Line2D([0],[0],marker="o",color="none",markeredgecolor=red,markerfacecolor=red,markersize=6,label=r"$|z|>k$")] - axs[0, 0].legend(handles = legend_handles, loc = "upper left") - - ticks_ax = axs[-1,0] - ticks_ax.xaxis.set_major_locator(mdates.DayLocator(interval = day_interval)) - ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d",tz = timezone.utc)) - fig.autofmt_xdate() - - plt.subplots_adjust(bottom = 0.11, wspace = 0.38, left=0.08) - fig.supxlabel("Date [UTC]", x = 0.5, y = 0.06) - fig.supylabel(r"$\log_{10}(\mathrm{SNR})$", x = 0.02) - plt.savefig(os.path.join(save_location,f"snr_against_time_{station_id}_{run_label}.pdf")) - + print(f"K-values saved to {filename}") + if __name__ == "__main__": @@ -244,8 +185,9 @@ def plot_snr_against_time(station_id,times,snr_arr,flag,z_log,k_list,channels,nr z_score_arr_log_snr = calculate_z_score_snr(log_snr_arr, log_mean_list, log_std_list, all_channels) k_values_log_snr = find_k_value(z_score_arr_log_snr, all_channels, quantile=0.999) + k_values_filename_snr = f"station_{station_id}_k_ref_values_snr.json" if args.save_k_values: - save_k_values_json(k_values_log_snr, station_id, save_location) + save_k_values_json(k_values_log_snr, station_id, k_values_filename_snr) flag_outliers_snr = outlier_flag(z_score_arr_log_snr, k_values_log_snr, all_channels) diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index c1928dd..98aa523 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -27,6 +27,9 @@ import json import matplotlib.dates as mdates from datetime import timezone +from scipy.stats import gaussian_kde, skew +from scipy.signal import find_peaks + ''' @@ -53,21 +56,39 @@ STATION_14_PHASED_ARRAY_LIST = [0, 1, 2, 3] STATION_14_ALL_CHANNELS_LIST = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 , 19 , 20, 21, 22, 23] +# Script directory +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) + # Matplotlib settings COLORS = [ - "#1F77B4", # blue - "#FF4B4B", # bright red - "#2CA02C", # green - "#9467BD", # purple - "#17BECF", # cyan - "#D62728", # deep red - "#8C564B", # warm brown - "#1FA187", # teal-green - "#4E79A7", # steel blue - "#F76E5C", # coral - "#A55194", # magenta-purple - "#3D4ED7", # indigo + "#4477AA", # strong blue + "#EE6677", # coral / red + "#228833", # green + "#CCBB44", # yellow + "#66CCEE", # light blue + "#AA3377", # purple + + "#882255", # wine + "#44AA99", # teal + "#084C1F", # deep green + "#332288", # indigo + "#AA4499", # magenta + "#771122", # burgundy + + "#7089A1", # steel blue + "#DDCC77", # sand + "#B0D8EC", # pale blue + "#CC6677", # dusty red + "#999933", # olive + "#DCB43C", # warm yellow + + "#006699", # dark cyan + "#0099CC", # vivid cyan + "#9955AA", # lavender purple + "#55AA55", # bright green + "#CC7711", # warm orange + "#555555", # neutral gray ] mpl.rcParams.update({ @@ -218,6 +239,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): n_events = tableReader.reader.get_n_events() n_events_total += n_events + print(f"Reading {n_events} events in this batch.") channel_list = [i for i in range(24)] spec_arr = np.zeros((len(channel_list), n_events, 1025)) @@ -277,12 +299,13 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): event_info["triggerTime"][inf_mask] = event_info["readoutTime"][inf_mask] print(f"Found {np.sum(inf_mask)} events with inf trigger time (of {len(inf_mask)} events)") + print(f"n_events read: {spec_arr.shape[1]}, n_events_total: {n_events_total}") + print(f"freqs shape: {freqs.shape}, spec_arr shape: {spec_arr.shape}, trace_arr shape: {trace_arr.shape}, times_trace_arr shape: {times_trace_arr.shape}, snr_arr shape: {snr_arr.shape}, times shape: {times.shape}, run_no shape: {run_no.shape} ") + print(f"trigger types: {np.unique(event_info['triggerType'])}") + return spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info - #print(f"n_events read: {spec_arr.shape[1]}, n_events_total: {n_events_total}") - #print(f"freqs shape: {freqs.shape}, spec_arr shape: {spec_arr.shape}, trace_arr shape: {trace_arr.shape}, times_trace_arr shape: {times_trace_arr.shape}, snr_arr shape: {snr_arr.shape}") - #print(f"freqs {freqs}") - #print(f"trigger types: {np.unique(event_info['triggerType'])}") + def find_amplitude_ratio_in_band(station_id, freqs, norm_spec_arr, upward_channels, downward_channels, reference_channels, freq_min, freq_max): '''Find normalized amplitude ratio of upward vs downward channels in a given frequency band. This will be replaced by lab measurements in the future.''' @@ -566,8 +589,12 @@ def symmetry_metrics_z_score(z_score): metrics[f"ch_{ch}"] = symmetry_metrics_channel_z_score(z_score_ch) return metrics -def load_k_values_json(filepath): +def load_k_values_json(filename): ''' Load k-values from a JSON file.''' + if not os.path.isabs(filename): + filepath = os.path.join(SCRIPT_DIR, filename) + else: + filepath = filename with open(filepath, "r") as f: data = json.load(f) k_values = {int(ch): float(k) for ch, k in data.items()} @@ -606,6 +633,93 @@ def find_outlier_details(z_score_log, k_values_log, flag, event_info, channel_li return outlier_details +def calculate_vrms(trace_arr, channel_list, event_info): + '''Calculate Vrms for each channel and event according to trigger types.''' + vrms_arr = np.std(trace_arr, axis=2) # (n_channels, n_events) + + force_mask = event_info["triggerType"] == "FORCE" + radiant0_mask = event_info["triggerType"] == "RADIANT0" + radiant1_mask = event_info["triggerType"] == "RADIANT1" + lt_mask = event_info["triggerType"] == "LT" + + vrms_arr_force = vrms_arr[:, force_mask] + vrms_arr_radiant0 = vrms_arr[:, radiant0_mask] + vrms_arr_radiant1 = vrms_arr[:, radiant1_mask] + vrms_arr_lt = vrms_arr[:, lt_mask] + + return vrms_arr, vrms_arr_force, vrms_arr_radiant0, vrms_arr_radiant1, vrms_arr_lt + +def kde_modality(vrms_arr, channel_list, bandwidth=None, grid_points = 512, peak_prominence=0.01): + '''Calculate KDE and modality for Vrms distributions.''' + + modality_dict = {} + + for ch in channel_list: + vrms_ch = vrms_arr[ch] + vrms_ch = vrms_ch[~np.isnan(vrms_ch)] + + kde = gaussian_kde(vrms_ch, bw_method=bandwidth) + vrms_min = np.min(vrms_ch) + vrms_max = np.max(vrms_ch) + vrms_grid = np.linspace(vrms_min, vrms_max, grid_points) + kde_values = kde(vrms_grid) + + # Adjust prominence to be relative to the KDE range + abs_prom = peak_prominence * (np.max(kde_values) - np.min(kde_values)) + + peaks, properties = find_peaks(kde_values, prominence=abs_prom) + + modality_dict[ch] = { + "kde": kde, + "vrms_grid": vrms_grid, + "kde_values": kde_values, + "n_peaks": len(peaks), + "peaks": peaks, + "prominences": properties["prominences"], + } + + return modality_dict + +def tail_fraction_and_trimmed_skew_two_sided(vrms_arr, channel_list, lower_percentile=25, upper_percentile=75, extreme_k=3): + '''Calculate tail fraction and two-sided trimmed skewness for Vrms distributions.''' + tail_dict = {} + + for ch in channel_list: + vrms_ch = vrms_arr[ch] + vrms_ch = vrms_ch[~np.isnan(vrms_ch)] + n_events = len(vrms_ch) + + full_skew = skew(vrms_ch, bias=False) + + q1, q3 = np.percentile(vrms_ch, [lower_percentile, upper_percentile]) + iqr = q3 - q1 + + lower_bound = q1 - extreme_k * iqr + upper_bound = q3 + extreme_k * iqr + + high_mask = vrms_ch > upper_bound + high_frac = np.mean(high_mask) + + low_mask = vrms_ch < lower_bound + low_frac = np.mean(low_mask) + + core_high = vrms_ch[~high_mask] + skew_trim_high = skew(core_high, bias=False) if len(core_high) > 10 else np.nan + + core_low = vrms_ch[~low_mask] + skew_trim_low = skew(core_low, bias=False) if len(core_low) > 10 else np.nan + + tail_dict[ch] = { + "n_events": n_events, + "full_skew": full_skew, + "high_tail_fraction": high_frac, + "low_tail_fraction": low_frac, + "trimmed_skew_high": skew_trim_high, + "trimmed_skew_low": skew_trim_low, + } + + return tail_dict + def print_outlier_summary(outlier_details): '''Print a summary of outlier events for each channel.''' @@ -623,7 +737,115 @@ def print_outlier_summary(outlier_details): for e in entries: print(f" - run {e['run']}, event {e['eventNumber']}, " f"|z| = {e['z_abs']:.2f} (delta = {e['z_minus_k']:.2f} above k)") + +def choose_day_interval(times): + times = pd.to_datetime(times, utc=True) + total_days = (times.max() - times.min()).days + + if total_days < 10: + return 1 + elif total_days < 20: + return 2 + elif total_days < 40: + return 4 + elif total_days < 80: + return 7 + elif total_days < 150: + return 10 + elif total_days < 300: + return 15 + elif total_days < 600: + return 30 + else: + return 60 + +def plot_snr_against_time(station_id,times,snr_arr,flag,z_log,k_list,channels,nrows=12,ncols=2,day_interval=None): + times = pd.to_datetime(times,utc=True) + channels = list(channels) + n_channels = len(channels) + + if day_interval is None: + day_interval = choose_day_interval(times) + + fig, axs = plt.subplots(nrows,ncols,figsize=(15,24),sharex=True) + axs = np.array(axs) + + for idx, ch in enumerate(channels): + r = idx//ncols + c = idx%ncols + ax = axs[r,c] + good_mask = ~flag[ch] + ax.scatter(times[good_mask], np.log10(snr_arr[ch][good_mask]), s=8,alpha=0.25, color="gray") + zex = np.abs(z_log[ch]) - k_list[ch] + zex = np.clip(zex,0,None) + sc = ax.scatter(times[flag[ch]], np.log10(snr_arr[ch][flag[ch]]), s=8,c=zex[flag[ch]], cmap="Reds") + cax = ax.inset_axes([1.02,0.1,0.05,0.8]) + plt.colorbar(sc,cax=cax) + ax.grid(alpha=0.4) + ax.text(0.85, 0.95, f"Ch {ch}", transform = ax.transAxes, ha = "left",va = "top", bbox = dict(boxstyle = "round, pad = 0.25", facecolor = "white", alpha = 0.8)) + + for idx in range(n_channels, nrows*ncols): + r = idx//ncols + c = idx%ncols + axs[r, c].set_visible(False) + + red = plt.cm.Reds(0.6) + legend_handles = [Line2D([0],[0],marker="o",color="none",markeredgecolor="gray",markerfacecolor="gray",markersize=6,label=r"$|z|\leq k$"), + Line2D([0],[0],marker="o",color="none",markeredgecolor=red,markerfacecolor=red,markersize=6,label=r"$|z|>k$")] + axs[0, 0].legend(handles = legend_handles, loc = "upper left") + + ticks_ax = axs[-1,0] + time_span = (times.max() - times.min()).total_seconds() / 86400.0 # in days + + if time_span < 3: + # Use hourly ticks if less than 3 days + ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=1)) + ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) + else: + # Use day ticks otherwise + ticks_ax.xaxis.set_major_locator(mdates.DayLocator(interval = day_interval)) + ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d", tz = timezone.utc)) + + fig.autofmt_xdate() + + plt.subplots_adjust(bottom = 0.11, wspace = 0.38, left=0.08) + fig.supxlabel("Date [UTC]", x = 0.5, y = 0.06) + fig.supylabel(r"$\log_{10}(\mathrm{SNR})$", x = 0.02) + plt.savefig(os.path.join(save_location,f"snr_against_time_{station_id}_{run_label}.pdf")) +def debug_plot_snr_distribution(log_snr_arr, channel_list, save_location, station_id, run_label, bins=30): + fig, ax = plt.subplots(figsize=(10, 6)) + for ch in channel_list: + log_snr_ch = log_snr_arr[ch] + ax.hist(log_snr_ch, bins=bins, histtype="step", linewidth=1.3, label=f"Ch {ch}", alpha=0.8) + + ax.set_title("Log10 SNR Distribution (FORCE Trigger)") + ax.set_xlabel("log10(SNR)") + ax.set_ylabel("Counts") + ax.grid(True, alpha=0.3) + + handles, labels = ax.get_legend_handles_labels() + fig.legend(handles, labels, loc="upper right", ncol=8, frameon=True) + plt.tight_layout() + fig.savefig(os.path.join(save_location,f"debug_snr_distribution_force_trigger_{station_id}_{run_label}.pdf",)) + plt.close(fig) + +def debug_plot_z_score_snr(z_score_arr, channel_list, save_location, station_id, run_label, bins=30): + fig, ax = plt.subplots(figsize=(10, 6)) + for ch in channel_list: + z_score_ch = z_score_arr[ch] + ax.hist(z_score_ch, bins=bins, histtype="step", linewidth=1.3, label=f"Ch {ch}", alpha=0.8) + + ax.set_title("Z-Score SNR Distribution (FORCE Trigger)") + ax.set_xlabel("Z-Score(SNR)") + ax.set_ylabel("Counts") + ax.grid(True, alpha=0.3) + + handles, labels = ax.get_legend_handles_labels() + fig.legend(handles, labels, loc="upper right", ncol=8, frameon=True) + plt.tight_layout() + fig.savefig(os.path.join(save_location,f"debug_z_score_snr_force_trigger_{station_id}_{run_label}.pdf",)) + plt.close(fig) if __name__ == "__main__": @@ -726,12 +948,18 @@ def print_outlier_summary(outlier_details): print(f"{key}: {value}") print("==============") + # Surface spectrum plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr_force, freqs, upward_channels, downward_channels, save_location, run_label) plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr_force, freqs, upward_channels, downward_channels, save_location, run_label) + # Deep spectrum (unnormalized) + plot_time_integrated_deep_spectra(station_id, spec_arr_force, freqs, vpol_channels, hpol_channels, save_location, run_label) + # SNR analysis snr_arr_force = snr_arr[:, force_mask] event_info_force = {key: np.array(value)[force_mask] for key, value in event_info.items()} + times = np.array(times) + times_force = times[force_mask] log_snr_arr, log_mean_list, log_median_list, log_std_list, log_difference_list = calculate_statistics_log_snr(snr_arr_force) @@ -743,6 +971,11 @@ def print_outlier_summary(outlier_details): outlier_details_snr = find_outlier_details(z_score_arr_log_snr, k_values_log_snr, flag_outliers_snr, event_info_force, all_channels) print_outlier_summary(outlier_details_snr) + day_interval = choose_day_interval(times) + plot_snr_against_time(station_id, times_force, snr_arr_force, flag_outliers_snr, z_score_arr_log_snr, k_values_log_snr, all_channels, nrows=12, ncols=2, day_interval=day_interval) + if args.debug_plot: - debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) \ No newline at end of file + debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) + debug_plot_snr_distribution(log_snr_arr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) + debug_plot_z_score_snr(z_score_arr_log_snr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) \ No newline at end of file From 0e3d3b0a8fb2b66297ec829612c1e3793c5b5729 Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Sat, 10 Jan 2026 21:48:02 +0100 Subject: [PATCH 09/12] glitching analysis with binomtest, reference values snr st14 --- .../data_monitoring/expected_values.py | 20 +- .../science_verification_analysis.py | 474 +++++++++++++----- .../station_14_k_ref_values_snr.json | 26 + .../station_14_ref_log_mean_snr.json | 26 + .../station_14_ref_log_std_snr.json | 26 + 5 files changed, 427 insertions(+), 145 deletions(-) create mode 100644 rnog_analysis_tools/data_monitoring/station_14_k_ref_values_snr.json create mode 100644 rnog_analysis_tools/data_monitoring/station_14_ref_log_mean_snr.json create mode 100644 rnog_analysis_tools/data_monitoring/station_14_ref_log_std_snr.json diff --git a/rnog_analysis_tools/data_monitoring/expected_values.py b/rnog_analysis_tools/data_monitoring/expected_values.py index 90d3aaf..96d8e32 100644 --- a/rnog_analysis_tools/data_monitoring/expected_values.py +++ b/rnog_analysis_tools/data_monitoring/expected_values.py @@ -100,17 +100,17 @@ def find_k_value(z_score_log, channel_list, quantile=0.999): k_ch_list[ch] = k_ch return k_ch_list -def save_k_values_json(k_values_log, station_id, filename): - '''Save the k-values as a JSON file.''' +def save_values_json(values_log, filename): + '''Save the reference values as a JSON file.''' if not os.path.isabs(filename): filepath = os.path.join(SCRIPT_DIR, filename) else: filepath = filename - with open(filename, "w") as f: - json.dump(k_values_log, f, indent=4) + with open(filepath, "w") as f: + json.dump(values_log, f, indent=4) - print(f"K-values saved to {filename}") + print(f"Values saved to {filepath}") if __name__ == "__main__": @@ -120,8 +120,7 @@ def save_k_values_json(k_values_log, station_id, filename): argparser.add_argument("-b", "--backend", type=str, default="pyroot", help="Backend to use for reading data, should be either pyroot or uproot (default: pyroot), e.g. --backend pyroot or --backend uproot") argparser.add_argument("-sl", "--save_location", type=str, default=".", help="Location to save the output plots (default: current directory), e.g. --save_location /path/to/save/plots") argparser.add_argument("-ex", "--exclude-runs", nargs="+", type=int, default=[], metavar="RUN", help="Run number(s) to exclude, e.g. --exclude-runs 1005 1010") - argparser.add_argument("--debug_plot", action="store_true", help="If set, will create debug plots.") - argparser.add_argument("--save_k_values", action="store_true", help="If set, will save the k-values to a JSON file.") + argparser.add_argument("--save_values", action="store_true", help="If set, will save the reference values to separate JSON files.") run_selection = argparser.add_mutually_exclusive_group(required=True) run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS", @@ -186,8 +185,11 @@ def save_k_values_json(k_values_log, station_id, filename): k_values_log_snr = find_k_value(z_score_arr_log_snr, all_channels, quantile=0.999) k_values_filename_snr = f"station_{station_id}_k_ref_values_snr.json" - if args.save_k_values: - save_k_values_json(k_values_log_snr, station_id, k_values_filename_snr) + + if args.save_values: + save_values_json(k_values_log_snr, k_values_filename_snr) + save_values_json(log_mean_list, f"station_{station_id}_ref_log_mean_snr.json") + save_values_json(log_std_list, f"station_{station_id}_ref_log_std_snr.json") flag_outliers_snr = outlier_flag(z_score_arr_log_snr, k_values_log_snr, all_channels) diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index 98aa523..ceae8f0 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -9,7 +9,6 @@ from NuRadioReco.utilities import units from tqdm import tqdm from argparse import ArgumentParser -import warnings import pandas as pd import NuRadioReco.framework.event import NuRadioReco.framework.station @@ -17,6 +16,7 @@ import NuRadioReco.framework.trigger from NuRadioReco.framework.parameters import channelParameters as chp from NuRadioReco.modules.RNO_G.dataProviderRNOG import dataProviderRNOG +from NuRadioReco.framework.parameters import channelParametersRNOG as chp_rnog from cycler import cycler import matplotlib as mpl #from cmap import Colormap @@ -27,7 +27,7 @@ import json import matplotlib.dates as mdates from datetime import timezone -from scipy.stats import gaussian_kde, skew +from scipy.stats import gaussian_kde, skew, binomtest from scipy.signal import find_peaks @@ -214,6 +214,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): trace_batches = [] times_trace_batches = [] snr_batches = [] + glitch_batches = [] run_no_all = [] times_all = [] freqs = None @@ -222,6 +223,10 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): csr = csr() csr.begin(debug=False) + from NuRadioReco.modules.RNO_G.channelGlitchDetector import channelGlitchDetector as cgd_rnog + cgd_rnog = cgd_rnog() + cgd_rnog.begin() + for batch in tqdm(np.array_split(np.array(file_list), n_batches), desc="Reading batches", unit="batch"): tableReader = dataProviderRNOG() tableReader.begin(files=batch.tolist(), @@ -246,6 +251,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): trace_arr = np.zeros((len(channel_list), n_events, 2048)) times_trace_arr = np.zeros((len(channel_list), n_events, 2048)) snr_arr = np.zeros((len(channel_list), n_events)) + glitch_arr = np.zeros((len(channel_list), n_events)) run_no = [] times = [] @@ -258,6 +264,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): run_no.append(event.get_run_number()) csr.run(evt=event, station=station, det=None, stored_noise=False) + cgd_rnog.run(event=event, station=station, det=None) for i_ch, ch in enumerate(channel_list): channel = station.get_channel(ch) @@ -267,6 +274,9 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): snr_dict = channel.get_parameter(chp.SNR) snr_peak = snr_dict["peak_amplitude"] snr_arr[i_ch, idx] = snr_peak + + glitching_values = channel.get_parameter(chp_rnog.glitch_test_statistic) + glitch_arr[i_ch, idx] = glitching_values spec = channel.get_frequency_spectrum() spec_arr[i_ch, idx, :] = np.abs(spec) @@ -281,6 +291,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): trace_batches.append(trace_arr) times_trace_batches.append(times_trace_arr) snr_batches.append(snr_arr) + glitch_batches.append(glitch_arr) run_no_all.extend(run_no) times_all.extend(times) @@ -288,6 +299,7 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): trace_arr = np.concatenate(trace_batches, axis=1) times_trace_arr = np.concatenate(times_trace_batches, axis=1) snr_arr = np.concatenate(snr_batches, axis=1) + glitch_arr = np.concatenate(glitch_batches, axis=1) run_no = np.array(run_no_all) times = np.array(times_all) @@ -303,10 +315,10 @@ def read_rnog_data(station_id: int, run_numbers: list, backend: str = "pyroot"): print(f"freqs shape: {freqs.shape}, spec_arr shape: {spec_arr.shape}, trace_arr shape: {trace_arr.shape}, times_trace_arr shape: {times_trace_arr.shape}, snr_arr shape: {snr_arr.shape}, times shape: {times.shape}, run_no shape: {run_no.shape} ") print(f"trigger types: {np.unique(event_info['triggerType'])}") - return spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info + return spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info, glitch_arr - +#### Spectral analysis functions #### def find_amplitude_ratio_in_band(station_id, freqs, norm_spec_arr, upward_channels, downward_channels, reference_channels, freq_min, freq_max): '''Find normalized amplitude ratio of upward vs downward channels in a given frequency band. This will be replaced by lab measurements in the future.''' freq_mask = (freqs >= freq_min) & (freqs <= freq_max) @@ -361,7 +373,6 @@ def find_amplitude_ratio_in_band_specific_bkg(station_id, freqs, norm_spec_arr, def excess_info_from_ratio(ratio_arr, alpha=0.01, freq_range_str=""): '''Calculate excess information from amplitude ratios in frequency bands.''' - from scipy.stats import binomtest log_ratio = np.log10(np.asarray(ratio_arr)) median_log_ratio = np.median(log_ratio) @@ -373,9 +384,9 @@ def excess_info_from_ratio(ratio_arr, alpha=0.01, freq_range_str=""): result = binomtest(k, n, p=0.5, alternative="greater") pval = result.pvalue statistic = result.statistic - confidence_interval = result.proportion_ci(confidence_level=0.95) + confidence_interval = result.proportion_ci(confidence_level=0.99) - print(f"Binomial test for frequency range {freq_range_str}: k={k}, n={n}, pval={pval:.3e}, statistic={statistic:.3f}, 95% CI={confidence_interval}") + print(f"Binomial test for frequency range {freq_range_str}: k={k}, n={n}, pval={pval:.3e}, statistic={statistic:.3f}, 99% CI={confidence_interval}") if pval > alpha: validation = "NO EXCESS (sign-test)" @@ -408,31 +419,6 @@ def validate_excess_in_bands(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_ "freq_240_272MHz": freq240_info } -def debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order, save_location, station_id, run_label, bins=30): - fig, axes = plt.subplots(1, 4, figsize=(20, 5), sharey=True) - bands = [("80–120 MHz (FORCE Trigger)", ratio_arr_gal), - ("360–380 MHz (FORCE Trigger)", ratio_arr_360), - ("482–485 MHz (FORCE Trigger)", ratio_arr_482), - ("240–272 MHz (FORCE Trigger)", ratio_arr_240)] - - for ax, (title, ratio_list) in zip(axes, bands): - for ch, r in zip(channels_order, ratio_list): - r = np.asarray(r) - ax.hist(np.log10(r), bins=bins, histtype="step", linewidth=1.3, label=f"Ch {ch}", alpha=0.8) - - ax.set_title(title) - ax.set_xlabel("log10(R)") - ax.grid(True, alpha=0.3) - - axes[0].set_ylabel("Counts") - - handles, labels = axes[0].get_legend_handles_labels() - fig.legend(handles, labels, loc="upper right", ncol=8, frameon=True) - plt.tight_layout() - fig.savefig(os.path.join(save_location,f"debug_amplitude_ratios_force_trigger_{station_id}_{run_label}.pdf",)) - plt.close(fig) - - def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location, run_label): '''Plot time-integrated surface channel spectra.''' plt.figure(figsize=(10, 6)) @@ -539,7 +525,7 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels plt.tight_layout() plt.savefig(os.path.join(save_location, f"time_integrated_deep_spectra_unnormalized_force_trigger_{station_id}_{run_label}.pdf")) - +#### SNR analysis functions #### def calculate_statistics_log_snr(snr_arr): '''Calculate log10 statistics (mean, median, std, mean-median) for SNR values.''' log_snr_arr = np.zeros((len(snr_arr), len(snr_arr[0]))) @@ -547,26 +533,25 @@ def calculate_statistics_log_snr(snr_arr): snr_arr_ch = snr_arr[ch] log_snr_arr_ch = np.log10(snr_arr_ch) log_snr_arr[ch, :] = log_snr_arr_ch - log_mean_list = [] - log_median_list = [] - log_std_list = [] - log_difference_list = [] + log_mean_dict = {} + log_median_dict = {} + log_std_dict = {} + log_difference_dict = {} for ch in range(len(log_snr_arr)): - log_mean_list.append(np.mean(log_snr_arr[ch])) - log_median_list.append(np.median(log_snr_arr[ch])) - log_std_list.append(np.std(log_snr_arr[ch])) - log_difference_list.append(np.mean(log_snr_arr[ch]) - np.median(log_snr_arr[ch])) - - return log_snr_arr, log_mean_list, log_median_list, log_std_list, log_difference_list + log_mean_dict[ch] = np.mean(log_snr_arr[ch]) + log_median_dict[ch] = np.median(log_snr_arr[ch]) + log_std_dict[ch] = np.std(log_snr_arr[ch]) + log_difference_dict[ch] = np.mean(log_snr_arr[ch]) - np.median(log_snr_arr[ch]) -def calculate_z_score_snr(snr_arr, mean_list, std_list, channel_list): + return log_snr_arr, log_mean_dict, log_median_dict, log_std_dict, log_difference_dict +def calculate_z_score_snr(snr_arr, ref_mean_dict, ref_std_dict, channel_list): '''Calculate the z-score for SNR values given mean and standard deviation lists for each channel.''' z_score_arr = np.zeros((len(snr_arr), len(snr_arr[0]))) for ch in channel_list: snr_arr_ch = snr_arr[ch] - mean_ch = mean_list[ch] - std_ch = std_list[ch] + mean_ch = ref_mean_dict[ch] + std_ch = ref_std_dict[ch] z_score_arr_ch = (snr_arr_ch - mean_ch) / std_ch z_score_arr[ch,:] = z_score_arr_ch @@ -589,7 +574,7 @@ def symmetry_metrics_z_score(z_score): metrics[f"ch_{ch}"] = symmetry_metrics_channel_z_score(z_score_ch) return metrics -def load_k_values_json(filename): +def load_values_json(filename): ''' Load k-values from a JSON file.''' if not os.path.isabs(filename): filepath = os.path.join(SCRIPT_DIR, filename) @@ -597,9 +582,9 @@ def load_k_values_json(filename): filepath = filename with open(filepath, "r") as f: data = json.load(f) - k_values = {int(ch): float(k) for ch, k in data.items()} + values = {int(ch): float(k) for ch, k in data.items()} - return k_values + return values def outlier_flag(z_score_log, k_values_log, channel_list): '''Flag outlier events based on the k-values for each channel.''' @@ -633,7 +618,106 @@ def find_outlier_details(z_score_log, k_values_log, flag, event_info, channel_li return outlier_details -def calculate_vrms(trace_arr, channel_list, event_info): +def print_outlier_summary(outlier_details): + '''Print a summary of outlier events for each channel.''' + for ch in sorted(outlier_details.keys()): + entries = outlier_details[ch] + n_outliers = len(entries) + + if n_outliers == 0: + print(f"Channel {ch}: 0 outliers") + continue + + k_ch = entries[0]["k"] + print(f"Channel {ch}: {n_outliers} outliers (k = {k_ch:.2f})") + + for e in entries: + print(f" - run {e['run']}, event {e['eventNumber']}, " + f"|z| = {e['z_abs']:.2f} (delta = {e['z_minus_k']:.2f} above k)") + + +def choose_day_interval(times): + times = pd.to_datetime(times, utc=True) + total_days = (times.max() - times.min()).days + + if total_days < 10: + return 1 + elif total_days < 20: + return 2 + elif total_days < 40: + return 4 + elif total_days < 80: + return 7 + elif total_days < 150: + return 10 + elif total_days < 300: + return 15 + elif total_days < 600: + return 30 + else: + return 60 + +def plot_snr_against_time(station_id,times,snr_arr,flag,z_log,k_list,channels,nrows=12,ncols=2,day_interval=None): + times = pd.to_datetime(times,utc=True) + channels = list(channels) + n_channels = len(channels) + + if day_interval is None: + day_interval = choose_day_interval(times) + + fig, axs = plt.subplots(nrows,ncols,figsize=(15,24),sharex=True) + axs = np.array(axs) + + for idx, ch in enumerate(channels): + r = idx//ncols + c = idx%ncols + ax = axs[r,c] + good_mask = ~flag[ch] + ax.scatter(times[good_mask], np.log10(snr_arr[ch][good_mask]), s=8,alpha=0.25, color="gray") + zex = np.abs(z_log[ch]) - k_list[ch] + zex = np.clip(zex,0,None) + sc = ax.scatter(times[flag[ch]], np.log10(snr_arr[ch][flag[ch]]), s=8,c=zex[flag[ch]], cmap="Reds") + cax = ax.inset_axes([1.02,0.1,0.05,0.8]) + plt.colorbar(sc,cax=cax) + ax.grid(alpha=0.4) + ax.text(0.85, 0.95, f"Ch {ch}", transform = ax.transAxes, ha = "left",va = "top", bbox = dict(boxstyle = "round, pad = 0.25", facecolor = "white", alpha = 0.8)) + + for idx in range(n_channels, nrows*ncols): + r = idx//ncols + c = idx%ncols + axs[r, c].set_visible(False) + + red = plt.cm.Reds(0.6) + legend_handles = [Line2D([0],[0],marker="o",color="none",markeredgecolor="gray",markerfacecolor="gray",markersize=6,label=r"$|z|\leq k$"), + Line2D([0],[0],marker="o",color="none",markeredgecolor=red,markerfacecolor=red,markersize=6,label=r"$|z|>k$")] + axs[0, 0].legend(handles = legend_handles, loc = "upper left") + + ticks_ax = axs[-1,0] + time_span = (times.max() - times.min()).total_seconds() / 86400.0 # in days + + if time_span < 1: + # Use 2h ticks if less than 1 day + ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=2)) + ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) + elif time_span < 3: + # Use 6h ticks if less than 3 days + ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=6)) + ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) + else: + # Use day ticks otherwise + ticks_ax.xaxis.set_major_locator(mdates.DayLocator(interval = day_interval)) + ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d", tz = timezone.utc)) + + fig.autofmt_xdate() + + plt.subplots_adjust(bottom = 0.11, wspace = 0.38, left=0.08) + fig.supxlabel("Date [UTC]", x = 0.5, y = 0.06) + fig.supylabel(r"$\log_{10}(\mathrm{SNR})$", x = 0.02) + plt.savefig(os.path.join(save_location,f"snr_against_time_{station_id}_{run_label}.pdf")) + + +#### Vrms analysis functions #### +def calculate_vrms(trace_arr, event_info): '''Calculate Vrms for each channel and event according to trigger types.''' vrms_arr = np.std(trace_arr, axis=2) # (n_channels, n_events) @@ -667,7 +751,7 @@ def kde_modality(vrms_arr, channel_list, bandwidth=None, grid_points = 512, peak # Adjust prominence to be relative to the KDE range abs_prom = peak_prominence * (np.max(kde_values) - np.min(kde_values)) - peaks, properties = find_peaks(kde_values, prominence=abs_prom) + peaks, properties = find_peaks(kde_values, prominence=abs_prom, height=0.05*np.max(kde_values)) modality_dict[ch] = { "kde": kde, @@ -720,98 +804,117 @@ def tail_fraction_and_trimmed_skew_two_sided(vrms_arr, channel_list, lower_perce return tail_dict +def report_vrms_characteristics(modality_dict, tail_dict, channel_list): + for ch in channel_list: + # modality from KDE peak count + n_peaks = modality_dict[ch]["n_peaks"] + if n_peaks == 0: + modality = "flat/noisy" + elif n_peaks == 1: + modality = "unimodal" + elif n_peaks == 2: + modality = "bimodal" + else: + modality = f"multimodal ({n_peaks} peaks)" + + # tail + skewness characteristics + full_skew = tail_dict[ch]["full_skew"] + high_frac = tail_dict[ch]["high_tail_fraction"] + low_frac = tail_dict[ch]["low_tail_fraction"] + skew_trim_h = tail_dict[ch]["trimmed_skew_high"] + skew_trim_l = tail_dict[ch]["trimmed_skew_low"] + + # classify tail behavior + if 0 < high_frac < 0.01 and full_skew > 0.5 and not np.isnan(skew_trim_h): + tail_label = "rare high extremes" + tail_frac = high_frac + elif 0.01 <= high_frac < 0.05 and full_skew > 0: + tail_label = "moderate high skew" + tail_frac = high_frac + elif high_frac >= 0.05 and full_skew > 0: + tail_label = "bulk high skew" + tail_frac = high_frac + elif 0 < low_frac < 0.01 and full_skew < -0.5 and not np.isnan(skew_trim_l): + tail_label = "rare low extremes" + tail_frac = low_frac + elif 0.01 <= low_frac < 0.05 and full_skew < 0: + tail_label = "moderate low skew" + tail_frac = low_frac + elif low_frac >= 0.05 and full_skew < 0: + tail_label = "bulk low skew" + tail_frac = low_frac + else: + tail_label = "no significant tails" + tail_frac = None -def print_outlier_summary(outlier_details): - '''Print a summary of outlier events for each channel.''' - for ch in sorted(outlier_details.keys()): - entries = outlier_details[ch] - n_outliers = len(entries) - - if n_outliers == 0: - print(f"Channel {ch}: 0 outliers") - continue - - k_ch = entries[0]["k"] - print(f"Channel {ch}: {n_outliers} outliers (k = {k_ch:.2f})") + # output summary + if tail_frac is not None: + tail_label += f" (fraction: {tail_frac:.3f})" + print(f"Ch {ch:02d}: {modality} ({tail_label})") + else: + print(f"Ch {ch:02d}: {modality} ({tail_label})") - for e in entries: - print(f" - run {e['run']}, event {e['eventNumber']}, " - f"|z| = {e['z_abs']:.2f} (delta = {e['z_minus_k']:.2f} above k)") - -def choose_day_interval(times): - times = pd.to_datetime(times, utc=True) - total_days = (times.max() - times.min()).days +#### Glitching analysis functions #### +def binomtest_glitch_fraction(glitch_arr, channel_list, alpha=0.01): + '''Perform binomial test on glitch fractions for each channel (with p0=0.1).''' + glitch_info = {} + n_events = glitch_arr.shape[1] - if total_days < 10: - return 1 - elif total_days < 20: - return 2 - elif total_days < 40: - return 4 - elif total_days < 80: - return 7 - elif total_days < 150: - return 10 - elif total_days < 300: - return 15 - elif total_days < 600: - return 30 - else: - return 60 + for ch in channel_list: + glitch_ch = glitch_arr[ch] + n_glitches = np.sum(glitch_ch > 0) -def plot_snr_against_time(station_id,times,snr_arr,flag,z_log,k_list,channels,nrows=12,ncols=2,day_interval=None): - times = pd.to_datetime(times,utc=True) - channels = list(channels) - n_channels = len(channels) + result = binomtest(n_glitches, n_events, p=0.1, alternative="greater") + pval = result.pvalue + statistic = result.statistic + confidence_interval = result.proportion_ci(confidence_level=0.99) - if day_interval is None: - day_interval = choose_day_interval(times) + if pval > alpha: + validation = "NO EXCESSIVE GLITCHING" + else: + if confidence_interval.low > 0.3: + validation = "STRONG EXCESSIVE GLITCHING" + elif confidence_interval.low > 0.2: + validation = "MODERATE EXCESSIVE GLITCHING" + else: + validation = "WEAK EXCESSIVE GLITCHING" + + glitch_info[ch] = { + "n_glitches": int(n_glitches), + "n_events": int(n_events), + "pval": float(pval), + "confidence_interval": (float(confidence_interval.low), float(confidence_interval.high)), + "glitch_fraction": float(n_glitches / n_events), + "validation": validation, + } - fig, axs = plt.subplots(nrows,ncols,figsize=(15,24),sharex=True) - axs = np.array(axs) + return glitch_info - for idx, ch in enumerate(channels): - r = idx//ncols - c = idx%ncols - ax = axs[r,c] - good_mask = ~flag[ch] - ax.scatter(times[good_mask], np.log10(snr_arr[ch][good_mask]), s=8,alpha=0.25, color="gray") - zex = np.abs(z_log[ch]) - k_list[ch] - zex = np.clip(zex,0,None) - sc = ax.scatter(times[flag[ch]], np.log10(snr_arr[ch][flag[ch]]), s=8,c=zex[flag[ch]], cmap="Reds") - cax = ax.inset_axes([1.02,0.1,0.05,0.8]) - plt.colorbar(sc,cax=cax) - ax.grid(alpha=0.4) - ax.text(0.85, 0.95, f"Ch {ch}", transform = ax.transAxes, ha = "left",va = "top", bbox = dict(boxstyle = "round, pad = 0.25", facecolor = "white", alpha = 0.8)) - - for idx in range(n_channels, nrows*ncols): - r = idx//ncols - c = idx%ncols - axs[r, c].set_visible(False) - red = plt.cm.Reds(0.6) - legend_handles = [Line2D([0],[0],marker="o",color="none",markeredgecolor="gray",markerfacecolor="gray",markersize=6,label=r"$|z|\leq k$"), - Line2D([0],[0],marker="o",color="none",markeredgecolor=red,markerfacecolor=red,markersize=6,label=r"$|z|>k$")] - axs[0, 0].legend(handles = legend_handles, loc = "upper left") +#### Debug plots #### +def debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order, save_location, station_id, run_label, bins=30): + fig, axes = plt.subplots(1, 4, figsize=(20, 5), sharey=True) + bands = [("80–120 MHz (FORCE Trigger)", ratio_arr_gal), + ("360–380 MHz (FORCE Trigger)", ratio_arr_360), + ("482–485 MHz (FORCE Trigger)", ratio_arr_482), + ("240–272 MHz (FORCE Trigger)", ratio_arr_240)] - ticks_ax = axs[-1,0] - time_span = (times.max() - times.min()).total_seconds() / 86400.0 # in days + for ax, (title, ratio_list) in zip(axes, bands): + for ch, r in zip(channels_order, ratio_list): + r = np.asarray(r) + ax.hist(np.log10(r), bins=bins, histtype="step", linewidth=1.3, label=f"Ch {ch}", alpha=0.8) - if time_span < 3: - # Use hourly ticks if less than 3 days - ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=1)) - ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) - else: - # Use day ticks otherwise - ticks_ax.xaxis.set_major_locator(mdates.DayLocator(interval = day_interval)) - ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d", tz = timezone.utc)) + ax.set_title(title) + ax.set_xlabel("log10(R)") + ax.grid(True, alpha=0.3) - fig.autofmt_xdate() + axes[0].set_ylabel("Counts") - plt.subplots_adjust(bottom = 0.11, wspace = 0.38, left=0.08) - fig.supxlabel("Date [UTC]", x = 0.5, y = 0.06) - fig.supylabel(r"$\log_{10}(\mathrm{SNR})$", x = 0.02) - plt.savefig(os.path.join(save_location,f"snr_against_time_{station_id}_{run_label}.pdf")) + handles, labels = axes[0].get_legend_handles_labels() + fig.legend(handles, labels, loc="upper right", ncol=8, frameon=True) + plt.tight_layout() + fig.savefig(os.path.join(save_location,f"debug_amplitude_ratios_force_trigger_{station_id}_{run_label}.pdf",)) + plt.close(fig) def debug_plot_snr_distribution(log_snr_arr, channel_list, save_location, station_id, run_label, bins=30): fig, ax = plt.subplots(figsize=(10, 6)) @@ -847,6 +950,47 @@ def debug_plot_z_score_snr(z_score_arr, channel_list, save_location, station_id, fig.savefig(os.path.join(save_location,f"debug_z_score_snr_force_trigger_{station_id}_{run_label}.pdf",)) plt.close(fig) +def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_id, run_label, trigger_label, save_location, n_rows=12, n_cols=2): + fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 36)) + axes = axes.flatten() + + for idx, ch in enumerate(channel_list): + ax = axes[idx] + info = modality_dict[ch] + vrms = vrms_arr[ch] + vrms = vrms[np.isfinite(vrms)] + vrms_grid = info["vrms_grid"] + kde_values = info["kde_values"] + peaks = info["peaks"] + n_peaks = info["n_peaks"] + + if n_peaks == 0: + modality = "flat/noisy" + elif n_peaks == 1: + modality = "unimodal" + elif n_peaks == 2: + modality = "bimodal" + else: + modality = f"multimodal ({n_peaks})" + + # histogram + ax.hist(vrms, bins=30, density=True, alpha=0.3, color="gray") + # kde curve + ax.plot(vrms_grid, kde_values, color="blue", lw=1.5) + # peaks + if len(peaks) > 0: + ax.plot(vrms_grid[peaks], kde_values[peaks], "ro", markersize=5) + + ax.set_title(f"Ch {ch}: {modality}") + ax.set_xlabel("Vrms Values [V]") + ax.set_ylabel("KDE Density") + + for i in range(len(channel_list), n_rows * n_cols): + axes[i].axis("off") + + plt.tight_layout() + plt.savefig(os.path.join(save_location,f"debug_vrms_hist_kde_density_peaks_{station_id}_{run_label}_{trigger_label}.pdf",)) + if __name__ == "__main__": @@ -906,7 +1050,7 @@ def debug_plot_z_score_snr(z_score_arr, channel_list, save_location, station_id, os.makedirs(save_location, exist_ok=True) # Read data - spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info = read_rnog_data(station_id, run_numbers, backend=backend) + spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info, glitch_arr = read_rnog_data(station_id, run_numbers, backend=backend) # Normalize surface channel spectra norm_spec_arr, scale_factors = normalize_channels(spec_arr, freqs, downward_channels, upward_channels) @@ -962,10 +1106,13 @@ def debug_plot_z_score_snr(z_score_arr, channel_list, save_location, station_id, times_force = times[force_mask] log_snr_arr, log_mean_list, log_median_list, log_std_list, log_difference_list = calculate_statistics_log_snr(snr_arr_force) - - z_score_arr_log_snr = calculate_z_score_snr(log_snr_arr, log_mean_list, log_std_list, all_channels) + ref_log_mean_filename = f"station_{station_id}_ref_log_mean_snr.json" + ref_log_std_filename = f"station_{station_id}_ref_log_std_snr.json" + ref_log_mean_list = load_values_json(ref_log_mean_filename) + ref_log_std_list = load_values_json(ref_log_std_filename) + z_score_arr_log_snr = calculate_z_score_snr(log_snr_arr, ref_log_mean_list, ref_log_std_list, all_channels) k_values_filename_snr = f"station_{station_id}_k_ref_values_snr.json" - k_values_log_snr = load_k_values_json(k_values_filename_snr) + k_values_log_snr = load_values_json(k_values_filename_snr) flag_outliers_snr = outlier_flag(z_score_arr_log_snr, k_values_log_snr, all_channels) outlier_details_snr = find_outlier_details(z_score_arr_log_snr, k_values_log_snr, flag_outliers_snr, event_info_force, all_channels) @@ -974,8 +1121,63 @@ def debug_plot_z_score_snr(z_score_arr, channel_list, save_location, station_id, day_interval = choose_day_interval(times) plot_snr_against_time(station_id, times_force, snr_arr_force, flag_outliers_snr, z_score_arr_log_snr, k_values_log_snr, all_channels, nrows=12, ncols=2, day_interval=day_interval) + # Vrms analysis + vrms_arr, vrms_arr_force, vrms_arr_radiant0, vrms_arr_radiant1, vrms_arr_lt = calculate_vrms(trace_arr, event_info) + + modality_dict_force = kde_modality(vrms_arr_force, all_channels, bandwidth=None, grid_points=512, peak_prominence=0.01) + tail_dict_force = tail_fraction_and_trimmed_skew_two_sided(vrms_arr_force, all_channels, lower_percentile=25, upper_percentile=75, extreme_k=3) + print("\nVrms characteristics for FORCE trigger events:") + if len(vrms_arr_force[1]) < 100: + print(f"!!!! Warning: FORCE trigger has less than 100 valid Vrms entries ({len(vrms_arr_force)}). Results for the Vrms statistics may be unreliable. !!!!") + report_vrms_characteristics(modality_dict_force, tail_dict_force, all_channels) + + modality_dict_radiant0 = kde_modality(vrms_arr_radiant0, all_channels, bandwidth=None, grid_points=512, peak_prominence=0.01) + tail_dict_radiant0 = tail_fraction_and_trimmed_skew_two_sided(vrms_arr_radiant0, all_channels, lower_percentile=25, upper_percentile=75, extreme_k=3) + print("\nVrms characteristics for RADIANT0 trigger events:") + if len(vrms_arr_radiant0[1]) < 100: + print(f"!!!! Warning: RADIANT0 trigger has less than 100 valid Vrms entries ({len(vrms_arr_radiant0)}). Results for the Vrms statistics may be unreliable. !!!!") + report_vrms_characteristics(modality_dict_force, tail_dict_force, all_channels) + report_vrms_characteristics(modality_dict_radiant0, tail_dict_radiant0, all_channels) + + modality_dict_radiant1 = kde_modality(vrms_arr_radiant1, all_channels, bandwidth=None, grid_points=512, peak_prominence=0.01) + tail_dict_radiant1 = tail_fraction_and_trimmed_skew_two_sided(vrms_arr_radiant1, all_channels, lower_percentile=25, upper_percentile=75, extreme_k=3) + print("\nVrms characteristics for RADIANT1 trigger events:") + if len(vrms_arr_radiant1[1]) < 100: + print(f"!!!! Warning: RADIANT1 trigger has less than 100 valid Vrms entries ({len(vrms_arr_radiant1)}). Results for the Vrms statistics may be unreliable. !!!!") + report_vrms_characteristics(modality_dict_radiant1, tail_dict_radiant1, all_channels) + + modality_dict_lt = kde_modality(vrms_arr_lt, all_channels, bandwidth=None, grid_points=512, peak_prominence=0.01) + tail_dict_lt = tail_fraction_and_trimmed_skew_two_sided(vrms_arr_lt, all_channels, lower_percentile=25, upper_percentile=75, extreme_k=3) + print("\nVrms characteristics for LT trigger events:") + if len(vrms_arr_lt[1]) < 100: + print(f"!!!! Warning: LT trigger has less than 100 valid Vrms entries ({len(vrms_arr_lt)}). Results for the Vrms statistics may be unreliable. !!!!") + report_vrms_characteristics(modality_dict_lt, tail_dict_lt, all_channels) + + # The Vrms statistics can be misleading (especially for low event number) so the debugging plots are always generated + debug_plot_vrms_distribution(vrms_arr_force, modality_dict_force, channel_list=all_channels, station_id=station_id, run_label=run_label, trigger_label="force", save_location=save_location, n_rows=12, n_cols=2) + debug_plot_vrms_distribution(vrms_arr_radiant0, modality_dict_radiant0, channel_list=all_channels, station_id=station_id, run_label=run_label, trigger_label="radiant0", save_location=save_location, n_rows=12, n_cols=2) + debug_plot_vrms_distribution(vrms_arr_radiant1, modality_dict_radiant1, channel_list=all_channels, station_id=station_id, run_label=run_label, trigger_label="radiant1", save_location=save_location, n_rows=12, n_cols=2) + debug_plot_vrms_distribution(vrms_arr_lt, modality_dict_lt, channel_list=all_channels, station_id=station_id, run_label=run_label, trigger_label="lt", save_location=save_location, n_rows=12, n_cols=2) + + # Glitching analysis + glitch_info = binomtest_glitch_fraction(glitch_arr, all_channels, alpha=0.01) + print("\nGlitching analysis results:") + for ch in all_channels: + info = glitch_info[ch] + print( + f"Channel {ch:2d} | " + f"n_glitches: {info['n_glitches']:4d} | " + f"n_events: {info['n_events']:<4d} | " + f"(frac={info['glitch_fraction']:.3f}) | " + f"p={info['pval']:.2e} | " + f"CI99%={info['confidence_interval']} | " + f"{info['validation']}" + ) + # Debug plots if args.debug_plot: debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) debug_plot_snr_distribution(log_snr_arr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) - debug_plot_z_score_snr(z_score_arr_log_snr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) \ No newline at end of file + debug_plot_z_score_snr(z_score_arr_log_snr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) + + \ No newline at end of file diff --git a/rnog_analysis_tools/data_monitoring/station_14_k_ref_values_snr.json b/rnog_analysis_tools/data_monitoring/station_14_k_ref_values_snr.json new file mode 100644 index 0000000..7a4a669 --- /dev/null +++ b/rnog_analysis_tools/data_monitoring/station_14_k_ref_values_snr.json @@ -0,0 +1,26 @@ +{ + "0": 3.4629415672776167, + "1": 3.5406871937295157, + "2": 4.246244014825565, + "3": 3.672415668446294, + "4": 4.094566736622663, + "5": 3.6581208290632143, + "6": 3.440977286232869, + "7": 3.8994273526892678, + "8": 3.7417573367175354, + "9": 3.808461600655682, + "10": 4.076200180139419, + "11": 3.9641497414678657, + "12": 3.804236990514898, + "13": 3.825948999629254, + "14": 3.8667461848225826, + "15": 3.5472320507700044, + "16": 4.471159862666153, + "17": 3.5691470733333484, + "18": 4.035209249337079, + "19": 3.8717437067333034, + "20": 3.5179733714101973, + "21": 3.893881748029506, + "22": 4.04091017938111, + "23": 3.790197046702542 +} \ No newline at end of file diff --git a/rnog_analysis_tools/data_monitoring/station_14_ref_log_mean_snr.json b/rnog_analysis_tools/data_monitoring/station_14_ref_log_mean_snr.json new file mode 100644 index 0000000..9c12ffd --- /dev/null +++ b/rnog_analysis_tools/data_monitoring/station_14_ref_log_mean_snr.json @@ -0,0 +1,26 @@ +{ + "0": 0.570153460733576, + "1": 0.5983102750670379, + "2": 0.5621653627696398, + "3": 0.5639886445582059, + "4": 0.5838963233587718, + "5": 0.5659645577104686, + "6": 0.5619701665484542, + "7": 0.561757791919875, + "8": 0.5557990116357686, + "9": 0.5582766314874003, + "10": 0.5513242727617559, + "11": 0.5578905303442172, + "12": 0.5595357051234465, + "13": 0.5619655262001262, + "14": 0.570684207905447, + "15": 0.5671872278899981, + "16": 0.5622529386994631, + "17": 0.5680683100381787, + "18": 0.5586113204599104, + "19": 0.5598134402261741, + "20": 0.5608754315376695, + "21": 0.5716030512212262, + "22": 0.5703699054375464, + "23": 0.554889692787254 +} \ No newline at end of file diff --git a/rnog_analysis_tools/data_monitoring/station_14_ref_log_std_snr.json b/rnog_analysis_tools/data_monitoring/station_14_ref_log_std_snr.json new file mode 100644 index 0000000..607928d --- /dev/null +++ b/rnog_analysis_tools/data_monitoring/station_14_ref_log_std_snr.json @@ -0,0 +1,26 @@ +{ + "0": 0.0375959601805741, + "1": 0.04191032890183429, + "2": 0.037356688322661404, + "3": 0.036420229254128855, + "4": 0.03868338583436355, + "5": 0.03763507487611656, + "6": 0.03836545002072578, + "7": 0.03774663771504275, + "8": 0.03596666164221151, + "9": 0.0379168838108702, + "10": 0.03632514803263139, + "11": 0.037057883093033345, + "12": 0.036779644978815695, + "13": 0.03768650455239701, + "14": 0.03810952174754539, + "15": 0.037920302646738784, + "16": 0.038240663023588216, + "17": 0.037174254561981264, + "18": 0.038908103823895486, + "19": 0.038026204837319826, + "20": 0.03726881424439382, + "21": 0.03850896793558693, + "22": 0.03750933898249592, + "23": 0.037658968552514285 +} \ No newline at end of file From 2a8a1f20b487dd090e45dccf2a6b358bc5baed48 Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Sun, 11 Jan 2026 17:07:40 +0100 Subject: [PATCH 10/12] trigger rate/threshold plot and vrms against time plot --- .../science_verification_analysis.py | 169 +++++++++++++++++- 1 file changed, 167 insertions(+), 2 deletions(-) diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index ceae8f0..5454868 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -697,11 +697,11 @@ def plot_snr_against_time(station_id,times,snr_arr,flag,z_log,k_list,channels,nr if time_span < 1: # Use 2h ticks if less than 1 day - ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=2)) + ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=6)) ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) elif time_span < 3: # Use 6h ticks if less than 3 days - ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=6)) + ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=12)) ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) else: # Use day ticks otherwise @@ -854,6 +854,167 @@ def report_vrms_characteristics(modality_dict, tail_dict, channel_list): else: print(f"Ch {ch:02d}: {modality} ({tail_label})") +def plot_vrms_values_against_time(times, vrms_arr_all, channel_list, station_id, run_label, save_location, n_rows = 12, n_cols = 2, day_interval=5): + '''Plot Vrms distributions for different trigger types.''' + + force_mask = event_info["triggerType"] == "FORCE" + radiant0_mask = event_info["triggerType"] == "RADIANT0" + radiant1_mask = event_info["triggerType"] == "RADIANT1" + lt_mask = event_info["triggerType"] == "LT" + + trigger_masks = {"FORCE": force_mask, + "RADIANT0": radiant0_mask, + "RADIANT1": radiant1_mask, + "LT": lt_mask,} + + trigger_colors = {"FORCE": "tab:blue", + "RADIANT0": "tab:orange", + "RADIANT1": "tab:green", + "LT": "tab:red",} + + times = np.asarray(times) + vrms_arr_all = np.asarray(vrms_arr_all) + + n_channels = len(channel_list) + + fig, axs = plt.subplots(n_rows, n_cols, figsize=(16, 36), sharex=True, squeeze=False) + axs = axs.ravel() + + legend_handles = {} + + for idx, ch in enumerate(channel_list): + ax = axs[idx] + vrms_ch = vrms_arr_all[ch] + + for trig_name, trig_mask in trigger_masks.items(): + times_trig = times[trig_mask] + vrms_trig = vrms_ch[trig_mask] + + scatter = ax.scatter(times_trig, vrms_trig, s=8, alpha=0.7, label=trig_name, color=trigger_colors[trig_name]) + if trig_name not in legend_handles: + legend_handles[trig_name] = scatter + + + ax.set_title(f"Channel {ch}") + ax.grid(alpha=0.4) + + for j in range(len(channel_list), len(axs)): + axs[j].set_visible(False) + + ticks_ax = axs[-1] + time_span = times.max() - times.min() + time_span_days = time_span / np.timedelta64(1, "D") + + if time_span_days < 1: + ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=6)) + ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) + elif time_span_days < 3: + ticks_ax.xaxis.set_major_locator(mdates.HourLocator(interval=12)) + ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) + else: + ticks_ax.xaxis.set_major_locator(mdates.DayLocator(interval=day_interval)) + ticks_ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d", tz=timezone.utc)) + + fig.legend(handles=[legend_handles[k] for k in trigger_masks.keys()], labels=list(trigger_masks.keys()), + loc="lower center", ncol=4, frameon=True, markerscale=2, bbox_to_anchor=(0.5, 0.005)) + + fig.supxlabel("Date [UTC]", x=0.5, y=0.02) + fig.supylabel(r"$V_\mathrm{rms}$ [V]", x=0.02) + plt.subplots_adjust(bottom=0.05, wspace=0.2, hspace=0.4, left=0.1) + plt.savefig(os.path.join(save_location, f"vrms_against_time_{station_id}_{run_label}.pdf")) + plt.close(fig) + +#### Trigger analysis (adapted from the plot_trigger() function from analyze_run.py) #### +def compute_radiant_thresholds(event_info, down_channels, up_channels): + radiant = event_info["radiantThrs"] + + downward = radiant[:, down_channels].mean(axis=1) + upward = radiant[:, up_channels].mean(axis=1) + low_trig = event_info["lowTrigThrs"].mean(axis=1) + + return upward, downward, low_trig + +def plot_trigger_rate_with_thresholds(station_id, event_info, down_channels, up_channels, run_label, day_interval, bin_width_initial=300, max_bins=800, save_location=None): + + trigger_times = np.asarray(event_info["triggerTime"]) + readout_times = np.asarray(event_info["readoutTime"]) + + run_duration = trigger_times.max() - trigger_times.min() + run_duration_readout = readout_times.max() - readout_times.min() + + bin_width = bin_width_initial + nbins = int(run_duration // bin_width) + if nbins > max_bins: + bin_width = 3600 # 1 hour + nbins = int(run_duration // bin_width) + + times = np.array([datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc) for ts in trigger_times]) + time_span = times.max() - times.min() + time_span_days = time_span.total_seconds() / 86400.0 # convert to days + + fig, ax_rate = plt.subplots(figsize=(12, 6)) + + ax_rate.grid(True, which="both", ls="--", lw=0.35, alpha=0.5) + + weights_total = np.full(times.shape[0], 1.0 / bin_width) + _, bin_edges, _ = ax_rate.hist(times, bins=nbins, weights=weights_total, histtype="step", color="k", label="Total Rate",) + + triggers = np.unique(event_info["triggerType"]) + trigger_colors = { + "FORCE": "tab:blue", + "RADIANT0": "tab:orange", + "RADIANT1": "tab:green", + "LT": "tab:red",} + + for trigger in triggers: + mask = event_info["triggerType"] == trigger + n_mask = mask.sum() + if n_mask == 0: + continue + + color = trigger_colors.get(trigger) + + ax_rate.hist(times[mask], bins=bin_edges, weights=np.full(n_mask, 1.0 / bin_width), histtype="step", lw=1.1, label=str(trigger), color=color,) + + ax_rate.set_ylabel("Trigger Rate [Hz]") + ax_rate.set_yscale("log") + + if time_span_days < 1: + # Use 6h ticks if less than 1 day + ax_rate.xaxis.set_major_locator(mdates.HourLocator(interval=6)) + ax_rate.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) + elif time_span_days < 3: + # Use 12h ticks if less than 3 days + ax_rate.xaxis.set_major_locator(mdates.HourLocator(interval=12)) + ax_rate.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d\n%H:%M", tz=timezone.utc)) + else: + # Use day ticks otherwise + ax_rate.xaxis.set_major_locator(mdates.DayLocator(interval = day_interval)) + ax_rate.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d", tz = timezone.utc)) + + ax_rate.tick_params(axis="x", rotation=25) + ax_rate.set_xlabel("Time (UTC)") + + upward, downward, lt = compute_radiant_thresholds(event_info, down_channels, up_channels) + scale = 2.5 / 16777215.0 # convert register to Volts + + ax_thr = ax_rate.twinx() + + ax_thr.plot(times, upward * scale, ls="--", lw=2, color="darkmagenta", label="RADIANT Up (avg)",) + ax_thr.plot(times, downward * scale, ls="--", lw=2, color="darkgreen", label="RADIANT Down (avg)",) + ax_thr.plot(times, lt * scale, ls="--", lw=2, color="mediumblue", label="LT (avg)",) + + ax_thr.set_ylabel("Threshold [V]") + + h1, l1 = ax_rate.get_legend_handles_labels() + h2, l2 = ax_thr.get_legend_handles_labels() + ax_rate.legend(h1 + h2, l1 + l2, loc="upper left", bbox_to_anchor=(1.1, 1), borderaxespad=0., frameon=True, framealpha=1.0,) + + fig.tight_layout() + fig.savefig(os.path.join(save_location, f"trigger_rate_with_thresholds_{station_id}_{run_label}.pdf")) + + return fig, ax_rate, ax_thr + #### Glitching analysis functions #### def binomtest_glitch_fraction(glitch_arr, channel_list, alpha=0.01): '''Perform binomial test on glitch fractions for each channel (with p0=0.1).''' @@ -1152,6 +1313,7 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ if len(vrms_arr_lt[1]) < 100: print(f"!!!! Warning: LT trigger has less than 100 valid Vrms entries ({len(vrms_arr_lt)}). Results for the Vrms statistics may be unreliable. !!!!") report_vrms_characteristics(modality_dict_lt, tail_dict_lt, all_channels) + plot_vrms_values_against_time(times, vrms_arr, all_channels, station_id, run_label, save_location, n_rows=12, n_cols=2, day_interval=day_interval) # The Vrms statistics can be misleading (especially for low event number) so the debugging plots are always generated debug_plot_vrms_distribution(vrms_arr_force, modality_dict_force, channel_list=all_channels, station_id=station_id, run_label=run_label, trigger_label="force", save_location=save_location, n_rows=12, n_cols=2) @@ -1174,6 +1336,9 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ f"{info['validation']}" ) + # Trigger rate with thresholds plot + fig_trigger, ax_rate, ax_thr = plot_trigger_rate_with_thresholds(station_id, event_info, downward_channels, upward_channels, run_label, day_interval, bin_width_initial=300, max_bins=800, save_location=save_location) + # Debug plots if args.debug_plot: debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) From 6aef2094af52d384291c4cd4b269492758659fb2 Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Sun, 11 Jan 2026 20:36:18 +0100 Subject: [PATCH 11/12] block offset + glitching 99% quantile --- .../science_verification_analysis.py | 122 +++++++++++++++++- 1 file changed, 119 insertions(+), 3 deletions(-) diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index 5454868..ab2a07f 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -29,7 +29,8 @@ from datetime import timezone from scipy.stats import gaussian_kde, skew, binomtest from scipy.signal import find_peaks - +from matplotlib import colors, cm +from NuRadioReco.modules.RNO_G.channelBlockOffsetFitter import fit_block_offsets ''' @@ -1051,6 +1052,117 @@ def binomtest_glitch_fraction(glitch_arr, channel_list, alpha=0.01): return glitch_info +def glitching_violin_plot(glitch_arr, channel_list, station_id, run_label, save_location): + data = [glitch_arr[ch] for ch in channel_list] + means = np.array([np.mean(glitch_arr[ch]) for ch in channel_list]) + + fig, ax = plt.subplots(figsize=(12, 10)) + + parts = ax.violinplot(data, positions=channel_list, showextrema=True, showmedians=True, vert=False, side="high", widths=1.8,) + + norm = colors.Normalize(vmin=np.min(means), vmax=np.max(means), clip=False) + sm = cm.ScalarMappable(norm=norm, cmap="Blues") + + for idx, pc in enumerate(parts["bodies"]): + pc.set_facecolor(sm.to_rgba(means[idx])) + pc.set_alpha(0.5) + pc.set_edgecolor("k") + + parts["cmins"].set_linewidth(0.2) + parts["cmaxes"].set_linewidth(0.2) + parts["cbars"].set_linewidth(0.5) + parts["cmins"].set_color("k") + parts["cmaxes"].set_color("k") + parts["cbars"].set_color("k") + parts["cmedians"].set_color("k") + parts["cmedians"].set_linewidth(1) + + cb = plt.colorbar(sm, ax=ax, pad=0.02) + cb.set_label("Mean test statistics") + + ax.set_xlabel("Glitching test statistics") + ax.set_ylabel("Channel") + ax.set_yticks(channel_list) + ax.grid(True, alpha=0.3) + ax.set_ylim(min(channel_list) - 1, max(channel_list) + 1) + + plt.tight_layout() + plt.savefig(os.path.join(save_location, f"glitching_violin_plot_{station_id}_{run_label}.pdf")) + plt.close(fig) + +def choose_bin_size(times): + total_hours = (times.max() - times.min()).total_seconds() / 3600.0 + if total_hours < 12: + return "30min" + elif total_hours < 24: + return "1h" + elif total_hours < 3 * 24: + return "2h" + elif total_hours < 7 * 24: + return "6h" + elif total_hours < 30 * 24: + return "12h" + else: + return "24h" + +def plot_glitch_q99_over_time(times, glitch_arr, channels, station_id, run_label, save_location): + times = pd.to_datetime(times) + df = pd.DataFrame(glitch_arr.T, index=times, columns=channels) + + bin_rule = choose_bin_size(times) + q99 = df.resample(bin_rule).quantile(0.99) + + fig, ax = plt.subplots(figsize=(12, 5)) + + for ch in channels: + ax.plot(q99.index, q99[ch], marker=".", linestyle="-", label=f"ch {ch}") + + ax.set_xlabel("Date [UTC]") + ax.set_ylabel(f"99% quantile glitching ts ({bin_rule} bins)") + ax.grid(True, alpha=0.3) + ax.legend(ncol=3) + + plt.tight_layout() + plt.savefig(os.path.join(save_location, f"glitch_q99_{station_id}_{run_label}.pdf")) + plt.close(fig) + +#### Block offsets #### +def plot_block_offsets_violin(trace_arr, event_info, channel_list, station_id, run_label, save_location): + force_mask = event_info["triggerType"] == "FORCE" + trace_arr_force = trace_arr[:, force_mask, :] + n_force_events = trace_arr_force.shape[1] + + fit_blocks_flat = [] + + for ch in channel_list: + traces_force_ch = trace_arr_force[ch] + offsets_ch = np.array([fit_block_offsets(trace, sampling_rate=2.4) for trace in traces_force_ch]) + fit_blocks_flat.append(offsets_ch.ravel()) + + fit_blocks_flat = np.array(fit_blocks_flat) + + fig, ax = plt.subplots(figsize=(12, 6)) + parts = ax.violinplot(fit_blocks_flat.T, positions=channel_list, showextrema=True, showmedians=True, vert=False, side="high", widths=1.8,) + + for pc in parts["bodies"]: + pc.set_facecolor("lightblue") + pc.set_edgecolor("black") + pc.set_alpha(0.7) + + parts["cmedians"].set_color("black") + parts["cmedians"].set_linewidth(1.8) + + ax.set_xlabel("Fitted block offset [V]") + ax.set_ylabel("Channel") + ax.grid(True, alpha=0.3) + + ax.plot(np.nan, np.nan, label=f"{n_force_events} FORCE triggers", color="k") + ax.plot(np.nan, np.nan, label="block-offset fit", color="C0") + ax.legend(loc="best") + + plt.tight_layout() + plt.savefig(os.path.join(save_location, f"block_offset_violin_{station_id}_{run_label}.pdf")) + plt.close(fig) #### Debug plots #### def debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order, save_location, station_id, run_label, bins=30): @@ -1335,14 +1447,18 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ f"CI99%={info['confidence_interval']} | " f"{info['validation']}" ) + glitching_violin_plot(glitch_arr, all_channels, station_id, run_label, save_location) + plot_glitch_q99_over_time(np.array(times), glitch_arr, all_channels, station_id, run_label, save_location) # Trigger rate with thresholds plot fig_trigger, ax_rate, ax_thr = plot_trigger_rate_with_thresholds(station_id, event_info, downward_channels, upward_channels, run_label, day_interval, bin_width_initial=300, max_bins=800, save_location=save_location) + # Block offsets plot + plot_block_offsets_violin(trace_arr, event_info, all_channels, station_id, run_label, save_location) + # Debug plots if args.debug_plot: debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) debug_plot_snr_distribution(log_snr_arr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) - debug_plot_z_score_snr(z_score_arr_log_snr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) - + debug_plot_z_score_snr(z_score_arr_log_snr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) \ No newline at end of file From 1799a564f8fe0dd90359a33aae10b8e5fe4ea5e0 Mon Sep 17 00:00:00 2001 From: Zeynep Su Selcuk Date: Mon, 2 Feb 2026 21:41:41 +0100 Subject: [PATCH 12/12] summary csv --- ...ected_values.py => expected_snr_values.py} | 3 +- .../science_verification_analysis.py | 347 +++++++++++++++--- 2 files changed, 307 insertions(+), 43 deletions(-) rename rnog_analysis_tools/data_monitoring/{expected_values.py => expected_snr_values.py} (96%) diff --git a/rnog_analysis_tools/data_monitoring/expected_values.py b/rnog_analysis_tools/data_monitoring/expected_snr_values.py similarity index 96% rename from rnog_analysis_tools/data_monitoring/expected_values.py rename to rnog_analysis_tools/data_monitoring/expected_snr_values.py index 96d8e32..8a16cb2 100644 --- a/rnog_analysis_tools/data_monitoring/expected_values.py +++ b/rnog_analysis_tools/data_monitoring/expected_snr_values.py @@ -120,7 +120,8 @@ def save_values_json(values_log, filename): argparser.add_argument("-b", "--backend", type=str, default="pyroot", help="Backend to use for reading data, should be either pyroot or uproot (default: pyroot), e.g. --backend pyroot or --backend uproot") argparser.add_argument("-sl", "--save_location", type=str, default=".", help="Location to save the output plots (default: current directory), e.g. --save_location /path/to/save/plots") argparser.add_argument("-ex", "--exclude-runs", nargs="+", type=int, default=[], metavar="RUN", help="Run number(s) to exclude, e.g. --exclude-runs 1005 1010") - argparser.add_argument("--save_values", action="store_true", help="If set, will save the reference values to separate JSON files.") + argparser.add_argument("--save_values", action="store_true", help="If set, will save the snr reference values to separate JSON files in the script directory.") + argparser.add_argument("-sl", "--save_location", type=str, default=".", help="Location to save the output plots (default: current directory), e.g. --save_location /path/to/save/plots") run_selection = argparser.add_mutually_exclusive_group(required=True) run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS", diff --git a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py index ab2a07f..1dc3681 100644 --- a/rnog_analysis_tools/data_monitoring/science_verification_analysis.py +++ b/rnog_analysis_tools/data_monitoring/science_verification_analysis.py @@ -31,7 +31,8 @@ from scipy.signal import find_peaks from matplotlib import colors, cm from NuRadioReco.modules.RNO_G.channelBlockOffsetFitter import fit_block_offsets - +import builtins +import csv ''' This module can be used to test if the stations are working as expected. @@ -129,6 +130,27 @@ 'savefig.bbox': 'tight', }) +_print = builtins.print +_log_file = None + +def start_print_logging(path): + global _log_file + _log_file = open(path, "w") + + def logged_print(*args, **kwargs): + _print(*args, **kwargs) # still print to terminal + if _log_file is not None: + _print(*args, **kwargs, file=_log_file) + + builtins.print = logged_print + +def stop_print_logging(): + global _log_file + if _log_file is not None: + _log_file.close() + _log_file = None + builtins.print = _print + def convert_events_information(event_info, convert_to_arrays=True): data = defaultdict(list) @@ -390,14 +412,14 @@ def excess_info_from_ratio(ratio_arr, alpha=0.01, freq_range_str=""): print(f"Binomial test for frequency range {freq_range_str}: k={k}, n={n}, pval={pval:.3e}, statistic={statistic:.3f}, 99% CI={confidence_interval}") if pval > alpha: - validation = "NO EXCESS (sign-test)" + validation = "NO EXCESS" else: if confidence_interval.low > 0.75: - validation = "STRONG EXCESS (sign-test)" + validation = "STRONG EXCESS" elif confidence_interval.low > 0.55: - validation = "MODERATE EXCESS (sign-test)" + validation = "MODERATE EXCESS" else: - validation = "WEAK EXCESS (sign-test)" + validation = "WEAK EXCESS" return { "median_log_ratio": median_log_ratio, @@ -420,7 +442,7 @@ def validate_excess_in_bands(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_ "freq_240_272MHz": freq240_info } -def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location, run_label): +def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freqs, upward_channels, downward_channels, save_location, run_label, trigger_label): '''Plot time-integrated surface channel spectra.''' plt.figure(figsize=(10, 6)) for ch in upward_channels: @@ -441,7 +463,7 @@ def plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr, freq edgecolor="black") plt.grid() plt.tight_layout() - plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_unnormalized_force_trigger_{station_id}_{run_label}.pdf")) + plt.savefig(os.path.join(save_location, f"{trigger_label}_time_integrated_surface_spectra_unnormalized_force_trigger_{station_id}_{run_label}.pdf")) def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, freqs, upward_channels, downward_channels, save_location, run_label): @@ -503,7 +525,7 @@ def plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr, f plt.tight_layout() plt.savefig(os.path.join(save_location, f"time_integrated_surface_spectra_normalized_force_trigger_{station_id}_{run_label}.pdf")) -def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels, hpol_channels, save_location, run_label): +def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels, hpol_channels, save_location, run_label, trigger_label): '''Plot time-integrated deep channel spectra.''' plt.figure(figsize=(10, 6)) for ch in vpol_channels: @@ -524,7 +546,7 @@ def plot_time_integrated_deep_spectra(station_id, spec_arr, freqs, vpol_channels edgecolor="black") plt.grid() plt.tight_layout() - plt.savefig(os.path.join(save_location, f"time_integrated_deep_spectra_unnormalized_force_trigger_{station_id}_{run_label}.pdf")) + plt.savefig(os.path.join(save_location, f"{trigger_label}_time_integrated_deep_spectra_unnormalized_force_trigger_{station_id}_{run_label}.pdf")) #### SNR analysis functions #### def calculate_statistics_log_snr(snr_arr): @@ -765,7 +787,7 @@ def kde_modality(vrms_arr, channel_list, bandwidth=None, grid_points = 512, peak return modality_dict -def tail_fraction_and_trimmed_skew_two_sided(vrms_arr, channel_list, lower_percentile=25, upper_percentile=75, extreme_k=3): +def tail_fraction_and_trimmed_skew_two_sided(vrms_arr, channel_list, lower_percentile=25, upper_percentile=75, extreme_k=2): '''Calculate tail fraction and two-sided trimmed skewness for Vrms distributions.''' tail_dict = {} @@ -807,7 +829,6 @@ def tail_fraction_and_trimmed_skew_two_sided(vrms_arr, channel_list, lower_perce def report_vrms_characteristics(modality_dict, tail_dict, channel_list): for ch in channel_list: - # modality from KDE peak count n_peaks = modality_dict[ch]["n_peaks"] if n_peaks == 0: modality = "flat/noisy" @@ -818,32 +839,55 @@ def report_vrms_characteristics(modality_dict, tail_dict, channel_list): else: modality = f"multimodal ({n_peaks} peaks)" - # tail + skewness characteristics full_skew = tail_dict[ch]["full_skew"] high_frac = tail_dict[ch]["high_tail_fraction"] low_frac = tail_dict[ch]["low_tail_fraction"] skew_trim_h = tail_dict[ch]["trimmed_skew_high"] skew_trim_l = tail_dict[ch]["trimmed_skew_low"] - # classify tail behavior - if 0 < high_frac < 0.01 and full_skew > 0.5 and not np.isnan(skew_trim_h): + if np.isnan(full_skew): + tail_label = "no significant tails" + tail_frac = None + if tail_frac is not None: + tail_label += f" (fraction: {tail_frac:.3f})" + print(f"Ch {ch:02d}: {modality} ({tail_label})") + continue + + if not np.isnan(skew_trim_h): + dskew_h = full_skew - skew_trim_h + else: + dskew_h = 0.0 + if not np.isnan(skew_trim_l): + dskew_l = full_skew - skew_trim_l + else: + dskew_l = 0.0 + + strong_skew = 0.3 + extreme_skew = 0.5 + delta_skew_min = 0.25 + + if 0 < high_frac < 0.01 and full_skew > extreme_skew and dskew_h > delta_skew_min: tail_label = "rare high extremes" tail_frac = high_frac - elif 0.01 <= high_frac < 0.05 and full_skew > 0: - tail_label = "moderate high skew" - tail_frac = high_frac - elif high_frac >= 0.05 and full_skew > 0: - tail_label = "bulk high skew" - tail_frac = high_frac - elif 0 < low_frac < 0.01 and full_skew < -0.5 and not np.isnan(skew_trim_l): + + elif 0 < low_frac < 0.01 and full_skew < -extreme_skew and dskew_l < -delta_skew_min: tail_label = "rare low extremes" tail_frac = low_frac - elif 0.01 <= low_frac < 0.05 and full_skew < 0: - tail_label = "moderate low skew" - tail_frac = low_frac - elif low_frac >= 0.05 and full_skew < 0: - tail_label = "bulk low skew" + + elif full_skew > strong_skew: + if high_frac < 0.05: + tail_label = "moderate high skew" + else: + tail_label = "bulk high skew" + tail_frac = high_frac + + elif full_skew < -strong_skew: + if low_frac < 0.05: + tail_label = "moderate low skew" + else: + tail_label = "bulk low skew" tail_frac = low_frac + else: tail_label = "no significant tails" tail_frac = None @@ -851,9 +895,8 @@ def report_vrms_characteristics(modality_dict, tail_dict, channel_list): # output summary if tail_frac is not None: tail_label += f" (fraction: {tail_frac:.3f})" - print(f"Ch {ch:02d}: {modality} ({tail_label})") - else: - print(f"Ch {ch:02d}: {modality} ({tail_label})") + print(f"Ch {ch:02d}: {modality} ({tail_label})") + def plot_vrms_values_against_time(times, vrms_arr_all, channel_list, station_id, run_label, save_location, n_rows = 12, n_cols = 2, day_interval=5): '''Plot Vrms distributions for different trigger types.''' @@ -878,7 +921,7 @@ def plot_vrms_values_against_time(times, vrms_arr_all, channel_list, station_id, n_channels = len(channel_list) - fig, axs = plt.subplots(n_rows, n_cols, figsize=(16, 36), sharex=True, squeeze=False) + fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 24), sharex=True, squeeze=False) axs = axs.ravel() legend_handles = {} @@ -891,7 +934,7 @@ def plot_vrms_values_against_time(times, vrms_arr_all, channel_list, station_id, times_trig = times[trig_mask] vrms_trig = vrms_ch[trig_mask] - scatter = ax.scatter(times_trig, vrms_trig, s=8, alpha=0.7, label=trig_name, color=trigger_colors[trig_name]) + scatter = ax.scatter(times_trig, vrms_trig, s=8, alpha=0.5, label=trig_name, color=trigger_colors[trig_name]) if trig_name not in legend_handles: legend_handles[trig_name] = scatter @@ -919,9 +962,9 @@ def plot_vrms_values_against_time(times, vrms_arr_all, channel_list, station_id, fig.legend(handles=[legend_handles[k] for k in trigger_masks.keys()], labels=list(trigger_masks.keys()), loc="lower center", ncol=4, frameon=True, markerscale=2, bbox_to_anchor=(0.5, 0.005)) - fig.supxlabel("Date [UTC]", x=0.5, y=0.02) fig.supylabel(r"$V_\mathrm{rms}$ [V]", x=0.02) - plt.subplots_adjust(bottom=0.05, wspace=0.2, hspace=0.4, left=0.1) + plt.subplots_adjust(bottom = 0.05, wspace = 0.15, left=0.1) + fig.supxlabel("Date [UTC]", x = 0.5, y = 0.06) plt.savefig(os.path.join(save_location, f"vrms_against_time_{station_id}_{run_label}.pdf")) plt.close(fig) @@ -1112,7 +1155,7 @@ def plot_glitch_q99_over_time(times, glitch_arr, channels, station_id, run_label bin_rule = choose_bin_size(times) q99 = df.resample(bin_rule).quantile(0.99) - fig, ax = plt.subplots(figsize=(12, 5)) + fig, ax = plt.subplots(figsize=(12, 10)) for ch in channels: ax.plot(q99.index, q99[ch], marker=".", linestyle="-", label=f"ch {ch}") @@ -1120,7 +1163,7 @@ def plot_glitch_q99_over_time(times, glitch_arr, channels, station_id, run_label ax.set_xlabel("Date [UTC]") ax.set_ylabel(f"99% quantile glitching ts ({bin_rule} bins)") ax.grid(True, alpha=0.3) - ax.legend(ncol=3) + ax.legend(ncol=3, frameon=True, framealpha=0.9, edgecolor="black") plt.tight_layout() plt.savefig(os.path.join(save_location, f"glitch_q99_{station_id}_{run_label}.pdf")) @@ -1264,6 +1307,182 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ plt.tight_layout() plt.savefig(os.path.join(save_location,f"debug_vrms_hist_kde_density_peaks_{station_id}_{run_label}_{trigger_label}.pdf",)) +def channel_health(row): + severity = {"OK": 0, "!!": 1, "X": 2} + vals = [severity[v] for v in row if v in severity] + if not vals: + return "-" + inv = {0: "OK", 1: "!!", 2: "X"} + return inv[max(vals)] + + +def create_result_csv_file(station_id, run_label, n_events_force, surface_channels, downward_channels, upward_channels, all_channels, validation_results, glitch_info, modality_dict_force, modality_dict_lt, + modality_dict_radiant0, modality_dict_radiant1, outlier_details, save_location): + out_csv_file = os.path.join(save_location, f"validation_summary_station{station_id}_{run_label}.csv") + ch_list = list(all_channels) + + spectral_col = [] + glitch_col = [] + modality_force_col = [] + modality_lt_col = [] + modality_radiant0_col = [] + modality_radiant1_col = [] + snr_col = [] + + for ch in ch_list: + df_spec_val = "" + if ch in surface_channels: + spectral_validation = None + vr = validation_results.get(ch, {}) + gal = vr.get("galactic_excess", {}) + spectral_validation = gal.get("validation", None) + + if spectral_validation is None: + df_spec_val = "?" + else: + if ch in downward_channels: + print(f"spectral_validation: {spectral_validation}") + if spectral_validation == "NO EXCESS": + df_spec_val = "OK" + elif spectral_validation == "WEAK EXCESS": + df_spec_val = "!!" + elif spectral_validation in ["MODERATE EXCESS", "STRONG EXCESS"]: + df_spec_val = "X" + else: + df_spec_val = "?" + elif ch in upward_channels: + print(f"spectral_validation: {spectral_validation}") + if spectral_validation in ["STRONG EXCESS", "MODERATE EXCESS"]: + df_spec_val = "OK" + elif spectral_validation == "WEAK EXCESS": + df_spec_val = "!!" + elif spectral_validation == "NO EXCESS": + df_spec_val = "X" + else: + df_spec_val = "?" + else: + df_spec_val = "?" + else: + df_spec_val = "-" + + spectral_col.append(df_spec_val) + + # Glitching column + if glitch_info is None: + glitch_val = "-" + glitch_col.append(glitch_val) + else: + info = glitch_info.get(ch, None) + glitch_val_raw = info.get("validation") if info is not None else "-" + if glitch_val_raw == "NO EXCESSIVE GLITCHING": + glitch_val = "OK" + elif glitch_val_raw == "WEAK EXCESSIVE GLITCHING": + glitch_val = "!!" + elif glitch_val_raw in ["MODERATE EXCESSIVE GLITCHING", "STRONG EXCESSIVE GLITCHING"]: + glitch_val = "X" + else: + glitch_val = "-" + glitch_col.append(glitch_val) + + # Vrms analysis column + if modality_dict_force is None: + modality_value = "-" + modality_force_col.append(modality_value) + else: + n_peaks = modality_dict_force[ch]["n_peaks"] + if n_peaks == 0: + modality_value = "!!" + elif n_peaks == 1: + modality_value = "OK" + elif n_peaks == 2: + modality_value = "X" + else: + modality_value = f"X" + modality_force_col.append(modality_value) + + if modality_dict_lt is None: + modality_value = "-" + modality_lt_col.append(modality_value) + else: + n_peaks = modality_dict_lt[ch]["n_peaks"] + if n_peaks == 0: + modality_value = "!!" + elif n_peaks == 1: + modality_value = "OK" + elif n_peaks == 2: + modality_value = "X" + else: + modality_value = f"X" + modality_lt_col.append(modality_value) + + if modality_dict_radiant0 is None: + modality_value = "-" + modality_radiant0_col.append(modality_value) + else: + n_peaks = modality_dict_radiant0[ch]["n_peaks"] + if n_peaks == 0: + modality_value = "!!" + elif n_peaks == 1: + modality_value = "OK" + elif n_peaks == 2: + modality_value = "X" + else: + modality_value = "X" + modality_radiant0_col.append(modality_value) + if modality_dict_radiant1 is None: + modality_value = "-" + modality_radiant1_col.append(modality_value) + else: + n_peaks = modality_dict_radiant1[ch]["n_peaks"] + if n_peaks == 0: + modality_value = "!!" + elif n_peaks == 1: + modality_value = "OK" + elif n_peaks == 2: + modality_value = "X" + else: + modality_value = "X" + modality_radiant1_col.append(modality_value) + + # SNR validation column + outlier_ch_info = outlier_details.get(ch, []) + n_out = len(outlier_ch_info) + if n_out == 0: + snr_value = "OK" + else: + max_delta = max(abs(o.get("z_minus_k", 0.0)) for o in outlier_ch_info) + frac_out = n_out / n_events_force if n_events_force > 0 else 0.0 + if max_delta < 3.0: + snr_value = "OK" + + elif max_delta < 5.0: + snr_value = "OK" if frac_out < 0.01 else "!!" + + else: # max_delta >= 5 + if n_out == 1 and frac_out < 0.001: + snr_value = "OK" + elif frac_out < 0.01: + snr_value = "!!" + else: + snr_value = "X" + snr_col.append(snr_value) + + df = pd.DataFrame({ + "Channel": ch_list, + "SNR": snr_col, + "Galaxy (FORCE)": spectral_col, + "Vrms (FORCE)": modality_force_col, + "Vrms (LT)": modality_lt_col, + "Vrms (RADIANT0)": modality_radiant0_col, + "Vrms (RADIANT1)": modality_radiant1_col, + "Glitching": glitch_col, + }) + + health_cols =["SNR", "Galaxy (FORCE)", "Vrms (FORCE)", "Vrms (LT)", "Vrms (RADIANT0)", "Vrms (RADIANT1)", "Glitching"] + df["Channel Health"] = df[health_cols].apply(channel_health, axis=1) + df.to_csv(out_csv_file, index=False) + print(f"Validation summary saved to {out_csv_file}") + if __name__ == "__main__": @@ -1272,7 +1491,7 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ argparser.add_argument("-b", "--backend", type=str, default="pyroot", help="Backend to use for reading data, should be either pyroot or uproot (default: pyroot), e.g. --backend pyroot or --backend uproot") argparser.add_argument("-sl", "--save_location", type=str, default=".", help="Location to save the output plots (default: current directory), e.g. --save_location /path/to/save/plots") argparser.add_argument("-ex", "--exclude-runs", nargs="+", type=int, default=[], metavar="RUN", help="Run number(s) to exclude, e.g. --exclude-runs 1005 1010") - argparser.add_argument("--debug_plot", action="store_true", help="If set, will create debug plots for amplitude ratios in frequency bands.") + argparser.add_argument("--debug_plot", action="store_true", help="If set, will create debug plots.") run_selection = argparser.add_mutually_exclusive_group(required=True) run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS", @@ -1322,12 +1541,15 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ save_location = os.path.expanduser(args.save_location) os.makedirs(save_location, exist_ok=True) + result_txt = os.path.join(save_location, f"result_station{station_id}_{run_label}.txt") + start_print_logging(result_txt) + # Read data spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info, glitch_arr = read_rnog_data(station_id, run_numbers, backend=backend) # Normalize surface channel spectra norm_spec_arr, scale_factors = normalize_channels(spec_arr, freqs, downward_channels, upward_channels) - print(f"Event info trigger type: {event_info['triggerType']}, event info trigger type length: {len(event_info['triggerType'])}") + print(f"Event info trigger type: {event_info['triggerType']}") print(f"Spec arr shape: {spec_arr.shape}, Norm spec arr shape: {norm_spec_arr.shape}") # Select FORCE trigger events @@ -1346,6 +1568,7 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ ch_to_idx = {ch: i for i, ch in enumerate(channels_order)} print(ch_to_idx) + all_validation_results = {} for ch in surface_channels: print(f"\nAnalyzing channel {ch}") i = ch_to_idx[ch] @@ -1358,6 +1581,7 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ ratio_arr_240[i], alpha=0.01 ) + all_validation_results[ch] = validation_results for band, results in validation_results.items(): print(f"=== {band} ===") @@ -1365,13 +1589,30 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ print(f"{key}: {value}") print("==============") + lt_mask = choose_trigger_type(event_info, "LT") + spec_arr_lt = spec_arr[:, lt_mask, :] + + radiant0_mask = choose_trigger_type(event_info, "RADIANT0") + spec_arr_radiant0 = spec_arr[:, radiant0_mask, :] + + radiant1_mask = choose_trigger_type(event_info, "RADIANT1") + spec_arr_radiant1 = spec_arr[:, radiant1_mask, :] + # Surface spectrum - plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr_force, freqs, upward_channels, downward_channels, save_location, run_label) + plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr_force, freqs, upward_channels, downward_channels, save_location, run_label, trigger_label="force") + plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr_lt, freqs, upward_channels, downward_channels, save_location, run_label, trigger_label="lt") + plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr_radiant0, freqs, upward_channels, downward_channels, save_location, run_label, trigger_label="radiant0") + plot_time_integrated_surface_spectra_unnormalized(station_id, spec_arr_radiant1, freqs, upward_channels, downward_channels, save_location, run_label, trigger_label="radiant1") + + # Normalized surface spectrum - only force plot_time_integrated_surface_spectra_normalized(station_id, norm_spec_arr_force, freqs, upward_channels, downward_channels, save_location, run_label) # Deep spectrum (unnormalized) - plot_time_integrated_deep_spectra(station_id, spec_arr_force, freqs, vpol_channels, hpol_channels, save_location, run_label) - + plot_time_integrated_deep_spectra(station_id, spec_arr_force, freqs, vpol_channels, hpol_channels, save_location, run_label, trigger_label="force") + plot_time_integrated_deep_spectra(station_id, spec_arr_lt, freqs, vpol_channels, hpol_channels, save_location, run_label, trigger_label="lt") + plot_time_integrated_deep_spectra(station_id, spec_arr_radiant0, freqs, vpol_channels, hpol_channels, save_location, run_label, trigger_label="radiant0") + plot_time_integrated_deep_spectra(station_id, spec_arr_radiant1, freqs, vpol_channels, hpol_channels, save_location, run_label, trigger_label="radiant1") + # SNR analysis snr_arr_force = snr_arr[:, force_mask] event_info_force = {key: np.array(value)[force_mask] for key, value in event_info.items()} @@ -1396,6 +1637,7 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ # Vrms analysis vrms_arr, vrms_arr_force, vrms_arr_radiant0, vrms_arr_radiant1, vrms_arr_lt = calculate_vrms(trace_arr, event_info) + print(f"Number of RADIANT0 trigger events: {len(vrms_arr_radiant0[1])}, Number of RADIANT1 trigger events: {len(vrms_arr_radiant1[1])}, Number of LT trigger events: {len(vrms_arr_lt[1])}") modality_dict_force = kde_modality(vrms_arr_force, all_channels, bandwidth=None, grid_points=512, peak_prominence=0.01) tail_dict_force = tail_fraction_and_trimmed_skew_two_sided(vrms_arr_force, all_channels, lower_percentile=25, upper_percentile=75, extreme_k=3) @@ -1409,7 +1651,6 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ print("\nVrms characteristics for RADIANT0 trigger events:") if len(vrms_arr_radiant0[1]) < 100: print(f"!!!! Warning: RADIANT0 trigger has less than 100 valid Vrms entries ({len(vrms_arr_radiant0)}). Results for the Vrms statistics may be unreliable. !!!!") - report_vrms_characteristics(modality_dict_force, tail_dict_force, all_channels) report_vrms_characteristics(modality_dict_radiant0, tail_dict_radiant0, all_channels) modality_dict_radiant1 = kde_modality(vrms_arr_radiant1, all_channels, bandwidth=None, grid_points=512, peak_prominence=0.01) @@ -1461,4 +1702,26 @@ def debug_plot_vrms_distribution(vrms_arr, modality_dict, channel_list, station_ debug_plot_ratios(ratio_arr_gal, ratio_arr_360, ratio_arr_482, ratio_arr_240, channels_order=channels_order, save_location=save_location, station_id=station_id, run_label=run_label, bins=30,) debug_plot_snr_distribution(log_snr_arr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) debug_plot_z_score_snr(z_score_arr_log_snr, channel_list=all_channels, save_location=save_location, station_id=station_id, run_label=run_label, bins=30) - \ No newline at end of file + + stop_print_logging() + + # Create summary CSV file + n_events_force = spec_arr_force.shape[1] + create_result_csv_file( + station_id, + run_label, + n_events_force, + surface_channels, + downward_channels, + upward_channels, + all_channels, + all_validation_results, + glitch_info, + modality_dict_force, + modality_dict_lt, + modality_dict_radiant0, + modality_dict_radiant1, + outlier_details_snr, + save_location, + ) + \ No newline at end of file