Skip to content
201 changes: 201 additions & 0 deletions rnog_analysis_tools/data_monitoring/expected_snr_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
from NuRadioReco.modules.io.RNO_G.readRNOGDataMattak import readRNOGData
import rnog_data.runtable as rt
from NuRadioReco.detector import detector
import logging
import os
import datetime
import numpy as np
from matplotlib import pyplot as plt
from NuRadioReco.utilities import units
from tqdm import tqdm
from argparse import ArgumentParser
import warnings
import pandas as pd
import NuRadioReco.framework.event
import NuRadioReco.framework.station
import NuRadioReco.framework.channel
import NuRadioReco.framework.trigger
from NuRadioReco.framework.parameters import channelParameters as chp
from NuRadioReco.modules.RNO_G.dataProviderRNOG import dataProviderRNOG
from cycler import cycler
import matplotlib as mpl
#from cmap import Colormap
from collections import defaultdict
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from science_verification_analysis import convert_events_information, choose_trigger_type, station_channels, read_rnog_runtable, read_rnog_data, calculate_statistics_log_snr, calculate_z_score_snr, outlier_flag, find_outlier_details, print_outlier_summary, choose_day_interval, plot_snr_against_time
import json
import matplotlib.dates as mdates
from datetime import timezone


'''This module can be used to find expected parameter values for RNO-G stations from a known stable time period.'''


# Channel mapping for the first seven RNO-G stations.
# Channel ids 0-23; groups are named by position/orientation/polarization.
SURFACE_CHANNELS_LIST = list(range(12, 21))
DEEP_CHANNELS_LIST = list(range(12)) + [21, 22, 23]
UPWARD_CHANNELS_LIST = [13, 16, 19]
DOWNWARD_CHANNELS_LIST = [12, 14, 15, 17, 18, 20]
VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 22, 23]
HPOL_LIST = [4, 8, 11, 21]
PHASED_ARRAY_LIST = list(range(4))
ALL_CHANNELS_LIST = list(range(24))

# Channel mapping for Station 14 (its surface/deep split and channel
# orientations differ from the first seven stations):
STATION_14_SURFACE_CHANNELS_LIST = list(range(12, 20))
STATION_14_DEEP_CHANNELS_LIST = list(range(12)) + [20, 21, 22, 23]
STATION_14_UPWARD_CHANNELS_LIST = [13, 15, 16, 18]
STATION_14_DOWNWARD_CHANNELS_LIST = [12, 14, 17, 19]
STATION_14_VPOL_LIST = [0, 1, 2, 3, 5, 6, 7, 9, 10, 20, 22, 23]
STATION_14_HPOL_LIST = [4, 8, 11, 21]
STATION_14_PHASED_ARRAY_LIST = list(range(4))
STATION_14_ALL_CHANNELS_LIST = list(range(24))
Comment on lines +34 to +52
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that something which can be either defined globally or in a config file? Also I was wondering if it makes sense to generalize this a bit by only defining a dict with keys which act as label for the channel group and values which list the channel id's, e.g.:

channel_groups = {
   "phased_array": [0, 1, 2, 3],
   "surface": [12, 13, ...],
    ....
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think I should put all of the station-specific stuff into a config file, including the analysis. This way it would also be easier for station experts to change some parameters if needed.


# Script directory
# Absolute directory of this module; relative output filenames (e.g. the JSON
# reference files written by save_values_json) are resolved against it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Matplotlib settings

# Global plot styling applied once at import time; affects every figure this
# script produces (fonts, tick geometry, legend layout, figure/savefig DPI).
mpl.rcParams.update({
'font.family': 'sans-serif',
'font.sans-serif': ['Helvetica', 'Arial', 'DejaVu Sans'],
'font.size': 17,

'axes.labelsize': 17,
'axes.titlesize': 18,
'axes.linewidth': 1.2,
'axes.grid': False,

'xtick.labelsize': 17,
'ytick.labelsize': 17,
'xtick.major.size': 6,
'ytick.major.size': 6,
'xtick.major.width': 1.2,
'ytick.major.width': 1.2,
'xtick.minor.size': 3,
'ytick.minor.size': 3,
'xtick.minor.visible': True,
'ytick.minor.visible': True,

'lines.linewidth': 1.6,
'lines.antialiased': True,
'lines.markersize': 6,

'legend.fontsize': 14,
'legend.frameon': False,
'legend.handlelength': 1,
'legend.borderpad': 0.3,

# savefig.dpi is higher than figure.dpi so saved files are print quality.
'figure.dpi': 120,
'savefig.dpi': 300,
'savefig.bbox': 'tight',
})
Comment on lines +59 to +92
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you specify that in an extra file? Makes the scripts easier to read.


def find_k_value(z_score_log, channel_list, quantile=0.999):
    '''Find the per-channel k-value for the given reference z-scores and quantile.

    Parameters
    ----------
    z_score_log : mapping or array indexable by channel id
        Reference z-score values of log-SNR per channel.
    channel_list : iterable of int
        Channel ids to evaluate.
    quantile : float, optional
        Quantile of |z-score| taken as the channel's k threshold (default 0.999).

    Returns
    -------
    dict
        ``{channel_id: k}``. Values are cast to plain Python ``float`` so the
        result is directly JSON-serializable (``np.quantile`` returns
        ``np.float64``, which ``json.dump`` rejects by default).
    '''
    return {
        ch: float(np.quantile(np.abs(z_score_log[ch]), quantile))
        for ch in channel_list
    }

def save_values_json(values_log, filename):
    '''Save the reference values as a JSON file.

    Parameters
    ----------
    values_log : dict
        Reference values to store (e.g. per-channel k-values or log-SNR means).
    filename : str
        Output file name. A relative name is resolved against the script
        directory so the reference files live next to this module; an absolute
        path is used as given.
    '''
    if not os.path.isabs(filename):
        filepath = os.path.join(SCRIPT_DIR, filename)
    else:
        filepath = filename

    with open(filepath, "w") as f:
        # default=float converts numpy scalars (e.g. np.float64 from
        # np.quantile/np.mean), which the stock JSON encoder cannot serialize.
        json.dump(values_log, f, indent=4, default=float)

    print(f"Values saved to {filepath}")


if __name__ == "__main__":

    argparser = ArgumentParser(description="RNO-G Science Verification Analysis")
    argparser.add_argument("-st", "--station_id", type=int, required=True, help="Station to analyze, e.g --station_id 14")
    argparser.add_argument("-b", "--backend", type=str, default="pyroot", help="Backend to use for reading data, should be either pyroot or uproot (default: pyroot), e.g. --backend pyroot or --backend uproot")
    # NOTE: "-sl"/"--save_location" used to be registered twice, which makes
    # argparse raise an ArgumentError at startup; it is defined exactly once now.
    argparser.add_argument("-sl", "--save_location", type=str, default=".", help="Location to save the output plots (default: current directory), e.g. --save_location /path/to/save/plots")
    argparser.add_argument("-ex", "--exclude-runs", nargs="+", type=int, default=[], metavar="RUN", help="Run number(s) to exclude, e.g. --exclude-runs 1005 1010")
    argparser.add_argument("--save_values", action="store_true", help="If set, will save the snr reference values to separate JSON files in the script directory.")

    # Exactly one run-selection mechanism must be used.
    run_selection = argparser.add_mutually_exclusive_group(required=True)
    run_selection.add_argument("--runs", nargs="+", type=int, metavar="RUN_NUMBERS",
                               help="Run number(s) to analyze. Each run number should be given explicitly separated by a space, e.g. --runs 1001 1002 1005")
    run_selection.add_argument("--run_range", nargs=2, type=int, metavar=("START_RUN", "END_RUN"),
                               help="Range of run numbers to analyze (inclusive). Provide start and end run numbers separated by a space, e.g. --run_range 1000 1050")
    run_selection.add_argument("--time_range", nargs=2, type=str, metavar=("START_DATE", "END_DATE"),
                               help="Date range to analyze (inclusive). Provide start and end dates separated by a space in YYYY-MM-DD format, e.g. --time_range 2024-07-15 2024-09-30")

    # Parse the command line exactly once and reuse the result
    # (parse_args() was previously called twice).
    args = argparser.parse_args()

    station_id = args.station_id
    backend = args.backend
    if backend not in ["pyroot", "uproot"]:
        raise ValueError("Backend should be either 'pyroot' or 'uproot'")

    # Station-specific channel groups (station 14 is mapped differently).
    surface_channels, deep_channels, upward_channels, downward_channels, vpol_channels, hpol_channels, phased_array_channels, all_channels = station_channels(station_id)

    # Resolve the selected runs from whichever mutually-exclusive option was given.
    if args.runs is not None:
        run_numbers = args.runs
    elif args.run_range is not None:
        run_numbers = list(range(args.run_range[0], args.run_range[1] + 1))
    elif args.time_range is not None:
        start_time, stop_time = args.time_range
        runtable = read_rnog_runtable(station_id, start_time, stop_time)
        run_numbers = runtable["run"].tolist()
    else:
        raise ValueError("No run selection provided")

    # Exclude specified runs
    if args.exclude_runs:
        exclude_set = set(args.exclude_runs)
        run_numbers = [r for r in run_numbers if r not in exclude_set]

    # Guard against an empty selection (e.g. everything excluded) before
    # indexing first/last run below.
    if not run_numbers:
        raise ValueError("Run selection is empty after applying --exclude-runs")

    run_numbers = sorted(run_numbers)
    first_run = run_numbers[0]
    last_run = run_numbers[-1]

    # Label used for output file names.
    if first_run == last_run:
        run_label = f"run_{first_run}"
    else:
        run_label = f"runs_{first_run}_{last_run}"

    # Create save location directory if it doesn't exist
    save_location = os.path.expanduser(args.save_location)
    os.makedirs(save_location, exist_ok=True)

    spec_arr, trace_arr, times_trace_arr, snr_arr, run_no, times, freqs, event_info = read_rnog_data(station_id, run_numbers, backend=backend)

    # Choose FORCE triggered events only
    force_mask = choose_trigger_type(event_info, "FORCE")
    snr_arr_force = snr_arr[:, force_mask]
    event_info_force = {key: np.array(value)[force_mask] for key, value in event_info.items()}
    times = np.array(times)
    times_force = times[force_mask]

    # Per-channel statistics of log(SNR) over the reference period.
    log_snr_arr, log_mean_list, log_median_list, log_std_list, log_difference_list = calculate_statistics_log_snr(snr_arr_force)

    # Reference z-scores and the per-channel k thresholds (0.999 quantile).
    z_score_arr_log_snr = calculate_z_score_snr(log_snr_arr, log_mean_list, log_std_list, all_channels)
    k_values_log_snr = find_k_value(z_score_arr_log_snr, all_channels, quantile=0.999)

    k_values_filename_snr = f"station_{station_id}_k_ref_values_snr.json"

    if args.save_values:
        save_values_json(k_values_log_snr, k_values_filename_snr)
        save_values_json(log_mean_list, f"station_{station_id}_ref_log_mean_snr.json")
        save_values_json(log_std_list, f"station_{station_id}_ref_log_std_snr.json")
Comment on lines +188 to +193
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you stored these files in the repo - was that intended? Are those files meant to be created or used by other users? Does it make sense to add more meta data to the json files themself? Examples: station_id, timeperiod used (maybe with a list of runs, ...).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this script is only meant to be used by the station experts to calculate the expected snr values per station. The script then automatically saves these files in the repo so that the science_verification_analysis.py can use them to perform the snr analysis.
I think adding more metadata would be very nice, so that it is reproducible and we can also track whether the expected values change over time.


# Flag events whose |z-score| exceeds the per-channel k threshold.
flag_outliers_snr = outlier_flag(z_score_arr_log_snr, k_values_log_snr, all_channels)

# Collect per-event details for the flagged outliers and print a summary.
outlier_details_snr = find_outlier_details(z_score_arr_log_snr, k_values_log_snr, flag_outliers_snr, event_info_force, all_channels)
print_outlier_summary(outlier_details_snr)

# Pick a date-tick spacing that suits the analyzed time span, then plot
# SNR vs. time per channel (12x2 grid covers all 24 channels).
day_interval = choose_day_interval(times)
plot_snr_against_time(station_id, times_force, snr_arr_force, flag_outliers_snr, z_score_arr_log_snr, k_values_log_snr, all_channels, nrows=12, ncols=2, day_interval=day_interval)
Loading