From d962b52754b1acff269d5c9266f59c52419ceccf Mon Sep 17 00:00:00 2001 From: Sakib Rahman Date: Tue, 10 Dec 2024 08:43:32 -0500 Subject: [PATCH 1/7] First commit of config.yml --- benchmarks/output_branch_size_scan/config.yml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 benchmarks/output_branch_size_scan/config.yml diff --git a/benchmarks/output_branch_size_scan/config.yml b/benchmarks/output_branch_size_scan/config.yml new file mode 100644 index 00000000..7efd3875 --- /dev/null +++ b/benchmarks/output_branch_size_scan/config.yml @@ -0,0 +1,22 @@ +sim:output_branch_size_scan: + stage: simulate + extends: .det_benchmark + script: + + + +bench:output_branch_size_scan: + stage: benchmarks + extends: .det_benchmark + needs: + - ["sim:output_branch_size_scan"] + script: + + +results:output_branch_size_scan: + stage: collect + extends: .det_benchmark + needs: + - ["bench:output_branch_size_scan"] + script: + From e154ebd0c8dc2e7aa7d38761620d77e4205e66b0 Mon Sep 17 00:00:00 2001 From: Sakib Rahman Date: Wed, 18 Dec 2024 11:34:06 -0500 Subject: [PATCH 2/7] Placeholder benchmarking shell wrapper script Get branch sizes for a current campaign file and the default file and then compare them in a plot --- .../output_branch_size_scan/output_branch_size_scan.sh | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 benchmarks/output_branch_size_scan/output_branch_size_scan.sh diff --git a/benchmarks/output_branch_size_scan/output_branch_size_scan.sh b/benchmarks/output_branch_size_scan/output_branch_size_scan.sh new file mode 100644 index 00000000..0a1ba567 --- /dev/null +++ b/benchmarks/output_branch_size_scan/output_branch_size_scan.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +root -q -b root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/ -e 'for (auto b : *events->GetListOfLeaves()) { if (events->GetBranch(b->GetName()) == nullptr) continue; cout << events->GetBranch(b->GetName())->GetTotalSize() << " " << b->GetName() << endl; }' | sort -n > 
branch_size_current.txt +root -q -b root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/ -e 'for (auto b : *events->GetListOfLeaves()) { if (events->GetBranch(b->GetName()) == nullptr) continue; cout << events->GetBranch(b->GetName())->GetTotalSize() << " " << b->GetName() << endl; }' | sort -n > branch_size_default.txt +python plot_branch_sizes.py -c branch_size_current.txt -d branch_size_default.txt From dd0fc645fb1ff0fde71be3b3652584535a82f7c9 Mon Sep 17 00:00:00 2001 From: Sakib Rahman Date: Wed, 18 Dec 2024 11:38:11 -0500 Subject: [PATCH 3/7] Plotting script to draw branch sizes --- benchmarks/output_branch_size_scan/plot.py | 63 ++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 benchmarks/output_branch_size_scan/plot.py diff --git a/benchmarks/output_branch_size_scan/plot.py b/benchmarks/output_branch_size_scan/plot.py new file mode 100644 index 00000000..ddfd0486 --- /dev/null +++ b/benchmarks/output_branch_size_scan/plot.py @@ -0,0 +1,63 @@ +import pandas as pd +import argparse +import matplotlib.pyplot as plt + +parser = argparse.ArgumentParser(prog='Plot output branch sizes', description='Plot output branch sizes') + +parser.add_argument("-c", dest="current_campaign_file", action="store", required=True, help="Enter the current campaign file") +parser.add_argument("-d", dest="default_file", action="store", required=True, help="Enter the default file") + +args=parser.parse_args() + + +campaign1=args.current_campaign_file +campaign2=args.default_file + + +# Load both input files as header-less CSV data +df1 = pd.read_csv(campaign1+'.txt', header=None) +df2 = pd.read_csv(campaign2+'.txt', header=None) + +# Plot the third column ('Value') against the first column ('Object') +plt.figure(figsize=(10,6)) +plt.scatter(df1.iloc[:,0], df1.iloc[:,2]) +plt.scatter(df2.iloc[:,0], df2.iloc[:,2]) + +plt.title("Branch Sizes (Bytes) vs Branch Names") + + + + +# Finalize the layout, use a log y-axis, and save the figure to a PNG file +plt.tight_layout() +plt.yscale('log') +plt.savefig(campaign1+'_vs_'+campaign2+'.png') + 
+print(df1) +print(df2) + +# Assuming both dataframes have the same structure and the first column is branch name +# Merge the two dataframes on the branch name (first column) +merged_df = pd.merge(df1.iloc[:, [0, 2]], df2.iloc[:, [0, 2]], on=df1.columns[0], suffixes=('_' + campaign1, '_' + campaign2)) + +# Create a new column that calculates the difference between the third columns of the two DataFrames +merged_df['Difference'] = merged_df.iloc[:, 1] - merged_df.iloc[:, 2] + +# Create a new DataFrame with the branch names and the difference +result_df = merged_df[[df1.columns[0], 'Difference']] + +# Display the resulting DataFrame +print(result_df) + +# Sort the DataFrame by the absolute value of the difference in descending order +sorted_df = result_df.reindex(result_df['Difference'].abs().sort_values(ascending=False).index) + +# Pick the top 20 branches with the largest differences +top_20_branches = sorted_df.head(20) + +# Display the top 20 branches +print(top_20_branches) + + +# Save the full sorted difference table to a new CSV file +sorted_df.to_csv(f"{campaign1}_vs_{campaign2}_difference.csv", index=False) From 6645a6372a22fc695b7e8bac54e67d8e65f952ed Mon Sep 17 00:00:00 2001 From: Sakib Rahman Date: Wed, 18 Dec 2024 11:39:49 -0500 Subject: [PATCH 4/7] Modify name of plotting script --- benchmarks/output_branch_size_scan/output_branch_size_scan.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/output_branch_size_scan/output_branch_size_scan.sh b/benchmarks/output_branch_size_scan/output_branch_size_scan.sh index 0a1ba567..c812756b 100644 --- a/benchmarks/output_branch_size_scan/output_branch_size_scan.sh +++ b/benchmarks/output_branch_size_scan/output_branch_size_scan.sh @@ -2,4 +2,4 @@ root -q -b root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/ -e 'for (auto b : *events->GetListOfLeaves()) { if (events->GetBranch(b->GetName()) == nullptr) continue; cout << events->GetBranch(b->GetName())->GetTotalSize() << " " << b->GetName() << endl; }' | sort 
-n > branch_size_current.txt root -q -b root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/ -e 'for (auto b : *events->GetListOfLeaves()) { if (events->GetBranch(b->GetName()) == nullptr) continue; cout << events->GetBranch(b->GetName())->GetTotalSize() << " " << b->GetName() << endl; }' | sort -n > branch_size_default.txt -python plot_branch_sizes.py -c branch_size_current.txt -d branch_size_default.txt +python plot.py -c branch_size_current.txt -d branch_size_default.txt From 0dc7a9328b7a72d0104ee9fd662dd8219eda9b26 Mon Sep 17 00:00:00 2001 From: Sakib Rahman Date: Wed, 18 Dec 2024 11:55:31 -0500 Subject: [PATCH 5/7] Sample script to generate some events and run reconstruction --- .../output_branch_size_scan/generate.sh | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 benchmarks/output_branch_size_scan/generate.sh diff --git a/benchmarks/output_branch_size_scan/generate.sh b/benchmarks/output_branch_size_scan/generate.sh new file mode 100644 index 00000000..6a7d4db8 --- /dev/null +++ b/benchmarks/output_branch_size_scan/generate.sh @@ -0,0 +1,32 @@ +#!/bin/bash +set -Euo pipefail +trap 's=$?; echo "$0: Error on line "$LINENO": $BASH_COMMAND"; exit $s' ERR +IFS=$'\n\t' + +NUM_EVENTS=400 +INPUT_FILE=root://dtn-eic.jlab.org//work/eic2/EPIC/EVGEN/DIS/NC/18x275/minQ2=1/pythia8NCDIS_18x275_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_1.hepmc3.tree.root + +DETECTOR_CONFIG=epic_craterlake +EBEAM=18 +PBEAM=275 + +npsim \ + --runType batch \ + --random.seed 1 \ + --random.enableEventSeed \ + --printLevel WARNING \ + --skipNEvents 0 \ + --numberOfEvents 400 \ + --filter.tracker 'edep0' \ + --hepmc3.useHepMC3 true \ + --compactFile ${DETECTOR_PATH}/${DETECTOR_CONFIG}${EBEAM:+${PBEAM:+_${EBEAM}x${PBEAM}}}.xml \ + --inputFiles ${INPUT_FILE} \ + --outputFile current_campaign.edm4hep.root + +eicrecon \ + -Ppodio:output_file="current_campaign.eicrecon.tree.edm4eic.root" \ + -Pjana:warmup_timeout=0 -Pjana:timeout=0 \ + -Pplugins=janadot \ + 
"current_campaign.edm4hep.root" + + From 548fe5cd8693fbaec657455c66b0703d26f7137f Mon Sep 17 00:00:00 2001 From: Sakib Rahman Date: Wed, 18 Dec 2024 11:57:02 -0500 Subject: [PATCH 6/7] Change name of current campaign file to extract branch sizes from --- benchmarks/output_branch_size_scan/output_branch_size_scan.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/output_branch_size_scan/output_branch_size_scan.sh b/benchmarks/output_branch_size_scan/output_branch_size_scan.sh index c812756b..4c1f908c 100644 --- a/benchmarks/output_branch_size_scan/output_branch_size_scan.sh +++ b/benchmarks/output_branch_size_scan/output_branch_size_scan.sh @@ -1,5 +1,5 @@ #!/bin/bash -root -q -b root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/ -e 'for (auto b : *events->GetListOfLeaves()) { if (events->GetBranch(b->GetName()) == nullptr) continue; cout << events->GetBranch(b->GetName())->GetTotalSize() << " " << b->GetName() << endl; }' | sort -n > branch_size_current.txt +root -q -b eicrecon.tree.edm4eic.root -e 'for (auto b : *events->GetListOfLeaves()) { if (events->GetBranch(b->GetName()) == nullptr) continue; cout << events->GetBranch(b->GetName())->GetTotalSize() << " " << b->GetName() << endl; }' | sort -n > branch_size_current.txt root -q -b root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/ -e 'for (auto b : *events->GetListOfLeaves()) { if (events->GetBranch(b->GetName()) == nullptr) continue; cout << events->GetBranch(b->GetName())->GetTotalSize() << " " << b->GetName() << endl; }' | sort -n > branch_size_default.txt python plot.py -c branch_size_current.txt -d branch_size_default.txt From 978766cad7463a914ee01f4181441c8b231eeb2a Mon Sep 17 00:00:00 2001 From: Sakib Rahman Date: Wed, 18 Dec 2024 11:58:09 -0500 Subject: [PATCH 7/7] Execute scripts in different steps --- benchmarks/output_branch_size_scan/config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/output_branch_size_scan/config.yml 
b/benchmarks/output_branch_size_scan/config.yml index 7efd3875..952e1480 100644 --- a/benchmarks/output_branch_size_scan/config.yml +++ b/benchmarks/output_branch_size_scan/config.yml @@ -2,6 +2,7 @@ sim:output_branch_size_scan: stage: simulate extends: .det_benchmark script: + bash generate.sh @@ -11,6 +12,7 @@ bench:output_branch_size_scan: needs: - ["sim:output_branch_size_scan"] script: + bash output_branch_size_scan.sh results:output_branch_size_scan: