Skip to content

Commit 5634e64

Browse files
committed
combine daily rvs into big csv
1 parent 1afdf30 commit 5634e64

File tree

1 file changed

+141
-0
lines changed

1 file changed

+141
-0
lines changed

examples/combine_daily_rvs_1.0.jl

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
using Logging
2+
using ArgParse
3+
4+
@info "# Parsing arguments..."

"""
    parse_commandline()

Parse command-line arguments for the daily-report combiner and return them
as a `Dict{String,Any}` keyed by argument name.

Recognized arguments:
- `input_path`         : directory searched for daily summary files (default: `pwd()`)
- `output`             : output csv filename, written under `input_path` (default: "summary.csv")
- `--input_filename`   : per-day summary filename to search for (default: "daily_summary.toml")
- `--exclude_filename` : optional csv listing dates to exclude
- `--overwrite`        : allow clobbering an existing non-empty output file
- `--log_info` / `--log_debug` : raise console logging verbosity
"""
function parse_commandline()
    # Fixed typo in user-facing description ("Combined" -> "Combine").
    s = ArgParseSettings( description = "Combine NEID solar daily reports into one csv file.")
    add_arg_group!(s, "Files", :argg_files)
    @add_arg_table! s begin
        "input_path"
            help = "Path for input files"
            arg_type = String
            default = pwd()
        "output"
            help = "Filename for output (csv)"
            arg_type = String
            default = "summary.csv"
        "--input_filename"
            help = "Filename for daily tables (csv)"
            arg_type = String
            # NOTE(review): help says "(csv)" but the default is a .toml name,
            # and matching files are later read with CSV.read — confirm which
            # extension is actually produced upstream before changing either.
            default = "daily_summary.toml"
        "--exclude_filename"
            help = "Filename with dates to exclude (csv)"
            arg_type = String
        "--overwrite"
            help = "Specify it's ok to overwrite the output file."
            action = :store_true
        "--log_info"
            help = "Print info-level messages."
            action = :store_true
        "--log_debug"
            help = "Print debug-level messages."
            action = :store_true
    end

    return parse_args(s)
end
38+
args = parse_commandline()

# Raise console logging verbosity when requested (debug takes precedence over info).
requested_level = args["log_debug"] ? Logging.Debug :
                  args["log_info"]  ? Logging.Info  : nothing
if !isnothing(requested_level)
    global_logger(ConsoleLogger(stderr, requested_level))
end

input_fn   = args["input_filename"]
input_path = args["input_path"]
output_fn  = joinpath(input_path, args["output"])

# Refuse to clobber an existing, non-empty output file unless --overwrite was
# given; an absent or zero-length file is fair game.
output_is_protected = isfile(output_fn) && filesize(output_fn) != 0 && !args["overwrite"]
if output_is_protected
    @error "Can't overwrite " output_filename=output_fn
    exit(1)
end
# Should we create empty file as a lock?  (previously considered: touch(output_fn))
55+
56+
@info "# Loading packages"
57+
using Dates, TOML
58+
using CSV, DataFrames, Query, Glob
59+
60+
@info "# Finding files named $input_fn"
# Daily summaries may live 1, 2, or 3 numeric-directory levels deep under
# input_path (e.g. DD/, MM/DD/, or YYYY/MM/DD/); search all three layouts.
files1 = glob([r"\d{2}",args["input_filename"]],args["input_path"])
files2 = glob([r"\d{2}",r"\d{2}",args["input_filename"]],args["input_path"])
files3 = glob([r"\d{4}",r"\d{2}",r"\d{2}",args["input_filename"]],args["input_path"])
files = vcat(files1,files2,files3)
# Explicit check instead of @assert: asserts are for internal invariants and
# may be compiled out; a missing-input condition must always be reported.
isempty(files) && error("No files named $(args["input_filename"]) found under $(args["input_path"])")
66+
67+
# Load the optional table of dates to skip when combining daily files.
exclude_fn = args["exclude_filename"]
if !isnothing(exclude_fn) && isfile(exclude_fn) && (filesize(exclude_fn)>0)
    @info "# Reading days to exclude."
    df_exclude = CSV.read(exclude_fn,DataFrame)
    @info "# Found $(size(df_exclude,1)) dates to exclude"
else
    # Bug fix: a bare DataFrame() has no `date_to_exclude` column, so the
    # membership test `in(..., df_exclude.date_to_exclude)` in the file loop
    # would throw an ArgumentError whenever no exclusion file was supplied.
    # An empty, correctly-typed column makes "nothing excluded" work transparently.
    df_exclude = DataFrame(date_to_exclude=Date[])
end
74+
75+
@info "# Parsing daily csv files." num_files=length(files)
# Preallocate for the worst case (every file usable); trimmed after the loop.
daily = Vector{DataFrame}(undef, length(files))
println("files = ", files)
flush(stdout)
flush(stderr)
j = 0
for file in files
    if filesize(file) > 0
        @info "# Processing $file"
        d = CSV.read(file,DataFrame)
        # NOTE(review): assumes each daily csv has a non-empty `jd_drp` column
        # of Julian dates — confirm the upstream writer guarantees this,
        # otherwise `first` on an empty column will throw.
        obs_day = Date(julian2datetime(first(d.jd_drp)))
        # Skip days listed in the exclusion table.  Guard on the column being
        # present (bug fix: previously this threw an ArgumentError when the
        # exclusion table was an empty DataFrame without that column).
        if hasproperty(df_exclude, :date_to_exclude) && in(obs_day, df_exclude.date_to_exclude)
            continue
        end
        global j += 1
        daily[j] = d
    end
end
num_days_with_usable_obs = j
resize!(daily,num_days_with_usable_obs)
103+
104+
@info "# Making dataframe"
# Stack the per-day tables into one; cols=:union keeps every column that
# appears in any day, filling absent ones with missing.
df = DataFrame()
for day in daily
    append!(df, day, cols=:union)
end

# Order rows chronologically by the DRP Julian date.
df_sorted = sort!(df,"jd_drp")

@info "# Writing CSV file"
CSV.write(output_fn,df_sorted )
140+
141+

0 commit comments

Comments
 (0)